feat(ultraplan-local): add lib/parsers/jaccard.mjs
This commit is contained in:
parent
38b801f534
commit
cf56fbbe27
2 changed files with 97 additions and 0 deletions
41
plugins/ultraplan-local/lib/parsers/jaccard.mjs
Normal file
41
plugins/ultraplan-local/lib/parsers/jaccard.mjs
Normal file
|
|
@ -0,0 +1,41 @@
|
|||
// lib/parsers/jaccard.mjs
|
||||
// Jaccard similarity for SC4 determinism floor.
|
||||
//
|
||||
// jaccard(A, B) = |A ∩ B| / |A ∪ B|
|
||||
// Inputs are arrays of strings; deduplicated internally.
|
||||
// Both empty → 1.0 (vacuously identical). One empty → 0.0.
|
||||
|
||||
/**
 * Compute the Jaccard similarity |A ∩ B| / |A ∪ B| of two string collections.
 *
 * Each input is deduplicated through a Set before comparison, so repeated
 * entries within one array do not affect the score.
 *
 * @param {string[]} setA
 * @param {string[]} setB
 * @returns {number} similarity in [0, 1]; 1.0 when both inputs are empty,
 *   0.0 when exactly one of them is empty
 * @throws {TypeError} if either input is not an array
 */
export function jaccardSimilarity(setA, setB) {
  if (!Array.isArray(setA) || !Array.isArray(setB)) {
    throw new TypeError('jaccardSimilarity: both inputs must be arrays');
  }
  const a = new Set(setA);
  const b = new Set(setB);
  // Both empty is treated as vacuously identical; this also avoids 0 / 0.
  if (a.size === 0 && b.size === 0) return 1.0;
  if (a.size === 0 || b.size === 0) return 0.0;

  // Intersection is symmetric, so probe from the smaller set to do fewer lookups.
  const [smaller, larger] = a.size <= b.size ? [a, b] : [b, a];
  let intersection = 0;
  for (const x of smaller) {
    if (larger.has(x)) intersection += 1;
  }
  // Inclusion–exclusion: |A ∪ B| = |A| + |B| − |A ∩ B|.
  const union = a.size + b.size - intersection;
  return intersection / union;
}
|
||||
|
||||
/**
 * Check whether a similarity meets a threshold (inclusive: equal counts as met).
 *
 * @param {number} similarity
 * @param {number} threshold
 * @returns {boolean} true only when both arguments are finite numbers and
 *   similarity >= threshold; false for NaN, ±Infinity, or non-number arguments
 */
export function meetsThreshold(similarity, threshold) {
  // Number.isFinite does not coerce, so it rejects strings, null and undefined
  // as well as NaN and ±Infinity; no separate typeof check is needed.
  return (
    Number.isFinite(similarity) &&
    Number.isFinite(threshold) &&
    similarity >= threshold
  );
}
|
||||
56
plugins/ultraplan-local/tests/lib/jaccard.test.mjs
Normal file
56
plugins/ultraplan-local/tests/lib/jaccard.test.mjs
Normal file
|
|
@ -0,0 +1,56 @@
|
|||
import { test } from 'node:test';
|
||||
import { strict as assert } from 'node:assert';
|
||||
import { jaccardSimilarity, meetsThreshold } from '../../lib/parsers/jaccard.mjs';
|
||||
|
||||
// Two arrays with the same members must score exactly 1.0.
test('jaccardSimilarity — identical sets → 1.0', () => {
  assert.equal(jaccardSimilarity(['a', 'b', 'c'], ['a', 'b', 'c']), 1.0);
});
|
||||
|
||||
// Arrays with no members in common must score exactly 0.0.
test('jaccardSimilarity — disjoint sets → 0.0', () => {
  assert.equal(jaccardSimilarity(['a', 'b'], ['c', 'd']), 0.0);
});
|
||||
|
||||
// Intersection {b,c} has 2 members, union {a,b,c,d} has 4 → 2/4 = 0.5.
test('jaccardSimilarity — partial overlap [a,b,c] vs [b,c,d] → 0.5', () => {
  assert.equal(jaccardSimilarity(['a', 'b', 'c'], ['b', 'c', 'd']), 0.5);
});
|
||||
|
||||
// Two empty inputs are expected to score 1.0 rather than NaN.
test('jaccardSimilarity — both empty → 1.0', () => {
  assert.equal(jaccardSimilarity([], []), 1.0);
});
|
||||
|
||||
// Exactly one empty input scores 0.0, whichever argument position it is in.
test('jaccardSimilarity — one empty → 0.0', () => {
  assert.equal(jaccardSimilarity([], ['a']), 0.0);
  assert.equal(jaccardSimilarity(['a'], []), 0.0);
});
|
||||
|
||||
// Repeated entries inside one input must not change the score.
test('jaccardSimilarity — duplicates deduplicated within each set', () => {
  // [a,a,b] dedup → {a,b}; [a,b,b] dedup → {a,b}; identical → 1.0
  assert.equal(jaccardSimilarity(['a', 'a', 'b'], ['a', 'b', 'b']), 1.0);
});
|
||||
|
||||
// Five shared IDs plus one extra in B: expects 5/6 (compared with a tolerance,
// since 5/6 is not exactly representable) and that it clears the 0.70 floor.
test('jaccardSimilarity — fixture sets {α..ε} vs {α..ζ} → 0.833 (SC4 anchor)', () => {
  // SC4 fixture math: A=5 IDs, B=A∪{ζ}=6 IDs, intersection=5, union=6 → 5/6
  const A = ['α', 'β', 'γ', 'δ', 'ε'];
  const B = ['α', 'β', 'γ', 'δ', 'ε', 'ζ'];
  const sim = jaccardSimilarity(A, B);
  assert.ok(Math.abs(sim - 5 / 6) < 1e-9);
  assert.ok(sim >= 0.70); // SC4 threshold
});
|
||||
|
||||
// A non-array in either argument position must raise TypeError.
test('jaccardSimilarity — non-array input throws TypeError', () => {
  assert.throws(() => jaccardSimilarity('a', ['b']), TypeError);
  assert.throws(() => jaccardSimilarity(['a'], null), TypeError);
});
|
||||
|
||||
// The comparison is inclusive: a value equal to the threshold passes.
test('meetsThreshold — boundary 0.699 → false, 0.700 → true', () => {
  assert.equal(meetsThreshold(0.699, 0.7), false);
  assert.equal(meetsThreshold(0.7, 0.7), true);
  assert.equal(meetsThreshold(0.71, 0.7), true);
});
|
||||
|
||||
// NaN, Infinity, a numeric string, and null must all yield false, not throw.
test('meetsThreshold — non-finite or non-number → false', () => {
  assert.equal(meetsThreshold(NaN, 0.7), false);
  assert.equal(meetsThreshold(Infinity, 0.7), false);
  assert.equal(meetsThreshold('0.8', 0.7), false);
  assert.equal(meetsThreshold(0.8, null), false);
});
|
||||
Loading…
Add table
Add a link
Reference in a new issue