// lib/parsers/jaccard.mjs
// Jaccard similarity used for the SC4 determinism floor.
//
//   jaccard(A, B) = |A ∩ B| / |A ∪ B|
//
// Inputs are arrays of strings and are deduplicated before comparison.
// Two empty inputs score 1.0 (vacuously identical); exactly one empty
// input scores 0.0.

/**
 * Compute the Jaccard similarity of two collections of strings.
 *
 * Each array is first collapsed into a Set, so repeated entries within one
 * input do not affect the result.
 *
 * @param {string[]} setA - first collection of identifiers
 * @param {string[]} setB - second collection of identifiers
 * @returns {number} similarity in [0, 1]
 * @throws {TypeError} when either argument is not an array
 */
export function jaccardSimilarity(setA, setB) {
  const bothArrays = Array.isArray(setA) && Array.isArray(setB);
  if (!bothArrays) {
    throw new TypeError('jaccardSimilarity: both inputs must be arrays');
  }

  const left = new Set(setA);
  const right = new Set(setB);

  // Empty-set conventions: both empty → identical, exactly one empty → disjoint.
  if (left.size === 0 || right.size === 0) {
    return left.size === right.size ? 1.0 : 0.0;
  }

  const shared = [...left].filter((item) => right.has(item)).length;

  // |A ∪ B| = |A| + |B| − |A ∩ B|
  return shared / (left.size + right.size - shared);
}
/**
 * Decide whether a similarity score reaches a required threshold.
 *
 * The comparison is inclusive: a score equal to the threshold passes.
 * Any argument that is not a finite number (NaN, ±Infinity, strings, null,
 * undefined, ...) makes the check fail rather than throw.
 *
 * @param {number} similarity - score to test
 * @param {number} threshold - minimum acceptable score
 * @returns {boolean} true only when both values are finite numbers and
 *   similarity >= threshold
 */
export function meetsThreshold(similarity, threshold) {
  const bothUsable = [similarity, threshold].every(
    (value) => typeof value === 'number' && Number.isFinite(value),
  );
  return bothUsable && similarity >= threshold;
}
assert.ok(Math.abs(sim - 5 / 6) < 1e-9); + assert.ok(sim >= 0.70); // SC4 threshold +}); + +test('jaccardSimilarity — non-array input throws TypeError', () => { + assert.throws(() => jaccardSimilarity('a', ['b']), TypeError); + assert.throws(() => jaccardSimilarity(['a'], null), TypeError); +}); + +test('meetsThreshold — boundary 0.699 → false, 0.700 → true', () => { + assert.equal(meetsThreshold(0.699, 0.7), false); + assert.equal(meetsThreshold(0.7, 0.7), true); + assert.equal(meetsThreshold(0.71, 0.7), true); +}); + +test('meetsThreshold — non-finite or non-number → false', () => { + assert.equal(meetsThreshold(NaN, 0.7), false); + assert.equal(meetsThreshold(Infinity, 0.7), false); + assert.equal(meetsThreshold('0.8', 0.7), false); + assert.equal(meetsThreshold(0.8, null), false); +});