feat(ultraplan-local): add lib/parsers/jaccard.mjs

This commit is contained in:
Kjell Tore Guttormsen 2026-05-01 13:28:44 +02:00
commit cf56fbbe27
2 changed files with 97 additions and 0 deletions

View file

@ -0,0 +1,41 @@
// lib/parsers/jaccard.mjs
// Jaccard similarity for SC4 determinism floor.
//
// jaccard(A, B) = |A ∩ B| / |A ∪ B|
// Inputs are arrays of strings; deduplicated internally.
// Both empty → 1.0 (vacuously identical). One empty → 0.0.
/**
 * Jaccard index of two collections, each treated as a set.
 *
 * Duplicates inside either input are collapsed before comparing. When both
 * inputs are empty the result is 1.0; when exactly one is empty it is 0.0.
 *
 * @param {string[]} setA - first collection of members
 * @param {string[]} setB - second collection of members
 * @returns {number} |A ∩ B| / |A ∪ B|, a value in [0, 1]
 * @throws {TypeError} if either argument is not an array
 */
export function jaccardSimilarity(setA, setB) {
  const bothArrays = Array.isArray(setA) && Array.isArray(setB);
  if (!bothArrays) {
    throw new TypeError('jaccardSimilarity: both inputs must be arrays');
  }

  const left = new Set(setA);
  const right = new Set(setB);

  // Empty-set cases are settled up front so the division below never sees 0/0.
  if (left.size === 0) {
    return right.size === 0 ? 1.0 : 0.0;
  }
  if (right.size === 0) {
    return 0.0;
  }

  const shared = [...left].filter((member) => right.has(member)).length;

  // Inclusion–exclusion: |A ∪ B| = |A| + |B| − |A ∩ B|.
  return shared / (left.size + right.size - shared);
}
/**
 * Decide whether a similarity score reaches a required threshold.
 *
 * Any argument that is not a finite number (wrong type, NaN, ±Infinity)
 * makes the check fail rather than throw.
 *
 * @param {number} similarity - score to test
 * @param {number} threshold - minimum acceptable score (inclusive)
 * @returns {boolean} true only when both values are finite numbers and
 *   similarity >= threshold
 */
export function meetsThreshold(similarity, threshold) {
  // Number.isFinite never coerces, so it rejects non-number types as well
  // as NaN and the infinities.
  const bothFinite = [similarity, threshold].every((value) => Number.isFinite(value));
  return bothFinite ? similarity >= threshold : false;
}

View file

@ -0,0 +1,56 @@
import { test } from 'node:test';
import { strict as assert } from 'node:assert';
import { jaccardSimilarity, meetsThreshold } from '../../lib/parsers/jaccard.mjs';
// --- jaccardSimilarity ------------------------------------------------------

test('jaccardSimilarity — identical sets → 1.0', () => {
  const ids = ['a', 'b', 'c'];
  assert.strictEqual(jaccardSimilarity(ids, [...ids]), 1.0);
});

test('jaccardSimilarity — disjoint sets → 0.0', () => {
  const left = ['a', 'b'];
  const right = ['c', 'd'];
  assert.strictEqual(jaccardSimilarity(left, right), 0.0);
});

test('jaccardSimilarity — partial overlap [a,b,c] vs [b,c,d] → 0.5', () => {
  // intersection {b,c} = 2, union {a,b,c,d} = 4
  const left = ['a', 'b', 'c'];
  const right = ['b', 'c', 'd'];
  assert.strictEqual(jaccardSimilarity(left, right), 0.5);
});

test('jaccardSimilarity — both empty → 1.0', () => {
  assert.strictEqual(jaccardSimilarity([], []), 1.0);
});

test('jaccardSimilarity — one empty → 0.0', () => {
  const single = ['a'];
  assert.strictEqual(jaccardSimilarity([], single), 0.0);
  assert.strictEqual(jaccardSimilarity(single, []), 0.0);
});

test('jaccardSimilarity — duplicates deduplicated within each set', () => {
  // Both inputs collapse to {a,b}, so they compare as identical.
  const left = ['a', 'a', 'b'];
  const right = ['a', 'b', 'b'];
  assert.strictEqual(jaccardSimilarity(left, right), 1.0);
});

test('jaccardSimilarity — fixture sets {α..ε} vs {α..ζ} → 0.833 (SC4 anchor)', () => {
  // SC4 fixture math: A = 5 IDs, B = A ∪ {ζ} = 6 IDs,
  // intersection = 5, union = 6 → 5/6.
  const A = ['α', 'β', 'γ', 'δ', 'ε'];
  const B = [...A, 'ζ'];
  const sim = jaccardSimilarity(A, B);
  const expected = 5 / 6;
  assert.ok(Math.abs(sim - expected) < 1e-9);
  assert.ok(sim >= 0.70); // SC4 threshold
});

test('jaccardSimilarity — non-array input throws TypeError', () => {
  const badCalls = [
    () => jaccardSimilarity('a', ['b']),
    () => jaccardSimilarity(['a'], null),
  ];
  for (const call of badCalls) {
    assert.throws(call, TypeError);
  }
});

// --- meetsThreshold ---------------------------------------------------------

test('meetsThreshold — boundary 0.699 → false, 0.700 → true', () => {
  const cases = [
    [0.699, false],
    [0.7, true],
    [0.71, true],
  ];
  for (const [score, expected] of cases) {
    assert.strictEqual(meetsThreshold(score, 0.7), expected);
  }
});

test('meetsThreshold — non-finite or non-number → false', () => {
  const rejected = [
    [NaN, 0.7],
    [Infinity, 0.7],
    ['0.8', 0.7],
    [0.8, null],
  ];
  for (const [score, floor] of rejected) {
    assert.strictEqual(meetsThreshold(score, floor), false);
  }
});