// lib/parsers/jaccard.mjs
// Jaccard similarity for SC4 determinism floor.
//
//   jaccard(A, B) = |A ∩ B| / |A ∪ B|
//
// Inputs are arrays of strings; deduplicated internally.
// Both empty → 1.0 (vacuously identical). One empty → 0.0.

/**
 * Compute Jaccard similarity between two string sets.
 *
 * Duplicates within either input are ignored: each array is converted to a
 * Set before comparison, so `['a', 'a']` is treated the same as `['a']`.
 *
 * @param {string[]} setA - First collection of items.
 * @param {string[]} setB - Second collection of items.
 * @returns {number} Similarity in [0, 1]: 1.0 when both inputs are empty,
 *   0.0 when exactly one is empty, otherwise |A ∩ B| / |A ∪ B|.
 * @throws {TypeError} If either input is not an array.
 */
export function jaccardSimilarity(setA, setB) {
  if (!Array.isArray(setA) || !Array.isArray(setB)) {
    throw new TypeError('jaccardSimilarity: both inputs must be arrays');
  }

  const a = new Set(setA);
  const b = new Set(setB);

  // Two empty sets are vacuously identical; returning early here also avoids
  // the 0 / 0 division that the general formula would produce.
  if (a.size === 0 && b.size === 0) return 1.0;
  if (a.size === 0 || b.size === 0) return 0.0;

  let intersection = 0;
  for (const item of a) {
    if (b.has(item)) intersection += 1;
  }

  // Inclusion–exclusion: |A ∪ B| = |A| + |B| − |A ∩ B|.
  const union = a.size + b.size - intersection;
  return intersection / union;
}

/**
 * Check whether a similarity meets a threshold.
 *
 * @param {number} similarity - Similarity score to test.
 * @param {number} threshold - Minimum acceptable score (inclusive).
 * @returns {boolean} `true` only when both arguments are finite numbers and
 *   `similarity >= threshold`; `false` for non-numbers, NaN, or ±Infinity.
 */
export function meetsThreshold(similarity, threshold) {
  // Number.isFinite does not coerce its argument, so it rejects non-number
  // values as well as NaN and ±Infinity in a single check.
  if (!Number.isFinite(similarity) || !Number.isFinite(threshold)) return false;
  return similarity >= threshold;
}