// lib/parsers/profile-jaccard.mjs
// String-normalization helper for cross-tier Jaccard smoke-test (Step 18).
//
// Plan steps from different model tiers (sonnet vs opus) often differ
// only in punctuation, casing, or trivial wording (`logger.info` vs
// "logger.info" vs `logger info`). To avoid trivial false-negatives in
// cross-tier Jaccard, every step title passes through `normalizeStep`
// before set membership is computed.
//
// Normalization rules (per research/02 §3.4):
//   1. Lowercase the entire string.
//   2. Strip backticks and parentheses (`...` and (...)).
//   3. Collapse runs of whitespace to a single space.
//   4. Trim leading + trailing whitespace.
//
// NOTE(review): only the characters listed in rule 2 are removed. Double
// quotes and dots are kept, so `"logger.info"` and `logger info` still
// normalize to different strings than `logger.info`.
//
// We do NOT stem or lemmatize — that would over-normalize and mask real
// disagreement (e.g. "Add tests for X" vs "Verify tests for X" should
// remain distinct).

/**
 * Normalize a single step-title string.
 *
 * Applies, in order: lowercase, removal of every backtick and parenthesis
 * character, collapsing of whitespace runs to one space, and trimming.
 *
 * @param {unknown} step - Candidate step title; anything that is not a
 *   string yields the empty string.
 * @returns {string} The normalized title, or `''` for non-string input.
 */
export function normalizeStep(step) {
  if (typeof step !== 'string') {
    return '';
  }

  const lowered = step.toLowerCase();
  // Remove the delimiter characters themselves; their contents are kept.
  const withoutDelimiters = lowered.replace(/[`()]/g, '');
  const singleSpaced = withoutDelimiters.replace(/\s+/g, ' ');
  return singleSpaced.trim();
}

/**
 * Normalize an array of step titles.
 *
 * Each element goes through `normalizeStep`; entries that normalize to the
 * empty string (non-strings, blank strings, delimiter-only strings) are
 * dropped, so the result may be shorter than the input.
 *
 * @param {string[]} steps - Step titles; a non-array yields `[]`.
 * @returns {string[]} Non-empty normalized titles in their original order.
 */
export function normalizeSteps(steps) {
  if (!Array.isArray(steps)) {
    return [];
  }

  const normalized = [];
  for (const entry of steps) {
    const title = normalizeStep(entry);
    if (title.length > 0) {
      normalized.push(title);
    }
  }
  return normalized;
}

/**
 * Verify step-count parity within a tolerance band.
 *
 * The ratio is `|a - b| / max(a, b)`, where `a` and `b` are the raw array
 * lengths (a non-array counts as length 0). Parity holds when the ratio is
 * less than or equal to `tolerance`. If either side is empty the check
 * fails with `ratio: 0`.
 *
 * @param {string[]} stepsA - First list of steps.
 * @param {string[]} stepsB - Second list of steps.
 * @param {number} tolerance - Maximum allowed ratio (default 0.20 = ±20%).
 * @returns {{ok: boolean, ratio: number, message: string}} Outcome, the
 *   computed ratio, and a human-readable summary.
 */
export function checkStepCountParity(stepsA, stepsB, tolerance = 0.2) {
  const a = Array.isArray(stepsA) ? stepsA.length : 0;
  const b = Array.isArray(stepsB) ? stepsB.length : 0;

  // An empty side makes the ratio meaningless, so report failure outright.
  if (a === 0 || b === 0) {
    return {
      ok: false,
      ratio: 0,
      message: `step-count parity failed: empty input (a=${a}, b=${b})`,
    };
  }

  const ratio = Math.abs(a - b) / Math.max(a, b);
  const ok = ratio <= tolerance;
  const message = ok
    ? `step-count parity OK (a=${a}, b=${b}, ratio=${ratio.toFixed(3)})`
    : `step-count parity exceeded ${tolerance}: a=${a}, b=${b}, ratio=${ratio.toFixed(3)}`;

  return { ok, ratio, message };
}