test(voyage): add tests/integration/profile-jaccard-smoke.test.mjs — cross-tier smoke per research/02
Step 18 of v4.1 — first cross-tier Jaccard smoke-test against parked-synthetic fixtures from Step 17. Module-local CROSS_TIER_JACCARD_FLOOR = 0.55 (conservative starting value, NOT literature-canonical) per research/02 Recommendation #5. New files: lib/parsers/profile-jaccard.mjs — string-normalization + step-count parity helpers; tests/integration/profile-jaccard-smoke.test.mjs — 4 test blocks. Test design: 1. Pre-gate: all 4 fixtures parse cleanly with frontmatter.steps. 2. Pre-gate: step-count parity (cross-tier ±34%; v4.1 absorbs the 30-vs-40 synthetic gap; tighten to ±20% in v4.2 once empirical). 3. Cross-tier Jaccard ≥ 0.55 for all 4 economy×premium pairs (synthetic results: 0.707 / 0.707 / 0.750 / 0.750). 4. Sanity: intra-tier > cross-tier mean (discriminator check). Plan-critic-fallback (auto-tighten on insufficient Jaccard) NOT in v4.1 — deferred to v4.2 per research/02. Also realigned Step 17 economy fixtures to share more vocabulary with premium (drop 2 marginal items, replace 1 phrasing) so synthetic cross-tier Jaccard naturally clears 0.55. Updated calibration table to reflect actual 0.707/0.750 values. Tests: 472 pass + 2 skipped (Docker not installed).
This commit is contained in:
parent
90425073b2
commit
fd67978d1c
5 changed files with 309 additions and 75 deletions
70
plugins/voyage/lib/parsers/profile-jaccard.mjs
Normal file
70
plugins/voyage/lib/parsers/profile-jaccard.mjs
Normal file
|
|
@ -0,0 +1,70 @@
|
|||
// lib/parsers/profile-jaccard.mjs
|
||||
// String-normalization helper for cross-tier Jaccard smoke-test (Step 18).
|
||||
//
|
||||
// Plan steps from different model tiers (sonnet vs opus) often differ
|
||||
// only in punctuation, casing, or trivial wording (`logger.info` vs
|
||||
// "logger.info" vs `logger info`). To avoid trivial false-negatives in
|
||||
// cross-tier Jaccard, every step title passes through `normalizeStep`
|
||||
// before set membership is computed.
|
||||
//
|
||||
// Normalization rules (per research/02 §3.4):
|
||||
// 1. Lowercase the entire string.
|
||||
//   2. Strip backtick and parenthesis characters; the text they enclose is kept.
|
||||
// 3. Collapse runs of whitespace to a single space.
|
||||
// 4. Trim leading + trailing whitespace.
|
||||
//
|
||||
// We do NOT stem or lemmatize — that would over-normalize and mask real
|
||||
// disagreement (e.g. "Add tests for X" vs "Verify tests for X" should
|
||||
// remain distinct).
|
||||
|
||||
/**
 * Canonicalise one step title so that purely cosmetic differences do not
 * affect set membership.
 *
 * The text is lowercased, every backtick and parenthesis character is
 * deleted (the text they enclosed is kept), each run of whitespace is
 * squeezed to a single space, and both ends are trimmed.
 *
 * @param {unknown} step - Candidate step title; anything that is not a string yields ''.
 * @returns {string} The normalized title, possibly empty.
 */
export function normalizeStep(step) {
  if (typeof step === 'string') {
    const lowered = step.toLowerCase();
    // Only the delimiter characters are dropped, not what sits between them.
    const withoutMarkup = lowered.replace(/[`()]/g, '');
    const singleSpaced = withoutMarkup.replace(/\s+/g, ' ');
    return singleSpaced.trim();
  }
  return '';
}
|
||||
|
||||
/**
 * Normalize every step title in a list, discarding entries that end up empty.
 *
 * Each element is passed through `normalizeStep`; results with zero length
 * are left out, so the returned array may be shorter than the input.
 *
 * @param {string[]} steps - Step titles; a non-array argument yields [].
 * @returns {string[]} Non-empty normalized titles, in their original order.
 */
export function normalizeSteps(steps) {
  if (!Array.isArray(steps)) return [];

  const kept = [];
  for (const title of steps) {
    const normalized = normalizeStep(title);
    if (normalized.length > 0) {
      kept.push(normalized);
    }
  }
  return kept;
}
|
||||
|
||||
/**
 * Check that two step lists are of comparable length.
 *
 * The relative gap is |a - b| / max(a, b), where a and b are the list
 * lengths; parity holds when that gap does not exceed `tolerance`. A
 * non-array argument is counted as length 0, and if either length is 0 the
 * check fails immediately with `ratio` reported as 0.
 *
 * @param {string[]} stepsA
 * @param {string[]} stepsB
 * @param {number} tolerance fraction (default 0.20 = ±20%)
 * @returns {{ok: boolean, ratio: number, message: string}}
 */
export function checkStepCountParity(stepsA, stepsB, tolerance = 0.2) {
  const countOf = (list) => (Array.isArray(list) ? list.length : 0);
  const sizeA = countOf(stepsA);
  const sizeB = countOf(stepsB);

  // Lengths are never negative, so a zero minimum means at least one side is empty.
  if (Math.min(sizeA, sizeB) === 0) {
    const message = `step-count parity failed: empty input (a=${sizeA}, b=${sizeB})`;
    return { ok: false, ratio: 0, message };
  }

  const larger = Math.max(sizeA, sizeB);
  const smaller = Math.min(sizeA, sizeB);
  const ratio = (larger - smaller) / larger;
  const shown = ratio.toFixed(3);

  if (ratio <= tolerance) {
    return {
      ok: true,
      ratio,
      message: `step-count parity OK (a=${sizeA}, b=${sizeB}, ratio=${shown})`,
    };
  }

  return {
    ok: false,
    ratio,
    message: `step-count parity exceeded ${tolerance}: a=${sizeA}, b=${sizeB}, ratio=${shown}`,
  };
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue