test(ultraplan-local): add plan-determinism + review-determinism synthetic fixtures (SC7 floor)
Adds 6 files in tests/synthetic/ exercising the determinism pipeline at the SC7 brief floor (Jaccard >= 0.833). Plan fixture pair: 40 step titles each with 38 shared (Jaccard 0.905). Review fixture pair: 30 finding-IDs each with 28 shared (Jaccard 0.875). Reuses lib/parsers/jaccard.mjs + lib/parsers/finding-id.mjs. The new pair coexists with tests/lib/review-determinism.test.mjs which holds the older SC4 (0.70) floor against tests/fixtures/ultrareview/. The lower floor protects pipeline regressions; the higher floor anchors the speedup brief's determinism aspiration. [skip-docs]
This commit is contained in:
parent
b1738b419c
commit
0c0a87e709
6 changed files with 425 additions and 0 deletions
|
|
@ -0,0 +1,79 @@
|
|||
// tests/synthetic/review-determinism.test.mjs
|
||||
// SC7 review-determinism floor — Jaccard pipeline test.
|
||||
//
|
||||
// Reads two synthetic review-run fixtures and asserts that
|
||||
// jaccardSimilarity(findingTokens(reviewA), findingTokens(reviewB)) >= 0.833.
|
||||
//
|
||||
// This is the SC7 (higher) floor. The companion
|
||||
// tests/lib/review-determinism.test.mjs holds the SC4 (0.70) floor against
|
||||
// tests/fixtures/ultrareview/. Both pairs coexist on purpose: the lower
|
||||
// floor protects against pipeline regressions, the higher one anchors the
|
||||
// determinism aspiration set in the speedup brief.
|
||||
|
||||
import { test } from 'node:test';
|
||||
import { strict as assert } from 'node:assert';
|
||||
import { readFileSync } from 'node:fs';
|
||||
import { join, dirname } from 'node:path';
|
||||
import { fileURLToPath } from 'node:url';
|
||||
import { jaccardSimilarity } from '../../lib/parsers/jaccard.mjs';
|
||||
import { parseFindingId } from '../../lib/parsers/finding-id.mjs';
|
||||
import { parseDocument } from '../../lib/util/frontmatter.mjs';
|
||||
|
||||
const HERE = dirname(fileURLToPath(import.meta.url));
|
||||
const ROOT = join(HERE, '..', '..');
|
||||
|
||||
const SC7_THRESHOLD = 0.833;
|
||||
|
||||
/**
 * Load a fixture file and return the `findings` array from its frontmatter.
 *
 * @param {string} rel - Fixture path, joined onto ROOT.
 * @returns {Array} The value found at `frontmatter.findings`.
 * Fails an assertion when parseDocument does not report the document as
 * valid, or when `frontmatter.findings` is not an array.
 */
function loadFindings(rel) {
  const absolutePath = join(ROOT, rel);
  const doc = parseDocument(readFileSync(absolutePath, 'utf-8'));

  // Surface every parser error message in the assertion text.
  const errorSummary = (doc.errors || []).map((err) => err.message).join(', ');
  assert.ok(doc.valid, `frontmatter of ${rel} did not parse: ${errorSummary}`);

  const { frontmatter } = doc.parsed;
  const findings = frontmatter && frontmatter.findings;
  assert.ok(Array.isArray(findings), `frontmatter.findings of ${rel} is not an array`);

  return findings;
}
|
||||
|
||||
// Compares the findings of the two synthetic review runs and requires their
// Jaccard similarity to be at least SC7_THRESHOLD.
test('review determinism — Jaccard of synthetic review-run-A vs review-run-B meets SC7 threshold (0.833)', () => {
  const runA = loadFindings('tests/synthetic/review-run-A.md');
  const runB = loadFindings('tests/synthetic/review-run-B.md');
  const similarity = jaccardSimilarity(runA, runB);

  const failureMessage = [
    `jaccardSimilarity(findingTokens(reviewA), findingTokens(reviewB)) = ${similarity} < ${SC7_THRESHOLD} (SC7 floor). `,
    `Fixtures may have drifted — recompute IDs via lib/parsers/finding-id.mjs.`,
  ].join('');

  assert.ok(similarity >= SC7_THRESHOLD, failureMessage);
});
|
||||
|
||||
// Every finding ID in both fixtures must be accepted by parseFindingId.
test('review determinism — finding IDs are 40-char hex (parseFindingId valid)', () => {
  const fixturePaths = ['tests/synthetic/review-run-A.md', 'tests/synthetic/review-run-B.md'];

  fixturePaths.forEach((rel) => {
    // Fixtures are checked in order, and IDs in file order, so the first
    // offending ID is the one reported.
    for (const id of loadFindings(rel)) {
      const { valid } = parseFindingId(id);
      const failureMessage = `${rel}: ID ${JSON.stringify(id)} is not a 40-char lowercase hex string (parseFindingId rejected it)`;
      assert.ok(valid, failureMessage);
    }
  });
});
|
||||
|
||||
// Each fixture must hold at least 25 distinct finding IDs.
test('review determinism — both fixtures contain at least 25 unique finding-IDs', () => {
  const fixturePaths = ['tests/synthetic/review-run-A.md', 'tests/synthetic/review-run-B.md'];

  fixturePaths.forEach((rel) => {
    // Deduplicate once; the count feeds both the check and the message.
    const uniqueCount = new Set(loadFindings(rel)).size;
    const failureMessage = `${rel}: < 25 unique finding-IDs (got ${uniqueCount}). Synthetic fixtures must reflect a substantial review.`;
    assert.ok(uniqueCount >= 25, failureMessage);
  });
});
|
||||
|
||||
// Within a single run, the number of distinct IDs must equal the number of
// entries, i.e. no ID is listed twice.
test('review determinism — no duplicate IDs within run', () => {
  const fixturePaths = ['tests/synthetic/review-run-A.md', 'tests/synthetic/review-run-B.md'];

  fixturePaths.forEach((rel) => {
    const findings = loadFindings(rel);
    const uniqueCount = new Set(findings).size;
    const totalCount = findings.length;
    const failureMessage = `${rel}: contains duplicate finding-IDs (${totalCount} entries vs ${uniqueCount} unique)`;

    assert.strictEqual(uniqueCount, totalCount, failureMessage);
  });
});
|
||||
Loading…
Add table
Add a link
Reference in a new issue