test(ultraplan-local): add review determinism integration test
3 integration tests using the run-A/run-B fixtures:
- Jaccard(A, B) ≥ 0.70 (SC4 brief threshold)
- IDs match the 40-char hex shape (lib/parsers/finding-id.mjs format)
- no duplicate IDs within a single run

Tests the Jaccard PIPELINE; real-LLM determinism is deferred to v1.1.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
parent
5aa37941ed
commit
b69fdea883
1 changed file with 69 additions and 0 deletions
|
|
@ -0,0 +1,69 @@
|
|||
// tests/lib/review-determinism.test.mjs
|
||||
// SC4 determinism floor — Jaccard pipeline test.
|
||||
//
|
||||
// Reads two synthetic review-run fixtures (A ⊂ B), parses their findings
|
||||
// arrays from frontmatter, and asserts:
|
||||
// 1. Jaccard(A, B) ≥ 0.70 (the SC4 brief threshold)
|
||||
// 2. every finding-ID is 40-char hex (matches lib/parsers/finding-id.mjs format)
|
||||
// 3. no duplicate IDs within either run
|
||||
//
|
||||
// This test exercises the Jaccard PIPELINE on a known input. It does NOT
|
||||
// measure real-LLM determinism — that is deferred to v1.1, see
|
||||
// tests/fixtures/ultrareview/README.md.
|
||||
|
||||
import { test } from 'node:test';
|
||||
import { strict as assert } from 'node:assert';
|
||||
import { readFileSync } from 'node:fs';
|
||||
import { join, dirname } from 'node:path';
|
||||
import { fileURLToPath } from 'node:url';
|
||||
import { jaccardSimilarity } from '../../lib/parsers/jaccard.mjs';
|
||||
import { parseDocument } from '../../lib/util/frontmatter.mjs';
|
||||
|
||||
// Directory containing this test file (ES modules have no __dirname).
const HERE = dirname(fileURLToPath(import.meta.url));
// Two directory levels above HERE; fixture paths below are resolved against it.
const ROOT = join(HERE, '..', '..');

// A finding-ID must be exactly 40 lowercase hexadecimal characters.
const HEX_ID_RE = /^[0-9a-f]{40}$/;
// Minimum acceptable Jaccard similarity between two review runs (SC4).
const SC4_THRESHOLD = 0.70;
|
||||
|
||||
/**
 * Reads a fixture file and returns the `findings` array from its frontmatter.
 *
 * @param {string} rel - Fixture path, resolved relative to ROOT.
 * @returns {Array} The value of `frontmatter.findings`.
 * @throws {AssertionError} If parseDocument reports the document as invalid,
 *   or if `frontmatter.findings` is not an array.
 */
function loadFindings(rel) {
  const text = readFileSync(join(ROOT, rel), 'utf-8');
  const doc = parseDocument(text);

  // Collect parser error messages so a failure explains itself.
  const problems = (doc.errors || []).map((e) => e.message).join(', ');
  assert.ok(doc.valid, `frontmatter of ${rel} did not parse: ${problems}`);

  // Optional chaining: a missing `parsed` or `frontmatter` yields undefined
  // and is reported by the assertion below instead of a bare TypeError.
  const findings = doc.parsed?.frontmatter?.findings;
  assert.ok(Array.isArray(findings), `frontmatter.findings of ${rel} is not an array`);
  return findings;
}
|
||||
|
||||
// Pipeline check: the similarity computed for the two fixture runs must not
// fall below the SC4 floor.
test('review determinism — Jaccard of fixture run-A vs run-B meets SC4 threshold (0.70)', () => {
  const a = loadFindings('tests/fixtures/ultrareview/review-run-A.md');
  const b = loadFindings('tests/fixtures/ultrareview/review-run-B.md');
  const jaccard = jaccardSimilarity(a, b);

  assert.ok(
    jaccard >= SC4_THRESHOLD,
    `Jaccard(A, B) = ${jaccard} < ${SC4_THRESHOLD} (SC4 threshold). Fixtures may have drifted — recompute IDs via lib/parsers/finding-id.mjs.`,
  );
});
|
||||
|
||||
// Shape check: every entry in each fixture's findings array must be a string
// matching HEX_ID_RE.
test('review determinism — finding IDs are 40-char hex', () => {
  for (const rel of ['tests/fixtures/ultrareview/review-run-A.md', 'tests/fixtures/ultrareview/review-run-B.md']) {
    const findings = loadFindings(rel);
    for (const id of findings) {
      assert.ok(
        // typeof guard first: RegExp.prototype.test would coerce non-strings.
        typeof id === 'string' && HEX_ID_RE.test(id),
        `${rel}: ID ${JSON.stringify(id)} is not a 40-char lowercase hex string`,
      );
    }
  }
});
|
||||
|
||||
// Uniqueness check: within each fixture, the number of distinct IDs must equal
// the number of entries.
test('review determinism — no duplicate IDs within run', () => {
  for (const rel of ['tests/fixtures/ultrareview/review-run-A.md', 'tests/fixtures/ultrareview/review-run-B.md']) {
    const findings = loadFindings(rel);
    // Build the Set once; it serves both the comparison and the message.
    const uniqueCount = new Set(findings).size;
    assert.strictEqual(
      uniqueCount,
      findings.length,
      `${rel}: contains duplicate finding-IDs (${findings.length} entries vs ${uniqueCount} unique)`,
    );
  }
});
|
||||
Loading…
Add table
Add a link
Reference in a new issue