Session 5 of voyage-rebrand (V6). Operator-authorized cross-plugin scope. - git mv plugins/ultraplan-local plugins/voyage (rename detected, history preserved) - .claude-plugin/marketplace.json: voyage entry replaces ultraplan-local - CLAUDE.md: voyage row in plugin list, voyage in design-system consumer list - README.md: bulk rename ultra*-local commands -> trek* commands; ultraplan-local refs -> voyage; type discriminators (type: trekbrief/trekreview); session-title pattern (voyage:<command>:<slug>); v4.0.0 release-note paragraph - plugins/voyage/.claude-plugin/plugin.json: homepage/repository URLs point to monorepo voyage path - plugins/voyage/verify.sh: drop URL whitelist exception (no longer needed) Closes voyage-rebrand. bash plugins/voyage/verify.sh PASS 7/7. npm test 361/361.
69 lines
2.8 KiB
JavaScript
69 lines
2.8 KiB
JavaScript
// tests/lib/review-determinism.test.mjs
|
|
// SC4 determinism floor — Jaccard pipeline test.
|
|
//
|
|
// Reads two synthetic review-run fixtures (A ⊂ B), parses their findings
|
|
// arrays from frontmatter, and asserts:
|
|
// 1. Jaccard(A, B) ≥ 0.70 (the SC4 brief threshold)
|
|
// 2. every finding-ID is 40-char hex (matches lib/parsers/finding-id.mjs format)
|
|
// 3. no duplicate IDs within either run
|
|
//
|
|
// This test exercises the Jaccard PIPELINE on a known input. It does NOT
|
|
// measure real-LLM determinism — that is deferred to v1.1, see
|
|
// tests/fixtures/trekreview/README.md.
|
|
|
|
import { test } from 'node:test';
|
|
import { strict as assert } from 'node:assert';
|
|
import { readFileSync } from 'node:fs';
|
|
import { join, dirname } from 'node:path';
|
|
import { fileURLToPath } from 'node:url';
|
|
import { jaccardSimilarity } from '../../lib/parsers/jaccard.mjs';
|
|
import { parseDocument } from '../../lib/util/frontmatter.mjs';
|
|
|
|
// --- Assertion parameters ---------------------------------------------------

// Minimum Jaccard similarity the two fixture runs must reach (SC4 threshold).
const SC4_THRESHOLD = 0.70;

// A finding-ID is exactly 40 lowercase hexadecimal characters.
const HEX_ID_RE = /^[0-9a-f]{40}$/;

// --- Path anchors -----------------------------------------------------------

// Directory that contains this test module.
const HERE = dirname(fileURLToPath(import.meta.url));

// Two directories above this module; fixture paths are joined onto it.
const ROOT = join(HERE, '..', '..');
|
|
|
|
/**
 * Read a fixture document and return the `findings` array from its frontmatter.
 *
 * @param {string} rel - Fixture path, joined onto ROOT before reading.
 * @returns {Array} The value stored under `frontmatter.findings`.
 * @throws {AssertionError} When the document is reported as invalid by
 *   parseDocument, or when `frontmatter.findings` is not an array.
 */
function loadFindings(rel) {
  const absolutePath = join(ROOT, rel);
  const doc = parseDocument(readFileSync(absolutePath, 'utf-8'));

  // Collect parser error messages up front so the assertion can report them.
  const errorSummary = (doc.errors || []).map((err) => err.message).join(', ');
  assert.ok(doc.valid, `frontmatter of ${rel} did not parse: ${errorSummary}`);

  const { frontmatter } = doc.parsed;
  const findings = frontmatter && frontmatter.findings;
  assert.ok(Array.isArray(findings), `frontmatter.findings of ${rel} is not an array`);

  return findings;
}
|
|
|
|
// Pipeline check: the Jaccard similarity of the two fixture runs must be at
// least SC4_THRESHOLD. Run A is loaded before run B.
test('review determinism — Jaccard of fixture run-A vs run-B meets SC4 threshold (0.70)', () => {
  const [runA, runB] = [
    'tests/fixtures/trekreview/review-run-A.md',
    'tests/fixtures/trekreview/review-run-B.md',
  ].map((rel) => loadFindings(rel));

  const score = jaccardSimilarity(runA, runB);

  const failure =
    `Jaccard(A, B) = ${score} < ${SC4_THRESHOLD} (SC4 threshold). ` +
    `Fixtures may have drifted — recompute IDs via lib/parsers/finding-id.mjs.`;

  assert.ok(score >= SC4_THRESHOLD, failure);
});
|
|
|
|
// Format check: every entry in each fixture's findings list must be a string
// that matches HEX_ID_RE. The first offending entry fails the test.
test('review determinism — finding IDs are 40-char hex', () => {
  const fixturePaths = [
    'tests/fixtures/trekreview/review-run-A.md',
    'tests/fixtures/trekreview/review-run-B.md',
  ];
  const isHexId = (value) => typeof value === 'string' && HEX_ID_RE.test(value);

  for (const rel of fixturePaths) {
    for (const id of loadFindings(rel)) {
      const complaint = `${rel}: ID ${JSON.stringify(id)} is not a 40-char lowercase hex string`;
      assert.ok(isHexId(id), complaint);
    }
  }
});
|
|
|
|
// Uniqueness check: within each fixture, the number of distinct finding-IDs
// must equal the number of entries.
test('review determinism — no duplicate IDs within run', () => {
  const fixturePaths = [
    'tests/fixtures/trekreview/review-run-A.md',
    'tests/fixtures/trekreview/review-run-B.md',
  ];

  for (const rel of fixturePaths) {
    const findings = loadFindings(rel);
    const entryCount = findings.length;
    const uniqueCount = new Set(findings).size;

    assert.strictEqual(
      uniqueCount,
      entryCount,
      `${rel}: contains duplicate finding-IDs (${entryCount} entries vs ${uniqueCount} unique)`,
    );
  }
});
|