Session 5 of voyage-rebrand (V6). Operator-authorized cross-plugin scope. - git mv plugins/ultraplan-local plugins/voyage (rename detected, history preserved) - .claude-plugin/marketplace.json: voyage entry replaces ultraplan-local - CLAUDE.md: voyage row in plugin list, voyage in design-system consumer list - README.md: bulk rename ultra*-local commands -> trek* commands; ultraplan-local refs -> voyage; type discriminators (type: trekbrief/trekreview); session-title pattern (voyage:<command>:<slug>); v4.0.0 release-note paragraph - plugins/voyage/.claude-plugin/plugin.json: homepage/repository URLs point to monorepo voyage path - plugins/voyage/verify.sh: drop URL whitelist exception (no longer needed) Closes voyage-rebrand. bash plugins/voyage/verify.sh PASS 7/7. npm test 361/361.
63 lines
2.6 KiB
JavaScript
63 lines
2.6 KiB
JavaScript
// tests/synthetic/plan-determinism.test.mjs
// SC7 plan-determinism floor — Jaccard pipeline test.
//
// Reads two synthetic plan-run fixtures and asserts that
// jaccardSimilarity(stepsTokens(planA), stepsTokens(planB)) >= 0.833.
//
// This exercises the determinism pipeline (parser + jaccard) on a known
// input pair. It does NOT measure real-LLM determinism — that is deferred
// to a future run of the pipeline against examples/01-add-verbose-flag/.
|
|
|
|
import { test } from 'node:test';
|
|
import { strict as assert } from 'node:assert';
|
|
import { readFileSync } from 'node:fs';
|
|
import { join, dirname } from 'node:path';
|
|
import { fileURLToPath } from 'node:url';
|
|
import { jaccardSimilarity } from '../../lib/parsers/jaccard.mjs';
|
|
import { parseDocument } from '../../lib/util/frontmatter.mjs';
|
|
|
|
// Directory containing this test file, resolved from the module URL.
const HERE = dirname(fileURLToPath(import.meta.url));
|
|
// Two directory levels above HERE; loadSteps resolves fixture paths against it.
const ROOT = join(HERE, '..', '..');
|
|
|
|
// SC7 floor: minimum Jaccard similarity the two synthetic plan runs must reach.
const SC7_THRESHOLD = 0.833;
|
|
|
|
/**
 * Load the `steps` array from the frontmatter of a fixture file.
 *
 * The file is read as UTF-8 from a path resolved against ROOT and handed to
 * parseDocument. The function asserts that the document parsed as valid and
 * that `frontmatter.steps` is an array before returning it.
 *
 * @param {string} rel - Fixture path relative to ROOT.
 * @returns {Array} The `steps` value from the parsed frontmatter (the tests in
 *   this file treat its entries as step titles).
 * @throws {AssertionError} If the document is not valid or `steps` is not an array.
 */
function loadSteps(rel) {
  const fixturePath = join(ROOT, rel);
  const doc = parseDocument(readFileSync(fixturePath, 'utf-8'));

  // Built up front (as the inline template was) so the failure message lists
  // every parser error reported for the fixture.
  const errorSummary = (doc.errors || []).map((err) => err.message).join(', ');
  assert.ok(doc.valid, `frontmatter of ${rel} did not parse: ${errorSummary}`);

  // A missing frontmatter object falls through to the array check below.
  const steps = doc.parsed.frontmatter?.steps;
  assert.ok(Array.isArray(steps), `frontmatter.steps of ${rel} is not an array`);

  return steps;
}
|
|
|
|
// Pipeline check: the Jaccard similarity of the two synthetic plan runs must
// not fall below the SC7 floor.
test('plan determinism — Jaccard of synthetic plan-run-A vs plan-run-B meets SC7 threshold (0.833)', () => {
  const [stepsA, stepsB] = [
    'tests/synthetic/plan-run-A.md',
    'tests/synthetic/plan-run-B.md',
  ].map((rel) => loadSteps(rel));

  const similarity = jaccardSimilarity(stepsA, stepsB);

  const belowFloorMessage =
    `jaccardSimilarity(stepsTokens(planA), stepsTokens(planB)) = ${similarity} < ${SC7_THRESHOLD} (SC7 floor). ` +
    `Fixtures may have drifted — re-tune step titles to restore the overlap.`;

  assert.ok(similarity >= SC7_THRESHOLD, belowFloorMessage);
});
|
|
|
|
// Size check: each synthetic fixture must carry at least 30 distinct steps.
test('plan determinism — both fixtures contain at least 30 unique step titles', () => {
  const fixtures = ['tests/synthetic/plan-run-A.md', 'tests/synthetic/plan-run-B.md'];

  fixtures.forEach((rel) => {
    // Count distinct entries once; the same number feeds the check and the message.
    const uniqueCount = new Set(loadSteps(rel)).size;

    assert.ok(
      uniqueCount >= 30,
      `${rel}: < 30 unique step titles (got ${uniqueCount}). Synthetic fixtures must reflect a substantial plan.`,
    );
  });
});
|
|
|
|
// Uniqueness check: within a single run, no step entry may appear twice.
test('plan determinism — no duplicate step titles within run', () => {
  const fixtures = ['tests/synthetic/plan-run-A.md', 'tests/synthetic/plan-run-B.md'];

  for (const rel of fixtures) {
    const steps = loadSteps(rel);
    const totalCount = steps.length;
    const uniqueCount = new Set(steps).size;

    // Equal counts mean the Set dropped nothing, i.e. there were no repeats.
    assert.strictEqual(
      uniqueCount,
      totalCount,
      `${rel}: contains duplicate step titles (${totalCount} entries vs ${uniqueCount} unique)`,
    );
  }
});
|