// tests/synthetic/plan-determinism.test.mjs
// SC7 plan-determinism floor — Jaccard pipeline test.
//
// Reads two synthetic plan-run fixtures and asserts that
//   jaccardSimilarity(stepsTokens(planA), stepsTokens(planB)) >= 0.833.
//
// This exercises the determinism pipeline (parser + jaccard) on a known
// input pair. It does NOT measure real-LLM determinism — that is deferred
// to a future run of the pipeline against examples/01-add-verbose-flag/.

import { test } from 'node:test';
import { strict as assert } from 'node:assert';
import { readFileSync } from 'node:fs';
import { join, dirname } from 'node:path';
import { fileURLToPath } from 'node:url';
import { jaccardSimilarity } from '../../lib/parsers/jaccard.mjs';
import { parseDocument } from '../../lib/util/frontmatter.mjs';

const HERE = dirname(fileURLToPath(import.meta.url));
// This file lives two directories below the repository root (tests/synthetic/).
const ROOT = join(HERE, '..', '..');

// Minimum Jaccard similarity the two fixtures must reach (the SC7 floor).
const SC7_THRESHOLD = 0.833;

// Minimum number of distinct step titles each fixture must contain.
const MIN_UNIQUE_STEPS = 30;

// Fixture paths, relative to ROOT. Declared once so every test below checks
// exactly the same pair of files.
const FIXTURE_A = 'tests/synthetic/plan-run-A.md';
const FIXTURE_B = 'tests/synthetic/plan-run-B.md';
const FIXTURES = [FIXTURE_A, FIXTURE_B];

/**
 * Read a fixture document and return the `steps` array from its frontmatter.
 *
 * Fails the calling test (via `assert.ok`) when `parseDocument` reports the
 * document as invalid, or when `frontmatter.steps` is not an array.
 *
 * @param {string} rel - Fixture path relative to the repository root.
 * @returns {Array} The `steps` array found in the parsed frontmatter.
 */
function loadSteps(rel) {
  const text = readFileSync(join(ROOT, rel), 'utf-8');
  const doc = parseDocument(text);
  assert.ok(
    doc.valid,
    `frontmatter of ${rel} did not parse: ${(doc.errors || []).map((e) => e.message).join(', ')}`,
  );

  // A missing frontmatter object yields `undefined`, which the array check
  // below reports with a readable message instead of a TypeError.
  const steps = doc.parsed.frontmatter?.steps;
  assert.ok(Array.isArray(steps), `frontmatter.steps of ${rel} is not an array`);
  return steps;
}

test('plan determinism — Jaccard of synthetic plan-run-A vs plan-run-B meets SC7 threshold (0.833)', () => {
  const a = loadSteps(FIXTURE_A);
  const b = loadSteps(FIXTURE_B);

  // NOTE(review): the header and the failure message mention stepsTokens(...),
  // but the raw `steps` arrays are passed straight to jaccardSimilarity here.
  // Presumably it accepts them as token collections — confirm against
  // lib/parsers/jaccard.mjs.
  const sim = jaccardSimilarity(a, b);

  assert.ok(
    sim >= SC7_THRESHOLD,
    `jaccardSimilarity(stepsTokens(planA), stepsTokens(planB)) = ${sim} < ${SC7_THRESHOLD} (SC7 floor). \n` +
      `Fixtures may have drifted — re-tune step titles to restore the overlap.`,
  );
});

test('plan determinism — both fixtures contain at least 30 unique step titles', () => {
  for (const rel of FIXTURES) {
    const uniqueCount = new Set(loadSteps(rel)).size;
    assert.ok(
      uniqueCount >= MIN_UNIQUE_STEPS,
      `${rel}: < ${MIN_UNIQUE_STEPS} unique step titles (got ${uniqueCount}). Synthetic fixtures must reflect a substantial plan.`,
    );
  }
});

test('plan determinism — no duplicate step titles within run', () => {
  for (const rel of FIXTURES) {
    const steps = loadSteps(rel);
    const uniqueCount = new Set(steps).size;
    // A Set drops repeated entries, so any size difference means duplicates.
    assert.strictEqual(
      uniqueCount,
      steps.length,
      `${rel}: contains duplicate step titles (${steps.length} entries vs ${uniqueCount} unique)`,
    );
  }
});