ktg-plugin-marketplace/plugins/voyage/tests/synthetic/plan-determinism.test.mjs

// tests/synthetic/plan-determinism.test.mjs
// SC7 plan-determinism floor — Jaccard pipeline test.
//
// Reads two synthetic plan-run fixtures and asserts that
// jaccardSimilarity(stepsTokens(planA), stepsTokens(planB)) >= 0.833.
//
// This exercises the determinism pipeline (parser + jaccard) on a known
// input pair. It does NOT measure real-LLM determinism — that is deferred
// to a future run of the pipeline against examples/01-add-verbose-flag/.

import { test } from 'node:test';
import { strict as assert } from 'node:assert';
import { readFileSync } from 'node:fs';
import { join, dirname } from 'node:path';
import { fileURLToPath } from 'node:url';
import { jaccardSimilarity } from '../../lib/parsers/jaccard.mjs';
import { parseDocument } from '../../lib/util/frontmatter.mjs';

// Directory that holds this test file.
const HERE = dirname(fileURLToPath(import.meta.url));
// Two directory levels above HERE; every fixture path below is relative to it.
const ROOT = dirname(dirname(HERE));

// SC7 floor: minimum Jaccard similarity required between the two synthetic runs.
const SC7_THRESHOLD = 0.833;

/**
 * Read a fixture file and return the `steps` array from its frontmatter.
 *
 * @param {string} rel - Fixture path, relative to ROOT.
 * @returns {Array} The value stored at `frontmatter.steps`.
 * @throws {AssertionError} When the parsed document is not valid, or when
 *   `frontmatter.steps` is missing or not an array.
 */
function loadSteps(rel) {
  const fixturePath = join(ROOT, rel);
  const { valid, errors, parsed } = parseDocument(readFileSync(fixturePath, 'utf-8'));

  // Collect parser error messages up front so the failure text is self-explanatory.
  const parseErrors = (errors || []).map((err) => err.message).join(', ');
  assert.ok(valid, `frontmatter of ${rel} did not parse: ${parseErrors}`);

  const stepList = parsed.frontmatter?.steps;
  assert.ok(Array.isArray(stepList), `frontmatter.steps of ${rel} is not an array`);
  return stepList;
}

// SC7: the step lists of the two synthetic runs must overlap at or above the floor.
test('plan determinism — Jaccard of synthetic plan-run-A vs plan-run-B meets SC7 threshold (0.833)', () => {
  const stepsA = loadSteps('tests/synthetic/plan-run-A.md');
  const stepsB = loadSteps('tests/synthetic/plan-run-B.md');
  const similarity = jaccardSimilarity(stepsA, stepsB);

  // Built before the assertion so the measured value appears in the failure text.
  const failure = [
    `jaccardSimilarity(stepsTokens(planA), stepsTokens(planB)) = ${similarity} < ${SC7_THRESHOLD} (SC7 floor).`,
    `Fixtures may have drifted — re-tune step titles to restore the overlap.`,
  ].join(' ');

  assert.ok(similarity >= SC7_THRESHOLD, failure);
});

// Each fixture must have at least 30 distinct step titles.
test('plan determinism — both fixtures contain at least 30 unique step titles', () => {
  const fixtures = ['tests/synthetic/plan-run-A.md', 'tests/synthetic/plan-run-B.md'];
  fixtures.forEach((rel) => {
    // A Set collapses repeated titles, so its size is the distinct-title count.
    const uniqueCount = new Set(loadSteps(rel)).size;
    assert.ok(
      uniqueCount >= 30,
      `${rel}: < 30 unique step titles (got ${uniqueCount}). Synthetic fixtures must reflect a substantial plan.`,
    );
  });
});

// Within a single run every step title must appear exactly once.
test('plan determinism — no duplicate step titles within run', () => {
  const fixtures = ['tests/synthetic/plan-run-A.md', 'tests/synthetic/plan-run-B.md'];
  fixtures.forEach((rel) => {
    const steps = loadSteps(rel);
    const total = steps.length;
    // Distinct count equals total count only when nothing is repeated.
    const distinct = new Set(steps).size;
    assert.strictEqual(
      distinct,
      total,
      `${rel}: contains duplicate step titles (${total} entries vs ${distinct} unique)`,
    );
  });
});

// --- v4.1 forward-compat block (SC #10) ---
//
// Adding the optional frontmatter key `profile_used` (Step 3 OPTIONAL_STRING_KEYS)
// must not break parsing of EITHER:
//   - Existing plans WITHOUT profile_used (plan-run-A.md, plan-run-B.md)
//   - New plans WITH profile_used (profile-plan-run-{economy,premium}-*.md)
//
// This is the forward-compat assertion required by Step 19. Extend-in-place
// keeps the determinism + forward-compat checks colocated.

// Legacy fixtures carry no profile_used key and must still parse with a steps array.
test('plan determinism — forward-compat: legacy fixtures (no profile_used) parse cleanly', () => {
  const legacyFixtures = ['tests/synthetic/plan-run-A.md', 'tests/synthetic/plan-run-B.md'];
  legacyFixtures.forEach((rel) => {
    const doc = parseDocument(readFileSync(join(ROOT, rel), 'utf-8'));
    const parseErrors = (doc.errors || []).map((e) => e.message).join(', ');
    assert.ok(doc.valid, `${rel}: frontmatter parse failed: ${parseErrors}`);

    const { frontmatter } = doc.parsed;
    assert.equal(
      frontmatter.profile_used,
      undefined,
      `${rel}: legacy fixture must NOT have profile_used set`,
    );
    assert.ok(
      Array.isArray(frontmatter.steps),
      `${rel}: steps array still loads after parser extension`,
    );
  });
});

// New-style fixtures declare profile_used; each must parse, expose the expected
// profile value, and carry a steps array of at least MIN_STEPS entries.
test('plan determinism — forward-compat: new fixtures with profile_used parse cleanly', () => {
  // Lower bound on steps.length enforced for every profile fixture below.
  const MIN_STEPS = 10;
  const cases = [
    { rel: 'tests/synthetic/profile-plan-run-economy-1.md', profile: 'economy' },
    { rel: 'tests/synthetic/profile-plan-run-economy-2.md', profile: 'economy' },
    { rel: 'tests/synthetic/profile-plan-run-premium-1.md', profile: 'premium' },
    { rel: 'tests/synthetic/profile-plan-run-premium-2.md', profile: 'premium' },
  ];
  for (const { rel, profile } of cases) {
    const text = readFileSync(join(ROOT, rel), 'utf-8');
    const doc = parseDocument(text);
    assert.ok(doc.valid, `${rel}: frontmatter parse failed: ${(doc.errors || []).map((e) => e.message).join(', ')}`);
    assert.equal(
      doc.parsed.frontmatter.profile_used,
      profile,
      `${rel}: profile_used must be ${profile}`,
    );

    // Check the type and the length separately so a failure names the actual
    // problem (the previous single message said "non-empty" while requiring >= 10).
    const { steps } = doc.parsed.frontmatter;
    assert.ok(Array.isArray(steps), `${rel}: frontmatter.steps must be an array`);
    assert.ok(
      steps.length >= MIN_STEPS,
      `${rel}: steps array must contain at least ${MIN_STEPS} entries (got ${steps.length})`,
    );
  }
});

test('plan determinism — forward-compat: synthetic v1.7 plan validates with --strict (no PLAN_VERSION_MISMATCH)', async () => {
  // Guards against a regression in strict-mode plan validation after
  // profile_used was added to the manifest-yaml schema. The input is the
  // committed synthetic fixture plan-run-C.md rather than a gitignored
  // project plan, so the result is stable across worktrees and headless runs.
  const planPath = join(ROOT, 'tests/synthetic/plan-run-C.md');

  // Loaded lazily inside the test rather than at module top level.
  const { validatePlan } = await import('../../lib/validators/plan-validator.mjs');
  const { valid, errors, warnings } = await validatePlan(planPath, { strict: true });

  assert.equal(valid, true, `synthetic plan must validate strict: ${JSON.stringify(errors)}`);

  const mismatch = (warnings || []).find(({ code }) => code === 'PLAN_VERSION_MISMATCH');
  assert.equal(mismatch, undefined, 'synthetic plan must NOT emit PLAN_VERSION_MISMATCH warning');
});