// tests/lib/plan-review-dedup.test.mjs
//
// Cover lib/review/plan-review-dedup.mjs:
//   - identical findings dedupe to 1 (exact-id path)
//   - distinct findings stay separate
//   - jaccard threshold 0.7 catches near-duplicates
//   - empty / missing payloads tolerated
//   - CLI shim emits parseable JSON on stdout

import { test } from 'node:test';
import { strict as assert } from 'node:assert';
import { execFileSync } from 'node:child_process';
import { writeFileSync, mkdtempSync, rmSync } from 'node:fs';
import { dirname, join } from 'node:path';
import { tmpdir } from 'node:os';
import { fileURLToPath } from 'node:url';

import { dedupFindings, tokenize, DEFAULT_THRESHOLD } from '../../lib/review/plan-review-dedup.mjs';

// Directory containing this test file; used to locate the module under test.
const HERE = dirname(fileURLToPath(import.meta.url));
// Path to the module under test, which the CLI tests execute as a script.
const SHIM = join(HERE, '..', '..', 'lib', 'review', 'plan-review-dedup.mjs');

/**
 * Create a fresh temporary directory under the OS temp dir.
 *
 * @param {string} [prefix] - Directory name prefix passed to `mkdtempSync`.
 * @returns {string} Path of the newly created directory.
 */
function tmp(prefix = 'plan-review-dedup-') {
  return mkdtempSync(join(tmpdir(), prefix));
}

/**
 * Return a sorted copy of `values`.
 *
 * `Array.prototype.sort` sorts in place; copying first keeps the assertions
 * from reordering arrays that belong to the result under test.
 *
 * @param {string[]} values - Array to sort (left untouched).
 * @returns {string[]} New array with the same elements in default sort order.
 */
function sortedCopy(values) {
  return [...values].sort();
}

test('tokenize splits on non-word and lowercases', () => {
  assert.deepEqual(
    tokenize('Step 4 LACKS verifiable acceptance!'),
    ['step', '4', 'lacks', 'verifiable', 'acceptance'],
  );
  // Empty and missing input both yield an empty token list.
  assert.deepEqual(tokenize(''), []);
  assert.deepEqual(tokenize(undefined), []);
});

test('DEFAULT_THRESHOLD is 0.7 per plan-v2 spec', () => {
  assert.equal(DEFAULT_THRESHOLD, 0.7);
});

test('identical findings (same file/line/rule_key) dedupe to 1, raised_by merged', () => {
  // Both agents report the same file/line/rule_key, so only one finding
  // should survive and it should list both agents.
  const sources = [
    {
      agent: 'plan-critic',
      payload: {
        agent: 'plan-critic',
        findings: [
          { file: 'plan.md', line: 42, rule_key: 'PC1', text: 'Step 4 lacks verifiable acceptance criteria' },
        ],
      },
    },
    {
      agent: 'scope-guardian',
      payload: {
        agent: 'scope-guardian',
        findings: [
          { file: 'plan.md', line: 42, rule_key: 'PC1', text: 'Step 4 lacks verifiable acceptance criteria' },
        ],
      },
    },
  ];

  const r = dedupFindings(sources);

  assert.equal(r.findings.length, 1);
  assert.deepEqual(sortedCopy(r.findings[0].raised_by), ['plan-critic', 'scope-guardian']);
  assert.equal(r.dedup_stats.total_in, 2);
  assert.equal(r.dedup_stats.total_out, 1);
  assert.equal(r.dedup_stats.exact_id_dups, 1);
});

test('distinct findings (different file/line/rule_key) stay separate', () => {
  const sources = [
    {
      agent: 'plan-critic',
      payload: {
        findings: [
          { file: 'plan.md', line: 10, rule_key: 'PC1', text: 'thing one' },
          { file: 'plan.md', line: 20, rule_key: 'PC2', text: 'thing two unrelated entirely' },
        ],
      },
    },
  ];

  const r = dedupFindings(sources);

  assert.equal(r.findings.length, 2);
  assert.equal(r.dedup_stats.exact_id_dups, 0);
  assert.equal(r.dedup_stats.jaccard_dups, 0);
});

test('jaccard ≥ 0.7 on near-duplicate text merges (different file/line so id differs)', () => {
  // line and rule_key differ between the two findings, while the text is
  // identical; the merge is expected to be counted under jaccard_dups.
  const sources = [
    {
      agent: 'plan-critic',
      payload: {
        findings: [
          { file: 'plan.md', line: 10, rule_key: 'PC1', text: 'step lacks verifiable acceptance criteria for path A' },
        ],
      },
    },
    {
      agent: 'scope-guardian',
      payload: {
        findings: [
          { file: 'plan.md', line: 11, rule_key: 'SG1', text: 'step lacks verifiable acceptance criteria for path A' },
        ],
      },
    },
  ];

  const r = dedupFindings(sources);

  assert.equal(r.findings.length, 1, 'jaccard merge should collapse near-duplicates');
  assert.deepEqual(sortedCopy(r.findings[0].raised_by), ['plan-critic', 'scope-guardian']);
  assert.equal(r.dedup_stats.jaccard_dups, 1);
});

test('jaccard below threshold keeps both findings separate', () => {
  // The two texts share no words, and file/line/rule_key all differ.
  const sources = [
    {
      agent: 'plan-critic',
      payload: {
        findings: [{ file: 'a.md', line: 1, rule_key: 'PC1', text: 'database migration risk' }],
      },
    },
    {
      agent: 'scope-guardian',
      payload: {
        findings: [{ file: 'b.md', line: 2, rule_key: 'SG1', text: 'unrelated frontend hover state polish' }],
      },
    },
  ];

  const r = dedupFindings(sources);

  assert.equal(r.findings.length, 2);
  assert.equal(r.dedup_stats.jaccard_dups, 0);
});

test('empty / missing payloads tolerated (single-agent input)', () => {
  // A null payload from one agent must not affect the other agent's finding.
  const r = dedupFindings([
    {
      agent: 'plan-critic',
      payload: { findings: [{ file: 'a.md', line: 1, rule_key: 'PC1', text: 'one' }] },
    },
    { agent: 'scope-guardian', payload: null },
  ]);

  assert.equal(r.findings.length, 1);
  assert.deepEqual(r.findings[0].raised_by, ['plan-critic']);
});

test('all sources empty → empty result, dedup_stats zeros', () => {
  const r = dedupFindings([
    { agent: 'plan-critic', payload: null },
    { agent: 'scope-guardian', payload: { findings: [] } },
  ]);

  assert.equal(r.findings.length, 0);
  assert.equal(r.dedup_stats.total_in, 0);
  assert.equal(r.dedup_stats.total_out, 0);
});

test('CLI shim parses input files and emits valid deduped JSON', () => {
  const dir = tmp();
  try {
    const planCritic = join(dir, 'pc.json');
    const scopeGuardian = join(dir, 'sg.json');

    // Both input files carry the same finding so the CLI output should
    // contain a single merged entry.
    writeFileSync(planCritic, JSON.stringify({
      agent: 'plan-critic',
      findings: [{ file: 'plan.md', line: 5, rule_key: 'PC1', text: 'duplicate finding shared by both' }],
    }));
    writeFileSync(scopeGuardian, JSON.stringify({
      agent: 'scope-guardian',
      findings: [{ file: 'plan.md', line: 5, rule_key: 'PC1', text: 'duplicate finding shared by both' }],
    }));

    // Run the module with the same Node binary that is running this test.
    const out = execFileSync(process.execPath, [
      SHIM,
      '--plan-critic', planCritic,
      '--scope-guardian', scopeGuardian,
    ], { encoding: 'utf-8' });

    const parsed = JSON.parse(out);
    assert.equal(parsed.findings.length, 1);
    assert.deepEqual(sortedCopy(parsed.findings[0].raised_by), ['plan-critic', 'scope-guardian']);
    assert.equal(parsed.dedup_stats.total_out, 1);
  } finally {
    // Always remove the temp directory, even when an assertion fails.
    rmSync(dir, { recursive: true, force: true });
  }
});

test('CLI shim tolerates missing input files (returns empty deduped JSON)', () => {
  // No --plan-critic / --scope-guardian arguments are passed at all.
  const out = execFileSync(process.execPath, [SHIM], { encoding: 'utf-8' });

  const parsed = JSON.parse(out);
  assert.equal(parsed.findings.length, 0);
  assert.equal(parsed.dedup_stats.total_in, 0);
});