feat(ultraplan-local): add plan-review-dedup helper for Phase 9 finding dedup
Step 5 of plan-v2 (ultra-pipeline-speedup).
lib/review/plan-review-dedup.mjs (NEW)
Two-pass dedup:
1. Exact match — identical computeFindingId(file:line:rule_key) → merge.
2. Jaccard ≥ 0.7 on text-token sets → merge near-duplicates.
Provenance preserved in surviving finding's raised_by[] (which agents
raised it). Reuses lib/parsers/jaccard.mjs + lib/parsers/finding-id.mjs.
CLI shim:
node lib/review/plan-review-dedup.mjs \
--plan-critic /tmp/x.json --scope-guardian /tmp/y.json
Missing inputs tolerated (single-agent review still works).
Tests: 10 (tokenize + threshold + 6 dedup-logic cases + 2 CLI shim).
[skip-docs]
This commit is contained in:
parent
645f01625b
commit
bed14eae4a
2 changed files with 299 additions and 0 deletions
134
plugins/ultraplan-local/tests/lib/plan-review-dedup.test.mjs
Normal file
134
plugins/ultraplan-local/tests/lib/plan-review-dedup.test.mjs
Normal file
|
|
@ -0,0 +1,134 @@
|
|||
// tests/lib/plan-review-dedup.test.mjs
|
||||
// Cover lib/review/plan-review-dedup.mjs:
|
||||
// - identical findings dedupe to 1 (exact-id path)
|
||||
// - distinct findings stay separate
|
||||
// - jaccard threshold 0.7 catches near-duplicates
|
||||
// - empty / missing payloads tolerated
|
||||
// - CLI shim emits parseable JSON on stdout
|
||||
|
||||
import { test } from 'node:test';
|
||||
import { strict as assert } from 'node:assert';
|
||||
import { execFileSync } from 'node:child_process';
|
||||
import { writeFileSync, mkdtempSync, rmSync } from 'node:fs';
|
||||
import { dirname, join } from 'node:path';
|
||||
import { tmpdir } from 'node:os';
|
||||
import { fileURLToPath } from 'node:url';
|
||||
import { dedupFindings, tokenize, DEFAULT_THRESHOLD } from '../../lib/review/plan-review-dedup.mjs';
|
||||
|
||||
const HERE = dirname(fileURLToPath(import.meta.url));
|
||||
const SHIM = join(HERE, '..', '..', 'lib', 'review', 'plan-review-dedup.mjs');
|
||||
|
||||
function tmp(prefix = 'plan-review-dedup-') {
|
||||
return mkdtempSync(join(tmpdir(), prefix));
|
||||
}
|
||||
|
||||
test('tokenize splits on non-word and lowercases', () => {
|
||||
assert.deepEqual(
|
||||
tokenize('Step 4 LACKS verifiable acceptance!'),
|
||||
['step', '4', 'lacks', 'verifiable', 'acceptance'],
|
||||
);
|
||||
assert.deepEqual(tokenize(''), []);
|
||||
assert.deepEqual(tokenize(undefined), []);
|
||||
});
|
||||
|
||||
test('DEFAULT_THRESHOLD is 0.7 per plan-v2 spec', () => {
|
||||
assert.equal(DEFAULT_THRESHOLD, 0.7);
|
||||
});
|
||||
|
||||
test('identical findings (same file/line/rule_key) dedupe to 1, raised_by merged', () => {
|
||||
const sources = [
|
||||
{ agent: 'plan-critic', payload: { agent: 'plan-critic', findings: [{ file: 'plan.md', line: 42, rule_key: 'PC1', text: 'Step 4 lacks verifiable acceptance criteria' }] } },
|
||||
{ agent: 'scope-guardian', payload: { agent: 'scope-guardian', findings: [{ file: 'plan.md', line: 42, rule_key: 'PC1', text: 'Step 4 lacks verifiable acceptance criteria' }] } },
|
||||
];
|
||||
const r = dedupFindings(sources);
|
||||
assert.equal(r.findings.length, 1);
|
||||
assert.deepEqual(r.findings[0].raised_by.sort(), ['plan-critic', 'scope-guardian']);
|
||||
assert.equal(r.dedup_stats.total_in, 2);
|
||||
assert.equal(r.dedup_stats.total_out, 1);
|
||||
assert.equal(r.dedup_stats.exact_id_dups, 1);
|
||||
});
|
||||
|
||||
test('distinct findings (different file/line/rule_key) stay separate', () => {
|
||||
const sources = [
|
||||
{ agent: 'plan-critic', payload: { findings: [
|
||||
{ file: 'plan.md', line: 10, rule_key: 'PC1', text: 'thing one' },
|
||||
{ file: 'plan.md', line: 20, rule_key: 'PC2', text: 'thing two unrelated entirely' },
|
||||
] } },
|
||||
];
|
||||
const r = dedupFindings(sources);
|
||||
assert.equal(r.findings.length, 2);
|
||||
assert.equal(r.dedup_stats.exact_id_dups, 0);
|
||||
assert.equal(r.dedup_stats.jaccard_dups, 0);
|
||||
});
|
||||
|
||||
test('jaccard ≥ 0.7 on near-duplicate text merges (different file/line so id differs)', () => {
|
||||
const sources = [
|
||||
{ agent: 'plan-critic', payload: { findings: [{ file: 'plan.md', line: 10, rule_key: 'PC1', text: 'step lacks verifiable acceptance criteria for path A' }] } },
|
||||
{ agent: 'scope-guardian', payload: { findings: [{ file: 'plan.md', line: 11, rule_key: 'SG1', text: 'step lacks verifiable acceptance criteria for path A' }] } },
|
||||
];
|
||||
const r = dedupFindings(sources);
|
||||
assert.equal(r.findings.length, 1, 'jaccard merge should collapse near-duplicates');
|
||||
assert.deepEqual(r.findings[0].raised_by.sort(), ['plan-critic', 'scope-guardian']);
|
||||
assert.equal(r.dedup_stats.jaccard_dups, 1);
|
||||
});
|
||||
|
||||
test('jaccard below threshold keeps both findings separate', () => {
|
||||
const sources = [
|
||||
{ agent: 'plan-critic', payload: { findings: [{ file: 'a.md', line: 1, rule_key: 'PC1', text: 'database migration risk' }] } },
|
||||
{ agent: 'scope-guardian', payload: { findings: [{ file: 'b.md', line: 2, rule_key: 'SG1', text: 'unrelated frontend hover state polish' }] } },
|
||||
];
|
||||
const r = dedupFindings(sources);
|
||||
assert.equal(r.findings.length, 2);
|
||||
assert.equal(r.dedup_stats.jaccard_dups, 0);
|
||||
});
|
||||
|
||||
test('empty / missing payloads tolerated (single-agent input)', () => {
|
||||
const r = dedupFindings([
|
||||
{ agent: 'plan-critic', payload: { findings: [{ file: 'a.md', line: 1, rule_key: 'PC1', text: 'one' }] } },
|
||||
{ agent: 'scope-guardian', payload: null },
|
||||
]);
|
||||
assert.equal(r.findings.length, 1);
|
||||
assert.deepEqual(r.findings[0].raised_by, ['plan-critic']);
|
||||
});
|
||||
|
||||
test('all sources empty → empty result, dedup_stats zeros', () => {
|
||||
const r = dedupFindings([
|
||||
{ agent: 'plan-critic', payload: null },
|
||||
{ agent: 'scope-guardian', payload: { findings: [] } },
|
||||
]);
|
||||
assert.equal(r.findings.length, 0);
|
||||
assert.equal(r.dedup_stats.total_in, 0);
|
||||
assert.equal(r.dedup_stats.total_out, 0);
|
||||
});
|
||||
|
||||
test('CLI shim parses input files and emits valid deduped JSON', () => {
|
||||
const dir = tmp();
|
||||
try {
|
||||
const planCritic = join(dir, 'pc.json');
|
||||
const scopeGuardian = join(dir, 'sg.json');
|
||||
writeFileSync(planCritic, JSON.stringify({
|
||||
agent: 'plan-critic',
|
||||
findings: [{ file: 'plan.md', line: 5, rule_key: 'PC1', text: 'duplicate finding shared by both' }],
|
||||
}));
|
||||
writeFileSync(scopeGuardian, JSON.stringify({
|
||||
agent: 'scope-guardian',
|
||||
findings: [{ file: 'plan.md', line: 5, rule_key: 'PC1', text: 'duplicate finding shared by both' }],
|
||||
}));
|
||||
const out = execFileSync(process.execPath, [
|
||||
SHIM, '--plan-critic', planCritic, '--scope-guardian', scopeGuardian,
|
||||
], { encoding: 'utf-8' });
|
||||
const parsed = JSON.parse(out);
|
||||
assert.equal(parsed.findings.length, 1);
|
||||
assert.deepEqual(parsed.findings[0].raised_by.sort(), ['plan-critic', 'scope-guardian']);
|
||||
assert.equal(parsed.dedup_stats.total_out, 1);
|
||||
} finally {
|
||||
rmSync(dir, { recursive: true, force: true });
|
||||
}
|
||||
});
|
||||
|
||||
test('CLI shim tolerates missing input files (returns empty deduped JSON)', () => {
|
||||
const out = execFileSync(process.execPath, [SHIM], { encoding: 'utf-8' });
|
||||
const parsed = JSON.parse(out);
|
||||
assert.equal(parsed.findings.length, 0);
|
||||
assert.equal(parsed.dedup_stats.total_in, 0);
|
||||
});
|
||||
Loading…
Add table
Add a link
Reference in a new issue