ktg-plugin-marketplace/plugins/ultraplan-local/lib/review/plan-review-dedup.mjs
Kjell Tore Guttormsen 14ecda886c feat(voyage)!: bulk content rewrite ultra -> voyage/trek prose [skip-docs]
Sed-pipeline (16 patterns, longest-match-first) sweeper residuelle ultra*-treff
i prose, command-narrativ, agent-prompts, hook-kommentarer, doc-prosa.

Pipeline-utvidelser fra V4-prompten:
- BSD-syntax [[:<:]]ultra[[:>:]] istedenfor \bultra\b (BSD sed mangler \b)
- 6 compound-patterns for ultraplan/ultraexecute/ultraresearch/ultrabrief/
  ultrareview/ultracontinue uten -local-suffiks
- ultra*-stats glob -> trek*-stats glob
- Linje-eksklusjon redusert til ultra-cc-architect (Q8); session-state-
  eksklusjonen var over-protektiv
- File-eksklusjon utvidet til settings.json, package.json, plugin.json,
  hele .claude/-treet (gitignored + V5-territorium)

Q8-undantak holdt: architecture-discovery.mjs + project-discovery.mjs urort.
Filnavn-konvensjon holdt: .session-state.local.json + *.local.* preservert.

Manuell narrative-fix: tests/lib/agent-frontmatter.test.mjs linje 10
mangled "/ultra*-local" til "/voyage*-local" (ingen slik kommando finnes);
korrigert til "/trek*".

Residualer utenfor scope (V5 handterer): package.json + .claude-plugin/
plugin.json (Step 12-14 versjons-bump). .claude/* er gitignored
spec-historikk med tilsiktet BEFORE/AFTER-narrativ.

Part of voyage-rebrand session 3 (Wave 4 / Step 10).

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-05 15:08:20 +02:00

165 lines
5 KiB
JavaScript

// lib/review/plan-review-dedup.mjs
// Phase-9 dedup helper for /trekplan adversarial review:
// merges plan-critic + scope-guardian findings into a single deduplicated
// stream, preserving provenance (which agent originally raised each finding).
//
// Two dedup signals:
// 1. Exact match — identical computeFindingId(file:line:rule_key) → merge.
// 2. Jaccard ≥ 0.7 on text-token sets → merge (catches near-duplicates).
//
// Provenance is preserved on the surviving finding's `raised_by` array.
//
// CLI shim:
// node lib/review/plan-review-dedup.mjs \
// --plan-critic /tmp/x.json --scope-guardian /tmp/y.json
// → stdout: deduped JSON, exit 0 on success.
//
// Empty / missing inputs are tolerated (single-agent review still works).
import { readFileSync } from 'node:fs';
import { pathToFileURL } from 'node:url';
import { jaccardSimilarity, meetsThreshold } from '../parsers/jaccard.mjs';
import { computeFindingId } from '../parsers/finding-id.mjs';
/**
 * Jaccard similarity threshold that dedupFindings falls back to when
 * `opts.threshold` is not a number.
 */
export const DEFAULT_THRESHOLD = 0.7;
/**
 * Tokenize a finding's text for Jaccard comparison.
 *
 * The text is lower-cased and split on runs of characters that are neither
 * letters, combining marks, digits nor "_". The separator class is
 * Unicode-aware, so words containing non-ASCII letters (e.g. "håndterer")
 * stay whole instead of being cut at every non-ASCII character. For
 * ASCII-only input the result is the same as splitting on /\W+/.
 *
 * @param {unknown} text - Text to tokenize; non-strings and '' yield [].
 * @returns {string[]} Lower-cased, non-empty tokens in order of appearance.
 */
export function tokenize(text) {
  if (typeof text !== 'string' || text.length === 0) return [];
  return text
    .toLowerCase()
    .split(/[^\p{L}\p{M}\p{N}_]+/u)
    .filter((token) => token.length > 0);
}
/**
 * Flatten one agent payload into an array of `{ agent, finding }` pairs.
 *
 * - A missing or non-object payload yields [].
 * - The agent name is `payload.agent` when it is a non-empty string,
 *   otherwise `fallbackAgent`.
 * - A `findings` value that is not an array is treated as empty.
 * - Entries of `findings` that are not objects (null, undefined, strings,
 *   numbers, ...) are dropped: a null entry would otherwise make the caller
 *   throw when it reads the finding's properties, and a primitive carries no
 *   file / line / rule_key / text to deduplicate on.
 *
 * @param {object | null | undefined} payload - Parsed agent output.
 * @param {string} fallbackAgent - Agent name used when the payload has none.
 * @returns {Array<{agent: string, finding: object}>}
 */
function normalizeAgentPayload(payload, fallbackAgent) {
  if (!payload || typeof payload !== 'object') return [];

  const hasOwnName = typeof payload.agent === 'string' && payload.agent.length > 0;
  const agent = hasOwnName ? payload.agent : fallbackAgent;

  if (!Array.isArray(payload.findings)) return [];

  return payload.findings
    .filter((entry) => entry !== null && typeof entry === 'object')
    .map((finding) => ({ agent, finding }));
}
/**
 * Build the internal record for one raw finding.
 *
 * The id comes from computeFindingId, called with the finding's file, line
 * and rule_key; a null/undefined field is replaced by 'unknown', 0 and
 * 'unknown' respectively for that call. The record itself keeps null for
 * absent file / line / rule_key, uses '' when `text` is not a string, and
 * starts `raised_by` with the given agent.
 *
 * @param {object} finding - Raw finding as emitted by an agent.
 * @param {string} agent - Name of the agent that raised the finding.
 * @returns {{id: *, file: *, line: *, rule_key: *, text: string, raised_by: string[]}}
 */
function annotate(finding, agent) {
  const { file, line, rule_key: ruleKey, text } = finding;

  const record = {
    id: computeFindingId(
      String(file ?? 'unknown'),
      line ?? 0,
      String(ruleKey ?? 'unknown'),
    ),
    file: file ?? null,
    line: line ?? null,
    rule_key: ruleKey ?? null,
    text: typeof text === 'string' ? text : '',
    raised_by: [agent],
  };
  return record;
}
/**
 * Append every agent in `source.raised_by` that `target.raised_by` does not
 * already contain, preserving first-seen order.
 */
function mergeRaisedBy(target, source) {
  for (const agent of source.raised_by) {
    if (!target.raised_by.includes(agent)) target.raised_by.push(agent);
  }
}

/**
 * Dedup an arbitrary collection of agent payloads.
 *
 * Pass 1 collapses findings that share an id. Pass 2 walks the remaining
 * findings in order and merges each one into the first already-kept finding
 * whose text-token Jaccard similarity satisfies
 * `meetsThreshold(similarity, threshold)`. In both passes the earliest
 * finding survives and the agents of merged findings are added to its
 * `raised_by` array.
 *
 * Two findings that both have no text tokens are never merged by pass 2:
 * with nothing to compare they are not demonstrably near-duplicates, and the
 * similarity of two empty token lists is decided by jaccardSimilarity, which
 * is defined outside this file (NOTE(review): confirm its empty-input
 * behaviour if this guard is ever removed).
 *
 * @param {Array<{agent: string, payload: object | null | undefined}>} sources
 * @param {{ threshold?: number }} [opts] - `threshold` overrides
 *   DEFAULT_THRESHOLD when it is a number.
 * @returns {{
 *   findings: Array<object>,
 *   dedup_stats: { total_in: number, total_out: number,
 *                  exact_id_dups: number, jaccard_dups: number }
 * }}
 */
export function dedupFindings(sources, opts = {}) {
  const threshold = typeof opts.threshold === 'number' ? opts.threshold : DEFAULT_THRESHOLD;

  const incoming = [];
  for (const source of sources) {
    for (const { finding, agent } of normalizeAgentPayload(source.payload, source.agent)) {
      incoming.push(annotate(finding, agent));
    }
  }
  const total_in = incoming.length;

  // Pass 1 — exact id dedup: the first finding with a given id survives.
  const byId = new Map();
  let exact_id_dups = 0;
  for (const candidate of incoming) {
    const existing = byId.get(candidate.id);
    if (existing === undefined) {
      byId.set(candidate.id, candidate);
      continue;
    }
    mergeRaisedBy(existing, candidate);
    exact_id_dups += 1;
  }

  // Pass 2 — Jaccard on text tokens. Each kept finding carries its token list
  // so its text is tokenized once rather than once per comparison.
  const kept = [];
  let jaccard_dups = 0;
  for (const candidate of byId.values()) {
    const tokens = tokenize(candidate.text);
    const match = kept.find((entry) => {
      // Nothing to compare when neither side has any text tokens.
      if (tokens.length === 0 && entry.tokens.length === 0) return false;
      return meetsThreshold(jaccardSimilarity(tokens, entry.tokens), threshold);
    });
    if (match === undefined) {
      kept.push({ finding: candidate, tokens });
      continue;
    }
    mergeRaisedBy(match.finding, candidate);
    jaccard_dups += 1;
  }

  const findings = kept.map((entry) => entry.finding);
  return {
    findings,
    dedup_stats: {
      total_in,
      total_out: findings.length,
      exact_id_dups,
      jaccard_dups,
    },
  };
}
// ---- CLI shim ----------------------------------------------------------------
/**
 * Parse the CLI arguments of the dedup shim.
 *
 * Recognised flags, each consuming the argument that follows it:
 *   --plan-critic <path>     → planCritic
 *   --scope-guardian <path>  → scopeGuardian
 *   --threshold <value>      → threshold (converted with Number())
 * Anything else is ignored. A flag given as the last argument gets an
 * undefined value (NaN for --threshold).
 *
 * @param {string[]} argv - Arguments after the script path.
 * @returns {{planCritic?: string, scopeGuardian?: string, threshold?: number}}
 */
function parseArgs(argv) {
  const parsed = {};
  let idx = 0;
  while (idx < argv.length) {
    switch (argv[idx]) {
      case '--plan-critic':
        idx += 1;
        parsed.planCritic = argv[idx];
        break;
      case '--scope-guardian':
        idx += 1;
        parsed.scopeGuardian = argv[idx];
        break;
      case '--threshold':
        idx += 1;
        parsed.threshold = Number(argv[idx]);
        break;
      default:
        break;
    }
    idx += 1;
  }
  return parsed;
}
/**
 * Read and parse a JSON file on a best-effort basis.
 *
 * @param {string | undefined} path - File to read; a falsy path means the
 *   input was not supplied.
 * @returns {*} The parsed JSON value, or null when no path was given or the
 *   file could not be read or parsed (the failure is deliberately not
 *   reported, so a missing agent output does not abort the run).
 */
function readJsonOrNull(path) {
  let parsed = null;
  if (path) {
    try {
      const raw = readFileSync(path, 'utf-8');
      parsed = JSON.parse(raw);
    } catch {
      parsed = null;
    }
  }
  return parsed;
}
// Run the CLI only when this module is the entry script. process.argv[1] is a
// plain filesystem path whereas import.meta.url is a percent-encoded file:
// URL, so the path is converted with pathToFileURL before comparing; string
// concatenation fails to match for paths containing spaces or other
// characters that are escaped in URLs. argv[1] can be absent (e.g. code run
// via `node -e`), hence the guard.
if (process.argv[1] && import.meta.url === pathToFileURL(process.argv[1]).href) {
  const args = parseArgs(process.argv.slice(2));
  const sources = [
    { agent: 'plan-critic', payload: readJsonOrNull(args.planCritic) },
    { agent: 'scope-guardian', payload: readJsonOrNull(args.scopeGuardian) },
  ];
  const opts = {};
  // Only forward a threshold that parsed to a real number; otherwise
  // dedupFindings applies its default.
  if (Number.isFinite(args.threshold)) opts.threshold = args.threshold;
  const result = dedupFindings(sources, opts);
  process.stdout.write(`${JSON.stringify(result, null, 2)}\n`);
  // Set the exit code instead of calling process.exit(): exit() can terminate
  // the process before a pending stdout write has been flushed.
  process.exitCode = 0;
}