Full port of llm-security plugin for internal use on Windows with GitHub Copilot CLI. Protocol translation layer (copilot-hook-runner.mjs) normalizes Copilot camelCase I/O to Claude Code snake_case format — all original hook scripts run unmodified. - 8 hooks with protocol translation (stdin/stdout/exit code) - 18 SKILL.md skills (Agent Skills Open Standard) - 6 .agent.md agent definitions - 20 scanners + 14 scanner lib modules (unchanged) - 14 knowledge files (unchanged) - 39 test files including copilot-port-verify.mjs (17 tests) - Windows-ready: node:path, os.tmpdir(), process.execPath, no bash Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
276 lines
9.5 KiB
JavaScript
276 lines
9.5 KiB
JavaScript
// diff-engine.mjs — Baseline storage, finding fingerprinting, and diff categorization.
|
|
// Compares scan results against a stored baseline to classify findings as:
|
|
// new — present in current scan, absent from baseline
|
|
// resolved — present in baseline, absent from current scan
|
|
// unchanged — matched between baseline and current (line drift ≤3)
|
|
// moved — same finding, same file, line drift >3 (the fingerprint includes the file path, so a renamed file is reported as resolved + new)
|
|
// Zero external dependencies.
|
|
|
|
import { createHash } from 'node:crypto';
|
|
import { existsSync, mkdirSync, readFileSync, writeFileSync } from 'node:fs';
|
|
import { join, resolve } from 'node:path';
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Configuration
|
|
// ---------------------------------------------------------------------------
|
|
// Largest line-number drift at which a fingerprint match in the same file is
// still classified as "unchanged"; a larger drift is classified as "moved".
const LINE_FUZZY_THRESHOLD = 3; // ±3 lines = unchanged, >3 = moved
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Target hashing — deterministic key for baseline storage
|
|
// ---------------------------------------------------------------------------
|
|
|
|
/**
 * Derive the key used as the baseline filename for a scan target.
 *
 * The path is resolved to an absolute path before hashing, so different
 * spellings of the same location (relative, containing `..`, ...) all
 * produce the same key.
 * @param {string} targetPath - Path of the scanned target, relative or absolute
 * @returns {string} First 12 hex characters of the SHA-256 of the resolved path
 */
export function targetHash(targetPath) {
  const absolutePath = resolve(targetPath);
  const hasher = createHash('sha256');
  hasher.update(absolutePath);
  return hasher.digest('hex').substring(0, 12);
}
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Finding fingerprinting — identity that survives line drift
|
|
// ---------------------------------------------------------------------------
|
|
|
|
/**
 * Compute the identity fingerprint of a finding.
 *
 * The fingerprint hashes scanner, file, title and evidence (missing or
 * falsy fields count as empty strings). The line number is deliberately
 * left out so the identity survives line drift; drift is evaluated
 * separately when diffing.
 * @param {object} finding - A finding object from output.mjs
 * @returns {string} First 16 hex characters of the SHA-256 of the identity fields
 */
export function fingerprintFinding(finding) {
  const { scanner, file, title, evidence } = finding;
  // Evidence is part of the identity: two findings in the same file with the
  // same title but different evidence must stay distinct.
  const identity = [scanner, file, title, evidence]
    .map((field) => field || '')
    .join('\x00');
  const hasher = createHash('sha256');
  hasher.update(identity);
  return hasher.digest('hex').slice(0, 16);
}
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Baseline I/O
|
|
// ---------------------------------------------------------------------------
|
|
|
|
/**
 * Build the location of the baseline JSON file that belongs to a target.
 *
 * The filename is the target's hash (see targetHash) with a `.json`
 * extension, placed inside the baselines directory.
 * @param {string} baselinesDir - Path to reports/baselines/
 * @param {string} targetPath - The scanned target
 * @returns {string} Full path to baseline JSON file
 */
export function baselinePath(baselinesDir, targetPath) {
  const fileName = `${targetHash(targetPath)}.json`;
  return join(baselinesDir, fileName);
}
|
|
|
|
/**
 * Persist the current scan as the baseline for a target.
 *
 * Creates the baselines directory when it does not exist yet, then writes
 * a compact JSON document (pretty-printed, trailing newline) containing
 * the scan's target and timestamp, a format version, the aggregate block
 * and the fingerprinted findings.
 * @param {string} baselinesDir - Path to reports/baselines/
 * @param {string} targetPath - The scanned target
 * @param {object} scanEnvelope - Full scan output envelope from scan-orchestrator
 * @returns {string} Path to saved baseline file
 */
export function saveBaseline(baselinesDir, targetPath, scanEnvelope) {
  const directoryMissing = !existsSync(baselinesDir);
  if (directoryMissing) {
    mkdirSync(baselinesDir, { recursive: true });
  }

  const destination = baselinePath(baselinesDir, targetPath);

  // Only what a later diff needs is stored: metadata + fingerprinted findings.
  const meta = {
    target: scanEnvelope.meta.target,
    timestamp: scanEnvelope.meta.timestamp,
    version: '1', // baseline format version
  };
  const snapshot = {
    meta,
    aggregate: scanEnvelope.aggregate,
    findings: extractFindings(scanEnvelope),
  };

  const serialized = `${JSON.stringify(snapshot, null, 2)}\n`;
  writeFileSync(destination, serialized);
  return destination;
}
|
|
|
|
/**
 * Read the stored baseline for a target.
 *
 * A baseline file that is missing, unreadable, or not valid JSON yields
 * null — all three are treated as "no baseline available".
 * @param {string} baselinesDir - Path to reports/baselines/
 * @param {string} targetPath - The scanned target
 * @returns {object|null} Baseline object or null if not found
 */
export function loadBaseline(baselinesDir, targetPath) {
  const baselineFile = baselinePath(baselinesDir, targetPath);
  let parsed = null;
  if (existsSync(baselineFile)) {
    try {
      const raw = readFileSync(baselineFile, 'utf8');
      parsed = JSON.parse(raw);
    } catch {
      // Read or parse failure: deliberately reported the same as a missing file.
      parsed = null;
    }
  }
  return parsed;
}
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Finding extraction — flatten all scanner results into fingerprinted list
|
|
// ---------------------------------------------------------------------------
|
|
|
|
/**
 * Flatten the per-scanner results of a scan envelope into one list of
 * fingerprinted findings.
 *
 * Scanners are visited in the envelope's key order and findings in their
 * listed order. A finding without its own `scanner` value gets the first
 * three characters of the upper-cased scanner name. Falsy file, line,
 * evidence, owasp and recommendation values are stored as null.
 * @param {object} scanEnvelope - Scan output; `scanners` may be absent
 * @returns {object[]} Array of { fingerprint, scanner, severity, title, file, line, evidence, owasp, recommendation }
 */
export function extractFindings(scanEnvelope) {
  const collected = [];
  const scannerResults = scanEnvelope.scanners || {};

  for (const scannerName of Object.keys(scannerResults)) {
    const rawFindings = scannerResults[scannerName].findings || [];
    for (const raw of rawFindings) {
      const { severity, title } = raw;
      collected.push({
        fingerprint: fingerprintFinding(raw),
        scanner: raw.scanner || scannerName.toUpperCase().slice(0, 3),
        severity,
        title,
        file: raw.file || null,
        line: raw.line || null,
        evidence: raw.evidence || null,
        owasp: raw.owasp || null,
        recommendation: raw.recommendation || null,
      });
    }
  }

  return collected;
}
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Diff algorithm
|
|
// ---------------------------------------------------------------------------
|
|
|
|
/**
 * Compare current scan findings against a baseline.
 *
 * Matching strategy (priority order, applied across ALL current findings):
 *   1. Exact: same fingerprint + same file + line within ±LINE_FUZZY_THRESHOLD → unchanged
 *   2. Moved: same fingerprint, but no exact partner is left → moved
 *   3. Unmatched current findings → new
 *   4. Unmatched baseline findings → resolved
 *
 * Exact matching is completed for every current finding before any "moved"
 * pairing is made. Otherwise a finding without an exact partner could claim
 * a baseline entry that a later finding matches exactly, turning an
 * unchanged finding into a spurious "moved".
 *
 * Each baseline entry is paired with at most one current finding. Within
 * each result list the order of the input arrays is preserved.
 *
 * Note: fingerprints produced by fingerprintFinding() include the file path,
 * so with those fingerprints `previous_file` always equals the current file.
 *
 * @param {object[]} baselineFindings - From loadBaseline().findings
 * @param {object[]} currentFindings - From extractFindings()
 * @returns {object} { new, resolved, unchanged, moved, summary }
 */
export function diffFindings(baselineFindings, currentFindings) {
  // Index baseline findings by fingerprint so each lookup is a Map hit.
  // Several findings can share a fingerprint (same pattern at different
  // lines), hence a list per key. Entries are copies carrying a `matched`
  // flag, so the caller's objects are never mutated.
  const baselineByFp = new Map();
  for (const f of baselineFindings) {
    const bucket = baselineByFp.get(f.fingerprint) || [];
    bucket.push({ ...f, matched: false });
    baselineByFp.set(f.fingerprint, bucket);
  }

  const results = {
    new: [],
    resolved: [],
    unchanged: [],
    moved: [],
  };

  // Pass 1: exact matches (same file, line within threshold) for every
  // current finding. Findings without an exact partner are deferred.
  const pending = [];
  for (const current of currentFindings) {
    const candidates = baselineByFp.get(current.fingerprint) || [];
    const exact = candidates.find(
      (baseline) =>
        !baseline.matched &&
        baseline.file === current.file &&
        isLineClose(baseline.line, current.line),
    );
    if (exact) {
      exact.matched = true;
      results.unchanged.push({
        ...current,
        baseline_line: exact.line,
      });
    } else {
      pending.push(current);
    }
  }

  // Pass 2: deferred findings take any baseline entry with the same
  // fingerprint that is still free (→ moved); with none left they are new.
  for (const current of pending) {
    const candidates = baselineByFp.get(current.fingerprint) || [];
    const previous = candidates.find((baseline) => !baseline.matched);
    if (previous) {
      previous.matched = true;
      results.moved.push({
        ...current,
        previous_file: previous.file,
        previous_line: previous.line,
      });
    } else {
      results.new.push(current);
    }
  }

  // Pass 3: baseline entries nobody claimed are resolved. The internal
  // `matched` flag is stripped before the entry is reported.
  for (const candidates of baselineByFp.values()) {
    for (const baseline of candidates) {
      if (!baseline.matched) {
        const { matched: _, ...finding } = baseline;
        results.resolved.push(finding);
      }
    }
  }

  // Summary
  results.summary = {
    new: results.new.length,
    resolved: results.resolved.length,
    unchanged: results.unchanged.length,
    moved: results.moved.length,
    total_current: currentFindings.length,
    total_baseline: baselineFindings.length,
    baseline_timestamp: null, // caller fills in
  };

  return results;
}
|
|
|
|
/**
 * Decide whether two line numbers count as the same location.
 *
 * A missing line number (null or undefined) on either side always counts
 * as close, because some findings are file-level rather than line-level.
 * Otherwise the absolute difference must not exceed LINE_FUZZY_THRESHOLD.
 * @param {number|null} a
 * @param {number|null} b
 * @returns {boolean}
 */
function isLineClose(a, b) {
  const eitherMissing = a == null || b == null;
  if (eitherMissing) {
    return true;
  }
  const drift = Math.abs(a - b);
  return drift <= LINE_FUZZY_THRESHOLD;
}
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// High-level API — used by scan-orchestrator
|
|
// ---------------------------------------------------------------------------
|
|
|
|
/**
 * Run a full diff cycle: load baseline, compare, return diff results.
 *
 * A stored baseline that parses as JSON but has no `findings` array (for
 * example a hand-edited or foreign file) cannot be diffed. It is treated
 * like a missing baseline instead of crashing the scan, in line with how
 * loadBaseline() treats unparseable files.
 * @param {string} baselinesDir - Path to reports/baselines/
 * @param {string} targetPath - The scanned target
 * @param {object} scanEnvelope - Current scan results
 * @returns {object|null} Diff results with summary, or null if no usable baseline exists
 */
export function diffAgainstBaseline(baselinesDir, targetPath, scanEnvelope) {
  const baseline = loadBaseline(baselinesDir, targetPath);
  if (!baseline || !Array.isArray(baseline.findings)) return null;

  const currentFindings = extractFindings(scanEnvelope);
  const diff = diffFindings(baseline.findings, currentFindings);
  // A baseline without metadata keeps the summary's default of null.
  diff.summary.baseline_timestamp = baseline.meta?.timestamp ?? null;

  return diff;
}
|