// diff-engine.mjs — Baseline storage, finding fingerprinting, and diff categorization.
//
// Compares scan results against a stored baseline to classify findings as:
//   new       — present in current scan, absent from baseline
//   resolved  — present in baseline, absent from current scan
//   unchanged — matched between baseline and current (line drift ≤3)
//   moved     — same fingerprint, but line drift >3
//
// NOTE(review): the file path is part of the fingerprint, so a finding in a
// renamed file gets a different fingerprint and is reported as resolved + new,
// not as moved. Changing that would invalidate baselines already on disk.
//
// Zero external dependencies.

import { createHash } from 'node:crypto';
import { existsSync, mkdirSync, readFileSync, writeFileSync } from 'node:fs';
import { join, resolve } from 'node:path';

// ---------------------------------------------------------------------------
// Configuration
// ---------------------------------------------------------------------------

const LINE_FUZZY_THRESHOLD = 3; // ±3 lines = unchanged, >3 = moved

// ---------------------------------------------------------------------------
// Target hashing — deterministic key for baseline storage
// ---------------------------------------------------------------------------

/**
 * Create a stable hash for a target path to use as baseline filename.
 * Uses the resolved absolute path so the same directory always maps
 * to the same baseline regardless of how it was referenced.
 * @param {string} targetPath
 * @returns {string} 12-char hex hash
 */
export function targetHash(targetPath) {
  const resolved = resolve(targetPath);
  return createHash('sha256').update(resolved).digest('hex').slice(0, 12);
}

// ---------------------------------------------------------------------------
// Finding fingerprinting — identity that survives line drift
// ---------------------------------------------------------------------------

/**
 * Generate a stable fingerprint for a finding.
 * Combines scanner prefix + file + title + evidence to create an identity
 * that is independent of line number (line drift is handled separately).
 * Missing or falsy fields contribute an empty string.
 * @param {object} finding - A finding object from output.mjs
 * @returns {string} 16-char hex fingerprint
 */
export function fingerprintFinding(finding) {
  const parts = [
    finding.scanner || '',
    finding.file || '',
    finding.title || '',
    // Evidence provides content-level identity — two different findings
    // in the same file with different evidence are distinct findings.
    finding.evidence || '',
  ];
  // NUL separator keeps field boundaries unambiguous in the hashed string.
  return createHash('sha256').update(parts.join('\x00')).digest('hex').slice(0, 16);
}

// ---------------------------------------------------------------------------
// Baseline I/O
// ---------------------------------------------------------------------------

/**
 * Resolve the baseline file path for a given target.
 * @param {string} baselinesDir - Path to reports/baselines/
 * @param {string} targetPath
 * @returns {string} Full path to baseline JSON file
 */
export function baselinePath(baselinesDir, targetPath) {
  return join(baselinesDir, `${targetHash(targetPath)}.json`);
}

/**
 * Save scan results as a baseline.
 * Creates the baselines directory (and any missing parents) if needed and
 * overwrites an existing baseline for the same target.
 * @param {string} baselinesDir - Path to reports/baselines/
 * @param {string} targetPath - The scanned target
 * @param {object} scanEnvelope - Full scan output envelope from scan-orchestrator
 * @returns {string} Path to saved baseline file
 */
export function saveBaseline(baselinesDir, targetPath, scanEnvelope) {
  // Recursive mkdir is a no-op when the directory already exists.
  mkdirSync(baselinesDir, { recursive: true });

  const filePath = baselinePath(baselinesDir, targetPath);

  // Store a compact baseline: metadata + fingerprinted findings
  const baseline = {
    meta: {
      target: scanEnvelope.meta.target,
      timestamp: scanEnvelope.meta.timestamp,
      version: '1', // baseline format version
    },
    aggregate: scanEnvelope.aggregate,
    findings: extractFindings(scanEnvelope),
  };

  writeFileSync(filePath, `${JSON.stringify(baseline, null, 2)}\n`);
  return filePath;
}

/**
 * Load a baseline from disk.
 * Best-effort: a missing, unreadable, unparsable, or wrongly-shaped file
 * (anything whose top-level JSON value is not a plain object) is treated
 * as "no baseline".
 * @param {string} baselinesDir
 * @param {string} targetPath
 * @returns {object|null} Baseline object or null if not found / not usable
 */
export function loadBaseline(baselinesDir, targetPath) {
  const filePath = baselinePath(baselinesDir, targetPath);
  if (!existsSync(filePath)) return null;

  let parsed;
  try {
    parsed = JSON.parse(readFileSync(filePath, 'utf8'));
  } catch {
    // Deliberate: a corrupt baseline should not abort the scan.
    return null;
  }

  const isRecord = parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed);
  return isRecord ? parsed : null;
}

// ---------------------------------------------------------------------------
// Finding extraction — flatten all scanner results into fingerprinted list
// ---------------------------------------------------------------------------

/**
 * Extract all findings from a scan envelope, adding fingerprints.
 * Scanner entries that are missing or have no `findings` array are skipped.
 * @param {object} scanEnvelope
 * @returns {object[]} Array of { fingerprint, scanner, severity, title, file, line, evidence, owasp, recommendation }
 */
export function extractFindings(scanEnvelope) {
  const findings = [];

  for (const [scannerName, result] of Object.entries(scanEnvelope?.scanners || {})) {
    const scannerFindings = Array.isArray(result?.findings) ? result.findings : [];

    for (const f of scannerFindings) {
      findings.push({
        fingerprint: fingerprintFinding(f),
        // Fall back to a 3-letter prefix derived from the scanner key.
        scanner: f.scanner || scannerName.toUpperCase().slice(0, 3),
        severity: f.severity,
        title: f.title,
        file: f.file || null,
        line: f.line || null,
        evidence: f.evidence || null,
        owasp: f.owasp || null,
        recommendation: f.recommendation || null,
      });
    }
  }

  return findings;
}

// ---------------------------------------------------------------------------
// Diff algorithm
// ---------------------------------------------------------------------------

/**
 * Compare current scan findings against a baseline.
 *
 * Matching strategy (priority order):
 *   1. Exact: fingerprint + file + line within ±LINE_FUZZY_THRESHOLD → unchanged
 *   2. Moved: fingerprint matches but location drifted beyond threshold → moved
 *   3. Unmatched current findings → new
 *   4. Unmatched baseline findings → resolved
 *
 * Exact matches are resolved for ALL current findings before any moved match
 * is attempted, so a drifted finding can never consume the baseline entry
 * that a later finding matches exactly.
 *
 * @param {object[]} baselineFindings - From loadBaseline().findings
 * @param {object[]} currentFindings - From extractFindings()
 * @returns {object} { new, resolved, unchanged, moved, summary }
 */
export function diffFindings(baselineFindings, currentFindings) {
  const baselineList = baselineFindings ?? [];
  const currentList = currentFindings ?? [];

  // Index baseline findings by fingerprint. Multiple findings can share a
  // fingerprint (same pattern, different locations), so each key holds a list.
  // The matched flag lives on a wrapper so finding objects are never altered.
  const slotsByFingerprint = new Map();
  for (const finding of baselineList) {
    const slot = { finding, matched: false };
    const bucket = slotsByFingerprint.get(finding.fingerprint);
    if (bucket) {
      bucket.push(slot);
    } else {
      slotsByFingerprint.set(finding.fingerprint, [slot]);
    }
  }

  const results = {
    new: [],
    resolved: [],
    unchanged: [],
    moved: [],
  };

  // Pass 1: exact matches (same file, line within threshold) → unchanged.
  const pending = [];
  for (const current of currentList) {
    const bucket = slotsByFingerprint.get(current.fingerprint) ?? [];
    const slot = bucket.find(
      (candidate) =>
        !candidate.matched &&
        candidate.finding.file === current.file &&
        isLineClose(candidate.finding.line, current.line),
    );

    if (!slot) {
      pending.push(current);
      continue;
    }

    slot.matched = true;
    results.unchanged.push({ ...current, baseline_line: slot.finding.line });
  }

  // Pass 2: leftovers take any remaining baseline entry with the same
  // fingerprint → moved; with none left they are new.
  for (const current of pending) {
    const bucket = slotsByFingerprint.get(current.fingerprint) ?? [];
    const slot = bucket.find((candidate) => !candidate.matched);

    if (!slot) {
      results.new.push(current);
      continue;
    }

    slot.matched = true;
    results.moved.push({
      ...current,
      previous_file: slot.finding.file,
      previous_line: slot.finding.line,
    });
  }

  // Pass 3: baseline entries nobody claimed are resolved.
  for (const bucket of slotsByFingerprint.values()) {
    for (const slot of bucket) {
      if (!slot.matched) {
        results.resolved.push({ ...slot.finding });
      }
    }
  }

  results.summary = {
    new: results.new.length,
    resolved: results.resolved.length,
    unchanged: results.unchanged.length,
    moved: results.moved.length,
    total_current: currentList.length,
    total_baseline: baselineList.length,
    baseline_timestamp: null, // caller fills in
  };

  return results;
}

/**
 * Check if two line numbers are within the fuzzy threshold.
 * Null lines always match (some findings are file-level, not line-level).
 * @param {number|null} a
 * @param {number|null} b
 * @returns {boolean}
 */
function isLineClose(a, b) {
  if (a == null || b == null) return true;
  return Math.abs(a - b) <= LINE_FUZZY_THRESHOLD;
}

// ---------------------------------------------------------------------------
// High-level API — used by scan-orchestrator
// ---------------------------------------------------------------------------

/**
 * Run a full diff cycle: load baseline, compare, return diff results.
 * A baseline without a `findings` array is treated as having no findings,
 * and a missing `meta.timestamp` yields `baseline_timestamp: null`.
 * @param {string} baselinesDir
 * @param {string} targetPath
 * @param {object} scanEnvelope - Current scan results
 * @returns {object|null} Diff results with summary, or null if no baseline exists
 */
export function diffAgainstBaseline(baselinesDir, targetPath, scanEnvelope) {
  const baseline = loadBaseline(baselinesDir, targetPath);
  if (!baseline) return null;

  const baselineFindings = Array.isArray(baseline.findings) ? baseline.findings : [];
  const diff = diffFindings(baselineFindings, extractFindings(scanEnvelope));
  diff.summary.baseline_timestamp = baseline.meta?.timestamp ?? null;

  return diff;
}