// scan-pipeline.test.mjs — End-to-end test of the scan orchestrator.
//
// Purpose: prove the full deterministic scanner pipeline produces the
// expected verdict, risk score, scanner enumeration, and OWASP coverage
// when run against fixture projects representing two ends of the
// security-posture spectrum.
//
// What this exercises:
//   - scanners/scan-orchestrator.mjs as a CLI (real spawn)
//   - All 10 orchestrated scanners: unicode, entropy, permission, dep,
//     taint, git, network, memory, supply-chain, workflow, plus the
//     toxic-flow correlator that runs LAST.
//   - The aggregate envelope: verdict, risk_score, risk_band, counts,
//     OWASP breakdown, scanner status (ok / error / skipped).
//   - The exit-code contract: 0 (PASS), 1 (WARNING), 2 (BLOCK).
//
// Two contrasting fixtures:
//   POISONED: tests/fixtures/memory-scan/poisoned-project — multi-vector
//     attack: tampered CLAUDE.md, suspicious git history, network leaks,
//     embedded credentials, etc. Must produce BLOCK verdict.
//   CLEAN: tests/fixtures/posture-scan/grade-a-project — well-built
//     project with appropriate hooks, settings, and code. Must produce
//     a verdict no worse than WARNING and a risk_score below the BLOCK
//     threshold (65).
//
// Runtime: each orchestrator run takes ~7-30s. The whole suite runs
// in well under 2 minutes on a 2026-era developer machine.
import { describe, it, before } from 'node:test';
import assert from 'node:assert/strict';
import { resolve, dirname } from 'node:path';
import { fileURLToPath } from 'node:url';
import { spawn } from 'node:child_process';

const __dirname = dirname(fileURLToPath(import.meta.url));

// Absolute paths to the orchestrator CLI and the two fixture projects,
// resolved relative to this test file so the suite works from any cwd.
const ORCHESTRATOR = resolve(__dirname, '../../scanners/scan-orchestrator.mjs');
const POISONED = resolve(__dirname, '../fixtures/memory-scan/poisoned-project');
const CLEAN = resolve(__dirname, '../fixtures/posture-scan/grade-a-project');

// Keys that must be present in `envelope.scanners` for every run.
const EXPECTED_SCANNERS = [
  'unicode',
  'entropy',
  'permission',
  'dep',
  'taint',
  'git',
  'network',
  'memory',
  'supply-chain',
  'workflow',
  'toxic-flow',
];

// Reported as `code` when the child produced no numeric exit code (it was
// terminated by a signal — e.g. the kill triggered by spawn's `timeout`
// option — or it never started). Deliberately outside the orchestrator's
// 0/1/2 contract so an aborted run can never be mistaken for a WARNING (1).
const NO_EXIT_CODE = -1;

/**
 * Run the scan orchestrator CLI against a target directory and collect its output.
 *
 * The returned promise never rejects: spawn failures and signal kills are
 * reported through the resolved object so callers can assert on them.
 *
 * @param {string} target - Path of the project to scan (first CLI argument).
 * @param {string[]} [extraArgs=[]] - Additional CLI arguments appended after `target`.
 * @param {number} [timeout=180_000] - Milliseconds before the child is killed.
 * @returns {Promise<{code: number, signal: (string|null), error: (Error|null), stdout: string, stderr: string}>}
 *   `code` is the child's exit code, or NO_EXIT_CODE when there is none;
 *   `signal` is the terminating signal if any; `error` is the last error
 *   emitted by the child process object, if any.
 */
function runOrchestrator(target, extraArgs = [], timeout = 180_000) {
  return new Promise((resolveP) => {
    const stdout = [];
    const stderr = [];
    let failure = null;
    let settled = false;

    // Resolve at most once, whichever of 'error' / 'close' gets there first.
    const finish = (code, signal) => {
      if (settled) return;
      settled = true;
      resolveP({
        code: code ?? NO_EXIT_CODE,
        signal: signal ?? null,
        error: failure,
        stdout: Buffer.concat(stdout).toString('utf8'),
        stderr: Buffer.concat(stderr).toString('utf8'),
      });
    };

    // process.execPath guarantees the orchestrator runs under the same Node
    // binary as this test, instead of whatever `node` resolves to on PATH.
    const child = spawn(process.execPath, [ORCHESTRATOR, target, ...extraArgs], {
      timeout,
      stdio: ['ignore', 'pipe', 'pipe'],
    });

    child.stdout?.on('data', (c) => stdout.push(c));
    child.stderr?.on('data', (c) => stderr.push(c));

    // Without an 'error' listener a failed spawn would surface as an
    // uncaught exception. If the process never started (no pid) there is
    // nothing left to wait for; otherwise let 'close' deliver the result.
    child.on('error', (err) => {
      failure = err;
      if (child.pid === undefined) finish(null, null);
    });

    child.on('close', (code, signal) => finish(code, signal));
  });
}

/**
 * Parse JSON text without throwing.
 *
 * @param {string} text - Candidate JSON document.
 * @returns {*} The parsed value, or `null` when `text` is not valid JSON.
 */
function tryParse(text) {
  try {
    return JSON.parse(text);
  } catch {
    return null;
  }
}

// We run each fixture once and reuse the result across multiple assertions
// to keep the suite fast. node:test's `before` does the heavy work.
// One orchestrator run per fixture, shared by every suite in this file.
// Keyed by fixture path; the value is the in-flight (or settled) promise, so
// concurrent requests for the same fixture never spawn a second process.
const scanCache = new Map();

/**
 * Run the orchestrator against `target` at most once and return the raw
 * process result together with the parsed stdout envelope.
 *
 * @param {string} target - Fixture directory to scan.
 * @returns {Promise<{result: object, envelope: (object|null)}>}
 *   `envelope` is `null` when stdout was not valid JSON.
 */
function scanOnce(target) {
  if (!scanCache.has(target)) {
    const pending = runOrchestrator(target).then((result) => ({
      result,
      envelope: tryParse(result.stdout),
    }));
    scanCache.set(target, pending);
  }
  return scanCache.get(target);
}

/**
 * Return `envelope.aggregate`, failing with a readable assertion (instead of
 * a TypeError) when the envelope did not parse or has no aggregate section.
 *
 * @param {object|null} envelope - Parsed orchestrator output.
 * @returns {object} The aggregate section of the envelope.
 */
function aggregateOf(envelope) {
  assert.ok(envelope, 'orchestrator stdout must be valid JSON');
  assert.ok(envelope.aggregate, 'aggregate must exist');
  return envelope.aggregate;
}

describe('e2e scan-pipeline — POISONED project', () => {
  let result;
  let envelope;

  before(async () => {
    ({ result, envelope } = await scanOnce(POISONED));
  });

  it('emits a parseable JSON envelope on stdout', () => {
    assert.ok(envelope, 'orchestrator stdout must be valid JSON');
    assert.equal(typeof envelope, 'object');
  });

  it('exits with the BLOCK exit code (2)', () => {
    assert.equal(result.code, 2, 'BLOCK verdict must map to exit 2');
  });

  it('runs all 10 expected scanners + toxic-flow correlator', () => {
    assert.ok(envelope, 'orchestrator stdout must be valid JSON');
    assert.ok(envelope.scanners, 'envelope.scanners must exist');
    const got = Object.keys(envelope.scanners);
    for (const name of EXPECTED_SCANNERS) {
      assert.ok(got.includes(name), `scanner "${name}" must be present`);
    }
  });

  it('verdict is BLOCK', () => {
    const a = aggregateOf(envelope);
    assert.equal(a.verdict, 'BLOCK', 'verdict must be BLOCK on poisoned project');
  });

  it('risk_score ≥ BLOCK cutoff (65) and risk_band Severe-or-Extreme', () => {
    const a = aggregateOf(envelope);
    assert.ok(a.risk_score >= 65, `risk_score ${a.risk_score} must be ≥ 65 (BLOCK cutoff)`);
    assert.match(
      a.risk_band || '',
      /Severe|Extreme/i,
      `risk_band ${a.risk_band} must be Severe or Extreme`
    );
  });

  it('produces critical AND high severity findings', () => {
    const counts = aggregateOf(envelope).counts || {};
    assert.ok(counts.critical >= 1, `expected ≥1 critical, got ${counts.critical}`);
    assert.ok(counts.high >= 1, `expected ≥1 high, got ${counts.high}`);
  });

  it('total_findings is non-zero and matches counts', () => {
    const a = aggregateOf(envelope);
    const counts = a.counts || {};
    assert.ok(a.total_findings >= 5, `expected ≥5 total findings, got ${a.total_findings}`);
    const sum =
      (counts.critical || 0) +
      (counts.high || 0) +
      (counts.medium || 0) +
      (counts.low || 0) +
      (counts.info || 0);
    assert.equal(a.total_findings, sum, 'total_findings must equal sum of severity counts');
  });

  it('OWASP breakdown covers at least one LLM Top 10 category', () => {
    const owasp = aggregateOf(envelope).owasp_breakdown || {};
    const keys = Object.keys(owasp);
    assert.ok(keys.length >= 1, 'expected at least one OWASP category');
    const llmCategories = keys.filter((k) => /^LLM\d{2}$/.test(k));
    assert.ok(
      llmCategories.length >= 1,
      `expected at least one LLM01-LLM10 category, got: ${keys.join(', ')}`
    );
  });

  it('memory-poisoning scanner found findings (CLAUDE.md tampering signal)', () => {
    assert.ok(envelope, 'orchestrator stdout must be valid JSON');
    assert.ok(envelope.scanners, 'envelope.scanners must exist');
    const memory = envelope.scanners.memory;
    assert.ok(memory, 'memory scanner result must be present');
    const findings = memory.findings || [];
    assert.ok(
      findings.length >= 1,
      `expected memory-poisoning findings on a fixture named "poisoned-project", got ${findings.length}`
    );
  });

  it('all scanners completed without error', () => {
    const a = aggregateOf(envelope);
    assert.equal(a.scanners_error, 0, `scanners_error must be 0, got ${a.scanners_error}`);
    assert.ok(a.scanners_ok >= 1, 'at least one scanner must report ok');
  });
});

describe('e2e scan-pipeline — CLEAN (grade-a) project', () => {
  let result;
  let envelope;

  before(async () => {
    ({ result, envelope } = await scanOnce(CLEAN));
  });

  it('emits a parseable JSON envelope on stdout', () => {
    assert.ok(envelope, 'orchestrator stdout must be valid JSON');
  });

  it('exits with code 0 or 1 (PASS or WARNING) — never BLOCK', () => {
    assert.notEqual(result.code, 2, 'grade-a fixture must NOT produce BLOCK verdict');
    assert.ok([0, 1].includes(result.code), `expected exit 0 or 1, got ${result.code}`);
  });

  it('verdict is PASS or WARNING — never BLOCK', () => {
    const a = aggregateOf(envelope);
    assert.ok(['PASS', 'WARNING'].includes(a.verdict), `expected PASS/WARNING, got ${a.verdict}`);
  });

  it('risk_score is below BLOCK cutoff (65)', () => {
    const a = aggregateOf(envelope);
    assert.ok(a.risk_score < 65, `risk_score ${a.risk_score} must be < 65 for clean fixture`);
  });

  it('produces ZERO critical findings (defining property of grade-a)', () => {
    const counts = aggregateOf(envelope).counts || {};
    assert.equal(counts.critical, 0, `grade-a fixture must have 0 critical, got ${counts.critical}`);
  });

  it('runs all 10 scanners + toxic-flow correlator on the clean project too', () => {
    assert.ok(envelope, 'orchestrator stdout must be valid JSON');
    const got = Object.keys(envelope.scanners || {});
    for (const name of EXPECTED_SCANNERS) {
      assert.ok(got.includes(name), `scanner "${name}" must run on clean project too`);
    }
  });
});

describe('e2e scan-pipeline — narrative coherence: BLOCK is genuinely worse than WARNING', () => {
  // This single test cross-checks that the verdict ordering matches the
  // numeric risk scoring. It is the core narrative-coherence assertion:
  // a BLOCK-verdict scan cannot have a lower risk_score than a WARNING
  // scan of a different project. If this ever fails, severity-mapping
  // logic has drifted and the v2 risk-score model is broken.
  let poisoned;
  let clean;

  before(async () => {
    // Reuses the runs made by the suites above when they have already
    // executed; when this suite runs on its own, both scans start in parallel.
    const [poisonedScan, cleanScan] = await Promise.all([scanOnce(POISONED), scanOnce(CLEAN)]);
    poisoned = poisonedScan.envelope;
    clean = cleanScan.envelope;
  });

  it('poisoned.risk_score > clean.risk_score', () => {
    assert.ok(poisoned && clean, 'both envelopes must parse');
    const aScore = aggregateOf(poisoned).risk_score;
    const bScore = aggregateOf(clean).risk_score;
    assert.ok(
      aScore > bScore,
      `poisoned (${aScore}) must outscore clean (${bScore}) — risk-band coherence`
    );
  });

  it('poisoned has more critical findings than clean', () => {
    assert.ok(poisoned && clean, 'both envelopes must parse');
    const aCrit = (aggregateOf(poisoned).counts || {}).critical || 0;
    const bCrit = (aggregateOf(clean).counts || {}).critical || 0;
    assert.ok(aCrit > bCrit, `poisoned criticals (${aCrit}) must exceed clean criticals (${bCrit})`);
  });

  it('verdict ordering matches risk-band ordering (BLOCK > WARNING > PASS)', () => {
    assert.ok(poisoned && clean, 'both envelopes must parse');
    const order = ['PASS', 'WARNING', 'BLOCK'];
    const poisonedVerdict = aggregateOf(poisoned).verdict;
    const cleanVerdict = aggregateOf(clean).verdict;
    const aIdx = order.indexOf(poisonedVerdict);
    const bIdx = order.indexOf(cleanVerdict);
    assert.ok(aIdx >= 0 && bIdx >= 0, 'both verdicts must be on the canonical scale');
    assert.ok(
      aIdx > bIdx,
      `verdict ordering inverted: poisoned=${poisonedVerdict} clean=${cleanVerdict}`
    );
  });
});