Three new files in tests/e2e/ (45 tests, 1777 -> 1822): - attack-chain.test.mjs (17): full hook stack against attack payloads in sequence -- prompt injection at the gate; T1/T5/T8 bash evasions; pathguard on .env / .ssh; secrets hook on AWS-shaped keys and PEM headers; markdown link-title and HTML-comment poisoning in tool output; trifecta accumulation over a single session with dedup on the next benign call. - multi-session.test.mjs (9): state persistence across simulated session boundaries. Uses the fact that a hook child's process.ppid equals the test runner's process.pid, so writing the session state file directly simulates "previous session" history. Covers slow-burn trifecta (legs spread >50 calls), MCP cumulative description drift via LLM_SECURITY_MCP_CACHE_FILE override, and pre-compact transcript poisoning in warn / block / clean / missing-file modes. - scan-pipeline.test.mjs (19): scan-orchestrator + all 10 scanners + toxic-flow correlator against poisoned-project (BLOCK / 95 / Extreme) and grade-a-project (WARNING / 48 / High). Asserts envelope shape, verdict, risk_score, severity counts, OWASP coverage, scanner enumeration, and a narrative-coherence cross-check that the BLOCK scan strictly outranks the WARNING scan along every axis. Test files build credential-shaped payloads at runtime via concatenation so they contain no literal matches for the pre-edit-secrets regexes (memory rule feedback_secrets_hook_test_fixtures.md). Doc updates in same commit per marketplace policy: - CLAUDE.md header: 1777+ -> 1822+ tests, mentions tests/e2e/ - README.md badge tests-1777 -> tests-1822, body text updated - CHANGELOG.md: new [Unreleased] Added section describing scope No version bump. No behavior changes outside tests/.
241 lines
9.2 KiB
JavaScript
241 lines
9.2 KiB
JavaScript
// scan-pipeline.test.mjs — End-to-end test of the scan orchestrator.
|
|
//
|
|
// Purpose: prove the full deterministic scanner pipeline produces the
|
|
// expected verdict, risk score, scanner enumeration, and OWASP coverage
|
|
// when run against fixture projects representing two ends of the
|
|
// security-posture spectrum.
|
|
//
|
|
// What this exercises:
|
|
// - scanners/scan-orchestrator.mjs as a CLI (real spawn)
|
|
// - All 10 orchestrated scanners: unicode, entropy, permission, dep,
|
|
// taint, git, network, memory, supply-chain, workflow, plus the
|
|
// toxic-flow correlator that runs LAST.
|
|
// - The aggregate envelope: verdict, risk_score, risk_band, counts,
|
|
// OWASP breakdown, scanner status (ok / error / skipped).
|
|
// - The exit-code contract: 0 (PASS), 1 (WARNING), 2 (BLOCK).
|
|
//
|
|
// Two contrasting fixtures:
|
|
// POISONED: tests/fixtures/memory-scan/poisoned-project — multi-vector
|
|
// attack: tampered CLAUDE.md, suspicious git history, network leaks,
|
|
// embedded credentials, etc. Must produce BLOCK verdict.
|
|
// CLEAN: tests/fixtures/posture-scan/grade-a-project — well-built
|
|
// project with appropriate hooks, settings, and code. Must produce
|
|
// a verdict no worse than WARNING and a risk_score below the BLOCK
|
|
// threshold (65).
|
|
//
|
|
// Runtime: each orchestrator run takes ~7-30s. The whole suite runs
|
|
// in well under 2 minutes on a 2026-era developer machine.
|
|
|
|
import { describe, it, before } from 'node:test';
|
|
import assert from 'node:assert/strict';
|
|
import { resolve, dirname } from 'node:path';
|
|
import { fileURLToPath } from 'node:url';
|
|
import { spawn } from 'node:child_process';
|
|
|
|
// Directory containing this test file; every path below is anchored here.
const __dirname = dirname(fileURLToPath(import.meta.url));

// Orchestrator CLI entry point, two levels above this directory.
const ORCHESTRATOR = resolve(__dirname, '..', '..', 'scanners', 'scan-orchestrator.mjs');

// Fixture expected to produce a BLOCK verdict.
const POISONED = resolve(__dirname, '..', 'fixtures', 'memory-scan', 'poisoned-project');

// Fixture expected to stay below the BLOCK threshold.
const CLEAN = resolve(__dirname, '..', 'fixtures', 'posture-scan', 'grade-a-project');

// Keys that must appear in envelope.scanners: the ten orchestrated
// scanners followed by the toxic-flow correlator.
const EXPECTED_SCANNERS = [
  'unicode',
  'entropy',
  'permission',
  'dep',
  'taint',
  'git',
  'network',
  'memory',
  'supply-chain',
  'workflow',
  'toxic-flow',
];
|
|
|
|
/**
 * Spawn the scan orchestrator as a child process and capture its output.
 *
 * The returned promise always resolves (it never rejects), so callers
 * inspect the result object instead of wrapping the call in try/catch.
 *
 * @param {string} target - Path handed to the orchestrator as its first argument.
 * @param {string[]} [extraArgs=[]] - Additional CLI arguments appended after `target`.
 * @param {number} [timeout=180_000] - Milliseconds before the child is killed.
 * @returns {Promise<{code: number, signal: (string|null), error: (Error|null), stdout: string, stderr: string}>}
 *   `code` is the child's exit code, or 1 when no exit code is available
 *   (killed by a signal, or the process could not be spawned). `signal`
 *   and `error` let callers tell those cases apart from a genuine exit 1.
 */
function runOrchestrator(target, extraArgs = [], timeout = 180_000) {
  return new Promise((resolveP) => {
    const stdout = [];
    const stderr = [];
    const captured = () => ({
      stdout: Buffer.concat(stdout).toString('utf8'),
      stderr: Buffer.concat(stderr).toString('utf8'),
    });

    // process.execPath runs the child under the same Node binary as the
    // test runner instead of whatever `node` happens to be first on PATH.
    const child = spawn(process.execPath, [ORCHESTRATOR, target, ...extraArgs], {
      timeout,
      stdio: ['ignore', 'pipe', 'pipe'],
    });

    child.stdout.on('data', (chunk) => stdout.push(chunk));
    child.stderr.on('data', (chunk) => stderr.push(chunk));

    // Without an 'error' listener a spawn failure surfaces as an uncaught
    // exception in the test runner. Settle the promise instead; a later
    // 'close' is harmless because a promise only resolves once.
    child.on('error', (error) => {
      resolveP({ code: 1, signal: null, error, ...captured() });
    });

    child.on('close', (code, signal) => {
      // `code` is null when the child was terminated by a signal (for
      // example the timeout kill); `signal` records which one.
      resolveP({ code: code ?? 1, signal, error: null, ...captured() });
    });
  });
}
|
|
|
|
/**
 * Best-effort JSON decode.
 *
 * @param {string} text - Candidate JSON text.
 * @returns {*} The decoded value, or null when `text` is not valid JSON.
 */
function tryParse(text) {
  let parsed = null;
  try {
    parsed = JSON.parse(text);
  } catch {
    // Invalid JSON is an expected outcome here; callers assert on null.
  }
  return parsed;
}
|
|
|
|
// Each describe block runs its fixture(s) once in a node:test `before`
// hook and reuses the captured result across all of its assertions, so
// the slow orchestrator spawn is not repeated per test.
|
|
|
|
// End-to-end expectations for the poisoned fixture: the orchestrator must
// exit 2, report a BLOCK verdict, and list every expected scanner.
describe('e2e scan-pipeline — POISONED project', () => {
  // Severity buckets that together must account for every finding.
  const SEVERITIES = ['critical', 'high', 'medium', 'low', 'info'];

  let result;
  let envelope;

  // Returns the parsed envelope. When stdout was not valid JSON this fails
  // with the exit code and stderr attached, instead of letting later
  // property reads die with an opaque TypeError.
  const requireEnvelope = () => {
    assert.ok(
      envelope,
      `orchestrator stdout must be valid JSON (exit code ${result?.code}); stderr:\n${result?.stderr}`
    );
    return envelope;
  };

  // Returns envelope.aggregate, asserting that it exists first.
  const requireAggregate = () => {
    const { aggregate } = requireEnvelope();
    assert.ok(aggregate, 'aggregate must exist');
    return aggregate;
  };

  // A single orchestrator run feeds every assertion in this block.
  before(async () => {
    result = await runOrchestrator(POISONED);
    envelope = tryParse(result.stdout);
  });

  it('emits a parseable JSON envelope on stdout', () => {
    assert.equal(typeof requireEnvelope(), 'object');
  });

  it('exits with the BLOCK exit code (2)', () => {
    assert.equal(result.code, 2, 'BLOCK verdict must map to exit 2');
  });

  it('runs all 10 expected scanners + toxic-flow correlator', () => {
    const { scanners } = requireEnvelope();
    assert.ok(scanners, 'envelope.scanners must exist');
    const present = new Set(Object.keys(scanners));
    for (const name of EXPECTED_SCANNERS) {
      assert.ok(present.has(name), `scanner "${name}" must be present`);
    }
  });

  it('verdict is BLOCK', () => {
    const { verdict } = requireAggregate();
    assert.equal(verdict, 'BLOCK', 'verdict must be BLOCK on poisoned project');
  });

  it('risk_score ≥ BLOCK cutoff (65) and risk_band Severe-or-Extreme', () => {
    const { risk_score: riskScore, risk_band: riskBand } = requireAggregate();
    assert.ok(riskScore >= 65, `risk_score ${riskScore} must be ≥ 65 (BLOCK cutoff)`);
    assert.match(
      // assert.match needs a string; a missing band becomes '' and fails the match.
      riskBand || '',
      /Severe|Extreme/i,
      `risk_band ${riskBand} must be Severe or Extreme`
    );
  });

  it('produces critical AND high severity findings', () => {
    const counts = requireAggregate().counts || {};
    assert.ok(counts.critical >= 1, `expected ≥1 critical, got ${counts.critical}`);
    assert.ok(counts.high >= 1, `expected ≥1 high, got ${counts.high}`);
  });

  it('total_findings is non-zero and matches counts', () => {
    const aggregate = requireAggregate();
    // Same `|| {}` fallback as the other count checks, so a missing
    // counts object fails an assertion rather than throwing.
    const counts = aggregate.counts || {};
    assert.ok(
      aggregate.total_findings >= 5,
      `expected ≥5 total findings, got ${aggregate.total_findings}`
    );
    const sum = SEVERITIES.reduce((acc, severity) => acc + (counts[severity] || 0), 0);
    assert.equal(aggregate.total_findings, sum, 'total_findings must equal sum of severity counts');
  });

  it('OWASP breakdown covers at least one LLM Top 10 category', () => {
    const keys = Object.keys(requireAggregate().owasp_breakdown || {});
    assert.ok(keys.length >= 1, 'expected at least one OWASP category');
    const llmCategories = keys.filter((key) => /^LLM\d{2}$/.test(key));
    assert.ok(
      llmCategories.length >= 1,
      `expected at least one LLM01-LLM10 category, got: ${keys.join(', ')}`
    );
  });

  it('memory-poisoning scanner found findings (CLAUDE.md tampering signal)', () => {
    const memory = requireEnvelope().scanners?.memory;
    assert.ok(memory, 'memory scanner result must be present');
    const findings = memory.findings || [];
    assert.ok(
      findings.length >= 1,
      `expected memory-poisoning findings on a fixture named "poisoned-project", got ${findings.length}`
    );
  });

  it('all scanners completed without error', () => {
    const aggregate = requireAggregate();
    assert.equal(
      aggregate.scanners_error,
      0,
      `scanners_error must be 0, got ${aggregate.scanners_error}`
    );
    assert.ok(aggregate.scanners_ok >= 1, 'at least one scanner must report ok');
  });
});
|
|
|
|
// End-to-end expectations for the grade-a fixture: the orchestrator must
// stay below BLOCK on every axis while still running the full scanner set.
describe('e2e scan-pipeline — CLEAN (grade-a) project', () => {
  let result;
  let envelope;

  // Returns the parsed envelope. When stdout was not valid JSON this fails
  // with the exit code and stderr attached, instead of letting later
  // property reads die with an opaque TypeError.
  const requireEnvelope = () => {
    assert.ok(
      envelope,
      `orchestrator stdout must be valid JSON (exit code ${result?.code}); stderr:\n${result?.stderr}`
    );
    return envelope;
  };

  // Returns envelope.aggregate, asserting that it exists first.
  const requireAggregate = () => {
    const { aggregate } = requireEnvelope();
    assert.ok(aggregate, 'aggregate must exist');
    return aggregate;
  };

  // A single orchestrator run feeds every assertion in this block.
  before(async () => {
    result = await runOrchestrator(CLEAN);
    envelope = tryParse(result.stdout);
  });

  it('emits a parseable JSON envelope on stdout', () => {
    requireEnvelope();
  });

  it('exits with code 0 or 1 (PASS or WARNING) — never BLOCK', () => {
    assert.notEqual(result.code, 2, 'grade-a fixture must NOT produce BLOCK verdict');
    assert.ok([0, 1].includes(result.code), `expected exit 0 or 1, got ${result.code}`);
  });

  it('verdict is PASS or WARNING — never BLOCK', () => {
    const { verdict } = requireAggregate();
    assert.ok(['PASS', 'WARNING'].includes(verdict), `expected PASS/WARNING, got ${verdict}`);
  });

  it('risk_score is below BLOCK cutoff (65)', () => {
    const { risk_score: riskScore } = requireAggregate();
    assert.ok(riskScore < 65, `risk_score ${riskScore} must be < 65 for clean fixture`);
  });

  it('produces ZERO critical findings (defining property of grade-a)', () => {
    const counts = requireAggregate().counts || {};
    assert.equal(counts.critical, 0, `grade-a fixture must have 0 critical, got ${counts.critical}`);
  });

  it('runs all 10 scanners + toxic-flow correlator on the clean project too', () => {
    const present = new Set(Object.keys(requireEnvelope().scanners || {}));
    for (const name of EXPECTED_SCANNERS) {
      assert.ok(present.has(name), `scanner "${name}" must run on clean project too`);
    }
  });
});
|
|
|
|
describe('e2e scan-pipeline — narrative coherence: BLOCK is genuinely worse than WARNING', () => {
  // These tests cross-check that the verdict ordering matches the numeric
  // risk scoring. The core narrative-coherence assertion is that a
  // BLOCK-verdict scan cannot have a lower risk_score than a WARNING scan
  // of a different project. If this ever fails, severity-mapping logic has
  // drifted and the v2 risk-score model is broken.

  // Canonical verdict scale, least to most severe.
  const VERDICT_ORDER = ['PASS', 'WARNING', 'BLOCK'];

  let poisonedEnvelope;
  let cleanEnvelope;

  // Returns both aggregates, failing with a clear assertion when either
  // run did not yield a usable envelope. Every test goes through this, so
  // a broken run never surfaces as a TypeError on a null envelope.
  const requireAggregates = () => {
    assert.ok(poisonedEnvelope && cleanEnvelope, 'both envelopes must parse');
    const poisoned = poisonedEnvelope.aggregate;
    const clean = cleanEnvelope.aggregate;
    assert.ok(poisoned && clean, 'both envelopes must carry an aggregate');
    return { poisoned, clean };
  };

  // Both fixtures are scanned concurrently; this block performs its own
  // orchestrator runs.
  before(async () => {
    const [poisonedRun, cleanRun] = await Promise.all([
      runOrchestrator(POISONED),
      runOrchestrator(CLEAN),
    ]);
    poisonedEnvelope = tryParse(poisonedRun.stdout);
    cleanEnvelope = tryParse(cleanRun.stdout);
  });

  it('poisoned.risk_score > clean.risk_score', () => {
    const { poisoned, clean } = requireAggregates();
    const poisonedScore = poisoned.risk_score;
    const cleanScore = clean.risk_score;
    assert.ok(
      poisonedScore > cleanScore,
      `poisoned (${poisonedScore}) must outscore clean (${cleanScore}) — risk-band coherence`
    );
  });

  it('poisoned has more critical findings than clean', () => {
    const { poisoned, clean } = requireAggregates();
    const poisonedCriticals = poisoned.counts?.critical || 0;
    const cleanCriticals = clean.counts?.critical || 0;
    assert.ok(
      poisonedCriticals > cleanCriticals,
      `poisoned criticals (${poisonedCriticals}) must exceed clean criticals (${cleanCriticals})`
    );
  });

  it('verdict ordering matches risk-band ordering (BLOCK > WARNING > PASS)', () => {
    const { poisoned, clean } = requireAggregates();
    const poisonedRank = VERDICT_ORDER.indexOf(poisoned.verdict);
    const cleanRank = VERDICT_ORDER.indexOf(clean.verdict);
    assert.ok(poisonedRank >= 0 && cleanRank >= 0, 'both verdicts must be on the canonical scale');
    assert.ok(
      poisonedRank > cleanRank,
      `verdict ordering inverted: poisoned=${poisoned.verdict} clean=${clean.verdict}`
    );
  });
});
|