diff --git a/plugins/llm-security/tests/scanners/skill-scanner-narrative.test.mjs b/plugins/llm-security/tests/scanners/skill-scanner-narrative.test.mjs
new file mode 100644
index 0000000..aa8bf30
--- /dev/null
+++ b/plugins/llm-security/tests/scanners/skill-scanner-narrative.test.mjs
@@ -0,0 +1,186 @@
+// skill-scanner-narrative.test.mjs — narrative-coherence contract test (v7.1.1)
+//
+// Tests the deterministic input layer that feeds skill-scanner-agent:
+// content-extractor + entropy scanner against the hyperframes-like fixture.
+// Does NOT invoke the LLM agent. The agent prompt rules added in v7.1.1
+// (Step 2.5 context-first severity, Suppressed Signals section,
+// finding-body forbidden-phrase contract) are covered here only at the
+// input/contract level — agent output testing has no precedent in this
+// codebase and is intentionally out of scope.
+
+import { describe, it, before, after } from 'node:test';
+import assert from 'node:assert/strict';
+import { execFile } from 'node:child_process';
+import { promisify } from 'node:util';
+import { readFile, mkdtemp, rm } from 'node:fs/promises';
+import { tmpdir } from 'node:os';
+import { join, resolve } from 'node:path';
+import { fileURLToPath } from 'node:url';
+import { discoverFiles } from '../../scanners/lib/file-discovery.mjs';
+import { scan as entropyScan } from '../../scanners/entropy-scanner.mjs';
+import { riskScore, verdict, riskBand } from '../../scanners/lib/severity.mjs';
+
+const execFileP = promisify(execFile);
+
+// Plugin root, derived from this file's location (tests/scanners/ -> ../../)
+// so every path below resolves identically regardless of the directory the
+// test runner is launched from.
+const PLUGIN_ROOT = fileURLToPath(new URL('../../', import.meta.url));
+const FIXTURE = resolve(PLUGIN_ROOT, 'tests/fixtures/skill-scan/hyperframes-like');
+const CONTENT_EXTRACTOR = resolve(PLUGIN_ROOT, 'scanners/content-extractor.mjs');
+
+describe('skill-scanner narrative coherence — v7.1.1', () => {
+  let tmpDir;
+  let extractorJsonPath;
+  let extractorResult;
+
+  // Run the content-extractor CLI once against the fixture and parse the JSON
+  // file it is asked to write; the content-extractor suite only reads the result.
+  before(async () => {
+    tmpDir = await mkdtemp(join(tmpdir(), 'narrative-test-'));
+    extractorJsonPath = join(tmpDir, 'evidence.json');
+    // process.execPath runs the extractor under the same Node binary as the
+    // test runner instead of whichever `node` happens to be first on PATH.
+    await execFileP(process.execPath, [
+      CONTENT_EXTRACTOR,
+      FIXTURE,
+      '--output-file',
+      extractorJsonPath,
+    ]);
+    extractorResult = JSON.parse(await readFile(extractorJsonPath, 'utf8'));
+  });
+
+  after(async () => {
+    // tmpDir is still undefined if mkdtemp itself failed; skip cleanup then so
+    // rm() does not throw on an undefined path on top of the setup error.
+    if (tmpDir) {
+      await rm(tmpDir, { recursive: true, force: true });
+    }
+  });
+
+  describe('content-extractor against hyperframes-like fixture', () => {
+    it('exits 0 (no extractor crash on animation markup)', () => {
+      // A non-zero exit rejects execFileP in the before() hook, so reaching
+      // this point with parsed output means the extractor ran to completion.
+      assert.ok(extractorResult, 'extractor produced output');
+    });
+
+    it('detects exactly one HIGH HITL trap signal', () => {
+      const highSignals = (extractorResult.injection_findings || [])
+        .filter((f) => f.severity === 'high');
+      const labels = JSON.stringify(highSignals.map((s) => s.label || s.pattern));
+      assert.equal(
+        highSignals.length,
+        1,
+        `expected 1 HIGH injection signal (HITL trap), got ${highSignals.length}: ${labels}`,
+      );
+    });
+
+    it('detects framework env-var references (process-env-access >= 2)', () => {
+      const envRefs = (extractorResult.credential_references || []).filter(
+        (r) => r.label === 'process-env-access',
+      );
+      assert.ok(
+        envRefs.length >= 2,
+        `expected >= 2 process.env credential refs, got ${envRefs.length}`,
+      );
+    });
+
+    it('deterministic_verdict.has_injection is TRUE (any injection signal flips it)', () => {
+      // content-extractor.mjs:392 — has_injection is `injectionFindings.length > 0`
+      // (any severity); has_critical_injection is the critical-only counter.
+      // The HITL trap is a HIGH signal so has_injection flips true.
+      assert.equal(
+        extractorResult.deterministic_verdict?.has_injection,
+        true,
+        'has_injection must flip true when ANY injection signal exists (HIGH HITL trap qualifies)',
+      );
+    });
+
+    it('deterministic_verdict.has_critical_injection is FALSE (no CRITICAL signals in fixture)', () => {
+      // The fixture is designed to have ZERO CRITICAL_PATTERNS hits — only
+      // one HIGH HITL trap. has_critical_injection must be false.
+      assert.equal(
+        extractorResult.deterministic_verdict?.has_critical_injection,
+        false,
+        'has_critical_injection must stay false when fixture has only HIGH/MEDIUM signals',
+      );
+    });
+  });
+
+  describe('entropy scanner against hyperframes-like fixture', () => {
+    let entropyResult;
+
+    before(async () => {
+      const discovery = await discoverFiles(FIXTURE);
+      entropyResult = await entropyScan(FIXTURE, discovery);
+    });
+
+    it('produces a calibration block (suppression telemetry present)', () => {
+      assert.ok(
+        entropyResult.calibration,
+        'entropy scanner must surface a calibration block (v7.0.0+)',
+      );
+    });
+
+    it('emits at most one entropy finding for the fixture', () => {
+      // Animation markup, CSS-in-JS, and inline SVG data URIs are suppressed
+      // by the line-level rules added in v7.0.0. The fixture is designed so
+      // entropy noise is fully absorbed by suppression.
+      const titles = JSON.stringify(entropyResult.findings.map((f) => f.title));
+      assert.ok(
+        entropyResult.findings.length <= 1,
+        `entropy.findings.length should be <= 1; got ${entropyResult.findings.length}: ${titles}`,
+      );
+    });
+  });
+
+  describe('co-monotonicity invariants (1 high) — narrative-coherence guard', () => {
+    it('{ high: 1 } → verdict WARNING, band High', () => {
+      // Independently asserts the invariant the agent must preserve when it
+      // emits exactly 1 HIGH after suppressing context noise. Mirrors the
+      // sweep in tests/lib/severity.test.mjs:255-303 — duplicated here so
+      // the narrative-coherence file fails fast if the invariant drifts.
+      const counts = { critical: 0, high: 1, medium: 0, low: 0, info: 0 };
+      const score = riskScore(counts);
+      assert.equal(verdict(counts), 'WARNING');
+      assert.equal(riskBand(score), 'High');
+    });
+
+    it('{ high: 1, info: 1 } → verdict still WARNING (info is scoring-inert)', () => {
+      const counts = { critical: 0, high: 1, medium: 0, low: 0, info: 1 };
+      assert.equal(verdict(counts), 'WARNING');
+    });
+  });
+
+  describe('agent prompt contract (static asserts on prompt + template files)', () => {
+    // Spec-level coverage of brief SC#2 and SC#3 (agent output cannot be
+    // tested without invoking the LLM, per D8). Verifies the prompt
+    // mandates context-first severity, the new field name, and v2 cutoffs.
+
+    it('skill-scanner-agent.md mandates context-first severity (Step 2.5)', async () => {
+      const text = await readFile(
+        resolve(PLUGIN_ROOT, 'agents/skill-scanner-agent.md'),
+        'utf8',
+      );
+      assert.match(text, /Step 2\.5: Context-First Severity Assignment/);
+      assert.match(text, /summary\.narrative_audit\.suppressed_findings/);
+      assert.match(text, /score>=65/);
+      // Whitespace-tolerant so a `61` cutoff is rejected both in the spaced
+      // form and in the compact `score>=N` form asserted just above.
+      assert.doesNotMatch(text, /score\s*>=\s*61/);
+    });
+
+    it('unified-report.md uses v2 cutoffs and exposes Narrative Audit', async () => {
+      const text = await readFile(
+        resolve(PLUGIN_ROOT, 'templates/unified-report.md'),
+        'utf8',
+      );
+      assert.match(text, /Narrative Audit/);
+      assert.match(text, /SUPPRESSED_FINDINGS_COUNT/);
+      assert.match(text, /WARNING if high>=1 OR score>=15/);
+      // Whitespace-tolerant: rejects both `score >= 61` and `score>=61`.
+      assert.doesNotMatch(text, /score\s*>=\s*61/);
+    });
+  });
+});