test(llm-security): narrative-coherence contract test (v7.1.1)

11 test cases across 4 describe groups against tests/fixtures/skill-scan/hyperframes-like/. Tests the deterministic input layer that feeds skill-scanner-agent — does NOT invoke the LLM (no precedent in 1511 tests). Coverage: - content-extractor (5 it): exit 0 on animation markup; exactly 1 HIGH HITL trap; >= 2 process.env credential refs; has_injection=true (any injection signal flips it); has_critical_injection=false (no CRITICAL in fixture). - entropy scanner (2 it): calibration block present; <= 1 finding (rest suppressed via line-context rules). - co-monotonicity (2 it): {high:1} → WARNING/High; {high:1, info:1} → WARNING (info scoring-inert). Inline guard mirrors the sweep at tests/lib/severity.test.mjs:252-303 so this file fails fast if the invariant drifts. - agent prompt contract (2 it): static asserts that agents/skill-scanner-agent.md contains 'Step 2.5: Context-First Severity Assignment', 'summary.narrative_audit.suppressed_findings', 'score>=65', AND zero remaining 'score >= 61' references; same v2-cutoff + narrative-audit contract on templates/unified-report.md. Part of v7.1.1 narrative-coherence patch. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-04-29 12:50:27 +02:00 · 2026-04-29 12:50:27 +02:00 · 5cfbc70472
commit 5cfbc70472
parent 3abd7ffeab
1 changed files with 167 additions and 0 deletions
--- a/plugins/llm-security/tests/scanners/skill-scanner-narrative.test.mjs
+++ b/plugins/llm-security/tests/scanners/skill-scanner-narrative.test.mjs
@ -0,0 +1,167 @@
+// skill-scanner-narrative.test.mjs — narrative-coherence contract test (v7.1.1)
+//
+// Tests the deterministic input layer that feeds skill-scanner-agent:
+// content-extractor + entropy scanner against the hyperframes-like fixture.
+// Does NOT invoke the LLM agent. The agent prompt rules added in v7.1.1
+// (Step 2.5 context-first severity, Suppressed Signals section,
+// finding-body forbidden-phrase contract) are covered here only at the
+// input/contract level — agent output testing has no precedent in this
+// codebase and is intentionally out of scope.
+
+import { describe, it, before, after } from 'node:test';
+import assert from 'node:assert/strict';
+import { execFile } from 'node:child_process';
+import { promisify } from 'node:util';
+import { readFile, mkdtemp, rm } from 'node:fs/promises';
+import { tmpdir } from 'node:os';
+import { join, resolve } from 'node:path';
+import { fileURLToPath } from 'node:url';
+import { discoverFiles } from '../../scanners/lib/file-discovery.mjs';
+import { scan as entropyScan } from '../../scanners/entropy-scanner.mjs';
+import { riskScore, verdict, riskBand } from '../../scanners/lib/severity.mjs';
+
+// Promise-returning execFile, used to run the content-extractor CLI.
+const execFileP = promisify(execFile);
+
+// Paths are anchored to this test file instead of process.cwd(), so the suite
+// resolves the same files no matter which directory the runner is started
+// from. This file lives in <plugin-root>/tests/scanners/, which puts the
+// plugin root two levels up — the same root the relative imports above use.
+const PLUGIN_ROOT = fileURLToPath(new URL('../../', import.meta.url));
+
+// Directory scanned by both the content-extractor and the entropy scanner.
+const FIXTURE = resolve(PLUGIN_ROOT, 'tests/fixtures/skill-scan/hyperframes-like');
+
+// CLI entry point executed as a child process in the suite-level before().
+const CONTENT_EXTRACTOR = resolve(PLUGIN_ROOT, 'scanners/content-extractor.mjs');
+
+describe('skill-scanner narrative coherence — v7.1.1', () => {
+  let tmpDir;
+  let extractorJsonPath;
+  let extractorResult;
+
+  // Suite setup: run the content-extractor CLI once against the fixture,
+  // have it write its JSON evidence into a throwaway directory, and parse
+  // that file into `extractorResult` for the tests below.
+  before(async () => {
+    tmpDir = await mkdtemp(join(tmpdir(), 'narrative-test-'));
+    extractorJsonPath = join(tmpDir, 'evidence.json');
+    // process.execPath rather than a bare 'node': spawn the same interpreter
+    // that is running this suite instead of whatever a PATH lookup finds.
+    // The promisified execFile rejects on a non-zero exit code, so an
+    // extractor crash fails this hook.
+    await execFileP(process.execPath, [
+      CONTENT_EXTRACTOR,
+      FIXTURE,
+      '--output-file',
+      extractorJsonPath,
+    ]);
+    extractorResult = JSON.parse(await readFile(extractorJsonPath, 'utf8'));
+  });
+
+  // Suite teardown: remove the scratch directory. Skipped when mkdtemp never
+  // produced one, so cleanup does not call rm() with an undefined path.
+  after(async () => {
+    if (tmpDir === undefined) return;
+    await rm(tmpDir, { recursive: true, force: true });
+  });
+
+  describe('content-extractor against hyperframes-like fixture', () => {
+    // Every case below reads `extractorResult`, the evidence JSON parsed once
+    // by the suite-level before() hook.
+
+    it('exits 0 (no extractor crash on animation markup)', () => {
+      // A failing extractor run rejects inside before(); here we only confirm
+      // that the parsed evidence is truthy.
+      assert.ok(extractorResult, 'extractor produced output');
+    });
+
+    it('detects exactly one HIGH HITL trap signal', () => {
+      const findings = extractorResult.injection_findings || [];
+      const highOnly = findings.filter(({ severity }) => severity === 'high');
+      // Name each offending signal in the failure message for quick triage.
+      const names = highOnly.map((signal) => signal.label || signal.pattern);
+      const detail = `expected 1 HIGH injection signal (HITL trap), got ${highOnly.length}: ${JSON.stringify(names)}`;
+      assert.equal(highOnly.length, 1, detail);
+    });
+
+    it('detects framework env-var references (process-env-access >= 2)', () => {
+      const references = extractorResult.credential_references || [];
+      const envCount = references.filter(
+        ({ label }) => label === 'process-env-access',
+      ).length;
+      assert.ok(
+        envCount >= 2,
+        `expected >= 2 process.env credential refs, got ${envCount}`,
+      );
+    });
+
+    it('deterministic_verdict.has_injection is TRUE (any injection signal flips it)', () => {
+      // Per content-extractor.mjs:392, has_injection is
+      // `injectionFindings.length > 0` regardless of severity, while
+      // has_critical_injection counts critical hits only. The fixture's HITL
+      // trap is a HIGH signal, which is enough to set has_injection.
+      const flag = extractorResult.deterministic_verdict?.has_injection;
+      assert.equal(
+        flag,
+        true,
+        'has_injection must flip true when ANY injection signal exists (HIGH HITL trap qualifies)',
+      );
+    });
+
+    it('deterministic_verdict.has_critical_injection is FALSE (no CRITICAL signals in fixture)', () => {
+      // The fixture is built to produce zero CRITICAL_PATTERNS hits (just the
+      // single HIGH HITL trap), so the critical flag has to remain false.
+      const flag = extractorResult.deterministic_verdict?.has_critical_injection;
+      assert.equal(
+        flag,
+        false,
+        'has_critical_injection must stay false when fixture has only HIGH/MEDIUM signals',
+      );
+    });
+  });
+
+  describe('entropy scanner against hyperframes-like fixture', () => {
+    let entropyResult;
+
+    // Discover the fixture's files, then run the entropy scanner over them
+    // once; both cases below inspect the same result object.
+    before(async () => {
+      entropyResult = await entropyScan(FIXTURE, await discoverFiles(FIXTURE));
+    });
+
+    it('produces a calibration block (suppression telemetry present)', () => {
+      const { calibration } = entropyResult;
+      assert.ok(
+        calibration,
+        'entropy scanner must surface a calibration block (v7.0.0+)',
+      );
+    });
+
+    it('emits at most one entropy finding for the fixture', () => {
+      // The v7.0.0 line-level rules suppress animation markup, CSS-in-JS and
+      // inline SVG data URIs; the fixture is built so that this suppression
+      // soaks up the entropy noise.
+      const { findings } = entropyResult;
+      const withinBudget = findings.length <= 1;
+      const titles = findings.map(({ title }) => title);
+      assert.ok(
+        withinBudget,
+        `entropy.findings.length should be <= 1; got ${findings.length}: ${JSON.stringify(titles)}`,
+      );
+    });
+  });
+
+  describe('co-monotonicity invariants (1 high) — narrative-coherence guard', () => {
+    // Baseline with every severity bucket at zero; each case overrides only
+    // the buckets it exercises.
+    const noFindings = { critical: 0, high: 0, medium: 0, low: 0, info: 0 };
+
+    it('{ high: 1 } → verdict WARNING, band High', () => {
+      // Standalone check of the invariant the agent has to keep when it
+      // reports exactly one HIGH after suppressing context noise. It repeats
+      // the sweep from tests/lib/severity.test.mjs on purpose, so this file
+      // fails fast if the invariant drifts.
+      const singleHigh = { ...noFindings, high: 1 };
+      const score = riskScore(singleHigh);
+      assert.equal(verdict(singleHigh), 'WARNING');
+      assert.equal(riskBand(score), 'High');
+    });
+
+    it('{ high: 1, info: 1 } → verdict still WARNING (info is scoring-inert)', () => {
+      // Adding an info-level finding must not move the verdict.
+      const highPlusInfo = { ...noFindings, high: 1, info: 1 };
+      assert.equal(verdict(highPlusInfo), 'WARNING');
+    });
+  });
+
+  describe('agent prompt contract (static asserts on prompt + template files)', () => {
+    // Spec-level coverage of brief SC#2 and SC#3 (agent output cannot be
+    // tested without invoking the LLM, per D8). Verifies the prompt
+    // mandates context-first severity, the new field name, and v2 cutoffs.
+
+    // Superseded cutoff that must not reappear. Whitespace around the
+    // operator is optional because these files also use the unspaced
+    // spelling (e.g. `score>=65`); a literal 'score >= 61' pattern would let
+    // a stale `score>=61` slip through.
+    const STALE_CUTOFF_61 = /score\s*>=\s*61/;
+
+    // Reads a file addressed relative to the plugin root. The path is
+    // resolved against this test file's URL (two levels below the root)
+    // rather than process.cwd(), so the result does not depend on where the
+    // runner was started.
+    const readPluginFile = (relativePath) =>
+      readFile(new URL(`../../${relativePath}`, import.meta.url), 'utf8');
+
+    it('skill-scanner-agent.md mandates context-first severity (Step 2.5)', async () => {
+      const text = await readPluginFile('agents/skill-scanner-agent.md');
+      assert.match(text, /Step 2\.5: Context-First Severity Assignment/);
+      assert.match(text, /summary\.narrative_audit\.suppressed_findings/);
+      assert.match(text, /score>=65/);
+      assert.doesNotMatch(text, STALE_CUTOFF_61);
+    });
+
+    it('unified-report.md uses v2 cutoffs and exposes Narrative Audit', async () => {
+      const text = await readPluginFile('templates/unified-report.md');
+      assert.match(text, /Narrative Audit/);
+      assert.match(text, /SUPPRESSED_FINDINGS_COUNT/);
+      assert.match(text, /WARNING if high>=1 OR score>=15/);
+      assert.doesNotMatch(text, STALE_CUTOFF_61);
+    });
+  });
+});