ktg-plugin-marketplace/plugins/llm-security/tests/scanners/skill-scanner-narrative.test.mjs
Kjell Tore Guttormsen 5cfbc70472 test(llm-security): narrative-coherence contract test (v7.1.1)
11 assertions across 4 describe groups against tests/fixtures/skill-scan/
hyperframes-like/. Tests the deterministic input layer that feeds
skill-scanner-agent — does NOT invoke the LLM (invoking the LLM has no precedent among the existing 1511 tests).

Coverage:
- content-extractor (5 it): exit 0 on animation markup; exactly 1 HIGH
  HITL trap; >= 2 process.env credential refs; has_injection=true (any
  injection signal flips it); has_critical_injection=false (no CRITICAL
  in fixture).
- entropy scanner (2 it): calibration block present; <= 1 finding (rest
  suppressed via line-context rules).
- co-monotonicity (2 it): {high:1} → WARNING/High; {high:1, info:1} →
  WARNING (info scoring-inert). Inline guard mirrors the sweep at
  tests/lib/severity.test.mjs:252-303 so this file fails fast if the
  invariant drifts.
- agent prompt contract (2 it): static asserts that
  agents/skill-scanner-agent.md contains 'Step 2.5: Context-First
  Severity Assignment', 'summary.narrative_audit.suppressed_findings',
  'score>=65', AND zero remaining 'score >= 61' references; same v2-
  cutoff + narrative-audit contract on templates/unified-report.md.

Part of v7.1.1 narrative-coherence patch.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-04-29 12:50:27 +02:00

167 lines
6.7 KiB
JavaScript

// skill-scanner-narrative.test.mjs — narrative-coherence contract test (v7.1.1)
//
// Tests the deterministic input layer that feeds skill-scanner-agent:
// content-extractor + entropy scanner against the hyperframes-like fixture.
// Does NOT invoke the LLM agent. The agent prompt rules added in v7.1.1
// (Step 2.5 context-first severity, Suppressed Signals section,
// finding-body forbidden-phrase contract) are covered here only at the
// input/contract level — agent output testing has no precedent in this
// codebase and is intentionally out of scope.
import { describe, it, before, after } from 'node:test';
import assert from 'node:assert/strict';
import { execFile } from 'node:child_process';
import { promisify } from 'node:util';
import { readFile, mkdtemp, rm } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join, resolve } from 'node:path';
import { fileURLToPath } from 'node:url';
import { discoverFiles } from '../../scanners/lib/file-discovery.mjs';
import { scan as entropyScan } from '../../scanners/entropy-scanner.mjs';
import { riskScore, verdict, riskBand } from '../../scanners/lib/severity.mjs';
// Promise-returning wrapper around child_process.execFile so hooks can await it.
const execFileP = promisify(execFile);
// Plugin root, derived from this file's own location (tests/scanners/) rather
// than process.cwd(), so the suite resolves the same paths no matter which
// directory the test runner is launched from.
const PLUGIN_ROOT = fileURLToPath(new URL('../../', import.meta.url));
// Fixture directory handed to both the content extractor and the entropy scanner.
const FIXTURE = resolve(PLUGIN_ROOT, 'tests/fixtures/skill-scan/hyperframes-like');
// Content-extractor entry script, executed as a child process by the suite.
const CONTENT_EXTRACTOR = resolve(PLUGIN_ROOT, 'scanners/content-extractor.mjs');
describe('skill-scanner narrative coherence — v7.1.1', () => {
let tmpDir;
let extractorJsonPath;
let extractorResult;
// Suite setup: run the content extractor once against the fixture, writing its
// JSON evidence into a fresh temp directory, then parse that file so every
// test below can inspect the same result. A non-zero extractor exit rejects
// the execFile promise and therefore fails this hook.
before(async () => {
  tmpDir = await mkdtemp(join(tmpdir(), 'narrative-test-'));
  extractorJsonPath = join(tmpDir, 'evidence.json');
  // Use process.execPath rather than a bare 'node': a PATH lookup can pick up
  // a different Node binary/version than the one running this test suite.
  await execFileP(process.execPath, [
    CONTENT_EXTRACTOR,
    FIXTURE,
    '--output-file',
    extractorJsonPath,
  ]);
  extractorResult = JSON.parse(await readFile(extractorJsonPath, 'utf8'));
});
// Suite teardown: remove the temp directory created during setup.
after(async () => {
  // tmpDir stays undefined when setup failed before mkdtemp resolved; calling
  // rm(undefined) would throw an argument-type error and pile a second,
  // misleading failure on top of the real one.
  if (tmpDir) {
    await rm(tmpDir, { recursive: true, force: true });
  }
});
// Checks on the parsed content-extractor output for the hyperframes-like fixture.
describe('content-extractor against hyperframes-like fixture', () => {
  it('exits 0 (no extractor crash on animation markup)', () => {
    // Parsed output only exists if the extractor run in suite setup succeeded.
    assert.ok(extractorResult, 'extractor produced output');
  });

  it('detects exactly one HIGH HITL trap signal', () => {
    const allSignals = extractorResult.injection_findings || [];
    const highOnly = allSignals.filter((signal) => signal.severity === 'high');
    // Names of the offending signals, included to make a failure self-explanatory.
    const names = JSON.stringify(
      highOnly.map((signal) => signal.label || signal.pattern),
    );
    assert.equal(
      highOnly.length,
      1,
      `expected 1 HIGH injection signal (HITL trap), got ${highOnly.length}: ${names}`,
    );
  });

  it('detects framework env-var references (process-env-access >= 2)', () => {
    const credentialRefs = extractorResult.credential_references || [];
    const envAccessCount = credentialRefs.filter(
      (ref) => ref.label === 'process-env-access',
    ).length;
    assert.ok(
      envAccessCount >= 2,
      `expected >= 2 process.env credential refs, got ${envAccessCount}`,
    );
  });

  it('deterministic_verdict.has_injection is TRUE (any injection signal flips it)', () => {
    // Per content-extractor.mjs:392, has_injection is true for any injection
    // finding regardless of severity, while has_critical_injection counts
    // critical ones only. The single HIGH HITL trap is enough to set it.
    const verdictBlock = extractorResult.deterministic_verdict;
    assert.equal(
      verdictBlock?.has_injection,
      true,
      'has_injection must flip true when ANY injection signal exists (HIGH HITL trap qualifies)',
    );
  });

  it('deterministic_verdict.has_critical_injection is FALSE (no CRITICAL signals in fixture)', () => {
    // The fixture is built to trigger no CRITICAL_PATTERNS at all — its only
    // notable signal is the one HIGH HITL trap — so this flag must stay false.
    const verdictBlock = extractorResult.deterministic_verdict;
    assert.equal(
      verdictBlock?.has_critical_injection,
      false,
      'has_critical_injection must stay false when fixture has only HIGH/MEDIUM signals',
    );
  });
});
// Checks on the entropy scanner's output for the hyperframes-like fixture.
describe('entropy scanner against hyperframes-like fixture', () => {
  let scanOutput;

  // Discover the fixture's files and run the entropy scanner over them once.
  before(async () => {
    const discoveredFiles = await discoverFiles(FIXTURE);
    scanOutput = await entropyScan(FIXTURE, discoveredFiles);
  });

  it('produces a calibration block (suppression telemetry present)', () => {
    const { calibration } = scanOutput;
    assert.ok(
      calibration,
      'entropy scanner must surface a calibration block (v7.0.0+)',
    );
  });

  it('emits at most one entropy finding for the fixture', () => {
    // The v7.0.0 line-level rules suppress animation markup, CSS-in-JS and
    // inline SVG data URIs; the fixture is built so that suppression soaks up
    // the entropy noise.
    const { findings } = scanOutput;
    const titles = JSON.stringify(findings.map((finding) => finding.title));
    assert.ok(
      findings.length <= 1,
      `entropy.findings.length should be <= 1; got ${findings.length}: ${titles}`,
    );
  });
});
// Guards the severity invariants the agent relies on when exactly one HIGH
// finding remains after context noise has been suppressed.
describe('co-monotonicity invariants (1 high) — narrative-coherence guard', () => {
  // Builds a full severity-count record, zero everywhere except the overrides.
  const severityCounts = (overrides) => ({
    critical: 0,
    high: 0,
    medium: 0,
    low: 0,
    info: 0,
    ...overrides,
  });

  it('{ high: 1 } → verdict WARNING, band High', () => {
    // Deliberately duplicates the invariant sweep in
    // tests/lib/severity.test.mjs so this file fails fast on its own if the
    // invariant ever drifts.
    const singleHigh = severityCounts({ high: 1 });
    const computedScore = riskScore(singleHigh);
    assert.equal(verdict(singleHigh), 'WARNING');
    assert.equal(riskBand(computedScore), 'High');
  });

  it('{ high: 1, info: 1 } → verdict still WARNING (info is scoring-inert)', () => {
    const highPlusInfo = severityCounts({ high: 1, info: 1 });
    assert.equal(verdict(highPlusInfo), 'WARNING');
  });
});
// Static contract checks on the agent prompt and the report template.
describe('agent prompt contract (static asserts on prompt + template files)', () => {
  // Spec-level coverage of brief SC#2 and SC#3 (agent output cannot be
  // tested without invoking the LLM, per D8). Verifies the prompt
  // mandates context-first severity, the new field name, and v2 cutoffs.

  // Reads a UTF-8 file addressed relative to this test file instead of
  // process.cwd(), so the checks pass from any working directory.
  // fs.readFile accepts file: URL objects directly.
  const readRelative = (relativePath) =>
    readFile(new URL(relativePath, import.meta.url), 'utf8');

  it('skill-scanner-agent.md mandates context-first severity (Step 2.5)', async () => {
    const text = await readRelative('../../agents/skill-scanner-agent.md');
    assert.match(text, /Step 2\.5: Context-First Severity Assignment/);
    assert.match(text, /summary\.narrative_audit\.suppressed_findings/);
    assert.match(text, /score>=65/);
    // The old cutoff must not survive anywhere in the prompt.
    assert.doesNotMatch(text, /score >= 61/);
  });

  it('unified-report.md uses v2 cutoffs and exposes Narrative Audit', async () => {
    const text = await readRelative('../../templates/unified-report.md');
    assert.match(text, /Narrative Audit/);
    assert.match(text, /SUPPRESSED_FINDINGS_COUNT/);
    assert.match(text, /WARNING if high>=1 OR score>=15/);
    // The old cutoff must not survive anywhere in the template.
    assert.doesNotMatch(text, /score >= 61/);
  });
});
});