test(llm-security): narrative-coherence contract test (v7.1.1)
11 test cases (`it` blocks) across 4 describe groups, run against the
tests/fixtures/skill-scan/hyperframes-like/ fixture. Tests the deterministic
input layer that feeds skill-scanner-agent — does NOT invoke the LLM (agent
output testing has no precedent among the existing 1511 tests).
Coverage:
- content-extractor (5 it): exit 0 on animation markup; exactly 1 HIGH
HITL trap; >= 2 process.env credential refs; has_injection=true (any
injection signal flips it); has_critical_injection=false (no CRITICAL
in fixture).
- entropy scanner (2 it): calibration block present; <= 1 finding (rest
suppressed via line-context rules).
- co-monotonicity (2 it): {high:1} → WARNING/High; {high:1, info:1} →
WARNING (info scoring-inert). Inline guard mirrors the sweep at
tests/lib/severity.test.mjs:252-303 so this file fails fast if the
invariant drifts.
- agent prompt contract (2 it): static asserts that
agents/skill-scanner-agent.md contains 'Step 2.5: Context-First
Severity Assignment', 'summary.narrative_audit.suppressed_findings', and
'score>=65', and has zero remaining 'score >= 61' references; and that
templates/unified-report.md contains 'Narrative Audit',
'SUPPRESSED_FINDINGS_COUNT', and 'WARNING if high>=1 OR score>=15', and
likewise has no 'score >= 61' references.
Part of v7.1.1 narrative-coherence patch.
Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
parent
3abd7ffeab
commit
5cfbc70472
1 changed files with 167 additions and 0 deletions
|
|
@ -0,0 +1,167 @@
|
|||
// skill-scanner-narrative.test.mjs — narrative-coherence contract test (v7.1.1)
|
||||
//
|
||||
// Tests the deterministic input layer that feeds skill-scanner-agent:
|
||||
// content-extractor + entropy scanner against the hyperframes-like fixture.
|
||||
// Does NOT invoke the LLM agent. The agent prompt rules added in v7.1.1
|
||||
// (Step 2.5 context-first severity, Suppressed Signals section,
|
||||
// finding-body forbidden-phrase contract) are covered here only at the
|
||||
// input/contract level — agent output testing has no precedent in this
|
||||
// codebase and is intentionally out of scope.
|
||||
|
||||
import { describe, it, before, after } from 'node:test';
|
||||
import assert from 'node:assert/strict';
|
||||
import { execFile } from 'node:child_process';
|
||||
import { promisify } from 'node:util';
|
||||
import { readFile, mkdtemp, rm } from 'node:fs/promises';
|
||||
import { tmpdir } from 'node:os';
|
||||
import { join, resolve } from 'node:path';
|
||||
import { discoverFiles } from '../../scanners/lib/file-discovery.mjs';
|
||||
import { scan as entropyScan } from '../../scanners/entropy-scanner.mjs';
|
||||
import { riskScore, verdict, riskBand } from '../../scanners/lib/severity.mjs';
|
||||
|
||||
// Promise-returning wrapper around child_process.execFile so the
// content-extractor run can be awaited inside a `before` hook.
const execFileP = promisify(execFile);

// Both paths are relative, so path.resolve() anchors them at process.cwd():
// the suite has to be launched from the directory that contains `tests/`
// and `scanners/`.
const FIXTURE = resolve('tests/fixtures/skill-scan/hyperframes-like');
const CONTENT_EXTRACTOR = resolve('scanners/content-extractor.mjs');
|
||||
|
||||
describe('skill-scanner narrative coherence — v7.1.1', () => {
  // Shared state, filled in once by the suite-level `before` hook below.
  let tmpDir;
  let extractorJsonPath;
  let extractorResult;

  // Run content-extractor once against the fixture, writing its JSON
  // evidence into a throw-away temp directory, then parse that file so the
  // individual tests can assert on the result without re-spawning.
  before(async () => {
    tmpDir = await mkdtemp(join(tmpdir(), 'narrative-test-'));
    extractorJsonPath = join(tmpDir, 'evidence.json');

    // process.execPath spawns the same Node binary that is running this
    // suite, instead of whichever `node` happens to be first on PATH.
    await execFileP(process.execPath, [
      CONTENT_EXTRACTOR,
      FIXTURE,
      '--output-file',
      extractorJsonPath,
    ]);

    extractorResult = JSON.parse(await readFile(extractorJsonPath, 'utf8'));
  });

  after(async () => {
    // tmpDir is still undefined if mkdtemp itself rejected; calling rm() with
    // it would throw and bury the original setup error.
    if (tmpDir) {
      await rm(tmpDir, { recursive: true, force: true });
    }
  });

  describe('content-extractor against hyperframes-like fixture', () => {
    it('exits 0 (no extractor crash on animation markup)', () => {
      // The promisified execFile in `before` rejects on a non-zero exit, so
      // having a parsed result here means the extractor exited cleanly.
      assert.ok(extractorResult, 'extractor produced output');
    });

    it('detects exactly one HIGH HITL trap signal', () => {
      const highSignals = (extractorResult.injection_findings || []).filter(
        (finding) => finding.severity === 'high',
      );
      const labels = highSignals.map((s) => s.label || s.pattern);

      assert.equal(
        highSignals.length,
        1,
        `expected 1 HIGH injection signal (HITL trap), got ${highSignals.length}: ${JSON.stringify(labels)}`,
      );
    });

    it('detects framework env-var references (process-env-access >= 2)', () => {
      const envRefs = (extractorResult.credential_references || []).filter(
        (ref) => ref.label === 'process-env-access',
      );

      assert.ok(
        envRefs.length >= 2,
        `expected >= 2 process.env credential refs, got ${envRefs.length}`,
      );
    });

    it('deterministic_verdict.has_injection is TRUE (any injection signal flips it)', () => {
      // content-extractor.mjs:392 — has_injection is `injectionFindings.length > 0`
      // (any severity); has_critical_injection is the critical-only counter.
      // The HITL trap is a HIGH signal so has_injection flips true.
      assert.equal(
        extractorResult.deterministic_verdict?.has_injection,
        true,
        'has_injection must flip true when ANY injection signal exists (HIGH HITL trap qualifies)',
      );
    });

    it('deterministic_verdict.has_critical_injection is FALSE (no CRITICAL signals in fixture)', () => {
      // The fixture is designed to have ZERO CRITICAL_PATTERNS hits — only
      // one HIGH HITL trap. has_critical_injection must be false.
      assert.equal(
        extractorResult.deterministic_verdict?.has_critical_injection,
        false,
        'has_critical_injection must stay false when fixture has only HIGH/MEDIUM signals',
      );
    });
  });

  describe('entropy scanner against hyperframes-like fixture', () => {
    let entropyResult;

    // Discover the fixture's files and run the entropy scanner in-process
    // once; both tests below read the same result object.
    before(async () => {
      const discovery = await discoverFiles(FIXTURE);
      entropyResult = await entropyScan(FIXTURE, discovery);
    });

    it('produces a calibration block (suppression telemetry present)', () => {
      assert.ok(
        entropyResult.calibration,
        'entropy scanner must surface a calibration block (v7.0.0+)',
      );
    });

    it('emits at most one entropy finding for the fixture', () => {
      // Animation markup, CSS-in-JS, and inline SVG data URIs are suppressed
      // by the line-level rules added in v7.0.0. The fixture is designed so
      // entropy noise is fully absorbed by suppression.
      const { findings } = entropyResult;
      const titles = findings.map((f) => f.title);

      assert.ok(
        findings.length <= 1,
        `entropy.findings.length should be <= 1; got ${findings.length}: ${JSON.stringify(titles)}`,
      );
    });
  });

  describe('co-monotonicity invariants (1 high) — narrative-coherence guard', () => {
    it('{ high: 1 } → verdict WARNING, band High', () => {
      // Independently asserts the invariant the agent must preserve when it
      // emits exactly 1 HIGH after suppressing context noise. Mirrors the
      // sweep in tests/lib/severity.test.mjs:255-303 — duplicated here so
      // the narrative-coherence file fails fast if the invariant drifts.
      const counts = { critical: 0, high: 1, medium: 0, low: 0, info: 0 };
      const score = riskScore(counts);

      assert.equal(verdict(counts), 'WARNING');
      assert.equal(riskBand(score), 'High');
    });

    it('{ high: 1, info: 1 } → verdict still WARNING (info is scoring-inert)', () => {
      // NOTE(review): only the verdict is checked here; the numeric score
      // for the info:1 case is not compared against the info:0 case.
      const counts = { critical: 0, high: 1, medium: 0, low: 0, info: 1 };

      assert.equal(verdict(counts), 'WARNING');
    });
  });

  describe('agent prompt contract (static asserts on prompt + template files)', () => {
    // Spec-level coverage of brief SC#2 and SC#3 (agent output cannot be
    // tested without invoking the LLM, per D8). Verifies the prompt
    // mandates context-first severity, the new field name, and v2 cutoffs.

    it('skill-scanner-agent.md mandates context-first severity (Step 2.5)', async () => {
      const promptText = await readFile(
        resolve('agents/skill-scanner-agent.md'),
        'utf8',
      );

      assert.match(promptText, /Step 2\.5: Context-First Severity Assignment/);
      assert.match(promptText, /summary\.narrative_audit\.suppressed_findings/);
      assert.match(promptText, /score>=65/);
      // NOTE(review): this pattern only matches the spaced spelling; an
      // unspaced 'score>=61' would not be caught — consider /score\s*>=\s*61/.
      assert.doesNotMatch(promptText, /score >= 61/);
    });

    it('unified-report.md uses v2 cutoffs and exposes Narrative Audit', async () => {
      const templateText = await readFile(
        resolve('templates/unified-report.md'),
        'utf8',
      );

      assert.match(templateText, /Narrative Audit/);
      assert.match(templateText, /SUPPRESSED_FINDINGS_COUNT/);
      assert.match(templateText, /WARNING if high>=1 OR score>=15/);
      // NOTE(review): spaced spelling only, as in the prompt check above it.
      assert.doesNotMatch(templateText, /score >= 61/);
    });
  });
});
|
||||
Loading…
Add table
Add a link
Reference in a new issue