11 assertions across 4 describe groups against tests/fixtures/skill-scan/
hyperframes-like/. Tests the deterministic input layer that feeds
skill-scanner-agent — does NOT invoke the LLM (no precedent in 1511 tests).
Coverage:
- content-extractor (5 it): exit 0 on animation markup; exactly 1 HIGH
HITL trap; >= 2 process.env credential refs; has_injection=true (any
injection signal flips it); has_critical_injection=false (no CRITICAL
in fixture).
- entropy scanner (2 it): calibration block present; <= 1 finding (rest
suppressed via line-context rules).
- co-monotonicity (2 it): {high:1} → WARNING/High; {high:1, info:1} →
WARNING (info scoring-inert). Inline guard mirrors the sweep at
tests/lib/severity.test.mjs:252-303 so this file fails fast if the
invariant drifts.
- agent prompt contract (2 it): static asserts that
agents/skill-scanner-agent.md contains 'Step 2.5: Context-First
Severity Assignment', 'summary.narrative_audit.suppressed_findings',
'score>=65', AND zero remaining 'score >= 61' references; same v2-
cutoff + narrative-audit contract on templates/unified-report.md.
Part of v7.1.1 narrative-coherence patch.
Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
167 lines
6.7 KiB
JavaScript
167 lines
6.7 KiB
JavaScript
// skill-scanner-narrative.test.mjs — narrative-coherence contract test (v7.1.1)
//
// Tests the deterministic input layer that feeds skill-scanner-agent:
// content-extractor + entropy scanner against the hyperframes-like fixture.
// Does NOT invoke the LLM agent. The agent prompt rules added in v7.1.1
// (Step 2.5 context-first severity, Suppressed Signals section,
// finding-body forbidden-phrase contract) are covered here only at the
// input/contract level — agent output testing has no precedent in this
// codebase and is intentionally out of scope.

import { describe, it, before, after } from 'node:test';
import assert from 'node:assert/strict';
import { execFile } from 'node:child_process';
import { promisify } from 'node:util';
import { readFile, mkdtemp, rm } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join, resolve } from 'node:path';
import { fileURLToPath } from 'node:url';

import { discoverFiles } from '../../scanners/lib/file-discovery.mjs';
import { scan as entropyScan } from '../../scanners/entropy-scanner.mjs';
import { riskScore, verdict, riskBand } from '../../scanners/lib/severity.mjs';

// Promise-returning wrapper around execFile, used to run the extractor CLI
// as a child process.
const execFileP = promisify(execFile);

/**
 * Turn a repository-relative path into an absolute filesystem path.
 *
 * The path is anchored to this test file's own location rather than to
 * process.cwd(), so the suite behaves the same no matter which directory the
 * test runner is launched from. This file lives two directories below the
 * repository root (it imports '../../scanners/...'), hence the '../../' prefix.
 *
 * @param {string} relPath - Path relative to the repository root.
 * @returns {string} Absolute path on the local filesystem.
 */
const fromRepoRoot = (relPath) =>
  fileURLToPath(new URL(`../../${relPath}`, import.meta.url));

// Fixture directory scanned by both the content-extractor and entropy tests.
const FIXTURE = fromRepoRoot('tests/fixtures/skill-scan/hyperframes-like');
// Entry point of the content-extractor CLI executed in the before() hook.
const CONTENT_EXTRACTOR = fromRepoRoot('scanners/content-extractor.mjs');

describe('skill-scanner narrative coherence — v7.1.1', () => {
|
|
// Suite-wide state, populated once by the before() hook below.
let tmpDir; // scratch directory that receives the extractor's JSON output
let extractorResult; // parsed evidence JSON written by content-extractor

// Run the content-extractor CLI once against the fixture and cache its parsed
// output so every nested describe group can assert against the same result.
before(async () => {
  tmpDir = await mkdtemp(join(tmpdir(), 'narrative-test-'));
  // Only this hook needs the output path, so it is kept local.
  const extractorJsonPath = join(tmpDir, 'evidence.json');

  // process.execPath runs the child under the same Node binary as the test
  // runner, instead of whatever `node` happens to be first on PATH (which may
  // be absent or a different version). The promisified execFile rejects on a
  // non-zero exit code, which fails the hook.
  await execFileP(process.execPath, [
    CONTENT_EXTRACTOR,
    FIXTURE,
    '--output-file',
    extractorJsonPath,
  ]);

  extractorResult = JSON.parse(await readFile(extractorJsonPath, 'utf8'));
});

// Remove the scratch directory created in before(). The guard covers the case
// where before() failed prior to mkdtemp resolving: tmpDir is then still
// undefined and rm(undefined) would throw, adding a second, misleading failure
// on top of the real one.
after(async () => {
  if (tmpDir === undefined) return;
  await rm(tmpDir, { recursive: true, force: true });
});

describe('content-extractor against hyperframes-like fixture', () => {
  // The extractor itself is executed in the suite's before() hook; this case
  // only checks that the run yielded a parsed result object.
  it('exits 0 (no extractor crash on animation markup)', () => {
    assert.ok(extractorResult, 'extractor produced output');
  });

  it('detects exactly one HIGH HITL trap signal', () => {
    const injectionFindings = extractorResult.injection_findings || [];
    const highOnly = injectionFindings.filter((finding) => finding.severity === 'high');
    // Name each offending signal in the failure message to ease debugging.
    const signalNames = highOnly.map((signal) => signal.label || signal.pattern);

    assert.equal(
      highOnly.length,
      1,
      `expected 1 HIGH injection signal (HITL trap), got ${highOnly.length}: ${JSON.stringify(signalNames)}`,
    );
  });

  it('detects framework env-var references (process-env-access >= 2)', () => {
    const credentialRefs = extractorResult.credential_references || [];
    const envAccessCount = credentialRefs.filter(
      (ref) => ref.label === 'process-env-access',
    ).length;

    assert.ok(
      envAccessCount >= 2,
      `expected >= 2 process.env credential refs, got ${envAccessCount}`,
    );
  });

  it('deterministic_verdict.has_injection is TRUE (any injection signal flips it)', () => {
    // content-extractor.mjs:392 — has_injection is `injectionFindings.length > 0`
    // (any severity), whereas has_critical_injection counts critical hits only.
    // The HITL trap is a HIGH signal, so has_injection must be true here.
    const verdictBlock = extractorResult.deterministic_verdict;
    const hasInjection = verdictBlock?.has_injection;

    assert.equal(
      hasInjection,
      true,
      'has_injection must flip true when ANY injection signal exists (HIGH HITL trap qualifies)',
    );
  });

  it('deterministic_verdict.has_critical_injection is FALSE (no CRITICAL signals in fixture)', () => {
    // The fixture is built to produce ZERO CRITICAL_PATTERNS hits — just the
    // single HIGH HITL trap — so the critical flag has to stay false.
    const verdictBlock = extractorResult.deterministic_verdict;
    const hasCriticalInjection = verdictBlock?.has_critical_injection;

    assert.equal(
      hasCriticalInjection,
      false,
      'has_critical_injection must stay false when fixture has only HIGH/MEDIUM signals',
    );
  });
});

describe('entropy scanner against hyperframes-like fixture', () => {
  // Result of one entropy scan over the fixture, shared by the cases below.
  let scanOutput;

  // Discover the fixture's files, then feed that discovery to the scanner.
  before(async () => {
    scanOutput = await entropyScan(FIXTURE, await discoverFiles(FIXTURE));
  });

  it('produces a calibration block (suppression telemetry present)', () => {
    const { calibration } = scanOutput;
    assert.ok(
      calibration,
      'entropy scanner must surface a calibration block (v7.0.0+)',
    );
  });

  it('emits at most one entropy finding for the fixture', () => {
    // Animation markup, CSS-in-JS, and inline SVG data URIs are suppressed by
    // the line-level rules added in v7.0.0; the fixture is designed so that
    // suppression fully absorbs the entropy noise.
    const findingCount = scanOutput.findings.length;
    const findingTitles = scanOutput.findings.map((finding) => finding.title);

    assert.ok(
      findingCount <= 1,
      `entropy.findings.length should be <= 1; got ${findingCount}: ${JSON.stringify(findingTitles)}`,
    );
  });
});

describe('co-monotonicity invariants (1 high) — narrative-coherence guard', () => {
  // Zeroed severity counts; each case overrides only the buckets it cares about.
  const NO_FINDINGS = Object.freeze({ critical: 0, high: 0, medium: 0, low: 0, info: 0 });

  it('{ high: 1 } → verdict WARNING, band High', () => {
    // Standalone check of the invariant the agent must preserve when it emits
    // exactly 1 HIGH after suppressing context noise. It mirrors the sweep in
    // tests/lib/severity.test.mjs and is duplicated here so this
    // narrative-coherence file fails fast if the invariant drifts.
    const singleHigh = { ...NO_FINDINGS, high: 1 };
    const computedScore = riskScore(singleHigh);

    assert.equal(verdict(singleHigh), 'WARNING');
    assert.equal(riskBand(computedScore), 'High');
  });

  it('{ high: 1, info: 1 } → verdict still WARNING (info is scoring-inert)', () => {
    const highPlusInfo = { ...NO_FINDINGS, high: 1, info: 1 };

    assert.equal(verdict(highPlusInfo), 'WARNING');
  });
});

describe('agent prompt contract (static asserts on prompt + template files)', () => {
  // Spec-level coverage of brief SC#2 and SC#3 (agent output cannot be
  // tested without invoking the LLM, per D8). Verifies the prompt
  // mandates context-first severity, the new field name, and v2 cutoffs.

  /**
   * Read a repository file as UTF-8 text.
   *
   * The location is anchored to this test file's URL instead of process.cwd(),
   * so the asserts do not depend on the directory the runner was started from.
   * This file lives two directories below the repository root (it imports
   * '../../scanners/...'), hence the '../../' prefix. readFile accepts a
   * file: URL directly, so no path conversion is needed.
   *
   * @param {string} relPath - Path relative to the repository root.
   * @returns {Promise<string>} The file's contents.
   */
  const readRepoFile = (relPath) =>
    readFile(new URL(`../../${relPath}`, import.meta.url), 'utf8');

  it('skill-scanner-agent.md mandates context-first severity (Step 2.5)', async () => {
    const text = await readRepoFile('agents/skill-scanner-agent.md');

    assert.match(text, /Step 2\.5: Context-First Severity Assignment/);
    assert.match(text, /summary\.narrative_audit\.suppressed_findings/);
    assert.match(text, /score>=65/);
    // The old cutoff wording must not appear anywhere in the prompt.
    assert.doesNotMatch(text, /score >= 61/);
  });

  it('unified-report.md uses v2 cutoffs and exposes Narrative Audit', async () => {
    const text = await readRepoFile('templates/unified-report.md');

    assert.match(text, /Narrative Audit/);
    assert.match(text, /SUPPRESSED_FINDINGS_COUNT/);
    assert.match(text, /WARNING if high>=1 OR score>=15/);
    // The old cutoff wording must not appear anywhere in the template.
    assert.doesNotMatch(text, /score >= 61/);
  });
});

});
|