// ktg-plugin-marketplace/plugins/llm-security/tests/scanners/skill-scanner-narrative.test.mjs

// skill-scanner-narrative.test.mjs — narrative-coherence contract test (v7.1.1)
//
// Tests the deterministic input layer that feeds skill-scanner-agent:
// content-extractor + entropy scanner against the hyperframes-like fixture.
// Does NOT invoke the LLM agent. The agent prompt rules added in v7.1.1
// (Step 2.5 context-first severity, Suppressed Signals section,
// finding-body forbidden-phrase contract) are covered here only at the
// input/contract level — agent output testing has no precedent in this
// codebase and is intentionally out of scope.

import { describe, it, before, after } from 'node:test';
import assert from 'node:assert/strict';
import { execFile } from 'node:child_process';
import { promisify } from 'node:util';
import { readFile, mkdtemp, rm } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join, resolve } from 'node:path';
import { fileURLToPath } from 'node:url';
import { discoverFiles } from '../../scanners/lib/file-discovery.mjs';
import { scan as entropyScan } from '../../scanners/entropy-scanner.mjs';
import { riskScore, verdict, riskBand } from '../../scanners/lib/severity.mjs';

// Promise-returning wrapper around execFile so the extractor run can be awaited.
const execFileP = promisify(execFile);

// Plugin root, derived from this file's own location (tests/scanners/ -> ../../,
// the same hop the relative scanner imports above use). Anchoring here instead
// of on process.cwd() keeps the suite runnable from any working directory.
const PLUGIN_ROOT = fileURLToPath(new URL('../../', import.meta.url));

// Fixture directory scanned by both the content-extractor and the entropy scanner.
const FIXTURE = resolve(PLUGIN_ROOT, 'tests/fixtures/skill-scan/hyperframes-like');

// Entry script of the content-extractor, executed as a child process.
const CONTENT_EXTRACTOR = resolve(PLUGIN_ROOT, 'scanners/content-extractor.mjs');

// Narrative-coherence contract suite (v7.1.1).
//
// Runs the content-extractor once against the hyperframes-like fixture, then
// checks four things: the extractor's evidence JSON, the entropy scanner's
// output, the severity invariants for a single HIGH finding, and the static
// text of the agent prompt / report template. No LLM agent is invoked.
describe('skill-scanner narrative coherence — v7.1.1', () => {
  let tmpDir;
  let extractorJsonPath;
  let extractorResult;

  // Execute the extractor as a child process and load the JSON it writes.
  before(async () => {
    tmpDir = await mkdtemp(join(tmpdir(), 'narrative-test-'));
    extractorJsonPath = join(tmpDir, 'evidence.json');
    // process.execPath: spawn the same Node binary that is running this suite
    // rather than whatever `node` happens to resolve to on PATH.
    await execFileP(process.execPath, [
      CONTENT_EXTRACTOR,
      FIXTURE,
      '--output-file',
      extractorJsonPath,
    ]);
    extractorResult = JSON.parse(await readFile(extractorJsonPath, 'utf8'));
  });

  // Remove the scratch directory. Guarded so that a failed mkdtemp in the
  // before-hook is not masked by a second error from rm(undefined).
  after(async () => {
    if (!tmpDir) return;
    await rm(tmpDir, { recursive: true, force: true });
  });

  describe('content-extractor against hyperframes-like fixture', () => {
    it('exits 0 (no extractor crash on animation markup)', () => {
      // A non-zero exit would already have rejected in the before-hook;
      // here we only confirm that parsed output exists.
      assert.ok(extractorResult, 'extractor produced output');
    });

    it('detects exactly one HIGH HITL trap signal', () => {
      const highSignals = (extractorResult.injection_findings ?? [])
        .filter((f) => f.severity === 'high');
      const labels = JSON.stringify(highSignals.map((s) => s.label || s.pattern));
      assert.equal(
        highSignals.length,
        1,
        `expected 1 HIGH injection signal (HITL trap), got ${highSignals.length}: ${labels}`,
      );
    });

    it('detects framework env-var references (process-env-access >= 2)', () => {
      const envRefs = (extractorResult.credential_references ?? []).filter(
        (r) => r.label === 'process-env-access',
      );
      assert.ok(
        envRefs.length >= 2,
        `expected >= 2 process.env credential refs, got ${envRefs.length}`,
      );
    });

    it('deterministic_verdict.has_injection is TRUE (any injection signal flips it)', () => {
      // content-extractor.mjs:392 — has_injection is `injectionFindings.length > 0`
      // (any severity); has_critical_injection is the critical-only counter.
      // The HITL trap is a HIGH signal so has_injection flips true.
      assert.equal(
        extractorResult.deterministic_verdict?.has_injection,
        true,
        'has_injection must flip true when ANY injection signal exists (HIGH HITL trap qualifies)',
      );
    });

    it('deterministic_verdict.has_critical_injection is FALSE (no CRITICAL signals in fixture)', () => {
      // The fixture is designed to have ZERO CRITICAL_PATTERNS hits — only
      // one HIGH HITL trap. has_critical_injection must be false.
      assert.equal(
        extractorResult.deterministic_verdict?.has_critical_injection,
        false,
        'has_critical_injection must stay false when fixture has only HIGH/MEDIUM signals',
      );
    });
  });

  describe('entropy scanner against hyperframes-like fixture', () => {
    let entropyResult;

    // Discover the fixture's files, then run the entropy scanner in-process.
    before(async () => {
      const discovery = await discoverFiles(FIXTURE);
      entropyResult = await entropyScan(FIXTURE, discovery);
    });

    it('produces a calibration block (suppression telemetry present)', () => {
      assert.ok(
        entropyResult.calibration,
        'entropy scanner must surface a calibration block (v7.0.0+)',
      );
    });

    it('emits at most one entropy finding for the fixture', () => {
      // Animation markup, CSS-in-JS, and inline SVG data URIs are suppressed
      // by the line-level rules added in v7.0.0. The fixture is designed so
      // entropy noise is fully absorbed by suppression.
      const titles = JSON.stringify(entropyResult.findings.map((f) => f.title));
      assert.ok(
        entropyResult.findings.length <= 1,
        `entropy.findings.length should be <= 1; got ${entropyResult.findings.length}: ${titles}`,
      );
    });
  });

  describe('co-monotonicity invariants (1 high) — narrative-coherence guard', () => {
    it('{ high: 1 } → verdict WARNING, band High', () => {
      // Independently asserts the invariant the agent must preserve when it
      // emits exactly 1 HIGH after suppressing context noise. Mirrors the
      // sweep in tests/lib/severity.test.mjs:255-303 — duplicated here so
      // the narrative-coherence file fails fast if the invariant drifts.
      const counts = { critical: 0, high: 1, medium: 0, low: 0, info: 0 };
      const score = riskScore(counts);
      assert.equal(verdict(counts), 'WARNING');
      assert.equal(riskBand(score), 'High');
    });

    it('{ high: 1, info: 1 } → verdict still WARNING (info is scoring-inert)', () => {
      const counts = { critical: 0, high: 1, medium: 0, low: 0, info: 1 };
      assert.equal(verdict(counts), 'WARNING');
    });
  });

  describe('agent prompt contract (static asserts on prompt + template files)', () => {
    // Spec-level coverage of brief SC#2 and SC#3 (agent output cannot be
    // tested without invoking the LLM, per D8). Verifies the prompt
    // mandates context-first severity, the new field name, and v2 cutoffs.

    // Read a plugin file addressed relative to the plugin root. The root is
    // two levels above this test file (tests/scanners/), so the lookup does
    // not depend on the process working directory. readFile accepts a file: URL.
    const readPluginFile = (relativePath) =>
      readFile(new URL(`../../${relativePath}`, import.meta.url), 'utf8');

    it('skill-scanner-agent.md mandates context-first severity (Step 2.5)', async () => {
      const text = await readPluginFile('agents/skill-scanner-agent.md');
      assert.match(text, /Step 2\.5: Context-First Severity Assignment/);
      assert.match(text, /summary\.narrative_audit\.suppressed_findings/);
      assert.match(text, /score>=65/);
      // NOTE(review): this only rejects the spaced spelling `score >= 61`; an
      // unspaced `score>=61` would slip through. Confirm whether a
      // whitespace-tolerant pattern is wanted before tightening.
      assert.doesNotMatch(text, /score >= 61/);
    });

    it('unified-report.md uses v2 cutoffs and exposes Narrative Audit', async () => {
      const text = await readPluginFile('templates/unified-report.md');
      assert.match(text, /Narrative Audit/);
      assert.match(text, /SUPPRESSED_FINDINGS_COUNT/);
      assert.match(text, /WARNING if high>=1 OR score>=15/);
      // NOTE(review): same spaced-only caveat as the agent prompt check.
      assert.doesNotMatch(text, /score >= 61/);
    });
  });
});