ktg-plugin-marketplace/plugins/llm-security/examples/toxic-agent-demo/run-toxic-flow.mjs
Kjell Tore Guttormsen 92fb0087fa feat(llm-security): add toxic-agent-demo example for TFA scanner [skip-docs]
Single-component lethal-trifecta walkthrough that drives
scanners/toxic-flow-analyzer.mjs against a deliberately
misconfigured fixture plugin. The fixture agent declares
tools: [Bash, Read, WebFetch], which alone covers all three
trifecta legs (input surface + data access + exfil sink). No
hooks/hooks.json is shipped, so TFA's mitigation logic finds
no active guards and emits a CRITICAL "Lethal trifecta:"
finding without downgrade.

Plugin marker is plugin.fixture.json (recognised by isPlugin())
rather than .claude-plugin/plugin.json — the latter is blocked
by the plugin's own pre-write-pathguard hook, and
plugin.fixture.json exists in isPlugin() specifically so
example fixtures can self-mark without touching guarded paths.

Three independent assertions (3/3 must pass): direct trifecta
present and CRITICAL; finding mentions the exfil-helper
component; description confirms "no hook guards detected"
(proves the mitigation path stayed inactive). expected-findings.md
documents the contract.

OWASP / framework mapping: ASI01, ASI02, ASI05, LLM01, LLM02, LLM06.

Docs updated: plugin README "Other runnable examples", plugin
CLAUDE.md "Examples" table, CHANGELOG [Unreleased] Added.
[skip-docs] is appropriate because examples don't change what
the plugin "synes å dekke utad" (appears to cover from the
outside) — marketplace root README is unaffected.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-05 15:15:04 +02:00

124 lines
4.7 KiB
JavaScript

#!/usr/bin/env node
// run-toxic-flow.mjs — Toxic-flow analyzer (TFA) walkthrough
// Drives scanners/toxic-flow-analyzer.mjs against a deliberately
// misconfigured plugin fixture and verifies that the lethal-trifecta
// detector emits at least one CRITICAL finding for the single-component
// trifecta planted in fixture/agents/exfil-helper.fixture.md.
//
// TFA is the only scanner in this plugin that operates at the
// component level (not the line/file level). Other scanners catch
// dangerous *content*; TFA catches dangerous *capability combinations*
// across a plugin's commands/agents/skills surface.
//
// Usage:
// cd plugins/llm-security
// node examples/toxic-agent-demo/run-toxic-flow.mjs
// node examples/toxic-agent-demo/run-toxic-flow.mjs --verbose
import { resolve, dirname } from 'node:path';
import { fileURLToPath, pathToFileURL } from 'node:url';
// Directory containing this script, derived from the module's own URL.
const __dirname = dirname(fileURLToPath(import.meta.url));
// Two levels above the script directory (examples/toxic-agent-demo -> plugin root).
const PLUGIN_ROOT = resolve(__dirname, '../..');
// The fixture directory that sits next to this script.
const FIXTURE = resolve(__dirname, 'fixture');
// True when the script was started with --verbose.
const VERBOSE = process.argv.includes('--verbose');

/**
 * Dynamically import a module addressed by a path relative to PLUGIN_ROOT.
 *
 * import() takes a URL-style specifier, not a filesystem path. A raw absolute
 * path such as `C:\...` is rejected by Node's ESM loader on Windows (the drive
 * letter is read as a URL scheme), and characters like `#`, `?` or `%` in a
 * POSIX path are interpreted as URL syntax. Converting the path to a file://
 * URL first makes the import work for any install location.
 *
 * @param {string} relativePath - module path relative to PLUGIN_ROOT
 * @returns {Promise<object>} the module namespace object
 */
const importFromPlugin = (relativePath) =>
  import(pathToFileURL(resolve(PLUGIN_ROOT, relativePath)).href);

// Loaded one after the other, in the same order as before.
const { discoverFiles } = await importFromPlugin('scanners/lib/file-discovery.mjs');
const { scan } = await importFromPlugin('scanners/toxic-flow-analyzer.mjs');
// Walkthrough banner: names the fixture being scanned, the component in
// scope, the plugin marker, and the absence of hook guards.
const bannerLines = [
  'TOXIC-FLOW ANALYZER (TFA) WALKTHROUGH',
  '=====================================\n',
  `Fixture: ${FIXTURE}`,
  'Component in scope:',
  ' - agents/exfil-helper.fixture.md (tools: [Bash, Read, WebFetch])',
  'Plugin marker: plugin.fixture.json (recognised by isPlugin())',
  'Hook guards: none (no hooks/hooks.json) — keeps trifecta at CRITICAL\n',
];
for (const line of bannerLines) {
  console.log(line);
}
// Run the analyzer: discover the fixture's files, then pass them to scan().
const discovery = await discoverFiles(FIXTURE);
const result = await scan(FIXTURE, discovery, {});
// Fall back to an empty list when the result carries no findings.
const findings = result.findings || [];

// Select the findings whose title is a string beginning with `prefix`.
const byTitlePrefix = (prefix) =>
  findings.filter((finding) => {
    if (typeof finding.title !== 'string') {
      return false;
    }
    return finding.title.startsWith(prefix);
  });

// Buckets keyed on the title prefixes this walkthrough looks for.
const directTrifectas = byTitlePrefix('Lethal trifecta:');
const crossTrifectas = byTitlePrefix('Cross-component');
const projectLevel = byTitlePrefix('Project-level trifecta');
// Predicate: the finding's title is a string that names the exfil-helper component.
const titleNamesExfilHelper = (finding) =>
  typeof finding.title === 'string' && /exfil-helper/i.test(finding.title);

// Predicate: the finding's description is a string stating that no hook guards were detected.
const descriptionReportsNoGuards = (finding) =>
  typeof finding.description === 'string' &&
  /no hook guards detected/i.test(finding.description);

// The assertions this walkthrough checks. Each entry has:
//   label          - text printed next to PASS/FAIL
//   bucket         - the findings the assertion is evaluated against
//   minCount       - minimum number of findings the bucket must hold
//   expectSeverity - optional severity that some finding in the bucket must carry
const expectations = [
  {
    label: 'Direct trifecta — single component covers all 3 legs',
    bucket: directTrifectas,
    minCount: 1,
    expectSeverity: 'critical',
  },
  {
    label: 'Trifecta finding mentions exfil-helper component',
    bucket: directTrifectas.filter((finding) => titleNamesExfilHelper(finding)),
    minCount: 1,
  },
  {
    label: 'No mitigation — guards line is "No hook guards detected"',
    bucket: directTrifectas.filter((finding) => descriptionReportsNoGuards(finding)),
    minCount: 1,
  },
];
// Running tally of expectations that held / did not hold.
let pass = 0;
let fail = 0;

// An expectation holds when its bucket has at least `minCount` findings and,
// if a severity is required, at least one finding in the bucket has that
// severity (the finding's severity is lower-cased before comparing).
const holds = ({ bucket, minCount, expectSeverity }) => {
  if (!(bucket.length >= minCount)) {
    return false;
  }
  if (!expectSeverity) {
    return true;
  }
  return bucket.some(
    (finding) => String(finding.severity || '').toLowerCase() === expectSeverity
  );
};

for (const expectation of expectations) {
  const { label, bucket, minCount, expectSeverity } = expectation;
  const ok = holds(expectation);
  if (ok) {
    pass += 1;
  } else {
    fail += 1;
  }

  const verdict = ok ? 'PASS' : 'FAIL';
  console.log(`[${verdict}] ${label}`);
  console.log(` findings: ${bucket.length} (need >= ${minCount})`);
  if (expectSeverity) {
    console.log(` expected severity: ${expectSeverity}`);
  }

  // Print the first finding in the bucket, if there is one, as a sample.
  if (bucket.length > 0) {
    const sample = bucket[0];
    const sev = String(sample.severity || '').toUpperCase().padEnd(8);
    const title = (sample.title || '').slice(0, 90);
    console.log(` ${sev} ${title}`);
  }
  console.log();
}
// Summary counts per bucket, plus the scanner's own file count and status.
const summaryLines = [
  `Total TFA findings: ${findings.length}`,
  ` direct trifectas: ${directTrifectas.length}`,
  ` cross-component: ${crossTrifectas.length}`,
  ` project-level fallback: ${projectLevel.length}`,
  `Files scanned (components): ${result.files_scanned ?? '?'}`,
  `Scanner status: ${result.status}`,
];
for (const line of summaryLines) {
  console.log(line);
}

// With --verbose, list every finding: severity, file, truncated title, and
// truncated evidence when the finding has any.
if (VERBOSE) {
  console.log('\nFull findings list:');
  for (const finding of findings) {
    const severityTag = String(finding.severity || '').toUpperCase().padEnd(8);
    const location = finding.file || '-';
    const shortTitle = (finding.title || '').slice(0, 110);
    console.log(` ${severityTag} [${location}] ${shortTitle}`);
    if (finding.evidence) {
      const shortEvidence = String(finding.evidence).slice(0, 150);
      console.log(` evidence: ${shortEvidence}`);
    }
  }
}
// Final verdict: print the tally, explain the outcome, and exit with a
// non-zero status when any expectation failed.
console.log('\n---');
console.log(`Result: ${pass} pass, ${fail} fail`);

const anyFailed = fail > 0;
const closingLines = anyFailed
  ? [
      '\nFAILURE — TFA did not emit the expected single-component trifecta.',
      'Inspect verbose output (--verbose) to see what was actually returned.',
    ]
  : [
      '\nSUCCESS — TFA flagged the planted lethal trifecta as CRITICAL.',
      'Read examples/toxic-agent-demo/README.md for the OWASP / framework mapping.',
    ];
for (const line of closingLines) {
  console.log(line);
}
process.exit(anyFailed ? 1 : 0);