204 lines
6.8 KiB
JavaScript
204 lines
6.8 KiB
JavaScript
#!/usr/bin/env node
|
|
// run-showcase.mjs — Prompt Injection Detection Showcase
|
|
// Feeds payloads to llm-security hooks and reports detection results.
|
|
//
|
|
// Usage:
|
|
// node examples/prompt-injection-showcase/run-showcase.mjs
|
|
// node examples/prompt-injection-showcase/run-showcase.mjs --category "Bash Evasion"
|
|
// node examples/prompt-injection-showcase/run-showcase.mjs --verbose
|
|
|
|
import { execFile } from 'node:child_process';
|
|
import { readFileSync } from 'node:fs';
|
|
import { resolve, dirname } from 'node:path';
|
|
import { fileURLToPath } from 'node:url';
|
|
|
|
// ES modules have no built-in __dirname; derive it from this module's URL.
const scriptFile = fileURLToPath(import.meta.url);
const __dirname = dirname(scriptFile);

// The plugin root sits two directories above this script.
const PLUGIN_ROOT = resolve(__dirname, '..', '..');

// ---------------------------------------------------------------------------
// Hook paths
// ---------------------------------------------------------------------------

// Maps each hook name (as referenced by a payload's `hook` field) to the
// absolute path of the script that implements it.
const HOOKS = (() => {
  const scriptsDir = resolve(PLUGIN_ROOT, 'hooks/scripts');
  return {
    'pre-prompt-inject-scan': resolve(scriptsDir, 'pre-prompt-inject-scan.mjs'),
    'post-mcp-verify': resolve(scriptsDir, 'post-mcp-verify.mjs'),
    'pre-bash-destructive': resolve(scriptsDir, 'pre-bash-destructive.mjs'),
  };
})();
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Payload builders (match hook stdin protocol)
|
|
// ---------------------------------------------------------------------------
|
|
|
|
/**
 * Build the JSON object that is written to a hook's stdin for one payload.
 *
 * The shape depends on which hook the payload targets:
 *  - `pre-prompt-inject-scan`: a user message carrying the payload text.
 *  - `post-mcp-verify`: a tool result whose output is the payload text.
 *  - `pre-bash-destructive`: a Bash tool call whose command is the payload text.
 *
 * @param {{hook: string, payload: *, inputTool?: string}} payload - One entry
 *   from the payload list.
 * @returns {object} The hook input object.
 * @throws {Error} If `payload.hook` is not one of the three known hooks.
 */
function buildInput(payload) {
  const hookName = payload.hook;

  if (hookName === 'pre-prompt-inject-scan') {
    const message = { role: 'user', content: payload.payload };
    return { session_id: 'showcase', message };
  }

  if (hookName === 'post-mcp-verify') {
    // Fall back to a generic MCP tool name when the payload does not name one.
    const toolName = payload.inputTool || 'mcp__server__tool';
    return {
      tool_name: toolName,
      tool_input: {},
      tool_output: payload.payload,
    };
  }

  if (hookName === 'pre-bash-destructive') {
    const toolInput = { command: payload.payload };
    return { tool_name: 'Bash', tool_input: toolInput };
  }

  throw new Error(`Unknown hook: ${payload.hook}`);
}
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Hook runner
|
|
// ---------------------------------------------------------------------------
|
|
|
|
/**
 * Run a hook script in a child Node.js process, feeding it `input` as JSON on
 * stdin, and collect its exit code and output.
 *
 * The returned promise never rejects: spawn failures, non-zero exits and
 * timeouts are all reported through the resolved object.
 *
 * @param {string} scriptPath - Path of the hook script to execute.
 * @param {*} input - Value serialised with JSON.stringify and written to stdin.
 * @returns {Promise<{code: number, stdout: string, stderr: string}>} `code` is
 *   the child's exit code, or 1 when no exit code is available (for example
 *   when the child was killed after the 5 second timeout).
 */
function runHook(scriptPath, input) {
  // The resolver is named `settle` so it does not shadow path.resolve, which
  // this module imports under the name `resolve`.
  return new Promise((settle) => {
    const child = execFile(
      // Use the Node binary that is running this script rather than whatever
      // `node` happens to be first on PATH.
      process.execPath,
      [scriptPath],
      { timeout: 5000 },
      (_err, stdout, stderr) => {
        settle({
          code: child.exitCode ?? 1,
          stdout: stdout || '',
          stderr: stderr || '',
        });
      },
    );

    // A hook may exit before it has drained stdin. Without a listener the
    // resulting EPIPE would surface as an unhandled 'error' event and crash
    // the whole showcase; the execFile callback still reports the outcome.
    child.stdin.on('error', () => {});
    child.stdin.end(JSON.stringify(input));
  });
}
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Result classification
|
|
// ---------------------------------------------------------------------------
|
|
|
|
/**
 * Map a hook run result onto one of the three outcome labels.
 *
 *  - exit code 2                          -> 'block'
 *  - exit code 0 with non-blank stdout    -> 'advisory'
 *  - anything else                        -> 'allow'
 *
 * Any non-blank stdout on a clean exit counts as an advisory, whether or not
 * it is JSON. The earlier implementation parsed stdout as JSON and inspected
 * `systemMessage` / `decision`, but returned 'advisory' on every path through
 * that code, so the parse had no effect and has been removed.
 *
 * @param {{code: number, stdout: string}} result - Result object as produced
 *   by the hook runner.
 * @returns {'block'|'advisory'|'allow'} The classification.
 */
function classify(result) {
  if (result.code === 2) return 'block';
  if (result.code === 0 && result.stdout.trim()) return 'advisory';
  return 'allow';
}
|
|
|
|
/**
 * Report whether the observed classification is the one the payload expected.
 *
 * @param {*} expected - Expected classification from the payload definition.
 * @param {*} actual - Classification derived from the hook run.
 * @returns {boolean} True only when both values are strictly equal.
 */
function passed(expected, actual) {
  const isMatch = actual === expected;
  return isMatch;
}
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Output formatting
|
|
// ---------------------------------------------------------------------------
|
|
|
|
// ANSI SGR (Select Graphic Rendition) escape sequences used to style
// terminal output. Every sequence has the form ESC [ <code> m.
const sgr = (code) => `\x1b[${code}m`;

const RESET = sgr(0);
const BOLD = sgr(1);
const DIM = sgr(2);
const RED = sgr(31);
const GREEN = sgr(32);
const YELLOW = sgr(33);
const CYAN = sgr(36);
|
|
|
|
/**
 * Choose the terminal colour escape sequence for a severity label.
 *
 * @param {string} severity - Severity label; matching is exact and
 *   case-sensitive.
 * @returns {string} RED for 'CRITICAL', YELLOW for 'HIGH', CYAN for 'MEDIUM',
 *   and DIM for any other value.
 */
function severityColor(severity) {
  if (severity === 'CRITICAL') return RED;
  if (severity === 'HIGH') return YELLOW;
  if (severity === 'MEDIUM') return CYAN;
  return DIM;
}
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Main
|
|
// ---------------------------------------------------------------------------
|
|
|
|
const args = process.argv.slice(2);
const verbose = args.includes('--verbose');

// `--category <text>` restricts the run to payloads whose category contains
// <text> (case-insensitive). Reject a missing value instead of silently
// running everything or treating the next flag as the category.
const categoryFlagIndex = args.indexOf('--category');
const categoryFilter = categoryFlagIndex === -1 ? null : args[categoryFlagIndex + 1];
if (categoryFlagIndex !== -1 && (categoryFilter === undefined || categoryFilter.startsWith('--'))) {
  console.error('Error: --category requires a value, e.g. --category "Bash Evasion"');
  process.exit(2);
}

const payloads = JSON.parse(readFileSync(resolve(__dirname, 'payloads.json'), 'utf-8'));
const categoryNeedle = categoryFilter ? categoryFilter.toLowerCase() : '';
const filtered = categoryFilter
  ? payloads.filter((p) => p.category.toLowerCase().includes(categoryNeedle))
  : payloads;

console.log(`${BOLD}=== LLM Security — Prompt Injection Detection Showcase ===${RESET}`);
console.log(`Payloads: ${filtered.length}${categoryFilter ? ` (filtered: "${categoryFilter}")` : ''}`);
console.log(`Hooks: ${Object.keys(HOOKS).join(', ')}`);
console.log('');

let totalPass = 0;
let totalFail = 0;
let totalSkip = 0;
let currentCategory = '';
// A Map keeps categories in first-seen order and is safe for arbitrary
// category names coming from the JSON file.
const categoryStats = new Map();

// Payloads run one at a time so the output stays in payload order.
for (const payload of filtered) {
  // Category header
  if (payload.category !== currentCategory) {
    currentCategory = payload.category;
    // Create counters only on first sight: if a category reappears later in
    // the list, its earlier results must not be reset.
    if (!categoryStats.has(currentCategory)) {
      categoryStats.set(currentCategory, { pass: 0, fail: 0 });
    }
    console.log(`${BOLD}--- ${currentCategory} ---${RESET}`);
  }
  const stats = categoryStats.get(currentCategory);

  const hookPath = HOOKS[payload.hook];
  if (!hookPath) {
    totalSkip++;
    console.log(` ${RED}SKIP${RESET} ${payload.id}: unknown hook ${payload.hook}`);
    continue;
  }

  const input = buildInput(payload);
  const result = await runHook(hookPath, input);
  const actual = classify(result);
  const ok = passed(payload.expected, actual);

  if (ok) {
    totalPass++;
    stats.pass++;
    const icon = payload.expected === 'block' ? 'BLOCKED' :
      payload.expected === 'advisory' ? 'DETECTED' : 'CLEAN';
    console.log(` ${GREEN}PASS${RESET} ${payload.id}: ${icon} — ${payload.description} ${DIM}(v${payload.since})${RESET}`);
  } else {
    totalFail++;
    stats.fail++;
    console.log(` ${RED}FAIL${RESET} ${payload.id}: expected=${payload.expected} got=${actual} — ${payload.description}`);
  }

  // In verbose mode show only the first line of each stream to keep the
  // report compact.
  if (verbose) {
    const stderrText = result.stderr.trim();
    const stdoutText = result.stdout.trim();
    if (result.stderr) console.log(` ${DIM}stderr: ${stderrText.split('\n')[0]}${RESET}`);
    if (stdoutText) console.log(` ${DIM}stdout: ${stdoutText.split('\n')[0]}${RESET}`);
  }
}

// ---------------------------------------------------------------------------
// Summary
// ---------------------------------------------------------------------------

console.log('');
console.log(`${BOLD}--- Summary by Category ---${RESET}`);
for (const [cat, stats] of categoryStats) {
  const status = stats.fail === 0 ? `${GREEN}ALL PASS${RESET}` : `${RED}${stats.fail} FAIL${RESET}`;
  console.log(` ${cat}: ${stats.pass}/${stats.pass + stats.fail} ${status}`);
}

console.log('');
console.log(`${BOLD}--- Results ---${RESET}`);
console.log(` Passed: ${GREEN}${totalPass}${RESET}`);
console.log(` Failed: ${totalFail > 0 ? RED : GREEN}${totalFail}${RESET}`);
// Skipped payloads are excluded from pass/fail totals; surface them so a
// misspelled hook name does not go unnoticed.
if (totalSkip > 0) {
  console.log(` Skipped: ${YELLOW}${totalSkip}${RESET}`);
}
console.log(` Total: ${totalPass + totalFail}`);
console.log('');

if (totalFail === 0) {
  console.log(`${GREEN}${BOLD}=== ALL PAYLOADS DETECTED AS EXPECTED ===${RESET}`);
} else {
  console.log(`${RED}${BOLD}=== ${totalFail} PAYLOAD(S) DID NOT MATCH EXPECTED RESULT ===${RESET}`);
}

// Set the exit code and let the process end naturally instead of calling
// process.exit(), so the report above is fully flushed even when stdout is
// written asynchronously.
process.exitCode = totalFail === 0 ? 0 : 1;
|