#!/usr/bin/env node
// run-showcase.mjs — Prompt Injection Detection Showcase
// Feeds payloads to llm-security hooks and reports detection results.
//
// Usage:
//   node examples/prompt-injection-showcase/run-showcase.mjs
//   node examples/prompt-injection-showcase/run-showcase.mjs --category "Bash Evasion"
//   node examples/prompt-injection-showcase/run-showcase.mjs --verbose

import { execFile } from 'node:child_process';
import { readFileSync } from 'node:fs';
import { resolve, dirname } from 'node:path';
import { fileURLToPath } from 'node:url';

const __dirname = dirname(fileURLToPath(import.meta.url));
const PLUGIN_ROOT = resolve(__dirname, '../..');

// ---------------------------------------------------------------------------
// Hook paths
// ---------------------------------------------------------------------------

// Maps the `hook` field of a payload entry to the script that is executed.
const HOOKS = {
  'pre-prompt-inject-scan': resolve(PLUGIN_ROOT, 'hooks/scripts/pre-prompt-inject-scan.mjs'),
  'post-mcp-verify': resolve(PLUGIN_ROOT, 'hooks/scripts/post-mcp-verify.mjs'),
  'pre-bash-destructive': resolve(PLUGIN_ROOT, 'hooks/scripts/pre-bash-destructive.mjs'),
};

// ---------------------------------------------------------------------------
// Payload builders (match hook stdin protocol)
// ---------------------------------------------------------------------------

/**
 * Builds the JSON object that is written to a hook's stdin for one payload.
 *
 * @param {{hook: string, payload: string, inputTool?: string}} payload
 *   Entry from payloads.json. `inputTool` is only read for 'post-mcp-verify'.
 * @returns {object} Input object shaped for the selected hook.
 * @throws {Error} If `payload.hook` is not one of the known hook names.
 */
function buildInput(payload) {
  switch (payload.hook) {
    case 'pre-prompt-inject-scan':
      return {
        session_id: 'showcase',
        message: { role: 'user', content: payload.payload },
      };
    case 'post-mcp-verify':
      return {
        tool_name: payload.inputTool || 'mcp__server__tool',
        tool_input: {},
        tool_output: payload.payload,
      };
    case 'pre-bash-destructive':
      return {
        tool_name: 'Bash',
        tool_input: { command: payload.payload },
      };
    default:
      throw new Error(`Unknown hook: ${payload.hook}`);
  }
}

// ---------------------------------------------------------------------------
// Hook runner
// ---------------------------------------------------------------------------

/**
 * Runs a hook script as a child Node process, feeding `input` as JSON on stdin.
 *
 * The returned promise never rejects: a non-zero exit is an expected outcome
 * and is reported through `code`. When the child has no exit code (for
 * example it was killed by the 5 second timeout), `code` is reported as 1.
 *
 * @param {string} scriptPath - Absolute path of the hook script.
 * @param {object} input - Value serialized to JSON and written to stdin.
 * @returns {Promise<{code: number, stdout: string, stderr: string}>}
 */
function runHook(scriptPath, input) {
  // The executor parameter is named `settle` so it does not shadow the
  // `resolve` imported from node:path.
  return new Promise((settle) => {
    const child = execFile(
      // Use the interpreter running this script rather than whatever `node`
      // happens to be first on PATH.
      process.execPath,
      [scriptPath],
      { timeout: 5000 },
      (_err, stdout, stderr) => {
        settle({
          code: child.exitCode ?? 1,
          stdout: stdout || '',
          stderr: stderr || '',
        });
      }
    );

    // A hook may exit before reading its stdin; without a listener the
    // resulting stream error (e.g. EPIPE) would be thrown as an uncaught
    // exception and abort the whole showcase run.
    child.stdin.on('error', () => {});
    child.stdin.end(JSON.stringify(input));
  });
}

// ---------------------------------------------------------------------------
// Result classification
// ---------------------------------------------------------------------------

/**
 * Maps a hook result to one of 'block', 'advisory' or 'allow'.
 *
 * - exit code 2                      -> 'block'
 * - exit code 0 with non-blank stdout -> 'advisory' (JSON or not)
 * - anything else                    -> 'allow'
 *
 * Note that "anything else" includes other non-zero exit codes, so a hook
 * that crashed or timed out (reported by runHook as code 1) is 'allow'.
 *
 * @param {{code: number, stdout: string}} result
 * @returns {'block' | 'advisory' | 'allow'}
 */
function classify(result) {
  if (result.code === 2) return 'block';
  if (result.code === 0 && result.stdout.trim()) return 'advisory';
  return 'allow';
}

/**
 * Tells whether the observed classification matches the expected one.
 *
 * @param {string} expected - Expected classification from the payload entry.
 * @param {string} actual - Classification produced by classify().
 * @returns {boolean}
 */
function passed(expected, actual) {
  return expected === actual;
}

// ---------------------------------------------------------------------------
// Output formatting
// ---------------------------------------------------------------------------

// ANSI escape sequences used to decorate terminal output.
const BOLD = '\x1b[1m';
const GREEN = '\x1b[32m';
const RED = '\x1b[31m';
const YELLOW = '\x1b[33m';
const CYAN = '\x1b[36m';
const DIM = '\x1b[2m';
const RESET = '\x1b[0m';

/**
 * Picks the ANSI color for a severity label.
 *
 * @param {string} severity - 'CRITICAL', 'HIGH', 'MEDIUM' or anything else.
 * @returns {string} ANSI escape sequence; DIM for unrecognized values.
 */
function severityColor(severity) {
  switch (severity) {
    case 'CRITICAL': return RED;
    case 'HIGH': return YELLOW;
    case 'MEDIUM': return CYAN;
    default: return DIM;
  }
}

// ---------------------------------------------------------------------------
// Main
// ---------------------------------------------------------------------------

const args = process.argv.slice(2);
const verbose = args.includes('--verbose');
// When --category is the last argument its value is undefined, which is
// treated the same as "no filter".
const categoryFilter = args.includes('--category')
  ? args[args.indexOf('--category') + 1]
  : null;

const payloads = JSON.parse(readFileSync(resolve(__dirname, 'payloads.json'), 'utf-8'));
// Case-insensitive substring match on the payload's category.
const filtered = categoryFilter
  ? payloads.filter(p => p.category.toLowerCase().includes(categoryFilter.toLowerCase()))
  : payloads;

console.log(`${BOLD}=== LLM Security — Prompt Injection Detection Showcase ===${RESET}`);
console.log(`Payloads: ${filtered.length}${categoryFilter ? ` (filtered: "${categoryFilter}")` : ''}`);
console.log(`Hooks: ${Object.keys(HOOKS).join(', ')}`);
console.log('');

let totalPass = 0;
let totalFail = 0;
let totalSkip = 0;
let currentCategory = '';
// Per-category counters keyed by category name, in first-seen order.
const categoryStats = new Map();

for (const payload of filtered) {
  // Create the counters once per category. They must not be re-created when
  // a category shows up again later in the list, otherwise earlier results
  // for that category would be discarded.
  let stats = categoryStats.get(payload.category);
  if (!stats) {
    stats = { pass: 0, fail: 0 };
    categoryStats.set(payload.category, stats);
  }

  // Category header
  if (payload.category !== currentCategory) {
    currentCategory = payload.category;
    console.log(`${BOLD}--- ${currentCategory} ---${RESET}`);
  }

  const hookPath = HOOKS[payload.hook];
  if (!hookPath) {
    // Skipped payloads count as neither pass nor fail; they are tallied
    // separately so the summary can surface them.
    totalSkip++;
    console.log(` ${RED}SKIP${RESET} ${payload.id}: unknown hook ${payload.hook}`);
    continue;
  }

  const input = buildInput(payload);
  // Hooks are run one at a time so the output stays in payload order.
  const result = await runHook(hookPath, input);
  const actual = classify(result);
  const ok = passed(payload.expected, actual);

  if (ok) {
    totalPass++;
    stats.pass++;
    const icon = payload.expected === 'block'
      ? 'BLOCKED'
      : payload.expected === 'advisory'
        ? 'DETECTED'
        : 'CLEAN';
    console.log(` ${GREEN}PASS${RESET} ${payload.id}: ${icon} — ${payload.description} ${DIM}(v${payload.since})${RESET}`);
  } else {
    totalFail++;
    stats.fail++;
    console.log(` ${RED}FAIL${RESET} ${payload.id}: expected=${payload.expected} got=${actual} — ${payload.description}`);
  }

  // In verbose mode show only the first line of each stream.
  if (verbose && (result.stderr || result.stdout.trim())) {
    if (result.stderr) console.log(` ${DIM}stderr: ${result.stderr.trim().split('\n')[0]}${RESET}`);
    if (result.stdout.trim()) console.log(` ${DIM}stdout: ${result.stdout.trim().split('\n')[0]}${RESET}`);
  }
}

// ---------------------------------------------------------------------------
// Summary
// ---------------------------------------------------------------------------

console.log('');
console.log(`${BOLD}--- Summary by Category ---${RESET}`);
for (const [cat, stats] of categoryStats) {
  const status = stats.fail === 0
    ? `${GREEN}ALL PASS${RESET}`
    : `${RED}${stats.fail} FAIL${RESET}`;
  console.log(` ${cat}: ${stats.pass}/${stats.pass + stats.fail} ${status}`);
}

console.log('');
console.log(`${BOLD}--- Results ---${RESET}`);
console.log(` Passed: ${GREEN}${totalPass}${RESET}`);
console.log(` Failed: ${totalFail > 0 ? RED : GREEN}${totalFail}${RESET}`);
// Only mentioned when something was actually skipped, so a normal run
// prints the same lines as before.
if (totalSkip > 0) {
  console.log(` Skipped: ${YELLOW}${totalSkip}${RESET}`);
}
console.log(` Total: ${totalPass + totalFail}`);
console.log('');

// Exit status: 0 when every executed payload matched its expectation, else 1.
if (totalFail === 0) {
  console.log(`${GREEN}${BOLD}=== ALL PAYLOADS DETECTED AS EXPECTED ===${RESET}`);
  process.exit(0);
} else {
  console.log(`${RED}${BOLD}=== ${totalFail} PAYLOAD(S) DID NOT MATCH EXPECTED RESULT ===${RESET}`);
  process.exit(1);
}