feat: initial open marketplace with llm-security, config-audit, ultraplan-local
This commit is contained in:
commit
f93d6abdae
380 changed files with 65935 additions and 0 deletions
|
|
@ -0,0 +1,204 @@
|
|||
#!/usr/bin/env node
|
||||
// run-showcase.mjs — Prompt Injection Detection Showcase
|
||||
// Feeds payloads to llm-security hooks and reports detection results.
|
||||
//
|
||||
// Usage:
|
||||
// node examples/prompt-injection-showcase/run-showcase.mjs
|
||||
// node examples/prompt-injection-showcase/run-showcase.mjs --category "Bash Evasion"
|
||||
// node examples/prompt-injection-showcase/run-showcase.mjs --verbose
|
||||
|
||||
import { execFile } from 'node:child_process';
|
||||
import { readFileSync } from 'node:fs';
|
||||
import { resolve, dirname } from 'node:path';
|
||||
import { fileURLToPath } from 'node:url';
|
||||
|
||||
// ES modules have no built-in __dirname; derive it from this module's URL.
const __dirname = dirname(fileURLToPath(import.meta.url));
// Two directory levels above this script.
const PLUGIN_ROOT = resolve(__dirname, '../..');

// ---------------------------------------------------------------------------
// Hook paths
// ---------------------------------------------------------------------------

// Maps each hook name to the absolute path of its script under
// <PLUGIN_ROOT>/hooks/scripts/. The script file name is the hook name + ".mjs".
const HOOKS = Object.fromEntries(
  ['pre-prompt-inject-scan', 'post-mcp-verify', 'pre-bash-destructive'].map(
    (hookName) => [hookName, resolve(PLUGIN_ROOT, `hooks/scripts/${hookName}.mjs`)],
  ),
);
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Payload builders (match hook stdin protocol)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Build the input object for one payload entry, shaped for the hook it targets.
 *
 * @param {object} payload - Payload entry; reads `hook`, `payload` and
 *   (for 'post-mcp-verify' only) the optional `inputTool`.
 * @returns {object} The hook-specific input object.
 * @throws {Error} If `payload.hook` is not one of the three known hook names.
 */
function buildInput(payload) {
  const { hook } = payload;

  if (hook === 'pre-prompt-inject-scan') {
    return {
      session_id: 'showcase',
      message: { role: 'user', content: payload.payload },
    };
  }

  if (hook === 'post-mcp-verify') {
    // Any falsy inputTool (missing, empty string, ...) falls back to the default name.
    const toolName = payload.inputTool || 'mcp__server__tool';
    return {
      tool_name: toolName,
      tool_input: {},
      tool_output: payload.payload,
    };
  }

  if (hook === 'pre-bash-destructive') {
    return {
      tool_name: 'Bash',
      tool_input: { command: payload.payload },
    };
  }

  throw new Error(`Unknown hook: ${hook}`);
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Hook runner
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Run a hook script in a child Node.js process, sending `input` as JSON on its stdin.
 *
 * The returned promise never rejects: every outcome (normal exit, non-zero exit,
 * timeout, spawn failure) is reported through the resolved object.
 *
 * @param {string} scriptPath - Path of the hook script to execute.
 * @param {object} input - Value serialised with JSON.stringify and written to stdin.
 * @returns {Promise<{code: number, stdout: string, stderr: string}>} Exit code
 *   (1 when the child has no numeric exit code, e.g. it was killed after the
 *   5-second timeout) and the captured output streams.
 */
function runHook(scriptPath, input) {
  // The settle callback is deliberately not called `resolve`, so it does not
  // shadow the `resolve` imported from node:path.
  return new Promise((settle) => {
    const child = execFile(
      // Use the interpreter running this script rather than whichever `node`
      // happens to be first on PATH.
      process.execPath,
      [scriptPath],
      { timeout: 5000 },
      (_err, stdout, stderr) => {
        settle({
          code: child.exitCode ?? 1,
          stdout: stdout || '',
          stderr: stderr || '',
        });
      },
    );

    // A hook may exit without reading its stdin; the pending write then fails
    // (EPIPE). Without a listener that stream error would be thrown as an
    // uncaught exception and abort the whole run. The hook's exit code is still
    // reported through the execFile callback, so the write error is ignored.
    child.stdin.on('error', () => {});
    child.stdin.end(JSON.stringify(input));
  });
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Result classification
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Map a hook run result to a verdict.
 *
 * - exit code 2                        -> 'block'
 * - exit code 0 with non-blank stdout  -> 'advisory'
 * - anything else                      -> 'allow'
 *
 * The stdout content itself is not inspected: any non-blank output on a
 * zero exit counts as an advisory, whether or not it is JSON. (An earlier
 * version parsed stdout as JSON first, but both the parsed and the
 * unparsed path returned 'advisory', so the parse had no effect.)
 *
 * @param {{code: number, stdout: string}} result - Result object as produced by runHook.
 * @returns {'block' | 'advisory' | 'allow'} The verdict.
 */
function classify(result) {
  if (result.code === 2) return 'block';
  if (result.code === 0 && result.stdout.trim()) return 'advisory';
  return 'allow';
}
|
||||
|
||||
/**
 * Tell whether a payload produced the verdict it was expected to produce.
 *
 * @param {*} expected - Verdict declared for the payload.
 * @param {*} actual - Verdict obtained from the hook run.
 * @returns {boolean} True only when both values are strictly equal (===).
 */
function passed(expected, actual) {
  const isMatch = actual === expected;
  return isMatch;
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Output formatting
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// Terminal escape sequences used to style console output.
// Every sequence is the ESC '[' prefix followed by a numeric code and 'm'.
const CSI = '\x1b[';
const BOLD = `${CSI}1m`;
const GREEN = `${CSI}32m`;
const RED = `${CSI}31m`;
const YELLOW = `${CSI}33m`;
const CYAN = `${CSI}36m`;
const DIM = `${CSI}2m`;
// Emitted after styled text to switch the styling back off.
const RESET = `${CSI}0m`;
|
||||
|
||||
/**
 * Pick the colour escape sequence for a severity label.
 *
 * Matching is exact and case-sensitive; any label other than
 * 'CRITICAL', 'HIGH' or 'MEDIUM' gets the dim style.
 *
 * @param {string} severity - Severity label.
 * @returns {string} One of the RED, YELLOW, CYAN or DIM escape sequences.
 */
function severityColor(severity) {
  if (severity === 'CRITICAL') return RED;
  if (severity === 'HIGH') return YELLOW;
  if (severity === 'MEDIUM') return CYAN;
  return DIM;
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Main
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// Command-line flags: --verbose and --category <text>.
const args = process.argv.slice(2);
const verbose = args.includes('--verbose');

const categoryFlagIndex = args.indexOf('--category');
const categoryFilter = categoryFlagIndex === -1 ? null : args[categoryFlagIndex + 1];

// Reject a --category flag that has no value. Previously a trailing
// `--category` silently ran every payload, and `--category --verbose` used
// "--verbose" as the filter text, matched nothing and still reported success.
if (categoryFlagIndex !== -1 && (categoryFilter === undefined || categoryFilter.startsWith('--'))) {
  console.error('Error: --category requires a value, e.g. --category "Bash Evasion"');
  process.exit(1);
}

// payloads.json sits next to this script and is expected to hold an array of payload entries.
const payloads = JSON.parse(readFileSync(resolve(__dirname, 'payloads.json'), 'utf-8'));

// The category filter is a case-insensitive substring match on each entry's `category`.
const filtered = categoryFilter
  ? payloads.filter((p) => p.category.toLowerCase().includes(categoryFilter.toLowerCase()))
  : payloads;

console.log(`${BOLD}=== LLM Security — Prompt Injection Detection Showcase ===${RESET}`);
console.log(`Payloads: ${filtered.length}${categoryFilter ? ` (filtered: "${categoryFilter}")` : ''}`);
console.log(`Hooks: ${Object.keys(HOOKS).join(', ')}`);
console.log('');
|
||||
|
||||
// Running totals across all payloads, plus per-category pass/fail counters.
let totalPass = 0;
let totalFail = 0;
let currentCategory = '';
const categoryStats = {};

// Payloads are run one at a time: each hook run is awaited before the next starts.
for (const payload of filtered) {
  // Category header
  if (payload.category !== currentCategory) {
    currentCategory = payload.category;
    // Create the counters only on first sight of a category. Plain assignment
    // here would wipe earlier counts whenever a category shows up again after
    // entries of another category.
    categoryStats[currentCategory] ??= { pass: 0, fail: 0 };
    console.log(`${BOLD}--- ${currentCategory} ---${RESET}`);
  }

  const hookPath = HOOKS[payload.hook];
  if (!hookPath) {
    // NOTE(review): skipped payloads are counted neither as pass nor as fail,
    // so they do not influence the final exit status.
    console.log(` ${RED}SKIP${RESET} ${payload.id}: unknown hook ${payload.hook}`);
    continue;
  }

  const input = buildInput(payload);
  const result = await runHook(hookPath, input);
  const actual = classify(result);
  const ok = passed(payload.expected, actual);

  if (ok) {
    totalPass++;
    categoryStats[currentCategory].pass++;
    const icon = payload.expected === 'block' ? 'BLOCKED' :
      payload.expected === 'advisory' ? 'DETECTED' : 'CLEAN';
    console.log(` ${GREEN}PASS${RESET} ${payload.id}: ${icon} — ${payload.description} ${DIM}(v${payload.since})${RESET}`);
  } else {
    totalFail++;
    categoryStats[currentCategory].fail++;
    console.log(` ${RED}FAIL${RESET} ${payload.id}: expected=${payload.expected} got=${actual} — ${payload.description}`);
  }

  // With --verbose, show only the first line of each non-empty output stream.
  if (verbose && (result.stderr || result.stdout.trim())) {
    if (result.stderr) console.log(` ${DIM}stderr: ${result.stderr.trim().split('\n')[0]}${RESET}`);
    if (result.stdout.trim()) console.log(` ${DIM}stdout: ${result.stdout.trim().split('\n')[0]}${RESET}`);
  }
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Summary
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// Per-category breakdown followed by overall totals.
console.log('');
console.log(`${BOLD}--- Summary by Category ---${RESET}`);
for (const [cat, stats] of Object.entries(categoryStats)) {
  const status = stats.fail === 0 ? `${GREEN}ALL PASS${RESET}` : `${RED}${stats.fail} FAIL${RESET}`;
  console.log(` ${cat}: ${stats.pass}/${stats.pass + stats.fail} ${status}`);
}

console.log('');
console.log(`${BOLD}--- Results ---${RESET}`);
console.log(` Passed: ${GREEN}${totalPass}${RESET}`);
console.log(` Failed: ${totalFail > 0 ? RED : GREEN}${totalFail}${RESET}`);
console.log(` Total: ${totalPass + totalFail}`);
console.log('');

// Exit status: 0 when nothing failed, 1 otherwise. The status is set through
// process.exitCode rather than process.exit() so the process ends on its own
// once pending stdout writes are flushed; process.exit() can cut the summary
// short where stdout is written asynchronously.
if (totalFail === 0) {
  console.log(`${GREEN}${BOLD}=== ALL PAYLOADS DETECTED AS EXPECTED ===${RESET}`);
  process.exitCode = 0;
} else {
  console.log(`${RED}${BOLD}=== ${totalFail} PAYLOAD(S) DID NOT MATCH EXPECTED RESULT ===${RESET}`);
  process.exitCode = 1;
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue