#!/usr/bin/env node
// Hook: pre-prompt-inject-scan.mjs
// Event: UserPromptSubmit
// Purpose: Scan user prompts for injection patterns before sending to model.
//
// Catches injection hidden in pasted content, piped input, or headless mode.
// Critical patterns (direct override, spoofed headers, identity redefinition) -> block.
// High patterns (subtle manipulation, context normalization) -> warn.
// Medium patterns (leetspeak, homoglyphs, zero-width, multi-language) -> advisory.
//
// v2.3.0: LLM_SECURITY_INJECTION_MODE env var (block/warn/off). Default: block.
// v5.0.0: MEDIUM patterns emit advisory (never block). Appended to existing advisory
//         when critical/high patterns are also present.
//
// Protocol:
// - Read JSON from stdin: { session_id, message: { role, content } }
// - content may be a string or array of content blocks
// - Block: exit 2, stdout JSON { decision: "block", reason: "..." }
// - Allow: exit 0
// - Warn: exit 0, stdout JSON { systemMessage: "..." }

import { readFileSync } from 'node:fs';
import { scanForInjection } from '../../scanners/lib/injection-patterns.mjs';
import { getPolicyValue } from '../../scanners/lib/policy-loader.mjs';

// ---------------------------------------------------------------------------
// Mode configuration (env var takes precedence over policy file)
// ---------------------------------------------------------------------------

const VALID_MODES = new Set(['block', 'warn', 'off']);

const policyMode = getPolicyValue('injection', 'mode', 'block');
const envMode = process.env.LLM_SECURITY_INJECTION_MODE;

// Precedence: valid env var > valid policy value > hard default 'block'.
// Invalid values at either level fall through rather than erroring, so a typo
// in config can never silently disable scanning (worst case: default to block).
const mode = VALID_MODES.has(envMode)
  ? envMode
  : VALID_MODES.has(policyMode)
    ? policyMode
    : 'block';

// Off mode: skip scanning entirely
if (mode === 'off') {
  process.exit(0);
}

// ---------------------------------------------------------------------------
// Helpers
// ---------------------------------------------------------------------------

/**
 * Extract plaintext from the UserPromptSubmit input payload.
 * Handles multiple input shapes for robustness.
 *
 * @param {object} input - Parsed stdin payload.
 * @returns {string} Concatenated text content ('' when none found).
 */
function extractText(input) {
  // Shape 1: { message: { content: "string" } }
  // Shape 2: { message: { content: [{ type: "text", text: "..." }] } }
  // Shape 3: { prompt: "string" } (fallback)
  const message = input?.message;
  if (!message) return input?.prompt ?? '';

  const content = message.content;
  if (typeof content === 'string') return content;
  if (Array.isArray(content)) {
    return content
      // Optional chaining: tolerate null/undefined entries in the blocks array.
      .filter((block) => block?.type === 'text' && typeof block.text === 'string')
      .map((block) => block.text)
      .join('\n');
  }
  return '';
}

// ---------------------------------------------------------------------------
// Main
// ---------------------------------------------------------------------------

let input;
try {
  const raw = readFileSync(0, 'utf-8');
  input = JSON.parse(raw);
} catch {
  // Cannot parse stdin — allow (don't block on parse errors)
  process.exit(0);
}

const text = extractText(input);
if (!text.trim()) {
  process.exit(0);
}

const { critical, high, medium } = scanForInjection(text);

if (critical.length > 0 && mode === 'block') {
  const reason =
    `Blocked: prompt injection pattern detected (OWASP LLM01).\n` +
    critical.map((c) => ` - ${c}`).join('\n') +
    '\n' +
    ` This prompt contains patterns associated with prompt injection attacks.\n` +
    ` If intentional (testing, security research), set LLM_SECURITY_INJECTION_MODE=warn to allow with advisory.`;
  process.stdout.write(JSON.stringify({ decision: 'block', reason }));
  process.exit(2);
}

if (critical.length > 0 || high.length > 0) {
  // In warn mode, critical patterns are downgraded to advisory.
  // In block mode, we only reach here if critical is empty (only high patterns).
  const allFindings = [...critical, ...high];
  const severity = critical.length > 0 ? 'CRITICAL' : 'HIGH';

  let message =
    `SECURITY ADVISORY (prompt-inject-scan): ${severity} manipulation signals detected.\n\n` +
    allFindings.map((f, i) => `[${i + 1}] ${f}`).join('\n') +
    '\n\n' +
    ` These patterns may indicate prompt manipulation in pasted content.\n` +
    ` Review the source before proceeding.` +
    (mode === 'warn' && critical.length > 0
      ? `\n Note: blocking is disabled (LLM_SECURITY_INJECTION_MODE=warn).`
      : '');

  // Append MEDIUM count if present (never list individual medium findings with critical/high)
  if (medium.length > 0) {
    message += `\n Additionally, ${medium.length} lower-confidence signal(s) detected (MEDIUM).`;
  }

  // Per the protocol above, warn output is { systemMessage } only — no
  // "decision" key ('allow' is not a documented decision for this event).
  process.stdout.write(JSON.stringify({ systemMessage: message }));
  process.exit(0);
}

// MEDIUM-only: advisory (never block)
if (medium.length > 0) {
  const message =
    `SECURITY ADVISORY (prompt-inject-scan): MEDIUM obfuscation/manipulation signals detected.\n\n` +
    medium.map((f, i) => `[${i + 1}] ${f}`).join('\n') +
    '\n\n' +
    ` These patterns may indicate obfuscated prompt manipulation (leetspeak, homoglyphs, multi-language).\n` +
    ` Review the source before proceeding. MEDIUM signals are advisory-only and never block.`;
  process.stdout.write(JSON.stringify({ systemMessage: message }));
  process.exit(0);
}

// Clean — allow silently
process.exit(0);