New policy-loader.mjs reads .llm-security/policy.json with deep-merge against defaults that exactly match existing hardcoded values. Integrated into all 7 hooks: - pre-prompt-inject-scan: injection.mode (env var still takes precedence) - post-session-guard: trifecta.mode, window_size, long_horizon_window - pre-edit-secrets: secrets.additional_patterns - pre-bash-destructive: destructive.additional_blocked - pre-write-pathguard: pathguard.additional_protected - pre-install-supply-chain: supply_chain.additional_blocked_packages - post-mcp-verify: mcp.volume_threshold_bytes, mcp.trusted_servers Backward compatible: no policy file = identical behavior to v5.1.0. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
136 lines
5.2 KiB
JavaScript
136 lines
5.2 KiB
JavaScript
#!/usr/bin/env node
|
|
// Hook: pre-prompt-inject-scan.mjs
|
|
// Event: UserPromptSubmit
|
|
// Purpose: Scan user prompts for injection patterns before sending to model.
|
|
//
|
|
// Catches injection hidden in pasted content, piped input, or headless mode.
|
|
// Critical patterns (direct override, spoofed headers, identity redefinition) -> block.
|
|
// High patterns (subtle manipulation, context normalization) -> warn.
|
|
// Medium patterns (leetspeak, homoglyphs, zero-width, multi-language) -> advisory.
|
|
//
|
|
// v2.3.0: LLM_SECURITY_INJECTION_MODE env var (block/warn/off). Default: block.
|
|
// v5.0.0: MEDIUM patterns emit advisory (never block). Appended to existing advisory
|
|
// when critical/high patterns are also present.
|
|
//
|
|
// Protocol:
|
|
// - Read JSON from stdin: { session_id, message: { role, content } }
|
|
// - content may be a string or array of content blocks
|
|
// - Block: exit 2, stdout JSON { decision: "block", reason: "..." }
|
|
// - Allow: exit 0
|
|
// - Warn: exit 0, stdout JSON { systemMessage: "..." }
|
|
|
|
import { readFileSync } from 'node:fs';
|
|
import { scanForInjection } from '../../scanners/lib/injection-patterns.mjs';
|
|
import { getPolicyValue } from '../../scanners/lib/policy-loader.mjs';
|
|
|
|
// ---------------------------------------------------------------------------
// Mode configuration (env var takes precedence over policy file)
// ---------------------------------------------------------------------------
const VALID_MODES = new Set(['block', 'warn', 'off']);

// Return the candidate when it is a recognized mode, undefined otherwise,
// so the resolution below can be expressed as a ?? fallback chain.
const asValidMode = (candidate) => (VALID_MODES.has(candidate) ? candidate : undefined);

const policyMode = getPolicyValue('injection', 'mode', 'block');

// Resolution order: env var > policy file > hardcoded default 'block'.
const mode =
  asValidMode(process.env.LLM_SECURITY_INJECTION_MODE) ?? asValidMode(policyMode) ?? 'block';

// Off mode: skip scanning entirely
if (mode === 'off') {
  process.exit(0);
}
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Helpers
|
|
// ---------------------------------------------------------------------------
|
|
|
|
/**
 * Extract plaintext from the UserPromptSubmit input payload.
 * Handles multiple input shapes for robustness:
 *   Shape 1: { message: { content: "string" } }
 *   Shape 2: { message: { content: [{ type: "text", text: "..." }] } }
 *   Shape 3: { prompt: "string" } (fallback when message is absent)
 *
 * @param {unknown} input - Parsed stdin payload; may be malformed.
 * @returns {string} Extracted text (array blocks joined with '\n'), or '' when
 *   no text can be extracted.
 */
function extractText(input) {
  const message = input?.message;
  if (!message) return input?.prompt ?? '';

  const content = message.content;
  if (typeof content === 'string') return content;
  if (Array.isArray(content)) {
    // Guard each entry: malformed payloads may contain null/non-object blocks
    // (block?.type avoids a TypeError) or text blocks missing `.text`
    // (?? '' avoids joining the literal string "undefined" into the output).
    return content
      .filter((block) => block?.type === 'text')
      .map((block) => block.text ?? '')
      .join('\n');
  }
  return '';
}
|
|
|
|
// ---------------------------------------------------------------------------
// Main
// ---------------------------------------------------------------------------

// Parse the hook payload from stdin. A malformed payload must never block the
// user, so any read/parse failure results in a silent allow (exit 0).
let input;
try {
  input = JSON.parse(readFileSync(0, 'utf-8'));
} catch {
  process.exit(0);
}

// Nothing to scan: empty or whitespace-only prompt is allowed silently.
const text = extractText(input);
if (text.trim() === '') {
  process.exit(0);
}
|
|
|
|
const { critical, high, medium } = scanForInjection(text);

// Write a hook response to stdout and terminate with the given exit code.
const respond = (payload, code) => {
  process.stdout.write(JSON.stringify(payload));
  process.exit(code);
};

// CRITICAL patterns in block mode: hard deny (exit 2 per the hook protocol).
if (mode === 'block' && critical.length > 0) {
  const reasonLines = [
    'Blocked: prompt injection pattern detected (OWASP LLM01).',
    ...critical.map((finding) => ` - ${finding}`),
    ' This prompt contains patterns associated with prompt injection attacks.',
    ' If intentional (testing, security research), set LLM_SECURITY_INJECTION_MODE=warn to allow with advisory.',
  ];
  respond({ decision: 'block', reason: reasonLines.join('\n') }, 2);
}

if (critical.length > 0 || high.length > 0) {
  // In warn mode, critical findings are downgraded to an advisory; in block
  // mode this branch is only reachable when critical is empty (high only).
  const findings = [...critical, ...high];
  const severity = critical.length > 0 ? 'CRITICAL' : 'HIGH';
  const numbered = findings.map((finding, i) => `[${i + 1}] ${finding}`).join('\n');

  let message =
    `SECURITY ADVISORY (prompt-inject-scan): ${severity} manipulation signals detected.\n\n${numbered}\n\n` +
    ` These patterns may indicate prompt manipulation in pasted content.\n` +
    ` Review the source before proceeding.`;

  if (mode === 'warn' && critical.length > 0) {
    message += `\n Note: blocking is disabled (LLM_SECURITY_INJECTION_MODE=warn).`;
  }

  // MEDIUM findings are summarized by count only — individual medium findings
  // are never listed alongside critical/high ones.
  if (medium.length > 0) {
    message += `\n Additionally, ${medium.length} lower-confidence signal(s) detected (MEDIUM).`;
  }

  respond({ decision: 'allow', systemMessage: message }, 0);
}

// MEDIUM-only findings: advisory with full listing; this severity never blocks.
if (medium.length > 0) {
  const numbered = medium.map((finding, i) => `[${i + 1}] ${finding}`).join('\n');
  const message =
    `SECURITY ADVISORY (prompt-inject-scan): MEDIUM obfuscation/manipulation signals detected.\n\n${numbered}\n\n` +
    ` These patterns may indicate obfuscated prompt manipulation (leetspeak, homoglyphs, multi-language).\n` +
    ` Review the source before proceeding. MEDIUM signals are advisory-only and never block.`;
  respond({ decision: 'allow', systemMessage: message }, 0);
}

// Clean — allow silently
process.exit(0);
|