// injection-patterns.mjs — Shared prompt injection detection patterns // Used by pre-prompt-inject-scan.mjs (UserPromptSubmit) and post-mcp-verify.mjs (PostToolUse). // // Patterns derived from skill-scanner-agent Category 1 (LLM01 Prompt Injection) // and Category 5 (Hidden Instructions) in knowledge/skill-threat-patterns.md. // // Zero external dependencies beyond ./string-utils.mjs. import { normalizeForScan, containsUnicodeTags, decodeUnicodeTags } from './string-utils.mjs'; // --------------------------------------------------------------------------- // Critical patterns — direct injection attempts (should be blocked) // --------------------------------------------------------------------------- export const CRITICAL_PATTERNS = [ // Direct override instructions { pattern: /ignore\s+(?:all\s+)?previous/i, label: 'override: ignore previous instructions' }, { pattern: /forget\s+your\s+(?:instructions|rules|guidelines|directives)/i, label: 'override: forget instructions' }, { pattern: /disregard\s+your/i, label: 'override: disregard instructions' }, { pattern: /override\s+your/i, label: 'override: override instructions' }, { pattern: /suspend\s+all\s+constraints/i, label: 'override: suspend constraints' }, { pattern: /all\s+prior\s+constraints/i, label: 'override: prior constraints' }, // Spoofed system headers { pattern: /^#+\s*SYSTEM\s*:/m, label: 'spoofed header: # SYSTEM:' }, { pattern: /^\[INST\]/m, label: 'spoofed header: [INST]' }, { pattern: /<\|system\|>/, label: 'spoofed header: <|system|>' }, { pattern: //, label: 'spoofed tag: ' }, { pattern: /<\/system>/, label: 'spoofed tag: ' }, { pattern: /^###\s*Instructions\s*:/m, label: 'spoofed header: ### Instructions:' }, // Identity redefinition { pattern: /you\s+are\s+no\s+longer/i, label: 'identity: you are no longer' }, { pattern: /you\s+are\s+now\s+(?!able|going|ready|done|running|using|looking|in\s|at\s|on\s)/i, label: 'identity: you are now [name]' }, { pattern: /your\s+new\s+role\s+is/i, label: 'identity: your new role' }, { pattern: /your\s+new\s+primary\s+directive/i, label: 'identity: new primary directive' }, { pattern: /pretend\s+you\s+are/i, label: 'identity: pretend you are' }, // Configuration manipulation { pattern: /ignore\s+your\s+CLAUDE\.md/i, label: 'config: ignore CLAUDE.md' }, { pattern: /override\s+your\s+system\s+prompt/i, label: 'config: override system prompt' }, { pattern: /bypass\s+your\s+safety/i, label: 'config: bypass safety' }, { pattern: /disable\s+output\s+filtering/i, label: 'config: disable output filtering' }, ]; // --------------------------------------------------------------------------- // Medium patterns — obfuscation and indirect manipulation (advisory) // Lower confidence than High; flag for manual review. // --------------------------------------------------------------------------- export const MEDIUM_PATTERNS = [ // Leetspeak variants of critical keywords { pattern: /[1!|l][gq9]n[o0]r[e3]\s+(?:all\s+)?pr[e3]v[i1!|l][o0]us/i, label: 'leetspeak: ignore previous (obfuscated)' }, { pattern: /f[o0]rg[e3]t\s+y[o0]ur\s+[i1!|l]nstruct[i1!|l][o0]ns/i, label: 'leetspeak: forget instructions (obfuscated)' }, { pattern: /d[i1!|l]sr[e3]g[a4@]rd\s+y[o0]ur/i, label: 'leetspeak: disregard your (obfuscated)' }, { pattern: /[o0]v[e3]rr[i1!|l]d[e3]\s+y[o0]ur/i, label: 'leetspeak: override your (obfuscated)' }, // Homoglyph detection — Cyrillic chars in Latin context { pattern: /[a-zA-Z][\u0430\u0435\u043E\u0440\u0441\u0456\u0443]|[\u0430\u0435\u043E\u0440\u0441\u0456\u0443][a-zA-Z]/, label: 'homoglyph: Cyrillic-Latin mixing in adjacent characters' }, // Zero-width characters inside words (keyword splitting evasion) { pattern: /\w[\u200B\u200C\u200D\uFEFF]\w/, label: 'unicode: zero-width character inside word (keyword splitting)' }, // Indirect AI-directed instructions { pattern: /(?:note|message|instruction)\s+(?:to|for)\s+(?:the\s+)?(?:AI|assistant|model|LLM|Claude)\b/i, label: 'indirect: instruction addressed to AI/assistant' }, { pattern: /(?:dear|attention)\s+(?:AI|assistant|model|LLM|Claude)\b/i, label: 'indirect: direct address to AI/assistant' }, { pattern: /when\s+(?:you|the\s+AI|the\s+assistant|Claude)\s+(?:read|see|encounter|process)\s+this/i, label: 'indirect: trigger-based instruction for AI' }, // Multi-language injection variants { pattern: /ignor(?:ez?|er?)\s+(?:les?\s+)?instructions?\s+pr[e\u00e9]c[e\u00e9]dentes?/i, label: 'multi-lang: French "ignore previous instructions"' }, { pattern: /ignor(?:ar?|e)\s+(?:las?\s+)?instrucciones?\s+anteriores?/i, label: 'multi-lang: Spanish "ignore previous instructions"' }, { pattern: /ignorier(?:e|en)?\s+(?:alle\s+)?vorherigen?\s+(?:Anweisungen|Instruktionen)/i, label: 'multi-lang: German "ignore previous instructions"' }, // Markdown link-reference comment injection { pattern: /\[\/\/\]:\s*#\s*\(.*(?:ignore|override|system|instruction|execute)/i, label: 'markdown: suspicious instruction in link-reference comment' }, // Data URI with executable content types { pattern: /data:(?:text\/html|application\/javascript|text\/javascript)[;,]/i, label: 'data-uri: executable content type' }, // --- Content Injection: Syntactic Masking (AI Agent Traps) --- { pattern: /\[[^\]]*(?:system|ignore|override|exfiltrate|execute)[^\]]*\]\([^)]+\)/i, label: 'markdown: injection payload in link anchor text' }, // --- Sub-agent spawning traps (DeepMind kat. 4, v5.0 S4) --- { pattern: /(?:create|spawn|launch|start|run)\s+(?:an?\s+)?(?:new\s+)?(?:sub-?agent|agent|task|worker)\s+(?:that|to|which|with)\s+(?:.*?\s+)?(?:execute|run|delete|remove|send|post|exfiltrate|access|reads?\s+(?:.*?\s+)?(?:secret|credential|key|token|\.env|\.ssh))/i, label: 'sub-agent: spawn instruction with dangerous capability keywords' }, { pattern: /(?:delegate|dispatch)\s+(?:to\s+)?(?:an?\s+)?(?:new\s+)?(?:agent|sub-?agent|task)\s+.*?(?:bypass|override|ignore|without\s+(?:review|confirmation|approval))/i, label: 'sub-agent: delegation with safety bypass instruction' }, // --- Natural Language Indirection (Preamble, CaMeL — v5.0 S4) --- { pattern: /fetch\s+(?:this|the|that)\s+(?:URL|link|endpoint)\s+and\s+(?:execute|run|eval)/i, label: 'nl-indirection: fetch URL and execute' }, { pattern: /send\s+(?:this|the|that|all)\s+(?:data|content|output|result|information|file)\s+to\s+(?:https?:\/\/|ftp:\/\/|\S+@\S+)/i, label: 'nl-indirection: send data to external destination' }, { pattern: /read\s+(?:the\s+)?(?:contents?\s+of\s+)?(?:~\/)?\.(?:ssh|aws|gnupg|env)\b/i, label: 'nl-indirection: read sensitive dotfile path' }, { pattern: /(?:extract|copy|dump|exfiltrate)\s+(?:all\s+)?(?:the\s+)?(?:credentials?|secrets?|tokens?|keys?|passwords?)\s+(?:and|then)\s+(?:send|post|upload|transmit)/i, label: 'nl-indirection: extract credentials and exfiltrate' }, { pattern: /(?:download|fetch|retrieve)\s+(?:from\s+)?\S+\s+(?:and|then)\s+(?:execute|run|eval|pipe\s+to\s+(?:bash|sh|node|python))/i, label: 'nl-indirection: download and execute' }, ]; // --------------------------------------------------------------------------- // High patterns — subtle manipulation (advisory warning, not block) // --------------------------------------------------------------------------- export const HIGH_PATTERNS = [ { pattern: /always\s+prioritize\s+user\s+requests?\s+over/i, label: 'normalization: prioritize user over rules' }, { pattern: /user\s+intent\s+takes?\s+precedence\s+over/i, label: 'normalization: user intent precedence' }, { pattern: /if\s+CLAUDE\.md\s+conflicts/i, label: 'normalization: CLAUDE.md conflict override' }, { pattern: /skip\s+confirmation\s+for/i, label: 'normalization: skip confirmation' }, { pattern: /your\s+actual\s+task\s+is/i, label: 'reframing: your actual task' }, { pattern: /your\s+real\s+purpose\s+is/i, label: 'reframing: your real purpose' }, { pattern: /what\s+you\s+should\s+actually\s+do/i, label: 'reframing: what you should actually do' }, // HTML comment injection directed at agents { pattern: /