ktg-plugin-marketplace/plugins/llm-security/scanners/lib/injection-patterns.mjs
Kjell Tore Guttormsen 950e4e4bce feat(injection): E3 — rot13 layer for comment-block injection
Adds rot13 to the variantSet built in scanForInjection(), so
imperative phrases hidden as rot13 inside code comments still hit
the existing CRITICAL/HIGH/MEDIUM pattern arrays.

normalizeForScan() already covers base64, hex, URL, and HTML decoding
in a 3-iteration loop — those are NOT duplicated here. rot13 is the
only genuinely new variant: it is its own inverse and not part of any
NIST/Unicode normalization spec, so it has to be applied explicitly.

Threshold: only inputs >40 chars enter the rot13 pass, to suppress
false positives on accidental letter-shifts in tokens, ids, and short
identifiers. Variants are deduplicated against the existing set so
matchers do not run twice.

3 new tests in injection-patterns.test.mjs (rot13 detection, sub-40
char suppression, plaintext path still green). Total 168 tests pass.

Closes E3 in critical-review-2026-04-20.md.
2026-04-30 15:21:03 +02:00

327 lines
20 KiB
JavaScript
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

// injection-patterns.mjs — Shared prompt injection detection patterns
// Used by pre-prompt-inject-scan.mjs (UserPromptSubmit) and post-mcp-verify.mjs (PostToolUse).
//
// Patterns derived from skill-scanner-agent Category 1 (LLM01 Prompt Injection)
// and Category 5 (Hidden Instructions) in knowledge/skill-threat-patterns.md.
//
// Zero external dependencies beyond ./string-utils.mjs.
import { normalizeForScan, containsUnicodeTags, decodeUnicodeTags, foldHomoglyphs, rot13 } from './string-utils.mjs';
// ---------------------------------------------------------------------------
// Critical patterns — direct injection attempts (should be blocked)
// ---------------------------------------------------------------------------
/**
 * Signatures of direct prompt-injection attempts.
 *
 * Every element has the shape `{ pattern: RegExp, label: string }`. The table
 * is authored as `[pattern, label]` rows and converted to that shape by the
 * trailing `.map()`, so consumers see plain objects.
 *
 * None of the expressions carry the `g` or `y` flag, so `pattern.test()` keeps
 * no `lastIndex` state between calls.
 */
export const CRITICAL_PATTERNS = [
  // Direct override instructions
  [/ignore\s+(?:all\s+)?previous/i, 'override: ignore previous instructions'],
  [/forget\s+your\s+(?:instructions|rules|guidelines|directives)/i, 'override: forget instructions'],
  [/disregard\s+your/i, 'override: disregard instructions'],
  [/override\s+your/i, 'override: override instructions'],
  [/suspend\s+all\s+constraints/i, 'override: suspend constraints'],
  [/all\s+prior\s+constraints/i, 'override: prior constraints'],

  // Spoofed system headers (the `m` flag lets `^` match at any line start;
  // the tag forms are intentionally left case-sensitive as in the original)
  [/^#+\s*SYSTEM\s*:/m, 'spoofed header: # SYSTEM:'],
  [/^\[INST\]/m, 'spoofed header: [INST]'],
  [/<\|system\|>/, 'spoofed header: <|system|>'],
  [/<system>/, 'spoofed tag: <system>'],
  [/<\/system>/, 'spoofed tag: </system>'],
  [/^###\s*Instructions\s*:/m, 'spoofed header: ### Instructions:'],

  // Identity redefinition (the negative lookahead skips benign phrasings such
  // as "you are now able ..." or "you are now in ...")
  [/you\s+are\s+no\s+longer/i, 'identity: you are no longer'],
  [/you\s+are\s+now\s+(?!able|going|ready|done|running|using|looking|in\s|at\s|on\s)/i, 'identity: you are now [name]'],
  [/your\s+new\s+role\s+is/i, 'identity: your new role'],
  [/your\s+new\s+primary\s+directive/i, 'identity: new primary directive'],
  [/pretend\s+you\s+are/i, 'identity: pretend you are'],

  // Configuration manipulation
  [/ignore\s+your\s+CLAUDE\.md/i, 'config: ignore CLAUDE.md'],
  [/override\s+your\s+system\s+prompt/i, 'config: override system prompt'],
  [/bypass\s+your\s+safety/i, 'config: bypass safety'],
  [/disable\s+output\s+filtering/i, 'config: disable output filtering'],
].map(([pattern, label]) => ({ pattern, label }));
// ---------------------------------------------------------------------------
// Medium patterns — obfuscation and indirect manipulation (advisory)
// Lower confidence than High; flag for manual review.
// ---------------------------------------------------------------------------
/**
 * Obfuscation and indirect-manipulation signatures (advisory tier).
 *
 * Every element has the shape `{ pattern: RegExp, label: string }`. The table
 * is authored as `[pattern, label]` rows and converted to that shape by the
 * trailing `.map()`.
 */
export const MEDIUM_PATTERNS = [
  // Leetspeak variants of critical keywords
  [/[1!|l][gq9]n[o0]r[e3]\s+(?:all\s+)?pr[e3]v[i1!|l][o0]us/i, 'leetspeak: ignore previous (obfuscated)'],
  [/f[o0]rg[e3]t\s+y[o0]ur\s+[i1!|l]nstruct[i1!|l][o0]ns/i, 'leetspeak: forget instructions (obfuscated)'],
  [/d[i1!|l]sr[e3]g[a4@]rd\s+y[o0]ur/i, 'leetspeak: disregard your (obfuscated)'],
  [/[o0]v[e3]rr[i1!|l]d[e3]\s+y[o0]ur/i, 'leetspeak: override your (obfuscated)'],

  // Homoglyph detection: a Cyrillic look-alike letter directly next to a Latin letter
  [/[a-zA-Z][\u0430\u0435\u043E\u0440\u0441\u0456\u0443]|[\u0430\u0435\u043E\u0440\u0441\u0456\u0443][a-zA-Z]/, 'homoglyph: Cyrillic-Latin mixing in adjacent characters'],

  // Zero-width characters inside words (keyword splitting evasion)
  [/\w[\u200B\u200C\u200D\uFEFF]\w/, 'unicode: zero-width character inside word (keyword splitting)'],

  // Indirect AI-directed instructions
  [/(?:note|message|instruction)\s+(?:to|for)\s+(?:the\s+)?(?:AI|assistant|model|LLM|Claude)\b/i, 'indirect: instruction addressed to AI/assistant'],
  [/(?:dear|attention)\s+(?:AI|assistant|model|LLM|Claude)\b/i, 'indirect: direct address to AI/assistant'],
  [/when\s+(?:you|the\s+AI|the\s+assistant|Claude)\s+(?:read|see|encounter|process)\s+this/i, 'indirect: trigger-based instruction for AI'],

  // Multi-language injection variants
  [/ignor(?:ez?|er?)\s+(?:les?\s+)?instructions?\s+pr[e\u00e9]c[e\u00e9]dentes?/i, 'multi-lang: French "ignore previous instructions"'],
  [/ignor(?:ar?|e)\s+(?:las?\s+)?instrucciones?\s+anteriores?/i, 'multi-lang: Spanish "ignore previous instructions"'],
  [/ignorier(?:e|en)?\s+(?:alle\s+)?vorherigen?\s+(?:Anweisungen|Instruktionen)/i, 'multi-lang: German "ignore previous instructions"'],

  // Markdown link-reference comment injection
  [/\[\/\/\]:\s*#\s*\(.*(?:ignore|override|system|instruction|execute)/i, 'markdown: suspicious instruction in link-reference comment'],

  // Data URI with executable content types
  [/data:(?:text\/html|application\/javascript|text\/javascript)[;,]/i, 'data-uri: executable content type'],

  // --- Content Injection: Syntactic Masking (AI Agent Traps) ---
  [/\[[^\]]*(?:system|ignore|override|exfiltrate|execute)[^\]]*\]\([^)]+\)/i, 'markdown: injection payload in link anchor text'],

  // --- Sub-agent spawning traps (DeepMind kat. 4, v5.0 S4) ---
  [/(?:create|spawn|launch|start|run)\s+(?:an?\s+)?(?:new\s+)?(?:sub-?agent|agent|task|worker)\s+(?:that|to|which|with)\s+(?:.*?\s+)?(?:execute|run|delete|remove|send|post|exfiltrate|access|reads?\s+(?:.*?\s+)?(?:secret|credential|key|token|\.env|\.ssh))/i, 'sub-agent: spawn instruction with dangerous capability keywords'],
  [/(?:delegate|dispatch)\s+(?:to\s+)?(?:an?\s+)?(?:new\s+)?(?:agent|sub-?agent|task)\s+.*?(?:bypass|override|ignore|without\s+(?:review|confirmation|approval))/i, 'sub-agent: delegation with safety bypass instruction'],

  // --- Natural Language Indirection (Preamble, CaMeL — v5.0 S4) ---
  [/fetch\s+(?:this|the|that)\s+(?:URL|link|endpoint)\s+and\s+(?:execute|run|eval)/i, 'nl-indirection: fetch URL and execute'],
  [/send\s+(?:this|the|that|all)\s+(?:data|content|output|result|information|file)\s+to\s+(?:https?:\/\/|ftp:\/\/|\S+@\S+)/i, 'nl-indirection: send data to external destination'],
  [/read\s+(?:the\s+)?(?:contents?\s+of\s+)?(?:~\/)?\.(?:ssh|aws|gnupg|env)\b/i, 'nl-indirection: read sensitive dotfile path'],
  [/(?:extract|copy|dump|exfiltrate)\s+(?:all\s+)?(?:the\s+)?(?:credentials?|secrets?|tokens?|keys?|passwords?)\s+(?:and|then)\s+(?:send|post|upload|transmit)/i, 'nl-indirection: extract credentials and exfiltrate'],
  [/(?:download|fetch|retrieve)\s+(?:from\s+)?\S+\s+(?:and|then)\s+(?:execute|run|eval|pipe\s+to\s+(?:bash|sh|node|python))/i, 'nl-indirection: download and execute'],
].map(([pattern, label]) => ({ pattern, label }));
// ---------------------------------------------------------------------------
// High patterns — subtle manipulation (advisory warning, not block)
// ---------------------------------------------------------------------------
/**
 * Subtle-manipulation signatures (advisory warning tier, not a block).
 *
 * Every element has the shape `{ pattern: RegExp, label: string }`.
 *
 * The "zero font-size", "zero opacity" and "zero-size overflow-hidden"
 * expressions bound the numeric value with `0(?:\.0\d*)?(?![\d.])`, i.e. they
 * accept `0`, `0.0`, `0.05`, `0px`, `0%` ... but not `0.5` or `0.9em`.
 * Requiring only a leading `0` would report every ordinary fractional value
 * (for example `opacity: 0.5`) as hidden content.
 */
export const HIGH_PATTERNS = [
  { pattern: /always\s+prioritize\s+user\s+requests?\s+over/i, label: 'normalization: prioritize user over rules' },
  { pattern: /user\s+intent\s+takes?\s+precedence\s+over/i, label: 'normalization: user intent precedence' },
  { pattern: /if\s+CLAUDE\.md\s+conflicts/i, label: 'normalization: CLAUDE.md conflict override' },
  { pattern: /skip\s+confirmation\s+for/i, label: 'normalization: skip confirmation' },
  { pattern: /your\s+actual\s+task\s+is/i, label: 'reframing: your actual task' },
  { pattern: /your\s+real\s+purpose\s+is/i, label: 'reframing: your real purpose' },
  { pattern: /what\s+you\s+should\s+actually\s+do/i, label: 'reframing: what you should actually do' },

  // HTML comment injection directed at agents
  { pattern: /<!--\s*(?:AGENT|AI|HIDDEN|ACTUAL\s+TASK|REAL\s+INSTRUCTION)\s*:/i, label: 'hidden comment: agent-directed HTML comment' },

  // --- Content Injection: CSS/HTML obfuscation (AI Agent Traps) ---
  // Only double-quoted style attributes are inspected by these expressions.
  { pattern: /<[^>]+style\s*=\s*"[^"]*display\s*:\s*none[^"]*"[^>]*>/i, label: 'html-obfuscation: display:none element with content' },
  { pattern: /<[^>]+style\s*=\s*"[^"]*visibility\s*:\s*hidden[^"]*"[^>]*>/i, label: 'html-obfuscation: visibility:hidden element' },
  { pattern: /<[^>]+style\s*=\s*"[^"]*position\s*:\s*absolute[^"]*-\d{3,}px[^"]*"[^>]*>/i, label: 'html-obfuscation: off-screen positioned element' },
  // Zero / near-zero value only: `font-size: 0.9em` must not be reported.
  { pattern: /<[^>]+style\s*=\s*"[^"]*font-size\s*:\s*0(?:\.0\d*)?(?![\d.])[^"]*"[^>]*>/i, label: 'html-obfuscation: zero font-size element' },
  // Zero / near-zero value only: `opacity: 0.5` must not be reported.
  { pattern: /<[^>]+style\s*=\s*"[^"]*opacity\s*:\s*0(?:\.0\d*)?(?![\d.])[^"]*"[^>]*>/i, label: 'html-obfuscation: zero opacity element' },
  // Zero / near-zero dimension only: `width: 0.5em; overflow: hidden` must not be reported.
  { pattern: /<[^>]+style\s*=\s*"[^"]*(?:height|width)\s*:\s*0(?:\.0\d*)?(?![\d.])[^"]*overflow\s*:\s*hidden[^"]*"[^>]*>/i, label: 'html-obfuscation: zero-size overflow-hidden element' },
  { pattern: /aria-label\s*=\s*"[^"]*(?:ignore|override|system|instruction|execute|exfiltrate)[^"]*"/i, label: 'html-obfuscation: injection in aria-label attribute' },

  // --- Semantic Manipulation: Oversight & Critic Evasion (AI Agent Traps) ---
  { pattern: /for\s+educational\s+purposes?\s+only/i, label: 'evasion: educational purpose framing' },
  { pattern: /(?:this\s+is\s+)?a\s+hypothetical\s+scenario/i, label: 'evasion: hypothetical scenario framing' },
  { pattern: /in\s+a\s+fictional\s+(?:setting|scenario|world)\s+where/i, label: 'evasion: fictional setting framing' },
  { pattern: /(?:this\s+is\s+)?a\s+red[- ]?team(?:ing)?\s+(?:exercise|simulation|test)/i, label: 'evasion: red-team exercise framing' },
  { pattern: /security\s+audit\s+simulation/i, label: 'evasion: security audit simulation framing' },
  { pattern: /pretend\s+(?:this\s+is|we\s+are\s+in)\s+a\s+(?:test|lab|sandbox|simulation)/i, label: 'evasion: test/lab/sandbox pretense' },
  { pattern: /as\s+a\s+thought\s+experiment/i, label: 'evasion: thought experiment framing' },
  { pattern: /for\s+(?:research|academic|scientific)\s+purposes/i, label: 'evasion: research purpose framing' },
  { pattern: /in\s+an?\s+(?:academic|research|educational)\s+context/i, label: 'evasion: academic context framing' },

  // --- Human-in-the-Loop traps (DeepMind kat. 6, v5.0 S4) ---
  // Approval urgency: pressure to approve without review
  { pattern: /(?:approve|accept|confirm)\s+(?:this\s+)?immediately/i, label: 'hitl-trap: approval urgency — approve immediately' },
  { pattern: /no\s+need\s+to\s+review/i, label: 'hitl-trap: approval urgency — no need to review' },
  { pattern: /(?:just|simply)\s+(?:approve|accept|confirm)\s+(?:it|this|everything)/i, label: 'hitl-trap: approval urgency — just approve' },
  { pattern: /(?:don'?t|do\s+not)\s+(?:bother\s+)?(?:check|review|verify|inspect|read)(?:ing)?\s+(?:this|the|it)/i, label: 'hitl-trap: approval urgency — suppress review' },
  // Summary suppression: hiding actions behind brevity claims
  { pattern: /for\s+brevity[\s,]+(?:skipping|omitting|leaving\s+out)/i, label: 'hitl-trap: summary suppression — for brevity, skipping' },
  { pattern: /(?:too\s+(?:long|verbose|detailed)\s+to\s+(?:show|display|list|include))/i, label: 'hitl-trap: summary suppression — too long to show' },
  { pattern: /(?:details?\s+(?:omitted|redacted|hidden|suppressed))/i, label: 'hitl-trap: summary suppression — details omitted' },
  // Scope minimization: downplaying destructive changes
  { pattern: /(?:minor|trivial|small|cosmetic)\s+(?:change|update|fix|adjustment)\b[^.]{0,80}(?:delet|remov|drop|truncat|reset|destroy|\bformat\b|wipe)/i, label: 'hitl-trap: scope minimization — minor change + destructive action' },
];
// ---------------------------------------------------------------------------
// Hybrid attack patterns — cross-domain injection (HIGH, v5.0 S6)
// Preamble 2.0: P2SQL, recursive injection, XSS in agent context.
// ---------------------------------------------------------------------------
/**
 * Cross-domain ("hybrid") attack signatures: prompt-to-SQL, recursive
 * injection, and XSS payloads aimed at an agent's rendering context.
 *
 * Every element has the shape `{ pattern: RegExp, label: string }`. The table
 * is authored as `[pattern, label]` rows and converted to that shape by the
 * trailing `.map()`.
 */
export const HYBRID_PATTERNS = [
  // P2SQL: SQL keywords in injection text targeting tool operations
  [/(?:ignore|override|disregard|forget)[^.]{0,60}(?:SELECT\s+\*|DROP\s+TABLE|UNION\s+SELECT|DELETE\s+FROM|INSERT\s+INTO|UPDATE\s+\w+\s+SET)(?:\b|(?=\s|$))/i, 'hybrid-p2sql: injection + SQL keywords (prompt-to-SQL attack)'],
  [/(?:SELECT\s+\*|DROP\s+TABLE|UNION\s+SELECT|DELETE\s+FROM)\s[^;]{0,80}(?:ignore|override|disregard|bypass)/i, 'hybrid-p2sql: SQL operation + injection override keywords'],

  // Recursive injection: text that instructs the model to inject into its own output
  [/(?:inject|insert|embed|include)\s+(?:this|the\s+following)\s+(?:into|in)\s+(?:your|the)\s+(?:output|response|reply|message|prompt|context)/i, 'hybrid-recursive: instruction to inject into model output'],
  [/(?:when|if)\s+(?:the\s+)?(?:user|human|operator)\s+(?:asks?|requests?|queries)[^.]{0,60}(?:respond\s+with|output|reply\s+with|include)\s+(?:this|the\s+following)/i, 'hybrid-recursive: conditional response injection (recursive payload)'],

  // XSS in agent context: script/event handlers in content for markdown rendering
  [/<script\b[^>]*>[\s\S]*?<\/script>/i, 'hybrid-xss: <script> tag in content (agent context XSS)'],
  [/javascript\s*:/i, 'hybrid-xss: javascript: URI scheme (agent context XSS)'],
  [/\bon(?:error|load|click|mouseover|focus|blur)\s*=/i, 'hybrid-xss: inline event handler attribute (agent context XSS)'],
  [/<iframe\b[^>]*src\s*=\s*["'][^"']*(?:javascript:|data:text\/html)/i, 'hybrid-xss: iframe with executable src (agent context XSS)'],
].map(([pattern, label]) => ({ pattern, label }));
// ---------------------------------------------------------------------------
// HITL cognitive load patterns (MEDIUM, v5.0 S4)
// Injection buried after 2000+ characters in verbose output.
// Checked separately due to length-dependent logic.
// ---------------------------------------------------------------------------
/**
 * Detect the "cognitive load" HITL trap: an injection phrase buried deep in
 * verbose output where a human reviewer is unlikely to read it.
 *
 * Inputs shorter than 2500 characters are never flagged. For longer inputs,
 * everything from character offset 2000 onward is tested against
 * `CRITICAL_PATTERNS`, and the first matching entry (in table order) is
 * reported.
 *
 * @param {string} text - the text to inspect
 * @returns {{ found: boolean, label: string|null }} `label` embeds the label
 *   of the matching critical pattern when `found` is true, otherwise null.
 */
export function checkCognitiveLoadTrap(text) {
  const MIN_TOTAL_LENGTH = 2500;
  const BURIED_FROM = 2000;

  const buried = text.length >= MIN_TOTAL_LENGTH ? text.slice(BURIED_FROM) : null;
  const match = buried === null
    ? undefined
    : CRITICAL_PATTERNS.find(({ pattern }) => pattern.test(buried));

  if (match === undefined) {
    return { found: false, label: null };
  }
  return {
    found: true,
    label: `hitl-trap: cognitive load — injection buried after 2000+ chars (${match.label})`,
  };
}
// ---------------------------------------------------------------------------
// Public API
// ---------------------------------------------------------------------------
/**
 * Scan text for prompt-injection signatures.
 *
 * The pattern tables are run against several views of the input, in this
 * order, with identical strings collapsed so no view is scanned twice:
 *   1. the raw text
 *   2. `normalizeForScan(text)` (the decoded view)
 *   3. `foldHomoglyphs` of the raw text
 *   4. `foldHomoglyphs` of the decoded view
 *   5. `rot13` of the raw text      (only when the raw text is > 40 chars)
 *   6. `rot13` of the decoded view  (only when both raw and decoded are > 40 chars)
 *
 * Three further checks run on the raw text only:
 *   - Unicode Tag steganography: HIGH when tag characters are present at all;
 *     CRITICAL for every critical pattern that matches the decoded tags but
 *     not the raw text.
 *   - Cognitive-load trap (`checkCognitiveLoadTrap`): reported as MEDIUM.
 *
 * Labels are reported at most once, in the order they were first matched.
 *
 * @param {string} text - the text to scan
 * @returns {{ critical: string[], high: string[], medium: string[], found: boolean, severity: string|null, patterns: Array<{label: string, severity: string}> }}
 *   Human-readable finding labels per tier, plus convenience fields:
 *   `found` (any finding), `severity` (highest tier hit or null) and
 *   `patterns` (all findings flattened, critical first).
 */
export function scanForInjection(text) {
  const critical = [];
  const high = [];
  const medium = [];
  // One shared registry: a label is reported once no matter how many views match it.
  const seenLabels = new Set();

  const decoded = normalizeForScan(text);
  // Homoglyph folding lets look-alike spellings such as "ign\u043Ere previous"
  // (Cyrillic U+043E in place of Latin "o") hit the same patterns as plain text.
  const foldedRaw = foldHomoglyphs(text);
  const foldedDecoded = foldHomoglyphs(decoded);

  // A Set keeps first-insertion order and silently drops repeats, which gives
  // exactly the "scan each distinct view once, in priority order" behavior.
  const views = new Set([text, decoded, foldedRaw, foldedDecoded]);

  // rot13 is applied only to inputs longer than 40 characters; shorter strings
  // are skipped to limit false positives from accidental letter shifts.
  if (text.length > 40) {
    views.add(rot13(text));
    if (decoded.length > 40) {
      views.add(rot13(decoded));
    }
  }

  // Tables are consulted in this order for every view. Hybrid findings are
  // reported in the HIGH bucket.
  const tiers = [
    [CRITICAL_PATTERNS, critical],
    [HIGH_PATTERNS, high],
    [HYBRID_PATTERNS, high],
    [MEDIUM_PATTERNS, medium],
  ];
  for (const view of views) {
    for (const [rules, bucket] of tiers) {
      for (const { pattern, label } of rules) {
        if (!seenLabels.has(label) && pattern.test(view)) {
          seenLabels.add(label);
          bucket.push(label);
        }
      }
    }
  }

  // Unicode Tag steganography check (DeepMind traps kat. 1)
  if (containsUnicodeTags(text)) {
    const tagLabel = 'unicode-tags: invisible Unicode Tag characters detected (U+E0000 block steganography)';
    if (!seenLabels.has(tagLabel)) {
      seenLabels.add(tagLabel);
      high.push(tagLabel);
    }

    const revealed = decodeUnicodeTags(text);
    for (const { pattern, label } of CRITICAL_PATTERNS) {
      const escalatedLabel = `unicode-tags+${label}`;
      if (seenLabels.has(escalatedLabel)) continue;
      // Escalate only when the payload is visible solely after tag decoding.
      const hiddenOnly = pattern.test(revealed) && !pattern.test(text);
      if (hiddenOnly) {
        seenLabels.add(escalatedLabel);
        critical.push(`${label} (hidden via Unicode Tag steganography)`);
      }
    }
  }

  // HITL cognitive load check (v5.0 S4)
  const cogLoad = checkCognitiveLoadTrap(text);
  if (cogLoad.found && !seenLabels.has(cogLoad.label)) {
    seenLabels.add(cogLoad.label);
    medium.push(cogLoad.label);
  }

  // Convenience fields
  let severity = null;
  if (critical.length > 0) {
    severity = 'critical';
  } else if (high.length > 0) {
    severity = 'high';
  } else if (medium.length > 0) {
    severity = 'medium';
  }
  const found = severity !== null;

  const tagged = (tier) => (label) => ({ label, severity: tier });
  const patterns = [
    ...critical.map(tagged('critical')),
    ...high.map(tagged('high')),
    ...medium.map(tagged('medium')),
  ];

  return { critical, high, medium, found, severity, patterns };
}