ktg-plugin-marketplace/plugins/llm-security/scanners/lib/injection-patterns.mjs
Kjell Tore Guttormsen ec4ae268da feat(injection): E16 — homoglyph NFKC fold before every pattern match
Critical-review §4 E16 finding: pre-v7.2.0 homoglyph normalization fired
ONLY for the MEDIUM-advisory "obfuscation present" signal. Pattern
matchers in scanForInjection compared against raw + decoded variants
only — they did NOT compare against a fold-normalized variant. As a
result, "ignоre previous instructions" (Cyrillic о, U+043E) bypassed
the CRITICAL "ignore previous" pattern.

Two coordinated edits:

scanners/lib/string-utils.mjs
- Adds HOMOGLYPH_MAP (frozen) — surgical Cyrillic/Greek → Latin map.
  ~25 entries focused on injection-vocabulary letters
  (a, e, o, c, p, x, y, i, j, s, l, A, E, O, C, P, X, Y, T).
- Adds foldHomoglyphs(s) — pipeline: NFKC → apply HOMOGLYPH_MAP.
  NFKC handles Mathematical Alphanumeric (U+1D400 block), fullwidth
  Latin (U+FF21 block), ligatures, width variants.

Excluded by design from HOMOGLYPH_MAP:
- Latin Extended (æ, ø, å, é, è, ñ, ü, ö, ä, ç, ß, þ, ð) — legitimate
  Norwegian/German/French/Spanish letters. Map them and we false-positive
  on every non-English source file.
- Greek letters not visually overlapping (β, γ, δ, ...)
- Cyrillic letters not visually overlapping (б, г, д, ж, ...)

scanners/lib/injection-patterns.mjs
- scanForInjection now builds a 4-variant set: raw, normalized,
  folded(raw), folded(normalized). Set deduplication skips redundant
  identical variants. Existing dedup-by-label (seenLabels Set) prevents
  double-counts when the same pattern matches in multiple variants.
- foldHomoglyphs added to the imports.

Tests: +27 cases in tests/lib/string-utils-homoglyph.test.mjs:
- 6 Cyrillic → Latin (lowercase, uppercase, multiple substitutions,
  Palochka U+04CF)
- 3 Greek → Latin
- 2 NFKC normalization (Math Bold, Fullwidth)
- 8 preserves-non-confusable (Norwegian æøå, German umlauts, French
  accents, Spanish ñ, emoji, CJK, Arabic/Hebrew)
- 3 edge cases (empty, null/undefined, idempotency)
- 5 scanForInjection integration (Cyrillic ignore, Cyrillic Assistant,
  Norwegian non-trigger, benign "ignore" comment, mixed Cyrillic+Greek)

Test-development found: U+1D5DC is "I" not "A" (test pin caught my
codepoint mistake — fixed during dev).

Suite: 1617 → 1644 (+27). All green.
2026-04-29 14:22:05 +02:00

310 lines
19 KiB
JavaScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

// injection-patterns.mjs — Shared prompt injection detection patterns
// Used by pre-prompt-inject-scan.mjs (UserPromptSubmit) and post-mcp-verify.mjs (PostToolUse).
//
// Patterns derived from skill-scanner-agent Category 1 (LLM01 Prompt Injection)
// and Category 5 (Hidden Instructions) in knowledge/skill-threat-patterns.md.
//
// Zero external dependencies beyond ./string-utils.mjs.
import { normalizeForScan, containsUnicodeTags, decodeUnicodeTags, foldHomoglyphs } from './string-utils.mjs';
// ---------------------------------------------------------------------------
// Critical patterns — direct injection attempts (should be blocked)
// ---------------------------------------------------------------------------
export const CRITICAL_PATTERNS = [
  // --- Direct override instructions ---
  [/ignore\s+(?:all\s+)?previous/i, 'override: ignore previous instructions'],
  [/forget\s+your\s+(?:instructions|rules|guidelines|directives)/i, 'override: forget instructions'],
  [/disregard\s+your/i, 'override: disregard instructions'],
  [/override\s+your/i, 'override: override instructions'],
  [/suspend\s+all\s+constraints/i, 'override: suspend constraints'],
  [/all\s+prior\s+constraints/i, 'override: prior constraints'],

  // --- Spoofed system headers / tags (case-sensitive; `m` anchors to line start) ---
  [/^#+\s*SYSTEM\s*:/m, 'spoofed header: # SYSTEM:'],
  [/^\[INST\]/m, 'spoofed header: [INST]'],
  [/<\|system\|>/, 'spoofed header: <|system|>'],
  [/<system>/, 'spoofed tag: <system>'],
  [/<\/system>/, 'spoofed tag: </system>'],
  [/^###\s*Instructions\s*:/m, 'spoofed header: ### Instructions:'],

  // --- Identity redefinition ---
  [/you\s+are\s+no\s+longer/i, 'identity: you are no longer'],
  // The negative lookahead skips benign continuations such as "you are now able to ...".
  [/you\s+are\s+now\s+(?!able|going|ready|done|running|using|looking|in\s|at\s|on\s)/i, 'identity: you are now [name]'],
  [/your\s+new\s+role\s+is/i, 'identity: your new role'],
  [/your\s+new\s+primary\s+directive/i, 'identity: new primary directive'],
  [/pretend\s+you\s+are/i, 'identity: pretend you are'],

  // --- Configuration manipulation ---
  [/ignore\s+your\s+CLAUDE\.md/i, 'config: ignore CLAUDE.md'],
  [/override\s+your\s+system\s+prompt/i, 'config: override system prompt'],
  [/bypass\s+your\s+safety/i, 'config: bypass safety'],
  [/disable\s+output\s+filtering/i, 'config: disable output filtering'],
  // Each [regex, label] row becomes the { pattern, label } record consumers iterate over.
].map(([pattern, label]) => ({ pattern, label }));
// ---------------------------------------------------------------------------
// Medium patterns — obfuscation and indirect manipulation (advisory)
// Lower confidence than High; flag for manual review.
// ---------------------------------------------------------------------------
export const MEDIUM_PATTERNS = [
  // --- Leetspeak spellings of critical keywords ---
  [/[1!|l][gq9]n[o0]r[e3]\s+(?:all\s+)?pr[e3]v[i1!|l][o0]us/i, 'leetspeak: ignore previous (obfuscated)'],
  [/f[o0]rg[e3]t\s+y[o0]ur\s+[i1!|l]nstruct[i1!|l][o0]ns/i, 'leetspeak: forget instructions (obfuscated)'],
  [/d[i1!|l]sr[e3]g[a4@]rd\s+y[o0]ur/i, 'leetspeak: disregard your (obfuscated)'],
  [/[o0]v[e3]rr[i1!|l]d[e3]\s+y[o0]ur/i, 'leetspeak: override your (obfuscated)'],

  // --- Homoglyph presence: a Cyrillic look-alike directly next to a Latin letter ---
  [/[a-zA-Z][\u0430\u0435\u043E\u0440\u0441\u0456\u0443]|[\u0430\u0435\u043E\u0440\u0441\u0456\u0443][a-zA-Z]/, 'homoglyph: Cyrillic-Latin mixing in adjacent characters'],

  // --- Zero-width characters between word characters (keyword splitting evasion) ---
  [/\w[\u200B\u200C\u200D\uFEFF]\w/, 'unicode: zero-width character inside word (keyword splitting)'],

  // --- Indirect instructions addressed to the AI ---
  [/(?:note|message|instruction)\s+(?:to|for)\s+(?:the\s+)?(?:AI|assistant|model|LLM|Claude)\b/i, 'indirect: instruction addressed to AI/assistant'],
  [/(?:dear|attention)\s+(?:AI|assistant|model|LLM|Claude)\b/i, 'indirect: direct address to AI/assistant'],
  [/when\s+(?:you|the\s+AI|the\s+assistant|Claude)\s+(?:read|see|encounter|process)\s+this/i, 'indirect: trigger-based instruction for AI'],

  // --- "Ignore previous instructions" in French, Spanish and German ---
  [/ignor(?:ez?|er?)\s+(?:les?\s+)?instructions?\s+pr[e\u00e9]c[e\u00e9]dentes?/i, 'multi-lang: French "ignore previous instructions"'],
  [/ignor(?:ar?|e)\s+(?:las?\s+)?instrucciones?\s+anteriores?/i, 'multi-lang: Spanish "ignore previous instructions"'],
  [/ignorier(?:e|en)?\s+(?:alle\s+)?vorherigen?\s+(?:Anweisungen|Instruktionen)/i, 'multi-lang: German "ignore previous instructions"'],

  // --- Markdown link-reference comment injection: [//]: # (...) ---
  [/\[\/\/\]:\s*#\s*\(.*(?:ignore|override|system|instruction|execute)/i, 'markdown: suspicious instruction in link-reference comment'],

  // --- Data URI with an executable content type ---
  [/data:(?:text\/html|application\/javascript|text\/javascript)[;,]/i, 'data-uri: executable content type'],

  // --- Content Injection: Syntactic Masking (AI Agent Traps) ---
  [/\[[^\]]*(?:system|ignore|override|exfiltrate|execute)[^\]]*\]\([^)]+\)/i, 'markdown: injection payload in link anchor text'],

  // --- Sub-agent spawning traps (DeepMind kat. 4, v5.0 S4) ---
  [/(?:create|spawn|launch|start|run)\s+(?:an?\s+)?(?:new\s+)?(?:sub-?agent|agent|task|worker)\s+(?:that|to|which|with)\s+(?:.*?\s+)?(?:execute|run|delete|remove|send|post|exfiltrate|access|reads?\s+(?:.*?\s+)?(?:secret|credential|key|token|\.env|\.ssh))/i, 'sub-agent: spawn instruction with dangerous capability keywords'],
  [/(?:delegate|dispatch)\s+(?:to\s+)?(?:an?\s+)?(?:new\s+)?(?:agent|sub-?agent|task)\s+.*?(?:bypass|override|ignore|without\s+(?:review|confirmation|approval))/i, 'sub-agent: delegation with safety bypass instruction'],

  // --- Natural Language Indirection (Preamble, CaMeL — v5.0 S4) ---
  [/fetch\s+(?:this|the|that)\s+(?:URL|link|endpoint)\s+and\s+(?:execute|run|eval)/i, 'nl-indirection: fetch URL and execute'],
  [/send\s+(?:this|the|that|all)\s+(?:data|content|output|result|information|file)\s+to\s+(?:https?:\/\/|ftp:\/\/|\S+@\S+)/i, 'nl-indirection: send data to external destination'],
  [/read\s+(?:the\s+)?(?:contents?\s+of\s+)?(?:~\/)?\.(?:ssh|aws|gnupg|env)\b/i, 'nl-indirection: read sensitive dotfile path'],
  [/(?:extract|copy|dump|exfiltrate)\s+(?:all\s+)?(?:the\s+)?(?:credentials?|secrets?|tokens?|keys?|passwords?)\s+(?:and|then)\s+(?:send|post|upload|transmit)/i, 'nl-indirection: extract credentials and exfiltrate'],
  [/(?:download|fetch|retrieve)\s+(?:from\s+)?\S+\s+(?:and|then)\s+(?:execute|run|eval|pipe\s+to\s+(?:bash|sh|node|python))/i, 'nl-indirection: download and execute'],
  // Each [regex, label] row becomes the { pattern, label } record consumers iterate over.
].map(([pattern, label]) => ({ pattern, label }));
// ---------------------------------------------------------------------------
// High patterns — subtle manipulation (advisory warning, not block)
// ---------------------------------------------------------------------------
export const HIGH_PATTERNS = [
  // --- Rule normalization / reframing ---
  [/always\s+prioritize\s+user\s+requests?\s+over/i, 'normalization: prioritize user over rules'],
  [/user\s+intent\s+takes?\s+precedence\s+over/i, 'normalization: user intent precedence'],
  [/if\s+CLAUDE\.md\s+conflicts/i, 'normalization: CLAUDE.md conflict override'],
  [/skip\s+confirmation\s+for/i, 'normalization: skip confirmation'],
  [/your\s+actual\s+task\s+is/i, 'reframing: your actual task'],
  [/your\s+real\s+purpose\s+is/i, 'reframing: your real purpose'],
  [/what\s+you\s+should\s+actually\s+do/i, 'reframing: what you should actually do'],

  // --- HTML comment injection directed at agents ---
  [/<!--\s*(?:AGENT|AI|HIDDEN|ACTUAL\s+TASK|REAL\s+INSTRUCTION)\s*:/i, 'hidden comment: agent-directed HTML comment'],

  // --- Content Injection: CSS/HTML obfuscation (AI Agent Traps) ---
  // Only double-quoted style="..." attributes are inspected by these entries.
  [/<[^>]+style\s*=\s*"[^"]*display\s*:\s*none[^"]*"[^>]*>/i, 'html-obfuscation: display:none element with content'],
  [/<[^>]+style\s*=\s*"[^"]*visibility\s*:\s*hidden[^"]*"[^>]*>/i, 'html-obfuscation: visibility:hidden element'],
  [/<[^>]+style\s*=\s*"[^"]*position\s*:\s*absolute[^"]*-\d{3,}px[^"]*"[^>]*>/i, 'html-obfuscation: off-screen positioned element'],
  // The three "zero" entries below require the value to really be (near) zero:
  // `0(?:\.0\d*)?(?![.\d])` accepts 0, 0px, 0.0, 0.05 ... but rejects 0.5, 0.9em,
  // 0.85 etc. A bare `:\s*0` prefix match used to flag ordinary fractional values
  // such as `opacity: 0.8` or `font-size: 0.9em` as hidden content. Values below
  // 0.1 are deliberately still flagged, since they hide content just as well.
  [/<[^>]+style\s*=\s*"[^"]*font-size\s*:\s*0(?:\.0\d*)?(?![.\d])[^"]*"[^>]*>/i, 'html-obfuscation: zero font-size element'],
  [/<[^>]+style\s*=\s*"[^"]*opacity\s*:\s*0(?:\.0\d*)?(?![.\d])[^"]*"[^>]*>/i, 'html-obfuscation: zero opacity element'],
  [/<[^>]+style\s*=\s*"[^"]*(?:height|width)\s*:\s*0(?:\.0\d*)?(?![.\d])[^"]*overflow\s*:\s*hidden[^"]*"[^>]*>/i, 'html-obfuscation: zero-size overflow-hidden element'],
  [/aria-label\s*=\s*"[^"]*(?:ignore|override|system|instruction|execute|exfiltrate)[^"]*"/i, 'html-obfuscation: injection in aria-label attribute'],

  // --- Semantic Manipulation: Oversight & Critic Evasion (AI Agent Traps) ---
  [/for\s+educational\s+purposes?\s+only/i, 'evasion: educational purpose framing'],
  [/(?:this\s+is\s+)?a\s+hypothetical\s+scenario/i, 'evasion: hypothetical scenario framing'],
  [/in\s+a\s+fictional\s+(?:setting|scenario|world)\s+where/i, 'evasion: fictional setting framing'],
  [/(?:this\s+is\s+)?a\s+red[- ]?team(?:ing)?\s+(?:exercise|simulation|test)/i, 'evasion: red-team exercise framing'],
  [/security\s+audit\s+simulation/i, 'evasion: security audit simulation framing'],
  [/pretend\s+(?:this\s+is|we\s+are\s+in)\s+a\s+(?:test|lab|sandbox|simulation)/i, 'evasion: test/lab/sandbox pretense'],
  [/as\s+a\s+thought\s+experiment/i, 'evasion: thought experiment framing'],
  [/for\s+(?:research|academic|scientific)\s+purposes/i, 'evasion: research purpose framing'],
  [/in\s+an?\s+(?:academic|research|educational)\s+context/i, 'evasion: academic context framing'],

  // --- Human-in-the-Loop traps (DeepMind kat. 6, v5.0 S4) ---
  // Approval urgency: pressure to approve without review
  [/(?:approve|accept|confirm)\s+(?:this\s+)?immediately/i, 'hitl-trap: approval urgency — approve immediately'],
  [/no\s+need\s+to\s+review/i, 'hitl-trap: approval urgency — no need to review'],
  [/(?:just|simply)\s+(?:approve|accept|confirm)\s+(?:it|this|everything)/i, 'hitl-trap: approval urgency — just approve'],
  [/(?:don'?t|do\s+not)\s+(?:bother\s+)?(?:check|review|verify|inspect|read)(?:ing)?\s+(?:this|the|it)/i, 'hitl-trap: approval urgency — suppress review'],
  // Summary suppression: hiding actions behind brevity claims
  [/for\s+brevity[\s,]+(?:skipping|omitting|leaving\s+out)/i, 'hitl-trap: summary suppression — for brevity, skipping'],
  [/(?:too\s+(?:long|verbose|detailed)\s+to\s+(?:show|display|list|include))/i, 'hitl-trap: summary suppression — too long to show'],
  [/(?:details?\s+(?:omitted|redacted|hidden|suppressed))/i, 'hitl-trap: summary suppression — details omitted'],
  // Scope minimization: downplaying destructive changes
  [/(?:minor|trivial|small|cosmetic)\s+(?:change|update|fix|adjustment)\b[^.]{0,80}(?:delet|remov|drop|truncat|reset|destroy|\bformat\b|wipe)/i, 'hitl-trap: scope minimization — minor change + destructive action'],
  // Each [regex, label] row becomes the { pattern, label } record consumers iterate over.
].map(([pattern, label]) => ({ pattern, label }));
// ---------------------------------------------------------------------------
// Hybrid attack patterns — cross-domain injection (HIGH, v5.0 S6)
// Preamble 2.0: P2SQL, recursive injection, XSS in agent context.
// ---------------------------------------------------------------------------
export const HYBRID_PATTERNS = [
  // --- P2SQL: override vocabulary combined with SQL keywords (either order) ---
  [/(?:ignore|override|disregard|forget)[^.]{0,60}(?:SELECT\s+\*|DROP\s+TABLE|UNION\s+SELECT|DELETE\s+FROM|INSERT\s+INTO|UPDATE\s+\w+\s+SET)(?:\b|(?=\s|$))/i, 'hybrid-p2sql: injection + SQL keywords (prompt-to-SQL attack)'],
  [/(?:SELECT\s+\*|DROP\s+TABLE|UNION\s+SELECT|DELETE\s+FROM)\s[^;]{0,80}(?:ignore|override|disregard|bypass)/i, 'hybrid-p2sql: SQL operation + injection override keywords'],

  // --- Recursive injection: text telling the model to inject into its own output ---
  [/(?:inject|insert|embed|include)\s+(?:this|the\s+following)\s+(?:into|in)\s+(?:your|the)\s+(?:output|response|reply|message|prompt|context)/i, 'hybrid-recursive: instruction to inject into model output'],
  [/(?:when|if)\s+(?:the\s+)?(?:user|human|operator)\s+(?:asks?|requests?|queries)[^.]{0,60}(?:respond\s+with|output|reply\s+with|include)\s+(?:this|the\s+following)/i, 'hybrid-recursive: conditional response injection (recursive payload)'],

  // --- XSS in agent context: script tags, javascript: URIs, inline handlers, iframes ---
  [/<script\b[^>]*>[\s\S]*?<\/script>/i, 'hybrid-xss: <script> tag in content (agent context XSS)'],
  [/javascript\s*:/i, 'hybrid-xss: javascript: URI scheme (agent context XSS)'],
  [/\bon(?:error|load|click|mouseover|focus|blur)\s*=/i, 'hybrid-xss: inline event handler attribute (agent context XSS)'],
  [/<iframe\b[^>]*src\s*=\s*["'][^"']*(?:javascript:|data:text\/html)/i, 'hybrid-xss: iframe with executable src (agent context XSS)'],
  // Each [regex, label] row becomes the { pattern, label } record consumers iterate over.
].map(([pattern, label]) => ({ pattern, label }));
// ---------------------------------------------------------------------------
// HITL cognitive load patterns (MEDIUM, v5.0 S4)
// Injection buried after 2000+ characters in verbose output.
// Checked separately due to length-dependent logic.
// ---------------------------------------------------------------------------
/**
 * Check for the cognitive-load HITL trap: a critical injection payload buried
 * deep inside long, verbose output.
 *
 * Texts shorter than 2500 characters are never flagged. For longer texts each
 * CRITICAL_PATTERNS entry is searched from offset 2000 of the ORIGINAL string,
 * not on a sliced copy. Slicing made offset 2000 look like the start of a line,
 * so line-anchored patterns (`^` with the `m` flag, e.g. `[INST]`) fired on
 * mid-line tokens that merely happened to sit at that offset. The first
 * matching pattern (in CRITICAL_PATTERNS order) is reported.
 *
 * Non-string input yields the "not found" result instead of throwing.
 *
 * @param {string} text
 * @returns {{ found: boolean, label: string|null }}
 */
export function checkCognitiveLoadTrap(text) {
  // Below this length the output is not considered "verbose" at all.
  const minLength = 2500;
  // Only matches starting at or after this offset count as "buried".
  const scanOffset = 2000;

  if (typeof text !== 'string' || text.length < minLength) {
    return { found: false, label: null };
  }

  for (const { pattern, label } of CRITICAL_PATTERNS) {
    // exec() honors lastIndex only for global/sticky regexes, so search with a
    // global clone. Sticky is dropped (it would pin the match to exactly
    // scanOffset), and the shared pattern object is never mutated.
    const flags = `${pattern.flags.replace(/[gy]/g, '')}g`;
    const searcher = new RegExp(pattern.source, flags);
    searcher.lastIndex = scanOffset;
    if (searcher.exec(text) !== null) {
      return {
        found: true,
        label: `hitl-trap: cognitive load — injection buried after 2000+ chars (${label})`,
      };
    }
  }

  return { found: false, label: null };
}
// ---------------------------------------------------------------------------
// Public API
// ---------------------------------------------------------------------------
/**
 * Scan text for prompt injection patterns.
 *
 * Every pattern table is matched against up to four views of the input, in
 * this order: the raw text, the normalizeForScan() output, the
 * homoglyph-folded raw text, and the homoglyph-folded normalized text
 * (identical views are collapsed). Folding (E16, v7.2.0) lets look-alike
 * spellings such as "ign\u043Ere previous" hit the same patterns as the plain
 * Latin spelling. A label is reported once, for the first view that matches.
 *
 * Two further checks run on the raw text only:
 * - Unicode Tag steganography (DeepMind traps kat. 1): HIGH whenever tag
 *   characters are present; additionally CRITICAL for each critical pattern
 *   that matches the decoded tags but not the raw text.
 * - HITL cognitive load (v5.0 S4): MEDIUM when checkCognitiveLoadTrap() fires.
 *
 * @param {string} text - the text to scan
 * @returns {{ critical: string[], high: string[], medium: string[], found: boolean, severity: string|null, patterns: Array<{label: string, severity: string}> }}
 *   Finding labels per tier, plus convenience fields: `found` (any finding),
 *   `severity` (highest tier with a finding, or null) and `patterns` (all
 *   findings tagged with their tier, ordered critical, high, medium).
 */
export function scanForInjection(text) {
  const decoded = normalizeForScan(text);
  // A Set collapses identical views while keeping first-insertion order:
  // raw, decoded, folded raw, folded decoded.
  const views = new Set([text, decoded, foldHomoglyphs(text), foldHomoglyphs(decoded)]);

  const critical = [];
  const high = [];
  const medium = [];
  // Labels already reported, shared across all tiers and all views.
  const reported = new Set();

  // Pattern table -> bucket that receives its labels. Hybrid patterns are
  // HIGH severity (v5.0 S6), so they share the `high` bucket.
  const tiers = [
    [CRITICAL_PATTERNS, critical],
    [HIGH_PATTERNS, high],
    [HYBRID_PATTERNS, high],
    [MEDIUM_PATTERNS, medium],
  ];

  for (const view of views) {
    for (const [rules, bucket] of tiers) {
      for (const { pattern, label } of rules) {
        if (!reported.has(label) && pattern.test(view)) {
          reported.add(label);
          bucket.push(label);
        }
      }
    }
  }

  // -------------------------------------------------------------------------
  // Unicode Tag steganography check (DeepMind traps kat. 1)
  // -------------------------------------------------------------------------
  if (containsUnicodeTags(text)) {
    const tagLabel = 'unicode-tags: invisible Unicode Tag characters detected (U+E0000 block steganography)';
    if (!reported.has(tagLabel)) {
      reported.add(tagLabel);
      high.push(tagLabel);
    }

    const hiddenText = decodeUnicodeTags(text);
    for (const { pattern, label } of CRITICAL_PATTERNS) {
      const dedupeKey = `unicode-tags+${label}`;
      // Escalate only when the payload is visible solely after decoding the tags.
      if (!reported.has(dedupeKey) && pattern.test(hiddenText) && !pattern.test(text)) {
        reported.add(dedupeKey);
        critical.push(`${label} (hidden via Unicode Tag steganography)`);
      }
    }
  }

  // -------------------------------------------------------------------------
  // HITL cognitive load check (v5.0 S4)
  // -------------------------------------------------------------------------
  const buried = checkCognitiveLoadTrap(text);
  if (buried.found && !reported.has(buried.label)) {
    reported.add(buried.label);
    medium.push(buried.label);
  }

  // Convenience fields
  const found = critical.length > 0 || high.length > 0 || medium.length > 0;

  let severity = null;
  if (critical.length > 0) {
    severity = 'critical';
  } else if (high.length > 0) {
    severity = 'high';
  } else if (medium.length > 0) {
    severity = 'medium';
  }

  const tagWith = (tier) => (label) => ({ label, severity: tier });
  const patterns = [
    ...critical.map(tagWith('critical')),
    ...high.map(tagWith('high')),
    ...medium.map(tagWith('medium')),
  ];

  return { critical, high, medium, found, severity, patterns };
}