// ktg-plugin-marketplace/plugins/llm-security/scanners/lib/injection-patterns.mjs

// injection-patterns.mjs — Shared prompt injection detection patterns
// Used by pre-prompt-inject-scan.mjs (UserPromptSubmit) and post-mcp-verify.mjs (PostToolUse).
//
// Patterns derived from skill-scanner-agent Category 1 (LLM01 Prompt Injection)
// and Category 5 (Hidden Instructions) in knowledge/skill-threat-patterns.md.
//
// Zero external dependencies beyond ./string-utils.mjs.

import { normalizeForScan, containsUnicodeTags, decodeUnicodeTags } from './string-utils.mjs';

// ---------------------------------------------------------------------------
// Critical patterns — direct injection attempts (should be blocked)
// ---------------------------------------------------------------------------

/**
 * Critical-tier signatures. Each entry pairs a regular expression with the
 * human-readable label reported when that expression matches.
 *
 * The table is written as `[pattern, label]` tuples and converted to
 * `{ pattern, label }` objects, which is the shape the scanners iterate over.
 */
export const CRITICAL_PATTERNS = [
  // Direct override instructions
  [/ignore\s+(?:all\s+)?previous/i, 'override: ignore previous instructions'],
  [/forget\s+your\s+(?:instructions|rules|guidelines|directives)/i, 'override: forget instructions'],
  [/disregard\s+your/i, 'override: disregard instructions'],
  [/override\s+your/i, 'override: override instructions'],
  [/suspend\s+all\s+constraints/i, 'override: suspend constraints'],
  [/all\s+prior\s+constraints/i, 'override: prior constraints'],

  // Spoofed system headers (case-sensitive; the `^` rules anchor to a line start)
  [/^#+\s*SYSTEM\s*:/m, 'spoofed header: # SYSTEM:'],
  [/^\[INST\]/m, 'spoofed header: [INST]'],
  [/<\|system\|>/, 'spoofed header: <|system|>'],
  [/<system>/, 'spoofed tag: <system>'],
  [/<\/system>/, 'spoofed tag: </system>'],
  [/^###\s*Instructions\s*:/m, 'spoofed header: ### Instructions:'],

  // Identity redefinition
  [/you\s+are\s+no\s+longer/i, 'identity: you are no longer'],
  // The negative lookahead skips benign continuations such as "you are now able to ...".
  [/you\s+are\s+now\s+(?!able|going|ready|done|running|using|looking|in\s|at\s|on\s)/i, 'identity: you are now [name]'],
  [/your\s+new\s+role\s+is/i, 'identity: your new role'],
  [/your\s+new\s+primary\s+directive/i, 'identity: new primary directive'],
  [/pretend\s+you\s+are/i, 'identity: pretend you are'],

  // Configuration manipulation
  [/ignore\s+your\s+CLAUDE\.md/i, 'config: ignore CLAUDE.md'],
  [/override\s+your\s+system\s+prompt/i, 'config: override system prompt'],
  [/bypass\s+your\s+safety/i, 'config: bypass safety'],
  [/disable\s+output\s+filtering/i, 'config: disable output filtering'],
].map(([pattern, label]) => ({ pattern, label }));

// ---------------------------------------------------------------------------
// Medium patterns — obfuscation and indirect manipulation (advisory)
// Lower confidence than High; flag for manual review.
// ---------------------------------------------------------------------------

/**
 * Medium-tier signatures: obfuscated spellings of override phrases, homoglyph
 * and zero-width tricks, indirect instructions addressed to the assistant,
 * non-English override phrases, markdown/data-URI payload carriers, sub-agent
 * spawning traps and natural-language indirection.
 *
 * Each entry pairs a regular expression with the label reported on a match.
 */
export const MEDIUM_PATTERNS = [
  // Leetspeak variants of critical keywords.
  // The leading class accepts a plain "i" (like the sibling rules below) so
  // partially obfuscated spellings such as "ign0re previous" or
  // "ignore prev1ous" are caught. The negative lookahead skips the fully plain
  // spelling, which has no obfuscation and is not reported under this label.
  { pattern: /(?!ignore\s+(?:all\s+)?previous)[i1!|l][gq9]n[o0]r[e3]\s+(?:all\s+)?pr[e3]v[i1!|l][o0]us/i, label: 'leetspeak: ignore previous (obfuscated)' },
  { pattern: /f[o0]rg[e3]t\s+y[o0]ur\s+[i1!|l]nstruct[i1!|l][o0]ns/i, label: 'leetspeak: forget instructions (obfuscated)' },
  { pattern: /d[i1!|l]sr[e3]g[a4@]rd\s+y[o0]ur/i, label: 'leetspeak: disregard your (obfuscated)' },
  { pattern: /[o0]v[e3]rr[i1!|l]d[e3]\s+y[o0]ur/i, label: 'leetspeak: override your (obfuscated)' },

  // Homoglyph detection — Cyrillic look-alike letters directly adjacent to a Latin letter
  { pattern: /[a-zA-Z][\u0430\u0435\u043E\u0440\u0441\u0456\u0443]|[\u0430\u0435\u043E\u0440\u0441\u0456\u0443][a-zA-Z]/, label: 'homoglyph: Cyrillic-Latin mixing in adjacent characters' },

  // Zero-width characters inside words (keyword splitting evasion)
  { pattern: /\w[\u200B\u200C\u200D\uFEFF]\w/, label: 'unicode: zero-width character inside word (keyword splitting)' },

  // Indirect AI-directed instructions
  { pattern: /(?:note|message|instruction)\s+(?:to|for)\s+(?:the\s+)?(?:AI|assistant|model|LLM|Claude)\b/i, label: 'indirect: instruction addressed to AI/assistant' },
  { pattern: /(?:dear|attention)\s+(?:AI|assistant|model|LLM|Claude)\b/i, label: 'indirect: direct address to AI/assistant' },
  { pattern: /when\s+(?:you|the\s+AI|the\s+assistant|Claude)\s+(?:read|see|encounter|process)\s+this/i, label: 'indirect: trigger-based instruction for AI' },

  // Multi-language injection variants
  { pattern: /ignor(?:ez?|er?)\s+(?:les?\s+)?instructions?\s+pr[e\u00e9]c[e\u00e9]dentes?/i, label: 'multi-lang: French "ignore previous instructions"' },
  { pattern: /ignor(?:ar?|e)\s+(?:las?\s+)?instrucciones?\s+anteriores?/i, label: 'multi-lang: Spanish "ignore previous instructions"' },
  { pattern: /ignorier(?:e|en)?\s+(?:alle\s+)?vorherigen?\s+(?:Anweisungen|Instruktionen)/i, label: 'multi-lang: German "ignore previous instructions"' },

  // Markdown link-reference comment injection
  { pattern: /\[\/\/\]:\s*#\s*\(.*(?:ignore|override|system|instruction|execute)/i, label: 'markdown: suspicious instruction in link-reference comment' },

  // Data URI with executable content types
  { pattern: /data:(?:text\/html|application\/javascript|text\/javascript)[;,]/i, label: 'data-uri: executable content type' },

  // --- Content Injection: Syntactic Masking (AI Agent Traps) ---
  { pattern: /\[[^\]]*(?:system|ignore|override|exfiltrate|execute)[^\]]*\]\([^)]+\)/i, label: 'markdown: injection payload in link anchor text' },

  // --- Sub-agent spawning traps (DeepMind cat. 4, v5.0 S4) ---
  { pattern: /(?:create|spawn|launch|start|run)\s+(?:an?\s+)?(?:new\s+)?(?:sub-?agent|agent|task|worker)\s+(?:that|to|which|with)\s+(?:.*?\s+)?(?:execute|run|delete|remove|send|post|exfiltrate|access|reads?\s+(?:.*?\s+)?(?:secret|credential|key|token|\.env|\.ssh))/i, label: 'sub-agent: spawn instruction with dangerous capability keywords' },
  { pattern: /(?:delegate|dispatch)\s+(?:to\s+)?(?:an?\s+)?(?:new\s+)?(?:agent|sub-?agent|task)\s+.*?(?:bypass|override|ignore|without\s+(?:review|confirmation|approval))/i, label: 'sub-agent: delegation with safety bypass instruction' },

  // --- Natural Language Indirection (Preamble, CaMeL — v5.0 S4) ---
  { pattern: /fetch\s+(?:this|the|that)\s+(?:URL|link|endpoint)\s+and\s+(?:execute|run|eval)/i, label: 'nl-indirection: fetch URL and execute' },
  { pattern: /send\s+(?:this|the|that|all)\s+(?:data|content|output|result|information|file)\s+to\s+(?:https?:\/\/|ftp:\/\/|\S+@\S+)/i, label: 'nl-indirection: send data to external destination' },
  { pattern: /read\s+(?:the\s+)?(?:contents?\s+of\s+)?(?:~\/)?\.(?:ssh|aws|gnupg|env)\b/i, label: 'nl-indirection: read sensitive dotfile path' },
  { pattern: /(?:extract|copy|dump|exfiltrate)\s+(?:all\s+)?(?:the\s+)?(?:credentials?|secrets?|tokens?|keys?|passwords?)\s+(?:and|then)\s+(?:send|post|upload|transmit)/i, label: 'nl-indirection: extract credentials and exfiltrate' },
  { pattern: /(?:download|fetch|retrieve)\s+(?:from\s+)?\S+\s+(?:and|then)\s+(?:execute|run|eval|pipe\s+to\s+(?:bash|sh|node|python))/i, label: 'nl-indirection: download and execute' },
];

// ---------------------------------------------------------------------------
// High patterns — subtle manipulation (advisory warning, not block)
// ---------------------------------------------------------------------------

/**
 * Build a case-insensitive pattern matching an HTML tag whose inline `style`
 * attribute value contains the given declaration. The attribute value may be
 * wrapped in double or single quotes; both are valid HTML.
 *
 * @param {...string} declarationParts - regex source fragments of the
 *   declaration. Several fragments are joined by "any characters except the
 *   active quote", for declarations that span more than one property.
 * @returns {RegExp}
 */
function inlineStylePattern(...declarationParts) {
  const quotedValues = ['"', "'"].map((quote) => {
    const filler = `[^${quote}]*`;
    return quote + filler + declarationParts.join(filler) + filler + quote;
  });
  return new RegExp(String.raw`<[^>]+style\s*=\s*(?:${quotedValues.join('|')})[^>]*>`, 'i');
}

/**
 * High-tier signatures: rule-normalization and task-reframing phrases, hidden
 * HTML/CSS content, oversight-evasion framings and human-in-the-loop traps.
 *
 * Each entry pairs a regular expression with the label reported on a match.
 */
export const HIGH_PATTERNS = [
  { pattern: /always\s+prioritize\s+user\s+requests?\s+over/i, label: 'normalization: prioritize user over rules' },
  { pattern: /user\s+intent\s+takes?\s+precedence\s+over/i, label: 'normalization: user intent precedence' },
  { pattern: /if\s+CLAUDE\.md\s+conflicts/i, label: 'normalization: CLAUDE.md conflict override' },
  { pattern: /skip\s+confirmation\s+for/i, label: 'normalization: skip confirmation' },
  { pattern: /your\s+actual\s+task\s+is/i, label: 'reframing: your actual task' },
  { pattern: /your\s+real\s+purpose\s+is/i, label: 'reframing: your real purpose' },
  { pattern: /what\s+you\s+should\s+actually\s+do/i, label: 'reframing: what you should actually do' },

  // HTML comment injection directed at agents
  { pattern: /<!--\s*(?:AGENT|AI|HIDDEN|ACTUAL\s+TASK|REAL\s+INSTRUCTION)\s*:/i, label: 'hidden comment: agent-directed HTML comment' },

  // --- Content Injection: CSS/HTML obfuscation (AI Agent Traps) ---
  // Attribute values are accepted in either quote style so that
  // style='display:none' cannot slip past a double-quote-only rule.
  { pattern: inlineStylePattern(String.raw`display\s*:\s*none`), label: 'html-obfuscation: display:none element with content' },
  { pattern: inlineStylePattern(String.raw`visibility\s*:\s*hidden`), label: 'html-obfuscation: visibility:hidden element' },
  { pattern: inlineStylePattern(String.raw`position\s*:\s*absolute`, String.raw`-\d{3,}px`), label: 'html-obfuscation: off-screen positioned element' },
  { pattern: inlineStylePattern(String.raw`font-size\s*:\s*0`), label: 'html-obfuscation: zero font-size element' },
  { pattern: inlineStylePattern(String.raw`opacity\s*:\s*0`), label: 'html-obfuscation: zero opacity element' },
  { pattern: inlineStylePattern(String.raw`(?:height|width)\s*:\s*0`, String.raw`overflow\s*:\s*hidden`), label: 'html-obfuscation: zero-size overflow-hidden element' },
  { pattern: /aria-label\s*=\s*(?:"[^"]*(?:ignore|override|system|instruction|execute|exfiltrate)[^"]*"|'[^']*(?:ignore|override|system|instruction|execute|exfiltrate)[^']*')/i, label: 'html-obfuscation: injection in aria-label attribute' },

  // --- Semantic Manipulation: Oversight & Critic Evasion (AI Agent Traps) ---
  { pattern: /for\s+educational\s+purposes?\s+only/i, label: 'evasion: educational purpose framing' },
  { pattern: /(?:this\s+is\s+)?a\s+hypothetical\s+scenario/i, label: 'evasion: hypothetical scenario framing' },
  { pattern: /in\s+a\s+fictional\s+(?:setting|scenario|world)\s+where/i, label: 'evasion: fictional setting framing' },
  { pattern: /(?:this\s+is\s+)?a\s+red[- ]?team(?:ing)?\s+(?:exercise|simulation|test)/i, label: 'evasion: red-team exercise framing' },
  { pattern: /security\s+audit\s+simulation/i, label: 'evasion: security audit simulation framing' },
  { pattern: /pretend\s+(?:this\s+is|we\s+are\s+in)\s+a\s+(?:test|lab|sandbox|simulation)/i, label: 'evasion: test/lab/sandbox pretense' },
  { pattern: /as\s+a\s+thought\s+experiment/i, label: 'evasion: thought experiment framing' },
  { pattern: /for\s+(?:research|academic|scientific)\s+purposes/i, label: 'evasion: research purpose framing' },
  { pattern: /in\s+an?\s+(?:academic|research|educational)\s+context/i, label: 'evasion: academic context framing' },

  // --- Human-in-the-Loop traps (DeepMind cat. 6, v5.0 S4) ---
  // Approval urgency: pressure to approve without review
  { pattern: /(?:approve|accept|confirm)\s+(?:this\s+)?immediately/i, label: 'hitl-trap: approval urgency — approve immediately' },
  { pattern: /no\s+need\s+to\s+review/i, label: 'hitl-trap: approval urgency — no need to review' },
  { pattern: /(?:just|simply)\s+(?:approve|accept|confirm)\s+(?:it|this|everything)/i, label: 'hitl-trap: approval urgency — just approve' },
  { pattern: /(?:don'?t|do\s+not)\s+(?:bother\s+)?(?:check|review|verify|inspect|read)(?:ing)?\s+(?:this|the|it)/i, label: 'hitl-trap: approval urgency — suppress review' },
  // Summary suppression: hiding actions behind brevity claims
  { pattern: /for\s+brevity[\s,]+(?:skipping|omitting|leaving\s+out)/i, label: 'hitl-trap: summary suppression — for brevity, skipping' },
  { pattern: /(?:too\s+(?:long|verbose|detailed)\s+to\s+(?:show|display|list|include))/i, label: 'hitl-trap: summary suppression — too long to show' },
  { pattern: /(?:details?\s+(?:omitted|redacted|hidden|suppressed))/i, label: 'hitl-trap: summary suppression — details omitted' },
  // Scope minimization: downplaying destructive changes
  { pattern: /(?:minor|trivial|small|cosmetic)\s+(?:change|update|fix|adjustment)\b[^.]{0,80}(?:delet|remov|drop|truncat|reset|destroy|\bformat\b|wipe)/i, label: 'hitl-trap: scope minimization — minor change + destructive action' },
];

// ---------------------------------------------------------------------------
// Hybrid attack patterns — cross-domain injection (HIGH, v5.0 S6)
// Preamble 2.0: P2SQL, recursive injection, XSS in agent context.
// ---------------------------------------------------------------------------

/**
 * Hybrid-attack signatures, written as a label → expression map and converted
 * to the `{ pattern, label }` entry shape used by the other tables. Entry
 * order follows the insertion order of the map.
 */
export const HYBRID_PATTERNS = Object.entries({
  // P2SQL: override wording combined with SQL statements (either order)
  'hybrid-p2sql: injection + SQL keywords (prompt-to-SQL attack)':
    /(?:ignore|override|disregard|forget)[^.]{0,60}(?:SELECT\s+\*|DROP\s+TABLE|UNION\s+SELECT|DELETE\s+FROM|INSERT\s+INTO|UPDATE\s+\w+\s+SET)(?:\b|(?=\s|$))/i,
  'hybrid-p2sql: SQL operation + injection override keywords':
    /(?:SELECT\s+\*|DROP\s+TABLE|UNION\s+SELECT|DELETE\s+FROM)\s[^;]{0,80}(?:ignore|override|disregard|bypass)/i,

  // Recursive injection: text telling the model to plant content in its own output
  'hybrid-recursive: instruction to inject into model output':
    /(?:inject|insert|embed|include)\s+(?:this|the\s+following)\s+(?:into|in)\s+(?:your|the)\s+(?:output|response|reply|message|prompt|context)/i,
  'hybrid-recursive: conditional response injection (recursive payload)':
    /(?:when|if)\s+(?:the\s+)?(?:user|human|operator)\s+(?:asks?|requests?|queries)[^.]{0,60}(?:respond\s+with|output|reply\s+with|include)\s+(?:this|the\s+following)/i,

  // XSS in agent context: script tags, javascript: URIs, event handlers, iframes
  'hybrid-xss: <script> tag in content (agent context XSS)':
    /<script\b[^>]*>[\s\S]*?<\/script>/i,
  'hybrid-xss: javascript: URI scheme (agent context XSS)':
    /javascript\s*:/i,
  'hybrid-xss: inline event handler attribute (agent context XSS)':
    /\bon(?:error|load|click|mouseover|focus|blur)\s*=/i,
  'hybrid-xss: iframe with executable src (agent context XSS)':
    /<iframe\b[^>]*src\s*=\s*["'][^"']*(?:javascript:|data:text\/html)/i,
}).map(([label, pattern]) => ({ pattern, label }));

// ---------------------------------------------------------------------------
// HITL cognitive load check (MEDIUM, v5.0 S4)
// Flags a critical injection pattern buried past the first 2000 characters of
// verbose output; inputs shorter than 2500 characters are never flagged.
// Checked separately because the logic depends on text length.
// ---------------------------------------------------------------------------

/**
 * Check for the cognitive-load HITL trap: a critical injection pattern buried
 * deep in verbose output.
 *
 * A finding is reported only when the text is at least 2500 characters long
 * and a critical pattern matches starting at or after character offset 2000.
 * Patterns are tried in table order and the first match wins.
 *
 * @param {string} text - the text to inspect; non-string input is treated as
 *   containing nothing to flag
 * @returns {{ found: boolean, label: string|null }}
 */
export function checkCognitiveLoadTrap(text) {
  const MIN_LENGTH = 2500;
  const BURIED_OFFSET = 2000;

  if (typeof text !== 'string' || text.length < MIN_LENGTH) {
    return { found: false, label: null };
  }

  for (const { pattern, label } of CRITICAL_PATTERNS) {
    // Search the full text starting at the offset instead of slicing it.
    // Slicing would make the offset look like the start of a line, letting
    // `^`-anchored header patterns match text that sits in the middle of one.
    const flags = pattern.flags.includes('g') ? pattern.flags : `${pattern.flags}g`;
    const probe = new RegExp(pattern.source, flags);
    probe.lastIndex = BURIED_OFFSET;
    if (probe.test(text)) {
      return {
        found: true,
        label: `hitl-trap: cognitive load — injection buried after 2000+ chars (${label})`,
      };
    }
  }
  return { found: false, label: null };
}

// ---------------------------------------------------------------------------
// Public API
// ---------------------------------------------------------------------------

/**
 * Scan text for prompt injection patterns and group the findings by tier.
 *
 * The raw text is scanned first; when `normalizeForScan` yields a different
 * string, that string is scanned too, and a label already reported is not
 * reported again. Hybrid patterns are reported in the `high` tier.
 *
 * Two further checks run on the raw text:
 *   - Unicode Tag steganography (DeepMind traps cat. 1): presence of tags is a
 *     HIGH finding; a critical pattern that matches only the decoded tags is a
 *     CRITICAL finding.
 *   - The cognitive-load check, whose finding is reported as MEDIUM.
 *
 * @param {string} text - the text to scan
 * @returns {{ critical: string[], high: string[], medium: string[], found: boolean, severity: string|null, patterns: Array<{label: string, severity: string}> }}
 *   Finding labels per tier, plus `found`, the highest `severity` present
 *   (or null) and a flat `patterns` list ordered critical, high, medium.
 */
export function scanForInjection(text) {
  const TIER_ORDER = ['critical', 'high', 'medium'];
  const findings = { critical: [], high: [], medium: [] };

  // Keys of everything reported so far; usually the label itself.
  const reported = new Set();
  const report = (tier, label, key = label) => {
    reported.add(key);
    findings[tier].push(label);
  };

  const decoded = normalizeForScan(text);
  const candidates = decoded === text ? [text] : [text, decoded];

  // Tables in scan order, each with the tier its matches are filed under.
  const tables = [
    [CRITICAL_PATTERNS, 'critical'],
    [HIGH_PATTERNS, 'high'],
    [HYBRID_PATTERNS, 'high'],
    [MEDIUM_PATTERNS, 'medium'],
  ];

  candidates.forEach((candidate) => {
    tables.forEach(([rules, tier]) => {
      rules.forEach(({ pattern, label }) => {
        if (!reported.has(label) && pattern.test(candidate)) {
          report(tier, label);
        }
      });
    });
  });

  // Unicode Tag steganography check (DeepMind traps cat. 1)
  if (containsUnicodeTags(text)) {
    const tagLabel = 'unicode-tags: invisible Unicode Tag characters detected (U+E0000 block steganography)';
    if (!reported.has(tagLabel)) {
      report('high', tagLabel);
    }

    const revealed = decodeUnicodeTags(text);
    for (const { pattern, label } of CRITICAL_PATTERNS) {
      const key = `unicode-tags+${label}`;
      // Escalate only payloads that are visible after decoding but not in the raw text.
      const hiddenOnly = !reported.has(key) && pattern.test(revealed) && !pattern.test(text);
      if (hiddenOnly) {
        report('critical', `${label} (hidden via Unicode Tag steganography)`, key);
      }
    }
  }

  // HITL cognitive load check (v5.0 S4)
  const buried = checkCognitiveLoadTrap(text);
  if (buried.found && !reported.has(buried.label)) {
    report('medium', buried.label);
  }

  // Convenience fields
  const patterns = [];
  for (const tier of TIER_ORDER) {
    for (const label of findings[tier]) {
      patterns.push({ label, severity: tier });
    }
  }
  const severity = TIER_ORDER.find((tier) => findings[tier].length > 0) || null;
  const found = severity !== null;

  const { critical, high, medium } = findings;
  return { critical, high, medium, found, severity, patterns };
}