ktg-plugin-marketplace/plugins/llm-security/hooks/scripts/post-mcp-verify.mjs

#!/usr/bin/env node
// Hook: post-mcp-verify.mjs
// Event: PostToolUse (ALL tools)
// Purpose: Monitor tool output for data leakage and indirect prompt injection.
//
// Protocol:
//   - Read JSON from stdin: { tool_name, tool_input, tool_output }
//   - Advisory only: always exit 0. Output systemMessage via stdout to warn user.
//
// v2.3.0: Expanded from Bash-only to ALL tools.
//   - Bash-specific: secret scanning, external URL detection, large MCP output
//   - Universal: indirect prompt injection scanning (OWASP LLM01)
//   - Short output (<100 chars) skipped for performance
// v5.0.0: MEDIUM injection patterns included in advisory output.
// v5.0.0-S4: HITL trap patterns (HIGH), sub-agent spawn (MEDIUM), NL indirection (MEDIUM),
//            cognitive load trap (MEDIUM) — all via scanForInjection() from injection-patterns.mjs.

import { readFileSync, writeFileSync, appendFileSync, existsSync } from 'node:fs';
import { join } from 'node:path';
import { tmpdir } from 'node:os';
import { scanForInjection } from '../../scanners/lib/injection-patterns.mjs';
import { checkDescriptionDrift } from '../../scanners/lib/mcp-description-cache.mjs';
import { getPolicyValue } from '../../scanners/lib/policy-loader.mjs';
import { decodeHtmlEntities } from '../../scanners/lib/string-utils.mjs';

// ---------------------------------------------------------------------------
// Secret patterns — same set as pre-edit-secrets.mjs so any secret that
// slips through a write guard will at least be flagged in command output.
// Only checked for Bash tool output.
// ---------------------------------------------------------------------------
// Table of { name, pattern } pairs consumed by scanForSecrets(). `name` is the
// label shown in the advisory; `pattern` is tested against the raw output text.
// None of the regexes uses the `g` or `y` flag, so `.test()` keeps no state
// between calls and the table can be reused safely.
const SECRET_PATTERNS = [
  // Literal "AKIA" followed by exactly 16 uppercase letters or digits.
  { name: 'AWS Access Key ID',          pattern: /AKIA[0-9A-Z]{16}/ },
  // One of the ghp_/gho_/ghu_/ghs_/ghr_ prefixes plus at least 36 token characters.
  { name: 'GitHub Token',               pattern: /(?:ghp|gho|ghu|ghs|ghr)_[A-Za-z0-9_]{36,}/ },
  // "npm_" followed by 36 alphanumeric characters.
  { name: 'npm Token',                  pattern: /npm_[A-Za-z0-9]{36}/ },
  // PEM header line; the key-type word (RSA/EC/DSA/OPENSSH) is optional.
  { name: 'Private Key PEM Block',      pattern: /-----BEGIN (?:RSA |EC |DSA |OPENSSH )?PRIVATE KEY-----/ },
  // AccountKey= / SharedAccessKey= / sig= followed by 20+ base64-style characters.
  { name: 'Azure Connection String',    pattern: /(?:AccountKey|SharedAccessKey|sig)=[A-Za-z0-9+/=]{20,}/ },
  // NOTE(review): requires only one token character after "Bearer ", so ordinary
  // prose such as "Bearer token" also matches — confirm this breadth is intended.
  { name: 'Bearer Token',              pattern: /Bearer\s+[A-Za-z0-9\-._~+/]+=*/ },
  // scheme://...@... — only URLs that contain an "@" (i.e. embedded user info) match.
  { name: 'Database connection string', pattern: /(?:postgres|mysql|mongodb|redis):\/\/[^\s]+@[^\s]+/i },
  {
    // keyword, then "=" or ":", then a quoted value of at least 8 characters.
    name: 'Generic credential assignment',
    pattern: /(?:password|passwd|secret|token|api[_-]?key)\s*[=:]\s*['"][^'"]{8,}['"]/i,
  },
];

// ---------------------------------------------------------------------------
// MCP-indicator keywords — commands that suggest MCP tool usage.
// We give extra weight to findings when the command looks MCP-related.
// Only relevant for Bash tool.
// ---------------------------------------------------------------------------
// Lowercase substrings looked up by isMcpRelatedCommand() in the lowercased
// command line. Because matching is by substring, the bare 'mcp' entry already
// covers every other entry that contains "mcp" ('claude mcp', 'mcp-server');
// those entries are redundant for matching and serve as documentation.
const MCP_INDICATORS = [
  'mcp',
  'model_context_protocol',
  'claude mcp',
  'npx @anthropic',
  'mcp-server',
  'tool_use',
  'tool_result',
];

// ---------------------------------------------------------------------------
// Large data dump heuristic — output longer than this threshold (bytes) from
// an MCP-related command may indicate exfiltration or accidental bulk dump.
// Only checked for Bash tool.
// ---------------------------------------------------------------------------
// Compared against outputText.length, i.e. measured in string length (UTF-16
// code units), which only approximates the byte size of the output.
const LARGE_OUTPUT_THRESHOLD = 50_000; // ~50 KB

// ---------------------------------------------------------------------------
// Minimum output length for injection scanning (performance optimization).
// Short output is unlikely to contain meaningful injection payloads.
// The same floor also gates the HTML / markdown / SVG checks further down.
// ---------------------------------------------------------------------------
const MIN_INJECTION_SCAN_LENGTH = 100;

// ---------------------------------------------------------------------------
// Per-tool volume tracking — tracks cumulative output per MCP tool within
// a session. Warns when a single tool produces disproportionate output.
// State file: ${os.tmpdir()}/llm-security-mcp-volume-${ppid}.json
// ---------------------------------------------------------------------------
// Cumulative per-tool output size at which the volume advisory fires.
// Read from policy ('mcp' section, 'volume_threshold_bytes' key); the third
// argument (100_000) is presumably the fallback when the policy does not
// define the key — confirm against policy-loader.mjs.
const MCP_TOOL_VOLUME_THRESHOLD = getPolicyValue('mcp', 'volume_threshold_bytes', 100_000);
// State file lives in the OS temp directory and is keyed by the parent process
// id, so every hook invocation spawned by the same parent shares one file.
const VOLUME_STATE_FILE = join(tmpdir(), `llm-security-mcp-volume-${process.ppid}.json`);

// ---------------------------------------------------------------------------
// Unexpected external URL patterns in curl/wget invocations within output.
// Only checked for Bash tool.
// ---------------------------------------------------------------------------
// Captures the URL (group 1) of a curl/wget invocation unless its host is a
// loopback/unspecified address. The exemption is anchored on a host boundary
// (":", "/", "?", "#", whitespace, quote, or end of input) so that look-alike
// hosts such as "localhost.evil.example" or "127.evil.example" are still
// reported, and the bracketed IPv6 loopback form "[::1]" — the only way ::1
// can appear in a URL — is recognised as local.
// Carries the `g` flag: callers must clone the regex (or reset lastIndex)
// before iterating with exec().
const EXTERNAL_URL_PATTERN =
  /(?:curl|wget)\s+(?:-[a-zA-Z]+\s+)*['"]?(https?:\/\/(?!(?:localhost|127(?:\.\d{1,3}){3}|0\.0\.0\.0|\[::1\])(?=[:\/?#\s'"]|$))[^\s'"]+)/gi;

// ---------------------------------------------------------------------------
// Helpers
// ---------------------------------------------------------------------------

/**
 * Decide whether a shell command looks MCP-related.
 *
 * @param {string} command - Shell command line; falsy values yield `false`.
 * @returns {boolean} `true` when the lowercased command contains any entry of
 *   MCP_INDICATORS as a substring.
 */
function isMcpRelatedCommand(command) {
  if (!command) {
    return false;
  }
  const haystack = command.toLowerCase();
  for (const keyword of MCP_INDICATORS) {
    if (haystack.includes(keyword)) {
      return true;
    }
  }
  return false;
}

/**
 * Report which secret patterns occur in a piece of text.
 *
 * @param {string} text - Text to inspect (tool output).
 * @returns {string[]} Names of every SECRET_PATTERNS entry whose regex matches,
 *   in table order; empty when nothing matches.
 */
function scanForSecrets(text) {
  return SECRET_PATTERNS
    .filter(({ pattern }) => pattern.test(text))
    .map(({ name }) => name);
}

/**
 * Collect the distinct URLs targeted by curl/wget invocations in a text.
 *
 * @param {string} text - Text to scan (tool output).
 * @returns {string[]} Unique URLs (capture group 1 of EXTERNAL_URL_PATTERN) in
 *   order of first appearance.
 */
function extractExternalUrls(text) {
  // Work on a private copy of the shared global regex so its lastIndex is
  // never read or advanced by this scan.
  const scanner = new RegExp(EXTERNAL_URL_PATTERN.source, EXTERNAL_URL_PATTERN.flags);
  const found = Array.from(String(text).matchAll(scanner), (hit) => hit[1]);
  return Array.from(new Set(found));
}

/**
 * Write an advisory to stdout as a single JSON object of the form
 * `{ "systemMessage": <message> }` (no trailing newline).
 *
 * @param {string} message - Advisory text to surface to the user.
 */
function emitAdvisory(message) {
  const payload = { systemMessage: message };
  process.stdout.write(JSON.stringify(payload));
}

/**
 * Format a tool identifier for advisory messages.
 *
 * - Bash: the command, truncated to 150 characters with a trailing "...".
 * - Read / WebFetch: the tool name plus the file path / URL (first 150 chars).
 * - MCP tools (name starts with "mcp__"): the full tool name.
 * - Anything else: the tool name.
 *
 * The hook payload is external JSON, so `toolName` and the fields of
 * `toolInput` are not guaranteed to be strings. Non-string values are coerced
 * instead of being passed to string methods, so this formatter never throws.
 *
 * @param {unknown} toolName - Value of `tool_name` from the hook payload.
 * @param {object | null | undefined} toolInput - Value of `tool_input`.
 * @returns {string} One-line context description.
 */
function formatToolContext(toolName, toolInput) {
  // null/undefined become '' (same as the previous `?? ''`); other
  // non-strings are stringified.
  const toText = (value) => (typeof value === 'string' ? value : String(value ?? ''));

  if (toolName === 'Bash') {
    const cmd = toText(toolInput?.command);
    return `Command: ${cmd.slice(0, 150)}${cmd.length > 150 ? '...' : ''}`;
  }
  if (toolName === 'Read') {
    const target = toText(toolInput?.file_path);
    return `Tool: Read, file: ${target.slice(0, 150)}`;
  }
  if (toolName === 'WebFetch') {
    const target = toText(toolInput?.url);
    return `Tool: WebFetch, url: ${target.slice(0, 150)}`;
  }
  // MCP tools often have descriptive names
  if (typeof toolName === 'string' && toolName.startsWith('mcp__')) {
    return `MCP tool: ${toolName}`;
  }
  return `Tool: ${toolName}`;
}

// ---------------------------------------------------------------------------
// Per-tool MCP volume state
// ---------------------------------------------------------------------------

/**
 * Load per-tool volume state from VOLUME_STATE_FILE.
 *
 * The file lives in a shared temp directory and may be missing, truncated or
 * hold unexpected JSON (`null`, an array, missing keys). Callers index
 * `state.volumes[...]` and `state.warned[...]` directly, so the result is
 * always normalised to that shape; anything unusable is replaced by an empty
 * record rather than being returned as-is.
 *
 * @returns {{ volumes: Record<string, number>, warned: Record<string, boolean> }}
 */
function loadVolumeState() {
  const isRecord = (value) =>
    value !== null && typeof value === 'object' && !Array.isArray(value);

  try {
    if (existsSync(VOLUME_STATE_FILE)) {
      const parsed = JSON.parse(readFileSync(VOLUME_STATE_FILE, 'utf-8'));
      if (isRecord(parsed)) {
        return {
          volumes: isRecord(parsed.volumes) ? parsed.volumes : {},
          warned: isRecord(parsed.warned) ? parsed.warned : {},
        };
      }
    }
  } catch {
    // Unreadable or corrupt state is not fatal: tracking restarts from zero.
  }
  return { volumes: {}, warned: {} };
}

/**
 * Persist per-tool volume state to VOLUME_STATE_FILE as JSON.
 * Best-effort: serialisation and write failures are deliberately ignored so a
 * failed write can never break the hook.
 *
 * @param {{ volumes: Record<string, number>, warned: Record<string, boolean> }} state
 */
function saveVolumeState(state) {
  try {
    const serialized = JSON.stringify(state);
    writeFileSync(VOLUME_STATE_FILE, serialized, 'utf-8');
  } catch {
    /* ignore */
  }
}

// ---------------------------------------------------------------------------
// Main
// ---------------------------------------------------------------------------
let input;
try {
  // fd 0 = stdin; the hook payload is a single JSON document.
  const raw = readFileSync(0, 'utf-8');
  input = JSON.parse(raw);
} catch {
  // Cannot parse stdin — exit silently.
  process.exit(0);
}

// The payload is external JSON: string methods are called on tool_name and
// command further down, so anything that is not a string is treated as absent
// instead of being allowed to throw (the hook must always exit 0).
const toolName   = typeof input?.tool_name === 'string' ? input.tool_name : '';
const toolInput  = input?.tool_input      ?? {};
// Accept both field names: `tool_output` (the name this hook has always read)
// and `tool_response` (the name used by Claude Code's PostToolUse payload).
const toolOutput = input?.tool_output     ?? input?.tool_response ?? '';
const command    = typeof toolInput?.command === 'string' ? toolInput.command : '';

// Convert tool_output to string if it isn't already (some hooks pass objects)
const outputText = typeof toolOutput === 'string'
  ? toolOutput
  : JSON.stringify(toolOutput);

// Nothing to inspect.
if (!outputText.trim()) {
  process.exit(0);
}

// Advisory paragraphs collected by the checks below; emitted together at the end.
const advisories = [];
const isBash = toolName === 'Bash';

// Policy: trusted MCP servers are exempt from volume tracking and drift checks
const trustedServerList = getPolicyValue('mcp', 'trusted_servers', []);
// A misconfigured (non-array) policy value is treated as "no trusted servers".
const trustedServers = new Set(Array.isArray(trustedServerList) ? trustedServerList : []);
// MCP tool names have the form mcp__<server>__<tool>; use the same prefix test
// as the MCP checks further down so both agree on what counts as an MCP tool.
const mcpServerName = toolName.startsWith('mcp__') ? toolName.split('__')[1] : null;
const isTrustedMcp = Boolean(mcpServerName) && trustedServers.has(mcpServerName);

// =========================================================================
// Bash-specific checks: secrets, external URLs, large MCP output
// These checks are only relevant for shell command output.
// =========================================================================
if (isBash) {
  // Whether the command line itself looks MCP-related; this widens the URL
  // check and enables the large-output check.
  const mcpContext = isMcpRelatedCommand(command);

  // --- Secret detection in output ---
  const leakedSecretTypes = scanForSecrets(outputText);
  if (leakedSecretTypes.length > 0) {
    const secretBullets = leakedSecretTypes.map((label) => `  - ${label}`).join('\n');
    advisories.push(
      `Potential secret(s) detected in command output:\n` +
      secretBullets + '\n' +
      `  Review the output above before sharing logs, screenshots, or copying to external systems.\n` +
      `  Rotate any exposed credentials immediately.`
    );
  }

  // --- Unexpected external URLs (only flag when in MCP context or multiple hits) ---
  const outboundUrls = extractExternalUrls(outputText);
  const urlsWorthFlagging = outboundUrls.length > 0 && (mcpContext || outboundUrls.length > 2);
  if (urlsWorthFlagging) {
    // List at most five URLs and summarise the remainder.
    const shownUrls = outboundUrls.slice(0, 5).map((url) => `  - ${url}`).join('\n');
    const overflowNote = outboundUrls.length > 5 ? `\n  ... and ${outboundUrls.length - 5} more` : '';
    advisories.push(
      `External URL(s) accessed via curl/wget in command output:\n` +
      shownUrls +
      overflowNote + '\n' +
      `  Verify these requests are expected and that no sensitive data was sent.`
    );
  }

  // --- Large output from MCP-related command ---
  if (mcpContext && outputText.length > LARGE_OUTPUT_THRESHOLD) {
    const sizeKb = Math.round(outputText.length / 1024);
    advisories.push(
      `Large output (${sizeKb} KB) from an MCP-related command.\n` +
      `  Unexpectedly large MCP responses may indicate bulk data retrieval or exfiltration.\n` +
      `  ${formatToolContext(toolName, toolInput)}`
    );
  }
}

// =========================================================================
// Universal check: indirect prompt injection in tool output (LLM01)
// Runs for ALL tools. External content fetched by any tool may contain
// injection payloads targeting the model.
// Skip short output for performance.
// v5.0.0: Now includes MEDIUM patterns in advisory.
// =========================================================================
if (outputText.length >= MIN_INJECTION_SCAN_LENGTH) {
  // Only the first 100 KB of output is scanned.
  const { critical, high, medium } = scanForInjection(outputText.slice(0, 100_000));
  const hasStrongSignal = critical.length > 0 || high.length > 0;

  if (hasStrongSignal || medium.length > 0) {
    const bullet = (label) => `    - ${label}`;
    const detail = [];

    if (critical.length > 0) {
      detail.push(`  Critical injection patterns:`, ...Array.from(critical, bullet));
    }
    if (high.length > 0) {
      detail.push(`  Manipulation signals:`, ...Array.from(high, bullet));
    }
    if (medium.length > 0) {
      if (hasStrongSignal) {
        // Stronger findings are already listed: summarise MEDIUM as a count.
        detail.push(`  Additionally, ${medium.length} lower-confidence signal(s) (MEDIUM).`);
      } else {
        // MEDIUM-only result: list each signal.
        detail.push(`  Obfuscation/manipulation signals (MEDIUM):`, ...Array.from(medium, bullet));
      }
    }

    // Overall severity is the highest tier that produced a finding.
    let severity = 'MEDIUM';
    if (critical.length > 0) {
      severity = 'CRITICAL';
    } else if (high.length > 0) {
      severity = 'HIGH';
    }

    advisories.push(
      `Indirect prompt injection detected in tool output — ${severity} (OWASP LLM01).\n` +
      detail.join('\n') + '\n' +
      `  External content may be attempting to manipulate the model.\n` +
      `  ${formatToolContext(toolName, toolInput)}`
    );
  }
}

// =========================================================================
// HTML content check: CSS-hidden content detection (AI Agent Traps)
// WebFetch and Read may return HTML with visually hidden elements that
// contain adversarial instructions. Agents parse these; humans do not.
// =========================================================================
// Tools whose output is inspected for hidden markup: WebFetch, Read, and any
// tool whose name starts with "mcp__".
const isHtmlSource = toolName === 'WebFetch' || toolName === 'Read' || toolName?.startsWith('mcp__');
if (isHtmlSource && outputText.length >= MIN_INJECTION_SCAN_LENGTH) {
  // Only the first 100 KB is inspected by every check in this block.
  const htmlSlice = outputText.slice(0, 100_000);

  // -------------------------------------------------------------------------
  // E4 (v7.2.0): Markdown link title-attribute injection.
  // Pattern: [text](url "title") — the quoted title is rendered as a tooltip
  // and parsed by agents, but rarely inspected by humans during review.
  // Markdown does not require HTML tags, so this runs before (outside) the
  // "looks like HTML" test below; it is still subject to the isHtmlSource and
  // minimum-length gate above.
  // NOTE(review): only double-quoted titles are matched; titles written with
  // single quotes or parentheses are not captured by this regex.
  // -------------------------------------------------------------------------
  const linkTitleRegex = /\[[^\]]*\]\([^)]*\s+"([^"]+)"\s*\)/g;
  const linkTitles = [];
  let linkTitleMatch;
  while ((linkTitleMatch = linkTitleRegex.exec(htmlSlice)) !== null) {
    // Titles are passed through decodeHtmlEntities before scanning —
    // presumably so entity-encoded payloads are seen in decoded form.
    linkTitles.push(decodeHtmlEntities(linkTitleMatch[1]));
  }
  if (linkTitles.length > 0) {
    // All titles are scanned together as one newline-joined text.
    const titlesText = linkTitles.join('\n');
    const titleScan = scanForInjection(titlesText);
    if (titleScan.critical.length > 0 || titleScan.high.length > 0 || titleScan.medium.length > 0) {
      const labels = [...titleScan.critical, ...titleScan.high, ...titleScan.medium];
      // Severity is the highest tier with at least one finding.
      const sev = titleScan.critical.length > 0 ? 'CRITICAL'
                : titleScan.high.length > 0 ? 'HIGH'
                : 'MEDIUM';
      advisories.push(
        `Markdown link-title injection detected — ${sev} (markdown-link-title-injection, OWASP LLM01).\n` +
        `  Adversarial content hidden in link title attributes — rendered as tooltips, parsed by agents.\n` +
        // At most five labels are listed.
        labels.slice(0, 5).map(l => `  - ${l}`).join('\n') + '\n' +
        `  ${formatToolContext(toolName, toolInput)}`
      );
    }
  }

  // Only run HTML-specific checks if content looks like HTML
  // (contains at least one "<letter...>" tag-like sequence).
  if (/<[a-zA-Z][^>]*>/.test(htmlSlice)) {
    const htmlFindings = [];
    // Detect CSS-hidden elements with substantial content: an element whose
    // inline style contains display:none, visibility:hidden, position:absolute
    // with an offset of -100px or beyond, font-size:0 or opacity:0, followed by
    // at least 20 characters of text.
    // NOTE(review): the style attribute must be double-quoted to match, and
    // nothing anchors the character after the "0", so values such as
    // "opacity: 0.5" or "font-size: 0.8em" also match — confirm acceptable.
    const hiddenElementRegex = /<([a-z]+)\s[^>]*style\s*=\s*"[^"]*(?:display\s*:\s*none|visibility\s*:\s*hidden|position\s*:\s*absolute[^"]*-\d{3,}px|font-size\s*:\s*0|opacity\s*:\s*0)[^"]*"[^>]*>([^<]{20,})/gi;
    let htmlMatch;
    while ((htmlMatch = hiddenElementRegex.exec(htmlSlice)) !== null) {
      // Quote at most 100 characters of the hidden text. The ellipsis test uses
      // the untrimmed length, so it can appear even when the trimmed text fits.
      const content = htmlMatch[2].trim().slice(0, 100);
      htmlFindings.push(`CSS-hidden <${htmlMatch[1]}>: "${content}${htmlMatch[2].length > 100 ? '...' : ''}"`);
    }
    // Detect injection in aria-label attributes: only double-quoted labels of
    // 20+ characters are considered, and only when they contain one of the
    // instruction-like keywords below (case-insensitive via toLowerCase()).
    const ariaRegex = /aria-label\s*=\s*"([^"]{20,})"/gi;
    while ((htmlMatch = ariaRegex.exec(htmlSlice)) !== null) {
      const ariaContent = htmlMatch[1].toLowerCase();
      if (/(?:ignore|override|system|instruction|execute|exfiltrate|forget|disregard)/.test(ariaContent)) {
        htmlFindings.push(`Injection in aria-label: "${htmlMatch[1].slice(0, 100)}"`);
      }
    }
    // CSS-hidden and aria-label findings are reported together in one advisory.
    if (htmlFindings.length > 0) {
      advisories.push(
        `Hidden HTML content detected — possible Agent Trap (OWASP LLM01, Content Injection).\n` +
        `  AI agents parse hidden elements that are invisible to human reviewers.\n` +
        htmlFindings.map(f => `  - ${f}`).join('\n') + '\n' +
        `  ${formatToolContext(toolName, toolInput)}`
      );
    }

    // -----------------------------------------------------------------------
    // E7 (v7.2.0): HTML comment node injection.
    // Generalizes the existing keyword-restricted CRITICAL pattern in
    // injection-patterns.mjs (which only fires on AGENT/AI/HIDDEN markers).
    // The existing pattern still fires (defense-in-depth); this scans the
    // body of any <!-- ... --> comment for the full injection rule set.
    // -----------------------------------------------------------------------
    // Non-greedy and [\s\S] so multi-line comments are captured individually.
    const commentRegex = /<!--([\s\S]*?)-->/g;
    const commentBodies = [];
    let commentMatch;
    while ((commentMatch = commentRegex.exec(htmlSlice)) !== null) {
      const body = commentMatch[1].trim();
      // Empty / whitespace-only comments are skipped.
      if (body.length > 0) {
        commentBodies.push(decodeHtmlEntities(body));
      }
    }
    if (commentBodies.length > 0) {
      // All comment bodies are scanned together as one newline-joined text.
      const commentScan = scanForInjection(commentBodies.join('\n'));
      if (commentScan.critical.length > 0 || commentScan.high.length > 0 || commentScan.medium.length > 0) {
        const labels = [...commentScan.critical, ...commentScan.high, ...commentScan.medium];
        const sev = commentScan.critical.length > 0 ? 'CRITICAL'
                  : commentScan.high.length > 0 ? 'HIGH'
                  : 'MEDIUM';
        advisories.push(
          `HTML comment-node injection detected — ${sev} (html-comment-injection, OWASP LLM01).\n` +
          `  Adversarial content inside <!-- ... --> — invisible in render, parsed by agents.\n` +
          labels.slice(0, 5).map(l => `  - ${l}`).join('\n') + '\n' +
          `  ${formatToolContext(toolName, toolInput)}`
        );
      }
    }

    // -----------------------------------------------------------------------
    // E5 (v7.2.0): SVG element-content injection.
    // Adversarial text inside <desc>, <title>, <metadata>, <foreignObject>
    // is invisible in rendered SVG yet parsed by agents reading the source.
    // -----------------------------------------------------------------------
    // Runs only when an <svg ...> tag is present somewhere in the slice. The
    // element regex itself is not limited to the inside of that <svg>, so a
    // page-level <title> is also scanned once any <svg> appears.
    const isSvgSource = /<svg[\s>]/i.test(htmlSlice);
    if (isSvgSource) {
      // \1 requires the closing tag to repeat the captured opening tag name.
      const svgElementRegex = /<(desc|title|metadata|foreignObject)\b[^>]*>([\s\S]*?)<\/\1>/gi;
      const svgTexts = [];
      let svgMatch;
      while ((svgMatch = svgElementRegex.exec(htmlSlice)) !== null) {
        const inner = svgMatch[2].trim();
        if (inner.length > 0) {
          svgTexts.push(decodeHtmlEntities(inner));
        }
      }
      if (svgTexts.length > 0) {
        const svgScan = scanForInjection(svgTexts.join('\n'));
        if (svgScan.critical.length > 0 || svgScan.high.length > 0 || svgScan.medium.length > 0) {
          const labels = [...svgScan.critical, ...svgScan.high, ...svgScan.medium];
          const sev = svgScan.critical.length > 0 ? 'CRITICAL'
                    : svgScan.high.length > 0 ? 'HIGH'
                    : 'MEDIUM';
          advisories.push(
            `SVG element-content injection detected — ${sev} (svg-element-injection, OWASP LLM01).\n` +
            `  Adversarial text inside <desc>/<title>/<metadata>/<foreignObject> — invisible in render, parsed by agents.\n` +
            labels.slice(0, 5).map(l => `  - ${l}`).join('\n') + '\n' +
            `  ${formatToolContext(toolName, toolInput)}`
          );
        }
      }
    }
  }
}

// =========================================================================
// MCP description drift detection (OWASP MCP05 — Rug Pull)
// Checks if the MCP tool's description has changed since first seen.
// Only relevant for MCP tools that provide a description in tool_input.
// =========================================================================
const isMcpTool = toolName?.startsWith('mcp__');
if (isMcpTool && !isTrustedMcp) {
  // The description may arrive under either key; the first truthy one wins.
  const currentDescription = toolInput?.description || toolInput?.tool_description || '';
  // Only string descriptions longer than 10 characters are checked.
  const isCheckable = typeof currentDescription === 'string' && currentDescription.length > 10;

  if (isCheckable) {
    // Quote at most 120 characters, marking truncation with an ellipsis.
    const excerpt = (text) => `${text.slice(0, 120)}${text.length > 120 ? '...' : ''}`;
    try {
      const driftResult = checkDescriptionDrift(toolName, currentDescription);
      if (driftResult.drift) {
        const previousDescription = driftResult.cached || '';
        advisories.push(
          `MCP tool description drift detected (OWASP MCP05 — Rug Pull).\n` +
          `  ${driftResult.detail}\n` +
          `  Previous: "${excerpt(previousDescription)}"\n` +
          `  Current:  "${excerpt(currentDescription)}"\n` +
          `  A changed tool description may indicate the MCP server has been compromised.`
        );
      }
    } catch {
      /* drift check is advisory, never block */
    }
  }
}

// =========================================================================
// Per-MCP-tool volume tracking
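// Tracks cumulative output size per MCP tool within a session. Warns once per
// tool when its cumulative output reaches MCP_TOOL_VOLUME_THRESHOLD (read via
// getPolicyValue('mcp', 'volume_threshold_bytes', 100_000), i.e. ~100 KB
// unless the policy overrides it).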
// =========================================================================
if (isMcpTool && !isTrustedMcp && outputText.length > 0) {
  const state = loadVolumeState();

  // Add this call's output length to the tool's running total.
  const runningTotal = (state.volumes[toolName] || 0) + outputText.length;
  state.volumes[toolName] = runningTotal;

  // Warn only the first time a tool reaches the threshold.
  if (runningTotal >= MCP_TOOL_VOLUME_THRESHOLD && !state.warned[toolName]) {
    const totalKb = Math.round(runningTotal / 1024);
    const thresholdKb = Math.round(MCP_TOOL_VOLUME_THRESHOLD / 1024);
    advisories.push(
      `MCP tool cumulative output exceeded ${thresholdKb} KB.\n` +
      `  Tool: ${toolName}\n` +
      `  Cumulative output this session: ~${totalKb} KB\n` +
      `  High per-tool volume may indicate bulk data harvesting (OWASP ASI02, MCP03).`
    );
    state.warned[toolName] = true;
  }

  // Persist on every call so the total keeps accumulating across invocations.
  saveVolumeState(state);
}

// Emit combined advisory if anything was flagged
if (advisories.length > 0) {
  const header = 'SECURITY ADVISORY (post-mcp-verify): Potential data leakage detected.';
  // Number each advisory and separate them with a blank line.
  const body   = advisories.map((a, i) => `[${i + 1}] ${a}`).join('\n\n');
  emitAdvisory(`${header}\n\n${body}`);
}

// PostToolUse hooks are always advisory — never block.
// Node documents writes to process.stdout as asynchronous for some stream
// types/platforms, and process.exit() does not wait for pending writes, so
// exiting immediately after emitAdvisory() could truncate the advisory JSON.
// Exit from the callback of a final empty write instead: it runs only after
// everything queued before it has been flushed.
process.exitCode = 0; // natural exit also reports success if the callback never fires
process.stdout.once('error', () => process.exit(0)); // e.g. reader closed the pipe
process.stdout.write('', () => process.exit(0));