#!/usr/bin/env node
// Hook: post-session-guard.mjs
// Event: PostToolUse (ALL tools)
// Purpose: Runtime lethal trifecta detection — monitors tool call sequences
//   and warns when untrusted input + sensitive data access + exfiltration
//   sink all appear within a sliding window.
//
// Protocol:
//   - Read JSON from stdin: { tool_name, tool_input, tool_output }
//   - Advisory by default: exit 0 and output systemMessage via stdout to warn.
//     In block mode a high-confidence trifecta exits 2 instead.
//   - State persisted in ${os.tmpdir()}/llm-security-session-${ppid}.jsonl
//
// Rule of Two (Meta, Oct 2025):
//   Of 3 capabilities A (untrusted input), B (sensitive data), C (state change/exfil),
//   an agent should NEVER hold all 3 simultaneously. Env var LLM_SECURITY_TRIFECTA_MODE
//   controls enforcement: warn (default), block (exit 2 for high-confidence trifecta), off.
//
// Long-horizon monitoring (OpenAI Atlas, Dec 2025):
//   100-call window alongside 20-call for slow-burn trifecta detection and
//   behavioral drift via Jensen-Shannon divergence on tool distributions.
//
// Sub-agent delegation tracking (DeepMind Agent Traps kat. 4, v5.0 S4):
//   Task/Agent tools classified as 'delegation'. Escalation-after-input advisory
//   when delegation occurs within 5 calls of an input_source (untrusted content
//   may be influencing sub-agent spawning decisions).
//
// CaMeL-inspired data flow tagging (DeepMind CaMeL, v5.0 S6):
//   Lightweight data provenance tracking. On tool output: hash first 200 chars as
//   data tag. On next tool input: check substring match against prior tags. Match =
//   "data flow link". Trifecta with linked flows = elevated severity.
//
// Trifecta concept (Willison / Invariant Labs):
//   1. Agent exposed to UNTRUSTED INPUT (prompt injection surface)
//   2. Agent has access to SENSITIVE DATA via tools
//   3. An EXFILTRATION SINK exists (HTTP POST, scp, etc.)
//
// OWASP: ASI01 (Excessive Agency), ASI02 (Data Leakage), LLM01 (Prompt Injection)

import { readFileSync, appendFileSync, existsSync, readdirSync, statSync, unlinkSync } from 'node:fs';
import { join } from 'node:path';
import { tmpdir } from 'node:os';
import { createHash } from 'node:crypto';
import { extractMcpServer } from '../../scanners/lib/mcp-description-cache.mjs';
import { jensenShannonDivergence, buildDistribution } from '../../scanners/lib/distribution-stats.mjs';
import { writeAuditEvent } from '../../scanners/lib/audit-trail.mjs';
import { getPolicyValue } from '../../scanners/lib/policy-loader.mjs';

// ---------------------------------------------------------------------------
// Constants
// ---------------------------------------------------------------------------

const WINDOW_SIZE = getPolicyValue('trifecta', 'window_size', 20);
const STATE_PREFIX = 'llm-security-session-';
const STATE_DIR = tmpdir();
const CLEANUP_MAX_AGE_MS = 24 * 60 * 60 * 1000; // 24 hours

// Long-horizon monitoring (OpenAI Atlas, Dec 2025)
const LONG_HORIZON_WINDOW = getPolicyValue('trifecta', 'long_horizon_window', 100);
const SLOW_BURN_MIN_SPREAD = 50;
const DRIFT_THRESHOLD = 0.25;
const DRIFT_SAMPLE_SIZE = 20;

// Sub-agent delegation tracking (DeepMind Agent Traps kat. 4, v5.0 S4)
const DELEGATION_ESCALATION_WINDOW = 5; // calls after input_source

// Rule of Two enforcement mode: block | warn | off (env var takes precedence over policy).
// Coerced and trimmed so a non-string policy value or stray whitespace in the
// env var cannot crash the hook or silently miss the 'block' / 'off' comparisons.
const policyTrifectaMode = getPolicyValue('trifecta', 'mode', 'warn');
const TRIFECTA_MODE = String(process.env.LLM_SECURITY_TRIFECTA_MODE || policyTrifectaMode)
  .trim()
  .toLowerCase();

// Volume tracking thresholds (cumulative bytes per session).
// Ordered from highest to lowest; the volume check walks them in this order.
const VOLUME_THRESHOLDS = [
  { bytes: 1_000_000, label: '1 MB', severity: 'HIGH' },
  { bytes: 500_000, label: '500 KB', severity: 'MEDIUM' },
  { bytes: 100_000, label: '100 KB', severity: 'LOW' },
];

// ---------------------------------------------------------------------------
// Sensitive path patterns (for data_access classification of Read/Bash)
// ---------------------------------------------------------------------------

const SENSITIVE_PATH_PATTERNS = [
  // `.env` followed by a dot, end of string, or a shell delimiter. These patterns
  // are also tested against whole Bash commands, where `.env` is usually followed
  // by whitespace or a pipe rather than the end of the string.
  /\.env(?:[.\s"';|&<>)]|$)/i,
  /\.ssh\//i,
  /\.aws\//i,
  /\.gnupg\//i,
  /credentials/i,
  /secrets?[./]/i,
  /tokens?[./]/i,
  /password/i,
  /keychain/i,
  /\.npmrc/i,
  /\.pypirc/i,
  /id_rsa/i,
  /id_ed25519/i,
  /authorized_keys/i,
  /\.netrc/i,
  /\.pgpass/i,
];

// ---------------------------------------------------------------------------
// Bash command patterns
// ---------------------------------------------------------------------------

const BASH_EXFIL_PATTERNS = [
  /\bcurl\b[^|]*(?:-X\s*(?:POST|PUT|PATCH)\b|-d\s|--data\b|--data-\w+\b|-F\s|--form\b)/i,
  /\bwget\b[^|]*--post/i,
  /\bnc\s+(?:-[a-zA-Z]*\s+)*\S+\s+\d/i, // nc host port
  /\bsendmail\b/i,
  /\bscp\s/i,
  /\brsync\b[^|]*[^/]\S+:/i, // rsync to remote (user@host:)
  /\bgit\s+push\b/i,
  /\bsftp\b/i,
];

const BASH_INPUT_PATTERNS = [
  /\bcurl\b/i, // curl without POST indicators = downloading
  /\bwget\b/i, // wget without --post = downloading
];

const BASH_DATA_CMD_PATTERNS = [
  /\b(?:cat|head|tail|less|more|bat)\s/i,
];

// ---------------------------------------------------------------------------
// Classification
// ---------------------------------------------------------------------------
/**
 * Return the first candidate that is a non-empty string, or '' when none is.
 * Keeps `detail` a string even when a tool input field has an unexpected type.
 * @param {...unknown} candidates
 * @returns {string}
 */
function pickText(...candidates) {
  const hit = candidates.find(c => typeof c === 'string' && c.length > 0);
  return hit ?? '';
}

/**
 * Classify a tool call into trifecta leg(s).
 *
 * Classes produced here: 'input_source' (WebFetch, WebSearch, any `mcp__*` tool),
 * 'delegation' (Task, Agent), 'data_access' (Read, Grep, Glob), whatever
 * classifyBashCommand returns for Bash, and 'neutral' for everything else.
 *
 * @param {string} toolName
 * @param {object} toolInput
 * @returns {{ classes: string[], detail: string }}
 */
function classifyToolCall(toolName, toolInput) {
  // --- WebFetch / WebSearch: always input_source ---
  if (toolName === 'WebFetch' || toolName === 'WebSearch') {
    const target = pickText(toolInput?.url, toolInput?.query);
    return { classes: ['input_source'], detail: target.slice(0, 80) };
  }

  // --- MCP tools: untrusted external input ---
  if (typeof toolName === 'string' && toolName.startsWith('mcp__')) {
    return { classes: ['input_source'], detail: toolName };
  }

  // --- Task / Agent: delegation (DeepMind Agent Traps kat. 4, v5.0 S4) ---
  if (toolName === 'Task' || toolName === 'Agent') {
    const desc = pickText(toolInput?.description, toolInput?.prompt);
    return { classes: ['delegation'], detail: desc.slice(0, 80) };
  }

  // --- Read: data_access (sensitive path = stronger signal, but all reads count) ---
  if (toolName === 'Read') {
    const filePath = pickText(toolInput?.file_path);
    const isSensitive = SENSITIVE_PATH_PATTERNS.some(p => p.test(filePath));
    // The '[SENSITIVE] ' prefix is what later checks look for in `detail`.
    return {
      classes: ['data_access'],
      detail: `${isSensitive ? '[SENSITIVE] ' : ''}${filePath.slice(-60)}`,
    };
  }

  // --- Grep / Glob: data_access ---
  if (toolName === 'Grep' || toolName === 'Glob') {
    const target = pickText(toolInput?.pattern, toolInput?.path);
    return { classes: ['data_access'], detail: target.slice(0, 60) };
  }

  // --- Bash: can be multiple classes depending on command ---
  if (toolName === 'Bash') {
    return classifyBashCommand(toolInput?.command || '');
  }

  // --- Everything else: neutral ---
  return { classes: ['neutral'], detail: '' };
}
/**
 * Classify a Bash command. Can return multiple classes.
 *
 * - 'exfil_sink'   when any BASH_EXFIL_PATTERNS entry matches.
 * - 'data_access'  only when a file-reading command (BASH_DATA_CMD_PATTERNS) is
 *                  present AND the command text matches a sensitive path pattern.
 * - 'input_source' when curl/wget is present and the command was not already
 *                  classified as an exfil sink.
 * - 'neutral'      when nothing else applied.
 *
 * @param {string} command
 * @returns {{ classes: string[], detail: string }}
 */
function classifyBashCommand(command) {
  // Coerce defensively: the caller passes whatever the tool input contained.
  const text = typeof command === 'string' ? command : String(command ?? '');
  const classes = [];
  const detail = text.slice(0, 80);

  // Check exfil first (highest priority)
  if (BASH_EXFIL_PATTERNS.some(p => p.test(text))) {
    classes.push('exfil_sink');
  }

  // Check data access: command reads files AND path looks sensitive
  const readsFiles = BASH_DATA_CMD_PATTERNS.some(p => p.test(text));
  if (readsFiles && SENSITIVE_PATH_PATTERNS.some(p => p.test(text))) {
    classes.push('data_access');
  }

  // Check input source: curl/wget without POST = downloading content
  // Only add if not already classified as exfil (avoid double-counting curl POST)
  if (!classes.includes('exfil_sink') && BASH_INPUT_PATTERNS.some(p => p.test(text))) {
    classes.push('input_source');
  }

  if (classes.length === 0) {
    classes.push('neutral');
  }

  return { classes, detail };
}

// ---------------------------------------------------------------------------
// State management
// ---------------------------------------------------------------------------

/**
 * Get the state file path for this session.
 * The name is derived from the parent process id, so all hook invocations
 * spawned by the same parent share one file.
 * @returns {string}
 */
function getStateFilePath() {
  return join(STATE_DIR, `${STATE_PREFIX}${process.ppid}.jsonl`);
}

/**
 * Append a tool call entry to the state file as one JSON line.
 * @param {string} stateFile
 * @param {object} entry
 */
function appendEntry(stateFile, entry) {
  appendFileSync(stateFile, JSON.stringify(entry) + '\n', 'utf-8');
}

/**
 * Read the last N entries from the state file.
 *
 * Returns [] when the file is missing or unreadable, or when `n` is not a
 * positive number (`slice(-0)` would otherwise return every line). Malformed
 * lines and lines that do not parse to an object are skipped, so callers can
 * safely dereference each entry.
 *
 * @param {string} stateFile
 * @param {number} n - maximum number of trailing lines to consider (Infinity = all)
 * @returns {object[]}
 */
function readLastEntries(stateFile, n) {
  if (!(n > 0)) return [];

  let content;
  try {
    content = readFileSync(stateFile, 'utf-8');
  } catch {
    // Missing or unreadable state file: treat as an empty history.
    return [];
  }

  const tail = content.trim().split('\n').filter(Boolean).slice(-n);
  const entries = [];
  for (const line of tail) {
    try {
      const parsed = JSON.parse(line);
      // Lines such as `null` or `42` are valid JSON but not entries.
      if (parsed !== null && typeof parsed === 'object') entries.push(parsed);
    } catch {
      /* skip malformed */
    }
  }
  return entries;
}
/**
 * Clean up state files older than CLEANUP_MAX_AGE_MS.
 * Only called on first invocation per session (when state file doesn't exist yet).
 * Best-effort: per-file and directory-level errors are deliberately ignored so
 * that cleanup can never fail the hook.
 */
function cleanupOldStateFiles() {
  try {
    const now = Date.now();
    for (const file of readdirSync(STATE_DIR)) {
      const isStateFile = file.startsWith(STATE_PREFIX) && file.endsWith('.jsonl');
      if (!isStateFile) continue;

      const fullPath = join(STATE_DIR, file);
      try {
        const ageMs = now - statSync(fullPath).mtimeMs;
        if (ageMs > CLEANUP_MAX_AGE_MS) {
          unlinkSync(fullPath);
        }
      } catch {
        /* ignore per-file errors */
      }
    }
  } catch {
    /* ignore cleanup errors entirely */
  }
}

// ---------------------------------------------------------------------------
// Trifecta detection
// ---------------------------------------------------------------------------

/**
 * Check if all 3 trifecta legs are present in the window.
 * Evidence strings are each entry's `detail`, falling back to its `tool` name.
 * @param {object[]} entries
 * @returns {{ detected: boolean, evidence: { input: string[], access: string[], exfil: string[] } }}
 */
function checkTrifecta(entries) {
  const evidence = { input: [], access: [], exfil: [] };

  for (const entry of entries) {
    if (entry.type === 'warning') continue; // skip warning markers
    const label = entry.detail || entry.tool;
    for (const cls of entry.classes || []) {
      if (cls === 'input_source') evidence.input.push(label);
      if (cls === 'data_access') evidence.access.push(label);
      if (cls === 'exfil_sink') evidence.exfil.push(label);
    }
  }

  const detected =
    evidence.input.length > 0 && evidence.access.length > 0 && evidence.exfil.length > 0;
  return { detected, evidence };
}

/**
 * Check if a warning was already emitted in the current window.
 * @param {object[]} entries
 * @returns {boolean}
 */
function hasRecentWarning(entries) {
  return entries.some(e => e.type === 'warning');
}
/**
 * Check if the trifecta is MCP-concentrated: all 3 legs originate from tools
 * on the same MCP server. This is a stronger signal — a single compromised
 * server providing input, accessing data, AND exfiltrating.
 * @param {object[]} entries
 * @returns {{ concentrated: boolean, server: string|null }}
 */
function checkMcpConcentration(entries) {
  // Collect MCP servers per trifecta leg
  const serversByLeg = { input: new Set(), access: new Set(), exfil: new Set() };

  for (const entry of entries) {
    if (entry.type === 'warning') continue;
    // Entries for which extractMcpServer yields a falsy value are skipped
    // (presumably non-MCP tools — confirm against the helper).
    const server = extractMcpServer(entry.tool);
    if (!server) continue;

    for (const cls of entry.classes || []) {
      if (cls === 'input_source') serversByLeg.input.add(server);
      if (cls === 'data_access') serversByLeg.access.add(server);
      if (cls === 'exfil_sink') serversByLeg.exfil.add(server);
    }
  }

  // Find a server present in all 3 legs
  for (const server of serversByLeg.input) {
    if (serversByLeg.access.has(server) && serversByLeg.exfil.has(server)) {
      return { concentrated: true, server };
    }
  }
  return { concentrated: false, server: null };
}

/**
 * Check if the trifecta involves sensitive path access + exfiltration.
 * This is a high-confidence signal: data from .env/.ssh/.aws etc. being sent out.
 *
 * A data_access entry counts as sensitive when its detail carries the
 * '[SENSITIVE]' prefix (set for Read) or when it came from Bash: the Bash
 * classifier only assigns data_access when the command matches a sensitive
 * path pattern, but it never adds the prefix.
 *
 * @param {object[]} entries
 * @returns {boolean}
 */
function checkSensitiveExfil(entries) {
  let hasSensitiveAccess = false;
  let hasExfil = false;

  for (const entry of entries) {
    if (entry.type === 'warning') continue;
    const classes = entry.classes || [];
    const detail = typeof entry.detail === 'string' ? entry.detail : '';

    if (classes.includes('data_access')) {
      if (detail.startsWith('[SENSITIVE]') || entry.tool === 'Bash') {
        hasSensitiveAccess = true;
      }
    }
    if (classes.includes('exfil_sink')) {
      hasExfil = true;
    }
  }

  return hasSensitiveAccess && hasExfil;
}

/**
 * Compute cumulative data volume from entries with outputSize.
 * Warning markers are skipped; a missing or non-numeric outputSize counts as 0.
 * @param {object[]} allEntries - All entries (not just window)
 * @returns {number} Total bytes
 */
function computeCumulativeVolume(allEntries) {
  let total = 0;
  for (const entry of allEntries) {
    if (entry.type === 'warning' || entry.type === 'volume_warning') continue;
    // Guard against a corrupted state line turning the sum into a string.
    if (Number.isFinite(entry.outputSize)) total += entry.outputSize;
  }
  return total;
}
/**
 * Check if a volume warning at a given threshold was already emitted.
 * @param {object[]} entries
 * @param {number} thresholdBytes
 * @returns {boolean}
 */
function hasVolumeWarning(entries, thresholdBytes) {
  return entries.some(e => e.type === 'volume_warning' && e.threshold === thresholdBytes);
}

/**
 * Format the volume warning message.
 * @param {number} totalBytes
 * @param {string} thresholdLabel
 * @param {string} severity
 * @returns {string}
 */
function formatVolumeWarning(totalBytes, thresholdLabel, severity) {
  const kb = Math.round(totalBytes / 1024);
  return (
    `SECURITY ADVISORY (session-guard): Cumulative MCP data volume exceeded ${thresholdLabel} [${severity}].\n\n` +
    `This session has received ~${kb} KB of tool output data.\n` +
    'High cumulative volume may indicate bulk data harvesting or exfiltration staging (OWASP ASI02).\n' +
    'Review whether the volume of data being processed is proportional to the task.'
  );
}

/**
 * Format the trifecta warning message.
 * Uses Rule of Two terminology (Meta, Oct 2025): A=untrusted input, B=sensitive data, C=state change/exfil.
 * Only the two most recent evidence items per leg are listed.
 *
 * @param {{ input: string[], access: string[], exfil: string[] }} evidence
 * @param {{ concentrated: boolean, server: string|null }} [mcpInfo]
 * @param {boolean} [isSensitiveExfil]
 * @param {number} [windowSize] - number of calls named in the message; defaults to
 *   WINDOW_SIZE so the text stays accurate when the window is changed by policy
 * @returns {string}
 */
function formatWarning(evidence, mcpInfo, isSensitiveExfil, windowSize = WINDOW_SIZE) {
  const lastTwo = items => items.slice(-2).map(e => ` - ${e}`).join('\n');
  const inputEx = lastTwo(evidence.input);
  const accessEx = lastTwo(evidence.access);
  const exfilEx = lastTwo(evidence.exfil);

  const mcpLine = mcpInfo?.concentrated
    ? `\nRULE OF TWO VIOLATION: MCP-CONCENTRATED — All 3 legs trace to server "${mcpInfo.server}" (elevated severity).\n`
    : '';
  const sensitiveLine = isSensitiveExfil
    ? '\nRULE OF TWO VIOLATION: SENSITIVE DATA + EXFILTRATION — Sensitive paths accessed and exfil sink present.\n'
    : '';

  return (
    'SECURITY ADVISORY (session-guard): Rule of Two violation — potential lethal trifecta detected.\n\n' +
    `Within the last ${windowSize} tool calls, this session holds all 3 capabilities simultaneously:\n` +
    ' [A] Untrusted external input (prompt injection surface):\n' + inputEx + '\n' +
    ' [B] Sensitive data access:\n' + accessEx + '\n' +
    ' [C] Exfiltration-capable tool (state change):\n' + exfilEx + '\n' +
    mcpLine +
    sensitiveLine +
    '\n' +
    'Rule of Two (Meta, Oct 2025): An agent should never hold A+B+C simultaneously.\n' +
    'This combination enables prompt injection -> data theft chains (OWASP ASI01, ASI02, LLM01).\n' +
    'Review recent tool calls for unexpected behavior.'
  );
}

// ---------------------------------------------------------------------------
// Sub-agent delegation tracking (DeepMind Agent Traps kat. 4, v5.0 S4)
// ---------------------------------------------------------------------------
/**
 * Check for escalation-after-input: delegation within `lookback` calls of an
 * input_source. Untrusted content consumed shortly before spawning a sub-agent
 * may indicate the model is being manipulated into delegating dangerous work.
 *
 * The current call is assumed to be the last tool entry in `entries` (it is
 * appended to the state file before the window is read), so it is excluded
 * from the look-back range.
 *
 * @param {object[]} entries — recent window (20-call)
 * @param {{ classes: string[] }} currentEntry — the entry just appended
 * @param {number} [lookback] — how many preceding tool calls to inspect;
 *   defaults to DELEGATION_ESCALATION_WINDOW
 * @returns {{ detected: boolean, inputDetail: string }}
 */
function checkEscalationAfterInput(entries, currentEntry, lookback = DELEGATION_ESCALATION_WINDOW) {
  const isDelegation = (currentEntry?.classes ?? []).includes('delegation');
  if (!isDelegation) {
    return { detected: false, inputDetail: '' };
  }

  // Marker entries are dropped first so they do not consume look-back slots.
  const preceding = filterToolEntries(entries).slice(-(lookback + 1), -1); // exclude current
  const source = preceding.find(e => (e.classes || []).includes('input_source'));
  if (!source) {
    return { detected: false, inputDetail: '' };
  }
  return { detected: true, inputDetail: source.detail || source.tool || 'unknown' };
}

/**
 * Check if an escalation-after-input warning was already emitted.
 * @param {object[]} entries
 * @returns {boolean}
 */
function hasEscalationWarning(entries) {
  return entries.some(e => e.type === 'escalation_warning');
}

/**
 * Format the escalation-after-input warning.
 * @param {string} delegationDetail — what the delegation was for
 * @param {string} inputDetail — what input source preceded it
 * @param {number} [windowSize] — call count named in the message; defaults to
 *   DELEGATION_ESCALATION_WINDOW
 * @returns {string}
 */
function formatEscalationWarning(delegationDetail, inputDetail, windowSize = DELEGATION_ESCALATION_WINDOW) {
  return (
    'SECURITY ADVISORY (session-guard): Escalation-after-input detected [MEDIUM] — ' +
    'sub-agent delegation shortly after untrusted input.\n\n' +
    `A Task/Agent delegation occurred within ${windowSize} calls of untrusted input:\n` +
    ` Input source: ${inputDetail}\n` +
    ` Delegation: ${delegationDetail}\n\n` +
    'Untrusted content (web pages, MCP tool output) may be influencing the model\n' +
    'to spawn sub-agents with capabilities beyond the original task scope.\n' +
    'This is a known attack vector (DeepMind AI Agent Traps, Category 4).\n' +
    'Review whether this delegation is expected and appropriately scoped.'
  );
}

// ---------------------------------------------------------------------------
// Long-horizon monitoring (100-call window) — OpenAI Atlas, Dec 2025
// ---------------------------------------------------------------------------

/**
 * Filter entries to only tool calls (exclude warning/marker entries).
 * Marker entries are the ones carrying a truthy `type` field.
 * @param {object[]} entries
 * @returns {object[]}
 */
function filterToolEntries(entries) {
  return entries.filter(e => !e.type);
}

/**
 * Check for slow-burn trifecta: all 3 legs present but spread over more than
 * `minSpread` calls. Catches multi-step injection chains that pace actions to
 * avoid short-window detection.
 *
 * `spread` is the distance, in tool-call positions, between the earliest first
 * occurrence and the latest last occurrence across the three legs.
 *
 * @param {object[]} entries - Long-horizon window entries
 * @param {number} [minSpread] - defaults to SLOW_BURN_MIN_SPREAD
 * @returns {{ detected: boolean, spread: number }}
 */
function checkSlowBurnTrifecta(entries, minSpread = SLOW_BURN_MIN_SPREAD) {
  const spans = new Map([
    ['input_source', { first: -1, last: -1 }],
    ['data_access', { first: -1, last: -1 }],
    ['exfil_sink', { first: -1, last: -1 }],
  ]);

  filterToolEntries(entries).forEach((entry, index) => {
    for (const cls of entry.classes || []) {
      const span = spans.get(cls);
      if (!span) continue; // not a trifecta leg
      if (span.first === -1) span.first = index;
      span.last = index;
    }
  });

  const legs = [...spans.values()];
  if (legs.some(span => span.first === -1)) {
    return { detected: false, spread: 0 };
  }

  const earliestFirst = Math.min(...legs.map(span => span.first));
  const latestLast = Math.max(...legs.map(span => span.last));
  const spread = latestLast - earliestFirst;
  return { detected: spread > minSpread, spread };
}

/**
 * Check if a slow-burn trifecta warning was already emitted.
 * @param {object[]} entries
 * @returns {boolean}
 */
function hasSlowBurnWarning(entries) {
  return entries.some(e => e.type === 'slow_burn_warning');
}
/**
 * Detect behavioral drift: tool distribution shift in first vs last DRIFT_SAMPLE_SIZE calls.
 * Needs at least 2 * DRIFT_SAMPLE_SIZE tool entries so the two samples do not
 * overlap; with fewer entries no drift is reported.
 * @param {object[]} entries
 * @returns {{ drifted: boolean, jsd: number, firstTools: string[], lastTools: string[] }}
 */
function checkBehavioralDrift(entries) {
  const toolEntries = filterToolEntries(entries);
  if (toolEntries.length < 2 * DRIFT_SAMPLE_SIZE) {
    return { drifted: false, jsd: 0, firstTools: [], lastTools: [] };
  }

  const toolNameOf = e => e.tool;
  const firstTools = toolEntries.slice(0, DRIFT_SAMPLE_SIZE).map(toolNameOf);
  const lastTools = toolEntries.slice(-DRIFT_SAMPLE_SIZE).map(toolNameOf);

  const jsd = jensenShannonDivergence(buildDistribution(firstTools), buildDistribution(lastTools));
  return { drifted: jsd > DRIFT_THRESHOLD, jsd, firstTools, lastTools };
}

/**
 * Check if a behavioral drift warning was already emitted.
 * @param {object[]} entries
 * @returns {boolean}
 */
function hasDriftWarning(entries) {
  return entries.some(e => e.type === 'drift_warning');
}

/**
 * Get top N most frequent items from an array, formatted as "name(count)".
 * Items with equal counts keep their first-seen order.
 * @param {string[]} items
 * @param {number} n
 * @returns {string}
 */
function topN(items, n) {
  const counts = new Map();
  for (const item of items) {
    counts.set(item, (counts.get(item) || 0) + 1);
  }
  return [...counts.entries()]
    .sort((a, b) => b[1] - a[1])
    .slice(0, n)
    .map(([name, count]) => `${name}(${count})`)
    .join(', ');
}

/**
 * Format the slow-burn trifecta warning message.
 * @param {number} spread
 * @param {number} [windowSize] - call count named in the message; defaults to
 *   LONG_HORIZON_WINDOW so the text stays accurate when the window is changed by policy
 * @returns {string}
 */
function formatSlowBurnWarning(spread, windowSize = LONG_HORIZON_WINDOW) {
  return (
    'SECURITY ADVISORY (session-guard): Slow-burn trifecta detected [MEDIUM] — ' +
    `Rule of Two legs spread over ${spread} calls.\n\n` +
    `Within the last ${windowSize} tool calls, all 3 capabilities appeared but spread across a wide range:\n` +
    ' [A] Untrusted external input (prompt injection surface)\n' +
    ' [B] Sensitive data access\n' +
    ' [C] Exfiltration-capable tool (state change)\n\n' +
    'This pattern may indicate a multi-step prompt injection chain (OpenAI Atlas, Dec 2025).\n' +
    'Wide spread across calls makes detection harder with short-window monitoring.'
  );
}
/**
 * Format the behavioral drift warning message.
 * @param {number} jsd
 * @param {string[]} firstTools
 * @param {string[]} lastTools
 * @returns {string}
 */
function formatDriftWarning(jsd, firstTools, lastTools) {
  return (
    'SECURITY ADVISORY (session-guard): Behavioral drift detected [MEDIUM] — tool usage shift.\n\n' +
    `Jensen-Shannon divergence: ${jsd.toFixed(3)} (threshold: ${DRIFT_THRESHOLD})\n` +
    `First ${DRIFT_SAMPLE_SIZE} calls: ${topN(firstTools, 3)}\n` +
    `Last ${DRIFT_SAMPLE_SIZE} calls: ${topN(lastTools, 3)}\n\n` +
    'A significant shift in tool usage patterns may indicate session hijacking or prompt injection\n' +
    "changing the agent's behavior over time (OpenAI Atlas, Dec 2025)."
  );
}

// ---------------------------------------------------------------------------
// CaMeL-inspired data flow tagging (DeepMind CaMeL, v5.0 S6)
// ---------------------------------------------------------------------------

/**
 * Compute a short data tag from tool output (first 200 chars, SHA-256 truncated to 16 hex).
 * Used for lightweight data provenance tracking.
 * @param {string} text - tool output text
 * @returns {string} 16-char hex hash
 */
function computeDataTag(text) {
  const sample = String(text ?? '').slice(0, 200);
  return createHash('sha256').update(sample).digest('hex').slice(0, 16);
}

/**
 * Extract a string representation of tool input for data flow matching.
 * Top-level string values are used as-is, non-null object values (including
 * arrays) are JSON-stringified, and everything else is ignored. The pieces are
 * joined with a single space.
 * @param {object} toolInput
 * @returns {string}
 */
function extractInputText(toolInput) {
  if (!toolInput || typeof toolInput !== 'object') return '';

  const parts = [];
  for (const val of Object.values(toolInput)) {
    if (typeof val === 'string') {
      parts.push(val);
    } else if (val !== null && typeof val === 'object') {
      // `typeof null` is 'object'; without the null check the literal text
      // "null" would be added to the matching text.
      parts.push(JSON.stringify(val));
    }
  }
  return parts.join(' ');
}
/**
 * Check if the current tool input contains data from a previous tool output.
 *
 * A prior entry is a source when it is a tool entry (no `type`), carries a
 * `dataTag`, and its stored `outputSnippet` occurs verbatim inside the current
 * input text. The tag is only used as a presence check here; the match itself
 * is the snippet substring test. Inputs shorter than 20 characters are never
 * linked.
 *
 * @param {object[]} entries - recent state entries
 * @param {string} currentInputText - stringified current tool input
 * @returns {{ linked: boolean, sourceEntries: object[] }}
 */
function checkDataFlowLink(entries, currentInputText) {
  if (!currentInputText || currentInputText.length < 20) {
    return { linked: false, sourceEntries: [] };
  }

  const sourceEntries = entries.filter(entry =>
    !entry.type &&
    Boolean(entry.dataTag) &&
    Boolean(entry.outputSnippet) &&
    currentInputText.includes(entry.outputSnippet)
  );

  return { linked: sourceEntries.length > 0, sourceEntries };
}

/**
 * Check if a data flow warning was already emitted.
 * @param {object[]} entries
 * @returns {boolean}
 */
function hasDataFlowWarning(entries) {
  return entries.some(e => e.type === 'data_flow_warning');
}

/**
 * Format the data flow linked trifecta warning.
 * At most three source entries are listed. `evidence` is accepted for
 * signature parity with the other formatters but is not used in the text.
 * @param {{ input: string[], access: string[], exfil: string[] }} evidence
 * @param {object[]} sourceEntries
 * @returns {string}
 */
function formatDataFlowWarning(evidence, sourceEntries) {
  const sources = sourceEntries
    .slice(0, 3)
    .map(e => ` - ${e.tool} → ${e.detail || 'unknown'}`)
    .join('\n');

  return (
    'SECURITY ADVISORY (session-guard): Data flow linked trifecta [HIGH] — ' +
    'CaMeL-style provenance tracking detected data flow chain.\n\n' +
    'Tool output from an untrusted source appears to flow into subsequent tool inputs,\n' +
    'creating a traceable data flow chain across the trifecta:\n' +
    ` Data flow sources:\n${sources}\n\n` +
    'This elevates the trifecta severity: data is not just co-located in the session,\n' +
    'but actively flowing between tools in a potential injection chain (DeepMind CaMeL).'
  );
}

// ---------------------------------------------------------------------------
// Main
// ---------------------------------------------------------------------------

let input;
try {
  const raw = readFileSync(0, 'utf-8');
  input = JSON.parse(raw);
} catch {
  // Unreadable or non-JSON stdin: nothing to analyse, never fail the tool call.
  process.exit(0);
}

const toolName = input?.tool_name ?? '';
const toolInput = input?.tool_input ?? {};
// Accept `tool_response` as a fallback for `tool_output`; without it the
// volume and data-flow checks see an empty output when the host uses that name.
const toolOutput = input?.tool_output ?? input?.tool_response ?? '';

if (!toolName) {
  process.exit(0);
}

// Off mode: skip all detection
if (TRIFECTA_MODE === 'off') {
  process.exit(0);
}

// Compute output size for volume tracking
const outputText = typeof toolOutput === 'string' ? toolOutput : (JSON.stringify(toolOutput) ?? '');
const outputSize = Buffer.byteLength(outputText, 'utf-8');

// Classify the current tool call
const { classes, detail } = classifyToolCall(toolName, toolInput);

// State file management
const stateFile = getStateFilePath();
const isFirstCall = !existsSync(stateFile);

// Cleanup old state files on first call per session
if (isFirstCall) {
  cleanupOldStateFiles();
}

// Compute data tag for CaMeL-style flow tracking (v5.0 S6)
const dataTag = outputText.length >= 20 ? computeDataTag(outputText) : null;

// Store a short snippet from output for data flow matching: the first 50 chars
// after trimming surrounding whitespace. The length check is applied to the
// trimmed text so a mostly-whitespace output cannot produce a tiny snippet
// that would substring-match almost any later input.
const trimmedOutput = outputText.trim();
const outputSnippet = trimmedOutput.length >= 50 ? trimmedOutput.slice(0, 50) : null;

// Append current entry (with outputSize for volume tracking, dataTag for CaMeL)
const entry = {
  ts: Date.now(),
  tool: toolName,
  classes,
  detail,
  outputSize,
  ...(dataTag ? { dataTag } : {}),
  ...(outputSnippet ? { outputSnippet } : {}),
};
appendEntry(stateFile, entry);

const messages = [];
const isNeutralOnly = classes.length === 1 && classes[0] === 'neutral';
const isDelegationOnly = classes.length === 1 && classes[0] === 'delegation';

// --- Trifecta detection (skip for neutral-only and delegation-only calls) ---
if (!isNeutralOnly && !isDelegationOnly) {
  const recent = readLastEntries(stateFile, WINDOW_SIZE);
  const { detected, evidence } = checkTrifecta(recent);

  if (detected && !hasRecentWarning(recent)) {
    const mcpInfo = checkMcpConcentration(recent);
    const sensitiveExfil = checkSensitiveExfil(recent);
    const highConfidence = mcpInfo.concentrated || sensitiveExfil;
    const shouldBlock = TRIFECTA_MODE === 'block' && highConfidence;

    messages.push(formatWarning(evidence, mcpInfo, sensitiveExfil));
    // The marker suppresses repeat warnings while it remains inside the window.
    appendEntry(stateFile, { type: 'warning', ts: Date.now() });
    writeAuditEvent({
      event_type: 'trifecta_warning',
      severity: highConfidence ? 'critical' : 'high',
      source: 'post-session-guard',
      details: { evidence, mcp_concentrated: mcpInfo.concentrated, sensitive_exfil: sensitiveExfil },
      owasp: ['ASI01', 'ASI02', 'LLM01'],
      action_taken: shouldBlock ? 'blocked' : 'warned',
    });

    // --- Rule of Two: Block mode ---
    // Block for high-confidence trifecta: MCP-concentrated OR sensitive path + exfil
    if (shouldBlock) {
      process.stderr.write(
        'BLOCKED: Rule of Two violation — high-confidence lethal trifecta detected.\n' +
        (mcpInfo.concentrated
          ? ` MCP-concentrated: all 3 legs via server "${mcpInfo.server}"\n`
          : ' Sensitive data access combined with exfiltration sink\n') +
        ' Set LLM_SECURITY_TRIFECTA_MODE=warn to downgrade to advisory.\n'
      );
      process.stdout.write(JSON.stringify({ decision: 'block' }));
      process.exit(2);
    }
  }
}

// --- Escalation-after-input detection (delegation within 5 calls of input_source) ---
if (classes.includes('delegation')) {
  const recent = readLastEntries(stateFile, WINDOW_SIZE);
  const escalation = checkEscalationAfterInput(recent, entry);

  if (escalation.detected && !hasEscalationWarning(recent)) {
    messages.push(formatEscalationWarning(detail, escalation.inputDetail));
    appendEntry(stateFile, { type: 'escalation_warning', ts: Date.now() });
    writeAuditEvent({
      event_type: 'escalation_after_input',
      severity: 'medium',
      source: 'post-session-guard',
      details: { tool: detail, input_source: escalation.inputDetail },
      owasp: ['ASI01'],
      action_taken: 'warned',
    });
  }
}

// --- CaMeL data flow check (v5.0 S6) ---
// Check if current tool input contains data that flowed from a previous tool output.
// If a data flow link is detected AND a trifecta is present, elevate severity.
if (!isNeutralOnly) {
  const inputText = extractInputText(toolInput);
  if (inputText.length >= 20) {
    const recent = readLastEntries(stateFile, WINDOW_SIZE);
    // The current call was already appended above; drop it so a call whose own
    // output echoes its input is not reported as a flow from a "previous" tool.
    const priorEntries = recent.filter(e => !(e.ts === entry.ts && e.tool === entry.tool));
    const flowLink = checkDataFlowLink(priorEntries, inputText);

    if (flowLink.linked && !hasDataFlowWarning(recent)) {
      // Check if a trifecta is also present
      const { detected, evidence } = checkTrifecta(recent);
      if (detected) {
        messages.push(formatDataFlowWarning(evidence, flowLink.sourceEntries));
        appendEntry(stateFile, { type: 'data_flow_warning', ts: Date.now() });
        writeAuditEvent({
          event_type: 'data_flow_trifecta',
          severity: 'high',
          source: 'post-session-guard',
          details: { evidence, flow_sources: flowLink.sourceEntries.length },
          owasp: ['ASI01', 'ASI02'],
          action_taken: 'warned',
        });
      }
    }
  }
}

// --- Cumulative volume tracking ---
if (outputSize > 0) {
  // Read the whole history: the total and the "already warned" markers must
  // both cover the entire session, not a capped tail.
  const allEntries = readLastEntries(stateFile, Number.POSITIVE_INFINITY);
  const totalVolume = computeCumulativeVolume(allEntries);

  // VOLUME_THRESHOLDS is ordered highest first, so `find` yields the highest
  // threshold reached. Warn once for that threshold only; lower thresholds are
  // not reported afterwards once a higher one has been reached.
  const reached = VOLUME_THRESHOLDS.find(({ bytes }) => totalVolume >= bytes);
  if (reached && !hasVolumeWarning(allEntries, reached.bytes)) {
    const { bytes, label, severity } = reached;
    messages.push(formatVolumeWarning(totalVolume, label, severity));
    appendEntry(stateFile, { type: 'volume_warning', ts: Date.now(), threshold: bytes });
    writeAuditEvent({
      event_type: 'volume_threshold',
      severity: severity.toLowerCase(),
      source: 'post-session-guard',
      details: { total_bytes: totalVolume, threshold: label },
      owasp: ['ASI02'],
      action_taken: 'warned',
    });
  }
}

// --- Long-horizon monitoring (100-call window) ---
{
  const longWindow = readLastEntries(stateFile, LONG_HORIZON_WINDOW);

  // Slow-burn trifecta: all 3 legs spread over >50 calls
  const slowBurn = checkSlowBurnTrifecta(longWindow);
  if (slowBurn.detected && !hasSlowBurnWarning(longWindow)) {
    messages.push(formatSlowBurnWarning(slowBurn.spread));
    appendEntry(stateFile, { type: 'slow_burn_warning', ts: Date.now() });
    writeAuditEvent({
      event_type: 'slow_burn_trifecta',
      severity: 'medium',
      source: 'post-session-guard',
      details: { spread: slowBurn.spread },
      owasp: ['ASI06', 'ASI08'],
      action_taken: 'warned',
    });
  }

  // Behavioral drift: JSD on tool distribution (first vs last DRIFT_SAMPLE_SIZE)
  const drift = checkBehavioralDrift(longWindow);
  if (drift.drifted && !hasDriftWarning(longWindow)) {
    messages.push(formatDriftWarning(drift.jsd, drift.firstTools, drift.lastTools));
    appendEntry(stateFile, { type: 'drift_warning', ts: Date.now() });
    writeAuditEvent({
      event_type: 'behavioral_drift',
      severity: 'medium',
      source: 'post-session-guard',
      details: { jsd: drift.jsd, first_tools: drift.firstTools, last_tools: drift.lastTools },
      owasp: ['ASI06', 'ASI08'],
      action_taken: 'warned',
    });
  }
}

// Emit combined advisory
if (messages.length > 0) {
  const combined = messages.join('\n\n---\n\n');
  process.stdout.write(JSON.stringify({ systemMessage: combined }));
}

// Default: advisory only (warn mode)
process.exit(0);