ktg-plugin-marketplace/plugins/llm-security/hooks/scripts/post-session-guard.mjs

#!/usr/bin/env node
// Hook: post-session-guard.mjs
// Event: PostToolUse (ALL tools)
// Purpose: Runtime lethal trifecta detection — monitors tool call sequences
//          and warns when untrusted input + sensitive data access + exfiltration
//          sink all appear within a sliding window.
//
// Protocol:
//   - Read JSON from stdin: { tool_name, tool_input, tool_output }
//   - Advisory by default: exit 0 and output a systemMessage via stdout to warn.
//     In block mode (see "Rule of Two" below) a detected trifecta may exit 2 instead.
//   - State persisted in ${os.tmpdir()}/llm-security-session-${ppid}.jsonl
//
// Rule of Two (Meta, Oct 2025):
//   Of 3 capabilities A (untrusted input), B (sensitive data), C (state change/exfil),
//   an agent should NEVER hold all 3 simultaneously. Env var LLM_SECURITY_TRIFECTA_MODE
//   controls enforcement: warn (default), block (exit 2 for high-confidence trifecta), off.
//
// Long-horizon monitoring (OpenAI Atlas, Dec 2025):
//   100-call window alongside 20-call for slow-burn trifecta detection and
//   behavioral drift via Jensen-Shannon divergence on tool distributions.
//
// Sub-agent delegation tracking (DeepMind Agent Traps kat. 4, v5.0 S4):
//   Task/Agent tools classified as 'delegation'. Escalation-after-input advisory
//   when delegation occurs within 5 calls of an input_source (untrusted content
//   may be influencing sub-agent spawning decisions).
//
// CaMeL-inspired data flow tagging (DeepMind CaMeL, v5.0 S6):
//   Lightweight data provenance tracking. On tool output: hash the first 200 chars
//   as a data tag and keep a short leading snippet of the output. On later tool
//   inputs: check for a verbatim substring match against the stored snippets.
//   Match = "data flow link". Trifecta with linked flows = elevated severity.
//
// Trifecta concept (Willison / Invariant Labs):
//   1. Agent exposed to UNTRUSTED INPUT (prompt injection surface)
//   2. Agent has access to SENSITIVE DATA via tools
//   3. An EXFILTRATION SINK exists (HTTP POST, scp, etc.)
//
// OWASP: ASI01 (Excessive Agency), ASI02 (Data Leakage), LLM01 (Prompt Injection)

import { readFileSync, appendFileSync, existsSync, readdirSync, statSync, unlinkSync } from 'node:fs';
import { join } from 'node:path';
import { tmpdir } from 'node:os';
import { createHash } from 'node:crypto';
import { extractMcpServer } from '../../scanners/lib/mcp-description-cache.mjs';
import { jensenShannonDivergence, buildDistribution } from '../../scanners/lib/distribution-stats.mjs';
import { writeAuditEvent } from '../../scanners/lib/audit-trail.mjs';
import { getPolicyValue } from '../../scanners/lib/policy-loader.mjs';

// ---------------------------------------------------------------------------
// Constants
// ---------------------------------------------------------------------------

// Number of most recent state-file entries inspected for the short-window
// trifecta check. Read from policy key trifecta.window_size; 20 is passed as
// the third argument (presumably the fallback — confirm in policy-loader).
const WINDOW_SIZE = getPolicyValue('trifecta', 'window_size', 20);
// State files are named `${STATE_PREFIX}<ppid>.jsonl` and live in STATE_DIR.
const STATE_PREFIX = 'llm-security-session-';
const STATE_DIR = tmpdir();
// State files whose mtime is older than this are removed by cleanupOldStateFiles().
const CLEANUP_MAX_AGE_MS = 24 * 60 * 60 * 1000; // 24 hours

// Long-horizon monitoring (OpenAI Atlas, Dec 2025)
// NOTE(review): the consumer of LONG_HORIZON_WINDOW is outside this view;
// presumably it sizes the window fed to the slow-burn and drift checks.
const LONG_HORIZON_WINDOW = getPolicyValue('trifecta', 'long_horizon_window', 100);
// Slow-burn detection fires only when the trifecta legs span MORE than this many calls.
const SLOW_BURN_MIN_SPREAD = 50;
// A Jensen-Shannon divergence above this value is reported as behavioral drift.
const DRIFT_THRESHOLD = 0.25;
// Number of tool calls sampled from each end of the window for the drift comparison.
const DRIFT_SAMPLE_SIZE = 20;

// Sub-agent delegation tracking (DeepMind Agent Traps kat. 4, v5.0 S4)
// E17 (v7.2.0): two look-back windows, both measured in tool calls between an
// input_source and a later delegation:
//   - primary:   LLM_SECURITY_ESCALATION_WINDOW when it parses to a positive
//                integer, otherwise policy key trifecta.escalation_window
//                (5 is passed as the third argument to getPolicyValue).
//   - secondary: fixed 20-call window that emits a MEDIUM advisory for
//                delegations falling outside the primary window, catching
//                slow-burn variants where the attacker waits before delegating.
const DELEGATION_ESCALATION_WINDOW = (() => {
  const fromEnv = Number.parseInt(process.env.LLM_SECURITY_ESCALATION_WINDOW, 10);
  const envIsUsable = Number.isFinite(fromEnv) && fromEnv > 0;
  // The policy lookup only runs when the env var is absent or unusable.
  return envIsUsable ? fromEnv : getPolicyValue('trifecta', 'escalation_window', 5);
})();
const DELEGATION_ESCALATION_WINDOW_MEDIUM = 20; // secondary longer-window advisory

// Rule of Two enforcement mode: block | warn | off (env var takes precedence over policy)
// The env var wins whenever it is a non-empty string (`||` skips '' and undefined).
// The chosen value is lower-cased, so later comparisons against 'off' / 'block'
// are case-insensitive. No validation happens here: any other value simply
// fails those equality checks.
const policyTrifectaMode = getPolicyValue('trifecta', 'mode', 'warn');
const TRIFECTA_MODE = (process.env.LLM_SECURITY_TRIFECTA_MODE || policyTrifectaMode).toLowerCase();

// Volume tracking thresholds (cumulative bytes per session)
// Entries are listed from the largest threshold to the smallest, each with a
// human-readable label and an advisory severity.
// NOTE(review): the consumer is outside this view; presumably it reports the
// first (highest) threshold crossed — confirm before reordering.
const VOLUME_THRESHOLDS = [
  { bytes: 1_000_000, label: '1 MB',   severity: 'HIGH' },
  { bytes:   500_000, label: '500 KB', severity: 'MEDIUM' },
  { bytes:   100_000, label: '100 KB', severity: 'LOW' },
];

// ---------------------------------------------------------------------------
// Sensitive path patterns (for data_access classification of Read/Bash)
// ---------------------------------------------------------------------------

// Case-insensitive patterns tested against a Read file path or a whole Bash
// command line. Any single match marks the access as sensitive.
const SENSITIVE_PATH_PATTERNS = [
  // `.env` at end of string, `.env.<suffix>`, or `.env` followed by a shell
  // delimiter (whitespace, quote, backtick, pipe, `;`, `&`, redirect, `)`).
  // The delimiter alternatives matter for Bash commands such as
  // `cat .env | base64`, where `.env` is neither at the end nor followed by a
  // dot. `/` is deliberately NOT a delimiter so directories named `.env`
  // (e.g. virtualenvs) and names like `.envrc` / `.environment` do not match.
  /\.env(?:\.|[\s'"`|;&<>)]|$)/i,
  /\.ssh\//i,
  /\.aws\//i,
  /\.gnupg\//i,
  /credentials/i,
  /secrets?[./]/i,
  /tokens?[./]/i,
  /password/i,
  /keychain/i,
  /\.npmrc/i,
  /\.pypirc/i,
  /id_rsa/i,
  /id_ed25519/i,
  /authorized_keys/i,
  /\.netrc/i,
  /\.pgpass/i,
];

// ---------------------------------------------------------------------------
// Bash command patterns
// ---------------------------------------------------------------------------

// Commands able to send data off the machine. A match on any pattern makes
// classifyBashCommand() tag the command as 'exfil_sink'.
const BASH_EXFIL_PATTERNS = [
  // curl with an upload indicator (-X POST/PUT/PATCH, -d, --data*, -F, --form)
  // appearing before the first pipe character.
  /\bcurl\b[^|]*(?:-X\s*(?:POST|PUT|PATCH)\b|-d\s|--data\b|--data-\w+\b|-F\s|--form\b)/i,
  // wget with a --post* option before the first pipe character.
  /\bwget\b[^|]*--post/i,
  /\bnc\s+(?:-[a-zA-Z]*\s+)*\S+\s+\d/i,   // nc host port
  /\bsendmail\b/i,
  /\bscp\s/i,
  /\brsync\b[^|]*[^/]\S+:/i,               // rsync to remote (user@host:)
  /\bgit\s+push\b/i,
  /\bsftp\b/i,
];

// Commands that pull external content in. These patterns match ANY curl/wget
// invocation; classifyBashCommand() only consults them when no exfil pattern
// matched, which is what restricts them to the download case.
const BASH_INPUT_PATTERNS = [
  /\bcurl\b/i,     // curl without POST indicators = downloading
  /\bwget\b/i,     // wget without --post = downloading
];

// File-reading commands. classifyBashCommand() combines a match here with
// SENSITIVE_PATH_PATTERNS before tagging a command as 'data_access'.
const BASH_DATA_CMD_PATTERNS = [
  /\b(?:cat|head|tail|less|more|bat)\s/i,
];

// ---------------------------------------------------------------------------
// Classification
// ---------------------------------------------------------------------------

/**
 * Classify a tool call into trifecta leg(s).
 *
 * Classes produced here: 'input_source', 'delegation', 'data_access',
 * 'neutral', plus whatever classifyBashCommand() returns for Bash.
 *
 * Input fields are only used when they are non-empty strings. A payload with
 * an unexpected field type (number, object, array) degrades to an empty
 * detail instead of throwing, and a non-string tool name is treated as neutral.
 *
 * @param {string} toolName
 * @param {object} toolInput
 * @returns {{ classes: string[], detail: string }}
 */
function classifyToolCall(toolName, toolInput) {
  // First candidate that is a non-empty string, otherwise ''. For string
  // inputs this is equivalent to `a || b || ''`.
  const firstText = (...candidates) =>
    candidates.find((c) => typeof c === 'string' && c.length > 0) ?? '';

  if (typeof toolName !== 'string') {
    return { classes: ['neutral'], detail: '' };
  }

  // --- WebFetch / WebSearch: always input_source ---
  if (toolName === 'WebFetch' || toolName === 'WebSearch') {
    const target = firstText(toolInput?.url, toolInput?.query);
    return { classes: ['input_source'], detail: target.slice(0, 80) };
  }

  // --- MCP tools: untrusted external input ---
  if (toolName.startsWith('mcp__')) {
    return { classes: ['input_source'], detail: toolName };
  }

  // --- Task / Agent: delegation (DeepMind Agent Traps kat. 4, v5.0 S4) ---
  if (toolName === 'Task' || toolName === 'Agent') {
    const desc = firstText(toolInput?.description, toolInput?.prompt);
    return { classes: ['delegation'], detail: desc.slice(0, 80) };
  }

  // --- Read: data_access (sensitive path = stronger signal, but all reads count) ---
  if (toolName === 'Read') {
    const filePath = firstText(toolInput?.file_path);
    const isSensitive = SENSITIVE_PATH_PATTERNS.some(p => p.test(filePath));
    return {
      classes: ['data_access'],
      // Keep the tail of the path: the file name is the informative part.
      detail: `${isSensitive ? '[SENSITIVE] ' : ''}${filePath.slice(-60)}`,
    };
  }

  // --- Grep / Glob: data_access ---
  if (toolName === 'Grep' || toolName === 'Glob') {
    const target = firstText(toolInput?.pattern, toolInput?.path);
    return { classes: ['data_access'], detail: target.slice(0, 60) };
  }

  // --- Bash: can be multiple classes depending on command ---
  if (toolName === 'Bash') {
    return classifyBashCommand(firstText(toolInput?.command));
  }

  // --- Everything else: neutral ---
  return { classes: ['neutral'], detail: '' };
}

/**
 * Map a Bash command line onto trifecta classes. One command can land in
 * several classes at once (e.g. a sensitive read piped into an upload).
 *
 * Class order in the result is fixed: exfil_sink, data_access, input_source.
 * 'neutral' is returned only when nothing else matched.
 *
 * @param {string} command
 * @returns {{ classes: string[], detail: string }} detail is the first 80 chars of the command
 */
function classifyBashCommand(command) {
  const detail = command.slice(0, 80);
  const matchesAny = (patterns) => patterns.some((re) => re.test(command));

  const isExfil = matchesAny(BASH_EXFIL_PATTERNS);
  // A read command only counts as data access when the command line also
  // mentions a sensitive path.
  const readsSensitive =
    matchesAny(BASH_DATA_CMD_PATTERNS) && matchesAny(SENSITIVE_PATH_PATTERNS);
  // curl/wget count as a download only when they were not already flagged as
  // an upload, so a curl POST is not double-counted.
  const isDownload = !isExfil && matchesAny(BASH_INPUT_PATTERNS);

  const classes = [];
  if (isExfil) classes.push('exfil_sink');
  if (readsSensitive) classes.push('data_access');
  if (isDownload) classes.push('input_source');
  if (classes.length === 0) classes.push('neutral');

  return { classes, detail };
}

// ---------------------------------------------------------------------------
// State management
// ---------------------------------------------------------------------------

/**
 * Build the path of this session's state file. The file is keyed on the
 * parent process id (process.ppid) and lives in STATE_DIR.
 * @returns {string}
 */
function getStateFilePath() {
  const fileName = STATE_PREFIX + process.ppid + '.jsonl';
  return join(STATE_DIR, fileName);
}

/**
 * Append one record to the state file as a single JSON line (JSONL).
 * @param {string} stateFile - path of the state file; created if missing
 * @param {object} entry - record to serialize with JSON.stringify
 */
function appendEntry(stateFile, entry) {
  const line = `${JSON.stringify(entry)}\n`;
  appendFileSync(stateFile, line, 'utf-8');
}

/**
 * Load the trailing records of the state file.
 *
 * The whole file is read, blank lines are dropped, the last `n` remaining
 * lines are kept and each is JSON-parsed; lines that fail to parse are
 * silently skipped, so the result can hold fewer than `n` records.
 * A missing or unreadable file yields an empty array.
 *
 * Note: the tail is taken with `slice(-n)`, so `n === 0` selects every line
 * rather than none.
 *
 * @param {string} stateFile
 * @param {number} n
 * @returns {object[]}
 */
function readLastEntries(stateFile, n) {
  if (!existsSync(stateFile)) return [];

  try {
    const recentLines = readFileSync(stateFile, 'utf-8')
      .trim()
      .split('\n')
      .filter(Boolean)
      .slice(-n);

    // Wrapping each parsed value in a one-element array lets flatMap drop
    // malformed lines while keeping parsed values intact, arrays included.
    return recentLines.flatMap((text) => {
      try {
        return [JSON.parse(text)];
      } catch {
        return []; // skip malformed line
      }
    });
  } catch {
    return [];
  }
}

/**
 * Best-effort removal of stale session state files from STATE_DIR.
 *
 * A file is a candidate when its name starts with STATE_PREFIX and ends with
 * `.jsonl`; it is deleted when its mtime is more than CLEANUP_MAX_AGE_MS in
 * the past. Every failure (listing, stat, unlink) is swallowed on purpose:
 * cleanup must never break the hook.
 *
 * Intended to run once per session, when the state file does not exist yet.
 */
function cleanupOldStateFiles() {
  try {
    const startedAt = Date.now();
    const isStateFile = (name) => name.startsWith(STATE_PREFIX) && name.endsWith('.jsonl');

    for (const name of readdirSync(STATE_DIR).filter(isStateFile)) {
      const target = join(STATE_DIR, name);
      try {
        const ageMs = startedAt - statSync(target).mtimeMs;
        if (ageMs > CLEANUP_MAX_AGE_MS) {
          unlinkSync(target);
        }
      } catch { /* one bad file must not stop the sweep */ }
    }
  } catch { /* directory listing failed: nothing to clean */ }
}

// ---------------------------------------------------------------------------
// Trifecta detection
// ---------------------------------------------------------------------------

/**
 * Collect evidence for each trifecta leg across a window of entries and
 * report whether all three legs are represented.
 *
 * Entries of type 'warning' are ignored. Each matching class contributes the
 * entry's detail (or its tool name when the detail is empty) to that leg.
 *
 * @param {object[]} entries
 * @returns {{ detected: boolean, evidence: { input: string[], access: string[], exfil: string[] } }}
 */
function checkTrifecta(entries) {
  const legForClass = new Map([
    ['input_source', 'input'],
    ['data_access', 'access'],
    ['exfil_sink', 'exfil'],
  ]);
  const evidence = { input: [], access: [], exfil: [] };

  for (const entry of entries) {
    if (entry.type === 'warning') continue; // marker row, not a tool call
    const label = entry.detail || entry.tool;
    for (const cls of entry.classes || []) {
      const leg = legForClass.get(cls);
      if (leg) evidence[leg].push(label);
    }
  }

  const detected = Object.values(evidence).every((items) => items.length > 0);
  return { detected, evidence };
}

/**
 * Tell whether the window already contains a trifecta warning marker
 * (an entry whose type is 'warning').
 * @param {object[]} entries
 * @returns {boolean}
 */
function hasRecentWarning(entries) {
  const isTrifectaMarker = ({ type }) => type === 'warning';
  return entries.some(isTrifectaMarker);
}

/**
 * Detect an MCP-concentrated trifecta: one MCP server that appears in the
 * input, access AND exfil legs of the window. A single server covering all
 * three legs is a stronger signal than a distributed trifecta.
 *
 * Entries of type 'warning' and entries whose tool does not resolve to an MCP
 * server (per extractMcpServer) are ignored. When several servers qualify,
 * the first one seen in the input leg is reported.
 *
 * NOTE(review): classifyToolCall() in this file tags `mcp__*` tools with
 * 'input_source' only, so for entries it produced the access/exfil sets stay
 * empty — confirm whether another classifier is expected to populate them.
 *
 * @param {object[]} entries
 * @returns {{ concentrated: boolean, server: string|null }}
 */
function checkMcpConcentration(entries) {
  const inputServers = new Set();
  const accessServers = new Set();
  const exfilServers = new Set();

  for (const entry of entries) {
    if (entry.type === 'warning') continue;
    const server = extractMcpServer(entry.tool);
    if (!server) continue;

    for (const cls of entry.classes || []) {
      switch (cls) {
        case 'input_source':
          inputServers.add(server);
          break;
        case 'data_access':
          accessServers.add(server);
          break;
        case 'exfil_sink':
          exfilServers.add(server);
          break;
        default:
          break;
      }
    }
  }

  // Sets iterate in insertion order, so this picks the earliest input server
  // that also shows up in the other two legs.
  const shared = [...inputServers].find(
    (candidate) => accessServers.has(candidate) && exfilServers.has(candidate),
  );
  return shared === undefined
    ? { concentrated: false, server: null }
    : { concentrated: true, server: shared };
}

/**
 * Check if the window combines sensitive data access with an exfiltration
 * sink. This is a high-confidence signal: data from .env/.ssh/.aws etc. being
 * sent out.
 *
 * A data_access entry counts as sensitive when either:
 *   - its detail starts with '[SENSITIVE]' (how classifyToolCall marks Read
 *     calls on sensitive paths), or
 *   - it comes from the Bash tool. classifyBashCommand only emits
 *     'data_access' when the command line matched a sensitive path pattern,
 *     but it stores the raw command as detail without the '[SENSITIVE]'
 *     prefix, so a prefix-only check would miss e.g. `cat ~/.aws/credentials`.
 *
 * Entries of type 'warning' are ignored.
 *
 * @param {object[]} entries
 * @returns {boolean}
 */
function checkSensitiveExfil(entries) {
  let hasSensitiveAccess = false;
  let hasExfil = false;

  for (const entry of entries) {
    if (entry.type === 'warning') continue;
    const classes = entry.classes || [];
    const detail = typeof entry.detail === 'string' ? entry.detail : '';

    if (classes.includes('data_access')) {
      const isTagged = detail.startsWith('[SENSITIVE]');
      const isBashRead = entry.tool === 'Bash'; // sensitive by construction
      if (isTagged || isBashRead) hasSensitiveAccess = true;
    }
    if (classes.includes('exfil_sink')) {
      hasExfil = true;
    }

    // Both halves found: no need to scan the rest of the window.
    if (hasSensitiveAccess && hasExfil) return true;
  }

  return false;
}

/**
 * Sum the recorded outputSize of every entry, skipping trifecta and volume
 * warning markers. Entries without an outputSize contribute 0.
 * @param {object[]} allEntries - All entries (not just window)
 * @returns {number} Total bytes
 */
function computeCumulativeVolume(allEntries) {
  const skippedTypes = ['warning', 'volume_warning'];
  return allEntries.reduce(
    (sum, entry) => (skippedTypes.includes(entry.type) ? sum : sum + (entry.outputSize || 0)),
    0,
  );
}

/**
 * Tell whether a volume warning marker for exactly this threshold has
 * already been recorded.
 * @param {object[]} entries
 * @param {number} thresholdBytes
 * @returns {boolean}
 */
function hasVolumeWarning(entries, thresholdBytes) {
  const isMarkerForThreshold = ({ type, threshold }) =>
    type === 'volume_warning' && threshold === thresholdBytes;
  return entries.some(isMarkerForThreshold);
}

/**
 * Render the cumulative-volume advisory text.
 * @param {number} totalBytes - cumulative output size; shown rounded to whole KB (1024 bytes)
 * @param {string} thresholdLabel - human-readable threshold, e.g. '500 KB'
 * @param {string} severity - severity tag shown in brackets
 * @returns {string}
 */
function formatVolumeWarning(totalBytes, thresholdLabel, severity) {
  const kb = Math.round(totalBytes / 1024);
  const lines = [
    `SECURITY ADVISORY (session-guard): Cumulative MCP data volume exceeded ${thresholdLabel} [${severity}].`,
    '',
    `This session has received ~${kb} KB of tool output data.`,
    'High cumulative volume may indicate bulk data harvesting or exfiltration staging (OWASP ASI02).',
    'Review whether the volume of data being processed is proportional to the task.',
  ];
  return lines.join('\n');
}

/**
 * Format the trifecta warning message.
 * Uses Rule of Two terminology (Meta, Oct 2025): A=untrusted input, B=sensitive data, C=state change/exfil.
 *
 * At most the two most recent evidence items are listed per leg.
 *
 * @param {{ input: string[], access: string[], exfil: string[] }} evidence
 * @param {{ concentrated: boolean, server: string|null }} [mcpInfo]
 * @param {boolean} [isSensitiveExfil]
 * @param {number} [windowSize=WINDOW_SIZE] - window size quoted in the message.
 *   Defaults to the configured WINDOW_SIZE so the text stays accurate when
 *   policy overrides the 20-call default.
 * @returns {string}
 */
function formatWarning(evidence, mcpInfo, isSensitiveExfil, windowSize = WINDOW_SIZE) {
  const inputEx = evidence.input.slice(-2).map(e => `    - ${e}`).join('\n');
  const accessEx = evidence.access.slice(-2).map(e => `    - ${e}`).join('\n');
  const exfilEx = evidence.exfil.slice(-2).map(e => `    - ${e}`).join('\n');

  const mcpLine = mcpInfo?.concentrated
    ? `\nRULE OF TWO VIOLATION: MCP-CONCENTRATED — All 3 legs trace to server "${mcpInfo.server}" (elevated severity).\n`
    : '';

  const sensitiveLine = isSensitiveExfil
    ? '\nRULE OF TWO VIOLATION: SENSITIVE DATA + EXFILTRATION — Sensitive paths accessed and exfil sink present.\n'
    : '';

  return (
    'SECURITY ADVISORY (session-guard): Rule of Two violation — potential lethal trifecta detected.\n\n' +
    `Within the last ${windowSize} tool calls, this session holds all 3 capabilities simultaneously:\n` +
    '  [A] Untrusted external input (prompt injection surface):\n' + inputEx + '\n' +
    '  [B] Sensitive data access:\n' + accessEx + '\n' +
    '  [C] Exfiltration-capable tool (state change):\n' + exfilEx + '\n' +
    mcpLine + sensitiveLine + '\n' +
    'Rule of Two (Meta, Oct 2025): An agent should never hold A+B+C simultaneously.\n' +
    'This combination enables prompt injection -> data theft chains (OWASP ASI01, ASI02, LLM01).\n' +
    'Review recent tool calls for unexpected behavior.'
  );
}

// ---------------------------------------------------------------------------
// Sub-agent delegation tracking (DeepMind Agent Traps kat. 4, v5.0 S4)
// ---------------------------------------------------------------------------

/**
 * Check for escalation-after-input: a delegation that follows an input_source
 * within a bounded number of tool calls. Untrusted content consumed shortly
 * before spawning a sub-agent may indicate the model is being manipulated
 * into delegating dangerous work.
 *
 * E17 (v7.2.0): returns a `tier` indicating which window matched.
 *   - `'primary'` — nearest input within `primaryWindow` calls.
 *   - `'secondary'` — nearest input within `secondaryWindow` calls but outside
 *     the primary window (slow-burn variant).
 *   - `null` (when detected=false) — no input source within either window, or
 *     the current entry is not a delegation.
 *
 * The look-back covers max(primaryWindow, secondaryWindow) calls, so a primary
 * window configured larger than the secondary one is honoured instead of
 * being silently capped at the secondary size.
 *
 * Marker entries (anything with a `type`) are ignored. The last tool entry in
 * `entries` is assumed to be the current call and is excluded from the search.
 *
 * @param {object[]} entries — recent window (long-horizon, 100-call)
 * @param {{ classes: string[] }} currentEntry — the entry just appended
 * @param {number} [primaryWindow=DELEGATION_ESCALATION_WINDOW]
 * @param {number} [secondaryWindow=DELEGATION_ESCALATION_WINDOW_MEDIUM]
 * @returns {{ detected: boolean, inputDetail: string, tier: 'primary'|'secondary'|null }}
 */
function checkEscalationAfterInput(
  entries,
  currentEntry,
  primaryWindow = DELEGATION_ESCALATION_WINDOW,
  secondaryWindow = DELEGATION_ESCALATION_WINDOW_MEDIUM,
) {
  if (!currentEntry.classes.includes('delegation')) {
    return { detected: false, inputDetail: '', tier: null };
  }

  const toolEntries = entries.filter(e => !e.type);
  const limit = Math.max(primaryWindow, secondaryWindow);
  // The `limit` tool calls immediately before the current one.
  const slice = toolEntries.slice(-(limit + 1), -1);

  // Walk newest-to-oldest so the input closest to the delegation decides the tier.
  for (let i = slice.length - 1; i >= 0; i--) {
    const entry = slice[i];
    if ((entry.classes || []).includes('input_source')) {
      // distance=1 means the input is directly before the delegation.
      const distance = slice.length - i;
      const tier = distance <= primaryWindow ? 'primary' : 'secondary';
      return {
        detected: true,
        inputDetail: entry.detail || entry.tool || 'unknown',
        tier,
      };
    }
  }
  return { detected: false, inputDetail: '', tier: null };
}

/**
 * Tell whether an escalation-after-input marker ('escalation_warning') is
 * already present among the entries.
 * @param {object[]} entries
 * @returns {boolean}
 */
function hasEscalationWarning(entries) {
  const isEscalationMarker = ({ type }) => type === 'escalation_warning';
  return entries.some(isEscalationMarker);
}

/**
 * Format the escalation-after-input warning.
 *
 * The window sizes quoted in the text come from the parameters (defaulting to
 * the configured constants), so the message stays accurate when the primary
 * window is overridden via env var or policy.
 *
 * @param {string} delegationDetail — what the delegation was for
 * @param {string} inputDetail — what input source preceded it
 * @param {'primary'|'secondary'} tier — which window matched (E17, v7.2.0)
 * @param {number} [primaryWindow=DELEGATION_ESCALATION_WINDOW]
 * @param {number} [secondaryWindow=DELEGATION_ESCALATION_WINDOW_MEDIUM]
 * @returns {string}
 */
function formatEscalationWarning(
  delegationDetail,
  inputDetail,
  tier = 'primary',
  primaryWindow = DELEGATION_ESCALATION_WINDOW,
  secondaryWindow = DELEGATION_ESCALATION_WINDOW_MEDIUM,
) {
  if (tier === 'secondary') {
    return (
      'SECURITY ADVISORY (session-guard): Slow-burn escalation-after-input detected [MEDIUM] — ' +
      'sub-agent delegation in the slow-burn window after untrusted input.\n\n' +
      `A Task/Agent delegation occurred within ${secondaryWindow} calls (` +
      `but outside the ${primaryWindow}-call primary window) of untrusted input:\n` +
      `  Input source: ${inputDetail}\n` +
      `  Delegation: ${delegationDetail}\n\n` +
      'This is a slower variant of the escalation-after-input pattern. The wider window\n' +
      'catches attackers who deliberately wait past the primary window before delegating,\n' +
      `and surfaces patterns that the primary ${primaryWindow}-call window cannot. Review whether this\n` +
      'delegation is expected and appropriately scoped.\n' +
      'Configure window via LLM_SECURITY_ESCALATION_WINDOW env var (default 5).'
    );
  }
  return (
    'SECURITY ADVISORY (session-guard): Escalation-after-input detected [MEDIUM] — ' +
    'sub-agent delegation shortly after untrusted input.\n\n' +
    `A Task/Agent delegation occurred within ${primaryWindow} calls of untrusted input:\n` +
    `  Input source: ${inputDetail}\n` +
    `  Delegation: ${delegationDetail}\n\n` +
    'Untrusted content (web pages, MCP tool output) may be influencing the model\n' +
    'to spawn sub-agents with capabilities beyond the original task scope.\n' +
    'This is a known attack vector (DeepMind AI Agent Traps, Category 4).\n' +
    'Review whether this delegation is expected and appropriately scoped.\n' +
    'Configure window via LLM_SECURITY_ESCALATION_WINDOW env var (default 5).'
  );
}

// ---------------------------------------------------------------------------
// Long-horizon monitoring (100-call window) — OpenAI Atlas, Dec 2025
// ---------------------------------------------------------------------------

/**
 * Keep only real tool-call records. Marker records (warnings etc.) are
 * recognised by having a truthy `type` and are dropped.
 * @param {object[]} entries
 * @returns {object[]}
 */
function filterToolEntries(entries) {
  const isToolCall = ({ type }) => !type;
  return entries.filter(isToolCall);
}

/**
 * Detect a slow-burn trifecta: all three legs occur in the window, and the
 * distance between the earliest first occurrence and the latest last
 * occurrence of any leg exceeds SLOW_BURN_MIN_SPREAD tool calls.
 * Catches multi-step injection chains that pace their actions to slip past
 * the short-window check.
 *
 * Positions are indexes into the tool-call entries only (markers removed).
 *
 * @param {object[]} entries - Long-horizon window entries
 * @returns {{ detected: boolean, spread: number }} spread is 0 when a leg is missing
 */
function checkSlowBurnTrifecta(entries) {
  // Per-leg index of the first and last occurrence; -1 means "not seen".
  const spans = new Map(
    ['input_source', 'data_access', 'exfil_sink'].map((leg) => [leg, { first: -1, last: -1 }]),
  );

  filterToolEntries(entries).forEach((entry, index) => {
    for (const cls of entry.classes || []) {
      const span = spans.get(cls);
      if (!span) continue;
      if (span.first === -1) span.first = index;
      span.last = index;
    }
  });

  const legSpans = [...spans.values()];
  if (legSpans.some((span) => span.first === -1)) {
    return { detected: false, spread: 0 };
  }

  const earliestFirst = Math.min(...legSpans.map((span) => span.first));
  const latestLast = Math.max(...legSpans.map((span) => span.last));
  const spread = latestLast - earliestFirst;

  return { detected: spread > SLOW_BURN_MIN_SPREAD, spread };
}

/**
 * Tell whether a slow-burn marker ('slow_burn_warning') is already present
 * among the entries.
 * @param {object[]} entries
 * @returns {boolean}
 */
function hasSlowBurnWarning(entries) {
  const isSlowBurnMarker = ({ type }) => type === 'slow_burn_warning';
  return entries.some(isSlowBurnMarker);
}

/**
 * Detect behavioral drift by comparing the tool-name distribution of the
 * first DRIFT_SAMPLE_SIZE tool calls with that of the last DRIFT_SAMPLE_SIZE
 * tool calls, using Jensen-Shannon divergence.
 *
 * Fewer than two full samples of tool calls means "not enough history": the
 * result is a non-drifted record with jsd 0 and empty tool lists.
 *
 * @param {object[]} entries
 * @returns {{ drifted: boolean, jsd: number, firstTools: string[], lastTools: string[] }}
 */
function checkBehavioralDrift(entries) {
  const calls = filterToolEntries(entries);
  if (calls.length < 2 * DRIFT_SAMPLE_SIZE) {
    return { drifted: false, jsd: 0, firstTools: [], lastTools: [] };
  }

  const toolNamesOf = (chunk) => chunk.map(({ tool }) => tool);
  const firstTools = toolNamesOf(calls.slice(0, DRIFT_SAMPLE_SIZE));
  const lastTools = toolNamesOf(calls.slice(-DRIFT_SAMPLE_SIZE));

  const jsd = jensenShannonDivergence(
    buildDistribution(firstTools),
    buildDistribution(lastTools),
  );
  const drifted = jsd > DRIFT_THRESHOLD;

  return { drifted, jsd, firstTools, lastTools };
}

/**
 * Tell whether a drift marker ('drift_warning') is already present among the
 * entries.
 * @param {object[]} entries
 * @returns {boolean}
 */
function hasDriftWarning(entries) {
  const isDriftMarker = ({ type }) => type === 'drift_warning';
  return entries.some(isDriftMarker);
}

/**
 * Summarise the N most frequent items as "name(count), name(count), …".
 * Items with equal counts keep their order of first appearance.
 * @param {string[]} items
 * @param {number} n
 * @returns {string}
 */
function topN(items, n) {
  const tally = items.reduce(
    (acc, item) => acc.set(item, (acc.get(item) ?? 0) + 1),
    new Map(),
  );
  // Array.prototype.sort is stable, so ties stay in Map insertion order.
  const ranked = Array.from(tally).sort(([, countA], [, countB]) => countB - countA);
  const labels = ranked.slice(0, n).map(([name, count]) => `${name}(${count})`);
  return labels.join(', ');
}

/**
 * Format the slow-burn trifecta warning message.
 * @param {number} spread - number of calls spanned by the trifecta legs
 * @param {number} [windowSize=LONG_HORIZON_WINDOW] - window size quoted in the
 *   message. Defaults to the configured LONG_HORIZON_WINDOW so the text stays
 *   accurate when policy overrides the 100-call default.
 * @returns {string}
 */
function formatSlowBurnWarning(spread, windowSize = LONG_HORIZON_WINDOW) {
  return (
    'SECURITY ADVISORY (session-guard): Slow-burn trifecta detected [MEDIUM] — ' +
    `Rule of Two legs spread over ${spread} calls.\n\n` +
    `Within the last ${windowSize} tool calls, all 3 capabilities appeared but spread across a wide range:\n` +
    '  [A] Untrusted external input (prompt injection surface)\n' +
    '  [B] Sensitive data access\n' +
    '  [C] Exfiltration-capable tool (state change)\n\n' +
    'This pattern may indicate a multi-step prompt injection chain (OpenAI Atlas, Dec 2025).\n' +
    'Wide spread across calls makes detection harder with short-window monitoring.'
  );
}

/**
 * Render the behavioral-drift advisory: the divergence value (3 decimals)
 * against the threshold, plus the three most used tools in each sample.
 * @param {number} jsd - Jensen-Shannon divergence between the two samples
 * @param {string[]} firstTools - tool names from the early sample
 * @param {string[]} lastTools - tool names from the recent sample
 * @returns {string}
 */
function formatDriftWarning(jsd, firstTools, lastTools) {
  const lines = [
    'SECURITY ADVISORY (session-guard): Behavioral drift detected [MEDIUM] — tool usage shift.',
    '',
    `Jensen-Shannon divergence: ${jsd.toFixed(3)} (threshold: ${DRIFT_THRESHOLD})`,
    `First ${DRIFT_SAMPLE_SIZE} calls: ${topN(firstTools, 3)}`,
    `Last ${DRIFT_SAMPLE_SIZE} calls: ${topN(lastTools, 3)}`,
    '',
    'A significant shift in tool usage patterns may indicate session hijacking or prompt injection',
    "changing the agent's behavior over time (OpenAI Atlas, Dec 2025).",
  ];
  return lines.join('\n');
}

// ---------------------------------------------------------------------------
// Output fingerprint matching (inspired by CaMeL, DeepMind 2025; v5.0 S6)
//
// NOTE: This is opportunistic byte-matching of truncated output fingerprints,
// not semantic data-flow tracking. For each tool output we record a SHA-256
// tag of its first 200 characters (truncated to 16 hex chars) together with a
// short leading snippet of the output, and report a "link" when that snippet
// appears verbatim in a later tool input. Trivially bypassed by:
//   - Mutating the leading characters of the output
//   - Summarising the output before passing it on
//   - Re-encoding (base64, JSON-escape, whitespace changes)
// Inspired by CaMeL but NOT a CaMeL capability-tracking implementation.
// ---------------------------------------------------------------------------

/**
 * Fingerprint a tool output: SHA-256 over its first 200 characters, hex
 * encoded and cut to the first 16 hex digits. The tag supports opportunistic
 * matching only; it is not semantic provenance.
 * @param {string} text - tool output text
 * @returns {string} 16-char hex hash
 */
function computeDataTag(text) {
  const hasher = createHash('sha256');
  hasher.update(text.slice(0, 200));
  return hasher.digest('hex').slice(0, 16);
}

/**
 * Extract a string representation of tool input for data flow matching.
 *
 * Only top-level values are considered: strings are used as-is, objects and
 * arrays are JSON-serialized, everything else (numbers, booleans, null,
 * undefined) is ignored. The pieces are joined with single spaces.
 *
 * `null` is skipped explicitly: `typeof null === 'object'`, so without the
 * check it would add the literal text "null" to the result.
 *
 * @param {object} toolInput
 * @returns {string} '' when toolInput is not an object
 */
function extractInputText(toolInput) {
  if (!toolInput || typeof toolInput !== 'object') return '';
  const parts = [];
  for (const val of Object.values(toolInput)) {
    if (typeof val === 'string') parts.push(val);
    else if (val !== null && typeof val === 'object') parts.push(JSON.stringify(val));
  }
  return parts.join(' ');
}

/**
 * Look for earlier tool outputs that reappear in the current tool input.
 *
 * An entry is a source when it is a tool call (no `type`), carries a
 * `dataTag`, and has an `outputSnippet` that occurs verbatim inside
 * `currentInputText`. The hash in `dataTag` is not compared; its presence
 * only gates which entries are eligible.
 *
 * Inputs that are empty or shorter than 20 characters are never linked.
 *
 * @param {object[]} entries - recent state entries
 * @param {string} currentInputText - stringified current tool input
 * @returns {{ linked: boolean, sourceEntries: object[] }}
 */
function checkDataFlowLink(entries, currentInputText) {
  const tooShortToMatch = !currentInputText || currentInputText.length < 20;
  if (tooShortToMatch) {
    return { linked: false, sourceEntries: [] };
  }

  const sourceEntries = entries.filter(
    (candidate) =>
      !candidate.type &&
      candidate.dataTag &&
      candidate.outputSnippet &&
      currentInputText.includes(candidate.outputSnippet),
  );

  return { linked: sourceEntries.length > 0, sourceEntries };
}

/**
 * Tell whether a data-flow marker ('data_flow_warning') is already present
 * among the entries.
 * @param {object[]} entries
 * @returns {boolean}
 */
function hasDataFlowWarning(entries) {
  const isDataFlowMarker = ({ type }) => type === 'data_flow_warning';
  return entries.some(isDataFlowMarker);
}

/**
 * Render the advisory for a trifecta whose legs are connected by a detected
 * data flow. Up to three source entries are listed as "tool → detail".
 * @param {{ input: string[], access: string[], exfil: string[] }} evidence - accepted for signature parity; not used in the text
 * @param {object[]} sourceEntries - entries whose output reappeared in a later input
 * @returns {string}
 */
function formatDataFlowWarning(evidence, sourceEntries) {
  const sourceLines = sourceEntries
    .slice(0, 3)
    .map((source) => `    - ${source.tool} → ${source.detail || 'unknown'}`);

  return [
    'SECURITY ADVISORY (session-guard): Data flow linked trifecta [HIGH] — CaMeL-style provenance tracking detected data flow chain.',
    '',
    'Tool output from an untrusted source appears to flow into subsequent tool inputs,',
    'creating a traceable data flow chain across the trifecta:',
    '  Data flow sources:',
    sourceLines.join('\n'),
    '',
    'This elevates the trifecta severity: data is not just co-located in the session,',
    'but actively flowing between tools in a potential injection chain (DeepMind CaMeL).',
  ].join('\n');
}

// ---------------------------------------------------------------------------
// Main
// ---------------------------------------------------------------------------

// Read the hook payload from stdin (fd 0). Unreadable or non-JSON input is
// not an error for an advisory hook: exit quietly.
let input;
try {
  const raw = readFileSync(0, 'utf-8');
  input = JSON.parse(raw);
} catch {
  process.exit(0);
}

const toolName   = input?.tool_name   ?? '';
const toolInput  = input?.tool_input  ?? {};
// Accept the tool result under either name: `tool_output` (the field this
// hook documents) or `tool_response` (the field Claude Code uses in
// PostToolUse payloads). Without the fallback, volume tracking and data-flow
// tagging see an empty output when only `tool_response` is present.
const toolOutput = input?.tool_output ?? input?.tool_response ?? '';

if (!toolName) {
  process.exit(0);
}

// Off mode: skip all detection
if (TRIFECTA_MODE === 'off') {
  process.exit(0);
}

// Compute output size for volume tracking
const outputText = typeof toolOutput === 'string' ? toolOutput : JSON.stringify(toolOutput);
const outputSize = Buffer.byteLength(outputText, 'utf-8');

// Classify the current tool call
const { classes, detail } = classifyToolCall(toolName, toolInput);

// State file management
const stateFile = getStateFilePath();
const isFirstCall = !existsSync(stateFile);

// Cleanup old state files on first call per session
if (isFirstCall) {
  cleanupOldStateFiles();
}

// Compute data tag for CaMeL-style flow tracking (v5.0 S6)
const dataTag = outputText.length >= 20 ? computeDataTag(outputText) : null;
// Store a short snippet for data flow matching: the first 50 characters of
// the trimmed output. The length check runs AFTER trimming so padded outputs
// cannot produce a tiny snippet (e.g. "ok") that matches almost any later input.
const trimmedOutput = outputText.trim();
const outputSnippet = trimmedOutput.length >= 50
  ? trimmedOutput.slice(0, 50)
  : null;

// Append current entry (with outputSize for volume tracking, dataTag for CaMeL)
const entry = {
  ts: Date.now(),
  tool: toolName,
  classes,
  detail,
  outputSize,
  ...(dataTag ? { dataTag } : {}),
  ...(outputSnippet ? { outputSnippet } : {}),
};
appendEntry(stateFile, entry);

// Advisory messages collected by the detection steps that follow.
const messages = [];

// --- Trifecta detection (skip for neutral-only and delegation-only calls) ---
if (!(classes.length === 1 && (classes[0] === 'neutral' || classes[0] === 'delegation'))) {
  const window = readLastEntries(stateFile, WINDOW_SIZE);
  const { detected, evidence } = checkTrifecta(window);

  if (detected && !hasRecentWarning(window)) {
    const mcpInfo = checkMcpConcentration(window);
    const sensitiveExfil = checkSensitiveExfil(window);
    messages.push(formatWarning(evidence, mcpInfo, sensitiveExfil));
    appendEntry(stateFile, { type: 'warning', ts: Date.now() });
    writeAuditEvent({
      event_type: 'trifecta_warning',
      severity: mcpInfo.concentrated || sensitiveExfil ? 'critical' : 'high',
      source: 'post-session-guard',
      details: { evidence, mcp_concentrated: mcpInfo.concentrated, sensitive_exfil: sensitiveExfil },
      owasp: ['ASI01', 'ASI02', 'LLM01'],
      action_taken: TRIFECTA_MODE === 'block' ? 'blocked' : 'warned',
    });

    // --- Rule of Two: Block mode ---
    // v7.1.0 B2 fix: block mode blocks on any detected trifecta, not only
    // MCP-concentrated or sensitive-path cases. Distributed trifectas
    // (different sources, non-sensitive path, non-sensitive sink) were
    // previously only warned — a mismatch with the documented semantics
    // of block mode. The severity gate below (critical vs high) remains:
    // distributed trifectas are blocked with high-severity framing; MCP-
    // concentrated and sensitive-exfil cases are blocked with critical-
    // severity framing.
    if (TRIFECTA_MODE === 'block') {
      let context;
      if (mcpInfo.concentrated) {
        context = `  MCP-concentrated: all 3 legs via server "${mcpInfo.server}"\n`;
      } else if (sensitiveExfil) {
        context = '  Sensitive data access combined with exfiltration sink\n';
      } else {
        context = '  Distributed trifecta: three legs from different sources\n';
      }
      process.stderr.write(
        'BLOCKED: Rule of Two violation — lethal trifecta detected.\n' +
        context +
        '  Set LLM_SECURITY_TRIFECTA_MODE=warn to downgrade to advisory.\n'
      );
      process.stdout.write(JSON.stringify({ decision: 'block' }));
      process.exit(2);
    }
  }
}

// --- Escalation-after-input detection (E17 v7.2.0: primary + secondary window) ---
// Primary window is DELEGATION_ESCALATION_WINDOW (default 5, env-configurable);
// the secondary, slow-burn window is DELEGATION_ESCALATION_WINDOW_MEDIUM (20)
// and yields a MEDIUM advisory with its own message. The look-back below is
// sized so that the secondary window is fully covered.
if (classes.includes('delegation')) {
  const lookback = Math.max(WINDOW_SIZE, DELEGATION_ESCALATION_WINDOW_MEDIUM + 5);
  const history = readLastEntries(stateFile, lookback);
  const { detected, inputDetail, tier } = checkEscalationAfterInput(history, entry);

  // Stay quiet when an escalation warning is already present in the look-back.
  const shouldWarn = detected && !hasEscalationWarning(history);
  if (shouldWarn) {
    messages.push(formatEscalationWarning(detail, inputDetail, tier));
    appendEntry(stateFile, { type: 'escalation_warning', ts: Date.now(), tier });
    writeAuditEvent({
      event_type: 'escalation_after_input',
      severity: 'medium',
      source: 'post-session-guard',
      details: { tool: detail, input_source: inputDetail, tier },
      owasp: ['ASI01'],
      action_taken: 'warned',
    });
  }
}

// --- CaMeL data flow check (v5.0 S6) ---
// Looks for text in the current tool input that was seen in an earlier tool
// output. A warning is raised only when such a link exists AND the same
// window also contains a trifecta. Neutral-only calls and inputs shorter
// than 20 characters are not examined.
{
  const neutralOnly = classes.length === 1 && classes[0] === 'neutral';
  const inputText = neutralOnly ? '' : extractInputText(toolInput);

  if (inputText.length >= 20) {
    const recent = readLastEntries(stateFile, WINDOW_SIZE);
    const { linked, sourceEntries } = checkDataFlowLink(recent, inputText);

    // Only evaluate the trifecta when a fresh (not yet warned) link exists.
    const trifecta = linked && !hasDataFlowWarning(recent) ? checkTrifecta(recent) : null;

    if (trifecta?.detected) {
      const { evidence } = trifecta;
      messages.push(formatDataFlowWarning(evidence, sourceEntries));
      appendEntry(stateFile, { type: 'data_flow_warning', ts: Date.now() });
      writeAuditEvent({
        event_type: 'data_flow_trifecta',
        severity: 'high',
        source: 'post-session-guard',
        details: { evidence, flow_sources: sourceEntries.length },
        owasp: ['ASI01', 'ASI02'],
        action_taken: 'warned',
      });
    }
  }
}

// --- Cumulative volume tracking ---
// Runs only when the current call produced output.
if (outputSize > 0) {
  const sessionEntries = readLastEntries(stateFile, 10_000); // read all
  const totalVolume = computeCumulativeVolume(sessionEntries);

  // Walk VOLUME_THRESHOLDS in its declared order and report the first
  // threshold that is reached but has no warning recorded yet; every
  // threshold is therefore warned about at most once.
  for (const threshold of VOLUME_THRESHOLDS) {
    const { bytes, label, severity } = threshold;
    const notDue = !(totalVolume >= bytes) || hasVolumeWarning(sessionEntries, bytes);
    if (notDue) {
      continue;
    }

    messages.push(formatVolumeWarning(totalVolume, label, severity));
    appendEntry(stateFile, { type: 'volume_warning', ts: Date.now(), threshold: bytes });
    writeAuditEvent({
      event_type: 'volume_threshold',
      severity: severity.toLowerCase(),
      source: 'post-session-guard',
      details: { total_bytes: totalVolume, threshold: label },
      owasp: ['ASI02'],
      action_taken: 'warned',
    });
    break; // a single threshold warning per call
  }
}

// --- Long-horizon monitoring (100-call window) ---
{
  const history = readLastEntries(stateFile, LONG_HORIZON_WINDOW);

  // Shared bookkeeping for both detectors: queue the advisory text, persist a
  // marker entry of the given type, then record a medium-severity audit event.
  const raise = (text, markerType, eventType, details) => {
    messages.push(text);
    appendEntry(stateFile, { type: markerType, ts: Date.now() });
    writeAuditEvent({
      event_type: eventType,
      severity: 'medium',
      source: 'post-session-guard',
      details,
      owasp: ['ASI06', 'ASI08'],
      action_taken: 'warned',
    });
  };

  // Slow-burn trifecta: all 3 legs spread over >50 calls
  const slowBurn = checkSlowBurnTrifecta(history);
  if (slowBurn.detected && !hasSlowBurnWarning(history)) {
    const { spread } = slowBurn;
    raise(formatSlowBurnWarning(spread), 'slow_burn_warning', 'slow_burn_trifecta', { spread });
  }

  // Behavioral drift: JSD on tool distribution (first vs last DRIFT_SAMPLE_SIZE)
  const drift = checkBehavioralDrift(history);
  if (drift.drifted && !hasDriftWarning(history)) {
    const { jsd, firstTools, lastTools } = drift;
    raise(
      formatDriftWarning(jsd, firstTools, lastTools),
      'drift_warning',
      'behavioral_drift',
      { jsd, first_tools: firstTools, last_tools: lastTools }
    );
  }
}

// Emit every collected advisory as one systemMessage, with a horizontal-rule
// separator between individual warnings. Nothing is written when no detector
// produced a message.
if (messages.length !== 0) {
  const payload = { systemMessage: messages.join('\n\n---\n\n') };
  process.stdout.write(JSON.stringify(payload));
}

// Default: advisory only (warn mode)
process.exit(0);