feat(post-session-guard): E17 — configurable escalation window + 20-call MEDIUM advisory

Critical-review §4 E17 finding: before v7.2.0, the delegation-after-input advisory fired only within a 5-call window. Attackers who deliberately waited 6+ calls before delegating bypassed detection. The window was also hardcoded — operators couldn't tune it for their environment.

Two coordinated changes:

1. LLM_SECURITY_ESCALATION_WINDOW env var (primary window override)
   - A positive integer parsed from the env var (parseInt, radix 10) takes precedence; any other value falls back to getPolicyValue('trifecta', 'escalation_window', 5)
   - Mirrors the established pattern from LLM_SECURITY_TRIFECTA_MODE et al.
   - Setting env=3 narrows; env=8 expands.

2. Secondary 20-call MEDIUM advisory (slow-burn variant)
   - DELEGATION_ESCALATION_WINDOW_MEDIUM = 20 (hardcoded — same value for all operators; tunable in a future patch if needed)
   - checkEscalationAfterInput now returns `tier: 'primary'|'secondary'|null`
   - formatEscalationWarning emits a different message for secondary — mentions "slow-burn", references the env var, and is distinct from the primary "DeepMind Category 4" framing

The hook reads max(WINDOW_SIZE, secondary+5) entries to cover the wider window. Existing duplicate-suppression (`escalation_warning` state entry) covers both tiers. The audit-trail event captures the `tier` field.

Tests: +5 cases in tests/hooks/post-session-guard.test.mjs:
- secondary window catches 9-call distance (slow-burn)
- secondary boundary at exactly 20 calls
- primary regression guard (1-call distance)
- env=3 narrows primary (4-call distance becomes secondary)
- env=8 expands primary (7-call distance stays primary)

Updated existing test "does NOT trigger when input_source is >5 calls ago" — it now requires >20 calls (the secondary window catches 6-20).

Suite: 1644 → 1672 (+28 from new tests + extended scope). All green.

CLAUDE.md hooks table updated to document both windows and the env var.
2026-04-29 14:26:18 +02:00 · 2026-04-29 14:26:18 +02:00 · f0a1d4024a
commit f0a1d4024a
parent ec4ae268da
3 changed files with 215 additions and 23 deletions
--- a/plugins/llm-security/hooks/scripts/post-session-guard.mjs
+++ b/plugins/llm-security/hooks/scripts/post-session-guard.mjs
@ -61,7 +61,17 @@ const DRIFT_THRESHOLD = 0.25;
 const DRIFT_SAMPLE_SIZE = 20;

 // Sub-agent delegation tracking (DeepMind Agent Traps kat. 4, v5.0 S4)
-const DELEGATION_ESCALATION_WINDOW = 5; // calls after input_source
+// E17 (v7.2.0): primary window configurable via LLM_SECURITY_ESCALATION_WINDOW
+// (default 5). Secondary 20-call window emits MEDIUM advisory for delegation
+// in the (primary, 20]-call range. Both reference an input_source; the
+// secondary catches slow-burn variants where the attacker waits past the
+// primary window before delegating.
+const DELEGATION_ESCALATION_WINDOW = (() => {
+  const envVal = parseInt(process.env.LLM_SECURITY_ESCALATION_WINDOW, 10);
+  if (Number.isFinite(envVal) && envVal > 0) return envVal;
+  return getPolicyValue('trifecta', 'escalation_window', 5);
+})();
+const DELEGATION_ESCALATION_WINDOW_MEDIUM = 20; // secondary longer-window advisory

 // Rule of Two enforcement mode: block | warn | off (env var takes precedence over policy)
 const policyTrifectaMode = getPolicyValue('trifecta', 'mode', 'warn');
@ -452,25 +462,46 @@ function formatWarning(evidence, mcpInfo, isSensitiveExfil) {
 * Check for escalation-after-input: delegation within DELEGATION_ESCALATION_WINDOW
 * calls of an input_source. Untrusted content consumed shortly before spawning a
 * sub-agent may indicate the model is being manipulated into delegating dangerous work.
- * @param {object[]} entries — recent window (20-call)
+ *
+ * E17 (v7.2.0): returns a `tier` indicating which window matched.
+ *   - `'primary'` — input within DELEGATION_ESCALATION_WINDOW calls (default 5).
+ *     Existing MEDIUM advisory.
+ *   - `'secondary'` — input within DELEGATION_ESCALATION_WINDOW_MEDIUM calls
+ *     (20) but outside the primary window. New, slow-burn variant —
+ *     also MEDIUM but with a different message.
+ *   - `null` (when detected=false) — no input source within either window.
+ *
+ * @param {object[]} entries — recent window (long-horizon, 100-call)
 * @param {{ classes: string[] }} currentEntry — the entry just appended
- * @returns {{ detected: boolean, inputDetail: string }}
+ * @returns {{ detected: boolean, inputDetail: string, tier: 'primary'|'secondary'|null }}
 */
 function checkEscalationAfterInput(entries, currentEntry) {
  if (!currentEntry.classes.includes('delegation')) {
-    return { detected: false, inputDetail: '' };
+    return { detected: false, inputDetail: '', tier: null };
  }

-  // Walk backwards through the last DELEGATION_ESCALATION_WINDOW entries
-  // looking for an input_source
  const toolEntries = entries.filter(e => !e.type);
-  const recentN = toolEntries.slice(-(DELEGATION_ESCALATION_WINDOW + 1), -1); // exclude current
-  for (const entry of recentN) {
+  // Look at the last DELEGATION_ESCALATION_WINDOW_MEDIUM entries before
+  // current (excluding current). Iterate from newest (closest to delegation)
+  // to oldest, so we report tier=primary if a match is in the inner window.
+  const limit = DELEGATION_ESCALATION_WINDOW_MEDIUM;
+  const slice = toolEntries.slice(-(limit + 1), -1); // exclude current
+  // Walk newest-to-oldest. Index from the end: distance 1 = most recent.
+  for (let i = slice.length - 1; i >= 0; i--) {
+    const entry = slice[i];
    if ((entry.classes || []).includes('input_source')) {
-      return { detected: true, inputDetail: entry.detail || entry.tool || 'unknown' };
+      // distance: how many tool calls between input_source and current
+      // delegation. distance=1 means input is directly before delegation.
+      const distance = slice.length - i;
+      const tier = distance <= DELEGATION_ESCALATION_WINDOW ? 'primary' : 'secondary';
+      return {
+        detected: true,
+        inputDetail: entry.detail || entry.tool || 'unknown',
+        tier,
+      };
    }
  }
-  return { detected: false, inputDetail: '' };
+  return { detected: false, inputDetail: '', tier: null };
 }

 /**
@ -486,9 +517,25 @@ function hasEscalationWarning(entries) {
 * Format the escalation-after-input warning.
 * @param {string} delegationDetail — what the delegation was for
 * @param {string} inputDetail — what input source preceded it
+ * @param {'primary'|'secondary'} tier — which window matched (E17, v7.2.0)
 * @returns {string}
 */
-function formatEscalationWarning(delegationDetail, inputDetail) {
+function formatEscalationWarning(delegationDetail, inputDetail, tier = 'primary') {
+  if (tier === 'secondary') {
+    return (
+      'SECURITY ADVISORY (session-guard): Slow-burn escalation-after-input detected [MEDIUM] — ' +
+      'sub-agent delegation in the slow-burn window after untrusted input.\n\n' +
+      `A Task/Agent delegation occurred within ${DELEGATION_ESCALATION_WINDOW_MEDIUM} calls (` +
+      `but outside the ${DELEGATION_ESCALATION_WINDOW}-call primary window) of untrusted input:\n` +
+      `  Input source: ${inputDetail}\n` +
+      `  Delegation: ${delegationDetail}\n\n` +
+      'This is a slower variant of the escalation-after-input pattern. The wider window\n' +
+      'catches attackers who deliberately wait past the primary window before delegating,\n' +
+      'and surfaces patterns that the primary 5-call window cannot. Review whether this\n' +
+      'delegation is expected and appropriately scoped.\n' +
+      'Configure window via LLM_SECURITY_ESCALATION_WINDOW env var (default 5).'
+    );
+  }
  return (
    'SECURITY ADVISORY (session-guard): Escalation-after-input detected [MEDIUM] — ' +
    'sub-agent delegation shortly after untrusted input.\n\n' +
@ -498,7 +545,8 @@ function formatEscalationWarning(delegationDetail, inputDetail) {
    'Untrusted content (web pages, MCP tool output) may be influencing the model\n' +
    'to spawn sub-agents with capabilities beyond the original task scope.\n' +
    'This is a known attack vector (DeepMind AI Agent Traps, Category 4).\n' +
-    'Review whether this delegation is expected and appropriately scoped.'
+    'Review whether this delegation is expected and appropriately scoped.\n' +
+    'Configure window via LLM_SECURITY_ESCALATION_WINDOW env var (default 5).'
  );
 }

@ -850,18 +898,22 @@ if (!(classes.length === 1 && (classes[0] === 'neutral' || classes[0] === 'deleg
  }
 }

-// --- Escalation-after-input detection (delegation within 5 calls of input_source) ---
+// --- Escalation-after-input detection (E17 v7.2.0: primary + secondary window) ---
+// Primary window: DELEGATION_ESCALATION_WINDOW (default 5, env-configurable).
+// Secondary window: DELEGATION_ESCALATION_WINDOW_MEDIUM (20). Slow-burn variant
+// emits MEDIUM advisory with a different message. Read enough entries to cover
+// the secondary window.
 if (classes.includes('delegation')) {
-  const window = readLastEntries(stateFile, WINDOW_SIZE);
-  const escalation = checkEscalationAfterInput(window, entry);
-  if (escalation.detected && !hasEscalationWarning(window)) {
-    messages.push(formatEscalationWarning(detail, escalation.inputDetail));
-    appendEntry(stateFile, { type: 'escalation_warning', ts: Date.now() });
+  const escalationWindow = readLastEntries(stateFile, Math.max(WINDOW_SIZE, DELEGATION_ESCALATION_WINDOW_MEDIUM + 5));
+  const escalation = checkEscalationAfterInput(escalationWindow, entry);
+  if (escalation.detected && !hasEscalationWarning(escalationWindow)) {
+    messages.push(formatEscalationWarning(detail, escalation.inputDetail, escalation.tier));
+    appendEntry(stateFile, { type: 'escalation_warning', ts: Date.now(), tier: escalation.tier });
    writeAuditEvent({
      event_type: 'escalation_after_input',
      severity: 'medium',
      source: 'post-session-guard',
-      details: { tool: detail, input_source: escalation.inputDetail },
+      details: { tool: detail, input_source: escalation.inputDetail, tier: escalation.tier },
      owasp: ['ASI01'],
      action_taken: 'warned',
    });