fix(llm-security): A2 batch — JSDoc arithmetic + co-monotonicity test + CaMeL tone-down

Closes A2 of v7.1.0 critical-review patch (docs/critical-review-2026-04-20.md): - B4 (severity JSDoc): 4 critical = 93, not 90. Fixed in scanners/lib/severity.mjs:23 and CHANGELOG.md v7.0.0 tier description. The actual computation has always been 93 (70 + log2(5)*10 = 93.22 → round); only the docs were wrong. - §5.4 co-monotonicity: new sweep test in tests/lib/severity.test.mjs over 15 representative count vectors. Asserts that (verdict, riskBand) agree under the v7.0.0 contract for every case — catches future drift between riskScore tiers, verdict cutoffs, and riskBand cutoffs. Includes a B4 anchor test (riskScore {critical: 4} === 93) so doc/code drift fails loudly. - B8 (CaMeL claims toned down): post-session-guard.mjs:646 comment block and CLAUDE.md:184 Defense Philosophy bullet now describe the implementation honestly — opportunistic byte-matching of truncated output fingerprints (first 200 bytes, SHA-256/16-hex), not semantic data-flow tracking. Trivially bypassed by mutation, summarisation, or re-encoding. Inspired by CaMeL (DeepMind 2025), but not a CaMeL capability-tracking implementation. Tests: 1495 → 1511 (+16: 15 sweep cases + 1 B4 anchor). All green.
2026-04-29 11:49:08 +02:00 · 2026-04-29 11:49:08 +02:00 · 4aa5318bcb
commit 4aa5318bcb
parent 36be963d4d
5 changed files with 84 additions and 6 deletions
--- a/plugins/llm-security/tests/lib/severity.test.mjs
+++ b/plugins/llm-security/tests/lib/severity.test.mjs
@@ -235,6 +235,74 @@ describe('riskBand (v7.0.0 cutoffs: 14/39/64/84)', () => {
  });
 });

+// ---------------------------------------------------------------------------
+// Verdict / riskBand co-monotonicity sweep (critical-review §5.4)
+//
+// Asserts that for every representative count vector, (verdict, riskBand)
+// agree under the v7.0.0 contract:
+//   BLOCK   ⇔ band ∈ {Critical, Extreme} OR critical ≥ 1
+//   WARNING ⇔ band ∈ {Medium, High}      OR (high ≥ 1 AND verdict != BLOCK)
+//   ALLOW   ⇔ band == Low                AND no high/critical
+//
+// Catches regressions where a future change to riskScore tiers, verdict
+// cutoffs, or riskBand cutoffs would re-introduce contradictions like
+// "ALLOW + High band" or "BLOCK + Medium band".
+// ---------------------------------------------------------------------------
+
+describe('verdict/riskBand co-monotonicity (v7.0.0 §5.4)', () => { // one generated test per count vector, plus a fixed-score anchor test
+  const cases = [ // severity count vectors passed to riskScore() and verdict(); most set a single key
+    { critical: 0, high: 0, medium: 0, low: 0, info: 0 },
+    { low: 1 },
+    { low: 10 },
+    { medium: 1 },
+    { medium: 5 },
+    { medium: 50 },
+    { high: 1 },
+    { high: 5 },
+    { high: 7 },
+    { high: 8 },
+    { high: 17 },
+    { critical: 1 },
+    { critical: 2 },
+    { critical: 4 },
+    { critical: 10 },
+  ];
+
+  for (const counts of cases) {
+    const label = JSON.stringify(counts); // reused in the test title and in every failure message
+    it(`(${label}) — verdict and riskBand agree`, () => {
+      const score = riskScore(counts);
+      const v = verdict(counts);
+      const band = riskBand(score); // band is derived from the score, verdict from the raw counts
+      const hasCritical = (counts.critical || 0) >= 1; // a missing key is treated as 0
+      const hasHigh = (counts.high || 0) >= 1; // a missing key is treated as 0
+
+      if (v === 'BLOCK') { // BLOCK must be backed by a Critical/Extreme band or at least one critical finding
+        assert.ok(
+          band === 'Critical' || band === 'Extreme' || hasCritical,
+          `BLOCK requires Critical/Extreme band or critical>=1; got band=${band}, score=${score}, counts=${label}`,
+        );
+      } else if (v === 'WARNING') { // WARNING must be backed by a Medium/High band or at least one high finding
+        assert.ok(
+          band === 'Medium' || band === 'High' || hasHigh,
+          `WARNING requires Medium/High band or high>=1; got band=${band}, score=${score}, counts=${label}`,
+        );
+        assert.ok(!hasCritical, `WARNING must not have critical>=1; counts=${label}`);
+      } else { // any other verdict value must be exactly ALLOW, with a Low band and no high/critical findings
+        assert.equal(v, 'ALLOW');
+        assert.equal(band, 'Low', `ALLOW requires Low band; got band=${band}, score=${score}, counts=${label}`);
+        assert.ok(!hasHigh && !hasCritical, `ALLOW must not have high/critical>=1; counts=${label}`);
+      }
+    });
+  }
+
+  it('JSDoc arithmetic anchor — 4 critical = 93 (not 90)', () => {
+    // Pin against doc/code drift documented in critical-review §5 (B4).
+    // 70 + min(25, log2(5)*10) = 70 + 23.219... = 93.219 → round → 93.
+    assert.equal(riskScore({ critical: 4 }), 93); // exact expected score, so a formula or doc change fails here
+  });
+});
+
 // ---------------------------------------------------------------------------
 // gradeFromPassRate
 // ---------------------------------------------------------------------------