diff --git a/plugins/llm-security/agents/mcp-scanner-agent.md b/plugins/llm-security/agents/mcp-scanner-agent.md index 27cec3f..a38ff92 100644 --- a/plugins/llm-security/agents/mcp-scanner-agent.md +++ b/plugins/llm-security/agents/mcp-scanner-agent.md @@ -416,8 +416,8 @@ server.js:142 — fetch('https://api.example.com/collect', { body: JSON.stringif | **Medium** | Meaningful risk, requires attention | Excessive permissions vs. stated purpose, missing input validation on tool args, remote feature flags without disclosure, plaintext tokens in config | | **Low** | Informational or best-practice gap | Unlocked dependency versions, missing README documentation, overly broad but not harmful env var access | -**Unified verdict:** `BLOCK` if Critical >= 1 OR score >= 61. `WARNING` if High >= 1 OR score >= 21. Otherwise `ALLOW`. -**Risk score:** `min((Critical × 25) + (High × 10) + (Medium × 4) + (Low × 1), 100)`. +**Unified verdict:** `BLOCK` if Critical ≥ 1 OR score ≥ 65. `WARNING` if High ≥ 1 OR score ≥ 15. Otherwise `ALLOW`. (v2 model — severity-dominated, see `scanners/lib/severity.mjs`.) +**Risk score:** `riskScore(counts)` — severity-dominated, log-scaled per tier. Critical present → 70-95; High only → 40-65; Medium only → 15-35; Low only → 1-11. `info` is scoring-inert. **Always include** the `owasp` field (e.g., "LLM01", "LLM03") in every finding for OWASP categorization. --- diff --git a/plugins/llm-security/agents/posture-assessor-agent.md b/plugins/llm-security/agents/posture-assessor-agent.md index 176ec8f..9897604 100644 --- a/plugins/llm-security/agents/posture-assessor-agent.md +++ b/plugins/llm-security/agents/posture-assessor-agent.md @@ -373,7 +373,7 @@ After completing all 10 categories: - 3 or more Critical-severity findings from any source Also compute and display the **risk score** (0-100) and **risk band** alongside the grade. 
-Use the formula: `score = min((Critical × 25) + (High × 10) + (Medium × 4) + (Low × 1), 100)` +Use the v2 model: `score = riskScore(counts)` (severity-dominated, log-scaled per tier — see `scanners/lib/severity.mjs`). Critical present → 70-95; High only → 40-65; Medium only → 15-35; Low only → 1-11. Verdict: critical ≥ 1 OR score ≥ 65 → BLOCK; high ≥ 1 OR score ≥ 15 → WARNING; else ALLOW. `info` is scoring-inert. --- diff --git a/plugins/llm-security/commands/audit.md b/plugins/llm-security/commands/audit.md index 81ea2fe..0904c5e 100644 --- a/plugins/llm-security/commands/audit.md +++ b/plugins/llm-security/commands/audit.md @@ -43,7 +43,7 @@ After skill scan, spawn `subagent_type: "llm-security:mcp-scanner-agent"`, `mode ## Step 5: Generate Report Merge posture scanner JSON + agent findings. Use the posture scanner's grade as the baseline. -Recalculate `risk_score = min(100, critical*25 + high*10 + medium*4 + low*1)` including agent findings. +Recalculate `risk_score = riskScore(counts)` (severity-dominated v2 model — see `scanners/lib/severity.mjs`) including agent findings. Output: Risk Dashboard, Executive Summary, 10 Category Sections (use scanner evidence + agent narrative), Summary Table, Action Items (IMMEDIATE → HIGH → MEDIUM). diff --git a/plugins/llm-security/commands/scan.md b/plugins/llm-security/commands/scan.md index 5179b19..e30b0ed 100644 --- a/plugins/llm-security/commands/scan.md +++ b/plugins/llm-security/commands/scan.md @@ -110,8 +110,8 @@ Otherwise (local scan — direct mode): ## Step 5: Aggregate and Report -Combine counts. `risk_score = min(100, critical*25 + high*10 + medium*4 + low*1)`. -Verdict: critical≥1 OR score≥61 → BLOCK, high≥1 OR score≥21 → WARNING, else ALLOW. +Combine counts. `risk_score = riskScore(counts)` (severity-dominated v2 model — see `scanners/lib/severity.mjs`). +Verdict: critical ≥ 1 OR score ≥ 65 → BLOCK; high ≥ 1 OR score ≥ 15 → WARNING; else ALLOW. 
Output banner then all findings grouped by severity (critical→info). Each finding: `### [SEV] Title` with Category, File:line, OWASP, Evidence, Remediation. diff --git a/plugins/llm-security/tests/lib/doc-consistency.test.mjs b/plugins/llm-security/tests/lib/doc-consistency.test.mjs new file mode 100644 index 0000000..ac2d2f6 --- /dev/null +++ b/plugins/llm-security/tests/lib/doc-consistency.test.mjs @@ -0,0 +1,86 @@ +// doc-consistency.test.mjs — Static asserts that prose documentation +// stays aligned with the v2 risk-scoring model in scanners/lib/severity.mjs. +// +// Background: v7.0.0 introduced the severity-dominated v2 risk-score model +// (BLOCK ≥65, WARNING ≥15) but several prose surfaces (commands/, agents/) +// continued to emit the v1 formula (`critical*25 + ...`, BLOCK ≥61, +// WARNING ≥21). v7.1.1 fixed two of them (agents/skill-scanner-agent.md, +// templates/unified-report.md). Batch B → v7.2.0 closes the trifecta: +// commands/scan.md, commands/audit.md, agents/mcp-scanner-agent.md. +// +// This test pins the closure. If any future edit re-introduces v1 formula +// tokens in commands/ or agents/, this test fails fast. + +import { describe, it } from 'node:test'; +import assert from 'node:assert/strict'; +import { readdirSync, readFileSync, statSync } from 'node:fs'; +import { join, dirname, resolve } from 'node:path'; +import { fileURLToPath } from 'node:url'; + +const __filename = fileURLToPath(import.meta.url); +const PLUGIN_ROOT = resolve(dirname(__filename), '..', '..'); + +// v1 formula tokens that must NOT appear in commands/ or agents/. +// These are the patterns the brief's verification step 4 grep checks. 
// Patterns are case-insensitive so that capitalisation variants of the v1
// formula ("Critical*25", "critical × 25", "Score >= 61") cannot slip past
// the guard. None of them carries the `g`/`y` flag, so `.test()` is stateless.
const V1_TOKENS = [
  /\bscore\s*[><]?=\s*61\b/i, // verdict cutoff
  /\bscore\s*[><]?=\s*21\b/i, // verdict cutoff
  /score\s*≥\s*61/i, // unicode variant
  /score\s*≥\s*21/i, // unicode variant
  /critical\s*\*\s*25/i, // formula multiplier
  /Critical\s*[×x]\s*25/i, // formula multiplier (table form)
  /min\(\s*100\s*,\s*critical\s*\*\s*25/i, // full v1 formula prefix
];

/**
 * Recursively yields the path of every entry below `dir` whose name ends
 * with `.md`.
 *
 * Entries are visited in sorted order so that the order in which tests are
 * registered does not depend on the order the filesystem returns them in.
 *
 * @param {string} dir - Directory to walk.
 * @yields {string} Path of a markdown file, built with `join(dir, entry)`.
 */
function* walkMarkdown(dir) {
  for (const entry of readdirSync(dir).sort()) {
    const full = join(dir, entry);
    if (statSync(full).isDirectory()) {
      yield* walkMarkdown(full);
    } else if (entry.endsWith('.md')) {
      yield full;
    }
  }
}

describe('doc-consistency — v1 risk-formula tokens are absent from prose', () => {
  for (const dirName of ['commands', 'agents']) {
    // Materialise the walk once so the file list can be both sanity-checked
    // and iterated.
    const files = [...walkMarkdown(join(PLUGIN_ROOT, dirName))];

    // Without this check an empty (or emptied) directory would register no
    // tests at all and the guard would pass without checking anything.
    it(`${dirName}/ contains markdown files to check`, () => {
      assert.ok(
        files.length > 0,
        `no .md files found under ${dirName}/ — the v1-token guard would pass vacuously`,
      );
    });

    for (const file of files) {
      // Every walked path is PLUGIN_ROOT + one separator + the relative part,
      // so slicing by length strips the prefix whatever the platform's
      // separator is (a hard-coded '/' would leave Windows paths untouched).
      const rel = file.slice(PLUGIN_ROOT.length + 1);

      it(`${rel} contains no v1 formula tokens`, () => {
        const content = readFileSync(file, 'utf-8');
        // Collect every offending pattern so one run reports all of them.
        const offenders = V1_TOKENS.filter((token) => token.test(content)).map(String);
        assert.deepEqual(
          offenders,
          [],
          `${rel} still contains v1 formula token(s) matching ${offenders.join(', ')}. ` +
            `v7.2.0 unified all command/agent prose to v2 (BLOCK ≥65, WARNING ≥15). ` +
            `If a new file legitimately needs to reference v1 (e.g. CHANGELOG history), ` +
            `move that file out of commands/ or agents/.`,
        );
      });
    }
  }
});

describe('doc-consistency — v2 cutoffs are documented in unified prose', () => {
  // Reads a documentation file addressed relative to the plugin root.
  const readDoc = (...segments) => readFileSync(join(PLUGIN_ROOT, ...segments), 'utf-8');

  it('commands/scan.md mentions the v2 BLOCK cutoff (≥ 65)', () => {
    assert.match(readDoc('commands', 'scan.md'), /score\s*[≥>=]+\s*65/);
  });

  it('commands/audit.md references riskScore() (v2 helper)', () => {
    assert.match(readDoc('commands', 'audit.md'), /riskScore/);
  });

  it('agents/mcp-scanner-agent.md mentions the v2 BLOCK cutoff (≥ 65)', () => {
    assert.match(readDoc('agents', 'mcp-scanner-agent.md'), /score\s*[≥>=]+\s*65/);
  });
});