From 001df2ebe833e8a1db59157c96d771b2e8fcec4e Mon Sep 17 00:00:00 2001 From: Kjell Tore Guttormsen Date: Thu, 30 Apr 2026 16:49:01 +0200 Subject: [PATCH] =?UTF-8?q?feat(commands):=20E14=20part=203=20=E2=80=94=20?= =?UTF-8?q?/security=20mcp-baseline-reset=20slash=20command?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Wave C step C3: closes E14 with the user-facing reset command. After a legitimate MCP server upgrade the sticky baseline (added in C1) becomes a stale "what the tool used to say" anchor and every subsequent post-mcp-verify advisory will re-flag the change. /security mcp-baseline-reset lets the user acknowledge the upgrade so the next call seeds a fresh baseline. New files: - scanners/mcp-baseline-reset.mjs — small CLI wrapper around clearBaseline / listBaselines. Modes: --list (read-only), --target , no-args (all). Outputs JSON summary on stdout. Exit 0 always (idempotent). - commands/mcp-baseline-reset.md — dispatcher following mcp-inspect.md shape. Frontmatter: name=security:mcp-baseline-reset, sonnet model, Read/Bash/AskUserQuestion tools. 4-step body (list -> confirm scope -> execute -> confirm result). - tests/scanners/mcp-baseline-reset.test.mjs — 10 CLI tests across --list, --target, clear-all, idempotency, history preservation, and bare-positional sugar. Updated: - commands/security.md — new row in commands table after mcp-inspect. - CLAUDE.md — new commands-table row + new v7.3.0 narrative section describing the baseline schema, cumulative-drift detection, reset semantics, and the LLM_SECURITY_MCP_CACHE_FILE override. - Plugin README.md — new MCP-baseline-reset row in commands table, scanner count 12 standalone -> 13 standalone, new "MCP Description Drift (E14, v7.3.0)" subsection explaining the sticky baseline, cumulative threshold, reset semantics, and env-var override. 
- Root marketplace README.md — scanner count 22 -> 23 (10 orchestrated + 13 standalone), command count 19 -> 20, test count 1511 -> 1768. Wave C complete: 1738 -> 1768 tests (+30 across C1/C2/C3). Per plan, Wave C does NOT bump the plugin version — that lands at the wave-bundle release. The advisory text in post-mcp-verify already references the new command path so the user has a ready remediation step. Co-Authored-By: Claude Opus 4.7 --- README.md | 6 +- plugins/llm-security/CLAUDE.md | 21 ++ plugins/llm-security/README.md | 12 +- .../commands/mcp-baseline-reset.md | 74 ++++++ plugins/llm-security/commands/security.md | 1 + .../scanners/mcp-baseline-reset.mjs | 101 ++++++++ .../scanners/mcp-baseline-reset.test.mjs | 244 ++++++++++++++++++ 7 files changed, 454 insertions(+), 5 deletions(-) create mode 100644 plugins/llm-security/commands/mcp-baseline-reset.md create mode 100644 plugins/llm-security/scanners/mcp-baseline-reset.mjs create mode 100644 plugins/llm-security/tests/scanners/mcp-baseline-reset.test.mjs diff --git a/README.md b/README.md index 3835483..b6eec0b 100644 --- a/README.md +++ b/README.md @@ -33,14 +33,14 @@ Security scanning, auditing, and threat modeling for agentic AI projects. Built on OWASP LLM Top 10 (2025), OWASP Agentic AI Top 10, and the AI Agent Traps taxonomy (Google DeepMind, 2025). 
Three layers of protection: - **Automated enforcement** — 9 hooks that block dangerous operations in real time (prompt injection, secrets in code, destructive commands, supply chain guardrails, transcript scanning before context compaction) -- **Deterministic scanning** — 22 Node.js scanners (10 orchestrated + 12 standalone) for byte-level analysis: Shannon entropy, Unicode codepoints, typosquatting detection, taint flow, DNS resolution, git forensics, AI-BOM, attack simulation, IDE extension prescan (VS Code + JetBrains — URL fetch from Marketplace / OpenVSX / direct VSIX / JetBrains Marketplace, hardened ZIP extractor for zip-slip / symlinks / bombs, plus OS sandbox via `sandbox-exec` / `bwrap` so the kernel enforces FS confinement). Bash-normalize T1-T6 for obfuscation-resistant denylists -- **Advisory analysis** — 19 commands that scan, audit, and model threats with structured reports, letter grades, and actionable remediation +- **Deterministic scanning** — 23 Node.js scanners (10 orchestrated + 13 standalone) for byte-level analysis: Shannon entropy, Unicode codepoints, typosquatting detection, taint flow, DNS resolution, git forensics, AI-BOM, attack simulation, IDE extension prescan (VS Code + JetBrains — URL fetch from Marketplace / OpenVSX / direct VSIX / JetBrains Marketplace, hardened ZIP extractor for zip-slip / symlinks / bombs, plus OS sandbox via `sandbox-exec` / `bwrap` so the kernel enforces FS confinement), MCP cumulative-drift baseline reset (E14 — sticky baseline catches slow-burn rug-pulls). 
Bash-normalize T1-T6 for obfuscation-resistant denylists +- **Advisory analysis** — 20 commands that scan, audit, and model threats with structured reports, letter grades, and actionable remediation - **Enterprise governance** — Compliance mapping (EU AI Act, NIST AI RMF, ISO 42001), SARIF 2.1.0 output, structured audit trail, policy-as-code, standalone CLI - **Opus 4.7 aligned** — Agent instructions rewritten for literal instruction-following (system card §6.3.1.1), defense-in-depth posture per §5.2.1, production hardening guide Key commands: `/security posture`, `/security audit`, `/security scan`, `/security ide-scan`, `/security threat-model`, `/security plugin-audit` -6 specialized agents · 22 scanners · 9 hooks · 20 knowledge docs · 1511 tests +6 specialized agents · 23 scanners · 9 hooks · 20 knowledge docs · 1768 tests → [Full documentation](plugins/llm-security/README.md) diff --git a/plugins/llm-security/CLAUDE.md b/plugins/llm-security/CLAUDE.md index c507a8b..12d8329 100644 --- a/plugins/llm-security/CLAUDE.md +++ b/plugins/llm-security/CLAUDE.md @@ -25,6 +25,26 @@ top-level `output.suppressed` (`.llm-security-ignore` rule integer). Out-of-scope but flagged: `commands/scan.md:113-114` retains the v1 formula; resolution deferred to Batch B. +**v7.3.0 — MCP cumulative-drift baseline (in progress, Wave C of Batch C).** +Closes E14 from `docs/critical-review-2026-04-20.md`. The +`mcp-description-cache.mjs` schema gains a sticky `baseline` slot per +tool plus a 10-event rolling `history` array (FIFO). Cumulative drift = +`levenshtein(current, baseline) / max(|current|, |baseline|)`; when the +ratio crosses `mcp.cumulative_drift_threshold` (default 0.25), +`post-mcp-verify.mjs` emits a separate MEDIUM `mcp-cumulative-drift` +advisory. The existing per-update >10% drift signal is unchanged — both +fire independently. Slow-burn rug-pulls that keep each update under the +per-update threshold but cumulatively diverge from baseline are now +caught. 
Baseline survives the 7-day TTL purge so detection persists +across the full window. New `/security mcp-baseline-reset` slash command +(plus `scanners/mcp-baseline-reset.mjs` CLI: `--list`, `--target <tool>`, +or no-args clear-all) lets the user acknowledge a legitimate MCP server +upgrade — clearing the baseline causes the next call to seed a fresh +one from the incoming description; description, firstSeen, lastSeen, and +history are preserved for audit. `LLM_SECURITY_MCP_CACHE_FILE` env var +overrides the cache path for end-to-end testing without polluting the +user's real `~/.cache/llm-security/mcp-descriptions.json`. + ## Commands | Command | Description | @@ -36,6 +56,7 @@ formula; resolution deferred to Batch B. | `/security plugin-audit [path\|url]` | Plugin trust assessment (local or GitHub URL) | | `/security mcp-audit [--live]` | MCP server config audit (add `--live` for runtime inspection) | | `/security mcp-inspect` | Live MCP server inspection — connect via JSON-RPC 2.0, scan tool descriptions | +| `/security mcp-baseline-reset` | Reset MCP description baseline cache (E14, v7.3.0) — after legitimate MCP server upgrade | | `/security ide-scan [target\|url]` | Scan installed VS Code + JetBrains extensions/plugins — OR fetch a remote VSIX from Marketplace, OpenVSX, or direct URL (v6.4.0), OR a JetBrains plugin from `plugins.jetbrains.com` (v6.6.0). 7 VS Code checks + 7 JetBrains-specific checks (theme-with-code, broad activation, Premain-Class instrumentation, native binaries, depends-chain, typosquat, shaded jars). Hardened ZIP extractor (zip-slip, symlink, bomb, ratio caps — no fuzz-testing results published to date). Orchestrates reused scanners (UNI/ENT/NET/TNT/MEM/SCR) per extension. 
Offline by default, `--online` opt-in | | `/security posture` | Quick scorecard (13 categories) | | `/security threat-model` | Interactive STRIDE/MAESTRO session | diff --git a/plugins/llm-security/README.md b/plugins/llm-security/README.md index e6407b8..8e3bbbb 100644 --- a/plugins/llm-security/README.md +++ b/plugins/llm-security/README.md @@ -167,6 +167,7 @@ Or enable directly in `~/.claude/settings.json`: | `/security plugin-audit [path\|url]` | Dedicated plugin security audit with Install/Review/Do Not Install verdict (local or GitHub URL) | | `/security mcp-audit [--live]` | Focused audit of all installed MCP server configurations (add `--live` for runtime inspection) | | `/security mcp-inspect` | Connect to running MCP stdio servers and scan live tool descriptions | +| `/security mcp-baseline-reset` | Reset the cumulative-drift baseline cache after a legitimate MCP server upgrade (E14, v7.3.0) | | `/security ide-scan [target\|url]` | Scan installed VS Code (+ Cursor, Windsurf, VSCodium, code-server) / JetBrains extensions — OR fetch a remote VSIX from VS Code Marketplace, OpenVSX, or direct `.vsix` URL (v6.4.0). Typosquat, theme-with-code, sideload, broad activation, uninstall hooks, plus UNI/ENT/NET/TNT/MEM/SCR per extension. Offline by default | | `/security posture` | Quick security posture scorecard (16 categories incl. compliance) | | `/security diff [path]` | Compare scan against stored baseline — shows new/resolved/unchanged/moved findings | @@ -368,7 +369,7 @@ For deep scans (`/security scan --deep` or `/security deep-scan`), deterministic ## Deterministic Scanners -10 orchestrated + 12 standalone Node.js scanner scripts that perform byte-level analysis an LLM cannot. Zero external dependencies. Orchestrated scanners run via `node scanners/scan-orchestrator.mjs ` or through `/security deep-scan`. Supports `--fail-on `, `--compact`, `--format sarif`, `--output-file `. 
+10 orchestrated + 13 standalone Node.js scanner scripts that perform byte-level analysis an LLM cannot. Zero external dependencies. Orchestrated scanners run via `node scanners/scan-orchestrator.mjs ` or through `/security deep-scan`. Supports `--fail-on `, `--compact`, `--format sarif`, `--output-file `. ### Orchestrated (10) @@ -385,13 +386,14 @@ For deep scans (`/security scan --deep` or `/security deep-scan`), deterministic | `supply-chain-recheck.mjs` | SCR | Re-audit installed deps from lockfiles against blocklists, OSV.dev batch API, typosquat detection | LLM03 | | `toxic-flow-analyzer.mjs` | TFA | Lethal trifecta detection: untrusted input + sensitive data access + exfiltration sink. Cross-component correlation (runs last) | ASI01, ASI02, ASI05 | -### Standalone (12) +### Standalone (13) | Scanner | Prefix | Purpose | |---------|--------|---------| | `scan-orchestrator.mjs` | — | Entry point: runs all 10 orchestrated scanners, outputs JSON | | `posture-scanner.mjs` | PST | Deterministic posture assessment, 16 categories (incl. EU AI Act, NIST AI RMF, ISO 42001), <50ms | | `mcp-live-inspect.mjs` | MCI | Live MCP server inspection via JSON-RPC 2.0 (tool injection, shadowing, URL/IP) | +| `mcp-baseline-reset.mjs` | — | Reset cumulative-drift baseline cache (E14, v7.3.0) — `--list` / `--target ` / clear-all. 
Idempotent JSON output | | `ide-extension-scanner.mjs` | IDE | VS Code (+ Cursor, Windsurf, VSCodium, code-server) / JetBrains extension prescan: blocklist, theme-with-code, sideload, broad activation, typosquat, extension-pack expansion, dangerous uninstall hooks — then UNI/ENT/NET/TNT/MEM/SCR per extension | | `attack-simulator.mjs` | — | Red-team harness: 64 scenarios, 12 categories, adaptive mutation mode | | `ai-bom-generator.mjs` | BOM | CycloneDX 1.6 AI Bill of Materials | @@ -402,6 +404,12 @@ For deep scans (`/security scan --deep` or `/security deep-scan`), deterministic | `content-extractor.mjs` | — | Pre-extracts evidence from untrusted repos, strips injection patterns | | `watch-cron.mjs` | — | Cron wrapper: scans all targets in config, writes summary, exits with verdict code | +### MCP Description Drift (E14, v7.3.0) + +`scanners/lib/mcp-description-cache.mjs` anchors a sticky **baseline** description per MCP tool plus a rolling 10-event history. Cumulative drift is computed as `levenshtein(current, baseline) / max(|current|, |baseline|)`; when it crosses `mcp.cumulative_drift_threshold` (default 0.25), `post-mcp-verify.mjs` emits a MEDIUM `mcp-cumulative-drift` advisory — independent of the existing per-update >10% drift signal. Slow-burn rug-pulls that keep each update under the per-update threshold but cumulatively diverge from the baseline are now caught. + +The baseline survives the 7-day TTL purge so detection persists across the full window. After a legitimate MCP server upgrade, run `/security mcp-baseline-reset` (or `node scanners/mcp-baseline-reset.mjs --target <tool>`) to clear the stale baseline. The next call seeds a fresh baseline from the incoming description; description, firstSeen, lastSeen, and history are preserved across reset for audit. `LLM_SECURITY_MCP_CACHE_FILE` env var overrides the cache path for testing without polluting `~/.cache/llm-security/mcp-descriptions.json`. 
+ **Why deterministic?** LLMs are powerful at semantic analysis — understanding intent, detecting social engineering, assessing context. But they cannot reliably calculate Shannon entropy, measure Levenshtein distance between package names, trace taint flow across function boundaries, or detect individual Unicode codepoints. These scanners fill that gap. **Shared library** (`scanners/lib/`): severity classification, string utilities (entropy, Levenshtein, base64 detection), output formatting, file discovery, and YAML frontmatter parsing. diff --git a/plugins/llm-security/commands/mcp-baseline-reset.md b/plugins/llm-security/commands/mcp-baseline-reset.md new file mode 100644 index 0000000..af4132b --- /dev/null +++ b/plugins/llm-security/commands/mcp-baseline-reset.md @@ -0,0 +1,74 @@ +--- +name: security:mcp-baseline-reset +description: Reset MCP description baseline cache +allowed-tools: Read, Bash, AskUserQuestion +model: sonnet +--- + +# /security mcp-baseline-reset + +Reset the sticky description baseline used by `post-mcp-verify.mjs` for cumulative-drift detection (E14, OWASP MCP05). + +## Why this matters + +The cache stores a per-tool **baseline** description plus a rolling 10-event history. Cumulative drift is measured as `levenshtein(current, baseline) / max(|current|, |baseline|)`; when the ratio crosses the threshold (default 0.25), `post-mcp-verify.mjs` emits a MEDIUM `mcp-cumulative-drift` advisory. + +After a **legitimate** MCP server upgrade the old baseline is stale — every subsequent call will keep tripping the advisory. Reset the baseline once to acknowledge the upgrade. The next MCP invocation will seed a fresh baseline from the new description. + +Resetting **removes the slow-burn detection window** for that server until the new baseline is established. Only do this for upgrades you trust. 
+ +## Step 1 — List current baselines + +Run the listing CLI in read-only mode: + +```bash +node <plugin-root>/scanners/mcp-baseline-reset.mjs --list +``` + +Parse the JSON `baselines[]` array. If `count == 0`, report "No baselines stored yet" and stop. + +## Step 2 — Confirm scope + +Use `AskUserQuestion` to confirm the user's intent: + +- Question: "Reset which baselines?" +- Options derived from Step 1's output: + - "All baselines (N tools)" — clears every entry + - One option per tool, e.g. `mcp__tavily__tavily_search` + - "Cancel" — abort + +## Step 3 — Execute + +If the user picked **all**: + +```bash +node <plugin-root>/scanners/mcp-baseline-reset.mjs +``` + +If the user picked a specific tool: + +```bash +node <plugin-root>/scanners/mcp-baseline-reset.mjs --target <tool> +``` + +Capture stdout JSON. + +## Step 4 — Confirm result + +Report from the JSON response: + +``` +Cleared baseline(s): + - <tool-1> + - <tool-2> + ... +Remaining baselines: <N> +``` + +Add a one-line reminder: "The next MCP call to each cleared tool will seed a fresh baseline from the incoming description." + +## Notes + +- The CLI exits 0 even when nothing was cleared (idempotent). +- History entries are **preserved** across reset for audit purposes. +- This command does not connect to MCP servers — it only mutates the local cache at `~/.cache/llm-security/mcp-descriptions.json`. diff --git a/plugins/llm-security/commands/security.md b/plugins/llm-security/commands/security.md index a5d521d..d9405b3 100644 --- a/plugins/llm-security/commands/security.md +++ b/plugins/llm-security/commands/security.md @@ -21,6 +21,7 @@ Based on OWASP LLM Top 10 (2025) and OWASP Agentic AI Top 10. 
| `/security plugin-audit [path\|url]` | Dedicated plugin security audit with trust verdict | Before installing a third-party plugin | | `/security mcp-audit [--live]` | Focused audit of all installed MCP servers | After adding MCP servers or on suspicion | | `/security mcp-inspect` | Live inspection — connect to MCP servers, scan tool descriptions | Verify running servers have safe tool descriptions | +| `/security mcp-baseline-reset` | Reset MCP description baseline cache | After legitimate MCP server upgrade | | `/security ide-scan [target]` | Scan installed VS Code / JetBrains extensions for supply-chain risk, typosquats, malicious patterns | After installing new extensions or periodic review | | `/security posture` | Quick security posture scorecard | Daily/weekly health check | | `/security threat-model` | Interactive STRIDE/MAESTRO threat modeling session | When designing new architecture | diff --git a/plugins/llm-security/scanners/mcp-baseline-reset.mjs b/plugins/llm-security/scanners/mcp-baseline-reset.mjs new file mode 100644 index 0000000..b4f31d6 --- /dev/null +++ b/plugins/llm-security/scanners/mcp-baseline-reset.mjs @@ -0,0 +1,101 @@ +#!/usr/bin/env node +// mcp-baseline-reset.mjs — Reset MCP description-cache baselines. +// +// Purpose: +// The description cache (scanners/lib/mcp-description-cache.mjs) anchors a +// sticky baseline per MCP tool so that cumulative drift can be detected +// across many small updates. After a legitimate MCP server upgrade the +// baseline becomes a stale "what the tool used to say" reference and must +// be reset so the next call seeds a fresh baseline. +// +// Modes: +// --list Read-only — list current baselines as JSON. +// --target Clear baseline for one tool. +// (no args) Clear baselines for all tools. +// +// Output: JSON summary on stdout. Exit 0 always (idempotent). +// +// Used by /security mcp-baseline-reset slash command. Not part of +// scan-orchestrator. 
+ +import { + clearBaseline, + listBaselines, + loadCache, +} from './lib/mcp-description-cache.mjs'; + +function parseArgs(argv) { + const args = { list: false, target: null }; + for (let i = 2; i < argv.length; i++) { + const a = argv[i]; + if (a === '--list') { + args.list = true; + } else if (a === '--target' || a === '-t') { + args.target = argv[++i] || null; + } else if (a === '--help' || a === '-h') { + args.help = true; + } else if (!a.startsWith('--')) { + // bare positional treated as target for convenience + args.target = a; + } + } + return args; +} + +function help() { + process.stdout.write( + 'mcp-baseline-reset.mjs — Reset MCP description-cache baselines.\n\n' + + 'Usage:\n' + + ' node scanners/mcp-baseline-reset.mjs --list\n' + + ' node scanners/mcp-baseline-reset.mjs --target \n' + + ' node scanners/mcp-baseline-reset.mjs # clear all\n\n' + + 'Output: JSON. Exit code 0 always.\n', + ); +} + +function emit(obj) { + process.stdout.write(JSON.stringify(obj, null, 2) + '\n'); +} + +function main() { + const args = parseArgs(process.argv); + if (args.help) { + help(); + return 0; + } + + if (args.list) { + const baselines = listBaselines(); + emit({ + mode: 'list', + count: baselines.length, + baselines: baselines.map((b) => ({ + tool: b.tool, + baseline_excerpt: (b.baseline || '').slice(0, 120), + seen_at: b.seenAt, + last_seen: b.lastSeen, + history_events: b.history, + })), + }); + return 0; + } + + // Reset path + const result = clearBaseline(args.target || undefined); + // After clearing, count remaining baselines + const cache = loadCache(); + let remaining = 0; + for (const entry of Object.values(cache)) { + if (entry && entry.baseline) remaining++; + } + emit({ + mode: 'reset', + target: args.target || null, + cleared: result.cleared, + tools: result.tools, + remaining, + }); + return 0; +} + +process.exit(main()); diff --git a/plugins/llm-security/tests/scanners/mcp-baseline-reset.test.mjs 
b/plugins/llm-security/tests/scanners/mcp-baseline-reset.test.mjs new file mode 100644 index 0000000..af8d831 --- /dev/null +++ b/plugins/llm-security/tests/scanners/mcp-baseline-reset.test.mjs @@ -0,0 +1,244 @@ +// mcp-baseline-reset.test.mjs — CLI tests for scanners/mcp-baseline-reset.mjs +// Zero external dependencies: node:test + node:assert + child_process.execFile. +// +// LLM_SECURITY_MCP_CACHE_FILE controls the cache path so the test does not +// pollute the user's real ~/.cache/llm-security/mcp-descriptions.json. + +import { describe, it } from 'node:test'; +import assert from 'node:assert/strict'; +import { execFile } from 'node:child_process'; +import { mkdtempSync, writeFileSync, rmSync, existsSync, readFileSync } from 'node:fs'; +import { join, resolve } from 'node:path'; +import { tmpdir } from 'node:os'; + +const SCRIPT = resolve(import.meta.dirname, '../../scanners/mcp-baseline-reset.mjs'); + +// --------------------------------------------------------------------------- +// Helpers +// --------------------------------------------------------------------------- + +function runCli(args, env) { + return new Promise((res) => { + execFile( + 'node', + [SCRIPT, ...args], + { env: { ...process.env, ...env }, timeout: 5000 }, + (err, stdout, stderr) => { + res({ + code: err && typeof err.code === 'number' ? 
err.code : 0, + stdout: stdout || '', + stderr: stderr || '', + }); + }, + ); + }); +} + +function makeTmpCache() { + const dir = mkdtempSync(join(tmpdir(), 'baseline-reset-test-')); + const cacheFile = join(dir, 'mcp-descriptions.json'); + return { dir, cacheFile }; +} + +function cleanup(dir) { + try { rmSync(dir, { recursive: true, force: true }); } catch { /* ignore */ } +} + +function seedCache(cacheFile, entries) { + writeFileSync(cacheFile, JSON.stringify(entries, null, 2), 'utf-8'); +} + +function parseJson(stdout) { + return JSON.parse(stdout.trim()); +} + +const NOW = Date.now(); + +function makeEntry(desc, opts = {}) { + return { + description: desc, + firstSeen: NOW - 10000, + lastSeen: NOW, + baseline: opts.noBaseline ? undefined : { description: desc, seenAt: NOW - 10000 }, + history: opts.history || [], + }; +} + +// --------------------------------------------------------------------------- +// --list mode +// --------------------------------------------------------------------------- + +describe('mcp-baseline-reset CLI — --list mode', () => { + it('returns mode=list with empty baselines on empty cache', async () => { + const { dir, cacheFile } = makeTmpCache(); + const result = await runCli(['--list'], { LLM_SECURITY_MCP_CACHE_FILE: cacheFile }); + assert.equal(result.code, 0); + const json = parseJson(result.stdout); + assert.equal(json.mode, 'list'); + assert.equal(json.count, 0); + assert.deepEqual(json.baselines, []); + cleanup(dir); + }); + + it('lists all entries with baseline metadata', async () => { + const { dir, cacheFile } = makeTmpCache(); + seedCache(cacheFile, { + 'mcp__alpha__t': makeEntry('Alpha description text long enough'), + 'mcp__beta__t': makeEntry('Beta description text long enough'), + }); + const result = await runCli(['--list'], { LLM_SECURITY_MCP_CACHE_FILE: cacheFile }); + assert.equal(result.code, 0); + const json = parseJson(result.stdout); + assert.equal(json.mode, 'list'); + assert.equal(json.count, 2); + const tools 
= json.baselines.map((b) => b.tool).sort(); + assert.deepEqual(tools, ['mcp__alpha__t', 'mcp__beta__t']); + for (const b of json.baselines) { + assert.ok(typeof b.baseline_excerpt === 'string'); + assert.ok(typeof b.seen_at === 'number'); + assert.ok(typeof b.last_seen === 'number'); + assert.ok(typeof b.history_events === 'number'); + } + cleanup(dir); + }); + + it('--list does not mutate the cache', async () => { + const { dir, cacheFile } = makeTmpCache(); + const before = { + 'mcp__alpha__t': makeEntry('Alpha description text long enough'), + }; + seedCache(cacheFile, before); + await runCli(['--list'], { LLM_SECURITY_MCP_CACHE_FILE: cacheFile }); + const after = JSON.parse(readFileSync(cacheFile, 'utf-8')); + assert.ok(after['mcp__alpha__t'].baseline, 'baseline preserved by --list'); + cleanup(dir); + }); +}); + +// --------------------------------------------------------------------------- +// --target mode (single tool) +// --------------------------------------------------------------------------- + +describe('mcp-baseline-reset CLI — --target mode', () => { + it('clears one named baseline and reports it', async () => { + const { dir, cacheFile } = makeTmpCache(); + seedCache(cacheFile, { + 'mcp__alpha__t': makeEntry('Alpha description text long enough'), + 'mcp__beta__t': makeEntry('Beta description text long enough'), + }); + const result = await runCli( + ['--target', 'mcp__alpha__t'], + { LLM_SECURITY_MCP_CACHE_FILE: cacheFile }, + ); + assert.equal(result.code, 0); + const json = parseJson(result.stdout); + assert.equal(json.mode, 'reset'); + assert.equal(json.cleared, 1); + assert.deepEqual(json.tools, ['mcp__alpha__t']); + assert.equal(json.remaining, 1, 'beta baseline still present'); + + // Verify on disk + const after = JSON.parse(readFileSync(cacheFile, 'utf-8')); + assert.equal(after['mcp__alpha__t'].baseline, undefined, 'alpha baseline cleared'); + assert.ok(after['mcp__beta__t'].baseline, 'beta baseline preserved'); + cleanup(dir); + }); + + 
it('idempotent — clearing nonexistent target reports 0 cleared', async () => { + const { dir, cacheFile } = makeTmpCache(); + seedCache(cacheFile, { + 'mcp__alpha__t': makeEntry('Alpha description text long enough'), + }); + const result = await runCli( + ['--target', 'mcp__no_such__tool'], + { LLM_SECURITY_MCP_CACHE_FILE: cacheFile }, + ); + assert.equal(result.code, 0); + const json = parseJson(result.stdout); + assert.equal(json.cleared, 0); + assert.deepEqual(json.tools, []); + assert.equal(json.remaining, 1, 'unrelated baseline untouched'); + cleanup(dir); + }); +}); + +// --------------------------------------------------------------------------- +// Clear-all mode (no args) +// --------------------------------------------------------------------------- + +describe('mcp-baseline-reset CLI — clear-all mode', () => { + it('with no args, clears all baselines', async () => { + const { dir, cacheFile } = makeTmpCache(); + seedCache(cacheFile, { + 'mcp__alpha__t': makeEntry('Alpha description text long enough'), + 'mcp__beta__t': makeEntry('Beta description text long enough'), + 'mcp__gamma__t': makeEntry('Gamma description text long enough'), + }); + const result = await runCli([], { LLM_SECURITY_MCP_CACHE_FILE: cacheFile }); + assert.equal(result.code, 0); + const json = parseJson(result.stdout); + assert.equal(json.mode, 'reset'); + assert.equal(json.cleared, 3); + assert.equal(json.remaining, 0); + assert.equal(json.tools.length, 3); + + const after = JSON.parse(readFileSync(cacheFile, 'utf-8')); + for (const key of ['mcp__alpha__t', 'mcp__beta__t', 'mcp__gamma__t']) { + assert.equal(after[key].baseline, undefined); + } + cleanup(dir); + }); + + it('idempotent — clear-all on empty cache returns 0', async () => { + const { dir, cacheFile } = makeTmpCache(); + const result = await runCli([], { LLM_SECURITY_MCP_CACHE_FILE: cacheFile }); + assert.equal(result.code, 0); + const json = parseJson(result.stdout); + assert.equal(json.cleared, 0); + 
assert.equal(json.remaining, 0); + cleanup(dir); + }); + + it('preserves description and history after clear', async () => { + const { dir, cacheFile } = makeTmpCache(); + seedCache(cacheFile, { + 'mcp__alpha__t': makeEntry('Alpha description text long enough', { + history: [{ description: 'older', seenAt: NOW - 5000, distance: 4 }], + }), + }); + await runCli([], { LLM_SECURITY_MCP_CACHE_FILE: cacheFile }); + const after = JSON.parse(readFileSync(cacheFile, 'utf-8')); + const entry = after['mcp__alpha__t']; + assert.equal(entry.baseline, undefined); + assert.equal(entry.description, 'Alpha description text long enough', 'description preserved'); + assert.ok(typeof entry.firstSeen === 'number'); + assert.equal(entry.history.length, 1, 'history preserved'); + cleanup(dir); + }); +}); + +// --------------------------------------------------------------------------- +// Help / unknown args +// --------------------------------------------------------------------------- + +describe('mcp-baseline-reset CLI — misc', () => { + it('--help prints usage and exits 0', async () => { + const result = await runCli(['--help'], {}); + assert.equal(result.code, 0); + assert.ok(/Usage:/i.test(result.stdout)); + }); + + it('bare positional argument is treated as --target', async () => { + const { dir, cacheFile } = makeTmpCache(); + seedCache(cacheFile, { + 'mcp__alpha__t': makeEntry('Alpha description text long enough'), + 'mcp__beta__t': makeEntry('Beta description text long enough'), + }); + const result = await runCli(['mcp__alpha__t'], { LLM_SECURITY_MCP_CACHE_FILE: cacheFile }); + assert.equal(result.code, 0); + const json = parseJson(result.stdout); + assert.equal(json.cleared, 1); + assert.deepEqual(json.tools, ['mcp__alpha__t']); + cleanup(dir); + }); +});