diff --git a/plugins/llm-security/CLAUDE.md b/plugins/llm-security/CLAUDE.md index e94a8e2..368d225 100644 --- a/plugins/llm-security/CLAUDE.md +++ b/plugins/llm-security/CLAUDE.md @@ -105,7 +105,7 @@ deprecation signal because there is nothing to deprecate yet. | `deep-scan-synthesizer-agent` | Scanner JSON → human-readable report (9 scanners) | opus | | `cleaner-agent` | Semi-auto remediation proposals | opus | -## Hooks (8) +## Hooks (9) | Script | Event | Matcher | Purpose | |--------|-------|---------|---------| @@ -117,6 +117,7 @@ deprecation signal because there is nothing to deprecate yet. | `post-mcp-verify.mjs` | PostToolUse | — (all) | Injection scan on ALL tool output (incl. MEDIUM patterns, HITL traps, sub-agent spawn, NL indirection, cognitive load, hybrid P2SQL/recursive/XSS). HTML content trap detection. Bash-specific: secrets/URLs/size. MCP: per-update description drift (MCP05) AND cumulative drift vs sticky baseline (E14, v7.3.0) — slow-burn rug-pulls that stay under the per-update threshold but diverge >=25% from baseline emit MEDIUM `mcp-cumulative-drift` advisory. Per-tool volume tracking | | `post-session-guard.mjs` | PostToolUse | — (all) | Runtime trifecta detection (Rule of Two). Sliding window (20 calls) + 100-call long-horizon. MCP-concentrated trifecta (same server = elevated severity). Sensitive path + exfil detection. Slow-burn trifecta (legs >50 calls apart = MEDIUM). Behavioral drift detection (Jensen-Shannon divergence). CaMeL-inspired data flow tagging (SHA-256 provenance tracking, output→input linking). Mode: `LLM_SECURITY_TRIFECTA_MODE=block\|warn\|off` (default: warn). Cumulative data volume tracking (100KB/500KB/1MB thresholds). Sub-agent delegation tracking (Task/Agent tools): escalation-after-input advisory when delegation occurs within `LLM_SECURITY_ESCALATION_WINDOW` calls (default 5) of untrusted input (DeepMind Agent Traps kat. 
4); secondary 20-call MEDIUM advisory catches slow-burn variants outside the primary window (E17, v7.2.0) | | `update-check.mjs` | UserPromptSubmit | — | Checks for newer versions (max 1x/24h, cached). Disable: `LLM_SECURITY_UPDATE_CHECK=off` | +| `pre-compact-scan.mjs` | PreCompact | — | Scan transcript for injection patterns + credentials before context compaction; prevents poisoned content from surviving in compact form. Reads at most last 512 KB for <500ms latency. Mode: `LLM_SECURITY_PRECOMPACT_MODE=block\|warn\|off` (default: warn). Cap: `LLM_SECURITY_PRECOMPACT_MAX_BYTES` | > `pre-install-supply-chain.mjs` covers 7 package managers: npm/yarn/pnpm, pip/pip3/uv, brew, docker, go, cargo, gem. Per-ecosystem blocklists, age gate (<72h), npm audit (critical=block, high=warn), PyPI API inspection, Levenshtein typosquat detection, Docker image verification. diff --git a/plugins/llm-security/tests/lib/doc-consistency.test.mjs b/plugins/llm-security/tests/lib/doc-consistency.test.mjs index ac2d2f6..0f1864f 100644 --- a/plugins/llm-security/tests/lib/doc-consistency.test.mjs +++ b/plugins/llm-security/tests/lib/doc-consistency.test.mjs @@ -84,3 +84,78 @@ describe('doc-consistency — v2 cutoffs are documented in unified prose', () => assert.match(content, /score\s*[≥>=]+\s*65/); }); }); + +// --------------------------------------------------------------------------- +// D4 (Batch C, Wave D): Hooks count must stay synchronized across three +// surfaces — the CLAUDE.md `## Hooks (N)` header, the markdown table directly +// underneath that header, and the canonical hooks/hooks.json definition. +// Drift previously masked a missing `pre-compact-scan.mjs` row in CLAUDE.md. +// This block fails fast if any of the three surfaces drift. 
// ---------------------------------------------------------------------------

describe('doc-consistency — Hooks count consistency (D4)', () => {
  const CLAUDE_MD = join(PLUGIN_ROOT, 'CLAUDE.md');
  const HOOKS_JSON = join(PLUGIN_ROOT, 'hooks', 'hooks.json');

  // Matches the `## Hooks (N)` heading line; group 1 captures N.
  // Not a /g regex, so `.exec()` carries no state between calls.
  const HOOKS_HEADER_RE = /^##\s+Hooks\s*\((\d+)\)\s*$/m;

  /**
   * Extract N from the `## Hooks (N)` heading.
   *
   * @param {string} text - Full CLAUDE.md contents.
   * @returns {number} The number written in the heading.
   * @throws {Error} When no such heading is present.
   */
  function readHeaderNumber(text) {
    const found = HOOKS_HEADER_RE.exec(text);
    if (found === null) {
      throw new Error('No `## Hooks (N)` header found in CLAUDE.md');
    }
    return Number.parseInt(found[1], 10);
  }

  /**
   * Count the script rows of the table under the `## Hooks (N)` heading.
   *
   * @param {string} text - Full CLAUDE.md contents.
   * @returns {number} Number of rows whose first cell is a backticked `*.mjs` name.
   * @throws {Error} When the heading is missing.
   */
  function readTableRowCount(text) {
    const header = HOOKS_HEADER_RE.exec(text);
    if (header === null) {
      throw new Error('Hooks header not found');
    }

    // The section runs from the Hooks heading up to the next level-2
    // heading, or to the end of the document when there is none.
    let section = text.slice(header.index);
    const sectionEnd = section.search(/\n##\s+\S/);
    if (sectionEnd > 0) {
      section = section.slice(0, sectionEnd);
    }

    // Only rows whose first cell is a backticked `.mjs` name are counted,
    // which leaves out the column-title row (bare `Script`, no backtick).
    const rowPattern = /^\|\s*`[^`|]+\.mjs`\s*\|/gm;
    return [...section.matchAll(rowPattern)].length;
  }

  /**
   * Count the distinct hook commands declared in hooks.json.
   *
   * @param {string} jsonText - Raw hooks.json contents.
   * @returns {number} Number of unique, truthy `command` values.
   */
  function readJsonHookCount(jsonText) {
    const { hooks } = JSON.parse(jsonText);
    const commands = Object.values(hooks || {})
      .flatMap((eventEntries) => [...eventEntries])
      .flatMap((entry) => [...(entry.hooks || [])])
      .map((hook) => hook.command)
      .filter(Boolean);
    // A Set collapses a command registered under several events into one.
    return new Set(commands).size;
  }

  it('header count, table row count, and hooks.json count agree', () => {
    const headerNumber = readHeaderNumber(readFileSync(CLAUDE_MD, 'utf-8'));
    const tableRowCount = readTableRowCount(readFileSync(CLAUDE_MD, 'utf-8'));
    const jsonHookCount = readJsonHookCount(readFileSync(HOOKS_JSON, 'utf-8'));

    // Checked in order; the first mismatch fails the test with its message.
    const checks = [
      {
        actual: headerNumber,
        expected: jsonHookCount,
        message: [
          `CLAUDE.md '## Hooks (${headerNumber})' header disagrees with hooks/hooks.json (${jsonHookCount} hooks).`,
          `Update the header to match.`,
        ].join(' '),
      },
      {
        actual: tableRowCount,
        expected: jsonHookCount,
        message: [
          `CLAUDE.md hooks table has ${tableRowCount} rows but hooks/hooks.json defines ${jsonHookCount} hooks.`,
          `Add/remove rows in the table to match.`,
        ].join(' '),
      },
      {
        actual: headerNumber,
        expected: tableRowCount,
        message: [
          `CLAUDE.md header (${headerNumber}) and table row count (${tableRowCount}) disagree.`,
          `These two surfaces must stay in sync.`,
        ].join(' '),
      },
    ];

    for (const { actual, expected, message } of checks) {
      assert.equal(actual, expected, message);
    }
  });
});