/**
 * TOK Scanner — Token Hotspots / Opus 4.7 patterns
 *
 * Detects three structural Opus 4.7-era token-efficiency patterns
 * (severities recalibrated for tokens/turn impact in v5 F7):
 *   CA-TOK-001  cache-breaking volatile top in CLAUDE.md (high)
 *   CA-TOK-002  redundant tool/permission declarations (medium)
 *   CA-TOK-003  deep @import chain (>2 hops) (low)
 *
 * Later additions implemented further down in this file:
 *   - CLAUDE.md cascade above CASCADE_TOKEN_THRESHOLD (v5 M4)
 *   - SKILL.md description above SKILL_DESCRIPTION_THRESHOLD chars (v5 M2)
 *   - CA-TOK-005 per-server MCP tool-schema budget (v5 N1)
 *
 * Note: the v4 sonnet-era signature pattern was removed in v5 F5 — too noisy
 * and not actionable; live token costs are better surfaced by the hotspots
 * ranking and per-pattern findings.
 *
 * Also ranks every discovered config source by estimated tokens and exposes
 * a `hotspots` array (≤10 entries, possibly fewer for tiny projects) on the
 * scanner result.
 *
 * Pattern catalogue: knowledge/opus-4.7-patterns.md
 * Token heuristic:   estimateTokens() in scanners/lib/active-config-reader.mjs
 *
 * Zero external dependencies.
 */

import { resolve, dirname, isAbsolute } from 'node:path';
import { stat } from 'node:fs/promises';
import { readTextFile } from './lib/file-discovery.mjs';
import { finding, scannerResult } from './lib/output.mjs';
import { SEVERITY } from './lib/severity.mjs';
import { findImports, parseJson, parseFrontmatter } from './lib/yaml-parser.mjs';
import { estimateTokens, readActiveConfig } from './lib/active-config-reader.mjs';

// Scanner id stamped on every finding and on the scanner result.
const SCANNER = 'TOK';

// Only the first N lines of a CLAUDE.md are inspected for volatile content.
const VOLATILE_TOP_LINES = 30;

// Markers of content that changes between turns. None of these regexes use
// the /g or /y flag, so calling .test() on them repeatedly is stateless.
const VOLATILE_PATTERNS = [
  /\{timestamp\}/i,
  /\{uuid\}/i,
  /\{date\}/i,
  /\{session(?:_id)?\}/i,
  /\bactivity log\b/i,
  /^\s*\d{4}-\d{2}-\d{2}T\d{2}:\d{2}/m, // ISO timestamps at line start
  /^\s*\[\d{4}-\d{2}-\d{2}/m, // [YYYY-MM-DD ...] log lines
];

// An @import chain deeper than this many hops is reported.
const MAX_IMPORT_DEPTH = 2;

// v5 M4: cascades above this contribute >10k tokens to every turn even before
// any tool description loads. Heuristic for "context budget under pressure".
const CASCADE_TOKEN_THRESHOLD = 10_000;

// v5 M2: SKILL.md `description` loads on every turn even when the body does
// not. Anything past this hints the description is doing the body's job.
const SKILL_DESCRIPTION_THRESHOLD = 500;

// v5 N1: MCP tool-schema budget thresholds (CA-TOK-005). Tool descriptions
// load on every turn — high tool counts inflate the per-turn schema payload
// regardless of whether the tools are invoked. Tiered severity per server:
//   < 20   → no finding
//   20–49  → low
//   50–99  → medium
//   100+   → high
//   null   → low ("tool count unknown" — manifest not parseable)
const MCP_BUDGET_LOW = 20;
const MCP_BUDGET_MEDIUM = 50;
const MCP_BUDGET_HIGH = 100;

// Upper bound on the number of entries in the `hotspots` array.
const HOTSPOTS_MAX = 10;

// v5 F7: shared evidence note appended to every TOK pattern finding.
// Communicates that severity reflects a structural heuristic, not measured
// runtime telemetry — tells reviewers how to interpret the rating.
const CALIBRATION_NOTE =
  'severity reflects estimated tokens/turn based on structural heuristic; ' +
  'not measured against runtime telemetry';

// Discovery types whose size is estimated with the 'json' token heuristic.
const JSON_CONFIG_TYPES = new Set([
  'settings-json',
  'mcp-json',
  'hooks-json',
  'plugin-json',
  'claude-json',
  'keybindings-json',
]);

/**
 * Classify a discovered config file into a token-estimation kind.
 *
 * @param {string} type - Discovery file type (e.g. 'claude-md', 'mcp-json').
 * @returns {'json'|'markdown'} 'json' for the known JSON config types,
 *   'markdown' for everything else (including unknown types).
 */
function tokenKind(type) {
  return JSON_CONFIG_TYPES.has(type) ? 'json' : 'markdown';
}

/**
 * Check whether a path can be stat-ed.
 *
 * @param {string} absPath
 * @returns {Promise<boolean>} false on any stat failure (missing file,
 *   permission error, ...); an unreadable import target is deliberately
 *   treated the same as a missing one.
 */
async function fileExists(absPath) {
  try {
    await stat(absPath);
    return true;
  } catch {
    return false;
  }
}

/**
 * Resolve an @import target to an absolute path.
 *
 * `~/x` and `~x` are resolved against HOME (or USERPROFILE), absolute paths
 * are returned unchanged, and anything else is resolved relative to the
 * directory of the importing file.
 *
 * @param {string} importPath - Raw path as written after `@`.
 * @param {string} fromFile - Path of the file containing the import.
 * @returns {string|null} Resolved path, or null for a blank import path.
 */
function resolveImportPath(importPath, fromFile) {
  const raw = importPath.trim();
  if (!raw) return null;

  if (raw.startsWith('~')) {
    // With no HOME/USERPROFILE set, resolve('') falls back to the cwd.
    const home = process.env.HOME || process.env.USERPROFILE || '';
    const rest = raw.startsWith('~/') ? raw.slice(2) : raw.slice(1);
    return resolve(home, rest);
  }
  if (isAbsolute(raw)) return raw;
  return resolve(dirname(fromFile), raw);
}

/**
 * Compute the deepest @import chain reachable from `startFile`.
 * Returns max depth observed (0 = no imports, 1 = direct import, etc.).
 *
 * Cycles are cut per import path: a file is skipped only when it is already
 * on the chain currently being walked, so the same file may still be reached
 * through a different chain.
 *
 * @param {string} startFile - Absolute path of the root file.
 * @param {Map<string, *>} contentCache - Shared cache of file content keyed
 *   by path; read from and filled by this function.
 * @returns {Promise<number>}
 */
async function maxImportDepth(startFile, contentCache) {
  // Hard stop so a pathological import graph cannot recurse without bound.
  const DEPTH_LIMIT = 20;
  // Files on the chain currently being walked. One Set with backtracking
  // replaces a fresh copy per import edge; the semantics are the same.
  const onPath = new Set();
  let maxDepth = 0;

  async function walk(file, depth) {
    if (depth > DEPTH_LIMIT || onPath.has(file)) return;
    onPath.add(file);
    try {
      if (depth > maxDepth) maxDepth = depth;

      let content = contentCache.get(file);
      if (content === undefined) {
        content = await readTextFile(file);
        contentCache.set(file, content);
      }
      if (!content) return;

      for (const imp of findImports(content)) {
        const target = resolveImportPath(imp.path, file);
        if (!target) continue;
        if (!(await fileExists(target))) continue;
        await walk(target, depth + 1);
      }
    } finally {
      onPath.delete(file);
    }
  }

  await walk(startFile, 0);
  return maxDepth;
}

/**
 * Classify an MCP server's tool count into a budget tier (v5 N1).
 *
 * Returns null if no finding should be emitted (toolCount < 20, or toolCount
 * is neither null nor a real number — including undefined and NaN).
 * Otherwise returns { severity, tier, kind } where kind is 'unknown'
 * (toolCount === null) or 'counted'. Threshold ladder: 20 → low, 50 → medium,
 * 100 → high. Null toolCount maps to low + 'unknown' so users can see opaque
 * servers without the scanner pretending they're free.
 *
 * @param {number|null} toolCount
 * @returns {{severity: *, tier: string, kind: 'unknown'|'counted'}|null}
 */
function classifyMcpToolBudget(toolCount) {
  if (toolCount === null) {
    return { severity: SEVERITY.low, tier: 'unknown', kind: 'unknown' };
  }
  // NaN is typeof 'number' but fails every comparison below; without this
  // guard it would fall through and be reported as the 20-49 tier.
  if (typeof toolCount !== 'number' || Number.isNaN(toolCount)) return null;
  if (toolCount < MCP_BUDGET_LOW) return null;

  if (toolCount >= MCP_BUDGET_HIGH) {
    return { severity: SEVERITY.high, tier: '100+', kind: 'counted' };
  }
  if (toolCount >= MCP_BUDGET_MEDIUM) {
    return { severity: SEVERITY.medium, tier: '50-99', kind: 'counted' };
  }
  return { severity: SEVERITY.low, tier: '20-49', kind: 'counted' };
}

/**
 * Detect cache-breaking volatile content in the first VOLATILE_TOP_LINES
 * lines of a CLAUDE.md file.
 *
 * @param {string|null|undefined} content - File content; falsy → false.
 * @returns {boolean} true if any VOLATILE_PATTERNS regex matches the top.
 */
function detectVolatileTop(content) {
  if (!content) return false;
  const top = content.split('\n').slice(0, VOLATILE_TOP_LINES).join('\n');
  return VOLATILE_PATTERNS.some((rx) => rx.test(top));
}

/**
 * Detect redundant or overlapping permission entries in a settings JSON object.
 * Returns array of `{list, entry, reason}` for reporting.
 *
 * Two checks run per list ('allow', 'deny', 'ask'):
 *   1. exact duplicates — every repeat after the first occurrence is reported;
 *   2. overlap — a scoped entry such as `Read(src/**)` is redundant when bare
 *      `Read`, `Read(**)` or `Read(*)` is in the same list. Each distinct
 *      entry is reported at most once here, and the reason names the
 *      covering entry that is actually present.
 *
 * Non-string entries are ignored.
 *
 * @param {*} settings - Parsed settings JSON (any shape tolerated).
 * @returns {Array<{list: string, entry: string, reason: string}>}
 */
function detectRedundantPermissions(settings) {
  const issues = [];
  if (!settings || typeof settings !== 'object') return issues;
  const perms = settings.permissions;
  if (!perms || typeof perms !== 'object') return issues;

  for (const list of ['allow', 'deny', 'ask']) {
    const arr = perms[list];
    if (!Array.isArray(arr)) continue;

    // Pass 1: exact duplicates. `seen` ends up holding each distinct string
    // entry in first-occurrence order.
    const seen = new Set();
    for (const entry of arr) {
      if (typeof entry !== 'string') continue;
      if (seen.has(entry)) {
        issues.push({ list, entry, reason: 'duplicate entry' });
        continue;
      }
      seen.add(entry);
    }

    // Pass 2: overlap. Iterating the distinct entries keeps a repeated entry
    // from being reported as overlapped once per occurrence.
    for (const entry of seen) {
      const tool = entry.replace(/\(.*\)$/, '').trim();
      const wildcards = [`${tool}(**)`, `${tool}(*)`];
      // Bare and wildcard entries are the covering forms, never the covered.
      if (entry === tool || wildcards.includes(entry)) continue;

      const coveredBy = seen.has(tool) ? tool : wildcards.find((w) => seen.has(w));
      if (coveredBy !== undefined) {
        issues.push({ list, entry, reason: `overlapped by ${coveredBy}` });
      }
    }
  }
  return issues;
}

/**
 * Build the ranked hotspots array.
 *
 * v5 F1: when activeConfig is available, expand each MCP server into its own
 * hotspot entry (richer signal than the parent .mcp.json file). Discovery
 * files remain the primary source for CLAUDE.md / settings / skills.
 *
 * @param {{files: Array<{absPath:string, relPath:string, type:string, scope:string, size:number}>}} discovery
 * @param {string} targetPath - Audit root; used to derive a relative path
 *   when a file has no `relPath`.
 * @param {object|null} activeConfig - Active-config snapshot or null.
 * @returns {Promise<Array<{source:string, estimated_tokens:number, rank:number, recommendations:string[], path?:string}>>}
 *   At most HOTSPOTS_MAX entries, highest estimate first, ranks from 1.
 */
async function buildHotspots(discovery, targetPath, activeConfig) {
  const ranked = [];

  for (const f of discovery.files) {
    const tokens = estimateTokens(f.size, tokenKind(f.type));
    if (tokens <= 0) continue;
    ranked.push({
      absPath: f.absPath,
      relPath: f.relPath || f.absPath.replace(targetPath + '/', ''),
      type: f.type,
      scope: f.scope,
      size: f.size,
      estimated_tokens: tokens,
    });
  }

  // Per-MCP-server entries from activeConfig (each ~500+ tokens at runtime,
  // not represented by the parent .mcp.json file size alone).
  if (activeConfig && Array.isArray(activeConfig.mcpServers)) {
    for (const m of activeConfig.mcpServers) {
      if (!m || !m.enabled) continue;
      ranked.push({
        absPath: m.source || `mcp:${m.name}`,
        relPath: `mcp:${m.name} (${m.source})`,
        type: 'mcp-server',
        scope: m.source,
        size: 0,
        estimated_tokens: m.estimatedTokens || 0,
      });
    }
  }

  ranked.sort((a, b) => b.estimated_tokens - a.estimated_tokens);

  return ranked.slice(0, HOTSPOTS_MAX).map((h, i) => {
    const entry = {
      source: h.relPath || h.absPath,
      estimated_tokens: h.estimated_tokens,
      rank: i + 1,
      recommendations: hotspotRecommendations(h),
    };
    // Expose the on-disk path for file-backed hotspots so the
    // --accurate-tokens calibration in token-hotspots-cli can read content.
    // MCP-server hotspots are virtual (runtime tool-schema, not file content)
    // so their path stays unset and calibration skips them.
    if (h.type !== 'mcp-server' && h.absPath) {
      entry.path = h.absPath;
    }
    return entry;
  });
}

/**
 * Pick the canned recommendations for a hotspot based on its source type.
 *
 * @param {{type: string}} h - Ranked hotspot record.
 * @returns {string[]} One to three recommendation strings.
 */
function hotspotRecommendations(h) {
  const recs = [];
  if (h.type === 'claude-md') {
    recs.push('Move volatile top-of-file content to the bottom or extract to an @import-ed file.');
    recs.push('Split overlong CLAUDE.md into focused @imports (≤200 lines each).');
  } else if (h.type === 'settings-json' || h.type === 'mcp-json' || h.type === 'hooks-json') {
    recs.push('Deduplicate overlapping entries — each duplicate inflates the per-turn schema payload.');
    recs.push('Move rarely-used permissions to a project-local override.');
  } else if (h.type === 'skill-md' || h.type === 'agent-md' || h.type === 'command-md') {
    recs.push('Tighten the description field — it loads on every turn even when the body does not.');
  } else {
    recs.push('Review whether this source needs to load on every turn.');
  }
  // Always cap to 1–3 recommendations
  return recs.slice(0, 3);
}

/**
 * Main scanner entry point.
 *
 * Runs the pattern checks in a fixed order (A volatile top, B redundant
 * permissions, C deep @import chain, F skill description, G MCP tool budget,
 * E CLAUDE.md cascade), then attaches the hotspots ranking, the total token
 * estimate and — when available — an active-config summary to the result.
 *
 * @param {string} targetPath
 * @param {{files: Array<{absPath:string, relPath:string, type:string, scope:string, size:number}>, skipped?:number}} discovery
 * @returns {Promise<object>} scannerResult(...) extended with `hotspots`,
 *   `total_estimated_tokens` and, if the snapshot loaded, `activeConfig`.
 */
export async function scan(targetPath, discovery) {
  const start = Date.now();
  const findings = [];
  let filesScanned = 0;
  // CLAUDE.md content read for Pattern A is reused by the Pattern C walk.
  const contentCache = new Map();

  // v5 F1: pull active-config snapshot once. Failures are non-fatal — the
  // scanner falls back to the discovery-only path used in v4.
  let activeConfig = null;
  try {
    activeConfig = await readActiveConfig(targetPath, {});
  } catch {
    activeConfig = null;
  }

  // ── Pattern A: cache-breaking volatile top in CLAUDE.md ──
  for (const f of discovery.files) {
    if (f.type !== 'claude-md') continue;
    filesScanned++;
    const content = await readTextFile(f.absPath);
    contentCache.set(f.absPath, content);
    if (detectVolatileTop(content)) {
      findings.push(finding({
        scanner: SCANNER,
        severity: SEVERITY.high,
        title: 'Cache-breaking volatile content at top of CLAUDE.md',
        description:
          `The first ${VOLATILE_TOP_LINES} lines of ${f.relPath || f.absPath} contain volatile ` +
          'tokens (timestamps, session ids, or activity logs). Volatile content above stable ' +
          'content defeats Opus 4.7 prompt-cache reuse on every turn.',
        file: f.absPath,
        evidence: CALIBRATION_NOTE,
        recommendation:
          'Move volatile sections to the bottom of the file, or extract them to an @import-ed ' +
          `file outside the cached prefix. Keep the first ${VOLATILE_TOP_LINES} lines stable across turns.`,
        category: 'token-efficiency',
      }));
    }
  }

  // ── Pattern B: redundant tool/permission declarations ──
  for (const f of discovery.files) {
    if (f.type !== 'settings-json') continue;
    filesScanned++;
    const content = await readTextFile(f.absPath);
    if (!content) continue;
    const parsed = parseJson(content);
    if (!parsed) continue;
    const issues = detectRedundantPermissions(parsed);
    if (issues.length === 0) continue;
    findings.push(finding({
      scanner: SCANNER,
      severity: SEVERITY.medium,
      title: 'Redundant permission declarations',
      description:
        `${f.relPath || f.absPath} contains ${issues.length} redundant or overlapping ` +
        `permission entr${issues.length === 1 ? 'y' : 'ies'}. Each duplicate inflates the ` +
        'tool-schema payload sent on every turn.',
      file: f.absPath,
      // Only the first five issues are quoted to keep the evidence short.
      evidence:
        issues.slice(0, 5).map((i) => `${i.list}: "${i.entry}" (${i.reason})`).join('; ') +
        ` — ${CALIBRATION_NOTE}`,
      recommendation:
        'Deduplicate the permissions.allow / permissions.deny arrays. Prefer the most ' +
        'specific entry that still grants the intended access.',
      category: 'token-efficiency',
    }));
  }

  // ── Pattern C: deep @import chain (>2 hops) ──
  for (const f of discovery.files) {
    if (f.type !== 'claude-md') continue;
    const depth = await maxImportDepth(f.absPath, contentCache);
    if (depth > MAX_IMPORT_DEPTH) {
      findings.push(finding({
        scanner: SCANNER,
        severity: SEVERITY.low,
        title: 'Deep @import chain defeats prompt-cache reuse',
        description:
          `${f.relPath || f.absPath} reaches @import depth ${depth} (>${MAX_IMPORT_DEPTH} hops). ` +
          'Each @import boundary fragments the prompt-cache prefix; deeply chained imports ' +
          'defeat caching for the deepest content even when it never changes.',
        file: f.absPath,
        evidence: `Max chain depth: ${depth} — ${CALIBRATION_NOTE}`,
        recommendation:
          `Flatten the @import chain to ≤${MAX_IMPORT_DEPTH} hops. Inline the deepest layer back into its parent.`,
        category: 'token-efficiency',
      }));
    }
  }

  // ── Pattern F: SKILL.md description > 500 chars (v5 M2) ──
  // Scoped to discovery.files (project-local skill-md). The plan mentioned
  // walking activeConfig.skills, but that pulls in user's ~/.claude/skills
  // and installed plugin skills which are out-of-scope for a project audit
  // and add noise the user can't act on. Project-local discovery is what
  // /config-audit on a path is actually asking about.
  for (const f of discovery.files) {
    if (f.type !== 'skill-md') continue;
    const content = await readTextFile(f.absPath);
    if (!content) continue;
    filesScanned++;
    const fm = parseFrontmatter(content)?.frontmatter || null;
    const desc = (fm && typeof fm.description === 'string') ? fm.description : '';
    if (desc.length <= SKILL_DESCRIPTION_THRESHOLD) continue;
    // Fall back to the parent directory name, then to the full path. Split on
    // both separators so Windows-style paths also yield a directory name.
    const skillName =
      (fm && fm.name) || f.absPath.split(/[\\/]/).slice(-2, -1)[0] || f.absPath;
    findings.push(finding({
      scanner: SCANNER,
      severity: SEVERITY.low,
      title: 'Bloated skill description (loads on every turn)',
      description:
        `Skill "${skillName}" has a description of ${desc.length} characters ` +
        `(>${SKILL_DESCRIPTION_THRESHOLD}). The description block loads on every turn ` +
        'even when the skill body does not — long descriptions inflate per-turn cost.',
      file: f.absPath,
      evidence:
        `description_chars=${desc.length}; threshold=${SKILL_DESCRIPTION_THRESHOLD}; ` +
        `skill="${skillName}" — ${CALIBRATION_NOTE}`,
      recommendation:
        `Tighten the description to a single sentence (≤${SKILL_DESCRIPTION_THRESHOLD} chars) covering trigger phrases ` +
        'only. Move detailed usage / examples into the SKILL.md body.',
      category: 'token-efficiency',
    }));
  }

  // ── Pattern G: MCP tool-schema budget per server (v5 N1, CA-TOK-005) ──
  // Scope: project-local .mcp.json only. Plugin- and ~/.claude.json-sourced
  // servers are global concerns surfaced by the manifest scanner; scoping the
  // finding here to .mcp.json keeps /config-audit actionable for the
  // path the user is auditing.
  if (activeConfig && Array.isArray(activeConfig.mcpServers)) {
    for (const m of activeConfig.mcpServers) {
      if (!m || !m.enabled) continue;
      if (m.source !== '.mcp.json') continue;
      const budget = classifyMcpToolBudget(m.toolCount);
      if (!budget) continue;

      // m.source is always '.mcp.json' from here on (guard above).
      const sourceLabel = `${m.name} (${m.source})`;
      const isUnknown = budget.kind === 'unknown';

      const description = isUnknown
        ? `MCP server "${sourceLabel}" has tool count unknown — could not parse manifest ` +
          'or cached tools/list. Tool schemas load on every turn; an unverified server ' +
          'may be inflating the per-turn payload silently.'
        : `MCP server "${sourceLabel}" exposes ${m.toolCount} tools. Tool schemas load on ` +
          'every turn regardless of which tools the model actually invokes — high tool ' +
          'counts inflate the per-turn payload and crowd out usable context.';

      const evidence = isUnknown
        ? `tool_count=unknown; server="${m.name}"; source="${m.source}" — ${CALIBRATION_NOTE}`
        : `tool_count=${m.toolCount}; tier=${budget.tier}; server="${m.name}"; ` +
          `source="${m.source}" — ${CALIBRATION_NOTE}`;

      const recommendation = isUnknown
        ? 'Install the package locally (so detect-mcp-tool-count can read its manifest), ' +
          'or run the server once and cache its tools/list response under ' +
          '~/.claude/config-audit/mcp-cache/.json. See knowledge/cache-telemetry-recipe.md.'
        : 'Use the server\'s `tools/filter` config (or equivalent) to expose only the tools ' +
          'this project actually needs. Consider splitting heavy MCP servers across project- ' +
          'and user-scopes so per-project budget stays tight.';

      findings.push(finding({
        scanner: SCANNER,
        severity: budget.severity,
        title: `High MCP tool-schema budget on server "${m.name}"`,
        description,
        file: m.source,
        evidence,
        recommendation,
        category: 'token-efficiency',
      }));
    }
  }

  // ── Pattern E: CLAUDE.md cascade > CASCADE_TOKEN_THRESHOLD (v5 M4) ──
  if (activeConfig?.claudeMd?.estimatedTokens > CASCADE_TOKEN_THRESHOLD) {
    const cascadeTokens = activeConfig.claudeMd.estimatedTokens;
    const fileCount = activeConfig.claudeMd.files?.length ?? 0;
    findings.push(finding({
      scanner: SCANNER,
      severity: SEVERITY.medium,
      title: 'CLAUDE.md cascade exceeds 10k tokens per turn',
      description:
        `The active CLAUDE.md cascade for this repo (${fileCount} files: managed + user + ` +
        `ancestors + project + @imports) totals ~${cascadeTokens} tokens. Every turn loads this ` +
        'whole prefix; budget pressure compounds with tool schemas and MCP servers.',
      file: activeConfig.claudeMd.files?.find((f) => f.scope === 'project')?.path || null,
      evidence:
        `cascade_tokens=${cascadeTokens}; threshold=${CASCADE_TOKEN_THRESHOLD}; ` +
        `files=${fileCount} — ${CALIBRATION_NOTE}`,
      recommendation:
        'Trim the user/project CLAUDE.md, push reference material into @imports that load ' +
        'on-demand, or move long sections to skills. Aim for <10k tokens in the cascade.',
      category: 'token-efficiency',
    }));
  }

  // ── Hotspots ranking ──
  const hotspots = await buildHotspots(discovery, targetPath, activeConfig);

  // ── Total estimated tokens (sum of every discovered source + activeConfig MCP) ──
  let totalTokens = 0;
  for (const f of discovery.files) {
    totalTokens += estimateTokens(f.size, tokenKind(f.type));
  }
  if (activeConfig && Array.isArray(activeConfig.mcpServers)) {
    for (const m of activeConfig.mcpServers) {
      if (m && m.enabled) totalTokens += m.estimatedTokens || 0;
    }
  }

  const result = scannerResult(SCANNER, 'ok', findings, filesScanned, Date.now() - start);
  result.hotspots = hotspots;
  result.total_estimated_tokens = totalTokens;
  if (activeConfig) {
    result.activeConfig = {
      claudeMdEstimatedTokens: activeConfig.claudeMd?.estimatedTokens ?? 0,
      mcpServerCount: activeConfig.mcpServers?.length ?? 0,
      pluginCount: activeConfig.plugins?.length ?? 0,
      skillCount: activeConfig.skills?.length ?? 0,
    };
  }
  return result;
}