ktg-plugin-marketplace/plugins/config-audit/scanners/token-hotspots.mjs
Kjell Tore Guttormsen 6cfca82885 fix(config-audit): expose hotspot.path for --accurate-tokens calibration + SC-6b PASS
The v5.0.0-rc.1 N5 implementation looked up hotspot.path in
calibrateAgainstApi() but token-hotspots.mjs only emitted hotspot.source —
calibration silently produced 0 actual_tokens because every iteration hit
the `if (!hotspot?.path) continue` guard.

Fix: file-backed hotspots now expose `path: h.absPath` in the JSON output.
MCP-server hotspots intentionally leave path unset — their tokens are
runtime tool-schema (formula-based: 500 + toolCount × 200), not file
content readable by count_tokens.

SC-6b release-gate verified against tests/fixtures/marketplace-large:
- Actual (count_tokens, claude-opus-4-7): 589 tokens for CLAUDE.md
- Estimated (4-bytes/token byte heuristic): 594 tokens
- Delta: -5 tokens / -0.85% — well within ±5% gate. PASS.

CHANGELOG: documented the fix + SC-6b result inline under [5.0.0].

All 635 tests still green. No estimateTokens tuning required for v5.0.0.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-01 09:45:56 +02:00

508 lines
20 KiB
JavaScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/**
* TOK Scanner — Token Hotspots / Opus 4.7 patterns
*
* Detects structural Opus 4.7-era token-efficiency patterns
* (severities recalibrated for tokens/turn impact in v5 F7):
* CA-TOK-001 cache-breaking volatile top in CLAUDE.md (high)
* CA-TOK-002 redundant tool/permission declarations (medium)
* CA-TOK-003 deep @import chain (>2 hops) (low)
* CA-TOK-005 MCP tool-schema budget per server (low/medium/high; v5 N1)
* plus the v5 M2 bloated-skill-description check (low) and the v5 M4
* CLAUDE.md-cascade-over-10k check (medium)
*
* Note: the v4 sonnet-era signature pattern was removed in v5 F5 — too noisy
* and not actionable; live token costs are better surfaced by the hotspots
* ranking and per-pattern findings.
*
* Also ranks every discovered config source by estimated tokens and exposes
* a `hotspots` array (≤10 entries, possibly fewer for tiny projects) on the
* scanner result.
*
* Pattern catalogue: knowledge/opus-4.7-patterns.md
* Token heuristic: estimateTokens() in scanners/lib/active-config-reader.mjs
*
* Zero external dependencies.
*/
import { resolve, dirname, isAbsolute } from 'node:path';
import { stat } from 'node:fs/promises';
import { readTextFile } from './lib/file-discovery.mjs';
import { finding, scannerResult } from './lib/output.mjs';
import { SEVERITY } from './lib/severity.mjs';
import { findImports, parseJson, parseFrontmatter } from './lib/yaml-parser.mjs';
import { estimateTokens, readActiveConfig } from './lib/active-config-reader.mjs';
// Scanner identifier stamped on every finding emitted by this module.
const SCANNER = 'TOK';
// CA-TOK-001: only the first N lines count as the cached prefix when
// checking for cache-breaking volatile content in CLAUDE.md.
const VOLATILE_TOP_LINES = 30;
// Heuristic signatures of volatile content: template placeholders,
// activity-log headings, and leading timestamps/log lines.
const VOLATILE_PATTERNS = [
/\{timestamp\}/i,
/\{uuid\}/i,
/\{date\}/i,
/\{session(?:_id)?\}/i,
/\bactivity log\b/i,
/^\s*\d{4}-\d{2}-\d{2}T\d{2}:\d{2}/m, // ISO timestamps at line start
/^\s*\[\d{4}-\d{2}-\d{2}/m, // [YYYY-MM-DD ...] log lines
];
// CA-TOK-003: a finding fires when an @import chain exceeds this many hops.
const MAX_IMPORT_DEPTH = 2;
// v5 M4: cascades above this contribute >10k tokens to every turn even before
// any tool description loads. Heuristic for "context budget under pressure".
const CASCADE_TOKEN_THRESHOLD = 10_000;
// v5 M2: SKILL.md `description` loads on every turn even when the body does
// not. Anything past this hints the description is doing the body's job.
const SKILL_DESCRIPTION_THRESHOLD = 500;
// v5 N1: MCP tool-schema budget thresholds (CA-TOK-005). Tool descriptions
// load on every turn — high tool counts inflate the per-turn schema payload
// regardless of whether the tools are invoked. Tiered severity per server:
// < 20  → no finding
// 20-49 → low
// 50-99 → medium
// 100+  → high
// null  → low ("tool count unknown" — manifest not parseable)
const MCP_BUDGET_LOW = 20;
const MCP_BUDGET_MEDIUM = 50;
const MCP_BUDGET_HIGH = 100;
// The hotspots ranking is capped to the top N token consumers.
const HOTSPOTS_MAX = 10;
// v5 F7: shared evidence note appended to every TOK pattern finding.
// Communicates that severity reflects a structural heuristic, not measured
// runtime telemetry — tells reviewers how to interpret the rating.
const CALIBRATION_NOTE =
'severity reflects estimated tokens/turn based on structural heuristic; ' +
'not measured against runtime telemetry';
/**
 * Classify a discovered config file into a token-estimation kind.
 *
 * JSON-family settings/manifest types map to 'json'; every other type
 * (including unrecognized ones) falls back to 'markdown'.
 *
 * @param {string} type - discovery file type tag
 * @returns {'markdown'|'json'}
 */
function tokenKind(type) {
  const JSON_TYPES = new Set([
    'settings-json',
    'mcp-json',
    'hooks-json',
    'plugin-json',
    'claude-json',
    'keybindings-json',
  ]);
  return JSON_TYPES.has(type) ? 'json' : 'markdown';
}
/**
 * Check whether a filesystem path exists (any entry kind — file, dir, link).
 *
 * @param {string} absPath - absolute path to probe
 * @returns {Promise<boolean>} true when stat() succeeds, false on any error
 */
async function fileExists(absPath) {
  try {
    await stat(absPath);
    return true;
  } catch {
    return false;
  }
}
/**
 * Resolve a raw @import target to an absolute path.
 *
 * Supports `~/` and bare-`~` home-relative prefixes (HOME, falling back to
 * USERPROFILE, then ''); relative paths resolve against the importing file's
 * directory; absolute paths pass through unchanged.
 *
 * @param {string} importPath - raw path text from the @import directive
 * @param {string} fromFile - absolute path of the importing file
 * @returns {string|null} absolute path, or null for an empty/blank path
 */
function resolveImportPath(importPath, fromFile) {
  const trimmed = importPath.trim();
  if (!trimmed) return null;
  const home = process.env.HOME || process.env.USERPROFILE || '';
  if (trimmed.startsWith('~/')) return resolve(home, trimmed.slice(2));
  if (trimmed.startsWith('~')) return resolve(home, trimmed.slice(1));
  if (isAbsolute(trimmed)) return trimmed;
  return resolve(dirname(fromFile), trimmed);
}
/**
 * Compute the deepest @import chain reachable from `startFile`.
 * Returns max depth observed (0 = no imports, 1 = direct import, etc.).
 *
 * @param {string} startFile - absolute path of the root file to walk from
 * @param {Map<string,string|null>} contentCache - shared content cache;
 *   populated as files are read so repeat visits across branches are free
 * @returns {Promise<number>} maximum @import depth observed
 */
async function maxImportDepth(startFile, contentCache) {
let maxDepth = 0;
// Depth-first walk of the @import graph. Each recursion branch receives its
// own copy of `visited` (new Set(visited) below), so a file shared by two
// sibling chains is explored on both paths while per-path cycles still
// terminate. The depth-20 cap is a safety net for pathological graphs.
async function walk(file, depth, visited) {
if (depth > 20 || visited.has(file)) return;
visited.add(file);
if (depth > maxDepth) maxDepth = depth;
// Read through the shared cache; a miss stores the result (even if falsy)
// so the file is never read twice across the whole walk.
let content = contentCache.get(file);
if (content === undefined) {
content = await readTextFile(file);
contentCache.set(file, content);
}
if (!content) return;
const imports = findImports(content);
for (const imp of imports) {
const target = resolveImportPath(imp.path, file);
if (!target) continue;
// Only follow imports that resolve to an existing file on disk.
if (!(await fileExists(target))) continue;
await walk(target, depth + 1, new Set(visited));
}
}
await walk(startFile, 0, new Set());
return maxDepth;
}
/**
 * Classify an MCP server's tool count into a budget tier (v5 N1).
 *
 * Returns null when no finding should be emitted (numeric toolCount below
 * MCP_BUDGET_LOW, or a non-numeric non-null value). Otherwise returns
 * { severity, tier, kind } where kind is 'unknown' (toolCount === null) or
 * 'counted'. Threshold ladder: 20 → low, 50 → medium, 100 → high. A null
 * toolCount maps to low + 'unknown' so users can see opaque servers without
 * the scanner pretending they're free.
 *
 * @param {number|null} toolCount - parsed tool count, or null when unknown
 * @returns {{severity:*, tier:string, kind:string}|null}
 */
function classifyMcpToolBudget(toolCount) {
  // Opaque server: manifest / cached tools-list not parseable.
  if (toolCount === null) {
    return { severity: SEVERITY.low, tier: 'unknown', kind: 'unknown' };
  }
  if (typeof toolCount !== 'number' || toolCount < MCP_BUDGET_LOW) {
    return null;
  }
  let severity = SEVERITY.low;
  let tier = '20-49';
  if (toolCount >= MCP_BUDGET_HIGH) {
    severity = SEVERITY.high;
    tier = '100+';
  } else if (toolCount >= MCP_BUDGET_MEDIUM) {
    severity = SEVERITY.medium;
    tier = '50-99';
  }
  return { severity, tier, kind: 'counted' };
}
/**
 * Detect cache-breaking volatile content in the first VOLATILE_TOP_LINES
 * lines of a CLAUDE.md file.
 *
 * @param {string|null|undefined} content - file content; falsy yields false
 * @returns {boolean} true when any volatile pattern matches the top slice
 */
function detectVolatileTop(content) {
  if (!content) return false;
  const topLines = content.split('\n').slice(0, VOLATILE_TOP_LINES);
  const prefix = topLines.join('\n');
  for (const pattern of VOLATILE_PATTERNS) {
    if (pattern.test(prefix)) return true;
  }
  return false;
}
/**
 * Detect redundant or overlapping permission entries in a settings JSON object.
 *
 * Two issue classes per list ('allow' / 'deny' / 'ask'):
 *  - exact duplicate string entries;
 *  - entries overlapped by a broader rule in the same list, e.g.
 *    `Read(src/**)` when bare `Read`, `Read(**)` or `Read(*)` is present.
 *
 * Non-object settings, missing `permissions`, non-array lists, and non-string
 * entries are all ignored.
 *
 * @param {*} settings - parsed settings JSON (any shape; invalid yields [])
 * @returns {Array<{list:string, entry:string, reason:string}>}
 */
function detectRedundantPermissions(settings) {
  const issues = [];
  if (!settings || typeof settings !== 'object') return issues;
  const perms = settings.permissions;
  if (!perms || typeof perms !== 'object') return issues;
  for (const list of ['allow', 'deny', 'ask']) {
    const arr = perms[list];
    if (!Array.isArray(arr)) continue;
    const seen = new Set();
    for (const entry of arr) {
      if (typeof entry !== 'string') continue;
      // Exact duplicate: first occurrence passes, repeats are flagged.
      if (seen.has(entry)) {
        issues.push({ list, entry, reason: 'duplicate entry' });
        continue;
      }
      seen.add(entry);
    }
    // Subset detection: an entry like `Read(src/**)` is redundant if `Read(**)`
    // or bare `Read` is also present in the same list. `seen` now holds every
    // distinct string entry, so lookups are O(1) instead of the previous
    // O(n) Array.includes inside the loop (accidental O(n²) on large lists).
    for (const entry of arr) {
      if (typeof entry !== 'string') continue;
      const tool = entry.replace(/\(.*\)$/, '').trim();
      const hasBare = seen.has(tool);
      const hasWildcard = seen.has(`${tool}(**)`) || seen.has(`${tool}(*)`);
      const isBare = entry === tool;
      const isWildcard = entry === `${tool}(**)` || entry === `${tool}(*)`;
      if (!isBare && !isWildcard && (hasBare || hasWildcard)) {
        issues.push({ list, entry, reason: `overlapped by ${hasBare ? tool : `${tool}(**)`}` });
      }
    }
  }
  return issues;
}
/**
 * Build the ranked hotspots array (top HOTSPOTS_MAX token consumers).
 *
 * v5 F1: when activeConfig is available, expand each MCP server into its own
 * hotspot entry (richer signal than the parent .mcp.json file). Discovery
 * files remain the primary source for CLAUDE.md / settings / skills.
 *
 * @param {{files: Array<object>}} discovery - discovered config files
 * @param {string} targetPath - audit root, used to derive relative paths
 * @param {object|null} activeConfig - active-config snapshot (may be null)
 * @returns {Promise<Array<object>>} ranked hotspot entries with rank 1..N
 */
async function buildHotspots(discovery, targetPath, activeConfig) {
  const candidates = [];
  // File-backed candidates from discovery; zero-token sources are dropped.
  for (const file of discovery.files) {
    const estimated = estimateTokens(file.size, tokenKind(file.type));
    if (estimated <= 0) continue;
    candidates.push({
      absPath: file.absPath,
      relPath: file.relPath || file.absPath.replace(targetPath + '/', ''),
      type: file.type,
      scope: file.scope,
      size: file.size,
      estimated_tokens: estimated,
    });
  }
  // Per-MCP-server entries from activeConfig (each ~500+ tokens at runtime,
  // not represented by the parent .mcp.json file size alone).
  const servers = Array.isArray(activeConfig?.mcpServers) ? activeConfig.mcpServers : [];
  for (const server of servers) {
    if (!server?.enabled) continue;
    candidates.push({
      absPath: server.source || `mcp:${server.name}`,
      relPath: `mcp:${server.name} (${server.source})`,
      type: 'mcp-server',
      scope: server.source,
      size: 0,
      estimated_tokens: server.estimatedTokens || 0,
    });
  }
  candidates.sort((a, b) => b.estimated_tokens - a.estimated_tokens);
  return candidates.slice(0, HOTSPOTS_MAX).map((h, idx) => {
    const entry = {
      source: h.relPath || h.absPath,
      estimated_tokens: h.estimated_tokens,
      rank: idx + 1,
      recommendations: hotspotRecommendations(h),
    };
    // Expose the on-disk path for file-backed hotspots so the
    // --accurate-tokens calibration in token-hotspots-cli can read content.
    // MCP-server hotspots are virtual (runtime tool-schema, not file content)
    // so their path stays unset and calibration skips them.
    if (h.type !== 'mcp-server' && h.absPath) {
      entry.path = h.absPath;
    }
    return entry;
  });
}
/**
 * Produce remediation hints for one hotspot, keyed on its config type.
 *
 * @param {{type:string}} h - hotspot candidate
 * @returns {Array<string>} at most 3 recommendation strings
 */
function hotspotRecommendations(h) {
  const PERMISSION_TYPES = ['settings-json', 'mcp-json', 'hooks-json'];
  const DESCRIPTION_TYPES = ['skill-md', 'agent-md', 'command-md'];
  let recs;
  if (h.type === 'claude-md') {
    recs = [
      'Move volatile top-of-file content to the bottom or extract to an @import-ed file.',
      'Split overlong CLAUDE.md into focused @imports (≤200 lines each).',
    ];
  } else if (PERMISSION_TYPES.includes(h.type)) {
    recs = [
      'Deduplicate overlapping entries — each duplicate inflates the per-turn schema payload.',
      'Move rarely-used permissions to a project-local override.',
    ];
  } else if (DESCRIPTION_TYPES.includes(h.type)) {
    recs = [
      'Tighten the description field — it loads on every turn even when the body does not.',
    ];
  } else {
    recs = ['Review whether this source needs to load on every turn.'];
  }
  // Defensive cap: never emit more than 3 recommendations per hotspot.
  return recs.slice(0, 3);
}
/**
 * Main scanner entry point.
 *
 * Runs every TOK pattern (A/B/C/F/G/E) over the discovered config files and
 * the active-config snapshot, then attaches the hotspots ranking and token
 * totals to the scanner result.
 *
 * @param {string} targetPath
 * @param {{files: Array<{absPath:string, relPath:string, type:string, scope:string, size:number}>, skipped?:number}} discovery
 * @returns {Promise<object>} scannerResult with `hotspots`,
 *   `total_estimated_tokens`, and (when available) `activeConfig` attached
 */
export async function scan(targetPath, discovery) {
const start = Date.now();
const findings = [];
let filesScanned = 0;
// Shared file-content cache: filled in Pattern A, reused by Pattern C's
// import walk so each CLAUDE.md is read at most once.
const contentCache = new Map();
// v5 F1: pull active-config snapshot once. Failures are non-fatal — the
// scanner falls back to the discovery-only path used in v4.
let activeConfig = null;
try {
activeConfig = await readActiveConfig(targetPath, {});
} catch {
activeConfig = null;
}
// ── Pattern A: cache-breaking volatile top in CLAUDE.md ──
for (const f of discovery.files) {
if (f.type !== 'claude-md') continue;
filesScanned++;
const content = await readTextFile(f.absPath);
contentCache.set(f.absPath, content);
if (detectVolatileTop(content)) {
findings.push(finding({
scanner: SCANNER,
severity: SEVERITY.high,
title: 'Cache-breaking volatile content at top of CLAUDE.md',
description:
`The first ${VOLATILE_TOP_LINES} lines of ${f.relPath || f.absPath} contain volatile ` +
'tokens (timestamps, session ids, or activity logs). Volatile content above stable ' +
'content defeats Opus 4.7 prompt-cache reuse on every turn.',
file: f.absPath,
evidence: CALIBRATION_NOTE,
recommendation:
'Move volatile sections to the bottom of the file, or extract them to an @import-ed ' +
'file outside the cached prefix. Keep the first 30 lines stable across turns.',
category: 'token-efficiency',
}));
}
}
// ── Pattern B: redundant tool/permission declarations ──
for (const f of discovery.files) {
if (f.type !== 'settings-json') continue;
filesScanned++;
const content = await readTextFile(f.absPath);
if (!content) continue;
const parsed = parseJson(content);
if (!parsed) continue;
const issues = detectRedundantPermissions(parsed);
if (issues.length === 0) continue;
findings.push(finding({
scanner: SCANNER,
severity: SEVERITY.medium,
title: 'Redundant permission declarations',
description:
`${f.relPath || f.absPath} contains ${issues.length} redundant or overlapping ` +
`permission entr${issues.length === 1 ? 'y' : 'ies'}. Each duplicate inflates the ` +
'tool-schema payload sent on every turn.',
file: f.absPath,
// Evidence caps at the first 5 issues to keep findings readable.
// NOTE(review): CALIBRATION_NOTE is appended with no " — " separator here
// (and in Patterns C/E below), unlike Patterns F/G — confirm intended.
evidence:
issues.slice(0, 5).map(i => `${i.list}: "${i.entry}" (${i.reason})`).join('; ') +
`${CALIBRATION_NOTE}`,
recommendation:
'Deduplicate the permissions.allow / permissions.deny arrays. Prefer the most ' +
'specific entry that still grants the intended access.',
category: 'token-efficiency',
}));
}
// ── Pattern C: deep @import chain (>2 hops) ──
// Re-walks the claude-md set from Pattern A; reads go through contentCache so
// files loaded above are not re-read. filesScanned is not bumped here — the
// same files were already counted in Pattern A.
for (const f of discovery.files) {
if (f.type !== 'claude-md') continue;
const depth = await maxImportDepth(f.absPath, contentCache);
if (depth > MAX_IMPORT_DEPTH) {
findings.push(finding({
scanner: SCANNER,
severity: SEVERITY.low,
title: 'Deep @import chain defeats prompt-cache reuse',
description:
`${f.relPath || f.absPath} reaches @import depth ${depth} (>${MAX_IMPORT_DEPTH} hops). ` +
'Each @import boundary fragments the prompt-cache prefix; deeply chained imports ' +
'defeat caching for the deepest content even when it never changes.',
file: f.absPath,
// NOTE(review): no " — " separator before CALIBRATION_NOTE — confirm intended.
evidence: `Max chain depth: ${depth}${CALIBRATION_NOTE}`,
recommendation:
'Flatten the @import chain to ≤2 hops. Inline the deepest layer back into its parent.',
category: 'token-efficiency',
}));
}
}
// ── Pattern F: SKILL.md description > 500 chars (v5 M2) ──
// Scoped to discovery.files (project-local skill-md). The plan mentioned
// walking activeConfig.skills, but that pulls in user's ~/.claude/skills
// and installed plugin skills which are out-of-scope for a project audit
// and add noise the user can't act on. Project-local discovery is what
// /config-audit on a path is actually asking about.
for (const f of discovery.files) {
if (f.type !== 'skill-md') continue;
const content = await readTextFile(f.absPath);
if (!content) continue;
// NOTE(review): filesScanned increments after the readability check here,
// but before the read in Patterns A/B, so unreadable skill files are not
// counted — confirm this asymmetry is intended.
filesScanned++;
const fm = parseFrontmatter(content)?.frontmatter || null;
const desc = (fm && typeof fm.description === 'string') ? fm.description : '';
if (desc.length <= SKILL_DESCRIPTION_THRESHOLD) continue;
// Prefer the frontmatter name, then the skill's parent directory name
// (SKILL.md conventionally lives in <skill-name>/SKILL.md), then the path.
const skillName = (fm && fm.name) || f.absPath.split('/').slice(-2, -1)[0] || f.absPath;
findings.push(finding({
scanner: SCANNER,
severity: SEVERITY.low,
title: 'Bloated skill description (loads on every turn)',
description:
`Skill "${skillName}" has a description of ${desc.length} characters ` +
`(>${SKILL_DESCRIPTION_THRESHOLD}). The description block loads on every turn ` +
'even when the skill body does not — long descriptions inflate per-turn cost.',
file: f.absPath,
evidence:
`description_chars=${desc.length}; threshold=${SKILL_DESCRIPTION_THRESHOLD}; ` +
`skill="${skillName}" — ${CALIBRATION_NOTE}`,
recommendation:
'Tighten the description to a single sentence (≤500 chars) covering trigger phrases ' +
'only. Move detailed usage / examples into the SKILL.md body.',
category: 'token-efficiency',
}));
}
// ── Pattern G: MCP tool-schema budget per server (v5 N1, CA-TOK-005) ──
// Scope: project-local .mcp.json only. Plugin- and ~/.claude.json-sourced
// servers are global concerns surfaced by the manifest scanner; scoping the
// finding here to .mcp.json keeps /config-audit <path> actionable for the
// path the user is auditing.
if (activeConfig && Array.isArray(activeConfig.mcpServers)) {
for (const m of activeConfig.mcpServers) {
if (!m || !m.enabled) continue;
if (m.source !== '.mcp.json') continue;
// classifyMcpToolBudget returns null below the 20-tool floor → no finding.
const budget = classifyMcpToolBudget(m.toolCount);
if (!budget) continue;
const severity = budget.severity;
const sourceLabel = m.source ? `${m.name} (${m.source})` : m.name;
// 'unknown' = manifest / cached tools-list not parseable; both the message
// and the recommendation differ for that case.
const isUnknown = budget.kind === 'unknown';
const description = isUnknown
? `MCP server "${sourceLabel}" has tool count unknown — could not parse manifest ` +
'or cached tools/list. Tool schemas load on every turn; an unverified server ' +
'may be inflating the per-turn payload silently.'
: `MCP server "${sourceLabel}" exposes ${m.toolCount} tools. Tool schemas load on ` +
'every turn regardless of which tools the model actually invokes — high tool ' +
'counts inflate the per-turn payload and crowd out usable context.';
const evidence = isUnknown
? `tool_count=unknown; server="${m.name}"; source="${m.source}" — ${CALIBRATION_NOTE}`
: `tool_count=${m.toolCount}; tier=${budget.tier}; server="${m.name}"; ` +
`source="${m.source}" — ${CALIBRATION_NOTE}`;
const recommendation = isUnknown
? 'Install the package locally (so detect-mcp-tool-count can read its manifest), ' +
'or run the server once and cache its tools/list response under ' +
'~/.claude/config-audit/mcp-cache/<name>.json. See knowledge/cache-telemetry-recipe.md.'
: 'Use the server\'s `tools/filter` config (or equivalent) to expose only the tools ' +
'this project actually needs. Consider splitting heavy MCP servers across project- ' +
'and user-scopes so per-project budget stays tight.';
findings.push(finding({
scanner: SCANNER,
severity,
title: `High MCP tool-schema budget on server "${m.name}"`,
description,
// Guard against the synthetic `mcp:<name>` placeholder used when a server
// has no file-backed source.
file: m.source && m.source !== `mcp:${m.name}` ? m.source : null,
evidence,
recommendation,
category: 'token-efficiency',
}));
}
}
// ── Pattern E: CLAUDE.md cascade > CASCADE_TOKEN_THRESHOLD (v5 M4) ──
// Optional chaining keeps this safe when activeConfig or claudeMd is null:
// undefined > threshold evaluates false, so no finding fires.
if (activeConfig?.claudeMd?.estimatedTokens > CASCADE_TOKEN_THRESHOLD) {
const cascadeTokens = activeConfig.claudeMd.estimatedTokens;
const fileCount = activeConfig.claudeMd.files?.length ?? 0;
findings.push(finding({
scanner: SCANNER,
severity: SEVERITY.medium,
title: 'CLAUDE.md cascade exceeds 10k tokens per turn',
description:
`The active CLAUDE.md cascade for this repo (${fileCount} files: managed + user + ` +
`ancestors + project + @imports) totals ~${cascadeTokens} tokens. Every turn loads this ` +
'whole prefix; budget pressure compounds with tool schemas and MCP servers.',
// Attribute the finding to the project-scope CLAUDE.md when one exists.
file: activeConfig.claudeMd.files?.find(f => f.scope === 'project')?.path || null,
// NOTE(review): no " — " separator before CALIBRATION_NOTE — confirm intended.
evidence:
`cascade_tokens=${cascadeTokens}; threshold=${CASCADE_TOKEN_THRESHOLD}; ` +
`files=${fileCount}${CALIBRATION_NOTE}`,
recommendation:
'Trim the user/project CLAUDE.md, push reference material into @imports that load ' +
'on-demand, or move long sections to skills. Aim for <10k tokens in the cascade.',
category: 'token-efficiency',
}));
}
// ── Hotspots ranking ──
const hotspots = await buildHotspots(discovery, targetPath, activeConfig);
// ── Total estimated tokens (sum of every discovered source + activeConfig MCP) ──
let totalTokens = 0;
for (const f of discovery.files) {
totalTokens += estimateTokens(f.size, tokenKind(f.type));
}
if (activeConfig && Array.isArray(activeConfig.mcpServers)) {
for (const m of activeConfig.mcpServers) {
if (m && m.enabled) totalTokens += m.estimatedTokens || 0;
}
}
const result = scannerResult(SCANNER, 'ok', findings, filesScanned, Date.now() - start);
// Scanner-specific extensions consumed by the CLI / report layer.
result.hotspots = hotspots;
result.total_estimated_tokens = totalTokens;
if (activeConfig) {
result.activeConfig = {
claudeMdEstimatedTokens: activeConfig.claudeMd?.estimatedTokens ?? 0,
mcpServerCount: activeConfig.mcpServers?.length ?? 0,
pluginCount: activeConfig.plugins?.length ?? 0,
skillCount: activeConfig.skills?.length ?? 0,
};
}
return result;
}