The v5.0.0-rc.1 N5 implementation looked up hotspot.path in calibrateAgainstApi() but token-hotspots.mjs only emitted hotspot.source — calibration silently produced 0 actual_tokens because every iteration hit the `if (!hotspot?.path) continue` guard. Fix: file-backed hotspots now expose `path: h.absPath` in the JSON output. MCP-server hotspots intentionally leave path unset — their tokens are runtime tool-schema (formula-based: 500 + toolCount × 200), not file content readable by count_tokens. SC-6b release-gate verified against tests/fixtures/marketplace-large: - Actual (count_tokens, claude-opus-4-7): 589 tokens for CLAUDE.md - Estimated (4-bytes/token byte heuristic): 594 tokens - Delta: -5 tokens / -0.85% — well within ±5% gate. PASS. CHANGELOG: documented the fix + SC-6b result inline under [5.0.0]. All 635 tests still green. No estimateTokens tuning required for v5.0.0. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
508 lines
20 KiB
JavaScript
/**
 * TOK Scanner — Token Hotspots / Opus 4.7 patterns
 *
 * Detects three structural Opus 4.7-era token-efficiency patterns
 * (severities recalibrated for tokens/turn impact in v5 F7):
 *   CA-TOK-001  cache-breaking volatile top in CLAUDE.md   (high)
 *   CA-TOK-002  redundant tool/permission declarations     (medium)
 *   CA-TOK-003  deep @import chain (>2 hops)               (low)
 *
 * Note: the v4 sonnet-era signature pattern was removed in v5 F5 — too noisy
 * and not actionable; live token costs are better surfaced by the hotspots
 * ranking and per-pattern findings.
 *
 * Also ranks every discovered config source by estimated tokens and exposes
 * a `hotspots` array (≤10 entries, possibly fewer for tiny projects) on the
 * scanner result.
 *
 * Pattern catalogue: knowledge/opus-4.7-patterns.md
 * Token heuristic: estimateTokens() in scanners/lib/active-config-reader.mjs
 *
 * Zero external dependencies.
 */
|
||
|
||
import { resolve, dirname, isAbsolute } from 'node:path';
|
||
import { stat } from 'node:fs/promises';
|
||
import { readTextFile } from './lib/file-discovery.mjs';
|
||
import { finding, scannerResult } from './lib/output.mjs';
|
||
import { SEVERITY } from './lib/severity.mjs';
|
||
import { findImports, parseJson, parseFrontmatter } from './lib/yaml-parser.mjs';
|
||
import { estimateTokens, readActiveConfig } from './lib/active-config-reader.mjs';
|
||
|
||
const SCANNER = 'TOK';
|
||
|
||
const VOLATILE_TOP_LINES = 30;
|
||
const VOLATILE_PATTERNS = [
|
||
/\{timestamp\}/i,
|
||
/\{uuid\}/i,
|
||
/\{date\}/i,
|
||
/\{session(?:_id)?\}/i,
|
||
/\bactivity log\b/i,
|
||
/^\s*\d{4}-\d{2}-\d{2}T\d{2}:\d{2}/m, // ISO timestamps at line start
|
||
/^\s*\[\d{4}-\d{2}-\d{2}/m, // [YYYY-MM-DD ...] log lines
|
||
];
|
||
|
||
const MAX_IMPORT_DEPTH = 2;
|
||
|
||
// v5 M4: cascades above this contribute >10k tokens to every turn even before
|
||
// any tool description loads. Heuristic for "context budget under pressure".
|
||
const CASCADE_TOKEN_THRESHOLD = 10_000;
|
||
|
||
// v5 M2: SKILL.md `description` loads on every turn even when the body does
|
||
// not. Anything past this hints the description is doing the body's job.
|
||
const SKILL_DESCRIPTION_THRESHOLD = 500;
|
||
|
||
// v5 N1: MCP tool-schema budget thresholds (CA-TOK-005). Tool descriptions
|
||
// load on every turn — high tool counts inflate the per-turn schema payload
|
||
// regardless of whether the tools are invoked. Tiered severity per server:
|
||
// < 20 → no finding
|
||
// 20–49 → low
|
||
// 50–99 → medium
|
||
// 100+ → high
|
||
// null → low ("tool count unknown" — manifest not parseable)
|
||
const MCP_BUDGET_LOW = 20;
|
||
const MCP_BUDGET_MEDIUM = 50;
|
||
const MCP_BUDGET_HIGH = 100;
|
||
|
||
const HOTSPOTS_MAX = 10;
|
||
|
||
// v5 F7: shared evidence note appended to every TOK pattern finding.
|
||
// Communicates that severity reflects a structural heuristic, not measured
|
||
// runtime telemetry — tells reviewers how to interpret the rating.
|
||
const CALIBRATION_NOTE =
|
||
'severity reflects estimated tokens/turn based on structural heuristic; ' +
|
||
'not measured against runtime telemetry';
|
||
|
||
/**
|
||
* Classify a discovered config file into a token-estimation kind.
|
||
*/
|
||
function tokenKind(type) {
|
||
if (type === 'claude-md' || type === 'agent-md' || type === 'command-md' || type === 'skill-md' || type === 'rule') {
|
||
return 'markdown';
|
||
}
|
||
if (type === 'settings-json' || type === 'mcp-json' || type === 'hooks-json' ||
|
||
type === 'plugin-json' || type === 'claude-json' || type === 'keybindings-json') {
|
||
return 'json';
|
||
}
|
||
return 'markdown';
|
||
}
|
||
|
||
async function fileExists(absPath) {
|
||
try { await stat(absPath); return true; } catch { return false; }
|
||
}
|
||
|
||
function resolveImportPath(importPath, fromFile) {
|
||
let p = importPath.trim();
|
||
if (!p) return null;
|
||
if (p.startsWith('~/')) {
|
||
const home = process.env.HOME || process.env.USERPROFILE || '';
|
||
p = resolve(home, p.slice(2));
|
||
} else if (p.startsWith('~')) {
|
||
const home = process.env.HOME || process.env.USERPROFILE || '';
|
||
p = resolve(home, p.slice(1));
|
||
} else if (!isAbsolute(p)) {
|
||
p = resolve(dirname(fromFile), p);
|
||
}
|
||
return p;
|
||
}
|
||
|
||
/**
|
||
* Compute the deepest @import chain reachable from `startFile`.
|
||
* Returns max depth observed (0 = no imports, 1 = direct import, etc.).
|
||
*/
|
||
async function maxImportDepth(startFile, contentCache) {
|
||
let maxDepth = 0;
|
||
async function walk(file, depth, visited) {
|
||
if (depth > 20 || visited.has(file)) return;
|
||
visited.add(file);
|
||
if (depth > maxDepth) maxDepth = depth;
|
||
let content = contentCache.get(file);
|
||
if (content === undefined) {
|
||
content = await readTextFile(file);
|
||
contentCache.set(file, content);
|
||
}
|
||
if (!content) return;
|
||
const imports = findImports(content);
|
||
for (const imp of imports) {
|
||
const target = resolveImportPath(imp.path, file);
|
||
if (!target) continue;
|
||
if (!(await fileExists(target))) continue;
|
||
await walk(target, depth + 1, new Set(visited));
|
||
}
|
||
}
|
||
await walk(startFile, 0, new Set());
|
||
return maxDepth;
|
||
}
|
||
|
||
/**
|
||
* Classify an MCP server's tool count into a budget tier (v5 N1).
|
||
*
|
||
* Returns null if no finding should be emitted (toolCount < 20). Otherwise
|
||
* returns { severity, tier, kind } where kind is 'unknown' (toolCount===null)
|
||
* or 'counted'. Threshold ladder: 20 → low, 50 → medium, 100 → high. Null
|
||
* toolCount maps to low + 'unknown' so users can see opaque servers without
|
||
* the scanner pretending they're free.
|
||
*/
|
||
function classifyMcpToolBudget(toolCount) {
|
||
if (toolCount === null) {
|
||
return { severity: SEVERITY.low, tier: 'unknown', kind: 'unknown' };
|
||
}
|
||
if (typeof toolCount !== 'number' || toolCount < MCP_BUDGET_LOW) return null;
|
||
if (toolCount >= MCP_BUDGET_HIGH) return { severity: SEVERITY.high, tier: '100+', kind: 'counted' };
|
||
if (toolCount >= MCP_BUDGET_MEDIUM) return { severity: SEVERITY.medium, tier: '50-99', kind: 'counted' };
|
||
return { severity: SEVERITY.low, tier: '20-49', kind: 'counted' };
|
||
}
|
||
|
||
/**
|
||
* Detect cache-breaking volatile content in the first VOLATILE_TOP_LINES
|
||
* lines of a CLAUDE.md file.
|
||
*/
|
||
function detectVolatileTop(content) {
|
||
if (!content) return false;
|
||
const top = content.split('\n').slice(0, VOLATILE_TOP_LINES).join('\n');
|
||
return VOLATILE_PATTERNS.some(rx => rx.test(top));
|
||
}
|
||
|
||
/**
|
||
* Detect redundant or overlapping permission entries in a settings JSON object.
|
||
* Returns array of `{list, entry, reason}` for reporting.
|
||
*/
|
||
function detectRedundantPermissions(settings) {
|
||
const issues = [];
|
||
if (!settings || typeof settings !== 'object') return issues;
|
||
const perms = settings.permissions;
|
||
if (!perms || typeof perms !== 'object') return issues;
|
||
for (const list of ['allow', 'deny', 'ask']) {
|
||
const arr = perms[list];
|
||
if (!Array.isArray(arr)) continue;
|
||
const seen = new Set();
|
||
for (const entry of arr) {
|
||
if (typeof entry !== 'string') continue;
|
||
// Exact duplicate
|
||
if (seen.has(entry)) {
|
||
issues.push({ list, entry, reason: 'duplicate entry' });
|
||
continue;
|
||
}
|
||
seen.add(entry);
|
||
}
|
||
// Subset detection: an entry like `Read(src/**)` is redundant if `Read(**)`
|
||
// or bare `Read` is also present in the same list.
|
||
for (const entry of arr) {
|
||
if (typeof entry !== 'string') continue;
|
||
const tool = entry.replace(/\(.*\)$/, '').trim();
|
||
const hasBare = arr.includes(tool);
|
||
const hasWildcard = arr.includes(`${tool}(**)`) || arr.includes(`${tool}(*)`);
|
||
const isBare = entry === tool;
|
||
const isWildcard = entry === `${tool}(**)` || entry === `${tool}(*)`;
|
||
if (!isBare && !isWildcard && (hasBare || hasWildcard)) {
|
||
issues.push({ list, entry, reason: `overlapped by ${hasBare ? tool : `${tool}(**)`}` });
|
||
}
|
||
}
|
||
}
|
||
return issues;
|
||
}
|
||
|
||
/**
|
||
* Build the ranked hotspots array.
|
||
*
|
||
* v5 F1: when activeConfig is available, expand each MCP server into its own
|
||
* hotspot entry (richer signal than the parent .mcp.json file). Discovery
|
||
* files remain the primary source for CLAUDE.md / settings / skills.
|
||
*/
|
||
async function buildHotspots(discovery, targetPath, activeConfig) {
|
||
const ranked = [];
|
||
for (const f of discovery.files) {
|
||
const kind = tokenKind(f.type);
|
||
const tokens = estimateTokens(f.size, kind);
|
||
if (tokens <= 0) continue;
|
||
ranked.push({
|
||
absPath: f.absPath,
|
||
relPath: f.relPath || f.absPath.replace(targetPath + '/', ''),
|
||
type: f.type,
|
||
scope: f.scope,
|
||
size: f.size,
|
||
estimated_tokens: tokens,
|
||
});
|
||
}
|
||
// Per-MCP-server entries from activeConfig (each ~500+ tokens at runtime,
|
||
// not represented by the parent .mcp.json file size alone).
|
||
if (activeConfig && Array.isArray(activeConfig.mcpServers)) {
|
||
for (const m of activeConfig.mcpServers) {
|
||
if (!m || !m.enabled) continue;
|
||
ranked.push({
|
||
absPath: m.source || `mcp:${m.name}`,
|
||
relPath: `mcp:${m.name} (${m.source})`,
|
||
type: 'mcp-server',
|
||
scope: m.source,
|
||
size: 0,
|
||
estimated_tokens: m.estimatedTokens || 0,
|
||
});
|
||
}
|
||
}
|
||
ranked.sort((a, b) => b.estimated_tokens - a.estimated_tokens);
|
||
|
||
const top = ranked.slice(0, HOTSPOTS_MAX);
|
||
const out = [];
|
||
for (let i = 0; i < top.length; i++) {
|
||
const h = top[i];
|
||
const entry = {
|
||
source: h.relPath || h.absPath,
|
||
estimated_tokens: h.estimated_tokens,
|
||
rank: i + 1,
|
||
recommendations: hotspotRecommendations(h),
|
||
};
|
||
// Expose the on-disk path for file-backed hotspots so the
|
||
// --accurate-tokens calibration in token-hotspots-cli can read content.
|
||
// MCP-server hotspots are virtual (runtime tool-schema, not file content)
|
||
// so their path stays unset and calibration skips them.
|
||
if (h.type !== 'mcp-server' && h.absPath) {
|
||
entry.path = h.absPath;
|
||
}
|
||
out.push(entry);
|
||
}
|
||
|
||
return out;
|
||
}
|
||
|
||
function hotspotRecommendations(h) {
|
||
const recs = [];
|
||
if (h.type === 'claude-md') {
|
||
recs.push('Move volatile top-of-file content to the bottom or extract to an @import-ed file.');
|
||
recs.push('Split overlong CLAUDE.md into focused @imports (≤200 lines each).');
|
||
} else if (h.type === 'settings-json' || h.type === 'mcp-json' || h.type === 'hooks-json') {
|
||
recs.push('Deduplicate overlapping entries — each duplicate inflates the per-turn schema payload.');
|
||
recs.push('Move rarely-used permissions to a project-local override.');
|
||
} else if (h.type === 'skill-md' || h.type === 'agent-md' || h.type === 'command-md') {
|
||
recs.push('Tighten the description field — it loads on every turn even when the body does not.');
|
||
} else {
|
||
recs.push('Review whether this source needs to load on every turn.');
|
||
}
|
||
// Always cap to 1–3 recommendations
|
||
return recs.slice(0, 3);
|
||
}
|
||
|
||
/**
|
||
* Main scanner entry point.
|
||
* @param {string} targetPath
|
||
* @param {{files: Array<{absPath:string, relPath:string, type:string, scope:string, size:number}>, skipped?:number}} discovery
|
||
*/
|
||
export async function scan(targetPath, discovery) {
|
||
const start = Date.now();
|
||
const findings = [];
|
||
let filesScanned = 0;
|
||
const contentCache = new Map();
|
||
|
||
// v5 F1: pull active-config snapshot once. Failures are non-fatal — the
|
||
// scanner falls back to the discovery-only path used in v4.
|
||
let activeConfig = null;
|
||
try {
|
||
activeConfig = await readActiveConfig(targetPath, {});
|
||
} catch {
|
||
activeConfig = null;
|
||
}
|
||
|
||
// ── Pattern A: cache-breaking volatile top in CLAUDE.md ──
|
||
for (const f of discovery.files) {
|
||
if (f.type !== 'claude-md') continue;
|
||
filesScanned++;
|
||
const content = await readTextFile(f.absPath);
|
||
contentCache.set(f.absPath, content);
|
||
if (detectVolatileTop(content)) {
|
||
findings.push(finding({
|
||
scanner: SCANNER,
|
||
severity: SEVERITY.high,
|
||
title: 'Cache-breaking volatile content at top of CLAUDE.md',
|
||
description:
|
||
`The first ${VOLATILE_TOP_LINES} lines of ${f.relPath || f.absPath} contain volatile ` +
|
||
'tokens (timestamps, session ids, or activity logs). Volatile content above stable ' +
|
||
'content defeats Opus 4.7 prompt-cache reuse on every turn.',
|
||
file: f.absPath,
|
||
evidence: CALIBRATION_NOTE,
|
||
recommendation:
|
||
'Move volatile sections to the bottom of the file, or extract them to an @import-ed ' +
|
||
'file outside the cached prefix. Keep the first 30 lines stable across turns.',
|
||
category: 'token-efficiency',
|
||
}));
|
||
}
|
||
}
|
||
|
||
// ── Pattern B: redundant tool/permission declarations ──
|
||
for (const f of discovery.files) {
|
||
if (f.type !== 'settings-json') continue;
|
||
filesScanned++;
|
||
const content = await readTextFile(f.absPath);
|
||
if (!content) continue;
|
||
const parsed = parseJson(content);
|
||
if (!parsed) continue;
|
||
const issues = detectRedundantPermissions(parsed);
|
||
if (issues.length === 0) continue;
|
||
findings.push(finding({
|
||
scanner: SCANNER,
|
||
severity: SEVERITY.medium,
|
||
title: 'Redundant permission declarations',
|
||
description:
|
||
`${f.relPath || f.absPath} contains ${issues.length} redundant or overlapping ` +
|
||
`permission entr${issues.length === 1 ? 'y' : 'ies'}. Each duplicate inflates the ` +
|
||
'tool-schema payload sent on every turn.',
|
||
file: f.absPath,
|
||
evidence:
|
||
issues.slice(0, 5).map(i => `${i.list}: "${i.entry}" (${i.reason})`).join('; ') +
|
||
` — ${CALIBRATION_NOTE}`,
|
||
recommendation:
|
||
'Deduplicate the permissions.allow / permissions.deny arrays. Prefer the most ' +
|
||
'specific entry that still grants the intended access.',
|
||
category: 'token-efficiency',
|
||
}));
|
||
}
|
||
|
||
// ── Pattern C: deep @import chain (>2 hops) ──
|
||
for (const f of discovery.files) {
|
||
if (f.type !== 'claude-md') continue;
|
||
const depth = await maxImportDepth(f.absPath, contentCache);
|
||
if (depth > MAX_IMPORT_DEPTH) {
|
||
findings.push(finding({
|
||
scanner: SCANNER,
|
||
severity: SEVERITY.low,
|
||
title: 'Deep @import chain defeats prompt-cache reuse',
|
||
description:
|
||
`${f.relPath || f.absPath} reaches @import depth ${depth} (>${MAX_IMPORT_DEPTH} hops). ` +
|
||
'Each @import boundary fragments the prompt-cache prefix; deeply chained imports ' +
|
||
'defeat caching for the deepest content even when it never changes.',
|
||
file: f.absPath,
|
||
evidence: `Max chain depth: ${depth} — ${CALIBRATION_NOTE}`,
|
||
recommendation:
|
||
'Flatten the @import chain to ≤2 hops. Inline the deepest layer back into its parent.',
|
||
category: 'token-efficiency',
|
||
}));
|
||
}
|
||
}
|
||
|
||
// ── Pattern F: SKILL.md description > 500 chars (v5 M2) ──
|
||
// Scoped to discovery.files (project-local skill-md). The plan mentioned
|
||
// walking activeConfig.skills, but that pulls in user's ~/.claude/skills
|
||
// and installed plugin skills which are out-of-scope for a project audit
|
||
// and add noise the user can't act on. Project-local discovery is what
|
||
// /config-audit on a path is actually asking about.
|
||
for (const f of discovery.files) {
|
||
if (f.type !== 'skill-md') continue;
|
||
const content = await readTextFile(f.absPath);
|
||
if (!content) continue;
|
||
filesScanned++;
|
||
const fm = parseFrontmatter(content)?.frontmatter || null;
|
||
const desc = (fm && typeof fm.description === 'string') ? fm.description : '';
|
||
if (desc.length <= SKILL_DESCRIPTION_THRESHOLD) continue;
|
||
const skillName = (fm && fm.name) || f.absPath.split('/').slice(-2, -1)[0] || f.absPath;
|
||
findings.push(finding({
|
||
scanner: SCANNER,
|
||
severity: SEVERITY.low,
|
||
title: 'Bloated skill description (loads on every turn)',
|
||
description:
|
||
`Skill "${skillName}" has a description of ${desc.length} characters ` +
|
||
`(>${SKILL_DESCRIPTION_THRESHOLD}). The description block loads on every turn ` +
|
||
'even when the skill body does not — long descriptions inflate per-turn cost.',
|
||
file: f.absPath,
|
||
evidence:
|
||
`description_chars=${desc.length}; threshold=${SKILL_DESCRIPTION_THRESHOLD}; ` +
|
||
`skill="${skillName}" — ${CALIBRATION_NOTE}`,
|
||
recommendation:
|
||
'Tighten the description to a single sentence (≤500 chars) covering trigger phrases ' +
|
||
'only. Move detailed usage / examples into the SKILL.md body.',
|
||
category: 'token-efficiency',
|
||
}));
|
||
}
|
||
|
||
// ── Pattern G: MCP tool-schema budget per server (v5 N1, CA-TOK-005) ──
|
||
// Scope: project-local .mcp.json only. Plugin- and ~/.claude.json-sourced
|
||
// servers are global concerns surfaced by the manifest scanner; scoping the
|
||
// finding here to .mcp.json keeps /config-audit <path> actionable for the
|
||
// path the user is auditing.
|
||
if (activeConfig && Array.isArray(activeConfig.mcpServers)) {
|
||
for (const m of activeConfig.mcpServers) {
|
||
if (!m || !m.enabled) continue;
|
||
if (m.source !== '.mcp.json') continue;
|
||
const budget = classifyMcpToolBudget(m.toolCount);
|
||
if (!budget) continue;
|
||
const severity = budget.severity;
|
||
const sourceLabel = m.source ? `${m.name} (${m.source})` : m.name;
|
||
const isUnknown = budget.kind === 'unknown';
|
||
const description = isUnknown
|
||
? `MCP server "${sourceLabel}" has tool count unknown — could not parse manifest ` +
|
||
'or cached tools/list. Tool schemas load on every turn; an unverified server ' +
|
||
'may be inflating the per-turn payload silently.'
|
||
: `MCP server "${sourceLabel}" exposes ${m.toolCount} tools. Tool schemas load on ` +
|
||
'every turn regardless of which tools the model actually invokes — high tool ' +
|
||
'counts inflate the per-turn payload and crowd out usable context.';
|
||
const evidence = isUnknown
|
||
? `tool_count=unknown; server="${m.name}"; source="${m.source}" — ${CALIBRATION_NOTE}`
|
||
: `tool_count=${m.toolCount}; tier=${budget.tier}; server="${m.name}"; ` +
|
||
`source="${m.source}" — ${CALIBRATION_NOTE}`;
|
||
const recommendation = isUnknown
|
||
? 'Install the package locally (so detect-mcp-tool-count can read its manifest), ' +
|
||
'or run the server once and cache its tools/list response under ' +
|
||
'~/.claude/config-audit/mcp-cache/<name>.json. See knowledge/cache-telemetry-recipe.md.'
|
||
: 'Use the server\'s `tools/filter` config (or equivalent) to expose only the tools ' +
|
||
'this project actually needs. Consider splitting heavy MCP servers across project- ' +
|
||
'and user-scopes so per-project budget stays tight.';
|
||
findings.push(finding({
|
||
scanner: SCANNER,
|
||
severity,
|
||
title: `High MCP tool-schema budget on server "${m.name}"`,
|
||
description,
|
||
file: m.source && m.source !== `mcp:${m.name}` ? m.source : null,
|
||
evidence,
|
||
recommendation,
|
||
category: 'token-efficiency',
|
||
}));
|
||
}
|
||
}
|
||
|
||
// ── Pattern E: CLAUDE.md cascade > CASCADE_TOKEN_THRESHOLD (v5 M4) ──
|
||
if (activeConfig?.claudeMd?.estimatedTokens > CASCADE_TOKEN_THRESHOLD) {
|
||
const cascadeTokens = activeConfig.claudeMd.estimatedTokens;
|
||
const fileCount = activeConfig.claudeMd.files?.length ?? 0;
|
||
findings.push(finding({
|
||
scanner: SCANNER,
|
||
severity: SEVERITY.medium,
|
||
title: 'CLAUDE.md cascade exceeds 10k tokens per turn',
|
||
description:
|
||
`The active CLAUDE.md cascade for this repo (${fileCount} files: managed + user + ` +
|
||
`ancestors + project + @imports) totals ~${cascadeTokens} tokens. Every turn loads this ` +
|
||
'whole prefix; budget pressure compounds with tool schemas and MCP servers.',
|
||
file: activeConfig.claudeMd.files?.find(f => f.scope === 'project')?.path || null,
|
||
evidence:
|
||
`cascade_tokens=${cascadeTokens}; threshold=${CASCADE_TOKEN_THRESHOLD}; ` +
|
||
`files=${fileCount} — ${CALIBRATION_NOTE}`,
|
||
recommendation:
|
||
'Trim the user/project CLAUDE.md, push reference material into @imports that load ' +
|
||
'on-demand, or move long sections to skills. Aim for <10k tokens in the cascade.',
|
||
category: 'token-efficiency',
|
||
}));
|
||
}
|
||
|
||
// ── Hotspots ranking ──
|
||
const hotspots = await buildHotspots(discovery, targetPath, activeConfig);
|
||
|
||
// ── Total estimated tokens (sum of every discovered source + activeConfig MCP) ──
|
||
let totalTokens = 0;
|
||
for (const f of discovery.files) {
|
||
totalTokens += estimateTokens(f.size, tokenKind(f.type));
|
||
}
|
||
if (activeConfig && Array.isArray(activeConfig.mcpServers)) {
|
||
for (const m of activeConfig.mcpServers) {
|
||
if (m && m.enabled) totalTokens += m.estimatedTokens || 0;
|
||
}
|
||
}
|
||
|
||
const result = scannerResult(SCANNER, 'ok', findings, filesScanned, Date.now() - start);
|
||
result.hotspots = hotspots;
|
||
result.total_estimated_tokens = totalTokens;
|
||
if (activeConfig) {
|
||
result.activeConfig = {
|
||
claudeMdEstimatedTokens: activeConfig.claudeMd?.estimatedTokens ?? 0,
|
||
mcpServerCount: activeConfig.mcpServers?.length ?? 0,
|
||
pluginCount: activeConfig.plugins?.length ?? 0,
|
||
skillCount: activeConfig.skills?.length ?? 0,
|
||
};
|
||
}
|
||
return result;
|
||
}
|