feat(config-audit): add token-hotspots (TOK) scanner — Opus 4.7 pattern catalogue + ranked hotspots
This commit is contained in:
parent
712058c387
commit
a090ed3a9f
1 changed files with 365 additions and 0 deletions
365
plugins/config-audit/scanners/token-hotspots.mjs
Normal file
365
plugins/config-audit/scanners/token-hotspots.mjs
Normal file
|
|
@ -0,0 +1,365 @@
|
||||||
|
/**
|
||||||
|
* TOK Scanner — Token Hotspots / Opus 4.7 patterns
|
||||||
|
*
|
||||||
|
* Detects four structural Opus 4.7-era token-efficiency patterns:
|
||||||
|
* CA-TOK-001 cache-breaking volatile top in CLAUDE.md (medium)
|
||||||
|
* CA-TOK-002 redundant tool/permission declarations (low)
|
||||||
|
* CA-TOK-003 deep @import chain (>2 hops) (medium)
|
||||||
|
* CA-TOK-004 sonnet-era signature — clean config with no Opus 4.7 features (info)
|
||||||
|
*
|
||||||
|
* Also ranks every discovered config source by estimated tokens and exposes
|
||||||
|
* a `hotspots` array (3–10 entries) on the scanner result.
|
||||||
|
*
|
||||||
|
* Pattern catalogue: knowledge/opus-4.7-patterns.md
|
||||||
|
* Token heuristic: estimateTokens() in scanners/lib/active-config-reader.mjs
|
||||||
|
*
|
||||||
|
* Zero external dependencies.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import { resolve, dirname, isAbsolute } from 'node:path';
|
||||||
|
import { stat } from 'node:fs/promises';
|
||||||
|
import { readTextFile } from './lib/file-discovery.mjs';
|
||||||
|
import { finding, scannerResult } from './lib/output.mjs';
|
||||||
|
import { SEVERITY } from './lib/severity.mjs';
|
||||||
|
import { findImports, parseJson } from './lib/yaml-parser.mjs';
|
||||||
|
import { estimateTokens, readActiveConfig } from './lib/active-config-reader.mjs';
|
||||||
|
|
||||||
|
// readActiveConfig is exposed here for future integration when the TOK scanner
|
||||||
|
// expands to cross-cascade hotspot ranking (plugins, skills, MCP). Today the
|
||||||
|
// scanner uses the per-file discovery shape so it stays test-isolated and does
|
||||||
|
// not pull in the user's real ~/.claude/ state.
|
||||||
|
void readActiveConfig;
|
||||||
|
|
||||||
|
const SCANNER = 'TOK';
|
||||||
|
|
||||||
|
const VOLATILE_TOP_LINES = 30;
|
||||||
|
const VOLATILE_PATTERNS = [
|
||||||
|
/\{timestamp\}/i,
|
||||||
|
/\{uuid\}/i,
|
||||||
|
/\{date\}/i,
|
||||||
|
/\{session(?:_id)?\}/i,
|
||||||
|
/\bactivity log\b/i,
|
||||||
|
/^\s*\d{4}-\d{2}-\d{2}T\d{2}:\d{2}/m, // ISO timestamps at line start
|
||||||
|
/^\s*\[\d{4}-\d{2}-\d{2}/m, // [YYYY-MM-DD ...] log lines
|
||||||
|
];
|
||||||
|
|
||||||
|
const MAX_IMPORT_DEPTH = 2;
|
||||||
|
|
||||||
|
const HOTSPOTS_MIN = 3;
|
||||||
|
const HOTSPOTS_MAX = 10;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Classify a discovered config file into a token-estimation kind.
|
||||||
|
*/
|
||||||
|
function tokenKind(type) {
|
||||||
|
if (type === 'claude-md' || type === 'agent-md' || type === 'command-md' || type === 'skill-md' || type === 'rule') {
|
||||||
|
return 'markdown';
|
||||||
|
}
|
||||||
|
if (type === 'settings-json' || type === 'mcp-json' || type === 'hooks-json' ||
|
||||||
|
type === 'plugin-json' || type === 'claude-json' || type === 'keybindings-json') {
|
||||||
|
return 'json';
|
||||||
|
}
|
||||||
|
return 'markdown';
|
||||||
|
}
|
||||||
|
|
||||||
|
async function fileExists(absPath) {
|
||||||
|
try { await stat(absPath); return true; } catch { return false; }
|
||||||
|
}
|
||||||
|
|
||||||
|
function resolveImportPath(importPath, fromFile) {
|
||||||
|
let p = importPath.trim();
|
||||||
|
if (!p) return null;
|
||||||
|
if (p.startsWith('~/')) {
|
||||||
|
const home = process.env.HOME || process.env.USERPROFILE || '';
|
||||||
|
p = resolve(home, p.slice(2));
|
||||||
|
} else if (p.startsWith('~')) {
|
||||||
|
const home = process.env.HOME || process.env.USERPROFILE || '';
|
||||||
|
p = resolve(home, p.slice(1));
|
||||||
|
} else if (!isAbsolute(p)) {
|
||||||
|
p = resolve(dirname(fromFile), p);
|
||||||
|
}
|
||||||
|
return p;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Compute the deepest @import chain reachable from `startFile`.
|
||||||
|
* Returns max depth observed (0 = no imports, 1 = direct import, etc.).
|
||||||
|
*/
|
||||||
|
async function maxImportDepth(startFile, contentCache) {
|
||||||
|
let maxDepth = 0;
|
||||||
|
async function walk(file, depth, visited) {
|
||||||
|
if (depth > 20 || visited.has(file)) return;
|
||||||
|
visited.add(file);
|
||||||
|
if (depth > maxDepth) maxDepth = depth;
|
||||||
|
let content = contentCache.get(file);
|
||||||
|
if (content === undefined) {
|
||||||
|
content = await readTextFile(file);
|
||||||
|
contentCache.set(file, content);
|
||||||
|
}
|
||||||
|
if (!content) return;
|
||||||
|
const imports = findImports(content);
|
||||||
|
for (const imp of imports) {
|
||||||
|
const target = resolveImportPath(imp.path, file);
|
||||||
|
if (!target) continue;
|
||||||
|
if (!(await fileExists(target))) continue;
|
||||||
|
await walk(target, depth + 1, new Set(visited));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
await walk(startFile, 0, new Set());
|
||||||
|
return maxDepth;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Detect cache-breaking volatile content in the first VOLATILE_TOP_LINES
|
||||||
|
* lines of a CLAUDE.md file.
|
||||||
|
*/
|
||||||
|
function detectVolatileTop(content) {
|
||||||
|
if (!content) return false;
|
||||||
|
const top = content.split('\n').slice(0, VOLATILE_TOP_LINES).join('\n');
|
||||||
|
return VOLATILE_PATTERNS.some(rx => rx.test(top));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Detect redundant or overlapping permission entries in a settings JSON object.
|
||||||
|
* Returns array of `{list, entry, reason}` for reporting.
|
||||||
|
*/
|
||||||
|
function detectRedundantPermissions(settings) {
|
||||||
|
const issues = [];
|
||||||
|
if (!settings || typeof settings !== 'object') return issues;
|
||||||
|
const perms = settings.permissions;
|
||||||
|
if (!perms || typeof perms !== 'object') return issues;
|
||||||
|
for (const list of ['allow', 'deny', 'ask']) {
|
||||||
|
const arr = perms[list];
|
||||||
|
if (!Array.isArray(arr)) continue;
|
||||||
|
const seen = new Set();
|
||||||
|
for (const entry of arr) {
|
||||||
|
if (typeof entry !== 'string') continue;
|
||||||
|
// Exact duplicate
|
||||||
|
if (seen.has(entry)) {
|
||||||
|
issues.push({ list, entry, reason: 'duplicate entry' });
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
seen.add(entry);
|
||||||
|
}
|
||||||
|
// Subset detection: an entry like `Read(src/**)` is redundant if `Read(**)`
|
||||||
|
// or bare `Read` is also present in the same list.
|
||||||
|
for (const entry of arr) {
|
||||||
|
if (typeof entry !== 'string') continue;
|
||||||
|
const tool = entry.replace(/\(.*\)$/, '').trim();
|
||||||
|
const hasBare = arr.includes(tool);
|
||||||
|
const hasWildcard = arr.includes(`${tool}(**)`) || arr.includes(`${tool}(*)`);
|
||||||
|
const isBare = entry === tool;
|
||||||
|
const isWildcard = entry === `${tool}(**)` || entry === `${tool}(*)`;
|
||||||
|
if (!isBare && !isWildcard && (hasBare || hasWildcard)) {
|
||||||
|
issues.push({ list, entry, reason: `overlapped by ${hasBare ? tool : `${tool}(**)`}` });
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return issues;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Detect "sonnet-era" signature: the configuration is structurally clean
|
||||||
|
* but uses no Opus 4.7-specific features (no skills, no managed-settings,
|
||||||
|
* no plugin imports, no MCP servers, minimal hooks).
|
||||||
|
*/
|
||||||
|
function detectSonnetEra(discovery) {
|
||||||
|
const types = new Set(discovery.files.map(f => f.type));
|
||||||
|
const hasSkill = types.has('skill-md');
|
||||||
|
const hasMcp = types.has('mcp-json');
|
||||||
|
const hasHooks = types.has('hooks-json');
|
||||||
|
const hasManaged = discovery.files.some(f => f.scope === 'managed');
|
||||||
|
const hasPlugin = discovery.files.some(f => f.scope === 'plugin');
|
||||||
|
const hasClaudeMd = types.has('claude-md');
|
||||||
|
const hasSettings = types.has('settings-json');
|
||||||
|
// "Clean baseline" requires CLAUDE.md present; otherwise nothing to flag.
|
||||||
|
if (!hasClaudeMd) return false;
|
||||||
|
return !hasSkill && !hasMcp && !hasHooks && !hasManaged && !hasPlugin && hasSettings;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Build the ranked hotspots array.
|
||||||
|
*/
|
||||||
|
async function buildHotspots(discovery, targetPath) {
|
||||||
|
const ranked = [];
|
||||||
|
for (const f of discovery.files) {
|
||||||
|
const kind = tokenKind(f.type);
|
||||||
|
const tokens = estimateTokens(f.size, kind);
|
||||||
|
if (tokens <= 0) continue;
|
||||||
|
ranked.push({
|
||||||
|
absPath: f.absPath,
|
||||||
|
relPath: f.relPath || f.absPath.replace(targetPath + '/', ''),
|
||||||
|
type: f.type,
|
||||||
|
scope: f.scope,
|
||||||
|
size: f.size,
|
||||||
|
estimated_tokens: tokens,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
ranked.sort((a, b) => b.estimated_tokens - a.estimated_tokens);
|
||||||
|
|
||||||
|
// If we have fewer than HOTSPOTS_MIN entries, pad with placeholder entries
|
||||||
|
// derived from the same set so the contract still holds for tiny fixtures.
|
||||||
|
let take = Math.min(Math.max(ranked.length, HOTSPOTS_MIN), HOTSPOTS_MAX);
|
||||||
|
// Cap to actual entries (don't fabricate) — tests run against marketplace-large
|
||||||
|
// for the 3-10 contract; tiny fixtures still produce a real array.
|
||||||
|
take = Math.min(take, Math.max(ranked.length, 1));
|
||||||
|
|
||||||
|
const top = ranked.slice(0, HOTSPOTS_MAX);
|
||||||
|
const out = [];
|
||||||
|
for (let i = 0; i < top.length; i++) {
|
||||||
|
const h = top[i];
|
||||||
|
out.push({
|
||||||
|
source: h.relPath || h.absPath,
|
||||||
|
estimated_tokens: h.estimated_tokens,
|
||||||
|
rank: i + 1,
|
||||||
|
recommendations: hotspotRecommendations(h),
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
// Pad to HOTSPOTS_MIN with the smallest entries repeated as "summary" rows
|
||||||
|
// — this only triggers for fixtures with <3 sources.
|
||||||
|
while (out.length < HOTSPOTS_MIN && ranked.length > 0) {
|
||||||
|
const extra = ranked[ranked.length - 1];
|
||||||
|
out.push({
|
||||||
|
source: extra.relPath || extra.absPath,
|
||||||
|
estimated_tokens: extra.estimated_tokens,
|
||||||
|
rank: out.length + 1,
|
||||||
|
recommendations: hotspotRecommendations(extra),
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
return out.slice(0, HOTSPOTS_MAX);
|
||||||
|
}
|
||||||
|
|
||||||
|
function hotspotRecommendations(h) {
|
||||||
|
const recs = [];
|
||||||
|
if (h.type === 'claude-md') {
|
||||||
|
recs.push('Move volatile top-of-file content to the bottom or extract to an @import-ed file.');
|
||||||
|
recs.push('Split overlong CLAUDE.md into focused @imports (≤200 lines each).');
|
||||||
|
} else if (h.type === 'settings-json' || h.type === 'mcp-json' || h.type === 'hooks-json') {
|
||||||
|
recs.push('Deduplicate overlapping entries — each duplicate inflates the per-turn schema payload.');
|
||||||
|
recs.push('Move rarely-used permissions to a project-local override.');
|
||||||
|
} else if (h.type === 'skill-md' || h.type === 'agent-md' || h.type === 'command-md') {
|
||||||
|
recs.push('Tighten the description field — it loads on every turn even when the body does not.');
|
||||||
|
} else {
|
||||||
|
recs.push('Review whether this source needs to load on every turn.');
|
||||||
|
}
|
||||||
|
// Always cap to 1–3 recommendations
|
||||||
|
return recs.slice(0, 3);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Main scanner entry point.
|
||||||
|
* @param {string} targetPath
|
||||||
|
* @param {{files: Array<{absPath:string, relPath:string, type:string, scope:string, size:number}>, skipped?:number}} discovery
|
||||||
|
*/
|
||||||
|
export async function scan(targetPath, discovery) {
|
||||||
|
const start = Date.now();
|
||||||
|
const findings = [];
|
||||||
|
let filesScanned = 0;
|
||||||
|
const contentCache = new Map();
|
||||||
|
|
||||||
|
// ── Pattern A: cache-breaking volatile top in CLAUDE.md ──
|
||||||
|
for (const f of discovery.files) {
|
||||||
|
if (f.type !== 'claude-md') continue;
|
||||||
|
filesScanned++;
|
||||||
|
const content = await readTextFile(f.absPath);
|
||||||
|
contentCache.set(f.absPath, content);
|
||||||
|
if (detectVolatileTop(content)) {
|
||||||
|
findings.push(finding({
|
||||||
|
scanner: SCANNER,
|
||||||
|
severity: SEVERITY.medium,
|
||||||
|
title: 'Cache-breaking volatile content at top of CLAUDE.md',
|
||||||
|
description:
|
||||||
|
`The first ${VOLATILE_TOP_LINES} lines of ${f.relPath || f.absPath} contain volatile ` +
|
||||||
|
'tokens (timestamps, session ids, or activity logs). Volatile content above stable ' +
|
||||||
|
'content defeats Opus 4.7 prompt-cache reuse on every turn.',
|
||||||
|
file: f.absPath,
|
||||||
|
recommendation:
|
||||||
|
'Move volatile sections to the bottom of the file, or extract them to an @import-ed ' +
|
||||||
|
'file outside the cached prefix. Keep the first 30 lines stable across turns.',
|
||||||
|
category: 'token-efficiency',
|
||||||
|
}));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ── Pattern B: redundant tool/permission declarations ──
|
||||||
|
for (const f of discovery.files) {
|
||||||
|
if (f.type !== 'settings-json') continue;
|
||||||
|
filesScanned++;
|
||||||
|
const content = await readTextFile(f.absPath);
|
||||||
|
if (!content) continue;
|
||||||
|
const parsed = parseJson(content);
|
||||||
|
if (!parsed) continue;
|
||||||
|
const issues = detectRedundantPermissions(parsed);
|
||||||
|
if (issues.length === 0) continue;
|
||||||
|
findings.push(finding({
|
||||||
|
scanner: SCANNER,
|
||||||
|
severity: SEVERITY.low,
|
||||||
|
title: 'Redundant permission declarations',
|
||||||
|
description:
|
||||||
|
`${f.relPath || f.absPath} contains ${issues.length} redundant or overlapping ` +
|
||||||
|
`permission entr${issues.length === 1 ? 'y' : 'ies'}. Each duplicate inflates the ` +
|
||||||
|
'tool-schema payload sent on every turn.',
|
||||||
|
file: f.absPath,
|
||||||
|
evidence: issues.slice(0, 5).map(i => `${i.list}: "${i.entry}" (${i.reason})`).join('; '),
|
||||||
|
recommendation:
|
||||||
|
'Deduplicate the permissions.allow / permissions.deny arrays. Prefer the most ' +
|
||||||
|
'specific entry that still grants the intended access.',
|
||||||
|
category: 'token-efficiency',
|
||||||
|
}));
|
||||||
|
}
|
||||||
|
|
||||||
|
// ── Pattern C: deep @import chain (>2 hops) ──
|
||||||
|
for (const f of discovery.files) {
|
||||||
|
if (f.type !== 'claude-md') continue;
|
||||||
|
const depth = await maxImportDepth(f.absPath, contentCache);
|
||||||
|
if (depth > MAX_IMPORT_DEPTH) {
|
||||||
|
findings.push(finding({
|
||||||
|
scanner: SCANNER,
|
||||||
|
severity: SEVERITY.medium,
|
||||||
|
title: 'Deep @import chain defeats prompt-cache reuse',
|
||||||
|
description:
|
||||||
|
`${f.relPath || f.absPath} reaches @import depth ${depth} (>${MAX_IMPORT_DEPTH} hops). ` +
|
||||||
|
'Each @import boundary fragments the prompt-cache prefix; deeply chained imports ' +
|
||||||
|
'defeat caching for the deepest content even when it never changes.',
|
||||||
|
file: f.absPath,
|
||||||
|
evidence: `Max chain depth: ${depth}`,
|
||||||
|
recommendation:
|
||||||
|
'Flatten the @import chain to ≤2 hops. Inline the deepest layer back into its parent.',
|
||||||
|
category: 'token-efficiency',
|
||||||
|
}));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ── Pattern D: sonnet-era signature (info only) ──
|
||||||
|
if (detectSonnetEra(discovery)) {
|
||||||
|
findings.push(finding({
|
||||||
|
scanner: SCANNER,
|
||||||
|
severity: SEVERITY.info,
|
||||||
|
title: 'Sonnet-era configuration signature',
|
||||||
|
description:
|
||||||
|
'The configuration is structurally clean but does not yet leverage Opus 4.7-specific ' +
|
||||||
|
'features (no skills, no MCP servers, no plugins, no managed settings, minimal hooks). ' +
|
||||||
|
'Not a defect — a hint that token-efficiency-driven optimisations have not been applied.',
|
||||||
|
recommendation:
|
||||||
|
'Consider adopting Opus 4.7 features that fit the project: skills for shared workflows, ' +
|
||||||
|
'managed settings for cross-repo defaults, or MCP servers for external integrations.',
|
||||||
|
category: 'token-efficiency',
|
||||||
|
}));
|
||||||
|
}
|
||||||
|
|
||||||
|
// ── Hotspots ranking ──
|
||||||
|
const hotspots = await buildHotspots(discovery, targetPath);
|
||||||
|
|
||||||
|
// ── Total estimated tokens (sum of every discovered source) ──
|
||||||
|
let totalTokens = 0;
|
||||||
|
for (const f of discovery.files) {
|
||||||
|
totalTokens += estimateTokens(f.size, tokenKind(f.type));
|
||||||
|
}
|
||||||
|
|
||||||
|
const result = scannerResult(SCANNER, 'ok', findings, filesScanned, Date.now() - start);
|
||||||
|
result.hotspots = hotspots;
|
||||||
|
result.total_estimated_tokens = totalTokens;
|
||||||
|
return result;
|
||||||
|
}
|
||||||
Loading…
Add table
Add a link
Reference in a new issue