ktg-plugin-marketplace/plugins/config-audit/scanners/cache-prefix-scanner.mjs
Kjell Tore Guttormsen 65087e624f feat(config-audit): cache-prefix stability scanner CPS (v5 N3) [skip-docs]
New CPS scanner walks CLAUDE.md cascade and flags volatile content
between lines 31 and 150 — the cache-prefix window beyond TOK Pattern
A's top-30 territory. Volatile content anywhere in the cached prefix
forces a fresh cache write from that line down on every turn.

Volatile-pattern set extends TOK Pattern A with:
- shell-exec lines (! prefix) — common in CLAUDE.md to inject git/date
- ${VAR} substitutions — vary per-shell, defeat cache reuse

Severity: medium per finding. Skips lines 1-30 to avoid duplicating
Pattern A's range; CPS' value is in the 31-150 zone.

Wired into scan-orchestrator + scoring SCANNER_AREA_MAP. CPS shares
the "Token Efficiency" area with TOK; scoreByArea now deduplicates by
area name and combines counts across scanners contributing to the
same area, so the 9-area scorecard contract holds.

Fixtures volatile-mid-section/{volatile-line-60, volatile-line-200}
verify both positive (line 60) and out-of-window (line 200) cases.

[skip-docs] reason: v5 plan fences off README/CLAUDE.md badge updates
to Session 5; Forgejo pre-commit-docs-gate hook requires this tag.

Tests: 604 → 611 (+7).

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-01 07:37:54 +02:00

115 lines
4.3 KiB
JavaScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/**
* CPS Scanner — Cache-Prefix Stability Analyzer (v5 N3)
*
* Walks the CLAUDE.md cascade and flags volatile content in the cached
* prefix beyond the top 30 lines (lines 31 to CACHED_PREFIX_LINES). This
* complements TOK Pattern A, which only inspects the top 30 lines: CPS
* catches a `!git log` at line 60 or a `${TIMESTAMP}` at line 100. Volatile
* content anywhere in the cached prefix breaks Opus 4.7 prompt-cache reuse
* from that line forward.
*
* Volatile patterns extend the TOK set with shell-exec `!` prefix and
* `${VAR}` substitutions — both common cache-busters in real CLAUDE.md files.
*
* Finding ID: CA-CPS-NNN. Severity: medium.
*
* Zero external dependencies.
*/
import { readTextFile } from './lib/file-discovery.mjs';
import { finding, scannerResult } from './lib/output.mjs';
import { SEVERITY } from './lib/severity.mjs';
// Scanner identifier passed to finding() and scannerResult().
const SCANNER = 'CPS';

// Cache-prefix line threshold: content below this line is unlikely to be
// part of a stable cached prefix in typical sessions. The number is
// heuristic — the goal is to flag volatility that genuinely costs cache
// hits per turn, not to chase every inline date in a long backlog file.
const CACHED_PREFIX_LINES = 150;

// Volatile-pattern set (extends token-hotspots.mjs Pattern A).
// Each entry pairs a per-line regex (`rx`) with the human-readable `label`
// that ends up in the finding evidence. Order matters: a line is reported
// under the first pattern that matches it. None of the regexes use the
// `g`/`y` flags, so `rx.test()` keeps no state between lines.
const VOLATILE_PATTERNS = [
  { rx: /\{timestamp\}/i, label: '{timestamp} placeholder' },
  { rx: /\{uuid\}/i, label: '{uuid} placeholder' },
  { rx: /\{date\}/i, label: '{date} placeholder' },
  { rx: /\{session(?:_id)?\}/i, label: '{session_id} placeholder' },
  { rx: /\bactivity log\b/i, label: 'activity-log section' },
  { rx: /^\s*\d{4}-\d{2}-\d{2}T\d{2}:\d{2}/, label: 'ISO timestamp at line start' },
  { rx: /^\s*\[\d{4}-\d{2}-\d{2}/, label: 'dated log line [YYYY-MM-DD ...]' },
  // v5 N3 extensions:
  // The `(?!\[)` lookahead keeps Markdown images (`![alt](src)`) — static
  // content — from being misreported as shell-exec lines.
  { rx: /^\s*!(?!\[)/, label: 'shell-exec line (! prefix)' },
  { rx: /\$\{[A-Z_][A-Z0-9_]*\}/, label: '${VAR} substitution' },
];
/**
 * Scan content for volatile lines within the cached prefix window.
 *
 * Only the first `maxLines` lines are inspected. Each line is reported at
 * most once, labelled by the first entry in `patterns` that matches it.
 * Both LF and CRLF line endings are accepted, so snippets from Windows-style
 * files do not carry a trailing `\r`.
 *
 * @param {string|null|undefined} content - Raw file text; falsy input yields [].
 * @param {number} [maxLines=CACHED_PREFIX_LINES] - How many leading lines to inspect.
 * @param {Array<{rx: RegExp, label: string}>} [patterns=VOLATILE_PATTERNS] -
 *   Patterns to test, in priority order. Regexes should not use the `g`/`y`
 *   flags, because `rx.test()` would then keep state between lines.
 * @returns {Array<{line: number, label: string, snippet: string}>}
 *   One entry per matching line; `line` is 1-based and `snippet` is the line
 *   text truncated to at most 120 characters.
 */
function findVolatileLines(
  content,
  maxLines = CACHED_PREFIX_LINES,
  patterns = VOLATILE_PATTERNS,
) {
  if (!content) return [];

  const SNIPPET_MAX = 120;
  const hits = [];

  // `\r?\n` rather than '\n': otherwise CRLF files leave a '\r' on every line,
  // which leaks into the snippet and skews the length check below.
  const windowLines = content.split(/\r?\n/).slice(0, maxLines);

  windowLines.forEach((text, index) => {
    // First matching pattern wins; a line is never reported twice.
    const match = patterns.find(({ rx }) => rx.test(text));
    if (!match) return;

    hits.push({
      line: index + 1,
      label: match.label,
      snippet:
        text.length > SNIPPET_MAX ? `${text.slice(0, SNIPPET_MAX - 3)}...` : text,
    });
  });

  return hits;
}
/**
 * Main scanner entry point.
 *
 * Reads every discovered file of type 'claude-md', collects its volatile
 * lines, and emits one medium-severity finding per file that has at least
 * one volatile line after line 30.
 *
 * @param {string} targetPath - Not read by this scanner.
 * @param {{files: Array<{absPath:string, relPath:string, type:string, scope:string, size:number}>}} discovery
 * @returns the value of scannerResult() for this scanner, with status 'ok'.
 */
export async function scan(targetPath, discovery) {
  const startedAt = Date.now();
  const findings = [];
  let filesScanned = 0;

  // Same advice for every finding, so build it once. The first segment is a
  // plain (non-template) string on purpose: `${VAR}` must stay literal text.
  const recommendation = [
    'Move volatile sections (timestamps, !shell-exec, ${VAR} substitutions, dated logs) ',
    `below line ${CACHED_PREFIX_LINES} or extract them to an @import-ed file outside the `,
    'cached prefix. Stable content above, volatile content below.',
  ].join('');

  for (const entry of discovery.files) {
    if (entry.type !== 'claude-md') continue;
    filesScanned += 1;

    const text = await readTextFile(entry.absPath);

    // Lines 1-30 are already covered by TOK Pattern A; CPS only reports the
    // 31-150 range so the two scanners do not duplicate each other.
    const hits = text
      ? findVolatileLines(text).filter((hit) => hit.line > 30)
      : [];
    if (hits.length === 0) continue;

    // Cap the evidence at the first five hits to keep the finding readable.
    const evidence = hits
      .slice(0, 5)
      .map((hit) => `line ${hit.line} (${hit.label}): ${hit.snippet}`)
      .join('; ');

    const displayPath = entry.relPath || entry.absPath;
    const noun = hits.length === 1 ? 'entry' : 'entries';
    const description =
      `${displayPath} contains ${hits.length} volatile ${noun} between lines 31 and ` +
      `${CACHED_PREFIX_LINES}. The prompt cache covers the file's prefix; ` +
      'any volatility forces a fresh cache write from that line down on every turn.';

    findings.push(
      finding({
        scanner: SCANNER,
        severity: SEVERITY.medium,
        title: 'Volatile content inside cached prefix breaks reuse',
        description,
        file: entry.absPath,
        evidence,
        recommendation,
        category: 'token-efficiency',
      }),
    );
  }

  const elapsedMs = Date.now() - startedAt;
  return scannerResult(SCANNER, 'ok', findings, filesScanned, elapsedMs);
}