New CPS scanner walks CLAUDE.md cascade and flags volatile content
between lines 31 and 150 — the cache-prefix window beyond TOK Pattern
A's top-30 territory. Volatile content anywhere in the cached prefix
forces a fresh cache write from that line down on every turn.
Volatile-pattern set extends TOK Pattern A with:
- shell-exec lines (! prefix) — common in CLAUDE.md to inject git/date
- ${VAR} substitutions — vary per-shell, defeat cache reuse
Severity: medium per finding. Skips lines 1-30 to avoid duplicating
Pattern A's range; CPS' value is in the 31-150 zone.
Wired into scan-orchestrator + scoring SCANNER_AREA_MAP. CPS shares
the "Token Efficiency" area with TOK; scoreByArea now deduplicates by
area name and combines counts across scanners contributing to the
same area, so the 9-area scorecard contract holds.
Fixtures volatile-mid-section/{volatile-line-60, volatile-line-200}
verify both positive (line 60) and out-of-window (line 200) cases.
[skip-docs] reason: v5 plan fences off README/CLAUDE.md badge updates
to Session 5; Forgejo pre-commit-docs-gate hook requires this tag.
Tests: 604 → 611 (+7).
Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
115 lines
4.3 KiB
JavaScript
115 lines
4.3 KiB
JavaScript
/**
 * CPS Scanner — Cache-Prefix Stability Analyzer (v5 N3)
 *
 * Walks the CLAUDE.md cascade and flags volatile content inside the cached
 * prefix (the first CACHED_PREFIX_LINES lines of each file). Unlike TOK
 * Pattern A, which only inspects the top 30 lines, CPS catches a `!git log`
 * at line 60 or a `${TIMESTAMP}` at line 100. Findings are reported only for
 * lines 31 through CACHED_PREFIX_LINES; lines 1-30 are left to Pattern A so
 * the two scanners do not report the same line twice. Volatile content
 * anywhere in the cached prefix breaks Opus 4.7 prompt-cache reuse from that
 * line forward.
 *
 * The volatile patterns extend the TOK set with the shell-exec `!` prefix and
 * `${VAR}` substitutions — both common cache-busters in real CLAUDE.md files.
 *
 * Finding ID: CA-CPS-NNN. Severity: medium.
 *
 * Zero external dependencies.
 */
|
||
|
||
import { readTextFile } from './lib/file-discovery.mjs';
|
||
import { finding, scannerResult } from './lib/output.mjs';
|
||
import { SEVERITY } from './lib/severity.mjs';
|
||
|
||
// Scanner identifier passed to finding() and scannerResult().
const SCANNER = 'CPS';

// Cache-prefix line threshold: content below this line is unlikely to be
// part of a stable cached prefix in typical sessions. The number is
// heuristic — the goal is to flag volatility that genuinely costs cache
// hits per turn, not to chase every inline date in a long backlog file.
const CACHED_PREFIX_LINES = 150;

// Volatile-pattern set (extends token-hotspots.mjs Pattern A).
// Order matters: a line is reported under the first pattern that matches it.
// None of these regexes carry the /g or /y flag, so .test() is stateless.
const VOLATILE_PATTERNS = [
  { rx: /\{timestamp\}/i, label: '{timestamp} placeholder' },
  { rx: /\{uuid\}/i, label: '{uuid} placeholder' },
  { rx: /\{date\}/i, label: '{date} placeholder' },
  { rx: /\{session(?:_id)?\}/i, label: '{session_id} placeholder' },
  { rx: /\bactivity log\b/i, label: 'activity-log section' },
  { rx: /^\s*\d{4}-\d{2}-\d{2}T\d{2}:\d{2}/, label: 'ISO timestamp at line start' },
  { rx: /^\s*\[\d{4}-\d{2}-\d{2}/, label: 'dated log line [YYYY-MM-DD ...]' },
  // v5 N3 extensions:
  // The (?!\[) lookahead keeps Markdown image lines (`![alt](src)`) from being
  // reported as shell-exec: they start with `!` but are static content.
  { rx: /^\s*!(?!\[)/, label: 'shell-exec line (! prefix)' },
  // Upper-case names only, e.g. ${HOME} or ${SESSION_ID}.
  { rx: /\$\{[A-Z_][A-Z0-9_]*\}/, label: '${VAR} substitution' },
];
|
||
|
||
/**
 * Scan content for volatile lines within the cached prefix window.
 *
 * Only the first `maxLines` lines are inspected. Each line is reported at most
 * once, under the first entry of `patterns` whose regex matches it. Both LF
 * and CRLF line endings are accepted, so snippets never carry a trailing '\r'.
 *
 * @param {string} content - Full text of the file; a falsy value yields [].
 * @param {object} [options]
 * @param {number} [options.maxLines=CACHED_PREFIX_LINES] - Number of leading
 *   lines to inspect.
 * @param {Array<{rx: RegExp, label: string}>} [options.patterns=VOLATILE_PATTERNS]
 *   - Patterns to test, in priority order.
 * @returns {Array<{line: number, label: string, snippet: string}>} One entry
 *   per matching line; `line` is 1-based and `snippet` is capped at 120
 *   characters (117 characters plus '...').
 */
function findVolatileLines(
  content,
  { maxLines = CACHED_PREFIX_LINES, patterns = VOLATILE_PATTERNS } = {},
) {
  if (!content) return [];

  const hits = [];
  const lines = content.split(/\r?\n/).slice(0, maxLines);

  lines.forEach((text, index) => {
    const matched = patterns.find(({ rx }) => {
      // A caller-supplied /g or /y regex keeps state between .test() calls;
      // resetting lastIndex makes every line start matching from column 0.
      rx.lastIndex = 0;
      return rx.test(text);
    });
    if (!matched) return;

    hits.push({
      line: index + 1,
      label: matched.label,
      snippet: text.length > 120 ? `${text.slice(0, 117)}...` : text,
    });
  });

  return hits;
}
|
||
|
||
/**
 * Scanner entry point for CPS.
 *
 * Visits every discovered file whose type is 'claude-md', collects its
 * volatile lines via findVolatileLines, and records one medium-severity
 * finding per file that has at least one hit past line 30. Files that are
 * empty or unreadable (readTextFile returns a falsy value) still count
 * toward the scanned-file total but produce no finding.
 *
 * @param {string} targetPath - Not read by this scanner.
 * @param {{files: Array<{absPath:string, relPath:string, type:string, scope:string, size:number}>}} discovery
 * @returns {Promise<*>} Whatever scannerResult() builds from the scanner name,
 *   the 'ok' status, the findings, the scanned-file count and the elapsed
 *   milliseconds.
 */
export async function scan(targetPath, discovery) {
  const startedAt = Date.now();
  const findings = [];
  let filesScanned = 0;

  for (const file of discovery.files) {
    if (file.type !== 'claude-md') continue;
    filesScanned += 1;

    const text = await readTextFile(file.absPath);
    if (!text) continue;

    // Lines 1–30 are TOK Pattern A's range; CPS only reports hits from
    // line 31 onward so the two scanners do not duplicate each other.
    const lateHits = findVolatileLines(text).filter(({ line }) => line > 30);
    const hitCount = lateHits.length;
    if (hitCount === 0) continue;

    // Evidence lists at most the first five hits to keep the finding short.
    const evidenceParts = [];
    for (const { line, label, snippet } of lateHits.slice(0, 5)) {
      evidenceParts.push(`line ${line} (${label}): ${snippet}`);
    }

    const displayPath = file.relPath || file.absPath;
    const noun = hitCount === 1 ? 'entry' : 'entries';

    const description = [
      `${displayPath} contains ${hitCount} volatile ${noun} between lines 31 and ${CACHED_PREFIX_LINES}.`,
      "The prompt cache covers the file's prefix;",
      'any volatility forces a fresh cache write from that line down on every turn.',
    ].join(' ');

    const recommendation = [
      'Move volatile sections (timestamps, !shell-exec, ${VAR} substitutions, dated logs)',
      `below line ${CACHED_PREFIX_LINES} or extract them to an @import-ed file outside the`,
      'cached prefix. Stable content above, volatile content below.',
    ].join(' ');

    findings.push(
      finding({
        scanner: SCANNER,
        severity: SEVERITY.medium,
        title: 'Volatile content inside cached prefix breaks reuse',
        description,
        file: file.absPath,
        evidence: evidenceParts.join('; '),
        recommendation,
        category: 'token-efficiency',
      }),
    );
  }

  const elapsedMs = Date.now() - startedAt;
  return scannerResult(SCANNER, 'ok', findings, filesScanned, elapsedMs);
}
|