feat(config-audit): cache-prefix stability scanner CPS (v5 N3) [skip-docs]

New CPS scanner walks CLAUDE.md cascade and flags volatile content
between lines 31 and 150 — the cache-prefix window beyond TOK Pattern
A's top-30 territory. Volatile content anywhere in the cached prefix
forces a fresh cache write from that line down on every turn.

Volatile-pattern set extends TOK Pattern A with:
- shell-exec lines (! prefix) — common in CLAUDE.md to inject git/date
- ${VAR} substitutions — vary per-shell, defeat cache reuse

Severity: medium per finding. Skips lines 1-30 to avoid duplicating
Pattern A's range; CPS' value is in the 31-150 zone.

Wired into scan-orchestrator + scoring SCANNER_AREA_MAP. CPS shares
the "Token Efficiency" area with TOK; scoreByArea now deduplicates by
area name and combines counts across scanners contributing to the
same area, so the 9-area scorecard contract holds.

Fixtures volatile-mid-section/{volatile-line-60, volatile-line-200}
verify both positive (line 60) and out-of-window (line 200) cases.

[skip-docs] reason: v5 plan fences off README/CLAUDE.md badge updates
to Session 5; Forgejo pre-commit-docs-gate hook requires this tag.

Tests: 604 → 611 (+7).

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
Kjell Tore Guttormsen 2026-05-01 07:37:54 +02:00
commit 65087e624f
6 changed files with 517 additions and 9 deletions

View file

@ -0,0 +1,115 @@
/**
* CPS Scanner Cache-Prefix Stability Analyzer (v5 N3)
*
* Walks the CLAUDE.md cascade and flags volatile content anywhere in the
* cached prefix (≤ CACHED_PREFIX_LINES). Distinguishes from TOK Pattern A,
* which only inspects the top 30 lines: CPS catches a `!git log` at line 60
* or a `${TIMESTAMP}` at line 100. Volatile content anywhere in the cached
* prefix breaks Opus 4.7 prompt-cache reuse from that line forward.
*
* Volatile patterns extend the TOK set with shell-exec `!` prefix and
* `${VAR}` substitutions — both common cache-busters in real CLAUDE.md files.
*
* Finding ID: CA-CPS-NNN. Severity: medium.
*
* Zero external dependencies.
*/
import { readTextFile } from './lib/file-discovery.mjs';
import { finding, scannerResult } from './lib/output.mjs';
import { SEVERITY } from './lib/severity.mjs';
const SCANNER = 'CPS';

// Cache-prefix line threshold: content below this line is unlikely to be
// part of a stable cached prefix in typical sessions. The number is
// heuristic — the goal is to flag volatility that genuinely costs cache
// hits per turn, not to chase every inline date in a long backlog file.
const CACHED_PREFIX_LINES = 150;

// Volatile-pattern set (extends token-hotspots.mjs Pattern A).
// Each entry pairs a detection regex with a human-readable label surfaced in
// finding evidence. None of the regexes carry the /g flag, so `.test()` is
// stateless and safe to reuse across lines.
const VOLATILE_PATTERNS = [
  { rx: /\{timestamp\}/i, label: '{timestamp} placeholder' },
  { rx: /\{uuid\}/i, label: '{uuid} placeholder' },
  { rx: /\{date\}/i, label: '{date} placeholder' },
  { rx: /\{session(?:_id)?\}/i, label: '{session_id} placeholder' },
  { rx: /\bactivity log\b/i, label: 'activity-log section' },
  { rx: /^\s*\d{4}-\d{2}-\d{2}T\d{2}:\d{2}/, label: 'ISO timestamp at line start' },
  { rx: /^\s*\[\d{4}-\d{2}-\d{2}/, label: 'dated log line [YYYY-MM-DD ...]' },
  // v5 N3 extensions:
  { rx: /^\s*!/, label: 'shell-exec line (! prefix)' },
  { rx: /\$\{[A-Z_][A-Z0-9_]*\}/, label: '${VAR} substitution' },
];

/**
 * Scan content for volatile lines within the cached prefix window.
 *
 * @param {string} content - Raw file text; empty/null yields no results.
 * @param {number} [maxLines=CACHED_PREFIX_LINES] - Size of the window to
 *   inspect (1-indexed, inclusive); lines beyond it are ignored. Defaults to
 *   the module-level cache-prefix threshold, so existing callers are
 *   unaffected.
 * @returns {Array<{line: number, label: string, snippet: string}>} One entry
 *   per volatile line — the first matching pattern wins — with a 1-indexed
 *   line number and a snippet capped at 120 characters.
 */
function findVolatileLines(content, maxLines = CACHED_PREFIX_LINES) {
  const out = [];
  if (!content) return out;
  const lines = content.split('\n').slice(0, maxLines);
  for (let i = 0; i < lines.length; i++) {
    for (const { rx, label } of VOLATILE_PATTERNS) {
      if (rx.test(lines[i])) {
        out.push({
          line: i + 1,
          label,
          // Keep evidence readable: truncate long lines with an ellipsis.
          snippet: lines[i].length > 120 ? lines[i].slice(0, 117) + '...' : lines[i],
        });
        break; // at most one finding per line — first matching pattern wins
      }
    }
  }
  return out;
}
/**
 * Main scanner entry point.
 *
 * Inspects every discovered CLAUDE.md file for volatile content inside the
 * cached-prefix window, ignoring the top 30 lines (TOK Pattern A's range)
 * so CPS only reports volatility in the 31-150 zone.
 *
 * @param {string} targetPath
 * @param {{files: Array<{absPath:string, relPath:string, type:string, scope:string, size:number}>}} discovery
 */
export async function scan(targetPath, discovery) {
  const startedAt = Date.now();
  const findings = [];
  let filesScanned = 0;

  for (const file of discovery.files) {
    if (file.type !== 'claude-md') continue;
    filesScanned += 1;

    const text = await readTextFile(file.absPath);
    if (!text) continue;

    // Drop anything in lines 1-30: that range belongs to TOK Pattern A, and
    // CPS must not duplicate its findings. What survives is the 31-150 zone.
    const hits = findVolatileLines(text).filter((hit) => hit.line > 30);
    if (hits.length === 0) continue;

    const evidence = hits
      .slice(0, 5)
      .map((hit) => `line ${hit.line} (${hit.label}): ${hit.snippet}`)
      .join('; ');

    const count = hits.length;
    const suffix = count === 1 ? 'y' : 'ies';
    findings.push(finding({
      scanner: SCANNER,
      severity: SEVERITY.medium,
      title: 'Volatile content inside cached prefix breaks reuse',
      description:
        `${file.relPath || file.absPath} contains ${count} volatile entr${suffix} ` +
        `between lines 31 and ${CACHED_PREFIX_LINES}. The prompt cache covers ` +
        "the file's prefix; any volatility forces a fresh cache write from " +
        'that line down on every turn.',
      file: file.absPath,
      evidence,
      recommendation:
        'Move volatile sections (timestamps, !shell-exec, ${VAR} substitutions, ' +
        `dated logs) below line ${CACHED_PREFIX_LINES} or extract them to an ` +
        '@import-ed file outside the cached prefix. Stable content above, ' +
        'volatile content below.',
      category: 'token-efficiency',
    }));
  }

  return scannerResult(SCANNER, 'ok', findings, filesScanned, Date.now() - startedAt);
}

View file

@ -151,6 +151,7 @@ const SCANNER_AREA_MAP = {
CNF: 'Conflicts',
GAP: 'Feature Coverage',
TOK: 'Token Efficiency',
CPS: 'Token Efficiency',
};
/**
@ -181,21 +182,35 @@ function severityPenalty(counts) {
* @returns {{ areas: Array<{ id: string, name: string, grade: string, score: number, findingCount: number }>, overallGrade: string, scoringVersion: string }}
*/
export function scoreByArea(scannerResults) {
const areas = [];
// Group scanner results by area name so multiple scanners that share an area
// (e.g. TOK + CPS both → "Token Efficiency") produce one combined row.
const grouped = new Map();
for (const result of scannerResults) {
const name = SCANNER_AREA_MAP[result.scanner] || result.scanner;
const findingCount = result.findings.length;
if (!grouped.has(name)) grouped.set(name, []);
grouped.get(name).push(result);
}
const areas = [];
for (const [name, results] of grouped) {
const findings = results.flatMap(r => r.findings || []);
const findingCount = findings.length;
let score;
if (result.scanner === 'GAP') {
const util = calculateUtilization(result.findings);
if (results.some(r => r.scanner === 'GAP')) {
// GAP scoring uses utilization, not severity penalty
const util = calculateUtilization(findings);
score = util.score;
} else {
// v5 severity-weighted: penalty proportional to a per-scanner budget.
// maxBudget = max(10, findingCount * 4) — adding more lows doesn't crater the
// grade, but a single high-severity finding consumes a large fraction of budget.
const counts = result.counts || {};
// v5 severity-weighted: penalty proportional to a per-area budget.
// Combine counts across all scanners contributing to this area.
const counts = { critical: 0, high: 0, medium: 0, low: 0, info: 0 };
for (const r of results) {
for (const k of Object.keys(counts)) {
counts[k] += (r.counts && r.counts[k]) || 0;
}
}
const penalty = severityPenalty(counts);
const maxBudget = Math.max(10, findingCount * 4);
const passRate = Math.max(0, 100 - (penalty / maxBudget) * 100);

View file

@ -24,6 +24,7 @@ import { scan as scanImports } from './import-resolver.mjs';
import { scan as scanConflicts } from './conflict-detector.mjs';
import { scan as scanGap } from './feature-gap-scanner.mjs';
import { scan as scanTokenHotspots } from './token-hotspots.mjs';
import { scan as scanCachePrefix } from './cache-prefix-scanner.mjs';
// Directory names that identify test fixture / example directories
const FIXTURE_DIR_NAMES = ['tests', 'examples', '__tests__', 'test-fixtures'];
@ -55,6 +56,7 @@ const SCANNERS = [
{ name: 'CNF', fn: scanConflicts, label: 'Conflict Detector' },
{ name: 'GAP', fn: scanGap, label: 'Feature Gap Scanner' },
{ name: 'TOK', fn: scanTokenHotspots, label: 'Token Hotspots' },
{ name: 'CPS', fn: scanCachePrefix, label: 'Cache-Prefix Stability' },
];
/**

View file

@ -0,0 +1,218 @@
# Project
Stable preamble.
Body line 4.
Body line 5.
Body line 6.
Body line 7.
Body line 8.
Body line 9.
Body line 10.
Body line 11.
Body line 12.
Body line 13.
Body line 14.
Body line 15.
Body line 16.
Body line 17.
Body line 18.
Body line 19.
Body line 20.
Body line 21.
Body line 22.
Body line 23.
Body line 24.
Body line 25.
Body line 26.
Body line 27.
Body line 28.
Body line 29.
Body line 30.
Body line 31.
Body line 32.
Body line 33.
Body line 34.
Body line 35.
Body line 36.
Body line 37.
Body line 38.
Body line 39.
Body line 40.
Body line 41.
Body line 42.
Body line 43.
Body line 44.
Body line 45.
Body line 46.
Body line 47.
Body line 48.
Body line 49.
Body line 50.
Body line 51.
Body line 52.
Body line 53.
Body line 54.
Body line 55.
Body line 56.
Body line 57.
Body line 58.
Body line 59.
Body line 60.
Body line 61.
Body line 62.
Body line 63.
Body line 64.
Body line 65.
Body line 66.
Body line 67.
Body line 68.
Body line 69.
Body line 70.
Body line 71.
Body line 72.
Body line 73.
Body line 74.
Body line 75.
Body line 76.
Body line 77.
Body line 78.
Body line 79.
Body line 80.
Body line 81.
Body line 82.
Body line 83.
Body line 84.
Body line 85.
Body line 86.
Body line 87.
Body line 88.
Body line 89.
Body line 90.
Body line 91.
Body line 92.
Body line 93.
Body line 94.
Body line 95.
Body line 96.
Body line 97.
Body line 98.
Body line 99.
Body line 100.
Body line 101.
Body line 102.
Body line 103.
Body line 104.
Body line 105.
Body line 106.
Body line 107.
Body line 108.
Body line 109.
Body line 110.
Body line 111.
Body line 112.
Body line 113.
Body line 114.
Body line 115.
Body line 116.
Body line 117.
Body line 118.
Body line 119.
Body line 120.
Body line 121.
Body line 122.
Body line 123.
Body line 124.
Body line 125.
Body line 126.
Body line 127.
Body line 128.
Body line 129.
Body line 130.
Body line 131.
Body line 132.
Body line 133.
Body line 134.
Body line 135.
Body line 136.
Body line 137.
Body line 138.
Body line 139.
Body line 140.
Body line 141.
Body line 142.
Body line 143.
Body line 144.
Body line 145.
Body line 146.
Body line 147.
Body line 148.
Body line 149.
Body line 150.
Body line 151.
Body line 152.
Body line 153.
Body line 154.
Body line 155.
Body line 156.
Body line 157.
Body line 158.
Body line 159.
Body line 160.
Body line 161.
Body line 162.
Body line 163.
Body line 164.
Body line 165.
Body line 166.
Body line 167.
Body line 168.
Body line 169.
Body line 170.
Body line 171.
Body line 172.
Body line 173.
Body line 174.
Body line 175.
Body line 176.
Body line 177.
Body line 178.
Body line 179.
Body line 180.
Body line 181.
Body line 182.
Body line 183.
Body line 184.
Body line 185.
Body line 186.
Body line 187.
Body line 188.
Body line 189.
Body line 190.
Body line 191.
Body line 192.
Body line 193.
Body line 194.
Body line 195.
Body line 196.
Body line 197.
Body line 198.
[2026-04-15] Inline date in body — not above cache.
More body.
More body.
More body.
More body.
More body.
More body.
More body.
More body.
More body.
More body.
More body.
More body.
More body.
More body.
More body.
More body.
More body.
More body.
More body.

View file

@ -0,0 +1,79 @@
# Project
Stable preamble.
Body line 4.
Body line 5.
Body line 6.
Body line 7.
Body line 8.
Body line 9.
Body line 10.
Body line 11.
Body line 12.
Body line 13.
Body line 14.
Body line 15.
Body line 16.
Body line 17.
Body line 18.
Body line 19.
Body line 20.
Body line 21.
Body line 22.
Body line 23.
Body line 24.
Body line 25.
Body line 26.
Body line 27.
Body line 28.
Body line 29.
Body line 30.
Body line 31.
Body line 32.
Body line 33.
Body line 34.
Body line 35.
Body line 36.
Body line 37.
Body line 38.
Body line 39.
Body line 40.
Body line 41.
Body line 42.
Body line 43.
Body line 44.
Body line 45.
Body line 46.
Body line 47.
Body line 48.
Body line 49.
Body line 50.
Body line 51.
Body line 52.
Body line 53.
Body line 54.
Body line 55.
Body line 56.
Body line 57.
Body line 58.
Body line 59.
!git log -5 # volatile shell-exec at line 60
More body.
More body.
More body.
More body.
More body.
More body.
More body.
More body.
More body.
More body.
More body.
More body.
More body.
More body.
More body.
More body.
More body.
More body.
More body.

View file

@ -0,0 +1,79 @@
import { describe, it } from 'node:test';
import assert from 'node:assert/strict';
import { resolve } from 'node:path';
import { fileURLToPath } from 'node:url';
import { resetCounter } from '../../scanners/lib/output.mjs';
import { scan } from '../../scanners/cache-prefix-scanner.mjs';
import { discoverConfigFiles } from '../../scanners/lib/file-discovery.mjs';
const __dirname = fileURLToPath(new URL('.', import.meta.url));
const FIXTURES = resolve(__dirname, '../fixtures');

/**
 * Run the CPS scanner against a named fixture directory.
 * Resets the finding-ID counter first so IDs are deterministic per test.
 */
async function runScanner(fixtureName) {
  resetCounter();
  const fixturePath = resolve(FIXTURES, fixtureName);
  return scan(fixturePath, await discoverConfigFiles(fixturePath));
}
describe('CPS scanner — basic structure', () => {
  it('reports scanner prefix CPS', async () => {
    const res = await runScanner('volatile-mid-section/volatile-line-60');
    assert.equal(res.scanner, 'CPS');
  });

  it('finding IDs match CA-CPS-NNN pattern', async () => {
    const res = await runScanner('volatile-mid-section/volatile-line-60');
    // Every finding ID must follow the CA-CPS-NNN convention.
    res.findings.forEach((entry) => assert.match(entry.id, /^CA-CPS-\d{3}$/));
  });
});
describe('CPS scanner — volatile content within cached prefix', () => {
  it('flags !git log at line 60 (medium severity)', async () => {
    const res = await runScanner('volatile-mid-section/volatile-line-60');
    const titles = res.findings.map((x) => x.title);
    const hit = res.findings.find(
      (x) => /volatile content inside cached prefix/i.test(x.title || ''),
    );
    assert.ok(hit, `expected volatile-prefix finding; got: ${titles.join(' | ')}`);
    assert.equal(hit.severity, 'medium', `expected medium, got ${hit.severity}`);
    const evidence = String(hit.evidence || '');
    assert.match(evidence, /line 60/);
    assert.match(evidence, /shell-exec/i);
  });
});
describe('CPS scanner — volatile content beyond cache window', () => {
  it('does NOT flag volatility at line 200+ (outside 150-line window)', async () => {
    const res = await runScanner('volatile-mid-section/volatile-line-200');
    const hit = res.findings.find(
      (x) => /volatile content inside cached prefix/i.test(x.title || ''),
    );
    assert.equal(hit, undefined,
      `expected no finding for line-200 fixture; got: ${hit?.title}`);
  });
});
describe('CPS scanner — does not duplicate TOK Pattern A territory', () => {
  // Garbled range fixed: the skipped range is lines 1-30 (en-dash lost in
  // the original strings rendered it as "130").
  it('volatility at lines 1-30 is left for TOK Pattern A (no CPS finding)', async () => {
    // The opus-47/cache-breaking fixture has volatile content at the very top.
    // CPS skips lines 1-30 to avoid duplicating Pattern A's territory.
    const result = await runScanner('opus-47/cache-breaking');
    const f = result.findings.find(x => /volatile content inside cached prefix/i.test(x.title || ''));
    assert.equal(f, undefined,
      `expected no CPS finding when volatility is only in lines 1-30 (Pattern A's range)`);
  });
});
describe('CPS scanner — orchestrator wiring', () => {
  it('CPS appears in scan-orchestrator scanner list', async () => {
    const orch = await import('../../scanners/scan-orchestrator.mjs');
    const fixturePath = resolve(FIXTURES, 'volatile-mid-section/volatile-line-60');
    const env = await orch.runAllScanners(fixturePath, { filterFixtures: false });
    const names = env.scanners.map((r) => r.scanner);
    const cps = env.scanners.find((r) => r.scanner === 'CPS');
    assert.ok(cps, `expected CPS in orchestrator results; got: ${names.join(', ')}`);
  });

  it('CPS findings carry the token-efficiency category', async () => {
    const res = await runScanner('volatile-mid-section/volatile-line-60');
    const hit = res.findings.find(
      (x) => /volatile content inside cached prefix/i.test(x.title || ''),
    );
    assert.ok(hit);
    assert.equal(hit.category, 'token-efficiency');
  });
});