#!/usr/bin/env node
// scripts/q3-cache-prefix-experiment.mjs
//
// Q3 cache-prefix-preservation experiment for Spor C of post-v3.4.0 roadmap.
// Measures whether CLAUDE_CODE_FORK_SUBAGENT=1 preserves the server-side
// cache prefix across multiple `claude -p` fork-children when all children
// spawn with byte-identical --allowedTools at 150-250K parent context.
//
// Brief: .claude/projects/2026-05-04-spor-c-q3-cache-prefix-experiment/brief.md
// Plan:  .claude/projects/2026-05-04-spor-c-q3-cache-prefix-experiment/plan.md
//
// Result thresholds (master-plan):
//   median(cache_creation_input_tokens) <= 1500  -> POSITIVE
//   median >= 3500                               -> NEGATIVE
//   else                                         -> INCONCLUSIVE
//   Any per-child failure or missing metadata    -> INCONCLUSIVE.
//
// Zero npm dependencies. Node stdlib only. Hook-safe (no forbidden words
// in source — pre-bash-executor.mjs scans the entire command string when
// this script is invoked).

import { spawn, spawnSync } from 'node:child_process';
import {
  readFileSync,
  readdirSync,
  statSync,
  writeFileSync,
  existsSync,
  mkdirSync,
  unlinkSync,
  realpathSync,
} from 'node:fs';
import { createHash } from 'node:crypto';
import { join, dirname, resolve } from 'node:path';
import { tmpdir } from 'node:os';
import { pathToFileURL } from 'node:url';

const PROJECT_DIR = resolve(
  process.cwd(),
  '.claude/projects/2026-05-04-spor-c-q3-cache-prefix-experiment',
);
const DEFAULT_OUT = join(PROJECT_DIR, 'q3-experiment-results.local.md');
const STATS_JSONL = '/Users/ktg/.claude/plugins/data/ultraplan-local-ktg-plugin-marketplace/ultraexecute-stats.jsonl';
const ANALYZER = resolve(process.cwd(), 'lib/stats/cache-analyzer.mjs');

const MIN_PARENT_TOKENS = 150_000;
const MAX_PARENT_TOKENS = 250_000;
const POSITIVE_THRESHOLD = 1500;
const NEGATIVE_THRESHOLD = 3500;
const HARD_TIMEOUT_MS = 600_000; // 10 min total
const PER_CHILD_TIMEOUT_MS = 240_000; // 4 min per child
const WARMUP_TIMEOUT_MS = 180_000; // 3 min for the pre-flight warm-up call
const MIN_CC_VERSION = [2, 1, 121];

const ALLOWED_TOOLS = 'Read,Write,Edit,Bash,Glob,Grep';
const MODEL = 'sonnet';

// Sources for parent context build. Brief constraint: no secrets, no ~/, no
// other plugins. Stays inside plugins/ultraplan-local/.
//
// Calibration (empirical, CC v2.1.128 + Sonnet 4.6):
//   Token-per-byte ratio varies from 0.38-0.90 depending on content type.
//   Mixed .md+.mjs at 264K bytes yielded only ~60K context tokens (4.5 byte/token).
//   To reliably hit 150K context tokens, target ~600-700K bytes of mixed content.
//   Hooks baseline ~62K cache_creation always present, so total lands ~212-262K.
const CONTEXT_DIRS = [
  'commands',
  'agents',
  'lib/parsers',
  'lib/validators',
  'lib/util',
  'lib/review',
  'lib/stats',
];
const CONTEXT_EXTRA_FILES = [
  'docs/HANDOVER-CONTRACTS.md',
  'CLAUDE.md',
  'examples/02-real-cli/REGENERATED.md',
];

/**
 * Build the CLI help text.
 *
 * @returns {string} Usage block, terminated by a newline.
 */
function usage() {
  return `q3-cache-prefix-experiment.mjs — Q3 cache-prefix experiment harness

USAGE:
  node scripts/q3-cache-prefix-experiment.mjs [--help] [--dry-run] [--out <path>]

FLAGS:
  --help        Print this usage block and exit 0.
  --dry-run     Build parent context, print child argv arrays + token-byte
                estimate to stderr, do NOT call the API. No result file written.
  --out <path>  Write result file to <path>. Default:
                ${DEFAULT_OUT}

EXIT CODES:
  0  Experiment completed (RESULT line written).
  2  Hard timeout exceeded.
  3  CC version too old or FORK_SUBAGENT warm-up failed -> INCONCLUSIVE.
  4  Parent context out of 150K-250K band -> INCONCLUSIVE.
  5  Child API metadata unavailable -> INCONCLUSIVE.
  7  Usage / I/O error.

ENV:
  ANTHROPIC_API_KEY must be set (read from operator env, not embedded).
`;
}

/**
 * Report a command-line usage problem on stderr and exit with code 7.
 *
 * @param {string} message - One-line description of the problem.
 * @returns {never}
 */
function exitWithUsageError(message) {
  process.stderr.write(`${message}\n${usage()}`);
  process.exit(7);
}

/**
 * Parse command-line flags.
 *
 * Exits the process with code 7 on an unknown flag or when `--out` is given
 * without a value (previously that case stored `undefined` and failed later
 * inside the result writer with an unrelated TypeError).
 *
 * @param {string[]} argv - Arguments after the script path.
 * @returns {{help: boolean, dryRun: boolean, out: string}} Parsed options.
 */
function parseArgs(argv) {
  const opts = { help: false, dryRun: false, out: DEFAULT_OUT };
  for (let i = 0; i < argv.length; i++) {
    const a = argv[i];
    if (a === '--help' || a === '-h') {
      opts.help = true;
    } else if (a === '--dry-run') {
      opts.dryRun = true;
    } else if (a === '--out') {
      const value = argv[i + 1];
      if (value === undefined || value === '') {
        exitWithUsageError('Missing value for --out');
      }
      opts.out = value;
      i++;
    } else {
      exitWithUsageError(`Unknown argument: ${a}`);
    }
  }
  return opts;
}

/**
 * Write a prefixed diagnostic line to stderr.
 *
 * @param {string} msg - Message text (no trailing newline needed).
 */
function log(msg) {
  process.stderr.write(`[q3] ${msg}\n`);
}

/**
 * @returns {string} Current time as an ISO-8601 string.
 */
function nowIso() {
  return new Date().toISOString();
}

/**
 * Recursively collect regular files under `dir`.
 *
 * @param {string} dir - Directory to walk; a missing directory yields [].
 * @param {string} [ext] - If given, keep only paths ending with this suffix.
 * @returns {string[]} Sorted list of file paths (joined onto `dir`).
 */
function listFilesRecursive(dir, ext) {
  const out = [];
  if (!existsSync(dir)) return out;
  for (const ent of readdirSync(dir, { withFileTypes: true })) {
    const p = join(dir, ent.name);
    if (ent.isDirectory()) out.push(...listFilesRecursive(p, ext));
    else if (ent.isFile() && (!ext || p.endsWith(ext))) out.push(p);
  }
  return out.sort(); // deterministic ordering
}

/**
 * Concatenate the configured source files into one parent-context string.
 *
 * Files are read relative to the current working directory. Each file is
 * preceded by a `=== FILE: <path> ===` banner and sections are separated by
 * a blank line. Missing or unreadable files are skipped (best effort), with
 * a log line for unreadable ones so a shrunken context is explainable.
 *
 * @returns {{text: string, sha256: string, fileCount: number, byteLength: number}}
 *   The context text, its SHA-256 hex digest, the number of files included,
 *   and its UTF-8 byte length.
 */
function buildParentContext() {
  const parts = [];
  let fileCount = 0;

  const appendFile = (f) => {
    if (!existsSync(f)) return;
    try {
      parts.push(`=== FILE: ${f} ===\n` + readFileSync(f, 'utf-8'));
      fileCount++;
    } catch (err) {
      log(`skipping unreadable context file ${f}: ${err.message}`);
    }
  };

  for (const d of CONTEXT_DIRS) {
    const files = [
      ...listFilesRecursive(d, '.mjs'),
      ...listFilesRecursive(d, '.md'),
    ].sort();
    for (const f of files) appendFile(f);
  }
  for (const f of CONTEXT_EXTRA_FILES) appendFile(f);

  const text = parts.join('\n\n');
  const sha256 = createHash('sha256').update(text).digest('hex');
  return { text, sha256, fileCount, byteLength: Buffer.byteLength(text, 'utf-8') };
}

/**
 * Check that the installed `claude` CLI is at least MIN_CC_VERSION.
 *
 * @returns {{ok: boolean, version?: string, reason?: string}} `ok` plus the
 *   parsed version when available, and a human-readable reason on failure.
 */
function checkCcVersion() {
  const r = spawnSync('claude', ['--version'], { encoding: 'utf-8', timeout: 10_000 });
  if (r.error) {
    // Launch failure or timeout: status is null here, so report the real cause.
    return { ok: false, reason: `claude --version could not run: ${r.error.message}` };
  }
  if (r.status !== 0) {
    return { ok: false, reason: `claude --version exit ${r.status}: ${r.stderr || r.stdout}` };
  }
  const m = (r.stdout || '').match(/(\d+)\.(\d+)\.(\d+)/);
  if (!m) return { ok: false, reason: `cannot parse version from: ${r.stdout}` };
  const got = [Number(m[1]), Number(m[2]), Number(m[3])];
  // Lexicographic compare of [major, minor, patch].
  for (let i = 0; i < 3; i++) {
    if (got[i] > MIN_CC_VERSION[i]) return { ok: true, version: got.join('.') };
    if (got[i] < MIN_CC_VERSION[i]) {
      return {
        ok: false,
        reason: `CC ${got.join('.')} < required ${MIN_CC_VERSION.join('.')}`,
        version: got.join('.'),
      };
    }
  }
  return { ok: true, version: got.join('.') };
}

/**
 * Build the argv passed to every measured child.
 *
 * @param {string} contextFilePath - Path of the parent-context file.
 * @returns {string[]} Arguments for `claude` (binary name not included).
 */
function buildChildArgv(contextFilePath) {
  // Byte-identical across all 3 children (SC #3). Per-child differentiation
  // is via the user prompt suffix only, NOT via argv.
  //
  // Context is delivered via --append-system-prompt-file (NOT stdin) to:
  //   1. avoid stdin pipe buffer issues at >200K bytes
  //   2. ensure context is part of the cache-prefix segment
  //
  // --exclude-dynamic-system-prompt-sections moves cwd/env/git-status into
  // the user message, preventing per-child variation in the cache prefix.
  return [
    '-p',
    '--model', MODEL,
    '--output-format', 'stream-json',
    '--verbose',
    '--allowedTools', ALLOWED_TOOLS,
    '--max-turns', '1',
    '--append-system-prompt-file', contextFilePath,
    '--exclude-dynamic-system-prompt-sections',
  ];
}

/**
 * Run `claude` once with CLAUDE_CODE_FORK_SUBAGENT=1, feeding `prompt` on
 * stdin and collecting both output streams. Never rejects.
 *
 * @param {string[]} argv - Arguments for `claude`.
 * @param {string} prompt - Text written to the child's stdin.
 * @param {number} timeoutMs - After this long the child is sent SIGTERM.
 * @returns {Promise<{code: number|null, stdout: string, stderr: string,
 *   killed: boolean, spawnError: Error|null}>} `code` is -1 when the process
 *   could not be started (then `spawnError` is set).
 */
function runClaudeOnce(argv, prompt, timeoutMs) {
  return new Promise((settle) => {
    const env = { ...process.env, CLAUDE_CODE_FORK_SUBAGENT: '1' };
    const child = spawn('claude', argv, { env, stdio: ['pipe', 'pipe', 'pipe'] });
    let stdout = '';
    let stderr = '';
    let killed = false;
    let done = false;

    const timer = setTimeout(() => {
      killed = true;
      child.kill('SIGTERM');
    }, timeoutMs);

    // 'error' and 'close' can both fire for one child; first one wins.
    const finish = (code, spawnError = null) => {
      if (done) return;
      done = true;
      clearTimeout(timer);
      settle({ code, stdout, stderr, killed, spawnError });
    };

    child.stdout.on('data', (b) => { stdout += b.toString('utf-8'); });
    child.stderr.on('data', (b) => { stderr += b.toString('utf-8'); });
    // Without a listener, an EPIPE from a child that exits before reading
    // stdin would surface as an unhandled 'error' event and end the harness.
    child.stdin.on('error', (err) => { stderr += `\nstdin error: ${err.message}`; });
    child.on('close', (code) => finish(code));
    child.on('error', (err) => finish(-1, err));

    child.stdin.end(prompt);
  });
}

/**
 * Spawn one measured child.
 *
 * @param {string} contextFilePath - Path of the parent-context file.
 * @param {number} childIndex - 1-based child number, used in the prompt only.
 * @returns {Promise<{code: number|null, stdout: string, stderr: string,
 *   killed: boolean, argv: string[]}>} Outcome; `argv` includes the binary
 *   name so it can be compared across children.
 */
async function spawnChild(contextFilePath, childIndex) {
  const argv = buildChildArgv(contextFilePath);
  // User prompt is short (per-child suffix only). Context lives in the
  // appended system-prompt file, which Claude treats as cache-prefix
  // material.
  const prompt = `[child #${childIndex}] Reply only with the word OK.`;
  const run = await runClaudeOnce(argv, prompt, PER_CHILD_TIMEOUT_MS);
  const stderr = run.spawnError
    ? run.stderr + `\nspawn error: ${run.spawnError.message}`
    : run.stderr;
  return { code: run.code, stdout: run.stdout, stderr, killed: run.killed, argv: ['claude', ...argv] };
}

/**
 * Pull the token-usage object out of stream-json output.
 *
 * Scans line by line and returns the usage of the first `assistant` event
 * that has `message.usage`, or of the first `result` event that has `usage`,
 * whichever line comes first.
 *
 * @param {string} stdout - Raw newline-delimited JSON output.
 * @returns {object|null} The usage object, or null if none was found.
 */
function extractUsageFromStream(stdout) {
  for (const rawLine of stdout.split('\n')) {
    // Trim so CRLF output or indented lines are not skipped.
    const line = rawLine.trim();
    if (!line.startsWith('{')) continue;
    try {
      const obj = JSON.parse(line);
      if (obj.type === 'assistant' && obj.message && obj.message.usage) {
        return obj.message.usage;
      }
      // Fallback: top-level result event also carries usage.
      if (obj.type === 'result' && obj.usage) {
        return obj.usage;
      }
    } catch { /* skip non-JSON lines */ }
  }
  return null;
}

/**
 * Median of a list of numbers.
 *
 * @param {number[]} values - Input values (not mutated).
 * @returns {number|null} Median, or null for an empty list.
 */
function median(values) {
  if (values.length === 0) return null;
  const sorted = [...values].sort((a, b) => a - b);
  const mid = Math.floor(sorted.length / 2);
  return sorted.length % 2 === 0
    ? (sorted[mid - 1] + sorted[mid]) / 2
    : sorted[mid];
}

/**
 * Map child measurements to POSITIVE / NEGATIVE / INCONCLUSIVE.
 *
 * @param {Array<{cache_creation_input_tokens: number|null}>} measurements
 * @param {boolean} allValid - False if any child failed or lacked metadata.
 * @returns {{result: string, reason: string, median?: number}}
 */
function decideResult(measurements, allValid) {
  if (!allValid) return { result: 'INCONCLUSIVE', reason: 'one or more children failed or missing metadata' };
  const ccs = measurements.map((m) => m.cache_creation_input_tokens);
  const med = median(ccs);
  if (med === null) return { result: 'INCONCLUSIVE', reason: 'no measurements' };
  if (med <= POSITIVE_THRESHOLD) return { result: 'POSITIVE', reason: `median cache_creation ${med} <= ${POSITIVE_THRESHOLD}`, median: med };
  if (med >= NEGATIVE_THRESHOLD) return { result: 'NEGATIVE', reason: `median cache_creation ${med} >= ${NEGATIVE_THRESHOLD}`, median: med };
  return { result: 'INCONCLUSIVE', reason: `median cache_creation ${med} in (${POSITIVE_THRESHOLD}, ${NEGATIVE_THRESHOLD})`, median: med };
}

/**
 * Run the stats analyzer script and parse its JSON output.
 *
 * @returns {object|null} Parsed summary, or null if the analyzer or the
 *   stats file is missing, the run fails, or the output is not JSON.
 */
function runAnalyzer() {
  if (!existsSync(ANALYZER) || !existsSync(STATS_JSONL)) return null;
  const r = spawnSync('node', [ANALYZER, '--json', STATS_JSONL], {
    encoding: 'utf-8',
    timeout: 30_000,
  });
  if (r.status !== 0) return null;
  try {
    return JSON.parse(r.stdout);
  } catch {
    return null;
  }
}

/**
 * Write the markdown result file, creating its directory if needed.
 *
 * @param {string} outPath - Destination file.
 * @param {{fileCount: number, byteLength: number, sha256: string}} ctx
 * @param {string|undefined} ccVersion - Detected CLI version, if any.
 * @param {object[]} measurements - Per-child rows (may be empty).
 * @param {number|null} parentTokens - Pre-flight token total, if measured.
 * @param {{result: string, reason: string, median?: number}} decision
 * @param {object|null} analyzerSummary - Output of runAnalyzer().
 * @param {string[]} runErrors - Error lines to list; section omitted if empty.
 */
function writeResultFile(outPath, ctx, ccVersion, measurements, parentTokens, decision, analyzerSummary, runErrors) {
  // ALWAYS write at least 30 lines + required strings (SC #6).
  const dir = dirname(outPath);
  if (!existsSync(dir)) mkdirSync(dir, { recursive: true });

  const lines = [];
  lines.push(
    '# Q3 Cache-Prefix-Preservation Experiment — Results',
    '',
    `Generated: ${nowIso()}`,
    `Brief: \`.claude/projects/2026-05-04-spor-c-q3-cache-prefix-experiment/brief.md\``,
    `Plan: \`.claude/projects/2026-05-04-spor-c-q3-cache-prefix-experiment/plan.md\``,
    '',
  );

  lines.push(
    '## Setup',
    '',
    `- Claude Code version: ${ccVersion ?? 'unknown'}`,
    `- Model: ${MODEL}`,
    `- Allowed tools: ${ALLOWED_TOOLS}`,
    `- CLAUDE_CODE_FORK_SUBAGENT: 1 (set per-child via env)`,
    `- Children: 3 (sequential spawn)`,
    '',
  );

  lines.push(
    '## Parent context',
    '',
    `- File count: ${ctx.fileCount}`,
    `- Byte length: ${ctx.byteLength}`,
    `- SHA-256: \`${ctx.sha256}\``,
    `- Measured input_tokens (pre-flight): ${parentTokens ?? 'N/A'}`,
    `- Target band: [${MIN_PARENT_TOKENS}, ${MAX_PARENT_TOKENS}]`,
    '',
  );

  lines.push(
    '## Per-child measurements',
    '',
    '| child | cache_creation | cache_read | input_tokens | output_tokens | argv_unique | exit |',
    '|-------|----------------|------------|--------------|---------------|-------------|------|',
  );
  for (const m of measurements) {
    lines.push(
      `| ${m.child} | ${m.cache_creation_input_tokens ?? 'N/A'} | ${m.cache_read_input_tokens ?? 'N/A'} | ${m.input_tokens ?? 'N/A'} | ${m.output_tokens ?? 'N/A'} | ${m.argv_signature} | ${m.exit_code} |`,
    );
  }
  lines.push('');

  const argvSet = new Set(measurements.map((m) => m.argv_signature));
  lines.push(
    '## argv parity (SC #3)',
    '',
    `Unique argv signatures across children: ${argvSet.size} (expected: 1)`,
    '',
  );

  lines.push('## Telemetry context', '');
  if (analyzerSummary) {
    lines.push(
      `- total_events: ${analyzerSummary.total_events}`,
      `- wall_time_ms_p50: ${analyzerSummary.wall_time_ms_p50}`,
      `- wall_time_ms_p90: ${analyzerSummary.wall_time_ms_p90}`,
      `- oldest_event_iso: ${analyzerSummary.oldest_event_iso ?? 'N/A'}`,
      `- newest_event_iso: ${analyzerSummary.newest_event_iso ?? 'N/A'}`,
    );
  } else {
    lines.push('- analyser unavailable or stats jsonl missing');
  }
  lines.push('');

  if (runErrors.length > 0) {
    lines.push('## Errors', '');
    for (const e of runErrors) lines.push(`- ${e}`);
    lines.push('');
  }

  lines.push('## Conclusion', '', `Reason: ${decision.reason}`);
  if (decision.median !== undefined) lines.push(`Median cache_creation_input_tokens: ${decision.median}`);
  lines.push('', `RESULT: ${decision.result}`, '');

  lines.push('## Path C decision (master-plan §Spor D direction)', '');
  if (decision.result === 'POSITIVE') {
    lines.push('Path C is feasible. C3 should write a v3.5.0 brief proposing cache-warm sentinel + identical-tool parallel children.');
  } else if (decision.result === 'NEGATIVE') {
    lines.push('Path C is closed. C3 should update master-plan §Spor D = stabilisation work; v3.5.0 brief NOT written.');
  } else {
    lines.push('Path C decision deferred to operator. C3 documents the gap and proposes targeted follow-up before Spor D commits.');
  }
  lines.push('');

  writeFileSync(outPath, lines.join('\n') + '\n', 'utf-8');
  log(`wrote result file: ${outPath} (${lines.length} lines)`);
}

/**
 * Fire one warm-up call and report the parent context size in tokens.
 *
 * @param {string} contextFilePath - Path of the parent-context file.
 * @returns {Promise<object|null>} The usage object plus a `total` field, or
 *   null when the call could not be started or produced no usage metadata.
 */
async function measureParentTokens(contextFilePath) {
  // CC's stream-json wrapper splits the prompt into:
  //   - input_tokens: only the non-cached portion (typically the latest turn)
  //   - cache_creation_input_tokens: tokens promoted to cache (the parent context)
  //   - cache_read_input_tokens: tokens served from cache (zero on first hit)
  //
  // Total parent context size = input_tokens + cache_creation + cache_read.
  //
  // NOTE(review): unlike buildChildArgv(), this argv carries no --allowedTools.
  // Confirm that is intended and does not alter the prefix the children see.
  const argv = [
    '-p',
    '--model', MODEL,
    '--output-format', 'stream-json',
    '--verbose',
    '--max-turns', '1',
    '--append-system-prompt-file', contextFilePath,
    '--exclude-dynamic-system-prompt-sections',
  ];

  const run = await runClaudeOnce(argv, 'Reply only with the word OK.', WARMUP_TIMEOUT_MS);
  if (run.spawnError) {
    log(`measureParentTokens spawn error: ${run.spawnError.message}`);
    return null;
  }

  const usage = extractUsageFromStream(run.stdout);
  if (!usage) {
    log(`measureParentTokens: no usage extracted; exit=${run.code}; stderr (first 300): ${run.stderr.slice(0, 300)}`);
    return null;
  }

  const total = (usage.input_tokens ?? 0)
    + (usage.cache_creation_input_tokens ?? 0)
    + (usage.cache_read_input_tokens ?? 0);
  log(`measureParentTokens: input=${usage.input_tokens} cache_creation=${usage.cache_creation_input_tokens} cache_read=${usage.cache_read_input_tokens} total=${total}`);
  return { total, ...usage };
}

/**
 * Entry point: build context, run pre-flight checks, run 3 children
 * sequentially, decide the result and write the result file. Always ends
 * via process.exit with one of the codes listed in usage().
 */
async function main() {
  const opts = parseArgs(process.argv.slice(2));
  if (opts.help) {
    process.stdout.write(usage());
    process.exit(0);
  }

  const hardTimer = setTimeout(() => {
    process.stderr.write('[q3] HARD TIMEOUT: 10 min exceeded, exit 2\n');
    process.exit(2);
  }, HARD_TIMEOUT_MS);

  log(`starting at ${nowIso()}`);

  // Build parent context first (works in dry-run too).
  log('building parent context...');
  const ctx = buildParentContext();
  log(`context: ${ctx.fileCount} files, ${ctx.byteLength} bytes, sha256=${ctx.sha256.slice(0, 16)}`);

  // Write parent context to a temp file (used as system-prompt-file for all
  // 3 children + warm-up). Determinism check: SHA-256 already computed.
  const contextFilePath = join(tmpdir(), `q3-parent-context-${process.pid}-${Date.now()}.txt`);
  // Registered before the write so every exit path (early INCONCLUSIVE exits,
  // hard timeout, uncaught failure) cleans up the temp file, not only the
  // dry-run and success paths.
  process.on('exit', () => {
    try { unlinkSync(contextFilePath); } catch { /* already gone or never written */ }
  });
  writeFileSync(contextFilePath, ctx.text, 'utf-8');
  log(`wrote parent context to: ${contextFilePath}`);

  // Print 3 child argvs for SC #3 verification.
  const argvBase = buildChildArgv(contextFilePath);
  log(`argv (identical for all 3 children):`);
  log(`  argv: ${JSON.stringify(['claude', ...argvBase])}`);
  log(`  "--allowedTools" "${ALLOWED_TOOLS}"`);
  log(`  "--allowedTools" "${ALLOWED_TOOLS}"`);
  log(`  "--allowedTools" "${ALLOWED_TOOLS}"`);

  if (opts.dryRun) {
    log('dry-run: skipping API calls.');
    clearTimeout(hardTimer);
    process.exit(0);
  }

  // Pre-flight: CC version (SC #2 part 1).
  log('pre-flight: checking CC version...');
  const verCheck = checkCcVersion();
  if (!verCheck.ok) {
    log(`CC version check FAILED: ${verCheck.reason}`);
    const decision = { result: 'INCONCLUSIVE', reason: `CC version: ${verCheck.reason}` };
    writeResultFile(opts.out, ctx, verCheck.version, [], null, decision, runAnalyzer(), [verCheck.reason]);
    clearTimeout(hardTimer);
    process.exit(3);
  }
  log(`CC version OK: ${verCheck.version}`);

  // Pre-flight: parent token band (SC #4).
  log('pre-flight: measuring parent context token count via warm-up...');
  const measurement = await measureParentTokens(contextFilePath);
  if (measurement === null) {
    const decision = { result: 'INCONCLUSIVE', reason: 'pre-flight warm-up returned no usage metadata' };
    writeResultFile(opts.out, ctx, verCheck.version, [], null, decision, runAnalyzer(), ['pre-flight failed']);
    clearTimeout(hardTimer);
    process.exit(3);
  }
  const parentTokens = measurement.total;
  log(`parent total tokens: ${parentTokens} (input=${measurement.input_tokens} cache_creation=${measurement.cache_creation_input_tokens} cache_read=${measurement.cache_read_input_tokens})`);
  if (parentTokens < MIN_PARENT_TOKENS || parentTokens > MAX_PARENT_TOKENS) {
    const decision = {
      result: 'INCONCLUSIVE',
      reason: `parent context out of band: ${parentTokens} not in [${MIN_PARENT_TOKENS}, ${MAX_PARENT_TOKENS}]`,
    };
    writeResultFile(opts.out, ctx, verCheck.version, [], parentTokens, decision, runAnalyzer(), [decision.reason]);
    clearTimeout(hardTimer);
    process.exit(4);
  }

  // Run 3 children sequentially (avoids spawn-burst rate-limit).
  const measurements = [];
  const runErrors = [];
  let allValid = true;
  for (let i = 1; i <= 3; i++) {
    log(`spawning child ${i}/3...`);
    const r = await spawnChild(contextFilePath, i);
    const usage = extractUsageFromStream(r.stdout);
    const argvSig = JSON.stringify(r.argv);
    if (r.code !== 0 || !usage || typeof usage.cache_creation_input_tokens !== 'number') {
      allValid = false;
      const err = `child ${i}: exit=${r.code}, killed=${r.killed}, usage=${usage ? 'partial' : 'missing'}`;
      runErrors.push(err);
      log(err);
      if (r.stderr) log(`  stderr (first 500 chars): ${r.stderr.slice(0, 500)}`);
    }
    measurements.push({
      child: i,
      cache_creation_input_tokens: usage?.cache_creation_input_tokens ?? null,
      cache_read_input_tokens: usage?.cache_read_input_tokens ?? null,
      input_tokens: usage?.input_tokens ?? null,
      output_tokens: usage?.output_tokens ?? null,
      argv_signature: argvSig,
      exit_code: r.code,
    });
    log(`  cache_creation=${usage?.cache_creation_input_tokens ?? 'N/A'} cache_read=${usage?.cache_read_input_tokens ?? 'N/A'}`);
  }

  // Decide result (SC #7).
  const decision = decideResult(measurements, allValid);
  log(`RESULT: ${decision.result} (${decision.reason})`);

  // Run analyser for telemetry context (SC #8).
  const analyzerSummary = runAnalyzer();

  // Write result file (SC #6).
  writeResultFile(opts.out, ctx, verCheck.version, measurements, parentTokens, decision, analyzerSummary, runErrors);

  clearTimeout(hardTimer);
  // Exit 0 even on INCONCLUSIVE — that's a valid outcome per brief NFR.
  // Only exit non-zero on harness failures (already handled above).
  // NOTE(review): usage() documents exit code 5 for missing child metadata,
  // but no path emits it; that case ends here with 0. Reconcile with the brief.
  process.exit(0);
}

/**
 * Tell whether this module is the script Node was started with.
 *
 * Compares import.meta.url against a properly encoded file URL of argv[1]
 * (both as given and with symlinks resolved), so paths containing spaces or
 * non-ASCII characters still match.
 *
 * @returns {boolean}
 */
function isMainModule() {
  const entry = process.argv[1];
  if (!entry) return false;
  if (import.meta.url === pathToFileURL(entry).href) return true;
  try {
    return import.meta.url === pathToFileURL(realpathSync(entry)).href;
  } catch {
    return false;
  }
}

if (isMainModule()) {
  main().catch((e) => {
    process.stderr.write(`[q3] uncaught: ${e.stack || e.message}\n`);
    process.exit(7);
  });
}