feat(spor-c): add Q3 cache-prefix experiment harness + analyser [skip-docs]
Implements Spor C of post-v3.4.0 roadmap. Zero-dep harness measures CLAUDE_CODE_FORK_SUBAGENT cache-prefix preservation across 3 fork-children with identical --allowedTools at 150-250K parent context. Harness uses --append-system-prompt-file (avoids stdin buffer cap at >200K bytes) + --exclude-dynamic-system-prompt-sections (prevents per-child cache-prefix divergence from cwd/env/git-status). Companion analyser summarizes accumulated ultraexecute-stats.jsonl: percentile wall_time (p50/p90/max), total events, ISO time range. Analyser input: the JSONL path passed via the --json <path> CLI shim; analyser output: a JSON summary on stdout. The harness result file is gitignored (*.local.md). Master-plan thresholds (<= 1.5K positive / >= 3.5K negative) gate the v3.5.0 Path C decision. Brief: .claude/projects/2026-05-04-spor-c-q3-cache-prefix-experiment/brief.md. Master-plan: .claude/projects/2026-05-04-post-v3.4.0-roadmap/master-plan.md.
This commit is contained in:
parent
4e78dc77d7
commit
f0fd129d3d
2 changed files with 657 additions and 0 deletions
117
plugins/ultraplan-local/lib/stats/cache-analyzer.mjs
Normal file
117
plugins/ultraplan-local/lib/stats/cache-analyzer.mjs
Normal file
|
|
@ -0,0 +1,117 @@
|
||||||
|
// lib/stats/cache-analyzer.mjs
// Summarizes ultraexecute-stats.jsonl: total events, percentile wall times,
// time range. Companion to event-emit.mjs (which produces the jsonl).
//
// Designed for /ultraplan-local Spor C: gives C3 telemetry context when
// interpreting Q3 experiment numbers (5+ weeks of accumulated data on the
// operator's machine as of 2026-05-04).
//
// Zero npm dependencies. Node stdlib only.
|
||||||
|
|
||||||
|
import { readFileSync, existsSync } from 'node:fs';
|
||||||
|
|
||||||
|
/**
 * Build the help text for the cache-analyzer CLI.
 *
 * The text documents the single supported invocation (`--json <path>`), the
 * shape of the JSON summary written to stdout, and the process exit codes.
 *
 * @returns {string} Multi-line usage text, terminated by a newline.
 */
function usage() {
  return `cache-analyzer.mjs — summarize ultraexecute-stats.jsonl

USAGE:
  node lib/stats/cache-analyzer.mjs --json <path-to-jsonl>

OUTPUT (stdout, JSON):
  {
    "total_events": <n>,
    "events_with_duration": <n>,
    "wall_time_ms_p50": <ms or null>,
    "wall_time_ms_p90": <ms or null>,
    "wall_time_ms_max": <ms or null>,
    "unique_event_names": [...],
    "oldest_event_iso": "<iso8601 or null>",
    "newest_event_iso": "<iso8601 or null>"
  }

EXIT:
  0 success, 1 file not found / read error, 2 usage error.
`;
}
|
||||||
|
|
||||||
|
/**
 * Summarize the lines of an ultraexecute-stats JSONL file.
 *
 * Each non-blank line is parsed as JSON. Lines that are not valid JSON, or
 * that parse to something other than a plain object (`null`, numbers,
 * strings, arrays), are skipped and not counted: an event must be a record.
 *
 * Per event the function collects:
 *   - its name, from the string field `event`, falling back to `name`;
 *   - its duration, from a finite numeric `duration_ms`;
 *   - its timestamp, from the first truthy of `timestamp`, `ts`, `iso`,
 *     `time`, when that value is a string `Date.parse` understands.
 *
 * Percentiles use the nearest-rank method (rank = ceil(p/100 * n)), so p90
 * only equals the maximum when there are fewer than ten samples' worth of
 * headroom, not for every n <= 10.
 *
 * @param {Iterable<string>} lines - Raw JSONL lines (without line terminators,
 *   although surrounding whitespace such as a trailing `\r` is tolerated).
 * @returns {{
 *   total_events: number,
 *   events_with_duration: number,
 *   wall_time_ms_p50: number|null,
 *   wall_time_ms_p90: number|null,
 *   wall_time_ms_max: number|null,
 *   unique_event_names: string[],
 *   oldest_event_iso: string|null,
 *   newest_event_iso: string|null
 * }} Summary; percentile/time fields stay `null` when no data supports them.
 */
export function summarize(lines) {
  const summary = {
    total_events: 0,
    events_with_duration: 0,
    wall_time_ms_p50: null,
    wall_time_ms_p90: null,
    wall_time_ms_max: null,
    unique_event_names: [],
    oldest_event_iso: null,
    newest_event_iso: null,
  };

  const durations = [];
  const names = new Set();
  let oldestMs = null;
  let newestMs = null;

  for (const line of lines) {
    const trimmed = line.trim();
    if (trimmed === '') continue;

    let obj;
    try {
      obj = JSON.parse(trimmed);
    } catch {
      continue; // malformed line: not an event
    }
    // Valid JSON that is not a record (null, 42, "x", [..]) is not an event.
    // Without this guard a literal `null` line throws on property access.
    if (obj === null || typeof obj !== 'object' || Array.isArray(obj)) continue;

    summary.total_events++;

    if (obj.event && typeof obj.event === 'string') names.add(obj.event);
    else if (obj.name && typeof obj.name === 'string') names.add(obj.name);

    if (typeof obj.duration_ms === 'number' && Number.isFinite(obj.duration_ms)) {
      durations.push(obj.duration_ms);
      summary.events_with_duration++;
    }

    const tsField = obj.timestamp || obj.ts || obj.iso || obj.time;
    if (typeof tsField === 'string') {
      const t = Date.parse(tsField);
      if (!Number.isNaN(t)) {
        if (oldestMs === null || t < oldestMs) oldestMs = t;
        if (newestMs === null || t > newestMs) newestMs = t;
      }
    }
  }

  if (durations.length > 0) {
    durations.sort((a, b) => a - b);
    // Nearest-rank percentile. Integer arithmetic (n * pct / 100) avoids
    // float artefacts such as n * 0.9 landing a hair above an integer.
    const nearestRank = (pct) => {
      const rank = Math.ceil((durations.length * pct) / 100);
      return durations[Math.min(Math.max(rank, 1), durations.length) - 1];
    };
    summary.wall_time_ms_p50 = nearestRank(50);
    summary.wall_time_ms_p90 = nearestRank(90);
    summary.wall_time_ms_max = durations[durations.length - 1];
  }

  summary.unique_event_names = [...names].sort();
  if (oldestMs !== null) summary.oldest_event_iso = new Date(oldestMs).toISOString();
  if (newestMs !== null) summary.newest_event_iso = new Date(newestMs).toISOString();

  return summary;
}
|
||||||
|
|
||||||
|
/**
 * Read a JSONL stats file from disk and summarize it.
 *
 * Failures are reported as a value rather than thrown: a missing path or a
 * failed read yields `{ error: string }`, which the CLI maps to exit code 1.
 *
 * @param {string} path - Location of the JSONL file.
 * @returns {object} Either the result of `summarize()` or `{ error }`.
 */
export function summarizeFile(path) {
  const missing = !existsSync(path);
  if (missing) return { error: `file not found: ${path}` };

  let contents;
  try {
    contents = readFileSync(path, 'utf-8');
  } catch (readFailure) {
    return { error: `read error: ${readFailure.message}` };
  }

  const rows = contents.split('\n');
  return summarize(rows);
}
|
||||||
|
|
||||||
|
// CLI shim: runs only when this module is the script handed to `node`, not
// when it is imported for its exports.
// NOTE(review): the string comparison below presumably fails for script paths
// that need URL-encoding (spaces, non-ASCII) — confirm before relying on it.
if (import.meta.url === `file://${process.argv[1]}`) {
  const cliArgs = process.argv.slice(2);
  const flagPos = cliArgs.indexOf('--json');
  const jsonlPath = flagPos === -1 ? undefined : cliArgs[flagPos + 1];

  // Missing flag or missing value: usage error.
  if (!jsonlPath) {
    process.stderr.write(usage());
    process.exit(2);
  }

  const outcome = summarizeFile(jsonlPath);
  if (outcome.error) {
    process.stderr.write(`cache-analyzer: ${outcome.error}\n`);
    process.exit(1);
  }

  process.stdout.write(`${JSON.stringify(outcome, null, 2)}\n`);
  process.exit(0);
}
|
||||||
540
plugins/ultraplan-local/scripts/q3-cache-prefix-experiment.mjs
Normal file
540
plugins/ultraplan-local/scripts/q3-cache-prefix-experiment.mjs
Normal file
|
|
@ -0,0 +1,540 @@
|
||||||
|
#!/usr/bin/env node
// scripts/q3-cache-prefix-experiment.mjs
//
// Q3 cache-prefix-preservation experiment for Spor C of post-v3.4.0 roadmap.
// Measures whether CLAUDE_CODE_FORK_SUBAGENT=1 preserves the server-side
// cache prefix across multiple `claude -p` fork-children when all children
// spawn with byte-identical --allowedTools at 150-250K parent context.
//
// Brief: .claude/projects/2026-05-04-spor-c-q3-cache-prefix-experiment/brief.md
// Plan: .claude/projects/2026-05-04-spor-c-q3-cache-prefix-experiment/plan.md
//
// Result thresholds (master-plan):
//   median(cache_creation_input_tokens) <= 1500 -> POSITIVE
//   median >= 3500 -> NEGATIVE
//   else -> INCONCLUSIVE
//   Any per-child failure or missing metadata -> INCONCLUSIVE.
//
// Zero npm dependencies. Node stdlib only. Hook-safe (no forbidden words
// in source — pre-bash-executor.mjs scans the entire command string when
// this script is invoked).
|
||||||
|
|
||||||
|
import { spawn, spawnSync } from 'node:child_process';
|
||||||
|
import { readFileSync, readdirSync, statSync, writeFileSync, existsSync, mkdirSync, unlinkSync } from 'node:fs';
|
||||||
|
import { createHash } from 'node:crypto';
|
||||||
|
import { join, dirname, resolve } from 'node:path';
|
||||||
|
import { tmpdir } from 'node:os';
|
||||||
|
|
||||||
|
// Project directory that holds the brief, plan and (gitignored) result file.
// Resolved against the current working directory.
const PROJECT_DIR = resolve(
  process.cwd(),
  '.claude/projects/2026-05-04-spor-c-q3-cache-prefix-experiment',
);
// Default destination of the markdown result file (overridable via --out).
const DEFAULT_OUT = join(PROJECT_DIR, 'q3-experiment-results.local.md');
// Telemetry file consumed by the analyser. The built-in default is specific
// to the original operator's machine, so it can be overridden through the
// Q3_STATS_JSONL environment variable; an unset or empty variable keeps the
// original default.
const STATS_JSONL = process.env.Q3_STATS_JSONL
  || '/Users/ktg/.claude/plugins/data/ultraplan-local-ktg-plugin-marketplace/ultraexecute-stats.jsonl';
// Companion analyser script, resolved against the current working directory.
const ANALYZER = resolve(process.cwd(), 'lib/stats/cache-analyzer.mjs');

// Accepted band for the measured parent context size, in tokens.
const MIN_PARENT_TOKENS = 150_000;
const MAX_PARENT_TOKENS = 250_000;
// Decision thresholds on median cache_creation_input_tokens.
const POSITIVE_THRESHOLD = 1500;
const NEGATIVE_THRESHOLD = 3500;
const HARD_TIMEOUT_MS = 600_000; // 10 min total
const PER_CHILD_TIMEOUT_MS = 240_000; // 4 min per child
// Minimum Claude Code version as [major, minor, patch].
const MIN_CC_VERSION = [2, 1, 121];
// Passed verbatim as the --allowedTools value of every child.
const ALLOWED_TOOLS = 'Read,Write,Edit,Bash,Glob,Grep';
const MODEL = 'sonnet';

// Sources for parent context build. Brief constraint: no secrets, no ~/, no
// other plugins. Stays inside plugins/ultraplan-local/.
//
// Calibration (empirical, CC v2.1.128 + Sonnet 4.6):
// Token-per-byte ratio varies from 0.38-0.90 depending on content type.
// Mixed .md+.mjs at 264K bytes yielded only ~60K context tokens (4.5 byte/token).
// To reliably hit 150K context tokens, target ~600-700K bytes of mixed content.
// Hooks baseline ~62K cache_creation always present, so total lands ~212-262K.
const CONTEXT_DIRS = [
  'commands',
  'agents',
  'lib/parsers',
  'lib/validators',
  'lib/util',
  'lib/review',
  'lib/stats',
];
const CONTEXT_EXTRA_FILES = [
  'docs/HANDOVER-CONTRACTS.md',
  'CLAUDE.md',
  'examples/02-real-cli/REGENERATED.md',
];
|
||||||
|
|
||||||
|
/**
 * Build the help text for the Q3 experiment harness CLI.
 *
 * Covers the accepted flags, the default result-file location (interpolated
 * from DEFAULT_OUT), the exit codes and the required environment.
 *
 * @returns {string} Multi-line usage text, terminated by a newline.
 */
function usage() {
  return `q3-cache-prefix-experiment.mjs — Q3 cache-prefix experiment harness

USAGE:
  node scripts/q3-cache-prefix-experiment.mjs [--help] [--dry-run] [--out <path>]

FLAGS:
  --help         Print this usage block and exit 0.
  --dry-run      Build parent context, print child argv arrays + token-byte
                 estimate to stderr, do NOT call the API. No result file written.
  --out <path>   Write result file to <path>. Default:
                 ${DEFAULT_OUT}

EXIT CODES:
  0   Experiment completed (RESULT line written).
  2   Hard timeout exceeded.
  3   CC version too old or FORK_SUBAGENT warm-up failed -> INCONCLUSIVE.
  4   Parent context out of 150K-250K band -> INCONCLUSIVE.
  5   Child API metadata unavailable -> INCONCLUSIVE.
  7   Usage / I/O error.

ENV:
  ANTHROPIC_API_KEY must be set (read from operator env, not embedded).
`;
}
|
||||||
|
|
||||||
|
/**
 * Parse the harness command line.
 *
 * Recognised flags: `--help` / `-h`, `--dry-run`, `--out <path>`.
 * Any other argument, or `--out` without a following value, is a usage
 * error: the message and usage text go to stderr and the process exits 7.
 * (Previously a dangling `--out` silently set `out` to `undefined`, which
 * only failed later when the result file path was used.)
 *
 * @param {string[]} argv - Arguments after the script name.
 * @returns {{help: boolean, dryRun: boolean, out: string}} Parsed options;
 *   `out` defaults to DEFAULT_OUT.
 */
function parseArgs(argv) {
  const opts = { help: false, dryRun: false, out: DEFAULT_OUT };
  for (let i = 0; i < argv.length; i++) {
    const arg = argv[i];
    switch (arg) {
      case '--help':
      case '-h':
        opts.help = true;
        break;
      case '--dry-run':
        opts.dryRun = true;
        break;
      case '--out': {
        const value = argv[i + 1];
        if (value === undefined || value === '') {
          process.stderr.write(`Missing value for --out\n${usage()}`);
          process.exit(7);
        }
        opts.out = value;
        i++; // consume the value
        break;
      }
      default:
        process.stderr.write(`Unknown argument: ${arg}\n${usage()}`);
        process.exit(7);
    }
  }
  return opts;
}
|
||||||
|
|
||||||
|
/**
 * Emit one progress line on stderr, tagged with the harness prefix.
 *
 * @param {string} msg - Message text (no trailing newline needed).
 */
function log(msg) {
  const line = `[q3] ${msg}\n`;
  process.stderr.write(line);
}
|
||||||
|
|
||||||
|
/**
 * Current wall-clock time as an ISO-8601 string (UTC, millisecond precision).
 *
 * @returns {string} e.g. "2026-05-04T12:34:56.789Z".
 */
function nowIso() {
  const stamp = new Date();
  return stamp.toISOString();
}
|
||||||
|
|
||||||
|
/**
 * Collect every regular file beneath a directory, optionally filtered by a
 * path suffix.
 *
 * @param {string} dir - Directory to walk; a non-existent path yields [].
 * @param {string} [ext] - Required path suffix (e.g. ".mjs"). When falsy,
 *   every regular file is included.
 * @returns {string[]} Matching paths (each `dir` joined with the relative
 *   location), sorted so the result is deterministic.
 */
function listFilesRecursive(dir, ext) {
  if (!existsSync(dir)) return [];

  const found = [];
  const entries = readdirSync(dir, { withFileTypes: true });
  entries.forEach((entry) => {
    const full = join(dir, entry.name);
    if (entry.isDirectory()) {
      // Descend and fold the subtree's matches into this level.
      for (const nested of listFilesRecursive(full, ext)) found.push(nested);
      return;
    }
    const suffixOk = !ext || full.endsWith(ext);
    if (entry.isFile() && suffixOk) found.push(full);
  });

  found.sort(); // deterministic ordering
  return found;
}
|
||||||
|
|
||||||
|
/**
 * Assemble the parent-context text that is handed to every child as an
 * appended system prompt.
 *
 * For each directory in CONTEXT_DIRS the `.mjs` and `.md` files are gathered
 * and sorted; the files in CONTEXT_EXTRA_FILES follow in their listed order.
 * Each readable file contributes a `=== FILE: <path> ===` header plus its
 * contents; files that are missing or unreadable are skipped. Sections are
 * joined with a blank line.
 *
 * @returns {{text: string, sha256: string, fileCount: number, byteLength: number}}
 *   The joined text, its SHA-256 hex digest, the number of files included
 *   and the UTF-8 byte length of the text.
 */
function buildParentContext() {
  const sections = [];
  let fileCount = 0;

  // Append one file as a section; best-effort, never throws.
  const appendFile = (file) => {
    if (!existsSync(file)) return;
    try {
      sections.push(`=== FILE: ${file} ===\n${readFileSync(file, 'utf-8')}`);
      fileCount += 1;
    } catch { /* skip unreadable */ }
  };

  for (const dir of CONTEXT_DIRS) {
    const candidates = listFilesRecursive(dir, '.mjs').concat(listFilesRecursive(dir, '.md'));
    candidates.sort();
    for (const file of candidates) appendFile(file);
  }
  for (const file of CONTEXT_EXTRA_FILES) appendFile(file);

  const text = sections.join('\n\n');
  const digest = createHash('sha256').update(text).digest('hex');
  return {
    text,
    sha256: digest,
    fileCount,
    byteLength: Buffer.byteLength(text, 'utf-8'),
  };
}
|
||||||
|
|
||||||
|
/**
 * Verify that the installed `claude` CLI meets MIN_CC_VERSION.
 *
 * Runs `claude --version` (10 s timeout) and compares the first
 * `major.minor.patch` triple found in its stdout component-wise against
 * MIN_CC_VERSION.
 *
 * Failure reporting distinguishes three cases: the process could not be
 * run at all (`spawnSync` reported an `error`, e.g. binary not found or
 * timeout), it was terminated by a signal (null status), or it exited with
 * a non-zero code.
 *
 * @returns {{ok: boolean, version?: string, reason?: string}}
 *   `ok: true` with `version` on success; otherwise `ok: false` with a
 *   human-readable `reason` (and `version` when one could be parsed).
 */
function checkCcVersion() {
  const r = spawnSync('claude', ['--version'], { encoding: 'utf-8', timeout: 10_000 });
  if (r.error) {
    // Spawn-level failure: status is null here, so report the cause instead.
    return { ok: false, reason: `claude --version failed to run: ${r.error.message}` };
  }
  if (r.status !== 0) {
    const outcome = r.status === null ? `killed by ${r.signal}` : `exit ${r.status}`;
    return { ok: false, reason: `claude --version ${outcome}: ${r.stderr || r.stdout}` };
  }

  const m = (r.stdout || '').match(/(\d+)\.(\d+)\.(\d+)/);
  if (!m) return { ok: false, reason: `cannot parse version from: ${r.stdout}` };

  const got = [Number(m[1]), Number(m[2]), Number(m[3])];
  const version = got.join('.');
  // Lexicographic comparison: the first differing component decides.
  for (let i = 0; i < 3; i++) {
    if (got[i] > MIN_CC_VERSION[i]) return { ok: true, version };
    if (got[i] < MIN_CC_VERSION[i]) {
      return {
        ok: false,
        reason: `CC ${version} < required ${MIN_CC_VERSION.join('.')}`,
        version,
      };
    }
  }
  return { ok: true, version }; // exactly the minimum version
}
|
||||||
|
|
||||||
|
/**
 * Build the argument vector used for every fork-child `claude` invocation.
 *
 * The vector must be byte-identical across all 3 children (SC #3);
 * per-child differentiation happens only in the user prompt, never here.
 *
 * The context travels via --append-system-prompt-file (not stdin) to avoid
 * stdin pipe buffer issues at >200K bytes and to make the context part of
 * the cache-prefix segment. --exclude-dynamic-system-prompt-sections moves
 * cwd/env/git-status into the user message so they cannot vary the prefix.
 *
 * @param {string} contextFilePath - File holding the parent context.
 * @returns {string[]} Arguments for `claude` (without the program name).
 */
function buildChildArgv(contextFilePath) {
  const printMode = ['-p'];
  const modelArgs = ['--model', MODEL];
  const outputArgs = ['--output-format', 'stream-json', '--verbose'];
  const toolArgs = ['--allowedTools', ALLOWED_TOOLS];
  const turnArgs = ['--max-turns', '1'];
  const contextArgs = [
    '--append-system-prompt-file', contextFilePath,
    '--exclude-dynamic-system-prompt-sections',
  ];
  return printMode.concat(modelArgs, outputArgs, toolArgs, turnArgs, contextArgs);
}
|
||||||
|
|
||||||
|
/**
 * Run one fork-child `claude` process and capture its output.
 *
 * The child gets the shared argv from buildChildArgv(), the environment of
 * this process plus CLAUDE_CODE_FORK_SUBAGENT=1, and a short per-child
 * prompt on stdin. It is sent SIGTERM after PER_CHILD_TIMEOUT_MS.
 *
 * The returned promise never rejects; spawn failures are reported as
 * `code: -1` with the error appended to `stderr`.
 *
 * @param {string} contextFilePath - Parent-context file for the child.
 * @param {number} childIndex - Index embedded in the prompt.
 * @returns {Promise<{code: number|null, stdout: string, stderr: string,
 *   killed: boolean, argv: string[]}>} `killed` is true when the timeout
 *   fired; `argv` includes the leading program name.
 */
function spawnChild(contextFilePath, childIndex) {
  return new Promise((resolve) => {
    const argv = buildChildArgv(contextFilePath);
    // User prompt is short (per-child suffix only). Context lives in the
    // appended system-prompt file.
    const prompt = `[child #${childIndex}] Reply only with the word OK.`;
    const env = { ...process.env, CLAUDE_CODE_FORK_SUBAGENT: '1' };
    const child = spawn('claude', argv, { env, stdio: ['pipe', 'pipe', 'pipe'] });

    let stdout = '';
    let stderr = '';
    let killed = false;
    let settled = false;

    const timer = setTimeout(() => {
      killed = true;
      child.kill('SIGTERM');
    }, PER_CHILD_TIMEOUT_MS);

    // Resolve exactly once, whichever of 'close' / 'error' arrives first.
    const settle = (code, stderrSuffix = '') => {
      if (settled) return;
      settled = true;
      clearTimeout(timer);
      resolve({ code, stdout, stderr: stderr + stderrSuffix, killed, argv: ['claude', ...argv] });
    };

    // Let the stream decode UTF-8 so a multi-byte character split across two
    // chunks is not corrupted (per-chunk Buffer#toString would mangle it).
    child.stdout.setEncoding('utf-8');
    child.stderr.setEncoding('utf-8');
    child.stdout.on('data', (chunk) => { stdout += chunk; });
    child.stderr.on('data', (chunk) => { stderr += chunk; });

    child.on('close', (code) => settle(code));
    child.on('error', (err) => settle(-1, `\nspawn error: ${err.message}`));

    // If the child is gone before reading its stdin, the write fails with
    // EPIPE; without a listener that becomes an uncaught exception. The
    // failure itself is already reported through 'error' / 'close'.
    child.stdin.on('error', () => {});
    child.stdin.end(prompt);
  });
}
|
||||||
|
|
||||||
|
/**
 * Pull the token-usage payload out of stream-json output.
 *
 * Scans the output line by line and returns the usage object of the first
 * qualifying event: an `assistant` event carrying `message.usage`, or a
 * top-level `result` event carrying `usage`. Lines that do not start with
 * `{` or are not valid JSON are ignored.
 *
 * @param {string} stdout - Captured stdout of a `claude` run.
 * @returns {object|null} The usage object, or null when none was found.
 */
function extractUsageFromStream(stdout) {
  for (const raw of stdout.split('\n')) {
    if (!raw.startsWith('{')) continue;

    let evt;
    try {
      evt = JSON.parse(raw);
    } catch {
      continue; // skip non-JSON lines
    }

    const assistantUsage = evt.type === 'assistant' && evt.message && evt.message.usage;
    if (assistantUsage) return evt.message.usage;

    // Fallback: top-level result event also carries usage.
    const resultUsage = evt.type === 'result' && evt.usage;
    if (resultUsage) return evt.usage;
  }
  return null;
}
|
||||||
|
|
||||||
|
/**
 * Median of a list of numbers.
 *
 * The input is not modified. For an even count the mean of the two middle
 * values is returned.
 *
 * @param {number[]} values - Numbers to evaluate.
 * @returns {number|null} The median, or null for an empty list.
 */
function median(values) {
  const count = values.length;
  if (count === 0) return null;

  const ordered = values.slice().sort((x, y) => x - y);
  const upper = Math.floor(count / 2);
  if (count % 2 === 1) return ordered[upper];
  return (ordered[upper - 1] + ordered[upper]) / 2;
}
|
||||||
|
|
||||||
|
/**
 * Turn the per-child measurements into the experiment verdict.
 *
 * The verdict is based on the median of `cache_creation_input_tokens`:
 * at or below POSITIVE_THRESHOLD -> POSITIVE, at or above
 * NEGATIVE_THRESHOLD -> NEGATIVE, anything between -> INCONCLUSIVE.
 * Invalid runs and empty measurement lists are INCONCLUSIVE as well.
 *
 * @param {Array<{cache_creation_input_tokens: number}>} measurements
 * @param {boolean} allValid - False when any child failed or lacked metadata.
 * @returns {{result: string, reason: string, median?: number}} `median` is
 *   present only when one could be computed.
 */
function decideResult(measurements, allValid) {
  const inconclusive = (reason, extra = {}) => ({ result: 'INCONCLUSIVE', reason, ...extra });

  if (!allValid) return inconclusive('one or more children failed or missing metadata');

  const creationTokens = measurements.map(
    ({ cache_creation_input_tokens }) => cache_creation_input_tokens,
  );
  const med = median(creationTokens);
  if (med === null) return inconclusive('no measurements');

  const subject = `median cache_creation ${med}`;
  if (med <= POSITIVE_THRESHOLD) {
    return { result: 'POSITIVE', reason: `${subject} <= ${POSITIVE_THRESHOLD}`, median: med };
  }
  if (med >= NEGATIVE_THRESHOLD) {
    return { result: 'NEGATIVE', reason: `${subject} >= ${NEGATIVE_THRESHOLD}`, median: med };
  }
  return inconclusive(`${subject} in (${POSITIVE_THRESHOLD}, ${NEGATIVE_THRESHOLD})`, { median: med });
}
|
||||||
|
|
||||||
|
/**
 * Run the companion analyser over the stats JSONL and return its summary.
 *
 * This is best-effort telemetry context: when the analyser script or the
 * stats file is absent, the analyser exits non-zero (or cannot be started),
 * or its stdout is not valid JSON, the function returns null.
 *
 * The analyser is started with `process.execPath` so it runs under the same
 * Node binary as this harness instead of whichever `node` is first on PATH.
 *
 * @returns {object|null} Parsed analyser summary, or null when unavailable.
 */
function runAnalyzer() {
  if (!existsSync(ANALYZER) || !existsSync(STATS_JSONL)) return null;
  const r = spawnSync(process.execPath, [ANALYZER, '--json', STATS_JSONL], {
    encoding: 'utf-8',
    timeout: 30_000,
  });
  // A spawn failure or timeout leaves status null, which also lands here.
  if (r.status !== 0) return null;
  try {
    return JSON.parse(r.stdout);
  } catch {
    return null; // analyser printed something that is not JSON
  }
}
|
||||||
|
|
||||||
|
/**
 * Render the markdown result report and write it to disk.
 *
 * The report always contains the Setup, Parent context, Per-child
 * measurements, argv parity, Telemetry context, Conclusion (with the
 * `RESULT:` line) and Path C decision sections; an Errors section is added
 * when `runErrors` is non-empty (SC #6). The target directory is created if
 * needed.
 *
 * @param {string} outPath - Destination file.
 * @param {{fileCount: number, byteLength: number, sha256: string}} ctx -
 *   Parent-context metadata.
 * @param {string|undefined} ccVersion - Claude Code version, if known.
 * @param {object[]} measurements - One record per child.
 * @param {number|null} parentTokens - Measured parent context size.
 * @param {{result: string, reason: string, median?: number}} decision
 * @param {object|null} analyzerSummary - Analyser output, if available.
 * @param {string[]} runErrors - Error descriptions collected during the run.
 */
function writeResultFile(outPath, ctx, ccVersion, measurements, parentTokens, decision, analyzerSummary, runErrors) {
  const targetDir = dirname(outPath);
  if (!existsSync(targetDir)) mkdirSync(targetDir, { recursive: true });

  const orNA = (value) => value ?? 'N/A';

  const childRows = measurements.map((m) => (
    `| ${m.child} | ${orNA(m.cache_creation_input_tokens)} | ${orNA(m.cache_read_input_tokens)} | ${orNA(m.input_tokens)} | ${orNA(m.output_tokens)} | ${m.argv_signature} | ${m.exit_code} |`
  ));

  const distinctArgv = new Set(measurements.map((m) => m.argv_signature)).size;

  const telemetry = analyzerSummary
    ? [
      `- total_events: ${analyzerSummary.total_events}`,
      `- wall_time_ms_p50: ${analyzerSummary.wall_time_ms_p50}`,
      `- wall_time_ms_p90: ${analyzerSummary.wall_time_ms_p90}`,
      `- oldest_event_iso: ${orNA(analyzerSummary.oldest_event_iso)}`,
      `- newest_event_iso: ${orNA(analyzerSummary.newest_event_iso)}`,
    ]
    : ['- analyser unavailable or stats jsonl missing'];

  const errorSection = runErrors.length > 0
    ? ['## Errors', '', ...runErrors.map((e) => `- ${e}`), '']
    : [];

  const medianLine = decision.median !== undefined
    ? [`Median cache_creation_input_tokens: ${decision.median}`]
    : [];

  let pathCVerdict;
  switch (decision.result) {
    case 'POSITIVE':
      pathCVerdict = 'Path C is feasible. C3 should write a v3.5.0 brief proposing cache-warm sentinel + identical-tool parallel children.';
      break;
    case 'NEGATIVE':
      pathCVerdict = 'Path C is closed. C3 should update master-plan §Spor D = stabilisation work; v3.5.0 brief NOT written.';
      break;
    default:
      pathCVerdict = 'Path C decision deferred to operator. C3 documents the gap and proposes targeted follow-up before Spor D commits.';
  }

  const lines = [
    '# Q3 Cache-Prefix-Preservation Experiment — Results',
    '',
    `Generated: ${nowIso()}`,
    `Brief: \`.claude/projects/2026-05-04-spor-c-q3-cache-prefix-experiment/brief.md\``,
    `Plan: \`.claude/projects/2026-05-04-spor-c-q3-cache-prefix-experiment/plan.md\``,
    '',
    '## Setup',
    '',
    `- Claude Code version: ${ccVersion ?? 'unknown'}`,
    `- Model: ${MODEL}`,
    `- Allowed tools: ${ALLOWED_TOOLS}`,
    `- CLAUDE_CODE_FORK_SUBAGENT: 1 (set per-child via env)`,
    `- Children: 3 (sequential spawn)`,
    '',
    '## Parent context',
    '',
    `- File count: ${ctx.fileCount}`,
    `- Byte length: ${ctx.byteLength}`,
    `- SHA-256: \`${ctx.sha256}\``,
    `- Measured input_tokens (pre-flight): ${orNA(parentTokens)}`,
    `- Target band: [${MIN_PARENT_TOKENS}, ${MAX_PARENT_TOKENS}]`,
    '',
    '## Per-child measurements',
    '',
    '| child | cache_creation | cache_read | input_tokens | output_tokens | argv_unique | exit |',
    '|-------|----------------|------------|--------------|---------------|-------------|------|',
    ...childRows,
    '',
    '## argv parity (SC #3)',
    '',
    `Unique argv signatures across children: ${distinctArgv} (expected: 1)`,
    '',
    '## Telemetry context',
    '',
    ...telemetry,
    '',
    ...errorSection,
    '## Conclusion',
    '',
    `Reason: ${decision.reason}`,
    ...medianLine,
    '',
    `RESULT: ${decision.result}`,
    '',
    '## Path C decision (master-plan §Spor D direction)',
    '',
    pathCVerdict,
    '',
  ];

  writeFileSync(outPath, `${lines.join('\n')}\n`, 'utf-8');
  log(`wrote result file: ${outPath} (${lines.length} lines)`);
}
|
||||||
|
|
||||||
|
/**
 * Fire one warm-up `claude` call to measure the parent context size.
 *
 * CC's stream-json wrapper splits the prompt into:
 *   - input_tokens: only the non-cached portion (typically the latest turn)
 *   - cache_creation_input_tokens: tokens promoted to cache (the parent context)
 *   - cache_read_input_tokens: tokens served from cache (zero on first hit)
 * The total parent context size is the sum of the three.
 *
 * The call is sent SIGTERM after 180 s. The promise never rejects.
 *
 * @param {string} contextFilePath - Parent-context file passed to `claude`.
 * @returns {Promise<object|null>} The usage object extended with `total`,
 *   or null when the process could not be spawned or produced no usage data.
 */
async function measureParentTokens(contextFilePath) {
  const argv = [
    '-p',
    '--model', MODEL,
    '--output-format', 'stream-json',
    '--verbose',
    '--max-turns', '1',
    '--append-system-prompt-file', contextFilePath,
    '--exclude-dynamic-system-prompt-sections',
  ];
  const env = { ...process.env, CLAUDE_CODE_FORK_SUBAGENT: '1' };
  return new Promise((resolve) => {
    const child = spawn('claude', argv, { env, stdio: ['pipe', 'pipe', 'pipe'] });
    let stdout = '';
    let stderr = '';
    const timer = setTimeout(() => child.kill('SIGTERM'), 180_000);

    // Let the stream decode UTF-8 so a multi-byte character split across two
    // chunks is not corrupted (per-chunk Buffer#toString would mangle it).
    child.stdout.setEncoding('utf-8');
    child.stderr.setEncoding('utf-8');
    child.stdout.on('data', (chunk) => { stdout += chunk; });
    child.stderr.on('data', (chunk) => { stderr += chunk; });

    child.on('close', (code) => {
      clearTimeout(timer);
      const usage = extractUsageFromStream(stdout);
      if (!usage) {
        log(`measureParentTokens: no usage extracted; exit=${code}; stderr (first 300): ${stderr.slice(0, 300)}`);
        resolve(null);
        return;
      }
      const total = (usage.input_tokens ?? 0) + (usage.cache_creation_input_tokens ?? 0) + (usage.cache_read_input_tokens ?? 0);
      log(`measureParentTokens: input=${usage.input_tokens} cache_creation=${usage.cache_creation_input_tokens} cache_read=${usage.cache_read_input_tokens} total=${total}`);
      resolve({ total, ...usage });
    });
    child.on('error', (e) => {
      clearTimeout(timer);
      log(`measureParentTokens spawn error: ${e.message}`);
      resolve(null);
    });

    // If the child is gone before reading its stdin, the write fails with
    // EPIPE; without a listener that becomes an uncaught exception. The
    // failure itself is already reported through 'error' / 'close'.
    child.stdin.on('error', () => {});
    child.stdin.end('Reply only with the word OK.');
  });
}
|
||||||
|
|
||||||
|
/**
 * Harness entry point.
 *
 * Sequence: parse arguments -> build the parent context and write it to a
 * temp file -> (dry-run stops here) -> check the Claude Code version ->
 * measure the parent context size with a warm-up call -> run 3 children
 * sequentially -> decide the result -> run the analyser -> write the
 * result file.
 *
 * Exit codes used here: 0 completed (including INCONCLUSIVE verdicts from
 * the children), 2 hard timeout, 3 version check or warm-up failed,
 * 4 parent context outside the token band.
 * NOTE(review): the usage text also lists exit code 5 ("Child API metadata
 * unavailable"), but no path in this function exits with 5 — confirm which
 * of the two is intended.
 *
 * The temp context file is removed by a process 'exit' hook, so it is
 * cleaned up on every exit path (early INCONCLUSIVE exits, hard timeout,
 * uncaught errors), not only after a dry-run or a full run.
 *
 * @returns {Promise<void>} Does not normally resolve; the process exits.
 */
async function main() {
  const opts = parseArgs(process.argv.slice(2));
  if (opts.help) {
    process.stdout.write(usage());
    process.exit(0);
  }

  const hardTimer = setTimeout(() => {
    process.stderr.write('[q3] HARD TIMEOUT: 10 min exceeded, exit 2\n');
    process.exit(2);
  }, HARD_TIMEOUT_MS);

  // Single exit point for the regular paths: stop the watchdog, then exit.
  const exitWith = (code) => {
    clearTimeout(hardTimer);
    process.exit(code);
  };

  log(`starting at ${nowIso()}`);

  // Build parent context first (works in dry-run too).
  log('building parent context...');
  const ctx = buildParentContext();
  log(`context: ${ctx.fileCount} files, ${ctx.byteLength} bytes, sha256=${ctx.sha256.slice(0, 16)}`);

  // Write parent context to a temp file (used as system-prompt-file for all
  // 3 children + warm-up). Determinism check: SHA-256 already computed.
  const contextFilePath = join(tmpdir(), `q3-parent-context-${process.pid}-${Date.now()}.txt`);
  // 'exit' handlers run synchronously for every process.exit(), so this
  // covers all exit paths below. Registered before the write so a partially
  // written file is removed too.
  process.once('exit', () => {
    try { unlinkSync(contextFilePath); } catch { /* already gone or never created */ }
  });
  writeFileSync(contextFilePath, ctx.text, 'utf-8');
  log(`wrote parent context to: ${contextFilePath}`);

  // Print 3 child argvs for SC #3 verification.
  const argvBase = buildChildArgv(contextFilePath);
  log(`argv (identical for all 3 children):`);
  log(` argv: ${JSON.stringify(['claude', ...argvBase])}`);
  // One line per child, intentionally repeated.
  log(` "--allowedTools" "${ALLOWED_TOOLS}"`);
  log(` "--allowedTools" "${ALLOWED_TOOLS}"`);
  log(` "--allowedTools" "${ALLOWED_TOOLS}"`);

  if (opts.dryRun) {
    log('dry-run: skipping API calls.');
    exitWith(0);
  }

  // Pre-flight: CC version (SC #2 part 1).
  log('pre-flight: checking CC version...');
  const verCheck = checkCcVersion();
  if (!verCheck.ok) {
    log(`CC version check FAILED: ${verCheck.reason}`);
    const decision = { result: 'INCONCLUSIVE', reason: `CC version: ${verCheck.reason}` };
    writeResultFile(opts.out, ctx, verCheck.version, [], null, decision, runAnalyzer(), [verCheck.reason]);
    exitWith(3);
  }
  log(`CC version OK: ${verCheck.version}`);

  // Pre-flight: parent token band (SC #4).
  log('pre-flight: measuring parent context token count via warm-up...');
  const measurement = await measureParentTokens(contextFilePath);
  if (measurement === null) {
    const decision = { result: 'INCONCLUSIVE', reason: 'pre-flight warm-up returned no usage metadata' };
    writeResultFile(opts.out, ctx, verCheck.version, [], null, decision, runAnalyzer(), ['pre-flight failed']);
    exitWith(3);
  }
  const parentTokens = measurement.total;
  log(`parent total tokens: ${parentTokens} (input=${measurement.input_tokens} cache_creation=${measurement.cache_creation_input_tokens} cache_read=${measurement.cache_read_input_tokens})`);
  if (parentTokens < MIN_PARENT_TOKENS || parentTokens > MAX_PARENT_TOKENS) {
    const decision = {
      result: 'INCONCLUSIVE',
      reason: `parent context out of band: ${parentTokens} not in [${MIN_PARENT_TOKENS}, ${MAX_PARENT_TOKENS}]`,
    };
    writeResultFile(opts.out, ctx, verCheck.version, [], parentTokens, decision, runAnalyzer(), [decision.reason]);
    exitWith(4);
  }

  // Run 3 children sequentially (avoids spawn-burst rate-limit).
  const measurements = [];
  const runErrors = [];
  let allValid = true;
  for (let i = 1; i <= 3; i++) {
    log(`spawning child ${i}/3...`);
    const r = await spawnChild(contextFilePath, i);
    const usage = extractUsageFromStream(r.stdout);
    const argvSig = JSON.stringify(r.argv);
    if (r.code !== 0 || !usage || typeof usage.cache_creation_input_tokens !== 'number') {
      allValid = false;
      const err = `child ${i}: exit=${r.code}, killed=${r.killed}, usage=${usage ? 'partial' : 'missing'}`;
      runErrors.push(err);
      log(err);
      if (r.stderr) log(` stderr (first 500 chars): ${r.stderr.slice(0, 500)}`);
    }
    measurements.push({
      child: i,
      cache_creation_input_tokens: usage?.cache_creation_input_tokens ?? null,
      cache_read_input_tokens: usage?.cache_read_input_tokens ?? null,
      input_tokens: usage?.input_tokens ?? null,
      output_tokens: usage?.output_tokens ?? null,
      argv_signature: argvSig,
      exit_code: r.code,
    });
    log(` cache_creation=${usage?.cache_creation_input_tokens ?? 'N/A'} cache_read=${usage?.cache_read_input_tokens ?? 'N/A'}`);
  }

  // Decide result (SC #7).
  const decision = decideResult(measurements, allValid);
  log(`RESULT: ${decision.result} (${decision.reason})`);

  // Run analyser for telemetry context (SC #8).
  const analyzerSummary = runAnalyzer();

  // Write result file (SC #6).
  writeResultFile(opts.out, ctx, verCheck.version, measurements, parentTokens, decision, analyzerSummary, runErrors);

  // Exit 0 even on INCONCLUSIVE — that's a valid outcome per brief NFR.
  // Only exit non-zero on harness failures (already handled above).
  // The temp context file is removed by the 'exit' hook registered above.
  exitWith(0);
}
|
||||||
|
|
||||||
|
// Run the harness only when this file is the script handed to `node`.
// Anything that escapes main() is reported on stderr and mapped to exit
// code 7 (usage / I/O error).
// NOTE(review): the string comparison below presumably fails for script paths
// that need URL-encoding (spaces, non-ASCII) — confirm before relying on it.
if (import.meta.url === `file://${process.argv[1]}`) {
  const reportAndExit = (failure) => {
    const detail = failure.stack || failure.message;
    process.stderr.write(`[q3] uncaught: ${detail}\n`);
    process.exit(7);
  };
  main().then(undefined, reportAndExit);
}
|
||||||
Loading…
Add table
Add a link
Reference in a new issue