// ===== FILE: plugins/ultraplan-local/lib/stats/cache-analyzer.mjs =====
// (new file, mode 100644)
//
// lib/stats/cache-analyzer.mjs
// Summarizes ultraexecute-stats.jsonl: total events, percentile wall times,
// time range. Companion to event-emit.mjs (which produces the jsonl).
//
// Designed for /ultraplan-local Spor C: gives C3 telemetry context when
// interpreting Q3 experiment numbers (5+ weeks of accumulated data on the
// operator's machine as of 2026-05-04).
//
// Zero npm dependencies. Node stdlib only.

import { readFileSync, existsSync } from 'node:fs';
import { pathToFileURL } from 'node:url';

/**
 * Build the CLI help text.
 *
 * @returns {string} Usage block, terminated by a newline.
 */
function usage() {
  return `cache-analyzer.mjs — summarize ultraexecute-stats.jsonl

USAGE:
  node lib/stats/cache-analyzer.mjs --json <path>

OUTPUT (stdout, JSON):
  {
    "total_events": <int>,
    "events_with_duration": <int>,
    "wall_time_ms_p50": <number|null>,
    "wall_time_ms_p90": <number|null>,
    "wall_time_ms_max": <number|null>,
    "unique_event_names": [...],
    "oldest_event_iso": "<iso-8601>" | null,
    "newest_event_iso": "<iso-8601>" | null
  }

EXIT:
  0 success, 1 file not found / read error, 2 usage error.
`;
}

/**
 * Pick the element at index floor(length * fraction) from an ascending array,
 * clamped to the last element.
 *
 * @param {number[]} sorted - Non-empty array sorted ascending.
 * @param {number} fraction - Fraction in [0, 1], e.g. 0.9 for p90.
 * @returns {number}
 */
function pickAtFraction(sorted, fraction) {
  const idx = Math.floor(sorted.length * fraction);
  return sorted[Math.min(idx, sorted.length - 1)];
}

/**
 * Summarize the lines of a stats jsonl file.
 *
 * Blank lines and lines that are not valid JSON are skipped. Lines that parse
 * to something other than a plain object (null, numbers, strings, arrays) are
 * skipped as well: they carry no event fields, and reading a property of
 * `null` would otherwise throw.
 *
 * @param {Iterable<string>} lines - Raw lines, one JSON document per line.
 * @returns {{
 *   total_events: number,
 *   events_with_duration: number,
 *   wall_time_ms_p50: number|null,
 *   wall_time_ms_p90: number|null,
 *   wall_time_ms_max: number|null,
 *   unique_event_names: string[],
 *   oldest_event_iso: string|null,
 *   newest_event_iso: string|null,
 * }}
 */
export function summarize(lines) {
  const durations = [];
  const names = new Set();
  let totalEvents = 0;
  let oldestMs = null;
  let newestMs = null;

  for (const line of lines) {
    const trimmed = line.trim();
    if (trimmed === '') continue;

    let obj;
    try {
      obj = JSON.parse(trimmed);
    } catch {
      // Malformed line: skip it rather than abort the whole summary.
      continue;
    }
    if (obj === null || typeof obj !== 'object' || Array.isArray(obj)) continue;

    totalEvents++;

    // The event name is read from `event`, falling back to `name`.
    if (obj.event && typeof obj.event === 'string') names.add(obj.event);
    else if (obj.name && typeof obj.name === 'string') names.add(obj.name);

    if (typeof obj.duration_ms === 'number' && Number.isFinite(obj.duration_ms)) {
      durations.push(obj.duration_ms);
    }

    // First truthy timestamp-like field wins; only parseable strings count.
    const tsField = obj.timestamp || obj.ts || obj.iso || obj.time;
    if (typeof tsField === 'string') {
      const t = Date.parse(tsField);
      if (!Number.isNaN(t)) {
        if (oldestMs === null || t < oldestMs) oldestMs = t;
        if (newestMs === null || t > newestMs) newestMs = t;
      }
    }
  }

  durations.sort((a, b) => a - b);
  const hasDurations = durations.length > 0;

  return {
    total_events: totalEvents,
    events_with_duration: durations.length,
    wall_time_ms_p50: hasDurations ? pickAtFraction(durations, 0.5) : null,
    wall_time_ms_p90: hasDurations ? pickAtFraction(durations, 0.9) : null,
    wall_time_ms_max: hasDurations ? durations[durations.length - 1] : null,
    unique_event_names: [...names].sort(),
    oldest_event_iso: oldestMs === null ? null : new Date(oldestMs).toISOString(),
    newest_event_iso: newestMs === null ? null : new Date(newestMs).toISOString(),
  };
}

/**
 * Read a jsonl file and summarize it.
 *
 * @param {string} path - Path of the jsonl file.
 * @returns {object} The summary from `summarize`, or `{ error: string }` when
 *   the file does not exist or cannot be read.
 */
export function summarizeFile(path) {
  if (!existsSync(path)) {
    return { error: `file not found: ${path}` };
  }
  let text;
  try {
    text = readFileSync(path, 'utf-8');
  } catch (e) {
    return { error: `read error: ${e.message}` };
  }
  return summarize(text.split('\n'));
}

/**
 * Run the command-line interface.
 *
 * @param {string[]} args - Arguments after the script path.
 * @returns {number} Exit code: 0 success, 1 file not found / read error,
 *   2 usage error.
 */
function runCli(args) {
  const jsonIdx = args.indexOf('--json');
  const path = jsonIdx === -1 ? undefined : args[jsonIdx + 1];
  if (!path) {
    process.stderr.write(usage());
    return 2;
  }
  const result = summarizeFile(path);
  if (result.error) {
    process.stderr.write(`cache-analyzer: ${result.error}\n`);
    return 1;
  }
  process.stdout.write(JSON.stringify(result, null, 2) + '\n');
  return 0;
}

// Run the CLI only when this file is the entry script. pathToFileURL applies
// the same encoding as import.meta.url, so paths containing spaces or other
// URL-special characters still match. The exit code is set via
// process.exitCode (not process.exit) so pending stdout/stderr writes are
// flushed before the process ends.
if (process.argv[1] !== undefined && import.meta.url === pathToFileURL(process.argv[1]).href) {
  process.exitCode = runCli(process.argv.slice(2));
}

// ===== FILE: plugins/ultraplan-local/scripts/q3-cache-prefix-experiment.mjs =====
// (new file, mode 100644; line 1 of that file is the shebang `#!/usr/bin/env node`)
//
// scripts/q3-cache-prefix-experiment.mjs
//
// Q3 cache-prefix-preservation experiment for Spor C of post-v3.4.0 roadmap.
// Measures whether CLAUDE_CODE_FORK_SUBAGENT=1 preserves the server-side
// cache prefix across multiple `claude -p` fork-children when all children
// spawn with byte-identical --allowedTools at 150-250K parent context.
//
// Brief: .claude/projects/2026-05-04-spor-c-q3-cache-prefix-experiment/brief.md
// Plan:  .claude/projects/2026-05-04-spor-c-q3-cache-prefix-experiment/plan.md
//
// Result thresholds (master-plan):
//   median(cache_creation_input_tokens) <= 1500  -> POSITIVE
//   median >= 3500                               -> NEGATIVE
//   else                                         -> INCONCLUSIVE
//   Any per-child failure or missing metadata    -> INCONCLUSIVE.
//
// Zero npm dependencies. Node stdlib only. Hook-safe (no forbidden words
// in source — pre-bash-executor.mjs scans the entire command string when
// this script is invoked).

import { spawn, spawnSync } from 'node:child_process';
import { readFileSync, readdirSync, statSync, writeFileSync, existsSync, mkdirSync, unlinkSync } from 'node:fs';
import { createHash } from 'node:crypto';
import { join, dirname, resolve } from 'node:path';
import { homedir, tmpdir } from 'node:os';

// Project directory and default result file, both resolved against the
// current working directory.
const PROJECT_DIR = resolve(
  process.cwd(),
  '.claude/projects/2026-05-04-spor-c-q3-cache-prefix-experiment',
);
const DEFAULT_OUT = join(PROJECT_DIR, 'q3-experiment-results.local.md');

// Telemetry jsonl handed to the analyzer. Derived from the current user's
// home directory instead of a hard-coded /Users/<name> prefix so the script
// also finds the file on machines other than the original operator's.
const STATS_JSONL = join(
  homedir(),
  '.claude/plugins/data/ultraplan-local-ktg-plugin-marketplace/ultraexecute-stats.jsonl',
);
const ANALYZER = resolve(process.cwd(), 'lib/stats/cache-analyzer.mjs');

// Accepted band for the measured parent context size, in tokens.
const MIN_PARENT_TOKENS = 150_000;
const MAX_PARENT_TOKENS = 250_000;
// Decision thresholds on median(cache_creation_input_tokens).
const POSITIVE_THRESHOLD = 1500;
const NEGATIVE_THRESHOLD = 3500;
const HARD_TIMEOUT_MS = 600_000; // 10 min total
const PER_CHILD_TIMEOUT_MS = 240_000; // 4 min per child
// Minimum Claude Code version as [major, minor, patch].
const MIN_CC_VERSION = [2, 1, 121];
const ALLOWED_TOOLS = 'Read,Write,Edit,Bash,Glob,Grep';
const MODEL = 'sonnet';

// Sources for parent context build. Brief constraint: no secrets, no ~/, no
// other plugins. Stays inside plugins/ultraplan-local/.
//
// Calibration (empirical, CC v2.1.128 + Sonnet 4.6):
//   Token-per-byte ratio varies from 0.38-0.90 depending on content type.
//   Mixed .md+.mjs at 264K bytes yielded only ~60K context tokens (4.5 byte/token).
//   To reliably hit 150K context tokens, target ~600-700K bytes of mixed content.
//   Hooks baseline ~62K cache_creation always present, so total lands ~212-262K.
// Static ES imports are hoisted, so declaring this one here is valid. It is
// only needed by the entry-point guard at the bottom of the script.
import { pathToFileURL } from 'node:url';

const CONTEXT_DIRS = [
  'commands',
  'agents',
  'lib/parsers',
  'lib/validators',
  'lib/util',
  'lib/review',
  'lib/stats',
];
const CONTEXT_EXTRA_FILES = [
  'docs/HANDOVER-CONTRACTS.md',
  'CLAUDE.md',
  'examples/02-real-cli/REGENERATED.md',
];

// Timeout for the single pre-flight warm-up call.
const WARMUP_TIMEOUT_MS = 180_000;

// `claude` subprocesses that are currently running. Tracked so they can be
// terminated if the harness exits while one is still in flight.
const inFlightChildren = new Set();

/**
 * Build the CLI help text.
 *
 * @returns {string} Usage block, terminated by a newline.
 */
function usage() {
  return `q3-cache-prefix-experiment.mjs — Q3 cache-prefix experiment harness

USAGE:
  node scripts/q3-cache-prefix-experiment.mjs [--help] [--dry-run] [--out <path>]

FLAGS:
  --help        Print this usage block and exit 0.
  --dry-run     Build parent context, print child argv arrays + token-byte
                estimate to stderr, do NOT call the API. No result file written.
  --out <path>  Write result file to <path>. Default:
                ${DEFAULT_OUT}

EXIT CODES:
  0  Experiment completed (RESULT line written).
  2  Hard timeout exceeded.
  3  CC version too old or FORK_SUBAGENT warm-up failed -> INCONCLUSIVE.
  4  Parent context out of 150K-250K band -> INCONCLUSIVE.
  5  Reserved. Child API metadata unavailable is currently reported as
     RESULT: INCONCLUSIVE with exit 0.
  7  Usage / I/O error.

ENV:
  ANTHROPIC_API_KEY must be set (read from operator env, not embedded).
`;
}

/**
 * Parse command-line flags. Prints usage and exits with code 7 on an unknown
 * argument or when `--out` is given without a value.
 *
 * @param {string[]} argv - Arguments after the script path.
 * @returns {{ help: boolean, dryRun: boolean, out: string }}
 */
function parseArgs(argv) {
  const opts = { help: false, dryRun: false, out: DEFAULT_OUT };
  for (let i = 0; i < argv.length; i++) {
    const arg = argv[i];
    switch (arg) {
      case '--help':
      case '-h':
        opts.help = true;
        break;
      case '--dry-run':
        opts.dryRun = true;
        break;
      case '--out': {
        const value = argv[++i];
        // Without this check a trailing `--out` leaves `out` undefined and the
        // run only fails much later, inside writeResultFile.
        if (value === undefined || value === '') {
          process.stderr.write(`Missing value for --out\n${usage()}`);
          process.exit(7);
        }
        opts.out = value;
        break;
      }
      default:
        process.stderr.write(`Unknown argument: ${arg}\n${usage()}`);
        process.exit(7);
    }
  }
  return opts;
}

/**
 * Write a prefixed progress line to stderr.
 *
 * @param {string} msg
 */
function log(msg) {
  process.stderr.write(`[q3] ${msg}\n`);
}

/** @returns {string} Current time as an ISO-8601 string. */
function nowIso() {
  return new Date().toISOString();
}

/**
 * Recursively list regular files under `dir`.
 *
 * @param {string} dir - Directory to walk; a missing directory yields [].
 * @param {string} [ext] - When given, only paths ending in `ext` are kept.
 * @returns {string[]} Sorted paths (deterministic ordering).
 */
function listFilesRecursive(dir, ext) {
  const out = [];
  if (!existsSync(dir)) return out;
  for (const ent of readdirSync(dir, { withFileTypes: true })) {
    const p = join(dir, ent.name);
    if (ent.isDirectory()) out.push(...listFilesRecursive(p, ext));
    else if (ent.isFile() && (!ext || p.endsWith(ext))) out.push(p);
  }
  return out.sort();
}

/**
 * Concatenate the context source files into one text blob.
 *
 * Reads the .mjs and .md files under CONTEXT_DIRS, then CONTEXT_EXTRA_FILES,
 * each prefixed with a `=== FILE: <path> ===` header. Missing files are
 * skipped; unreadable files are skipped and logged.
 *
 * @returns {{ text: string, sha256: string, fileCount: number, byteLength: number }}
 */
function buildParentContext() {
  const parts = [];
  let fileCount = 0;

  const addFile = (file) => {
    if (!existsSync(file)) return;
    try {
      parts.push(`=== FILE: ${file} ===\n` + readFileSync(file, 'utf-8'));
      fileCount++;
    } catch (e) {
      // Best-effort: the context is still built, but the skip is made visible
      // because it changes the SHA-256 of the result.
      log(`skipping unreadable context file ${file}: ${e.message}`);
    }
  };

  for (const dir of CONTEXT_DIRS) {
    const files = [
      ...listFilesRecursive(dir, '.mjs'),
      ...listFilesRecursive(dir, '.md'),
    ].sort();
    for (const file of files) addFile(file);
  }
  for (const file of CONTEXT_EXTRA_FILES) addFile(file);

  const text = parts.join('\n\n');
  const sha256 = createHash('sha256').update(text).digest('hex');
  return { text, sha256, fileCount, byteLength: Buffer.byteLength(text, 'utf-8') };
}

/**
 * Check that the installed `claude` CLI is at least MIN_CC_VERSION.
 *
 * @returns {{ ok: boolean, version?: string, reason?: string }}
 */
function checkCcVersion() {
  const r = spawnSync('claude', ['--version'], { encoding: 'utf-8', timeout: 10_000 });
  // spawnSync sets `error` (and leaves status null) when the binary could not
  // be started or the call timed out; report that instead of "exit null".
  if (r.error) {
    return { ok: false, reason: `claude --version failed to run: ${r.error.message}` };
  }
  if (r.status !== 0) {
    return { ok: false, reason: `claude --version exit ${r.status}: ${r.stderr || r.stdout}` };
  }
  const m = (r.stdout || '').match(/(\d+)\.(\d+)\.(\d+)/);
  if (!m) return { ok: false, reason: `cannot parse version from: ${r.stdout}` };

  const got = [Number(m[1]), Number(m[2]), Number(m[3])];
  const version = got.join('.');
  // Compare major, then minor, then patch; the first difference decides.
  for (let i = 0; i < 3; i++) {
    if (got[i] > MIN_CC_VERSION[i]) return { ok: true, version };
    if (got[i] < MIN_CC_VERSION[i]) {
      return {
        ok: false,
        reason: `CC ${version} < required ${MIN_CC_VERSION.join('.')}`,
        version,
      };
    }
  }
  return { ok: true, version };
}

/**
 * Build the argv shared by every measured child.
 *
 * @param {string} contextFilePath - File passed to --append-system-prompt-file.
 * @returns {string[]}
 */
function buildChildArgv(contextFilePath) {
  // Byte-identical across all 3 children (SC #3). Per-child differentiation
  // is via the user prompt suffix only, NOT via argv.
  //
  // Context is delivered via --append-system-prompt-file (NOT stdin) to:
  //   1. avoid stdin pipe buffer issues at >200K bytes
  //   2. ensure context is part of the cache-prefix segment
  //
  // --exclude-dynamic-system-prompt-sections moves cwd/env/git-status into
  // the user message, preventing per-child variation in the cache prefix.
  return [
    '-p',
    '--model', MODEL,
    '--output-format', 'stream-json',
    '--verbose',
    '--allowedTools', ALLOWED_TOOLS,
    '--max-turns', '1',
    '--append-system-prompt-file', contextFilePath,
    '--exclude-dynamic-system-prompt-sections',
  ];
}

/**
 * Run `claude` once with CLAUDE_CODE_FORK_SUBAGENT=1, feeding `prompt` on
 * stdin and collecting stdout/stderr. Never rejects.
 *
 * @param {string[]} argv - Arguments for the `claude` binary.
 * @param {string} prompt - Text written to the child's stdin.
 * @param {number} timeoutMs - The child is sent SIGTERM after this long.
 * @returns {Promise<{ code: number|null, stdout: string, stderr: string,
 *   killed: boolean, spawnError: Error|null }>} `code` is -1 when the process
 *   emitted an 'error' event.
 */
function runClaude(argv, prompt, timeoutMs) {
  return new Promise((done) => {
    const env = { ...process.env, CLAUDE_CODE_FORK_SUBAGENT: '1' };
    const child = spawn('claude', argv, { env, stdio: ['pipe', 'pipe', 'pipe'] });
    inFlightChildren.add(child);

    let stdout = '';
    let stderr = '';
    let killed = false;

    const timer = setTimeout(() => {
      killed = true;
      child.kill('SIGTERM');
    }, timeoutMs);

    const settle = (code, spawnError) => {
      clearTimeout(timer);
      inFlightChildren.delete(child);
      done({ code, stdout, stderr, killed, spawnError });
    };

    // setEncoding decodes across chunk boundaries, so a multi-byte UTF-8
    // sequence split between two chunks is not corrupted.
    child.stdout.setEncoding('utf-8');
    child.stderr.setEncoding('utf-8');
    child.stdout.on('data', (chunk) => { stdout += chunk; });
    child.stderr.on('data', (chunk) => { stderr += chunk; });
    child.on('close', (code) => settle(code, null));
    child.on('error', (err) => settle(-1, err));

    // A child that dies before reading stdin raises an error on the pipe.
    // That failure is already reported through 'close'/'error' above, so it
    // must not surface as an unhandled stream error.
    child.stdin.on('error', () => {});
    child.stdin.write(prompt);
    child.stdin.end();
  });
}

/**
 * Spawn one measured child.
 *
 * @param {string} contextFilePath - Parent context file.
 * @param {number} childIndex - Index embedded in the prompt.
 * @returns {Promise<{ code: number|null, stdout: string, stderr: string,
 *   killed: boolean, argv: string[] }>}
 */
async function spawnChild(contextFilePath, childIndex) {
  const argv = buildChildArgv(contextFilePath);
  // User prompt is short (per-child suffix only). Context lives in the
  // appended system-prompt file.
  const prompt = `[child #${childIndex}] Reply only with the word OK.`;
  const r = await runClaude(argv, prompt, PER_CHILD_TIMEOUT_MS);
  const stderr = r.spawnError
    ? r.stderr + `\nspawn error: ${r.spawnError.message}`
    : r.stderr;
  return { code: r.code, stdout: r.stdout, stderr, killed: r.killed, argv: ['claude', ...argv] };
}

/**
 * Find the usage payload in stream-json output.
 *
 * Scans lines in order and returns the first `message.usage` of an
 * `assistant` event or the first top-level `usage` of a `result` event.
 *
 * @param {string} stdout - Raw child stdout.
 * @returns {object|null} The usage object, or null when none was found.
 */
function extractUsageFromStream(stdout) {
  for (const line of stdout.split('\n')) {
    if (!line.startsWith('{')) continue;
    try {
      const obj = JSON.parse(line);
      if (obj.type === 'assistant' && obj.message && obj.message.usage) {
        return obj.message.usage;
      }
      // Fallback: top-level result event also carries usage.
      if (obj.type === 'result' && obj.usage) {
        return obj.usage;
      }
    } catch { /* skip non-JSON lines */ }
  }
  return null;
}

/**
 * Median of a list of numbers; the mean of the two middle values for an
 * even-length list.
 *
 * @param {number[]} values
 * @returns {number|null} null for an empty list.
 */
function median(values) {
  if (values.length === 0) return null;
  const sorted = [...values].sort((a, b) => a - b);
  const mid = Math.floor(sorted.length / 2);
  return sorted.length % 2 === 0
    ? (sorted[mid - 1] + sorted[mid]) / 2
    : sorted[mid];
}

/**
 * Turn per-child measurements into the experiment verdict.
 *
 * @param {Array<{ cache_creation_input_tokens: number|null }>} measurements
 * @param {boolean} allValid - False when any child failed or lacked metadata.
 * @returns {{ result: 'POSITIVE'|'NEGATIVE'|'INCONCLUSIVE', reason: string, median?: number }}
 */
function decideResult(measurements, allValid) {
  if (!allValid) return { result: 'INCONCLUSIVE', reason: 'one or more children failed or missing metadata' };
  const med = median(measurements.map((m) => m.cache_creation_input_tokens));
  if (med === null) return { result: 'INCONCLUSIVE', reason: 'no measurements' };
  if (med <= POSITIVE_THRESHOLD) {
    return { result: 'POSITIVE', reason: `median cache_creation ${med} <= ${POSITIVE_THRESHOLD}`, median: med };
  }
  if (med >= NEGATIVE_THRESHOLD) {
    return { result: 'NEGATIVE', reason: `median cache_creation ${med} >= ${NEGATIVE_THRESHOLD}`, median: med };
  }
  return { result: 'INCONCLUSIVE', reason: `median cache_creation ${med} in (${POSITIVE_THRESHOLD}, ${NEGATIVE_THRESHOLD})`, median: med };
}

/**
 * Run the stats analyzer script on STATS_JSONL.
 *
 * @returns {object|null} Parsed analyzer JSON, or null when the analyzer or
 *   the jsonl is missing, the run fails, or its output is not JSON.
 */
function runAnalyzer() {
  if (!existsSync(ANALYZER) || !existsSync(STATS_JSONL)) return null;
  // process.execPath: run the analyzer with the same Node binary as this
  // script instead of whatever `node` resolves to on PATH.
  const r = spawnSync(process.execPath, [ANALYZER, '--json', STATS_JSONL], {
    encoding: 'utf-8',
    timeout: 30_000,
  });
  if (r.status !== 0) return null;
  try {
    return JSON.parse(r.stdout);
  } catch {
    return null;
  }
}

/**
 * Write the markdown result file, creating its directory if needed.
 *
 * @param {string} outPath - Destination file.
 * @param {{ fileCount: number, byteLength: number, sha256: string }} ctx
 * @param {string|undefined} ccVersion - Claude Code version, if known.
 * @param {object[]} measurements - Per-child rows (may be empty).
 * @param {number|null} parentTokens - Pre-flight token total, if measured.
 * @param {{ result: string, reason: string, median?: number }} decision
 * @param {object|null} analyzerSummary - Output of runAnalyzer().
 * @param {string[]} runErrors - Error lines for the "Errors" section.
 */
function writeResultFile(outPath, ctx, ccVersion, measurements, parentTokens, decision, analyzerSummary, runErrors) {
  // ALWAYS write at least 30 lines + required strings (SC #6).
  const dir = dirname(outPath);
  if (!existsSync(dir)) mkdirSync(dir, { recursive: true });

  const lines = [];
  lines.push(
    '# Q3 Cache-Prefix-Preservation Experiment — Results',
    '',
    `Generated: ${nowIso()}`,
    `Brief: \`.claude/projects/2026-05-04-spor-c-q3-cache-prefix-experiment/brief.md\``,
    `Plan: \`.claude/projects/2026-05-04-spor-c-q3-cache-prefix-experiment/plan.md\``,
    '',
    '## Setup',
    '',
    `- Claude Code version: ${ccVersion ?? 'unknown'}`,
    `- Model: ${MODEL}`,
    `- Allowed tools: ${ALLOWED_TOOLS}`,
    `- CLAUDE_CODE_FORK_SUBAGENT: 1 (set per-child via env)`,
    `- Children: 3 (sequential spawn)`,
    '',
    '## Parent context',
    '',
    `- File count: ${ctx.fileCount}`,
    `- Byte length: ${ctx.byteLength}`,
    `- SHA-256: \`${ctx.sha256}\``,
    `- Measured input_tokens (pre-flight): ${parentTokens ?? 'N/A'}`,
    `- Target band: [${MIN_PARENT_TOKENS}, ${MAX_PARENT_TOKENS}]`,
    '',
    '## Per-child measurements',
    '',
    '| child | cache_creation | cache_read | input_tokens | output_tokens | argv_unique | exit |',
    '|-------|----------------|------------|--------------|---------------|-------------|------|',
  );
  for (const m of measurements) {
    lines.push(
      `| ${m.child} | ${m.cache_creation_input_tokens ?? 'N/A'} | ${m.cache_read_input_tokens ?? 'N/A'} | ${m.input_tokens ?? 'N/A'} | ${m.output_tokens ?? 'N/A'} | ${m.argv_signature} | ${m.exit_code} |`,
    );
  }

  const argvSet = new Set(measurements.map((m) => m.argv_signature));
  lines.push(
    '',
    '## argv parity (SC #3)',
    '',
    `Unique argv signatures across children: ${argvSet.size} (expected: 1)`,
    '',
    '## Telemetry context',
    '',
  );
  if (analyzerSummary) {
    lines.push(
      `- total_events: ${analyzerSummary.total_events}`,
      `- wall_time_ms_p50: ${analyzerSummary.wall_time_ms_p50}`,
      `- wall_time_ms_p90: ${analyzerSummary.wall_time_ms_p90}`,
      `- oldest_event_iso: ${analyzerSummary.oldest_event_iso ?? 'N/A'}`,
      `- newest_event_iso: ${analyzerSummary.newest_event_iso ?? 'N/A'}`,
    );
  } else {
    lines.push('- analyser unavailable or stats jsonl missing');
  }
  lines.push('');

  if (runErrors.length > 0) {
    lines.push('## Errors', '');
    for (const e of runErrors) lines.push(`- ${e}`);
    lines.push('');
  }

  lines.push('## Conclusion', '', `Reason: ${decision.reason}`);
  if (decision.median !== undefined) {
    lines.push(`Median cache_creation_input_tokens: ${decision.median}`);
  }
  lines.push(
    '',
    `RESULT: ${decision.result}`,
    '',
    '## Path C decision (master-plan §Spor D direction)',
    '',
  );
  if (decision.result === 'POSITIVE') {
    lines.push('Path C is feasible. C3 should write a v3.5.0 brief proposing cache-warm sentinel + identical-tool parallel children.');
  } else if (decision.result === 'NEGATIVE') {
    lines.push('Path C is closed. C3 should update master-plan §Spor D = stabilisation work; v3.5.0 brief NOT written.');
  } else {
    lines.push('Path C decision deferred to operator. C3 documents the gap and proposes targeted follow-up before Spor D commits.');
  }
  lines.push('');

  writeFileSync(outPath, lines.join('\n') + '\n', 'utf-8');
  log(`wrote result file: ${outPath} (${lines.length} lines)`);
}

/**
 * Fire one warm-up call to measure the parent context size.
 *
 * CC's stream-json wrapper splits the prompt into:
 *   - input_tokens: only the non-cached portion (typically the latest turn)
 *   - cache_creation_input_tokens: tokens promoted to cache (the parent context)
 *   - cache_read_input_tokens: tokens served from cache (zero on first hit)
 *
 * Total parent context size = input_tokens + cache_creation + cache_read.
 *
 * @param {string} contextFilePath - Parent context file.
 * @returns {Promise<object|null>} The usage fields plus `total`, or null when
 *   the call could not be spawned or produced no usage metadata.
 */
async function measureParentTokens(contextFilePath) {
  // Same flags as the measured children, minus --allowedTools.
  const argv = [
    '-p',
    '--model', MODEL,
    '--output-format', 'stream-json',
    '--verbose',
    '--max-turns', '1',
    '--append-system-prompt-file', contextFilePath,
    '--exclude-dynamic-system-prompt-sections',
  ];
  const r = await runClaude(argv, 'Reply only with the word OK.', WARMUP_TIMEOUT_MS);
  if (r.spawnError) {
    log(`measureParentTokens spawn error: ${r.spawnError.message}`);
    return null;
  }
  const usageInfo = extractUsageFromStream(r.stdout);
  if (!usageInfo) {
    log(`measureParentTokens: no usage extracted; exit=${r.code}; stderr (first 300): ${r.stderr.slice(0, 300)}`);
    return null;
  }
  const total = (usageInfo.input_tokens ?? 0)
    + (usageInfo.cache_creation_input_tokens ?? 0)
    + (usageInfo.cache_read_input_tokens ?? 0);
  log(`measureParentTokens: input=${usageInfo.input_tokens} cache_creation=${usageInfo.cache_creation_input_tokens} cache_read=${usageInfo.cache_read_input_tokens} total=${total}`);
  return { total, ...usageInfo };
}

/**
 * Register a process 'exit' handler that terminates any in-flight `claude`
 * child and removes the temp context file. Because it runs on every exit
 * path (including the hard timeout and the INCONCLUSIVE early exits), the
 * temp file is never left behind.
 *
 * @param {string} contextFilePath - Temp file to delete on exit.
 */
function registerExitCleanup(contextFilePath) {
  process.on('exit', () => {
    for (const child of inFlightChildren) child.kill('SIGTERM');
    try {
      unlinkSync(contextFilePath);
    } catch { /* best-effort: the file may already be gone */ }
  });
}

/**
 * Entry point: build the parent context, run the pre-flight checks, spawn
 * the three measured children sequentially, and write the result file.
 * Always terminates through process.exit with one of the documented codes.
 */
async function main() {
  const opts = parseArgs(process.argv.slice(2));
  if (opts.help) {
    process.stdout.write(usage());
    process.exit(0);
  }

  const hardTimer = setTimeout(() => {
    process.stderr.write('[q3] HARD TIMEOUT: 10 min exceeded, exit 2\n');
    process.exit(2);
  }, HARD_TIMEOUT_MS);

  log(`starting at ${nowIso()}`);

  // Build parent context first (works in dry-run too).
  log('building parent context...');
  const ctx = buildParentContext();
  log(`context: ${ctx.fileCount} files, ${ctx.byteLength} bytes, sha256=${ctx.sha256.slice(0, 16)}`);

  // Write parent context to a temp file (used as system-prompt-file for all
  // 3 children + warm-up). Determinism check: SHA-256 already computed.
  const contextFilePath = join(tmpdir(), `q3-parent-context-${process.pid}-${Date.now()}.txt`);
  writeFileSync(contextFilePath, ctx.text, 'utf-8');
  registerExitCleanup(contextFilePath);
  log(`wrote parent context to: ${contextFilePath}`);

  // Print 3 child argvs for SC #3 verification.
  const argvBase = buildChildArgv(contextFilePath);
  log(`argv (identical for all 3 children):`);
  log(`  argv: ${JSON.stringify(['claude', ...argvBase])}`);
  for (let i = 0; i < 3; i++) {
    log(`  "--allowedTools" "${ALLOWED_TOOLS}"`);
  }

  if (opts.dryRun) {
    log('dry-run: skipping API calls.');
    clearTimeout(hardTimer);
    process.exit(0);
  }

  // Pre-flight: CC version (SC #2 part 1).
  log('pre-flight: checking CC version...');
  const verCheck = checkCcVersion();
  if (!verCheck.ok) {
    log(`CC version check FAILED: ${verCheck.reason}`);
    const decision = { result: 'INCONCLUSIVE', reason: `CC version: ${verCheck.reason}` };
    writeResultFile(opts.out, ctx, verCheck.version, [], null, decision, runAnalyzer(), [verCheck.reason]);
    clearTimeout(hardTimer);
    process.exit(3);
  }
  log(`CC version OK: ${verCheck.version}`);

  // Pre-flight: parent token band (SC #4).
  log('pre-flight: measuring parent context token count via warm-up...');
  const measurement = await measureParentTokens(contextFilePath);
  if (measurement === null) {
    const decision = { result: 'INCONCLUSIVE', reason: 'pre-flight warm-up returned no usage metadata' };
    writeResultFile(opts.out, ctx, verCheck.version, [], null, decision, runAnalyzer(), ['pre-flight failed']);
    clearTimeout(hardTimer);
    process.exit(3);
  }
  const parentTokens = measurement.total;
  log(`parent total tokens: ${parentTokens} (input=${measurement.input_tokens} cache_creation=${measurement.cache_creation_input_tokens} cache_read=${measurement.cache_read_input_tokens})`);
  if (parentTokens < MIN_PARENT_TOKENS || parentTokens > MAX_PARENT_TOKENS) {
    const decision = {
      result: 'INCONCLUSIVE',
      reason: `parent context out of band: ${parentTokens} not in [${MIN_PARENT_TOKENS}, ${MAX_PARENT_TOKENS}]`,
    };
    writeResultFile(opts.out, ctx, verCheck.version, [], parentTokens, decision, runAnalyzer(), [decision.reason]);
    clearTimeout(hardTimer);
    process.exit(4);
  }

  // Run 3 children sequentially (avoids spawn-burst rate-limit).
  const measurements = [];
  const runErrors = [];
  let allValid = true;
  for (let i = 1; i <= 3; i++) {
    log(`spawning child ${i}/3...`);
    const r = await spawnChild(contextFilePath, i);
    const childUsage = extractUsageFromStream(r.stdout);
    const hasMetric = typeof childUsage?.cache_creation_input_tokens === 'number';
    if (r.code !== 0 || !hasMetric) {
      allValid = false;
      const err = `child ${i}: exit=${r.code}, killed=${r.killed}, usage=${childUsage ? 'partial' : 'missing'}`;
      runErrors.push(err);
      log(err);
      if (r.stderr) log(`  stderr (first 500 chars): ${r.stderr.slice(0, 500)}`);
    }
    measurements.push({
      child: i,
      cache_creation_input_tokens: childUsage?.cache_creation_input_tokens ?? null,
      cache_read_input_tokens: childUsage?.cache_read_input_tokens ?? null,
      input_tokens: childUsage?.input_tokens ?? null,
      output_tokens: childUsage?.output_tokens ?? null,
      argv_signature: JSON.stringify(r.argv),
      exit_code: r.code,
    });
    log(`  cache_creation=${childUsage?.cache_creation_input_tokens ?? 'N/A'} cache_read=${childUsage?.cache_read_input_tokens ?? 'N/A'}`);
  }

  // Decide result (SC #7).
  const decision = decideResult(measurements, allValid);
  log(`RESULT: ${decision.result} (${decision.reason})`);

  // Run analyser for telemetry context (SC #8).
  const analyzerSummary = runAnalyzer();

  // Write result file (SC #6).
  writeResultFile(opts.out, ctx, verCheck.version, measurements, parentTokens, decision, analyzerSummary, runErrors);

  clearTimeout(hardTimer);
  // Exit 0 even on INCONCLUSIVE — that's a valid outcome per brief NFR.
  // Only exit non-zero on harness failures (already handled above).
  // The temp context file is removed by the exit handler registered above.
  process.exit(0);
}

// Run only when this file is the entry script. pathToFileURL applies the same
// encoding as import.meta.url, so paths containing spaces or other
// URL-special characters still match.
if (process.argv[1] !== undefined && import.meta.url === pathToFileURL(process.argv[1]).href) {
  main().catch((e) => {
    process.stderr.write(`[q3] uncaught: ${e.stack || e.message}\n`);
    process.exit(7);
  });
}