From 146cf8ba35ebf578ba2f27101b7f92ffc694405a Mon Sep 17 00:00:00 2001 From: Kjell Tore Guttormsen Date: Fri, 1 May 2026 17:56:03 +0200 Subject: [PATCH] test(ai-psychosis): perf.test.mjs enforces hook timing budget --- plugins/ai-psychosis/tests/perf.test.mjs | 266 +++++++++++++++++++++++ 1 file changed, 266 insertions(+) create mode 100644 plugins/ai-psychosis/tests/perf.test.mjs diff --git a/plugins/ai-psychosis/tests/perf.test.mjs b/plugins/ai-psychosis/tests/perf.test.mjs new file mode 100644 index 0000000..b4f0003 --- /dev/null +++ b/plugins/ai-psychosis/tests/perf.test.mjs @@ -0,0 +1,266 @@ +// Hook timing budget enforcement. +// +// Two thresholds are measured per hook: +// +// - WALL_CLOCK_P95_MS = 200 — total round-trip including Node ESM cold-start. +// The cold-start alone is 60-120ms on Intel Mac, so 100ms is unrealistic +// for any subprocess-based hook. 200ms gives headroom for shared CI noise. +// +// - LOGIC_TIME_P95_MS = 50 — pure work (regex evaluation + JSONL/state I/O) +// measured by a fixture-runner that imports lib.mjs once and exercises +// the hook's hot path inline. This is the meaningful hook-perf assertion; +// ESM cold-start is not something the plugin can optimize. +// +// p95 = the 4th value of 5 sorted iterations. Failing once triggers a single +// retry to absorb transient OS noise; a second failure is treated as a real +// signal (real perf regression or threshold needs tuning). + +import { test } from 'node:test'; +import assert from 'node:assert/strict'; +import { execSync } from 'child_process'; +import { + mkdtempSync, mkdirSync, writeFileSync, readFileSync, existsSync, + unlinkSync, rmSync, appendFileSync, +} from 'fs'; +import { join } from 'path'; +import { tmpdir } from 'os'; +import { nowIso, nowEpoch } from '../hooks/scripts/lib.mjs'; + +const SCRIPTS_DIR = join(import.meta.dirname, '..', 'hooks', 'scripts'); +const WALL_CLOCK_P95_MS = 200; +const LOGIC_TIME_P95_MS = 50; +const ITERATIONS = 5; + +function setupDir() { + const dir = mkdtempSync(join(tmpdir(), 'ia-perf-')); + mkdirSync(join(dir, 'state'), { recursive: true }); + return dir; +} + +function p95(samples) { + return [...samples].sort((a, b) => a - b)[3]; +} + +// --- Wall-clock measurement (subprocess spawn) --- + +function runWallClock(scriptName, stdinJson, dataDir) { + const t0 = performance.now(); + execSync(`node ${join(SCRIPTS_DIR, scriptName)}`, { + input: JSON.stringify(stdinJson), + env: { ...process.env, CLAUDE_PLUGIN_DATA: dataDir }, + encoding: 'utf8', + timeout: 5000, + }); + return performance.now() - t0; +} + +function measureWallClock(scriptName, stdinTemplate) { + const samples = []; + for (let i = 0; i < ITERATIONS; i++) { + const dir = setupDir(); + try { + const sid = `perf-${i}`; + // Pre-seed state for hooks that read it (tool-tracker, session-end) + writeFileSync( + join(dir, 'state', `${sid}.json`), + JSON.stringify({ start_epoch: nowEpoch(), start_iso: nowIso(), tool_count: 0, edit_count: 0 }) + ); + samples.push(runWallClock(scriptName, { ...stdinTemplate, session_id: sid }, dir)); + } finally { + rmSync(dir, { recursive: true, force: true }); + } + } + return samples; +} + +// --- Logic-time fixtures (no subprocess, single import of lib.mjs) --- +// +// These mirror each hook's hot path in pure inline code so we can measure +// regex + I/O cost without paying the ~80ms ESM cold-start tax. The pattern +// list intentionally mirrors the size class of prompt-analyzer's full +// pattern set (~25 regexes) so the benchmark stays representative. + +const samplePatterns = [ + /\bI\s+can'?t\s+do\s+this\s+without\b/i, + /\bwhat\s+should\s+I\b/i, + /\bI\s+need\s+you\s+to\b/i, + /\bonly\s+you\s+understand\b/i, + /\b(?:always|never|every|all)\s+the\s+time\b/i, + /\bdefinitely\s+(?:should|will|need)\b/i, + /\babsolutely\s+(?:right|correct)\b/i, + /\bI\s+am\s+(?:tired|exhausted|drained)\b/i, + /\blate\s+night\b/i, + /\b(?:can'?t|cannot)\s+sleep\b/i, + /\bI\s+(?:wish|want)\s+(?:I|you)\s+could\b/i, + /\bdo\s+you\s+think\b/i, + /\bare\s+you\s+sure\b/i, + /\bright\?$/i, + /\bagree\?$/i, + /\bam\s+I\s+(?:right|wrong)\b/i, + /\bplease\s+confirm\b/i, + /\bI\s+keep\s+(?:thinking|coming\s+back)\b/i, + /\bI\s+(?:can'?t|cannot)\s+stop\b/i, + /\bone\s+more\s+(?:thing|question)\b/i, + /\bjust\s+one\s+more\b/i, + /\bI'?ve\s+been\s+thinking\b/i, + /\bwhy\s+did\s+I\b/i, + /\bI\s+messed\s+up\b/i, + /\bI\s+made\s+a\s+mistake\b/i, +]; + +function logicSessionStart(dir, sid) { + const stateFile = join(dir, 'state', `${sid}.json`); + const sessionsLog = join(dir, 'sessions.jsonl'); + const iso = nowIso(); + const epoch = nowEpoch(); + const state = { start_epoch: epoch, start_iso: iso, tool_count: 0, edit_count: 0 }; + writeFileSync(stateFile, JSON.stringify(state)); + appendFileSync( + sessionsLog, + JSON.stringify({ session_id: sid, start: iso, hour: new Date().getUTCHours(), is_late_night: false }) + '\n' + ); +} + +function logicPromptAnalyzer(dir, sid, prompt) { + const stateFile = join(dir, 'state', `${sid}.json`); + const state = existsSync(stateFile) ? JSON.parse(readFileSync(stateFile, 'utf8')) : {}; + let depHit = 0, valHit = 0; + for (const p of samplePatterns) { if (p.test(prompt)) { valHit = 1; break; } } + state.dep_flags = (state.dep_flags || 0) + depHit; + state.val_flags = (state.val_flags || 0) + valHit; + writeFileSync(stateFile, JSON.stringify(state)); +} + +function logicToolTracker(dir, sid, toolName) { + const stateFile = join(dir, 'state', `${sid}.json`); + const eventsLog = join(dir, 'events.jsonl'); + const state = existsSync(stateFile) ? JSON.parse(readFileSync(stateFile, 'utf8')) : {}; + state.tool_count = (state.tool_count || 0) + 1; + if (toolName === 'Edit' || toolName === 'Write') state.edit_count = (state.edit_count || 0) + 1; + appendFileSync( + eventsLog, + JSON.stringify({ ts: nowIso(), session_id: sid, tool_name: toolName }) + '\n' + ); + writeFileSync(stateFile, JSON.stringify(state)); +} + +function logicSessionEnd(dir, sid) { + const stateFile = join(dir, 'state', `${sid}.json`); + const sessionsLog = join(dir, 'sessions.jsonl'); + if (!existsSync(stateFile)) return; + const state = JSON.parse(readFileSync(stateFile, 'utf8')); + appendFileSync( + sessionsLog, + JSON.stringify({ + session_id: sid, + start: state.start_iso, + end: nowIso(), + duration_min: 0, + tool_count: state.tool_count || 0, + edit_count: state.edit_count || 0, + flags: { dependency: 0, escalation: 0, fatigue: 0, validation: state.val_flags || 0, pushback: 0 }, + }) + '\n' + ); + unlinkSync(stateFile); +} + +function measureLogicTime(fn, ...extraArgs) { + const samples = []; + for (let i = 0; i < ITERATIONS; i++) { + const dir = setupDir(); + const sid = `perf-${i}`; + try { + writeFileSync( + join(dir, 'state', `${sid}.json`), + JSON.stringify({ start_epoch: nowEpoch(), start_iso: nowIso(), tool_count: 0, edit_count: 0 }) + ); + const t0 = performance.now(); + fn(dir, sid, ...extraArgs); + samples.push(performance.now() - t0); + } finally { + rmSync(dir, { recursive: true, force: true }); + } + } + return samples; +} + +function assertWithRetry(measure, threshold, label) { + let samples = measure(); + let p = p95(samples); + if (p > threshold) { + samples = measure(); + p = p95(samples); + } + assert.ok( + p <= threshold, + `${label} p95 = ${p.toFixed(1)}ms exceeds ${threshold}ms (samples: ${samples.map(s => s.toFixed(1)).join(', ')})` + ); +} + +// --- Wall-clock tests (4) --- + +test('session-start.mjs wall-clock p95 within 200ms', () => { + assertWithRetry( + () => measureWallClock('session-start.mjs', { cwd: '/tmp' }), + WALL_CLOCK_P95_MS, + 'session-start wall-clock' + ); +}); + +test('prompt-analyzer.mjs wall-clock p95 within 200ms', () => { + assertWithRetry( + () => measureWallClock('prompt-analyzer.mjs', { prompt: 'are you sure I should do this? right?', cwd: '/tmp' }), + WALL_CLOCK_P95_MS, + 'prompt-analyzer wall-clock' + ); +}); + +test('tool-tracker.mjs wall-clock p95 within 200ms', () => { + assertWithRetry( + () => measureWallClock('tool-tracker.mjs', { tool_name: 'Edit', cwd: '/tmp' }), + WALL_CLOCK_P95_MS, + 'tool-tracker wall-clock' + ); +}); + +test('session-end.mjs wall-clock p95 within 200ms', () => { + assertWithRetry( + () => measureWallClock('session-end.mjs', { cwd: '/tmp' }), + WALL_CLOCK_P95_MS, + 'session-end wall-clock' + ); +}); + +// --- Logic-time tests (4) --- + +test('session-start logic-time p95 within 50ms', () => { + assertWithRetry( + () => measureLogicTime(logicSessionStart), + LOGIC_TIME_P95_MS, + 'session-start logic-time' + ); +}); + +test('prompt-analyzer logic-time p95 within 50ms', () => { + assertWithRetry( + () => measureLogicTime(logicPromptAnalyzer, 'are you sure I should do this? right?'), + LOGIC_TIME_P95_MS, + 'prompt-analyzer logic-time' + ); +}); + +test('tool-tracker logic-time p95 within 50ms', () => { + assertWithRetry( + () => measureLogicTime(logicToolTracker, 'Edit'), + LOGIC_TIME_P95_MS, + 'tool-tracker logic-time' + ); +}); + +test('session-end logic-time p95 within 50ms', () => { + assertWithRetry( + () => measureLogicTime(logicSessionEnd), + LOGIC_TIME_P95_MS, + 'session-end logic-time' + ); +});