test(ai-psychosis): perf.test.mjs enforces hook timing budget
This commit is contained in:
parent
5eecb968d8
commit
146cf8ba35
1 changed files with 266 additions and 0 deletions
266
plugins/ai-psychosis/tests/perf.test.mjs
Normal file
266
plugins/ai-psychosis/tests/perf.test.mjs
Normal file
|
|
@ -0,0 +1,266 @@
|
|||
// Hook timing budget enforcement.
|
||||
//
|
||||
// Two thresholds are measured per hook:
|
||||
//
|
||||
// - WALL_CLOCK_P95_MS = 200 — total round-trip including Node ESM cold-start.
|
||||
// The cold-start alone is 60-120ms on Intel Mac, so 100ms is unrealistic
|
||||
// for any subprocess-based hook. 200ms gives headroom for shared CI noise.
|
||||
//
|
||||
// - LOGIC_TIME_P95_MS = 50 — pure work (regex evaluation + JSONL/state I/O)
|
||||
// measured by a fixture-runner that imports lib.mjs once and exercises
|
||||
// the hook's hot path inline. This is the meaningful hook-perf assertion;
|
||||
// ESM cold-start is not something the plugin can optimize.
|
||||
//
|
||||
// p95 = the 4th value of 5 sorted iterations. Failing once triggers a single
|
||||
// retry to absorb transient OS noise; a second failure is treated as a real
|
||||
// signal (real perf regression or threshold needs tuning).
|
||||
|
||||
import { test } from 'node:test';
|
||||
import assert from 'node:assert/strict';
|
||||
import { execSync } from 'child_process';
|
||||
import {
|
||||
mkdtempSync, mkdirSync, writeFileSync, readFileSync, existsSync,
|
||||
unlinkSync, rmSync, appendFileSync,
|
||||
} from 'fs';
|
||||
import { join } from 'path';
|
||||
import { tmpdir } from 'os';
|
||||
import { nowIso, nowEpoch } from '../hooks/scripts/lib.mjs';
|
||||
|
||||
const SCRIPTS_DIR = join(import.meta.dirname, '..', 'hooks', 'scripts');
|
||||
const WALL_CLOCK_P95_MS = 200;
|
||||
const LOGIC_TIME_P95_MS = 50;
|
||||
const ITERATIONS = 5;
|
||||
|
||||
function setupDir() {
|
||||
const dir = mkdtempSync(join(tmpdir(), 'ia-perf-'));
|
||||
mkdirSync(join(dir, 'state'), { recursive: true });
|
||||
return dir;
|
||||
}
|
||||
|
||||
function p95(samples) {
|
||||
return [...samples].sort((a, b) => a - b)[3];
|
||||
}
|
||||
|
||||
// --- Wall-clock measurement (subprocess spawn) ---
|
||||
|
||||
function runWallClock(scriptName, stdinJson, dataDir) {
|
||||
const t0 = performance.now();
|
||||
execSync(`node ${join(SCRIPTS_DIR, scriptName)}`, {
|
||||
input: JSON.stringify(stdinJson),
|
||||
env: { ...process.env, CLAUDE_PLUGIN_DATA: dataDir },
|
||||
encoding: 'utf8',
|
||||
timeout: 5000,
|
||||
});
|
||||
return performance.now() - t0;
|
||||
}
|
||||
|
||||
function measureWallClock(scriptName, stdinTemplate) {
|
||||
const samples = [];
|
||||
for (let i = 0; i < ITERATIONS; i++) {
|
||||
const dir = setupDir();
|
||||
try {
|
||||
const sid = `perf-${i}`;
|
||||
// Pre-seed state for hooks that read it (tool-tracker, session-end)
|
||||
writeFileSync(
|
||||
join(dir, 'state', `${sid}.json`),
|
||||
JSON.stringify({ start_epoch: nowEpoch(), start_iso: nowIso(), tool_count: 0, edit_count: 0 })
|
||||
);
|
||||
samples.push(runWallClock(scriptName, { ...stdinTemplate, session_id: sid }, dir));
|
||||
} finally {
|
||||
rmSync(dir, { recursive: true, force: true });
|
||||
}
|
||||
}
|
||||
return samples;
|
||||
}
|
||||
|
||||
// --- Logic-time fixtures (no subprocess, single import of lib.mjs) ---
|
||||
//
|
||||
// These mirror each hook's hot path in pure inline code so we can measure
|
||||
// regex + I/O cost without paying the ~80ms ESM cold-start tax. The pattern
|
||||
// list intentionally mirrors the size class of prompt-analyzer's full
|
||||
// pattern set (~25 regexes) so the benchmark stays representative.
|
||||
|
||||
const samplePatterns = [
|
||||
/\bI\s+can'?t\s+do\s+this\s+without\b/i,
|
||||
/\bwhat\s+should\s+I\b/i,
|
||||
/\bI\s+need\s+you\s+to\b/i,
|
||||
/\bonly\s+you\s+understand\b/i,
|
||||
/\b(?:always|never|every|all)\s+the\s+time\b/i,
|
||||
/\bdefinitely\s+(?:should|will|need)\b/i,
|
||||
/\babsolutely\s+(?:right|correct)\b/i,
|
||||
/\bI\s+am\s+(?:tired|exhausted|drained)\b/i,
|
||||
/\blate\s+night\b/i,
|
||||
/\b(?:can'?t|cannot)\s+sleep\b/i,
|
||||
/\bI\s+(?:wish|want)\s+(?:I|you)\s+could\b/i,
|
||||
/\bdo\s+you\s+think\b/i,
|
||||
/\bare\s+you\s+sure\b/i,
|
||||
/\bright\?$/i,
|
||||
/\bagree\?$/i,
|
||||
/\bam\s+I\s+(?:right|wrong)\b/i,
|
||||
/\bplease\s+confirm\b/i,
|
||||
/\bI\s+keep\s+(?:thinking|coming\s+back)\b/i,
|
||||
/\bI\s+(?:can'?t|cannot)\s+stop\b/i,
|
||||
/\bone\s+more\s+(?:thing|question)\b/i,
|
||||
/\bjust\s+one\s+more\b/i,
|
||||
/\bI'?ve\s+been\s+thinking\b/i,
|
||||
/\bwhy\s+did\s+I\b/i,
|
||||
/\bI\s+messed\s+up\b/i,
|
||||
/\bI\s+made\s+a\s+mistake\b/i,
|
||||
];
|
||||
|
||||
function logicSessionStart(dir, sid) {
|
||||
const stateFile = join(dir, 'state', `${sid}.json`);
|
||||
const sessionsLog = join(dir, 'sessions.jsonl');
|
||||
const iso = nowIso();
|
||||
const epoch = nowEpoch();
|
||||
const state = { start_epoch: epoch, start_iso: iso, tool_count: 0, edit_count: 0 };
|
||||
writeFileSync(stateFile, JSON.stringify(state));
|
||||
appendFileSync(
|
||||
sessionsLog,
|
||||
JSON.stringify({ session_id: sid, start: iso, hour: new Date().getUTCHours(), is_late_night: false }) + '\n'
|
||||
);
|
||||
}
|
||||
|
||||
function logicPromptAnalyzer(dir, sid, prompt) {
|
||||
const stateFile = join(dir, 'state', `${sid}.json`);
|
||||
const state = existsSync(stateFile) ? JSON.parse(readFileSync(stateFile, 'utf8')) : {};
|
||||
let depHit = 0, valHit = 0;
|
||||
for (const p of samplePatterns) { if (p.test(prompt)) { valHit = 1; break; } }
|
||||
state.dep_flags = (state.dep_flags || 0) + depHit;
|
||||
state.val_flags = (state.val_flags || 0) + valHit;
|
||||
writeFileSync(stateFile, JSON.stringify(state));
|
||||
}
|
||||
|
||||
function logicToolTracker(dir, sid, toolName) {
|
||||
const stateFile = join(dir, 'state', `${sid}.json`);
|
||||
const eventsLog = join(dir, 'events.jsonl');
|
||||
const state = existsSync(stateFile) ? JSON.parse(readFileSync(stateFile, 'utf8')) : {};
|
||||
state.tool_count = (state.tool_count || 0) + 1;
|
||||
if (toolName === 'Edit' || toolName === 'Write') state.edit_count = (state.edit_count || 0) + 1;
|
||||
appendFileSync(
|
||||
eventsLog,
|
||||
JSON.stringify({ ts: nowIso(), session_id: sid, tool_name: toolName }) + '\n'
|
||||
);
|
||||
writeFileSync(stateFile, JSON.stringify(state));
|
||||
}
|
||||
|
||||
function logicSessionEnd(dir, sid) {
|
||||
const stateFile = join(dir, 'state', `${sid}.json`);
|
||||
const sessionsLog = join(dir, 'sessions.jsonl');
|
||||
if (!existsSync(stateFile)) return;
|
||||
const state = JSON.parse(readFileSync(stateFile, 'utf8'));
|
||||
appendFileSync(
|
||||
sessionsLog,
|
||||
JSON.stringify({
|
||||
session_id: sid,
|
||||
start: state.start_iso,
|
||||
end: nowIso(),
|
||||
duration_min: 0,
|
||||
tool_count: state.tool_count || 0,
|
||||
edit_count: state.edit_count || 0,
|
||||
flags: { dependency: 0, escalation: 0, fatigue: 0, validation: state.val_flags || 0, pushback: 0 },
|
||||
}) + '\n'
|
||||
);
|
||||
unlinkSync(stateFile);
|
||||
}
|
||||
|
||||
function measureLogicTime(fn, ...extraArgs) {
|
||||
const samples = [];
|
||||
for (let i = 0; i < ITERATIONS; i++) {
|
||||
const dir = setupDir();
|
||||
const sid = `perf-${i}`;
|
||||
try {
|
||||
writeFileSync(
|
||||
join(dir, 'state', `${sid}.json`),
|
||||
JSON.stringify({ start_epoch: nowEpoch(), start_iso: nowIso(), tool_count: 0, edit_count: 0 })
|
||||
);
|
||||
const t0 = performance.now();
|
||||
fn(dir, sid, ...extraArgs);
|
||||
samples.push(performance.now() - t0);
|
||||
} finally {
|
||||
rmSync(dir, { recursive: true, force: true });
|
||||
}
|
||||
}
|
||||
return samples;
|
||||
}
|
||||
|
||||
function assertWithRetry(measure, threshold, label) {
|
||||
let samples = measure();
|
||||
let p = p95(samples);
|
||||
if (p > threshold) {
|
||||
samples = measure();
|
||||
p = p95(samples);
|
||||
}
|
||||
assert.ok(
|
||||
p <= threshold,
|
||||
`${label} p95 = ${p.toFixed(1)}ms exceeds ${threshold}ms (samples: ${samples.map(s => s.toFixed(1)).join(', ')})`
|
||||
);
|
||||
}
|
||||
|
||||
// --- Wall-clock tests (4) ---
|
||||
|
||||
test('session-start.mjs wall-clock p95 within 200ms', () => {
|
||||
assertWithRetry(
|
||||
() => measureWallClock('session-start.mjs', { cwd: '/tmp' }),
|
||||
WALL_CLOCK_P95_MS,
|
||||
'session-start wall-clock'
|
||||
);
|
||||
});
|
||||
|
||||
test('prompt-analyzer.mjs wall-clock p95 within 200ms', () => {
|
||||
assertWithRetry(
|
||||
() => measureWallClock('prompt-analyzer.mjs', { prompt: 'are you sure I should do this? right?', cwd: '/tmp' }),
|
||||
WALL_CLOCK_P95_MS,
|
||||
'prompt-analyzer wall-clock'
|
||||
);
|
||||
});
|
||||
|
||||
test('tool-tracker.mjs wall-clock p95 within 200ms', () => {
|
||||
assertWithRetry(
|
||||
() => measureWallClock('tool-tracker.mjs', { tool_name: 'Edit', cwd: '/tmp' }),
|
||||
WALL_CLOCK_P95_MS,
|
||||
'tool-tracker wall-clock'
|
||||
);
|
||||
});
|
||||
|
||||
test('session-end.mjs wall-clock p95 within 200ms', () => {
|
||||
assertWithRetry(
|
||||
() => measureWallClock('session-end.mjs', { cwd: '/tmp' }),
|
||||
WALL_CLOCK_P95_MS,
|
||||
'session-end wall-clock'
|
||||
);
|
||||
});
|
||||
|
||||
// --- Logic-time tests (4) ---
|
||||
|
||||
test('session-start logic-time p95 within 50ms', () => {
|
||||
assertWithRetry(
|
||||
() => measureLogicTime(logicSessionStart),
|
||||
LOGIC_TIME_P95_MS,
|
||||
'session-start logic-time'
|
||||
);
|
||||
});
|
||||
|
||||
test('prompt-analyzer logic-time p95 within 50ms', () => {
|
||||
assertWithRetry(
|
||||
() => measureLogicTime(logicPromptAnalyzer, 'are you sure I should do this? right?'),
|
||||
LOGIC_TIME_P95_MS,
|
||||
'prompt-analyzer logic-time'
|
||||
);
|
||||
});
|
||||
|
||||
test('tool-tracker logic-time p95 within 50ms', () => {
|
||||
assertWithRetry(
|
||||
() => measureLogicTime(logicToolTracker, 'Edit'),
|
||||
LOGIC_TIME_P95_MS,
|
||||
'tool-tracker logic-time'
|
||||
);
|
||||
});
|
||||
|
||||
test('session-end logic-time p95 within 50ms', () => {
|
||||
assertWithRetry(
|
||||
() => measureLogicTime(logicSessionEnd),
|
||||
LOGIC_TIME_P95_MS,
|
||||
'session-end logic-time'
|
||||
);
|
||||
});
|
||||
Loading…
Add table
Add a link
Reference in a new issue