From 0075fe089be923775c82b1c0ec818c584480baee Mon Sep 17 00:00:00 2001 From: Kjell Tore Guttormsen Date: Fri, 1 May 2026 21:56:14 +0200 Subject: [PATCH] test(ai-psychosis): perf budget validated at v1.2 pattern set --- plugins/ai-psychosis/tests/perf.test.mjs | 160 ++++++++++++++++++++++- 1 file changed, 154 insertions(+), 6 deletions(-) diff --git a/plugins/ai-psychosis/tests/perf.test.mjs b/plugins/ai-psychosis/tests/perf.test.mjs index c03a51b..d5d1fd9 100644 --- a/plugins/ai-psychosis/tests/perf.test.mjs +++ b/plugins/ai-psychosis/tests/perf.test.mjs @@ -78,12 +78,19 @@ function measureWallClock(scriptName, stdinTemplate) { // These mirror each hook's hot path in pure inline code so we can measure // regex + I/O cost without paying the ~80ms ESM cold-start tax. The pattern // list intentionally mirrors the size class of prompt-analyzer's full -// pattern set (41 regexes = 25 negative + 12 pushback + 4 domain in v1.1.0) -// so the benchmark stays representative. Patterns here are structurally -// equivalent to the real ones (length + complexity), not literal copies — -// the privacy boundary at prompt-analyzer.mjs:119 means production patterns -// must stay co-located with the privacy wipe. Keep in sync with -// prompt-analyzer.mjs pattern count when v1.2 patterns ship (Steps 6-8). +// pattern set so the benchmark stays representative. +// +// v1.2 pattern count: ~133 = 41 v1.1 (25 negative + 12 pushback + 4 domain) +// + 48 new domains (8 × 6) +// + 32 user-info (15 people + 10 digital + 7 no) +// + 12 valseek +// Fixture sized at ~91+ to bracket the realistic prompt-analyzer cost without +// overweighting the perf budget on test fixture maintenance. +// +// Patterns here are structurally equivalent to the real ones (length + +// complexity), not literal copies — the privacy boundary at +// prompt-analyzer.mjs:119 means production patterns must stay co-located +// with the privacy wipe. Keep in sync (approximately) with v1.2 pattern count. const samplePatterns = [ // Negative emotional patterns (25 — matches v1.1.0) @@ -130,6 +137,101 @@ const samplePatterns = [ /\b(?:our|the)\s+relationship\b/i, /\bbreak\s+up\s+(?:with|over)\b/i, /\bdating\s+(?:someone|him|her|them)\b/i, + // v1.2: 48 new domain patterns (8 × 6) — structurally equivalent to real ones + /\b(?:my|our)\s+(?:lawyer|attorney)\b/i, + /\bfiling\s+a?\s+lawsuit\b/i, + /\b(?:custody|divorce)\s+(?:hearing|case)\b/i, + /\b(?:contract|nda)\s+(?:violation|dispute)\b/i, + /\bsued?\s+(?:by|for)\b/i, + /\b(?:landlord|tenant)\s+(?:rights|dispute)\b/i, + /\bmy\s+(?:kid|child|son|daughter)\b/i, + /\b(?:potty|sleep)\s+training\s+issue\b/i, + /\bas\s+a\s+(?:parent|mom|dad)\b/i, + /\b(?:bedtime|breastfeeding)\s+routine\b/i, + /\b(?:school|preschool)\s+(?:choice|conflict)\b/i, + /\bmy\s+(?:child|kid)'?s?\s+(?:diagnosis|teacher)\b/i, + /\bmy\s+(?:doctor|physician|gp)\b/i, + /\b(?:diagnosed|prescribed)\s+(?:with|for)\b/i, + /\bmy\s+symptoms?\s+(?:are|include)\b/i, + /\b(?:my|i\s+have)\s+(?:cancer|diabetes)\b/i, + /\b(?:blood\s+pressure|heart\s+rate)\s+reading\b/i, + /\b(?:scheduled|having)\s+(?:surgery|procedure)\b/i, + /\bmy\s+(?:savings|retirement|401k)\s+account\b/i, + /\b(?:mortgage|loan|debt)\s+(?:payment|advice)\b/i, + /\bmy\s+tax\s+(?:return|bracket)\b/i, + /\b(?:budget|paycheck)\s+(?:negotiation|advice)\b/i, + /\b(?:stock|portfolio)\s+(?:pick|allocation)\b/i, + /\b(?:credit\s+card|interest\s+rate)\s+advice\b/i, + /\bmy\s+(?:boss|manager|coworker)\b/i, + /\b(?:performance\s+review|promotion|fired)\b/i, + /\bmy\s+(?:job|career|workplace)\s+(?:change|conflict)\b/i, + /\b(?:resume|cv)\s+advice\b/i, + /\bproject\s+deadline\s+(?:fight|conflict)\b/i, + /\b(?:remote|hybrid)\s+(?:policy|mandate)\b/i, + /\bmy\s+(?:guru|spiritual\s+teacher)\b/i, + /\b(?:meditation|mindfulness)\s+(?:practice|journey)\b/i, + /\b(?:karma|dharma|chakra)\b/i, + /\b(?:god|the\s+universe)\s+(?:wants|told)\b/i, + /\b(?:soulmate|twin\s+flame|past\s+life)\b/i, + /\b(?:prayer|spiritual\s+journey)\b/i, + /\bshould\s+i\s+buy\s+(?:a|the)\b/i, + /\bwhich\s+(?:laptop|phone|car)\s+should\b/i, + /\b(?:product|item)\s+(?:review|comparison)\b/i, + /\b(?:amazon|online)\s+(?:order|purchase)\b/i, + /\b(?:better|best)\s+(?:deal|price)\s+(?:for|on)\b/i, + /\b(?:upgrade|replace)\s+my\s+(?:laptop|phone)\b/i, + /\b(?:learn|practice)\s+(?:a|the)\s+habit\s+of\b/i, + /\bmy\s+(?:morning|daily)\s+routine\b/i, + /\bread(?:ing)?\s+more\s+books\b/i, + /\b(?:start|build)\s+a\s+(?:journal|hobby)\b/i, + /\b(?:learning|teaching\s+myself)\b/i, + /\b(?:improve|level\s+up)\s+(?:myself|my\s+focus)\b/i, + // v1.2: 32 user-info patterns (15 people + 10 digital + 7 no) + /\bmy\s+(?:therapist|counselor|psychologist)\b/i, + /\bmy\s+(?:doctor|gp|physician)\b/i, + /\bmy\s+(?:friend|best\s+friend)\b/i, + /\bmy\s+(?:partner|spouse|wife|husband)\b/i, + /\bmy\s+(?:mom|dad|mother|father)\b/i, + /\bmy\s+(?:mentor|coach|advisor)\b/i, + /\bmy\s+support\s+group\b/i, + /\bi\s+asked\s+my\s+(?:friend|therapist)\b/i, + /\bi\s+told\s+my\s+(?:friend|therapist|partner)\b/i, + /\bmy\s+family\s+(?:said|told)\b/i, + /\bmy\s+(?:lawyer|attorney)\b/i, + /\bmy\s+(?:pastor|priest|rabbi)\b/i, + /\bmy\s+(?:teacher|professor|tutor)\b/i, + /\bmy\s+(?:colleague|coworker)\b/i, + /\bi\s+reached\s+out\s+to\s+my\s+(?:friend|therapist)\b/i, + /\bi\s+(?:googled|searched)\b/i, + /\bi\s+read\s+(?:online|on\s+the\s+internet)\b/i, + /\b(?:chatgpt|gpt|gemini)\s+(?:said|told)\b/i, + /\b(?:found|saw)\s+a\s+(?:forum\s+post|reddit\s+thread)\b/i, + /\b(?:youtube|tiktok|twitter)\s+(?:video|post)\b/i, + /\baccording\s+to\s+(?:wikipedia|google)\b/i, + /\bi\s+asked\s+(?:chatgpt|gpt|claude)\b/i, + /\bonline\s+says\s+(?:that|this)\b/i, + /\bsearched\s+(?:google|stackoverflow)\b/i, + /\bi\s+watched\s+a\s+youtube\b/i, + /\b(?:nobody|no\s+one)\s+knows\b/i, + /\bi\s+haven'?t\s+told\s+(?:anyone|anybody)\b/i, + /\bdealing\s+with\s+this\s+alone\b/i, + /\bi\s+can'?t\s+tell\s+(?:anyone|anybody)\b/i, + /\bkeep\s+(?:this|it)\s+(?:to\s+myself|secret)\b/i, + /\bnobody\s+(?:in\s+my\s+life|around\s+me)\s+would\s+understand\b/i, + /\bjust\s+me\s+(?:and|with)\s+(?:my|the)\s+(?:thoughts|head)\b/i, + // v1.2: 12 valseek patterns + /\bisn'?t\s+(?:it|that|she|he)\b[^.!?]*\?/i, + /\bdon'?t\s+you\s+(?:think|agree|see)\b[^.!?]*\?/i, + /\bright,?\s+(?:though|so)\b[^.!?]*\?/i, + /\bam\s+i\s+(?:crazy|wrong|the\s+only\s+one)\b/i, + /\btell\s+me\s+i'?m\s+not\s+(?:crazy|wrong)\b/i, + /\bis\s+it\s+(?:normal|crazy|reasonable)\s+(?:to|that)\b/i, + /\byou\s+agree,?\s+right\??/i, + /\btell\s+me\s+i'?m\s+right\b/i, + /\bback\s+me\s+up\s+(?:on\s+this|here)\b/i, + /\bi\s+(?:already|just)\s+(?:decided|knew)\b.*(?:should|right)\b/i, + /\bi'?ve\s+made\s+up\s+my\s+mind\b.*(?:right|correct)\b/i, + /\bi\s+know\s+i'?m\s+right\s+(?:about|on)\b/i, ]; function logicSessionStart(dir, sid) { @@ -288,3 +390,49 @@ test('session-end logic-time p95 within 50ms', () => { 'session-end logic-time' ); }); + +// --- v1.2: cross-session read at scale --- +// +// Pre-seeds sessions.jsonl with 1000 records to exercise the realistic +// readRecentEndRecords path. Tail-first scan should bound cost regardless. +function measureSessionStartWithJsonlFixture(recordCount) { + const samples = []; + for (let i = 0; i < ITERATIONS; i++) { + const dir = setupDir(); + try { + // Pre-seed sessions.jsonl with mixed start/end records. + const lines = []; + for (let r = 0; r < recordCount; r++) { + const startISO = new Date(Date.now() - (recordCount - r) * 60_000).toISOString(); + const endISO = new Date(Date.now() - (recordCount - r) * 60_000 + 30_000).toISOString(); + lines.push(JSON.stringify({ + session_id: `seed-${r}`, start: startISO, + end: endISO, duration_min: 30, + domain_context: ['legal'], user_info_class: 'no', + flags: { dependency: 0, escalation: 0, fatigue: 0, validation: 0, pushback: 0 }, + })); + } + writeFileSync(join(dir, 'sessions.jsonl'), lines.join('\n') + '\n'); + const sid = `bigfix-${i}`; + writeFileSync( + join(dir, 'state', `${sid}.json`), + JSON.stringify({ start_epoch: nowEpoch(), start_iso: nowIso(), tool_count: 0, edit_count: 0 }) + ); + samples.push(runWallClock('session-start.mjs', { session_id: sid, cwd: '/tmp' }, dir)); + } finally { + rmSync(dir, { recursive: true, force: true }); + } + } + return samples; +} + +test('session-start with 1000-record sessions.jsonl wall-clock p95 within 200ms', () => { + // The tier-2 alert in session-start.mjs reads the tail of sessions.jsonl + // via readRecentEndRecords(3). Tail-first scan should keep wall-clock + // bounded regardless of total file size. + assertWithRetry( + () => measureSessionStartWithJsonlFixture(1000), + WALL_CLOCK_P95_MS, + 'session-start wall-clock with 1000-record fixture' + ); +});