// ktg-plugin-marketplace/plugins/ai-psychosis/tests/perf.test.mjs

// Hook timing budget enforcement.
//
// Two thresholds are measured per hook:
//
// - WALL_CLOCK_P95_MS = 200 — total round-trip including Node ESM cold-start.
//   The cold-start alone is 60-120ms on Intel Mac, so 100ms is unrealistic
//   for any subprocess-based hook. 200ms gives headroom for shared CI noise.
//
// - LOGIC_TIME_P95_MS = 50 — pure work (regex evaluation + JSONL/state I/O)
//   measured by a fixture-runner that imports lib.mjs once and exercises
//   the hook's hot path inline. This is the meaningful hook-perf assertion;
//   ESM cold-start is not something the plugin can optimize.
//
// p95 = the 4th value of 5 sorted iterations. Failing once triggers a single
// retry to absorb transient OS noise; a second failure is treated as a real
// signal (real perf regression or threshold needs tuning).

import { test } from 'node:test';
import assert from 'node:assert/strict';
import { execFileSync, execSync } from 'child_process';
import {
  mkdtempSync, mkdirSync, writeFileSync, readFileSync, existsSync,
  unlinkSync, rmSync, appendFileSync,
} from 'fs';
import { join } from 'path';
import { tmpdir } from 'os';
import { nowIso, nowEpoch } from '../hooks/scripts/lib.mjs';

// Directory holding the hook entry-point scripts, resolved relative to this test file.
const SCRIPTS_DIR = join(import.meta.dirname, '..', 'hooks', 'scripts');
// p95 budget (ms) for a full subprocess round trip of one hook invocation.
const WALL_CLOCK_P95_MS = 200;
// p95 budget (ms) for the inline, in-process hot-path fixtures.
const LOGIC_TIME_P95_MS = 50;
// Number of timed samples gathered per measurement run.
const ITERATIONS = 5;

/**
 * Create a throwaway plugin-data directory for a single measurement.
 *
 * The directory is created under the OS temp dir with the `ia-perf-` prefix
 * and already contains an empty `state/` subdirectory.
 *
 * @returns {string} Path of the freshly created directory. The caller is
 *   responsible for removing it.
 */
function setupDir() {
  const prefix = join(tmpdir(), 'ia-perf-');
  const dataDir = mkdtempSync(prefix);
  const stateDir = join(dataDir, 'state');
  mkdirSync(stateDir, { recursive: true });
  return dataDir;
}

/**
 * Return the 95th-percentile sample using the "lower" rank method.
 *
 * The rank is derived from the sample count instead of being hard-coded, so
 * the result stays meaningful if the number of iterations changes. For the
 * five-sample runs used in this file it is the 4th-smallest value, i.e. the
 * single worst sample is discarded as noise.
 *
 * @param {number[]} samples - Timing samples in milliseconds; not mutated.
 * @returns {number} The sample at rank floor(0.95 * (n - 1)) in ascending order.
 * @throws {RangeError} If `samples` is empty (no percentile exists).
 */
function p95(samples) {
  if (samples.length === 0) {
    throw new RangeError('p95 requires at least one sample');
  }
  // Sort a copy so the caller's array keeps its original order.
  const sorted = [...samples].sort((a, b) => a - b);
  const rank = Math.floor((sorted.length - 1) * 0.95);
  return sorted[rank];
}

// --- Wall-clock measurement (subprocess spawn) ---

/**
 * Run one hook script in a child Node process and time the full round trip.
 *
 * The script is launched directly (no intermediate shell) with the same Node
 * binary that is running this test file. Passing the path as an argv entry
 * rather than interpolating it into a command string keeps the call working
 * when the checkout path contains spaces or shell metacharacters, and keeps
 * shell start-up out of the measured interval.
 *
 * @param {string} scriptName - File name of the hook inside SCRIPTS_DIR.
 * @param {object} stdinJson - Payload serialized as JSON onto the child's stdin.
 * @param {string} dataDir - Directory exported to the child as CLAUDE_PLUGIN_DATA.
 * @returns {number} Elapsed wall-clock time in milliseconds.
 * @throws {Error} If the child exits non-zero or exceeds the 5 s timeout.
 */
function runWallClock(scriptName, stdinJson, dataDir) {
  const scriptPath = join(SCRIPTS_DIR, scriptName);
  const t0 = performance.now();
  execFileSync(process.execPath, [scriptPath], {
    input: JSON.stringify(stdinJson),
    env: { ...process.env, CLAUDE_PLUGIN_DATA: dataDir },
    encoding: 'utf8',
    timeout: 5000,
  });
  return performance.now() - t0;
}

/**
 * Gather ITERATIONS wall-clock samples for a single hook script.
 *
 * Every sample runs against its own temporary data directory, which is
 * removed again even when the hook invocation throws.
 *
 * @param {string} scriptName - Hook file name handed to runWallClock.
 * @param {object} stdinTemplate - Base stdin payload; `session_id` is added per run.
 * @returns {number[]} Elapsed milliseconds, one entry per iteration.
 */
function measureWallClock(scriptName, stdinTemplate) {
  const timings = [];
  let run = 0;
  while (run < ITERATIONS) {
    const dataDir = setupDir();
    const sessionId = `perf-${run}`;
    try {
      // Seed a state file so hooks that read one (tool-tracker, session-end) find it.
      const seedState = {
        start_epoch: nowEpoch(),
        start_iso: nowIso(),
        tool_count: 0,
        edit_count: 0,
      };
      writeFileSync(join(dataDir, 'state', `${sessionId}.json`), JSON.stringify(seedState));
      const payload = { ...stdinTemplate, session_id: sessionId };
      timings.push(runWallClock(scriptName, payload, dataDir));
    } finally {
      rmSync(dataDir, { recursive: true, force: true });
    }
    run += 1;
  }
  return timings;
}

// --- Logic-time fixtures (no subprocess, single import of lib.mjs) ---
//
// These mirror each hook's hot path in pure inline code so we can measure
// regex + I/O cost without paying the ~80ms ESM cold-start tax. The pattern
// list intentionally mirrors the size class of prompt-analyzer's full
// pattern set so the benchmark stays representative.
//
// v1.2 pattern count: ~133 = 41 v1.1 (25 negative + 12 pushback + 4 domain)
//   + 48 new domains (8 × 6)
//   + 32 user-info (15 people + 10 digital + 7 no)
//   + 12 valseek
// The fixture below carries 133 patterns (the same per-category counts as the
// breakdown above) to bracket the realistic prompt-analyzer cost without
// overweighting the perf budget on test fixture maintenance.
//
// Patterns here are structurally equivalent to the real ones (length +
// complexity), not literal copies — the privacy boundary at
// prompt-analyzer.mjs:119 means production patterns must stay co-located
// with the privacy wipe. Keep in sync (approximately) with v1.2 pattern count.

// Benchmark regex fixture: 133 case-insensitive patterns, grouped by the
// category comments below. None of them use the `g` or `y` flag, so `.test()`
// carries no `lastIndex` state from one call to the next.
const samplePatterns = [
  // Negative emotional patterns (25 — matches v1.1.0)
  /\bI\s+can'?t\s+do\s+this\s+without\b/i,
  /\bwhat\s+should\s+I\b/i,
  /\bI\s+need\s+you\s+to\b/i,
  /\bonly\s+you\s+understand\b/i,
  /\b(?:always|never|every|all)\s+the\s+time\b/i,
  /\bdefinitely\s+(?:should|will|need)\b/i,
  /\babsolutely\s+(?:right|correct)\b/i,
  /\bI\s+am\s+(?:tired|exhausted|drained)\b/i,
  /\blate\s+night\b/i,
  /\b(?:can'?t|cannot)\s+sleep\b/i,
  /\bI\s+(?:wish|want)\s+(?:I|you)\s+could\b/i,
  /\bdo\s+you\s+think\b/i,
  /\bare\s+you\s+sure\b/i,
  /\bright\?$/i,
  /\bagree\?$/i,
  /\bam\s+I\s+(?:right|wrong)\b/i,
  /\bplease\s+confirm\b/i,
  /\bI\s+keep\s+(?:thinking|coming\s+back)\b/i,
  /\bI\s+(?:can'?t|cannot)\s+stop\b/i,
  /\bone\s+more\s+(?:thing|question)\b/i,
  /\bjust\s+one\s+more\b/i,
  /\bI'?ve\s+been\s+thinking\b/i,
  /\bwhy\s+did\s+I\b/i,
  /\bI\s+messed\s+up\b/i,
  /\bI\s+made\s+a\s+mistake\b/i,
  // Pushback patterns (12 — matches v1.1.0)
  /\bbut\s+(?:that|this)\s+is\s+wrong\b/i,
  /\bno,?\s+I\s+(?:meant|asked|said)\b/i,
  /\byou(?:'?re|\s+are)\s+(?:wrong|mistaken|incorrect)\b/i,
  /\bthat'?s\s+not\s+(?:right|what)\b/i,
  /\bactually,?\s+(?:I|the)\b/i,
  /\bdisagree\s+(?:with|because)\b/i,
  /\bI\s+(?:still|already)\s+(?:think|believe)\b/i,
  /\blisten,?\s+(?:I|you)\b/i,
  /\bdon'?t\s+(?:tell|give)\s+me\b/i,
  /\bjust\s+(?:do|say|tell)\s+(?:it|me)\b/i,
  /\bI\s+(?:already|just)\s+decided\b/i,
  /\byou\s+(?:keep|always)\s+(?:saying|missing)\b/i,
  // Domain patterns (4 — matches v1.1.0)
  /\bmy\s+(?:partner|spouse|husband|wife|boyfriend|girlfriend)\b/i,
  /\b(?:our|the)\s+relationship\b/i,
  /\bbreak\s+up\s+(?:with|over)\b/i,
  /\bdating\s+(?:someone|him|her|them)\b/i,
  // v1.2: 48 new domain patterns (8 × 6) — structurally equivalent to real ones
  /\b(?:my|our)\s+(?:lawyer|attorney)\b/i,
  /\bfiling\s+a?\s+lawsuit\b/i,
  /\b(?:custody|divorce)\s+(?:hearing|case)\b/i,
  /\b(?:contract|nda)\s+(?:violation|dispute)\b/i,
  /\bsued?\s+(?:by|for)\b/i,
  /\b(?:landlord|tenant)\s+(?:rights|dispute)\b/i,
  /\bmy\s+(?:kid|child|son|daughter)\b/i,
  /\b(?:potty|sleep)\s+training\s+issue\b/i,
  /\bas\s+a\s+(?:parent|mom|dad)\b/i,
  /\b(?:bedtime|breastfeeding)\s+routine\b/i,
  /\b(?:school|preschool)\s+(?:choice|conflict)\b/i,
  /\bmy\s+(?:child|kid)'?s?\s+(?:diagnosis|teacher)\b/i,
  /\bmy\s+(?:doctor|physician|gp)\b/i,
  /\b(?:diagnosed|prescribed)\s+(?:with|for)\b/i,
  /\bmy\s+symptoms?\s+(?:are|include)\b/i,
  /\b(?:my|i\s+have)\s+(?:cancer|diabetes)\b/i,
  /\b(?:blood\s+pressure|heart\s+rate)\s+reading\b/i,
  /\b(?:scheduled|having)\s+(?:surgery|procedure)\b/i,
  /\bmy\s+(?:savings|retirement|401k)\s+account\b/i,
  /\b(?:mortgage|loan|debt)\s+(?:payment|advice)\b/i,
  /\bmy\s+tax\s+(?:return|bracket)\b/i,
  /\b(?:budget|paycheck)\s+(?:negotiation|advice)\b/i,
  /\b(?:stock|portfolio)\s+(?:pick|allocation)\b/i,
  /\b(?:credit\s+card|interest\s+rate)\s+advice\b/i,
  /\bmy\s+(?:boss|manager|coworker)\b/i,
  /\b(?:performance\s+review|promotion|fired)\b/i,
  /\bmy\s+(?:job|career|workplace)\s+(?:change|conflict)\b/i,
  /\b(?:resume|cv)\s+advice\b/i,
  /\bproject\s+deadline\s+(?:fight|conflict)\b/i,
  /\b(?:remote|hybrid)\s+(?:policy|mandate)\b/i,
  /\bmy\s+(?:guru|spiritual\s+teacher)\b/i,
  /\b(?:meditation|mindfulness)\s+(?:practice|journey)\b/i,
  /\b(?:karma|dharma|chakra)\b/i,
  /\b(?:god|the\s+universe)\s+(?:wants|told)\b/i,
  /\b(?:soulmate|twin\s+flame|past\s+life)\b/i,
  /\b(?:prayer|spiritual\s+journey)\b/i,
  /\bshould\s+i\s+buy\s+(?:a|the)\b/i,
  /\bwhich\s+(?:laptop|phone|car)\s+should\b/i,
  /\b(?:product|item)\s+(?:review|comparison)\b/i,
  /\b(?:amazon|online)\s+(?:order|purchase)\b/i,
  /\b(?:better|best)\s+(?:deal|price)\s+(?:for|on)\b/i,
  /\b(?:upgrade|replace)\s+my\s+(?:laptop|phone)\b/i,
  /\b(?:learn|practice)\s+(?:a|the)\s+habit\s+of\b/i,
  /\bmy\s+(?:morning|daily)\s+routine\b/i,
  /\bread(?:ing)?\s+more\s+books\b/i,
  /\b(?:start|build)\s+a\s+(?:journal|hobby)\b/i,
  /\b(?:learning|teaching\s+myself)\b/i,
  /\b(?:improve|level\s+up)\s+(?:myself|my\s+focus)\b/i,
  // v1.2: 32 user-info patterns (15 people + 10 digital + 7 no)
  /\bmy\s+(?:therapist|counselor|psychologist)\b/i,
  /\bmy\s+(?:doctor|gp|physician)\b/i,
  /\bmy\s+(?:friend|best\s+friend)\b/i,
  /\bmy\s+(?:partner|spouse|wife|husband)\b/i,
  /\bmy\s+(?:mom|dad|mother|father)\b/i,
  /\bmy\s+(?:mentor|coach|advisor)\b/i,
  /\bmy\s+support\s+group\b/i,
  /\bi\s+asked\s+my\s+(?:friend|therapist)\b/i,
  /\bi\s+told\s+my\s+(?:friend|therapist|partner)\b/i,
  /\bmy\s+family\s+(?:said|told)\b/i,
  /\bmy\s+(?:lawyer|attorney)\b/i,
  /\bmy\s+(?:pastor|priest|rabbi)\b/i,
  /\bmy\s+(?:teacher|professor|tutor)\b/i,
  /\bmy\s+(?:colleague|coworker)\b/i,
  /\bi\s+reached\s+out\s+to\s+my\s+(?:friend|therapist)\b/i,
  /\bi\s+(?:googled|searched)\b/i,
  /\bi\s+read\s+(?:online|on\s+the\s+internet)\b/i,
  /\b(?:chatgpt|gpt|gemini)\s+(?:said|told)\b/i,
  /\b(?:found|saw)\s+a\s+(?:forum\s+post|reddit\s+thread)\b/i,
  /\b(?:youtube|tiktok|twitter)\s+(?:video|post)\b/i,
  /\baccording\s+to\s+(?:wikipedia|google)\b/i,
  /\bi\s+asked\s+(?:chatgpt|gpt|claude)\b/i,
  /\bonline\s+says\s+(?:that|this)\b/i,
  /\bsearched\s+(?:google|stackoverflow)\b/i,
  /\bi\s+watched\s+a\s+youtube\b/i,
  /\b(?:nobody|no\s+one)\s+knows\b/i,
  /\bi\s+haven'?t\s+told\s+(?:anyone|anybody)\b/i,
  /\bdealing\s+with\s+this\s+alone\b/i,
  /\bi\s+can'?t\s+tell\s+(?:anyone|anybody)\b/i,
  /\bkeep\s+(?:this|it)\s+(?:to\s+myself|secret)\b/i,
  /\bnobody\s+(?:in\s+my\s+life|around\s+me)\s+would\s+understand\b/i,
  /\bjust\s+me\s+(?:and|with)\s+(?:my|the)\s+(?:thoughts|head)\b/i,
  // v1.2: 12 valseek patterns
  /\bisn'?t\s+(?:it|that|she|he)\b[^.!?]*\?/i,
  /\bdon'?t\s+you\s+(?:think|agree|see)\b[^.!?]*\?/i,
  /\bright,?\s+(?:though|so)\b[^.!?]*\?/i,
  /\bam\s+i\s+(?:crazy|wrong|the\s+only\s+one)\b/i,
  /\btell\s+me\s+i'?m\s+not\s+(?:crazy|wrong)\b/i,
  /\bis\s+it\s+(?:normal|crazy|reasonable)\s+(?:to|that)\b/i,
  /\byou\s+agree,?\s+right\??/i,
  /\btell\s+me\s+i'?m\s+right\b/i,
  /\bback\s+me\s+up\s+(?:on\s+this|here)\b/i,
  /\bi\s+(?:already|just)\s+(?:decided|knew)\b.*(?:should|right)\b/i,
  /\bi'?ve\s+made\s+up\s+my\s+mind\b.*(?:right|correct)\b/i,
  /\bi\s+know\s+i'?m\s+right\s+(?:about|on)\b/i,
];

/**
 * Inline stand-in for the session-start hook's hot path.
 *
 * Writes a fresh per-session state file and appends one start record to
 * `sessions.jsonl` inside `dir`.
 *
 * @param {string} dir - Plugin data directory (must already contain `state/`).
 * @param {string} sid - Session id used for the state file name and the record.
 * @returns {void}
 */
function logicSessionStart(dir, sid) {
  const startIso = nowIso();
  const startEpoch = nowEpoch();

  const initialState = {
    start_epoch: startEpoch,
    start_iso: startIso,
    tool_count: 0,
    edit_count: 0,
  };
  writeFileSync(join(dir, 'state', `${sid}.json`), JSON.stringify(initialState));

  const startRecord = {
    session_id: sid,
    start: startIso,
    hour: new Date().getUTCHours(),
    is_late_night: false,
  };
  appendFileSync(join(dir, 'sessions.jsonl'), `${JSON.stringify(startRecord)}\n`);
}

/**
 * Inline stand-in for the prompt-analyzer hook's hot path.
 *
 * Loads the session state (or starts from an empty object), checks the prompt
 * against `samplePatterns`, bumps the flag counters, and writes the state back.
 *
 * @param {string} dir - Plugin data directory.
 * @param {string} sid - Session id selecting the state file.
 * @param {string} prompt - Prompt text to scan.
 * @returns {void}
 */
function logicPromptAnalyzer(dir, sid, prompt) {
  const statePath = join(dir, 'state', `${sid}.json`);
  let state = {};
  if (existsSync(statePath)) {
    state = JSON.parse(readFileSync(statePath, 'utf8'));
  }

  // The dependency counter is never incremented by this fixture.
  const depHit = 0;
  // NOTE(review): `.some` stops at the first matching pattern, so a prompt
  // that matches early only exercises a prefix of the list.
  const valHit = samplePatterns.some((pattern) => pattern.test(prompt)) ? 1 : 0;

  state.dep_flags = (state.dep_flags || 0) + depHit;
  state.val_flags = (state.val_flags || 0) + valHit;
  writeFileSync(statePath, JSON.stringify(state));
}

/**
 * Inline stand-in for the tool-tracker hook's hot path.
 *
 * Increments the tool counter (and the edit counter for `Edit`/`Write`),
 * appends one event line to `events.jsonl`, then persists the state.
 *
 * @param {string} dir - Plugin data directory.
 * @param {string} sid - Session id selecting the state file.
 * @param {string} toolName - Name of the tool that was invoked.
 * @returns {void}
 */
function logicToolTracker(dir, sid, toolName) {
  const statePath = join(dir, 'state', `${sid}.json`);
  let state = {};
  if (existsSync(statePath)) {
    state = JSON.parse(readFileSync(statePath, 'utf8'));
  }

  state.tool_count = (state.tool_count || 0) + 1;
  const countsAsEdit = ['Edit', 'Write'].includes(toolName);
  if (countsAsEdit) {
    state.edit_count = (state.edit_count || 0) + 1;
  }

  // The event is logged before the state file is rewritten.
  const event = { ts: nowIso(), session_id: sid, tool_name: toolName };
  appendFileSync(join(dir, 'events.jsonl'), `${JSON.stringify(event)}\n`);
  writeFileSync(statePath, JSON.stringify(state));
}

/**
 * Inline stand-in for the session-end hook's hot path.
 *
 * When a state file exists for the session, appends an end record built from
 * it to `sessions.jsonl` and deletes the state file. Does nothing otherwise.
 *
 * @param {string} dir - Plugin data directory.
 * @param {string} sid - Session id selecting the state file.
 * @returns {void}
 */
function logicSessionEnd(dir, sid) {
  const statePath = join(dir, 'state', `${sid}.json`);
  if (!existsSync(statePath)) {
    return;
  }

  const state = JSON.parse(readFileSync(statePath, 'utf8'));
  const endRecord = {
    session_id: sid,
    start: state.start_iso,
    end: nowIso(),
    duration_min: 0,
    tool_count: state.tool_count || 0,
    edit_count: state.edit_count || 0,
    flags: {
      dependency: 0,
      escalation: 0,
      fatigue: 0,
      validation: state.val_flags || 0,
      pushback: 0,
    },
  };
  appendFileSync(join(dir, 'sessions.jsonl'), `${JSON.stringify(endRecord)}\n`);

  // The per-session state is dropped once its end record has been written.
  unlinkSync(statePath);
}

/**
 * Gather ITERATIONS in-process timing samples for one logic fixture.
 *
 * Each run gets its own temporary data directory with a pre-seeded state
 * file; only the call to `fn` itself is timed. The directory is removed even
 * when `fn` throws.
 *
 * @param {Function} fn - Fixture invoked as `fn(dir, sid, ...extraArgs)`.
 * @param {...*} extraArgs - Additional arguments forwarded to `fn`.
 * @returns {number[]} Elapsed milliseconds, one entry per iteration.
 */
function measureLogicTime(fn, ...extraArgs) {
  const timings = [];
  let run = 0;
  while (run < ITERATIONS) {
    const dataDir = setupDir();
    const sessionId = `perf-${run}`;
    try {
      const seedState = {
        start_epoch: nowEpoch(),
        start_iso: nowIso(),
        tool_count: 0,
        edit_count: 0,
      };
      writeFileSync(join(dataDir, 'state', `${sessionId}.json`), JSON.stringify(seedState));

      const startedAt = performance.now();
      fn(dataDir, sessionId, ...extraArgs);
      const elapsed = performance.now() - startedAt;
      timings.push(elapsed);
    } finally {
      rmSync(dataDir, { recursive: true, force: true });
    }
    run += 1;
  }
  return timings;
}

/**
 * Assert that a measurement's p95 stays within a budget, retrying once.
 *
 * The measurement is taken once; if its p95 is over the threshold it is taken
 * a second time and only that second result is asserted on.
 *
 * @param {() => number[]} measure - Produces a fresh array of samples (ms).
 * @param {number} threshold - Maximum allowed p95 in milliseconds.
 * @param {string} label - Human-readable name used in the failure message.
 * @returns {void}
 * @throws {AssertionError} If the final p95 exceeds `threshold`.
 */
function assertWithRetry(measure, threshold, label) {
  const takeMeasurement = () => {
    const samples = measure();
    return { samples, p: p95(samples) };
  };

  let outcome = takeMeasurement();
  if (outcome.p > threshold) {
    // One retry to absorb transient noise.
    outcome = takeMeasurement();
  }

  const { samples, p } = outcome;
  const formattedSamples = samples.map(s => s.toFixed(1)).join(', ');
  assert.ok(
    p <= threshold,
    `${label} p95 = ${p.toFixed(1)}ms exceeds ${threshold}ms (samples: ${formattedSamples})`
  );
}

// --- Wall-clock tests (4) ---

// One subprocess-based budget test per hook script, registered in a fixed
// order. Each row is [hook name, stdin payload template].
for (const [hook, stdinTemplate] of [
  ['session-start', { cwd: '/tmp' }],
  ['prompt-analyzer', { prompt: 'are you sure I should do this? right?', cwd: '/tmp' }],
  ['tool-tracker', { tool_name: 'Edit', cwd: '/tmp' }],
  ['session-end', { cwd: '/tmp' }],
]) {
  test(`${hook}.mjs wall-clock p95 within 200ms`, () => {
    assertWithRetry(
      () => measureWallClock(`${hook}.mjs`, stdinTemplate),
      WALL_CLOCK_P95_MS,
      `${hook} wall-clock`
    );
  });
}

// --- Logic-time tests (4) ---

// One in-process budget test per hook fixture, registered in a fixed order.
// Each row is [hook name, fixture function, extra arguments for the fixture].
for (const [hook, fixture, extraArgs] of [
  ['session-start', logicSessionStart, []],
  ['prompt-analyzer', logicPromptAnalyzer, ['are you sure I should do this? right?']],
  ['tool-tracker', logicToolTracker, ['Edit']],
  ['session-end', logicSessionEnd, []],
]) {
  test(`${hook} logic-time p95 within 50ms`, () => {
    assertWithRetry(
      () => measureLogicTime(fixture, ...extraArgs),
      LOGIC_TIME_P95_MS,
      `${hook} logic-time`
    );
  });
}

// --- v1.2: cross-session read at scale ---
//
// Pre-seeds sessions.jsonl with 1000 records to exercise the realistic
// readRecentEndRecords path. Tail-first scan should bound cost regardless.
/**
 * Gather ITERATIONS wall-clock samples of session-start.mjs against a data
 * directory whose `sessions.jsonl` already holds `recordCount` records.
 *
 * @param {number} recordCount - Number of seed records written before each run.
 * @returns {number[]} Elapsed milliseconds, one entry per iteration.
 */
function measureSessionStartWithJsonlFixture(recordCount) {
  // Serialize seed record `r`; records are spaced one minute apart, oldest first.
  const buildSeedLine = (r) => {
    const ageMs = (recordCount - r) * 60_000;
    return JSON.stringify({
      session_id: `seed-${r}`,
      start: new Date(Date.now() - ageMs).toISOString(),
      end: new Date(Date.now() - ageMs + 30_000).toISOString(),
      duration_min: 30,
      domain_context: ['legal'],
      user_info_class: 'no',
      flags: { dependency: 0, escalation: 0, fatigue: 0, validation: 0, pushback: 0 },
    });
  };

  const timings = [];
  let run = 0;
  while (run < ITERATIONS) {
    const dataDir = setupDir();
    try {
      const seedLines = [];
      for (let r = 0; r < recordCount; r += 1) {
        seedLines.push(buildSeedLine(r));
      }
      writeFileSync(join(dataDir, 'sessions.jsonl'), `${seedLines.join('\n')}\n`);

      const sessionId = `bigfix-${run}`;
      const seedState = {
        start_epoch: nowEpoch(),
        start_iso: nowIso(),
        tool_count: 0,
        edit_count: 0,
      };
      writeFileSync(join(dataDir, 'state', `${sessionId}.json`), JSON.stringify(seedState));

      const payload = { session_id: sessionId, cwd: '/tmp' };
      timings.push(runWallClock('session-start.mjs', payload, dataDir));
    } finally {
      rmSync(dataDir, { recursive: true, force: true });
    }
    run += 1;
  }
  return timings;
}

test('session-start with 1000-record sessions.jsonl wall-clock p95 within 200ms', () => {
  // Per the suite's notes, session-start.mjs raises its tier-2 alert from the
  // tail of sessions.jsonl (readRecentEndRecords(3)); a tail-first scan should
  // keep the wall-clock cost bounded no matter how large the file grows.
  const seedRecordCount = 1000;
  const measure = () => measureSessionStartWithJsonlFixture(seedRecordCount);
  assertWithRetry(
    measure,
    WALL_CLOCK_P95_MS,
    'session-start wall-clock with 1000-record fixture'
  );
});