From b88cd8a97894ccdc316b22fcd43b01d207c9f621 Mon Sep 17 00:00:00 2001 From: Kjell Tore Guttormsen Date: Fri, 1 May 2026 21:37:06 +0200 Subject: [PATCH] feat(ai-psychosis): add validation-seeking detector --- .../hooks/scripts/prompt-analyzer.mjs | 34 ++++ .../tests/validation-seeking.test.mjs | 148 ++++++++++++++++++ 2 files changed, 182 insertions(+) create mode 100644 plugins/ai-psychosis/tests/validation-seeking.test.mjs diff --git a/plugins/ai-psychosis/hooks/scripts/prompt-analyzer.mjs b/plugins/ai-psychosis/hooks/scripts/prompt-analyzer.mjs index 0a602c3..69bafe4 100644 --- a/plugins/ai-psychosis/hooks/scripts/prompt-analyzer.mjs +++ b/plugins/ai-psychosis/hooks/scripts/prompt-analyzer.mjs @@ -232,6 +232,29 @@ const userInfoNoPatterns = [ /\bjust me (?:and|with) (?:my|the) (?:thoughts|head|computer|claude)\b/i, ]; +// v1.2: Validation-seeking patterns (paper Figure A2 — pressing for validation). +// Distinct from existing val_flags ("right?" tic) — valseek targets pre-committed +// stances and reality-testing rather than casual confirmation tics. +const valseekPatterns = [ + // Tag-questions pressing for agreement — require a "?" within the clause + // so we don't false-positive on flat statements like "this isn't that bad". 
+ /\bisn'?t (?:it|that|she|he|this|true)\b[^.!?]*\?/i, + /\bdon'?t you (?:think|agree|see)\b[^.!?]*\?/i, + /\bright,?\s+(?:though|so)\b[^.!?]*\?/i, + // Reality-testing — am-I-the-only-one + /\bam i (?:crazy|wrong|the only one|imagining)\b/i, + /\btell me i'?m not (?:crazy|wrong|imagining)\b/i, + /\bis it (?:normal|crazy|reasonable) (?:to|that|for)\b/i, + // Side-taking pressing + /\byou agree,?\s+right\??/i, + /\btell me i'?m right\b/i, + /\bback me up (?:on this|here)\b/i, + // Pre-committed stance + confirmation (confirmation word is \b-anchored on both sides so "incorrect"/"copyright" do not count) + /\bi (?:already|just) (?:decided|knew|know).*\b(?:should|right|correct)\b/i, + /\bI'?ve made up my mind.*\b(?:right|correct|good)\b/i, + /\bI know I'?m right (?:about|on) (?:this|that)\b/i, +]; + for (const p of depPatterns) { if (p.test(prompt)) { depHit = 1; break; } } for (const p of escPatterns) { if (p.test(prompt)) { escHit = 1; break; } } for (const p of fatPatterns) { if (p.test(prompt)) { fatHit = 1; break; } } @@ -256,6 +279,10 @@ let userInfoPeopleHit = 0; for (const p of userInfoPeoplePatterns) { if (p.tes let userInfoDigitalHit = 0; for (const p of userInfoDigitalPatterns) { if (p.test(prompt)) { userInfoDigitalHit = 1; break; } } let userInfoNoHit = 0; for (const p of userInfoNoPatterns) { if (p.test(prompt)) { userInfoNoHit = 1; break; } } +// v1.2: Validation-seeking detection — distinct from val_flags. valseekHit is +// a 0/1 flag set by the first matching pattern; it is not a per-pattern count. +let valseekHit = 0; for (const p of valseekPatterns) { if (p.test(prompt)) { valseekHit = 1; break; } } + // Clear prompt from memory prompt = ''; @@ -308,6 +335,13 @@ if (candidate) { } state.user_info_class = nextClass; +// v1.2: validation-seeking accumulator. valseek_flag flips to 1 on first +// hit and stays 1 (sticky for session); valseek_count adds 1 per matching prompt. +if (valseekHit) { + state.valseek_count = (Number(state.valseek_count) || 0) + 1; + state.valseek_flag = 1; +} + // v1.2: domain_context is always an array. Coerce v1.1.0 string shape on read. 
const anyDomainHit = domainHit || domainLegalHit || domainParentingHit || domainHealthHit diff --git a/plugins/ai-psychosis/tests/validation-seeking.test.mjs b/plugins/ai-psychosis/tests/validation-seeking.test.mjs new file mode 100644 index 0000000..54b7bdb --- /dev/null +++ b/plugins/ai-psychosis/tests/validation-seeking.test.mjs @@ -0,0 +1,148 @@ +// validation-seeking.test.mjs — verifies v1.2 validation-seeking detector. +// +// Distinct from existing val_flags ("right?" tic). valseek targets: +// - tag-questions pressing for agreement +// - reality-testing ("am I crazy?", "is it normal?") +// - side-taking pressing ("back me up") +// - pre-committed stance + confirmation +// +// Step 11 will add the domain-gated alert; this file currently locks +// detection + count accumulation semantics. + +import { describe, it, afterEach } from 'node:test'; +import assert from 'node:assert/strict'; +import { runHook, setupTestDir, cleanupTestDir, createStateFile, readState } from './test-helper.mjs'; + +let dir; +afterEach(() => { if (dir) { cleanupTestDir(dir); dir = undefined; } }); + +function freshState() { + return { + start_epoch: Math.floor(Date.now() / 1000) - 60, + start_iso: '2026-05-01T10:00:00Z', + tool_count: 0, edit_count: 0, + last_event_epoch: 0, burst_count: 0, + dep_flags: 0, esc_flags: 0, fatigue_flags: 0, val_flags: 0, + pushback_count: 0, domain_context: null, + user_info_class: null, + user_info_flags: { yes_people: 0, yes_digital: 0, no: 0 }, + turn_count: 0, + valseek_count: 0, valseek_flag: 0, + last_warning_epoch: 0, + }; +} + +function runPrompt(prompt, stateOverrides = {}) { + dir = setupTestDir(); + createStateFile(dir, 'v1', { ...freshState(), ...stateOverrides }); + runHook('prompt-analyzer.mjs', { session_id: 'v1', prompt }, dir); + return readState(dir, 'v1'); +} + +// --- Pattern detection --- + +describe('valseek: tag-questions pressing for agreement', () => { + it('matches "isn\'t she?"', () => { + const s = runPrompt("she's wrong, isn't she?"); 
+ assert.equal(s.valseek_count, 1); + assert.equal(s.valseek_flag, 1); + }); + + it('matches "don\'t you think?"', () => { + const s = runPrompt("this approach is better, don't you think?"); + assert.equal(s.valseek_count, 1); + }); + + it('matches "right, though?"', () => { + const s = runPrompt("I should leave him, right, though?"); + assert.equal(s.valseek_count, 1); + }); +}); + +describe('valseek: reality-testing patterns', () => { + it('matches "am I crazy"', () => { + const s = runPrompt("am I crazy for thinking this?"); + assert.equal(s.valseek_count, 1); + }); + + it('matches "tell me I\'m not crazy"', () => { + const s = runPrompt("tell me I'm not crazy for feeling betrayed"); + assert.equal(s.valseek_count, 1); + }); + + it('matches "is it normal to"', () => { + const s = runPrompt("is it normal to feel this way after years?"); + assert.equal(s.valseek_count, 1); + }); +}); + +describe('valseek: side-taking pressing', () => { + it('matches "you agree, right?"', () => { + const s = runPrompt("you agree, right?"); + assert.equal(s.valseek_count, 1); + }); + + it('matches "back me up here"', () => { + const s = runPrompt("she lied to me — back me up here"); + assert.equal(s.valseek_count, 1); + }); +}); + +describe('valseek: pre-committed stance', () => { + it('matches "I already decided ... right"', () => { + const s = runPrompt("I already decided to quit, that's right?"); + assert.equal(s.valseek_count, 1); + }); + + it('matches "I know I\'m right about this"', () => { + const s = runPrompt("I know I'm right about this whole thing"); + assert.equal(s.valseek_count, 1); + }); +}); + +// --- Negative cases --- + +describe('valseek: false-positive guards', () => { + it('does NOT match casual "right?" tic alone', () => { + const s = runPrompt('the function returns true, right?'); + // Casual right? hits the existing val_flags pattern but NOT valseek. 
+ assert.equal(s.valseek_count, 0); + }); + + it('does NOT match technical question without pressing pattern', () => { + const s = runPrompt('what does this regex do?'); + assert.equal(s.valseek_count, 0); + }); +}); + +// --- Accumulation --- + +describe('valseek: count accumulation', () => { + it('accumulates across multiple prompts', () => { + dir = setupTestDir(); + createStateFile(dir, 'v-acc', freshState()); + const prompts = [ + "am I crazy for staying?", + "you agree, right?", + "isn't she wrong?", + "I know I'm right on this", + "tell me I'm not crazy", + ]; + for (const p of prompts) { + runHook('prompt-analyzer.mjs', { session_id: 'v-acc', prompt: p }, dir); + } + const s = readState(dir, 'v-acc'); + assert.equal(s.valseek_count, 5); + assert.equal(s.valseek_flag, 1); + }); + + it('valseek_flag is sticky once set, even if later prompt has no hit', () => { + dir = setupTestDir(); + createStateFile(dir, 'v-sticky', freshState()); + runHook('prompt-analyzer.mjs', { session_id: 'v-sticky', prompt: 'am I crazy?' }, dir); + runHook('prompt-analyzer.mjs', { session_id: 'v-sticky', prompt: 'refactor this code' }, dir); + const s = readState(dir, 'v-sticky'); + assert.equal(s.valseek_count, 1, 'count is unchanged by later non-matching prompt'); + assert.equal(s.valseek_flag, 1, 'flag stays 1 once set'); + }); +});