feat(ai-psychosis): add validation-seeking detector
This commit is contained in:
parent
ca6567b501
commit
b88cd8a978
2 changed files with 182 additions and 0 deletions
|
|
@ -232,6 +232,29 @@ const userInfoNoPatterns = [
|
|||
/\bjust me (?:and|with) (?:my|the) (?:thoughts|head|computer|claude)\b/i,
|
||||
];
|
||||
|
||||
// v1.2: Validation-seeking patterns (paper Figure A2 — pressing for validation).
|
||||
// Distinct from existing val_flags ("right?" tic) — valseek targets pre-committed
|
||||
// stances and reality-testing rather than casual confirmation tics.
|
||||
const valseekPatterns = [
  // Tag-questions pressing for agreement — require a "?" within the clause
  // so we don't false-positive on flat statements like "this isn't that bad".
  /\bisn'?t (?:it|that|she|he|this|true)\b[^.!?]*\?/i,
  /\bdon'?t you (?:think|agree|see)\b[^.!?]*\?/i,
  /\bright,?\s+(?:though|so)\b[^.!?]*\?/i,

  // Reality-testing — am-I-the-only-one
  /\bam i (?:crazy|wrong|the only one|imagining)\b/i,
  /\btell me i'?m not (?:crazy|wrong|imagining)\b/i,
  /\bis it (?:normal|crazy|reasonable) (?:to|that|for)\b/i,

  // Side-taking pressing. The trailing \b keeps "rightly"/"rights" from
  // matching; a following "?" is still accepted because \b sits before it.
  /\byou agree,?\s+right\b/i,
  /\btell me i'?m right\b/i,
  /\bback me up (?:on this|here)\b/i,

  // Pre-committed stance + confirmation. The confirmation word must be a whole
  // word: without the leading \b, "copyright" or "incorrect" would count as
  // "right"/"correct" and flag ordinary technical prompts.
  /\bi (?:already|just) (?:decided|knew|know).*\b(?:should|right|correct)\b/i,
  /\bI'?ve made up my mind.*\b(?:right|correct|good)\b/i,
  /\bI know I'?m right (?:about|on) (?:this|that)\b/i,
];
|
||||
|
||||
// Each category flag becomes 1 at the first pattern that matches the prompt;
// scanning of that category stops there.
for (const re of depPatterns) {
  if (!re.test(prompt)) continue;
  depHit = 1;
  break;
}
for (const re of escPatterns) {
  if (!re.test(prompt)) continue;
  escHit = 1;
  break;
}
for (const re of fatPatterns) {
  if (!re.test(prompt)) continue;
  fatHit = 1;
  break;
}
|
||||
|
|
@ -256,6 +279,10 @@ let userInfoPeopleHit = 0; for (const p of userInfoPeoplePatterns) { if (p.tes
|
|||
// 1 when any "digital" user-info pattern matches the prompt, otherwise 0.
let userInfoDigitalHit = 0;
for (const re of userInfoDigitalPatterns) {
  if (!re.test(prompt)) continue;
  userInfoDigitalHit = 1;
  break;
}

// 1 when any "no" user-info pattern matches the prompt, otherwise 0.
let userInfoNoHit = 0;
for (const re of userInfoNoPatterns) {
  if (!re.test(prompt)) continue;
  userInfoNoHit = 1;
  break;
}
|
||||
|
||||
// v1.2: Validation-seeking detection — distinct from val_flags. Records
|
||||
// whether any valseek pattern matched this prompt (0 or 1), not how many.
|
||||
// 1 when at least one valseek pattern matches the prompt, otherwise 0.
let valseekHit = 0;
if (valseekPatterns.some((re) => re.test(prompt))) {
  valseekHit = 1;
}

// Clear prompt from memory
prompt = '';
|
||||
|
||||
|
|
@ -308,6 +335,13 @@ if (candidate) {
|
|||
}
|
||||
state.user_info_class = nextClass;
|
||||
|
||||
// v1.2: validation-seeking accumulator. A prompt with a hit adds one to
// valseek_count and sets valseek_flag to 1; nothing here ever clears the flag.
if (valseekHit) {
  // A missing or non-numeric stored count is treated as 0.
  const priorCount = Number(state.valseek_count) || 0;
  state.valseek_count = priorCount + 1;
  state.valseek_flag = 1;
}
|
||||
|
||||
// v1.2: domain_context is always an array. Coerce v1.1.0 string shape on read.
|
||||
const anyDomainHit = domainHit
|
||||
|| domainLegalHit || domainParentingHit || domainHealthHit
|
||||
|
|
|
|||
148
plugins/ai-psychosis/tests/validation-seeking.test.mjs
Normal file
148
plugins/ai-psychosis/tests/validation-seeking.test.mjs
Normal file
|
|
@ -0,0 +1,148 @@
|
|||
// validation-seeking.test.mjs — verifies v1.2 validation-seeking detector.
|
||||
//
|
||||
// Distinct from existing val_flags ("right?" tic). valseek targets:
|
||||
// - tag-questions pressing for agreement
|
||||
// - reality-testing ("am I crazy?", "is it normal?")
|
||||
// - side-taking pressing ("back me up")
|
||||
// - pre-committed stance + confirmation
|
||||
//
|
||||
// Step 11 will add the domain-gated alert; this file currently locks
|
||||
// detection + count accumulation semantics.
|
||||
|
||||
import { describe, it, afterEach } from 'node:test';
|
||||
import assert from 'node:assert/strict';
|
||||
import { runHook, setupTestDir, cleanupTestDir, createStateFile, readState } from './test-helper.mjs';
|
||||
|
||||
// Test directory of the most recent run; cleaned up after every test.
let dir;
afterEach(() => {
  if (!dir) return;
  cleanupTestDir(dir);
});
|
||||
|
||||
/**
 * Builds a new session-state object with every counter and flag zeroed.
 * `start_epoch` is set 60 seconds before the current time; a new object
 * (including a new `user_info_flags`) is returned on every call.
 *
 * @returns {object} Baseline state used to seed a test session.
 */
function freshState() {
  const nowSeconds = Math.floor(Date.now() / 1000);
  const baseline = {
    start_epoch: nowSeconds - 60,
    start_iso: '2026-05-01T10:00:00Z',
    tool_count: 0,
    edit_count: 0,
    last_event_epoch: 0,
    burst_count: 0,
    dep_flags: 0,
    esc_flags: 0,
    fatigue_flags: 0,
    val_flags: 0,
    pushback_count: 0,
    domain_context: null,
    user_info_class: null,
    user_info_flags: { yes_people: 0, yes_digital: 0, no: 0 },
    turn_count: 0,
    valseek_count: 0,
    valseek_flag: 0,
    last_warning_epoch: 0,
  };
  return baseline;
}
|
||||
|
||||
/**
 * Runs the prompt-analyzer hook once against a newly created test directory
 * and returns the state read back afterwards.
 *
 * @param {string} prompt - Prompt text passed to the hook.
 * @param {object} [stateOverrides={}] - Fields laid over `freshState()` before
 *   the state file is written.
 * @returns {*} Whatever `readState` returns for session 'v1'.
 */
function runPrompt(prompt, stateOverrides = {}) {
  const sessionId = 'v1';
  // Stored in the module-level `dir` so the afterEach hook can clean it up.
  dir = setupTestDir();
  const seededState = Object.assign({}, freshState(), stateOverrides);
  createStateFile(dir, sessionId, seededState);
  runHook('prompt-analyzer.mjs', { session_id: sessionId, prompt }, dir);
  return readState(dir, sessionId);
}
|
||||
|
||||
// --- Pattern detection ---
|
||||
|
||||
// Tag-questions: each prompt below must register exactly one valseek hit.
describe('valseek: tag-questions pressing for agreement', () => {
  it('matches "isn\'t it?"', () => {
    const { valseek_count: count, valseek_flag: flag } = runPrompt("she's wrong, isn't she?");
    assert.equal(count, 1);
    assert.equal(flag, 1);
  });

  it('matches "don\'t you think?"', () => {
    const { valseek_count: count } = runPrompt("this approach is better, don't you think?");
    assert.equal(count, 1);
  });

  it('matches "right, though?"', () => {
    const { valseek_count: count } = runPrompt("I should leave him, right, though?");
    assert.equal(count, 1);
  });
});
|
||||
|
||||
// Reality-testing: table of [test title, prompt]; every prompt must count once.
describe('valseek: reality-testing patterns', () => {
  const cases = [
    ['matches "am I crazy"', "am I crazy for thinking this?"],
    ['matches "tell me I\'m not crazy"', "tell me I'm not crazy for feeling betrayed"],
    ['matches "is it normal to"', "is it normal to feel this way after years?"],
  ];

  for (const [title, prompt] of cases) {
    it(title, () => {
      const result = runPrompt(prompt);
      assert.equal(result.valseek_count, 1);
    });
  }
});
|
||||
|
||||
// Side-taking: table of [test title, prompt]; every prompt must count once.
describe('valseek: side-taking pressing', () => {
  const cases = [
    ['matches "you agree, right?"', "you agree, right?"],
    ['matches "back me up here"', "she lied to me — back me up here"],
  ];

  for (const [title, prompt] of cases) {
    it(title, () => {
      const result = runPrompt(prompt);
      assert.equal(result.valseek_count, 1);
    });
  }
});
|
||||
|
||||
// Pre-committed stance: table of [test title, prompt]; every prompt must count once.
describe('valseek: pre-committed stance', () => {
  const cases = [
    ['matches "I already decided ... right"', "I already decided to quit, that's right?"],
    ['matches "I know I\'m right about this"', "I know I'm right about this whole thing"],
  ];

  for (const [title, prompt] of cases) {
    it(title, () => {
      const result = runPrompt(prompt);
      assert.equal(result.valseek_count, 1);
    });
  }
});
|
||||
|
||||
// --- Negative cases ---
|
||||
|
||||
// Prompts that must leave valseek_count at 0.
describe('valseek: false-positive guards', () => {
  it('does NOT match casual "right?" tic alone', () => {
    const s = runPrompt('the function returns true, right?');
    // Casual right? hits the existing val_flags pattern but NOT valseek.
    assert.equal(s.valseek_count, 0);
  });

  it('does NOT match technical question without pressing pattern', () => {
    const s = runPrompt('what does this regex do?');
    assert.equal(s.valseek_count, 0);
  });

  it('does NOT match flat "isn\'t that" statement without a question mark', () => {
    // The tag-question patterns require a "?" in the clause; a plain
    // statement containing "isn't that" must not be counted.
    const s = runPrompt("this isn't that bad");
    assert.equal(s.valseek_count, 0);
  });
});
|
||||
|
||||
// --- Accumulation ---
|
||||
|
||||
// Accumulation: several hook runs against one session's state file.
describe('valseek: count accumulation', () => {
  it('accumulates across multiple prompts', () => {
    const sessionId = 'v-acc';
    dir = setupTestDir();
    createStateFile(dir, sessionId, freshState());

    // Five prompts, each expected to register one hit.
    const prompts = [
      "am I crazy for staying?",
      "you agree, right?",
      "isn't she wrong?",
      "I know I'm right on this",
      "tell me I'm not crazy",
    ];
    prompts.forEach((prompt) => {
      runHook('prompt-analyzer.mjs', { session_id: sessionId, prompt }, dir);
    });

    const finalState = readState(dir, sessionId);
    assert.equal(finalState.valseek_count, 5);
    assert.equal(finalState.valseek_flag, 1);
  });

  it('valseek_flag is sticky once set, even if later prompt has no hit', () => {
    const sessionId = 'v-sticky';
    dir = setupTestDir();
    createStateFile(dir, sessionId, freshState());

    // First prompt hits; the second one does not.
    for (const prompt of ['am I crazy?', 'refactor this code']) {
      runHook('prompt-analyzer.mjs', { session_id: sessionId, prompt }, dir);
    }

    const finalState = readState(dir, sessionId);
    assert.equal(finalState.valseek_count, 1, 'count is unchanged by later non-matching prompt');
    assert.equal(finalState.valseek_flag, 1, 'flag stays 1 once set');
  });
});
|
||||
Loading…
Add table
Add a link
Reference in a new issue