feat(ai-psychosis): add validation-seeking detector

This commit is contained in:
Kjell Tore Guttormsen 2026-05-01 21:37:06 +02:00
commit b88cd8a978
2 changed files with 182 additions and 0 deletions

View file

@ -232,6 +232,29 @@ const userInfoNoPatterns = [
/\bjust me (?:and|with) (?:my|the) (?:thoughts|head|computer|claude)\b/i,
];
// v1.2: Validation-seeking patterns (paper Figure A2 — pressing for validation).
// Distinct from existing val_flags ("right?" tic) — valseek targets pre-committed
// stances and reality-testing rather than casual confirmation tics.
// Every pattern is case-insensitive and has no `g`/`y` flag, so `.test()` keeps
// no state between calls and the literals can be reused for every prompt.
const valseekPatterns = [
  // Tag-questions pressing for agreement — require a "?" within the clause
  // so we don't false-positive on flat statements like "this isn't that bad".
  /\bisn'?t (?:it|that|she|he|this|true)\b[^.!?]*\?/i,
  /\bdon'?t you (?:think|agree|see)\b[^.!?]*\?/i,
  /\bright,?\s+(?:though|so)\b[^.!?]*\?/i,
  // Reality-testing — am-I-the-only-one
  /\bam i (?:crazy|wrong|the only one|imagining)\b/i,
  /\btell me i'?m not (?:crazy|wrong|imagining)\b/i,
  /\bis it (?:normal|crazy|reasonable) (?:to|that|for)\b/i,
  // Side-taking pressing. The trailing \b keeps "right" a whole word, so
  // "you agree, rightfully ..." is not treated as a confirmation request.
  /\byou agree,?\s+right\b/i,
  /\btell me i'?m right\b/i,
  /\bback me up (?:on this|here)\b/i,
  // Pre-committed stance + confirmation. The confirmation word must be a whole
  // word (\b on both sides); otherwise "incorrect", "bright" or "copyright"
  // would be read as "correct"/"right".
  /\bi (?:already|just) (?:decided|knew|know).*\b(?:should|right|correct)\b/i,
  /\bI'?ve made up my mind.*\b(?:right|correct|good)\b/i,
  /\bI know I'?m right (?:about|on) (?:this|that)\b/i,
];
// Raise each category flag on the first pattern that matches the prompt.
// The flags are only ever set to 1 here; a miss leaves the prior value alone.
for (const pattern of depPatterns) {
  if (pattern.test(prompt)) {
    depHit = 1;
    break;
  }
}
for (const pattern of escPatterns) {
  if (pattern.test(prompt)) {
    escHit = 1;
    break;
  }
}
for (const pattern of fatPatterns) {
  if (pattern.test(prompt)) {
    fatHit = 1;
    break;
  }
}
@ -256,6 +279,10 @@ let userInfoPeopleHit = 0; for (const p of userInfoPeoplePatterns) { if (p.tes
// 0/1 flags: 1 when at least one pattern of the list matches the prompt.
// `.some()` stops at the first match, mirroring an early-exit loop.
let userInfoDigitalHit = userInfoDigitalPatterns.some((pattern) => pattern.test(prompt)) ? 1 : 0;
let userInfoNoHit = userInfoNoPatterns.some((pattern) => pattern.test(prompt)) ? 1 : 0;
// v1.2: Validation-seeking detection — distinct from val_flags. valseekHit is
// a 0/1 flag: it is 1 when any valseek pattern matches this prompt. Matching
// stops at the first hit, so this is NOT a count of how many patterns matched.
let valseekHit = valseekPatterns.some((pattern) => pattern.test(prompt)) ? 1 : 0;
// Drop this variable's reference to the prompt text once the checks above are
// done. Reassigning only releases the reference held here; the old string is
// reclaimed by the garbage collector if nothing else still points to it.
prompt = '';
@ -308,6 +335,13 @@ if (candidate) {
}
state.user_info_class = nextClass;
// v1.2: validation-seeking accumulator. Runs only when this prompt produced a
// valseek hit: valseek_count goes up by one (a missing or non-numeric stored
// value is treated as 0) and valseek_flag is set to 1. Nothing in this block
// ever resets the flag, so once set it stays set as far as this code goes.
if (valseekHit) {
  const storedCount = Number(state.valseek_count);
  const baseCount = Number.isNaN(storedCount) ? 0 : storedCount;
  state.valseek_count = baseCount + 1;
  state.valseek_flag = 1;
}
// v1.2: domain_context is always an array. Coerce v1.1.0 string shape on read.
const anyDomainHit = domainHit
|| domainLegalHit || domainParentingHit || domainHealthHit