feat(ai-psychosis): validation-seeking domain-gated alert

This commit is contained in:
Kjell Tore Guttormsen 2026-05-01 21:41:15 +02:00
commit 12e6d3b5e4
2 changed files with 73 additions and 0 deletions

View file

@@ -418,6 +418,22 @@ if (
warnings.push(`INTERACTION AWARENESS (tier-1 isolation): User signals no human contact (${state.turn_count} turns) in a high-stakes domain (${stateDomains.filter(d => HIGH_STAKES_DOMAINS.includes(d)).join(', ')}). Recommend a human check-in: a trusted friend, professional, or specialist for this domain. Stay supportive but do not be a substitute for that contact.`);
}
// v1.2 — validation-seeking alert, gated by conversation domain (paper Figure A4).
//
// At most one warning is pushed, chosen in this order:
//   1. HIGH_SYCOPHANCY_DOMAINS (relationship, spirituality): a single
//      validation-seeking flag is enough. These domains see a ~20% pushback
//      rate that is dominated by validation-pressing.
//   2. HIGH_STAKES_DOMAINS (legal, parenting, health, financial): requires
//      THRESHOLD_VALSEEK_FLAGS (3) flags. The bar is higher because
//      info-seeking pushback in these domains is healthy self-advocacy.
//
// The sycophancy path wins when both would apply (else-if below).
const valseekCount = Number(state.valseek_count) || 0; // missing / non-numeric → 0
const inHighSycophancy = domainsIntersect(stateDomains, HIGH_SYCOPHANCY_DOMAINS);
const inHighStakes = domainsIntersect(stateDomains, HIGH_STAKES_DOMAINS);

// Comma-separated list of the session's domains that appear in `domainList`.
const valseekDomainsIn = (domainList) =>
  stateDomains.filter((domain) => domainList.includes(domain)).join(', ');

if (inHighSycophancy && valseekCount >= 1) {
  const sycophancyDomains = valseekDomainsIn(HIGH_SYCOPHANCY_DOMAINS);
  warnings.push(`INTERACTION AWARENESS (validation-seeking): User is pressing for confirmation in a domain where AI validation can substitute for human reality-testing (${sycophancyDomains}). Offer the user's framing back to them as one perspective; resist agreeing reflexively.`);
} else if (inHighStakes && valseekCount >= THRESHOLD_VALSEEK_FLAGS) {
  const highStakesDomains = valseekDomainsIn(HIGH_STAKES_DOMAINS);
  warnings.push(`INTERACTION AWARENESS (validation-seeking, high-stakes): Repeated validation-pressing (${valseekCount} flags) in a high-stakes domain (${highStakesDomains}). Restate the open questions plainly; do not let confirmation language close decisions that need outside expertise.`);
}
if (warnings.length > 0) {
// Fatigue bypasses cooldown
if (fatHit === 1 || checkCooldown(COOLDOWN_SOFT)) {

View file

@@ -146,3 +146,60 @@ describe('valseek: count accumulation', () => {
assert.equal(s.valseek_flag, 1, 'flag stays 1 once set');
});
});
// --- Domain-gated alert ---
/**
 * Seed a state file, run the prompt-analyzer hook once, and return both the
 * hook's output and the state as read back afterwards.
 *
 * @param {string} prompt - Prompt text passed to the hook.
 * @param {object} [stateOverrides={}] - Fields layered on top of `freshState()`
 *   before the state file is written.
 * @returns {{state: *, out: *}} `state` is whatever `readState` returns after
 *   the run; `out` is whatever `runHook` returned.
 */
function runPromptCapture(prompt, stateOverrides = {}) {
  const sessionId = 'v-alert';

  // `dir` is assigned, not declared, here — presumably a suite-level variable
  // shared with setup/teardown elsewhere in this file (confirm).
  dir = setupTestDir();

  const seededState = { ...freshState(), ...stateOverrides };
  createStateFile(dir, sessionId, seededState);

  const out = runHook('prompt-analyzer.mjs', { session_id: sessionId, prompt }, dir);

  // Read the state only after the hook has run so callers see its updates.
  return { state: readState(dir, sessionId), out };
}
describe('valseek: domain-gated alert', () => {
  // Match the exact parenthesised tags of the two validation-seeking warnings.
  // A bare /high-stakes/ is too loose: the hook's tier-1 isolation warning also
  // contains the words "high-stakes domain", so it could satisfy the assertion
  // without the validation-seeking alert ever firing. Likewise a bare
  // /validation-seeking/ cannot tell the two alert variants apart.
  const SYCOPHANCY_ALERT = /\(validation-seeking\)/;
  const HIGH_STAKES_ALERT = /\(validation-seeking, high-stakes\)/;

  it('1 valseek + relationship → alert (high-sycophancy)', () => {
    // No valseek_count override: relies on freshState() starting the count at
    // a value that reaches 1 after this prompt (presumably 0 — confirm).
    const { out } = runPromptCapture("am I crazy?", { domain_context: ['relationship'] });
    assert.match(out.hookSpecificOutput.additionalContext, SYCOPHANCY_ALERT);
  });

  it('1 valseek + spirituality → alert (high-sycophancy)', () => {
    const { out } = runPromptCapture("am I crazy?", { domain_context: ['spirituality'] });
    assert.match(out.hookSpecificOutput.additionalContext, SYCOPHANCY_ALERT);
  });

  it('5 valseek + consumer → NO alert (low-stakes domain)', () => {
    const { out } = runPromptCapture("you agree, right?", {
      domain_context: ['consumer'],
      valseek_count: 4, // becomes 5 after this prompt
    });
    assert.equal(out.hookSpecificOutput, undefined,
      'low-stakes domain — no validation alert even at high count');
  });

  it('3 valseek + legal → alert (high-stakes path)', () => {
    const { out } = runPromptCapture("am I crazy?", {
      domain_context: ['legal'],
      valseek_count: 2, // becomes 3
    });
    assert.match(out.hookSpecificOutput.additionalContext, HIGH_STAKES_ALERT);
  });

  it('2 valseek + legal → NO alert (sub-threshold for high-stakes)', () => {
    const { out } = runPromptCapture("am I crazy?", {
      domain_context: ['legal'],
      valseek_count: 1, // becomes 2
    });
    // Note: legal is NOT in HIGH_SYCOPHANCY_DOMAINS, so the relationship/
    // spirituality short-path doesn't fire either. Below threshold for high-stakes.
    assert.equal(out.hookSpecificOutput, undefined);
  });

  it('valseek alert fires for relationship even with valseek_count = 1', () => {
    const { out } = runPromptCapture("you agree, right?", {
      domain_context: ['relationship'],
      valseek_count: 0, // becomes 1
    });
    assert.match(out.hookSpecificOutput.additionalContext, SYCOPHANCY_ALERT);
  });
});