feat(ai-psychosis): validation-seeking domain-gated alert
This commit is contained in:
parent
61584f42d6
commit
12e6d3b5e4
2 changed files with 73 additions and 0 deletions
|
|
@@ -418,6 +418,22 @@ if (
|
|||
warnings.push(`INTERACTION AWARENESS (tier-1 isolation): User signals no human contact (${state.turn_count} turns) in a high-stakes domain (${stateDomains.filter(d => HIGH_STAKES_DOMAINS.includes(d)).join(', ')}). Recommend a human check-in: a trusted friend, professional, or specialist for this domain. Stay supportive but do not be a substitute for that contact.`);
|
||||
}
|
||||
|
||||
// v1.2 — validation-seeking alert, gated by conversation domain (paper Figure A4).
//
// At most one warning is pushed; the high-sycophancy path is checked first:
//   1. Any active domain is in HIGH_SYCOPHANCY_DOMAINS (relationship,
//      spirituality) and at least one validation-seeking flag was recorded.
//      These domains see ~20% pushback rate dominated by validation-pressing,
//      so a single flag is enough.
//   2. Otherwise, any active domain is in HIGH_STAKES_DOMAINS (legal, parenting,
//      health, financial) and the flag count has reached THRESHOLD_VALSEEK_FLAGS
//      (3). The bar is higher because info-seeking pushback in these domains is
//      healthy self-advocacy.
//
// A missing or non-numeric state.valseek_count is treated as 0.
const valseekCount = Number(state.valseek_count) || 0;
const inHighSycophancy = domainsIntersect(stateDomains, HIGH_SYCOPHANCY_DOMAINS);
const inHighStakes = domainsIntersect(stateDomains, HIGH_STAKES_DOMAINS);

if (inHighSycophancy && valseekCount >= 1) {
  // Name only the active domains that actually triggered this path.
  const sycophancyDomainList = stateDomains
    .filter((domain) => HIGH_SYCOPHANCY_DOMAINS.includes(domain))
    .join(', ');
  warnings.push(`INTERACTION AWARENESS (validation-seeking): User is pressing for confirmation in a domain where AI validation can substitute for human reality-testing (${sycophancyDomainList}). Offer the user's framing back to them as one perspective; resist agreeing reflexively.`);
} else if (inHighStakes && valseekCount >= THRESHOLD_VALSEEK_FLAGS) {
  // Same idea for the high-stakes path; the flag count is surfaced as well.
  const highStakesDomainList = stateDomains
    .filter((domain) => HIGH_STAKES_DOMAINS.includes(domain))
    .join(', ');
  warnings.push(`INTERACTION AWARENESS (validation-seeking, high-stakes): Repeated validation-pressing (${valseekCount} flags) in a high-stakes domain (${highStakesDomainList}). Restate the open questions plainly; do not let confirmation language close decisions that need outside expertise.`);
}
|
||||
|
||||
if (warnings.length > 0) {
|
||||
// Fatigue bypasses cooldown
|
||||
if (fatHit === 1 || checkCooldown(COOLDOWN_SOFT)) {
|
||||
|
|
|
|||
|
|
@@ -146,3 +146,60 @@ describe('valseek: count accumulation', () => {
|
|||
assert.equal(s.valseek_flag, 1, 'flag stays 1 once set');
|
||||
});
|
||||
});
|
||||
|
||||
// --- Domain-gated alert ---
|
||||
|
||||
/**
 * Run the prompt-analyzer hook once against a freshly seeded session and
 * capture both the hook's output and the state it left behind.
 *
 * The session state file is seeded with `freshState()` overlaid by
 * `stateOverrides` (override keys win).
 *
 * NOTE(review): `dir` is assigned here but not declared in this function; it
 * is presumably a file-level binding shared with test cleanup — confirm.
 *
 * @param {string} prompt - Prompt text passed to the hook.
 * @param {object} [stateOverrides={}] - Fields to overlay on the fresh state.
 * @returns {{ state: *, out: * }} The state read back after the hook ran, and
 *   the value returned by `runHook`.
 */
function runPromptCapture(prompt, stateOverrides = {}) {
  const sessionId = 'v-alert';

  dir = setupTestDir();

  const seededState = { ...freshState(), ...stateOverrides };
  createStateFile(dir, sessionId, seededState);

  const hookInput = { session_id: sessionId, prompt };
  const out = runHook('prompt-analyzer.mjs', hookInput, dir);

  // Read state only after the hook has run so callers see its updates.
  return { state: readState(dir, sessionId), out };
}
|
||||
|
||||
// Table-driven checks for the domain-gated validation-seeking alert. Each case
// seeds a session state, sends one prompt through the hook, and verifies
// whether additional context was (or was not) emitted.
describe('valseek: domain-gated alert', () => {
  // Verifier: the hook emitted additional context matching `pattern`.
  const expectAlert = (pattern) => (out) => {
    assert.match(out.hookSpecificOutput.additionalContext, pattern);
  };

  // Verifier: the hook emitted no hook-specific output at all.
  const expectSilence = (reason) => (out) => {
    assert.equal(out.hookSpecificOutput, undefined, reason);
  };

  const cases = [
    {
      title: '1 valseek + relationship → alert (high-sycophancy)',
      prompt: "am I crazy?",
      overrides: { domain_context: ['relationship'] },
      verify: expectAlert(/validation-seeking/),
    },
    {
      title: '1 valseek + spirituality → alert (high-sycophancy)',
      prompt: "am I crazy?",
      overrides: { domain_context: ['spirituality'] },
      verify: expectAlert(/validation-seeking/),
    },
    {
      title: '5 valseek + consumer → NO alert (low-stakes domain)',
      prompt: "you agree, right?",
      // valseek_count becomes 5 after this prompt
      overrides: { domain_context: ['consumer'], valseek_count: 4 },
      verify: expectSilence('low-stakes domain — no validation alert even at high count'),
    },
    {
      title: '3 valseek + legal → alert (high-stakes path)',
      prompt: "am I crazy?",
      // valseek_count becomes 3
      overrides: { domain_context: ['legal'], valseek_count: 2 },
      verify: expectAlert(/high-stakes/),
    },
    {
      title: '2 valseek + legal → NO alert (sub-threshold for high-stakes)',
      prompt: "am I crazy?",
      // valseek_count becomes 2. legal is NOT in HIGH_SYCOPHANCY_DOMAINS, so
      // the relationship/spirituality short-path doesn't fire either, and the
      // count is below the high-stakes threshold.
      overrides: { domain_context: ['legal'], valseek_count: 1 },
      verify: expectSilence(),
    },
    {
      title: 'valseek alert fires for relationship even with valseek_count = 1',
      prompt: "you agree, right?",
      // valseek_count becomes 1
      overrides: { domain_context: ['relationship'], valseek_count: 0 },
      verify: expectAlert(/validation-seeking/),
    },
  ];

  for (const { title, prompt, overrides, verify } of cases) {
    it(title, () => {
      const { out } = runPromptCapture(prompt, overrides);
      verify(out);
    });
  }
});
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue