From c5e8f280d9010519f32cb9fb35dcef83fd224131 Mon Sep 17 00:00:00 2001 From: Kjell Tore Guttormsen Date: Fri, 1 May 2026 21:42:55 +0200 Subject: [PATCH] feat(ai-psychosis): pushback alert with domain-aware re-contextualization --- .../hooks/scripts/prompt-analyzer.mjs | 21 +++++ .../tests/prompt-analyzer.test.mjs | 88 +++++++++++++------ 2 files changed, 82 insertions(+), 27 deletions(-) diff --git a/plugins/ai-psychosis/hooks/scripts/prompt-analyzer.mjs b/plugins/ai-psychosis/hooks/scripts/prompt-analyzer.mjs index 45351b5..01c0e34 100644 --- a/plugins/ai-psychosis/hooks/scripts/prompt-analyzer.mjs +++ b/plugins/ai-psychosis/hooks/scripts/prompt-analyzer.mjs @@ -434,6 +434,27 @@ if (inHighSycophancy && valseekCount >= 1) { warnings.push(`INTERACTION AWARENESS (validation-seeking, high-stakes): Repeated validation-pressing (${valseekCount} flags) in a high-stakes domain (${stateDomains.filter(d => HIGH_STAKES_DOMAINS.includes(d)).join(', ')}). Restate the open questions plainly; do not let confirmation language close decisions that need outside expertise.`); } +// v1.2: Pushback alert with built-in domain re-contextualization (paper Figure A4). +// v1.1.0 only counted; v1.2 adds the alert with domain awareness: +// - HIGH_SYCOPHANCY_DOMAINS (relationship 21%, spirituality 19% pushback rate): +// pushback there signals validation-pressing — alert. +// - INFO_DOMAINS (legal 94%, parenting 7.9%, health 81%, financial 80% pushback rate, +// plus professional): pushback here is healthy self-advocacy — NO alert. +// - Otherwise (no domain set, or at least one domain outside both categories): conservative +// default — alert.
+const newPushbackCount = Number(state.pushback_count) || 0; +if (newPushbackCount >= THRESHOLD_PUSHBACK_FLAGS) { + const allInfoOnly = stateDomains.length > 0 + && stateDomains.every(d => INFO_DOMAINS.includes(d)); + if (inHighSycophancy) { + warnings.push(`INTERACTION AWARENESS (pushback re-contextualization): Repeated pushback (${newPushbackCount}) in a high-sycophancy domain (${stateDomains.filter(d => HIGH_SYCOPHANCY_DOMAINS.includes(d)).join(', ')}) often signals pressing for validation, not factual disagreement. Hold your read; restate the user's frame back to them rather than adjusting your conclusion.`); + } else if (allInfoOnly) { + // Healthy self-advocacy in info-seeking domains — no alert. + } else { + warnings.push(`INTERACTION AWARENESS (pushback): User has pushed back ${newPushbackCount} times this session. Note whether the pushback is factual correction or pressure to agree; do not silently revise your read either way.`); + } +} + if (warnings.length > 0) { // Fatigue bypasses cooldown if (fatHit === 1 || checkCooldown(COOLDOWN_SOFT)) { diff --git a/plugins/ai-psychosis/tests/prompt-analyzer.test.mjs b/plugins/ai-psychosis/tests/prompt-analyzer.test.mjs index 2d26784..d68c5e2 100644 --- a/plugins/ai-psychosis/tests/prompt-analyzer.test.mjs +++ b/plugins/ai-psychosis/tests/prompt-analyzer.test.mjs @@ -430,14 +430,16 @@ describe('pushback integration (state accumulation + same-invocation valence)', }); }); -// --- Pushback count contract (v1.1.0 deferred-alert state) --- +// --- v1.2 pushback alert contract (domain-aware re-contextualization) --- // -// Locks the v1.1.0 contract: pushback is COUNTED but NEVER alerted. -// lib.mjs:131 confirms "tier-reduction logic is v1.2 scope". This test -// is the regression baseline that Step 12 will rewrite when the alert is -// added with domain-aware re-contextualization. If this contract changes -// without Step 12 being in flight, something has silently enabled -// pushback alerting. 
+// Step 12 of v1.2.0 ADDS the pushback alert with domain awareness baked in. +// Replaces the v1.1.0 "count but never alert" contract test. +// +// Behavior: +// - HIGH_SYCOPHANCY_DOMAINS (relationship, spirituality): alert at count >= 2 +// - INFO_DOMAINS (legal, parenting, health, financial, professional): NO alert +// — pushback in info-seeking domains is healthy self-advocacy. +// - Empty / unknown domain: conservative default alert. function runPromptCapture(prompt, stateOverrides = {}) { dir = setupTestDir(); @@ -447,10 +449,10 @@ function runPromptCapture(prompt, stateOverrides = {}) { return { state, out }; } -describe('pushback count contract (v1.1.0 deferred-alert state)', () => { - it('accumulates pushback_count to 5 over 5 sequential pushback prompts', () => { +describe('pushback alert (v1.2 domain-aware contract)', () => { + it('accumulates pushback_count over 5 sequential prompts', () => { dir = setupTestDir(); - createStateFile(dir, 'p1', { ...freshState(), domain_context: 'relationship' }); + createStateFile(dir, 'p1', { ...freshState(), domain_context: ['relationship'] }); const prompts = [ 'are you sure?', "I'm not convinced", @@ -458,31 +460,63 @@ describe('pushback count contract (v1.1.0 deferred-alert state)', () => { "actually, I think you're wrong", "are you absolutely sure?", ]; - let lastOut; for (const p of prompts) { - lastOut = runHook('prompt-analyzer.mjs', { session_id: 'p1', prompt: p }, dir); + runHook('prompt-analyzer.mjs', { session_id: 'p1', prompt: p }, dir); } const s = readState(dir, 'p1'); - assert.equal(s.pushback_count, 5, 'pushback_count should accumulate across calls'); - // Final hook call output: continue: true, no hookSpecificOutput. 
- assert.equal(lastOut.continue, true); - assert.equal(lastOut.hookSpecificOutput, undefined, - 'v1.1.0 must not emit pushback alert via hookSpecificOutput'); + assert.equal(s.pushback_count, 5, 'count accumulates across calls'); }); - it('does not emit pushback alert in relationship domain even at high count', () => { + it('3 pushbacks + relationship → alert (HIGH_SYCOPHANCY)', () => { const { state, out } = runPromptCapture('are you absolutely sure?', { - domain_context: 'relationship', - pushback_count: 4, // already 4, this prompt makes 5 + domain_context: ['relationship'], + pushback_count: 2, // becomes 3 }); - assert.equal(state.pushback_count, 5); - assert.equal(out.hookSpecificOutput, undefined, - 'v1.1.0 contract: count but never alert'); + assert.equal(state.pushback_count, 3); + assert.match(out.hookSpecificOutput.additionalContext, /pushback re-contextualization/); }); - it('does not emit pushback alert when domain_context is null', () => { - const { state, out } = runPromptCapture("I'm not convinced", { pushback_count: 1 }); - assert.equal(state.pushback_count, 2); - assert.equal(out.hookSpecificOutput, undefined); + it('3 pushbacks + parenting → NO alert (INFO_DOMAIN, healthy self-advocacy)', () => { + const { out } = runPromptCapture("I'm not convinced", { + domain_context: ['parenting'], + pushback_count: 2, + }); + // Suppress pushback alert; nothing else should fire here either. 
+ assert.equal(out.hookSpecificOutput, undefined, + 'parenting pushback is healthy self-advocacy — no alert'); + }); + + it('3 pushbacks + [relationship, legal] → alert (mixed: any HIGH_SYCOPHANCY wins)', () => { + const { out } = runPromptCapture('are you absolutely sure?', { + domain_context: ['relationship', 'legal'], + pushback_count: 2, + }); + assert.match(out.hookSpecificOutput.additionalContext, /pushback re-contextualization/); + }); + + it('3 pushbacks + empty domain → alert (conservative default)', () => { + const { out } = runPromptCapture('are you absolutely sure?', { + domain_context: [], + pushback_count: 2, + }); + assert.match(out.hookSpecificOutput.additionalContext, /pushback/); + }); + + it('1 pushback + relationship → NO alert (sub-threshold)', () => { + const { out } = runPromptCapture("are you sure?", { + domain_context: ['relationship'], + pushback_count: 0, + }); + assert.equal(out.hookSpecificOutput, undefined, + 'sub-threshold (count<2) — no alert even in HIGH_SYCOPHANCY'); + }); + + it('5 pushbacks across info-only domains [legal, health] → NO alert', () => { + const { out } = runPromptCapture("I'm not convinced", { + domain_context: ['legal', 'health'], + pushback_count: 4, + }); + assert.equal(out.hookSpecificOutput, undefined, + 'all-info domains never alert pushback regardless of count'); }); });