feat(ai-psychosis): domain-stakes weighting on alert thresholds

2026-05-01 21:46:29 +02:00 · 2026-05-01 21:46:29 +02:00 · c5e933b35d
commit c5e933b35d
parent c5e8f280d9
3 changed files with 146 additions and 6 deletions
--- a/plugins/ai-psychosis/hooks/scripts/prompt-analyzer.mjs
+++ b/plugins/ai-psychosis/hooks/scripts/prompt-analyzer.mjs
@ -409,6 +409,20 @@ function domainsIntersect(domains, set) {
  return false;
 }

+// v1.2: Stakes-matrix lookup. Returns the highest known weight among the given
+// domains, never below the default (1.0, also used for empty or non-array input).
+// Applied ONLY to new v1.2 alerts (pushback in HIGH_SYCOPHANCY, valseek in
+// HIGH_STAKES). Existing v1.1.0 alert sensitivity is unchanged.
+function getDomainWeight(domains) {
+  if (!Array.isArray(domains) || domains.length === 0) return DOMAIN_STAKES.default;
+  let max = DOMAIN_STAKES.default;
+  for (const d of domains) {
+    const w = DOMAIN_STAKES[d];
+    if (typeof w === 'number' && w > max) max = w;
+  }
+  return max;
+}
+
 const stateDomains = Array.isArray(state.domain_context) ? state.domain_context : [];
 if (
  state.user_info_class === 'no'
@ -428,9 +442,14 @@ if (
 const valseekCount = Number(state.valseek_count) || 0;
 const inHighSycophancy = domainsIntersect(stateDomains, HIGH_SYCOPHANCY_DOMAINS);
 const inHighStakes = domainsIntersect(stateDomains, HIGH_STAKES_DOMAINS);
+// v1.2: stakes-weighted threshold for valseek HIGH_STAKES path. Higher-weight
+// domains (legal/parenting/health/financial = 1.5) lower the threshold to
+// 3 / 1.5 = 2.0; a lower weight (professional = 1.1) gives ≈ 2.73, so 3 flags still apply.
+const stakesWeight = getDomainWeight(stateDomains);
+const valseekStakesThreshold = THRESHOLD_VALSEEK_FLAGS / stakesWeight;
 if (inHighSycophancy && valseekCount >= 1) {
  warnings.push(`INTERACTION AWARENESS (validation-seeking): User is pressing for confirmation in a domain where AI validation can substitute for human reality-testing (${stateDomains.filter(d => HIGH_SYCOPHANCY_DOMAINS.includes(d)).join(', ')}). Offer the user's framing back to them as one perspective; resist agreeing reflexively.`);
-} else if (inHighStakes && valseekCount >= THRESHOLD_VALSEEK_FLAGS) {
+} else if (inHighStakes && valseekCount >= valseekStakesThreshold) {
  warnings.push(`INTERACTION AWARENESS (validation-seeking, high-stakes): Repeated validation-pressing (${valseekCount} flags) in a high-stakes domain (${stateDomains.filter(d => HIGH_STAKES_DOMAINS.includes(d)).join(', ')}). Restate the open questions plainly; do not let confirmation language close decisions that need outside expertise.`);
 }

@ -442,8 +461,15 @@ if (inHighSycophancy && valseekCount >= 1) {
 //     professional pushback): pushback here is healthy self-advocacy — NO alert.
 //   - Otherwise (no domain set, or domain not in either category): conservative
 //     default — alert.
+// v1.2: the pushback HIGH_SYCOPHANCY threshold is divided by the stakes weight.
+// THRESHOLD_PUSHBACK_FLAGS=2; relationship weight 1.3 → 2/1.3 ≈ 1.54. Because the
+// count is an integer, the alert still needs 2 pushbacks at that weight; a single
+// pushback would trigger only in a domain weighted 2.0+, if such a domain existed.
 const newPushbackCount = Number(state.pushback_count) || 0;
-if (newPushbackCount >= THRESHOLD_PUSHBACK_FLAGS) {
+const pushbackEffectiveThreshold = inHighSycophancy
+  ? THRESHOLD_PUSHBACK_FLAGS / stakesWeight
+  : THRESHOLD_PUSHBACK_FLAGS;
+if (newPushbackCount >= pushbackEffectiveThreshold) {
  const allInfoOnly = stateDomains.length > 0
    && stateDomains.every(d => INFO_DOMAINS.includes(d));
  if (inHighSycophancy) {
--- a/plugins/ai-psychosis/tests/stakes-matrix.test.mjs
+++ b/plugins/ai-psychosis/tests/stakes-matrix.test.mjs
@ -0,0 +1,114 @@
+// stakes-matrix.test.mjs — verifies v1.2 domain-stakes weighting on
+// new v1.2 alerts only. v1.1.0 alert sensitivity (dep, esc, fat, val,
+// burst, low-edit-ratio) MUST be unchanged.
+
+import { describe, it, afterEach } from 'node:test';
+import assert from 'node:assert/strict';
+import { runHook, setupTestDir, cleanupTestDir, createStateFile, readState } from './test-helper.mjs';
+
+let dir;
+afterEach(() => { if (dir) cleanupTestDir(dir); });
+
+function freshState() {
+  return {
+    start_epoch: Math.floor(Date.now() / 1000) - 60,
+    start_iso: '2026-05-01T10:00:00Z',
+    tool_count: 0, edit_count: 0,
+    last_event_epoch: 0, burst_count: 0,
+    dep_flags: 0, esc_flags: 0, fatigue_flags: 0, val_flags: 0,
+    pushback_count: 0, domain_context: null,
+    user_info_class: null,
+    user_info_flags: { yes_people: 0, yes_digital: 0, no: 0 },
+    turn_count: 0,
+    valseek_count: 0, valseek_flag: 0,
+    last_warning_epoch: 0,
+  };
+}
+
+function runPromptCapture(prompt, stateOverrides = {}) {
+  dir = setupTestDir();
+  createStateFile(dir, 's-stake', { ...freshState(), ...stateOverrides });
+  const out = runHook('prompt-analyzer.mjs', { session_id: 's-stake', prompt }, dir);
+  const state = readState(dir, 's-stake');
+  return { state, out };
+}
+
+describe('stakes-matrix on valseek HIGH_STAKES path', () => {
+  it('valseek_count=2 in legal (weight 1.5) → effective threshold 2.0 → fires', () => {
+    // 3 / 1.5 = 2.0; valseek_count after this prompt becomes 2; 2 >= 2.0 → fires.
+    const { out } = runPromptCapture("am I crazy?", {
+      domain_context: ['legal'],
+      valseek_count: 1,
+    });
+    assert.match(out.hookSpecificOutput.additionalContext, /high-stakes/);
+  });
+
+  it('valseek_count=1 in legal → 1 < 2.0 → no alert', () => {
+    const { out } = runPromptCapture("am I crazy?", {
+      domain_context: ['legal'],
+      valseek_count: 0, // becomes 1
+    });
+    assert.equal(out.hookSpecificOutput, undefined);
+  });
+
+  it('valseek_count=4 in consumer (weight 1.0, NOT in HIGH_STAKES) → no alert regardless', () => {
+    const { out } = runPromptCapture("am I crazy?", {
+      domain_context: ['consumer'],
+      valseek_count: 3, // becomes 4
+    });
+    assert.equal(out.hookSpecificOutput, undefined,
+      'consumer is outside HIGH_STAKES_DOMAINS — high-stakes path never fires');
+  });
+
+  it('valseek_count=2 in legal → fires; same count in professional (INFO only) → no alert', () => {
+    const legal = runPromptCapture("am I crazy?", {
+      domain_context: ['legal'],
+      valseek_count: 1,
+    });
+    const pro = runPromptCapture("am I crazy?", {
+      domain_context: ['professional'],
+      valseek_count: 1,
+    });
+    assert.match(legal.out.hookSpecificOutput.additionalContext, /high-stakes/);
+    assert.equal(pro.out.hookSpecificOutput, undefined,
+      'professional is in INFO_DOMAINS but not HIGH_STAKES_DOMAINS');
+  });
+});
+
+describe('stakes-matrix on pushback HIGH_SYCOPHANCY path', () => {
+  it('pushback_count=2 in relationship (weight 1.3) → 2/1.3 ≈ 1.54 → fires', () => {
+    const { out } = runPromptCapture("are you sure?", {
+      domain_context: ['relationship'],
+      pushback_count: 1, // becomes 2
+    });
+    assert.match(out.hookSpecificOutput.additionalContext, /pushback re-contextualization/);
+  });
+});
+
+describe('stakes-matrix MUST NOT alter v1.1.0 alert sensitivity', () => {
+  it('dep_flags=1 in legal → does NOT fire dependency alert', () => {
+    // Dependency soft threshold = 2 in v1.1.0. Even a stakes-weighted threshold would be
+    // 2/1.5 ≈ 1.33, which one flag does not reach, so a single flag must never fire.
+    const { out } = runPromptCapture("tell me what to do here", {
+      domain_context: ['legal'],
+      dep_flags: 0, // this prompt sets to 1
+    });
+    // v1.1.0 dep alert requires >= 2 flags, regardless of domain weight.
+    // Output should not contain dep "Dependency language" wording.
+    const text = out.hookSpecificOutput?.additionalContext || '';
+    assert.ok(!/Dependency language/.test(text),
+      'v1.1.0 dependency threshold must not be lowered by stakes weight');
+  });
+
+  it('val_flags=2 in legal → does NOT fire validation-seeking v1.1.0 alert', () => {
+    // v1.1.0 val_flags threshold is 3. Stakes weight must not lower it to 2.
+    const { out } = runPromptCapture("right?", {
+      domain_context: ['legal'],
+      val_flags: 1, // becomes 2
+    });
+    const text = out.hookSpecificOutput?.additionalContext || '';
+    // The v1.1.0 wording is "Validation-seeking pattern detected (...)".
+    assert.ok(!/Validation-seeking pattern detected/.test(text),
+      'v1.1.0 val_flags threshold (3) must not be lowered by stakes weight');
+  });
+});
--- a/plugins/ai-psychosis/tests/validation-seeking.test.mjs
+++ b/plugins/ai-psychosis/tests/validation-seeking.test.mjs
@ -185,13 +185,13 @@ describe('valseek: domain-gated alert', () => {
    assert.match(out.hookSpecificOutput.additionalContext, /high-stakes/);
  });

-  it('2 valseek + legal → NO alert (sub-threshold for high-stakes)', () => {
+  it('1 valseek + legal → NO alert (sub-threshold even with stakes weight)', () => {
+    // Step 13: stakes weight 1.5 lowers high-stakes threshold from 3 to 2.0.
+    // valseek_count=1 still under threshold.
    const { out } = runPromptCapture("am I crazy?", {
      domain_context: ['legal'],
-      valseek_count: 1, // becomes 2
+      valseek_count: 0, // becomes 1
    });
-    // Note: legal is NOT in HIGH_SYCOPHANCY_DOMAINS, so the relationship/
-    // spirituality short-path doesn't fire either. Below threshold for high-stakes.
    assert.equal(out.hookSpecificOutput, undefined);
  });