feat(ai-psychosis): domain-stakes weighting on alert thresholds

2026-05-01 21:46:29 +02:00 · 2026-05-01 21:46:29 +02:00 · c5e933b35d
commit c5e933b35d
parent c5e8f280d9
3 changed files with 146 additions and 6 deletions
--- a/plugins/ai-psychosis/hooks/scripts/prompt-analyzer.mjs
+++ b/plugins/ai-psychosis/hooks/scripts/prompt-analyzer.mjs
@ -409,6 +409,20 @@ function domainsIntersect(domains, set) {
  return false;
 }

+// v1.2: Stakes-matrix lookup. Returns the largest numeric DOMAIN_STAKES weight among
+// `domains`, never below DOMAIN_STAKES.default (documented as 1.0); non-array, empty,
+// or all-unknown input yields the default. Applied ONLY to new v1.2 alerts (pushback
+// in HIGH_SYCOPHANCY, valseek in HIGH_STAKES). Existing v1.1.0 alert sensitivity is unchanged.
+function getDomainWeight(domains) {
+  if (!Array.isArray(domains) || domains.length === 0) return DOMAIN_STAKES.default;
+  let max = DOMAIN_STAKES.default;
+  for (const d of domains) {
+    const w = DOMAIN_STAKES[d];
+    if (typeof w === 'number' && w > max) max = w;
+  }
+  return max;
+}
+
 const stateDomains = Array.isArray(state.domain_context) ? state.domain_context : [];
 if (
  state.user_info_class === 'no'
@ -428,9 +442,14 @@ if (
 const valseekCount = Number(state.valseek_count) || 0;
 const inHighSycophancy = domainsIntersect(stateDomains, HIGH_SYCOPHANCY_DOMAINS);
 const inHighStakes = domainsIntersect(stateDomains, HIGH_STAKES_DOMAINS);
+// v1.2: stakes-weighted threshold for valseek HIGH_STAKES path. Higher-weight
+// domains (legal/parenting/health/financial = 1.5) lower the effective threshold:
+// 3 / 1.5 = 2.0. A lower weight (professional = 1.1) gives 3 / 1.1 ≈ 2.73, so the integer count must still reach 3.
+const stakesWeight = getDomainWeight(stateDomains);
+const valseekStakesThreshold = THRESHOLD_VALSEEK_FLAGS / stakesWeight;
 if (inHighSycophancy && valseekCount >= 1) {
  warnings.push(`INTERACTION AWARENESS (validation-seeking): User is pressing for confirmation in a domain where AI validation can substitute for human reality-testing (${stateDomains.filter(d => HIGH_SYCOPHANCY_DOMAINS.includes(d)).join(', ')}). Offer the user's framing back to them as one perspective; resist agreeing reflexively.`);
-} else if (inHighStakes && valseekCount >= THRESHOLD_VALSEEK_FLAGS) {
+} else if (inHighStakes && valseekCount >= valseekStakesThreshold) {
  warnings.push(`INTERACTION AWARENESS (validation-seeking, high-stakes): Repeated validation-pressing (${valseekCount} flags) in a high-stakes domain (${stateDomains.filter(d => HIGH_STAKES_DOMAINS.includes(d)).join(', ')}). Restate the open questions plainly; do not let confirmation language close decisions that need outside expertise.`);
 }

@ -442,8 +461,15 @@ if (inHighSycophancy && valseekCount >= 1) {
 //     professional pushback): pushback here is healthy self-advocacy — NO alert.
 //   - Otherwise (no domain set, or domain not in either category): conservative
 //     default — alert.
+// v1.2: pushback HIGH_SYCOPHANCY threshold is divided by the stakes weight as a
+// fine-tuning step. THRESHOLD_PUSHBACK_FLAGS=2; relationship weight 1.3 → 2/1.3 ≈ 1.54.
+// In practice it still takes 2 pushbacks to trigger (the count is an integer), but a
+// single pushback in a domain weighted 2.0+ would also trigger if such a domain existed.
 const newPushbackCount = Number(state.pushback_count) || 0;
-if (newPushbackCount >= THRESHOLD_PUSHBACK_FLAGS) {
+const pushbackEffectiveThreshold = inHighSycophancy
+  ? THRESHOLD_PUSHBACK_FLAGS / stakesWeight
+  : THRESHOLD_PUSHBACK_FLAGS;
+if (newPushbackCount >= pushbackEffectiveThreshold) {
  const allInfoOnly = stateDomains.length > 0
    && stateDomains.every(d => INFO_DOMAINS.includes(d));
  if (inHighSycophancy) {