feat(ai-psychosis): domain-stakes weighting on alert thresholds

This commit is contained in:
Kjell Tore Guttormsen 2026-05-01 21:46:29 +02:00
commit c5e933b35d
3 changed files with 146 additions and 6 deletions

View file

@ -409,6 +409,20 @@ function domainsIntersect(domains, set) {
return false;
}
// v1.2: Stakes-matrix lookup.
//
// Resolves the stakes weight for a list of domain names by looking each one
// up in DOMAIN_STAKES and keeping the largest numeric entry found.
//
// - `domains`: expected to be an array of domain-name keys. Anything that is
//   not an array, or an empty array, resolves to DOMAIN_STAKES.default
//   (1.0 per the original note on this helper).
// - Entries that are missing from DOMAIN_STAKES, or whose value is not a
//   number, are ignored.
// - The result never drops below DOMAIN_STAKES.default: a domain weighted
//   lower than the default does not reduce the returned weight.
//
// Design note carried over from the original comment: this weight is meant
// ONLY for the new v1.2 alerts (pushback in HIGH_SYCOPHANCY, valseek in
// HIGH_STAKES); existing v1.1.0 alert sensitivity is unchanged.
function getDomainWeight(domains) {
  const baseline = DOMAIN_STAKES.default;
  if (!Array.isArray(domains)) {
    return baseline;
  }
  // Fold over the names, starting from the baseline so that an empty list
  // (or a list of unknown names) yields the default weight.
  return domains.reduce((heaviest, name) => {
    const candidate = DOMAIN_STAKES[name];
    const isHeavier = typeof candidate === 'number' && candidate > heaviest;
    return isHeavier ? candidate : heaviest;
  }, baseline);
}
const stateDomains = Array.isArray(state.domain_context) ? state.domain_context : [];
if (
state.user_info_class === 'no'
@ -428,9 +442,14 @@ if (
const valseekCount = Number(state.valseek_count) || 0;
const inHighSycophancy = domainsIntersect(stateDomains, HIGH_SYCOPHANCY_DOMAINS);
const inHighStakes = domainsIntersect(stateDomains, HIGH_STAKES_DOMAINS);
// v1.2: stakes-weighted threshold for valseek HIGH_STAKES path. The base
// threshold is divided by the stakes weight, so higher-weight domains
// (legal/parenting/health/financial = 1.5) lower the effective threshold:
// 3 / 1.5 = 2.0. A lower weight (professional = 1.1) gives 3 / 1.1 ≈ 2.73,
// which for a whole-number count still requires 3 flags.
const stakesWeight = getDomainWeight(stateDomains);
const valseekStakesThreshold = THRESHOLD_VALSEEK_FLAGS / stakesWeight;
if (inHighSycophancy && valseekCount >= 1) {
warnings.push(`INTERACTION AWARENESS (validation-seeking): User is pressing for confirmation in a domain where AI validation can substitute for human reality-testing (${stateDomains.filter(d => HIGH_SYCOPHANCY_DOMAINS.includes(d)).join(', ')}). Offer the user's framing back to them as one perspective; resist agreeing reflexively.`);
} else if (inHighStakes && valseekCount >= THRESHOLD_VALSEEK_FLAGS) {
} else if (inHighStakes && valseekCount >= valseekStakesThreshold) {
warnings.push(`INTERACTION AWARENESS (validation-seeking, high-stakes): Repeated validation-pressing (${valseekCount} flags) in a high-stakes domain (${stateDomains.filter(d => HIGH_STAKES_DOMAINS.includes(d)).join(', ')}). Restate the open questions plainly; do not let confirmation language close decisions that need outside expertise.`);
}
@ -442,8 +461,15 @@ if (inHighSycophancy && valseekCount >= 1) {
// professional pushback): pushback here is healthy self-advocacy — NO alert.
// - Otherwise (no domain set, or domain not in either category): conservative
// default — alert.
// v1.2: in HIGH_SYCOPHANCY domains the pushback threshold is divided by the
// stakes weight. THRESHOLD_PUSHBACK_FLAGS=2; relationship weight 1.3 → 2/1.3 ≈ 1.54.
// Because the count is an integer, 2 pushbacks are still required in practice;
// a single pushback would trigger only in a domain weighted 2.0 or higher,
// if such a domain existed.
const newPushbackCount = Number(state.pushback_count) || 0;
if (newPushbackCount >= THRESHOLD_PUSHBACK_FLAGS) {
const pushbackEffectiveThreshold = inHighSycophancy
? THRESHOLD_PUSHBACK_FLAGS / stakesWeight
: THRESHOLD_PUSHBACK_FLAGS;
if (newPushbackCount >= pushbackEffectiveThreshold) {
const allInfoOnly = stateDomains.length > 0
&& stateDomains.every(d => INFO_DOMAINS.includes(d));
if (inHighSycophancy) {