feat(ai-psychosis): domain-stakes weighting on alert thresholds
This commit is contained in:
parent
c5e8f280d9
commit
c5e933b35d
3 changed files with 146 additions and 6 deletions
|
|
@ -409,6 +409,20 @@ function domainsIntersect(domains, set) {
|
|||
return false;
|
||||
}
|
||||
|
||||
// v1.2: Stakes-matrix lookup. Applied ONLY to new v1.2 alerts (pushback in
// HIGH_SYCOPHANCY, valseek in HIGH_STAKES). Existing v1.1.0 alert sensitivity
// is unchanged.
/**
 * Returns the largest stakes weight configured in DOMAIN_STAKES for any of the
 * given domains, never lower than the baseline weight.
 *
 * The baseline is DOMAIN_STAKES.default when that is a finite, positive number,
 * otherwise 1.0. Callers divide alert thresholds by the result, so the function
 * must never hand back undefined, NaN, Infinity, zero or a negative value:
 * any of those would silently disable or permanently trigger the alert.
 *
 * @param {string[]} domains - Domain names to look up. A non-array or empty
 *   value yields the baseline weight.
 * @returns {number} A finite, positive weight (baseline if no listed domain has
 *   a usable weight above it).
 */
function getDomainWeight(domains) {
  const FALLBACK_WEIGHT = 1.0;
  // A weight is usable only if it is safe to divide a threshold by it.
  const isUsableWeight = (value) =>
    typeof value === 'number' && Number.isFinite(value) && value > 0;

  const baseline = isUsableWeight(DOMAIN_STAKES.default)
    ? DOMAIN_STAKES.default
    : FALLBACK_WEIGHT;

  if (!Array.isArray(domains) || domains.length === 0) return baseline;

  let max = baseline;
  for (const d of domains) {
    const w = DOMAIN_STAKES[d];
    // Unknown domains (undefined) and inherited non-numeric members such as
    // 'constructor' are rejected by the type check inside isUsableWeight.
    if (isUsableWeight(w) && w > max) max = w;
  }
  return max;
}
|
||||
|
||||
// Normalize state.domain_context to an array: any non-array value becomes an empty list.
const stateDomains = Array.isArray(state.domain_context) ? state.domain_context : [];
|
||||
if (
|
||||
state.user_info_class === 'no'
|
||||
|
|
@ -428,9 +442,14 @@ if (
|
|||
// Validation-seeking flag count; a missing or non-numeric value is treated as 0.
const valseekCount = Number(state.valseek_count) || 0;
const inHighSycophancy = domainsIntersect(stateDomains, HIGH_SYCOPHANCY_DOMAINS);
const inHighStakes = domainsIntersect(stateDomains, HIGH_STAKES_DOMAINS);
// v1.2: stakes-weighted threshold for valseek HIGH_STAKES path. Higher-weight
// domains (legal/parenting/health/financial = 1.5) lower the effective threshold:
// 3 / 1.5 = 2.0. Less weight (professional = 1.1) keeps it near the literal 3.
// The weight is taken over every entry in stateDomains, not only the high-stakes ones.
const stakesWeight = getDomainWeight(stateDomains);
const valseekStakesThreshold = THRESHOLD_VALSEEK_FLAGS / stakesWeight;
if (inHighSycophancy && valseekCount >= 1) {
  // High-sycophancy domains alert on the first validation-seeking flag.
  warnings.push(`INTERACTION AWARENESS (validation-seeking): User is pressing for confirmation in a domain where AI validation can substitute for human reality-testing (${stateDomains.filter(d => HIGH_SYCOPHANCY_DOMAINS.includes(d)).join(', ')}). Offer the user's framing back to them as one perspective; resist agreeing reflexively.`);
} else if (inHighStakes && valseekCount >= valseekStakesThreshold) {
  // The weighted threshold is the only high-stakes check. The earlier
  // unweighted `>= THRESHOLD_VALSEEK_FLAGS` branch with an empty body is
  // removed: it matched first and suppressed this warning for every count at
  // or above the literal threshold.
  warnings.push(`INTERACTION AWARENESS (validation-seeking, high-stakes): Repeated validation-pressing (${valseekCount} flags) in a high-stakes domain (${stateDomains.filter(d => HIGH_STAKES_DOMAINS.includes(d)).join(', ')}). Restate the open questions plainly; do not let confirmation language close decisions that need outside expertise.`);
}
|
||||
|
||||
|
|
@ -442,8 +461,15 @@ if (inHighSycophancy && valseekCount >= 1) {
|
|||
// professional pushback): pushback here is healthy self-advocacy — NO alert.
|
||||
// - Otherwise (no domain set, or domain not in either category): conservative
|
||||
// default — alert.
|
||||
// v1.2: pushback HIGH_SYCOPHANCY threshold uses stakes weight as a fine-tuning
|
||||
// divisor. THRESHOLD_PUSHBACK_FLAGS=2; relationship weight 1.3 → 2/1.3 ≈ 1.54.
|
||||
// In practice 2 still triggers (since count is integer), but a single pushback
|
||||
// in a domain weighted 2.0+ would also trigger if such a domain existed.
|
||||
const newPushbackCount = Number(state.pushback_count) || 0;
|
||||
if (newPushbackCount >= THRESHOLD_PUSHBACK_FLAGS) {
|
||||
const pushbackEffectiveThreshold = inHighSycophancy
|
||||
? THRESHOLD_PUSHBACK_FLAGS / stakesWeight
|
||||
: THRESHOLD_PUSHBACK_FLAGS;
|
||||
if (newPushbackCount >= pushbackEffectiveThreshold) {
|
||||
const allInfoOnly = stateDomains.length > 0
|
||||
&& stateDomains.every(d => INFO_DOMAINS.includes(d));
|
||||
if (inHighSycophancy) {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue