feat(ai-psychosis): pushback alert with domain-aware re-contextualization
This commit is contained in:
parent
12e6d3b5e4
commit
c5e8f280d9
2 changed files with 81 additions and 26 deletions
|
|
@ -434,6 +434,27 @@ if (inHighSycophancy && valseekCount >= 1) {
|
||||||
warnings.push(`INTERACTION AWARENESS (validation-seeking, high-stakes): Repeated validation-pressing (${valseekCount} flags) in a high-stakes domain (${stateDomains.filter(d => HIGH_STAKES_DOMAINS.includes(d)).join(', ')}). Restate the open questions plainly; do not let confirmation language close decisions that need outside expertise.`);
|
warnings.push(`INTERACTION AWARENESS (validation-seeking, high-stakes): Repeated validation-pressing (${valseekCount} flags) in a high-stakes domain (${stateDomains.filter(d => HIGH_STAKES_DOMAINS.includes(d)).join(', ')}). Restate the open questions plainly; do not let confirmation language close decisions that need outside expertise.`);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// v1.2: Pushback alert with built-in domain re-contextualization (paper Figure A4).
|
||||||
|
// v1.1.0 only counted pushback; v1.2 adds the alert with domain awareness:
|
||||||
|
// - HIGH_SYCOPHANCY_DOMAINS (relationship 21%, spirituality 19% pushback rate):
|
||||||
|
// pushback there signals validation-pressing — alert.
|
||||||
|
// - INFO_DOMAINS (legal 94%, parenting 7.9%, health 81%, financial 80%,
|
||||||
|
// professional — pushback rates): pushback here is healthy self-advocacy — NO alert.
|
||||||
|
// - Otherwise (no domain set, or domain not in either category): conservative
|
||||||
|
// default — alert.
|
||||||
|
const newPushbackCount = Number(state.pushback_count) || 0;
|
||||||
|
if (newPushbackCount >= THRESHOLD_PUSHBACK_FLAGS) {
|
||||||
|
const allInfoOnly = stateDomains.length > 0
|
||||||
|
&& stateDomains.every(d => INFO_DOMAINS.includes(d));
|
||||||
|
if (inHighSycophancy) {
|
||||||
|
warnings.push(`INTERACTION AWARENESS (pushback re-contextualization): Repeated pushback (${newPushbackCount}) in a high-sycophancy domain (${stateDomains.filter(d => HIGH_SYCOPHANCY_DOMAINS.includes(d)).join(', ')}) often signals pressing for validation, not factual disagreement. Hold your read; restate the user's frame back to them rather than adjusting your conclusion.`);
|
||||||
|
} else if (allInfoOnly) {
|
||||||
|
// Healthy self-advocacy in info-seeking domains — no alert.
|
||||||
|
} else {
|
||||||
|
warnings.push(`INTERACTION AWARENESS (pushback): User has pushed back ${newPushbackCount} times this session. Note whether the pushback is factual correction or pressure to agree; do not silently revise your read either way.`);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (warnings.length > 0) {
|
if (warnings.length > 0) {
|
||||||
// Fatigue bypasses cooldown
|
// Fatigue bypasses cooldown
|
||||||
if (fatHit === 1 || checkCooldown(COOLDOWN_SOFT)) {
|
if (fatHit === 1 || checkCooldown(COOLDOWN_SOFT)) {
|
||||||
|
|
|
||||||
|
|
@ -430,14 +430,16 @@ describe('pushback integration (state accumulation + same-invocation valence)',
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
// --- Pushback count contract (v1.1.0 deferred-alert state) ---
|
// --- v1.2 pushback alert contract (domain-aware re-contextualization) ---
|
||||||
//
|
//
|
||||||
// Locks the v1.1.0 contract: pushback is COUNTED but NEVER alerted.
|
// Step 12 of v1.2.0 ADDS the pushback alert with domain awareness baked in.
|
||||||
// lib.mjs:131 confirms "tier-reduction logic is v1.2 scope". This test
|
// Replaces the v1.1.0 "count but never alert" contract test.
|
||||||
// is the regression baseline that Step 12 will rewrite when the alert is
|
//
|
||||||
// added with domain-aware re-contextualization. If this contract changes
|
// Behavior:
|
||||||
// without Step 12 being in flight, something has silently enabled
|
// - HIGH_SYCOPHANCY_DOMAINS (relationship, spirituality): alert at count >= 2
|
||||||
// pushback alerting.
|
// - INFO_DOMAINS (legal, parenting, health, financial, professional): NO alert
|
||||||
|
// — pushback in info-seeking domains is healthy self-advocacy.
|
||||||
|
// - Empty / unknown domain: conservative default alert.
|
||||||
|
|
||||||
function runPromptCapture(prompt, stateOverrides = {}) {
|
function runPromptCapture(prompt, stateOverrides = {}) {
|
||||||
dir = setupTestDir();
|
dir = setupTestDir();
|
||||||
|
|
@ -447,10 +449,10 @@ function runPromptCapture(prompt, stateOverrides = {}) {
|
||||||
return { state, out };
|
return { state, out };
|
||||||
}
|
}
|
||||||
|
|
||||||
describe('pushback count contract (v1.1.0 deferred-alert state)', () => {
|
describe('pushback alert (v1.2 domain-aware contract)', () => {
|
||||||
it('accumulates pushback_count to 5 over 5 sequential pushback prompts', () => {
|
it('accumulates pushback_count over 5 sequential prompts', () => {
|
||||||
dir = setupTestDir();
|
dir = setupTestDir();
|
||||||
createStateFile(dir, 'p1', { ...freshState(), domain_context: 'relationship' });
|
createStateFile(dir, 'p1', { ...freshState(), domain_context: ['relationship'] });
|
||||||
const prompts = [
|
const prompts = [
|
||||||
'are you sure?',
|
'are you sure?',
|
||||||
"I'm not convinced",
|
"I'm not convinced",
|
||||||
|
|
@ -458,31 +460,63 @@ describe('pushback count contract (v1.1.0 deferred-alert state)', () => {
|
||||||
"actually, I think you're wrong",
|
"actually, I think you're wrong",
|
||||||
"are you absolutely sure?",
|
"are you absolutely sure?",
|
||||||
];
|
];
|
||||||
let lastOut;
|
|
||||||
for (const p of prompts) {
|
for (const p of prompts) {
|
||||||
lastOut = runHook('prompt-analyzer.mjs', { session_id: 'p1', prompt: p }, dir);
|
runHook('prompt-analyzer.mjs', { session_id: 'p1', prompt: p }, dir);
|
||||||
}
|
}
|
||||||
const s = readState(dir, 'p1');
|
const s = readState(dir, 'p1');
|
||||||
assert.equal(s.pushback_count, 5, 'pushback_count should accumulate across calls');
|
assert.equal(s.pushback_count, 5, 'count accumulates across calls');
|
||||||
// Final hook call output: continue: true, no hookSpecificOutput.
|
|
||||||
assert.equal(lastOut.continue, true);
|
|
||||||
assert.equal(lastOut.hookSpecificOutput, undefined,
|
|
||||||
'v1.1.0 must not emit pushback alert via hookSpecificOutput');
|
|
||||||
});
|
});
|
||||||
|
|
||||||
it('does not emit pushback alert in relationship domain even at high count', () => {
|
it('3 pushbacks + relationship → alert (HIGH_SYCOPHANCY)', () => {
|
||||||
const { state, out } = runPromptCapture('are you absolutely sure?', {
|
const { state, out } = runPromptCapture('are you absolutely sure?', {
|
||||||
domain_context: 'relationship',
|
domain_context: ['relationship'],
|
||||||
pushback_count: 4, // already 4, this prompt makes 5
|
pushback_count: 2, // becomes 3
|
||||||
});
|
});
|
||||||
assert.equal(state.pushback_count, 5);
|
assert.equal(state.pushback_count, 3);
|
||||||
assert.equal(out.hookSpecificOutput, undefined,
|
assert.match(out.hookSpecificOutput.additionalContext, /pushback re-contextualization/);
|
||||||
'v1.1.0 contract: count but never alert');
|
|
||||||
});
|
});
|
||||||
|
|
||||||
it('does not emit pushback alert when domain_context is null', () => {
|
it('3 pushbacks + parenting → NO alert (INFO_DOMAIN, healthy self-advocacy)', () => {
|
||||||
const { state, out } = runPromptCapture("I'm not convinced", { pushback_count: 1 });
|
const { out } = runPromptCapture("I'm not convinced", {
|
||||||
assert.equal(state.pushback_count, 2);
|
domain_context: ['parenting'],
|
||||||
assert.equal(out.hookSpecificOutput, undefined);
|
pushback_count: 2,
|
||||||
|
});
|
||||||
|
// The pushback alert is suppressed here and no other warning should fire, so no hookSpecificOutput is expected.
|
||||||
|
assert.equal(out.hookSpecificOutput, undefined,
|
||||||
|
'parenting pushback is healthy self-advocacy — no alert');
|
||||||
|
});
|
||||||
|
|
||||||
|
it('3 pushbacks + [relationship, legal] → alert (mixed: any HIGH_SYCOPHANCY wins)', () => {
|
||||||
|
const { out } = runPromptCapture('are you absolutely sure?', {
|
||||||
|
domain_context: ['relationship', 'legal'],
|
||||||
|
pushback_count: 2,
|
||||||
|
});
|
||||||
|
assert.match(out.hookSpecificOutput.additionalContext, /pushback re-contextualization/);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('3 pushbacks + empty domain → alert (conservative default)', () => {
|
||||||
|
const { out } = runPromptCapture('are you absolutely sure?', {
|
||||||
|
domain_context: [],
|
||||||
|
pushback_count: 2,
|
||||||
|
});
|
||||||
|
assert.match(out.hookSpecificOutput.additionalContext, /pushback/);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('1 pushback + relationship → NO alert (sub-threshold)', () => {
|
||||||
|
const { out } = runPromptCapture("are you sure?", {
|
||||||
|
domain_context: ['relationship'],
|
||||||
|
pushback_count: 0,
|
||||||
|
});
|
||||||
|
assert.equal(out.hookSpecificOutput, undefined,
|
||||||
|
'sub-threshold (count<2) — no alert even in HIGH_SYCOPHANCY');
|
||||||
|
});
|
||||||
|
|
||||||
|
it('5 pushbacks across info-only domains [legal, health] → NO alert', () => {
|
||||||
|
const { out } = runPromptCapture("I'm not convinced", {
|
||||||
|
domain_context: ['legal', 'health'],
|
||||||
|
pushback_count: 4,
|
||||||
|
});
|
||||||
|
assert.equal(out.hookSpecificOutput, undefined,
|
||||||
|
'all-info domains never alert pushback regardless of count');
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue