// stakes-matrix.test.mjs — verifies that v1.2 domain-stakes weighting applies
// to the new v1.2 alerts only. v1.1.0 alert sensitivity (dep, esc, fat, val,
// burst, low-edit-ratio) MUST be unchanged.

import { describe, it, afterEach } from 'node:test';
import assert from 'node:assert/strict';
import { runHook, setupTestDir, cleanupTestDir, createStateFile, readState } from './test-helper.mjs';
// Temp directory created by the most recent runPromptCapture() call.
let dir;

// Remove that directory after every test, then clear the handle so that a
// later test which fails before creating its own directory cannot trigger a
// second cleanup of an already-removed path.
afterEach(() => {
  if (!dir) return;
  try {
    cleanupTestDir(dir);
  } finally {
    dir = undefined;
  }
});
/**
 * Build a baseline session-state object for a test.
 *
 * Every counter and flag starts at zero (or null), `start_epoch` is set to
 * 60 seconds before the current time, and `start_iso` is a fixed timestamp.
 * A new object (including a new `user_info_flags` object) is returned on
 * each call, so callers may mutate or spread it freely.
 *
 * @returns {object} Fresh state suitable for passing to createStateFile().
 */
function freshState() {
  const nowSeconds = Math.floor(Date.now() / 1000);

  return {
    // Session timing.
    start_epoch: nowSeconds - 60,
    start_iso: '2026-05-01T10:00:00Z',

    // Tool activity counters.
    tool_count: 0,
    edit_count: 0,
    last_event_epoch: 0,
    burst_count: 0,

    // Per-category flag counters.
    dep_flags: 0,
    esc_flags: 0,
    fatigue_flags: 0,
    val_flags: 0,

    // Fields the tests below override to exercise stakes weighting.
    pushback_count: 0,
    domain_context: null,
    user_info_class: null,
    user_info_flags: { yes_people: 0, yes_digital: 0, no: 0 },
    turn_count: 0,
    valseek_count: 0,
    valseek_flag: 0,
    last_warning_epoch: 0,
  };
}
/**
 * Run the prompt-analyzer hook once against a freshly seeded session.
 *
 * Creates a new test directory (stored in the module-level `dir` so the
 * afterEach hook can remove it), writes a state file for session `s-stake`
 * made of freshState() with `stateOverrides` applied on top, invokes
 * `prompt-analyzer.mjs` with the given prompt, and reads the state back.
 *
 * @param {string} prompt - Prompt text passed to the hook.
 * @param {object} [stateOverrides={}] - Fields that replace freshState() defaults.
 * @returns {{ state: *, out: * }} The state read back after the hook ran and
 *   the value returned by runHook().
 */
function runPromptCapture(prompt, stateOverrides = {}) {
  const sessionId = 's-stake';
  const seededState = { ...freshState(), ...stateOverrides };

  // Assign to the shared handle first so cleanup still happens if a later
  // step throws.
  dir = setupTestDir();
  createStateFile(dir, sessionId, seededState);

  const out = runHook('prompt-analyzer.mjs', { session_id: sessionId, prompt }, dir);
  const state = readState(dir, sessionId);

  return { state, out };
}
// Validation-seeking alert on the high-stakes path: the expected thresholds
// below follow the arithmetic stated in each test title/comment.
describe('stakes-matrix on valseek HIGH_STAKES path', () => {
  it('valseek_count=2 in legal (weight 1.5) → effective threshold 2.0 → fires', () => {
    // 3 / 1.5 = 2.0; valseek_count after this prompt becomes 2; 2 >= 2.0 → fires.
    const { out } = runPromptCapture("am I crazy?", {
      domain_context: ['legal'],
      valseek_count: 1,
    });
    // `?.`/`??` so a missing alert is reported as an assertion failure
    // rather than a TypeError on `undefined.additionalContext`.
    assert.match(out.hookSpecificOutput?.additionalContext ?? '', /high-stakes/);
  });

  it('valseek_count=1 in legal → 1 < 2.0 → no alert', () => {
    const { out } = runPromptCapture("am I crazy?", {
      domain_context: ['legal'],
      valseek_count: 0, // becomes 1
    });
    assert.equal(out.hookSpecificOutput, undefined);
  });

  it('valseek_count=4 in consumer (weight 1.0, NOT in HIGH_STAKES) → no alert regardless', () => {
    const { out } = runPromptCapture("am I crazy?", {
      domain_context: ['consumer'],
      valseek_count: 3, // becomes 4
    });
    assert.equal(out.hookSpecificOutput, undefined,
      'consumer is outside HIGH_STAKES_DOMAINS — high-stakes path never fires');
  });

  it('valseek_count=2 in legal → fires; same count in professional (INFO only) → no alert', () => {
    const legal = runPromptCapture("am I crazy?", {
      domain_context: ['legal'],
      valseek_count: 1,
    });
    // runPromptCapture overwrites `dir` on every call and afterEach only
    // removes the latest directory, so release the first one here before
    // the second call would otherwise orphan it.
    cleanupTestDir(dir);
    dir = undefined;

    const pro = runPromptCapture("am I crazy?", {
      domain_context: ['professional'],
      valseek_count: 1,
    });

    assert.match(legal.out.hookSpecificOutput?.additionalContext ?? '', /high-stakes/);
    assert.equal(pro.out.hookSpecificOutput, undefined,
      'professional is in INFO_DOMAINS but not HIGH_STAKES_DOMAINS');
  });
});
// Pushback alert: checks that a second pushback in a 'relationship' domain
// context produces the re-contextualization message.
describe('stakes-matrix on pushback HIGH_SYCOPHANCY path', () => {
  it('pushback_count=2 in relationship (weight 1.3) → 2/1.3 ≈ 1.54 → fires', () => {
    const { out } = runPromptCapture("are you sure?", {
      domain_context: ['relationship'],
      pushback_count: 1, // becomes 2
    });
    // `?.`/`??` so a missing alert is reported as an assertion failure
    // rather than a TypeError on `undefined.additionalContext`.
    assert.match(
      out.hookSpecificOutput?.additionalContext ?? '',
      /pushback re-contextualization/,
    );
  });
});
// Regression guard: the domain weight must not leak into the thresholds of
// the alerts that already existed in v1.1.0.
describe('stakes-matrix MUST NOT alter v1.1.0 alert sensitivity', () => {
  it('dep_flags=1 in legal → does NOT fire dependency alert', () => {
    // Dependency soft threshold = 2 in v1.1.0. If stakes-matrix bled into this,
    // 2/1.5 = 1.33 → dep_flags=1 might trigger. It must NOT.
    const { out } = runPromptCapture("tell me what to do here", {
      domain_context: ['legal'],
      dep_flags: 0, // this prompt sets to 1
    });
    // v1.1.0 dep alert requires >= 2 flags, regardless of domain weight.
    // Output should not contain dep "Dependency language" wording.
    const text = out.hookSpecificOutput?.additionalContext || '';
    assert.doesNotMatch(text, /Dependency language/,
      'v1.1.0 dependency threshold must not be lowered by stakes weight');
  });

  it('val_flags=2 in legal → does NOT fire validation-seeking v1.1.0 alert', () => {
    // v1.1.0 val_flags threshold is 3. Stakes weight must not lower it to 2.
    const { out } = runPromptCapture("right?", {
      domain_context: ['legal'],
      val_flags: 1, // becomes 2
    });
    const text = out.hookSpecificOutput?.additionalContext || '';
    // The v1.1.0 wording is "Validation-seeking pattern detected (...)".
    assert.doesNotMatch(text, /Validation-seeking pattern detected/,
      'v1.1.0 val_flags threshold (3) must not be lowered by stakes weight');
  });
});