feat(ai-psychosis): add 12 pushback + 4 domain regex patterns + cross-check existing 25

This commit is contained in:
Kjell Tore Guttormsen 2026-05-01 17:10:44 +02:00
commit 6b7aee2bf1
2 changed files with 112 additions and 0 deletions

View file

@ -79,6 +79,34 @@ const valPatterns = [
/isn't\s+it/i,
];
// Pushback patterns — REACTIVE tier (Anthropic-validated + academic-validated)
// Source: research/01-pushback-self-advocacy.md
const pbReactivePatterns = [
/^are you sure\??/i, // validated-by: anthropic-april-2026 (questioning)
/\bi'?m not convinced\b/i, // validated-by: anthropic-april-2026 (questioning)
/\bthat doesn'?t (?:seem|feel) right\b/i, // validated-by: anthropic-april-2026 (questioning)
/\bthat'?s not (?:quite )?what i meant\b/i, // validated-by: anthropic-april-2026 (clarifying)
/\blet me add (?:some )?context\b/i, // validated-by: anthropic-april-2026 (clarifying)
/\bactually,? (?:my situation|i)\b/i, // validated-by: anthropic-april-2026 (clarifying)
/(?:^|[.!?]\s+)i (?:believe|think) (?:you'?re|that'?s) wrong\b/i, // validated-by: arxiv-2508.02087
/\bi don'?t agree(?: with you)?\b/i, // validated-by: arxiv-2508.13743
/\bare you absolutely sure\b/i, // validated-by: arxiv-2508.13743
];
// Pushback patterns — PREEMPTIVE tier (community-derived)
const pbPreemptivePatterns = [
/\bsteelman\b/i, // validated-by: community-multi-source-2025
/\bplay (?:the )?devil'?s advocate\b/i, // validated-by: community-multi-source-2025
/\bargue against (?:this|my)\b/i, // validated-by: community-multi-source-2025
];
// Domain-context: relationship — uses (?:my|our) prefix to avoid false positives
// on technical "function relationship", "database relationship" etc.
const domainRelationshipPatterns = [
/\b(?:my|our) (?:partner|spouse|wife|husband|girlfriend|boyfriend)\b/i,
/\bin our relationship\b/i,
/\b(?:dating|breakup|divorce)\b/i,
/\bromantic(?:ally)? (?:involved|interested)\b/i,
];
for (const p of depPatterns) { if (p.test(prompt)) { depHit = 1; break; } }
for (const p of escPatterns) { if (p.test(prompt)) { escHit = 1; break; } }
for (const p of fatPatterns) { if (p.test(prompt)) { fatHit = 1; break; } }

View file

@ -312,3 +312,87 @@ describe('thresholds and cooldowns', () => {
assert.ok(out.hookSpecificOutput?.additionalContext?.includes('Validation-seeking pattern'));
});
});
// --- v1.1.0 pushback + domain regex (regex-only unit tests) ---
// Local copies of patterns in hooks/scripts/prompt-analyzer.mjs.
// Step 3 adds integration tests via runPrompt; integration tests catch
// pattern divergence between source and tests.
const pbReactivePatterns = [
/^are you sure\??/i,
/\bi'?m not convinced\b/i,
/\bthat doesn'?t (?:seem|feel) right\b/i,
/\bthat'?s not (?:quite )?what i meant\b/i,
/\blet me add (?:some )?context\b/i,
/\bactually,? (?:my situation|i)\b/i,
/(?:^|[.!?]\s+)i (?:believe|think) (?:you'?re|that'?s) wrong\b/i,
/\bi don'?t agree(?: with you)?\b/i,
/\bare you absolutely sure\b/i,
];
const pbPreemptivePatterns = [
/\bsteelman\b/i,
/\bplay (?:the )?devil'?s advocate\b/i,
/\bargue against (?:this|my)\b/i,
];
const domainRelationshipPatterns = [
/\b(?:my|our) (?:partner|spouse|wife|husband|girlfriend|boyfriend)\b/i,
/\bin our relationship\b/i,
/\b(?:dating|breakup|divorce)\b/i,
/\bromantic(?:ally)? (?:involved|interested)\b/i,
];
function matchesAny(patterns, text) {
return patterns.some((p) => p.test(text));
}
describe('pushback reactive patterns', () => {
it('matches "are you sure?"', () => assert.ok(matchesAny(pbReactivePatterns, 'are you sure?')));
it('does not match "tell me what to do" (no pushback)', () => assert.equal(matchesAny(pbReactivePatterns, 'tell me what to do'), false));
it("matches \"i'm not convinced\"", () => assert.ok(matchesAny(pbReactivePatterns, "i'm not convinced this works")));
it('does not match "i am convinced" (no negation)', () => assert.equal(matchesAny(pbReactivePatterns, 'i am convinced this works'), false));
it('matches "that doesn\'t seem right"', () => assert.ok(matchesAny(pbReactivePatterns, "that doesn't seem right to me")));
it('does not match "that seems right" (positive sense)', () => assert.equal(matchesAny(pbReactivePatterns, 'that seems right to me'), false));
it('matches "that\'s not what I meant"', () => assert.ok(matchesAny(pbReactivePatterns, "that's not what I meant by that")));
it('does not match "I meant exactly that"', () => assert.equal(matchesAny(pbReactivePatterns, 'I meant exactly that'), false));
it('matches "let me add context"', () => assert.ok(matchesAny(pbReactivePatterns, 'let me add context — the issue is X')));
it('does not match "I added context to the function"', () => assert.equal(matchesAny(pbReactivePatterns, 'I added context to the function'), false));
it('matches "actually, my situation is different"', () => assert.ok(matchesAny(pbReactivePatterns, 'actually, my situation is different')));
it('does not match "actually that approach works"', () => assert.equal(matchesAny(pbReactivePatterns, 'actually that approach works'), false));
it("matches \"I think you're wrong\"", () => assert.ok(matchesAny(pbReactivePatterns, "I think you're wrong about this")));
it("does not match \"I think we're wrong\" (different pronoun)", () => assert.equal(matchesAny(pbReactivePatterns, "I think we're wrong here"), false));
it("matches \"I don't agree\"", () => assert.ok(matchesAny(pbReactivePatterns, "I don't agree with that conclusion")));
it('does not match "I agree with you"', () => assert.equal(matchesAny(pbReactivePatterns, 'I agree with you fully'), false));
it('matches "are you absolutely sure"', () => assert.ok(matchesAny(pbReactivePatterns, 'are you absolutely sure about that')));
it('does not match "we are sure of the answer" (no questioning frame)', () => assert.equal(matchesAny(pbReactivePatterns, 'we are sure of the answer'), false));
});
describe('pushback preemptive patterns', () => {
it('matches "steelman"', () => assert.ok(matchesAny(pbPreemptivePatterns, 'please steelman this argument')));
it('does not match "steel manufacturing" (no whole-word match)', () => assert.equal(matchesAny(pbPreemptivePatterns, 'the steel manufacturing report'), false));
it("matches \"play devil's advocate\"", () => assert.ok(matchesAny(pbPreemptivePatterns, "can you play devil's advocate here")));
it('does not match "play music" (different verb object)', () => assert.equal(matchesAny(pbPreemptivePatterns, 'play music while coding'), false));
it('matches "argue against this"', () => assert.ok(matchesAny(pbPreemptivePatterns, 'argue against this proposal')));
it('does not match "they argue with each other"', () => assert.equal(matchesAny(pbPreemptivePatterns, 'they argue with each other'), false));
});
describe('domain relationship patterns', () => {
it('matches "my partner won\'t listen"', () => assert.ok(matchesAny(domainRelationshipPatterns, "my partner won't listen")));
it('matches "in our relationship"', () => assert.ok(matchesAny(domainRelationshipPatterns, 'in our relationship things changed')));
it('matches "considering divorce"', () => assert.ok(matchesAny(domainRelationshipPatterns, 'considering divorce after years')));
it('matches "romantically involved"', () => assert.ok(matchesAny(domainRelationshipPatterns, 'we are romantically involved')));
it('does not match "function relationship between input and output" (technical false-positive)', () => assert.equal(matchesAny(domainRelationshipPatterns, 'function relationship between input and output'), false));
it('does not match "database relationship mapping" (technical false-positive)', () => assert.equal(matchesAny(domainRelationshipPatterns, 'database relationship mapping'), false));
it('does not match "the data is updating" (no dating word boundary)', () => assert.equal(matchesAny(domainRelationshipPatterns, 'the data is updating in real time'), false));
it('does not match "romantic comedy film" (no involved/interested suffix)', () => assert.equal(matchesAny(domainRelationshipPatterns, 'watching a romantic comedy film'), false));
});