From 6b7aee2bf1a96b839068b73ab40a865a1b5f9c3c Mon Sep 17 00:00:00 2001
From: Kjell Tore Guttormsen
Date: Fri, 1 May 2026 17:10:44 +0200
Subject: [PATCH] feat(ai-psychosis): add 12 pushback + 4 domain regex patterns + cross-check existing 25

---
 .../hooks/scripts/prompt-analyzer.mjs         | 28 +++++++
 .../tests/prompt-analyzer.test.mjs            | 84 +++++++++++++++++++
 2 files changed, 112 insertions(+)

diff --git a/plugins/ai-psychosis/hooks/scripts/prompt-analyzer.mjs b/plugins/ai-psychosis/hooks/scripts/prompt-analyzer.mjs
index f7005bc..eb510d5 100644
--- a/plugins/ai-psychosis/hooks/scripts/prompt-analyzer.mjs
+++ b/plugins/ai-psychosis/hooks/scripts/prompt-analyzer.mjs
@@ -79,6 +79,34 @@ const valPatterns = [
   /isn't\s+it/i,
 ];
 
+// Pushback patterns — REACTIVE tier (Anthropic-validated + academic-validated)
+// Source: research/01-pushback-self-advocacy.md. All case-insensitive; apostrophes in contractions are optional.
+const pbReactivePatterns = [
+  /^are you sure\??/i, // validated-by: anthropic-april-2026 (questioning); anchored to the start of the input
+  /\bi'?m not convinced\b/i, // validated-by: anthropic-april-2026 (questioning)
+  /\bthat doesn'?t (?:seem|feel) right\b/i, // validated-by: anthropic-april-2026 (questioning)
+  /\bthat'?s not (?:quite )?what i meant\b/i, // validated-by: anthropic-april-2026 (clarifying)
+  /\blet me add (?:some )?context\b/i, // validated-by: anthropic-april-2026 (clarifying)
+  /\bactually,? (?:my situation|i)\b/i, // validated-by: anthropic-april-2026 (clarifying)
+  /(?:^|[.!?]\s+)i (?:believe|think) (?:you'?re|that'?s) wrong\b/i, // validated-by: arxiv-2508.02087; only at input start or after ./!/? plus whitespace
+  /\bi don'?t agree(?: with you)?\b/i, // validated-by: arxiv-2508.13743
+  /\bare you absolutely sure\b/i, // validated-by: arxiv-2508.13743; unlike the first pattern, may appear anywhere in the input
+];
+// Pushback patterns — PREEMPTIVE tier (community-derived)
+const pbPreemptivePatterns = [
+  /\bsteelman\b/i, // validated-by: community-multi-source-2025
+  /\bplay (?:the )?devil'?s advocate\b/i, // validated-by: community-multi-source-2025
+  /\bargue against (?:this|my)\b/i, // validated-by: community-multi-source-2025
+];
+// Domain-context: relationship — partner nouns require a (?:my|our) prefix and "relationship" only matches
+// as "in our relationship", to avoid false positives on technical "function relationship", "database relationship" etc.
+const domainRelationshipPatterns = [
+  /\b(?:my|our) (?:partner|spouse|wife|husband|girlfriend|boyfriend)\b/i, // possessive + partner noun
+  /\bin our relationship\b/i, // exact phrase; a bare "relationship" does not match
+  /\b(?:dating|breakup|divorce)\b/i, // whole words only, so "updating" does not match
+  /\bromantic(?:ally)? (?:involved|interested)\b/i, // "romantic" alone does not match; needs involved/interested
+];
+
 for (const p of depPatterns) { if (p.test(prompt)) { depHit = 1; break; } }
 for (const p of escPatterns) { if (p.test(prompt)) { escHit = 1; break; } }
 for (const p of fatPatterns) { if (p.test(prompt)) { fatHit = 1; break; } }
diff --git a/plugins/ai-psychosis/tests/prompt-analyzer.test.mjs b/plugins/ai-psychosis/tests/prompt-analyzer.test.mjs
index 35c1fbb..60b7fef 100644
--- a/plugins/ai-psychosis/tests/prompt-analyzer.test.mjs
+++ b/plugins/ai-psychosis/tests/prompt-analyzer.test.mjs
@@ -312,3 +312,87 @@ describe('thresholds and cooldowns', () => {
     assert.ok(out.hookSpecificOutput?.additionalContext?.includes('Validation-seeking pattern'));
   });
 });
+
+// --- v1.1.0 pushback + domain regex (regex-only unit tests) ---
+// Local copies of the patterns in hooks/scripts/prompt-analyzer.mjs; keep both in sync.
+// Step 3 adds integration tests via runPrompt; those integration tests catch
+// pattern divergence between source and tests.
+
+const pbReactivePatterns = [ // local copy of the reactive pushback regexes under test
+  /^are you sure\??/i,
+  /\bi'?m not convinced\b/i,
+  /\bthat doesn'?t (?:seem|feel) right\b/i,
+  /\bthat'?s not (?:quite )?what i meant\b/i,
+  /\blet me add (?:some )?context\b/i,
+  /\bactually,? (?:my situation|i)\b/i,
+  /(?:^|[.!?]\s+)i (?:believe|think) (?:you'?re|that'?s) wrong\b/i,
+  /\bi don'?t agree(?: with you)?\b/i,
+  /\bare you absolutely sure\b/i,
+];
+const pbPreemptivePatterns = [ // local copy of the preemptive pushback regexes under test
+  /\bsteelman\b/i,
+  /\bplay (?:the )?devil'?s advocate\b/i,
+  /\bargue against (?:this|my)\b/i,
+];
+const domainRelationshipPatterns = [ // local copy of the relationship-domain regexes under test
+  /\b(?:my|our) (?:partner|spouse|wife|husband|girlfriend|boyfriend)\b/i,
+  /\bin our relationship\b/i,
+  /\b(?:dating|breakup|divorce)\b/i,
+  /\bromantic(?:ally)? (?:involved|interested)\b/i,
+];
+
+function matchesAny(patterns, text) { // true when at least one regex in `patterns` matches `text`
+  return patterns.some((p) => p.test(text)); // no g/y flags on the patterns, so .test() keeps no lastIndex state
+}
+
+describe('pushback reactive patterns', () => { // one positive and one near-miss negative case per phrase
+  it('matches "are you sure?"', () => assert.ok(matchesAny(pbReactivePatterns, 'are you sure?')));
+  it('does not match "tell me what to do" (no pushback)', () => assert.equal(matchesAny(pbReactivePatterns, 'tell me what to do'), false));
+
+  it("matches \"i'm not convinced\"", () => assert.ok(matchesAny(pbReactivePatterns, "i'm not convinced this works")));
+  it('does not match "i am convinced" (no negation)', () => assert.equal(matchesAny(pbReactivePatterns, 'i am convinced this works'), false));
+
+  it('matches "that doesn\'t seem right"', () => assert.ok(matchesAny(pbReactivePatterns, "that doesn't seem right to me")));
+  it('does not match "that seems right" (positive sense)', () => assert.equal(matchesAny(pbReactivePatterns, 'that seems right to me'), false));
+
+  it('matches "that\'s not what I meant"', () => assert.ok(matchesAny(pbReactivePatterns, "that's not what I meant by that")));
+  it('does not match "I meant exactly that"', () => assert.equal(matchesAny(pbReactivePatterns, 'I meant exactly that'), false));
+
+  it('matches "let me add context"', () => assert.ok(matchesAny(pbReactivePatterns, 'let me add context — the issue is X')));
+  it('does not match "I added context to the function"', () => assert.equal(matchesAny(pbReactivePatterns, 'I added context to the function'), false));
+
+  it('matches "actually, my situation is different"', () => assert.ok(matchesAny(pbReactivePatterns, 'actually, my situation is different')));
+  it('does not match "actually that approach works"', () => assert.equal(matchesAny(pbReactivePatterns, 'actually that approach works'), false));
+
+  it("matches \"I think you're wrong\"", () => assert.ok(matchesAny(pbReactivePatterns, "I think you're wrong about this")));
+  it("does not match \"I think we're wrong\" (different pronoun)", () => assert.equal(matchesAny(pbReactivePatterns, "I think we're wrong here"), false));
+
+  it("matches \"I don't agree\"", () => assert.ok(matchesAny(pbReactivePatterns, "I don't agree with that conclusion")));
+  it('does not match "I agree with you"', () => assert.equal(matchesAny(pbReactivePatterns, 'I agree with you fully'), false));
+
+  it('matches "are you absolutely sure"', () => assert.ok(matchesAny(pbReactivePatterns, 'are you absolutely sure about that')));
+  it('does not match "we are sure of the answer" (no questioning frame)', () => assert.equal(matchesAny(pbReactivePatterns, 'we are sure of the answer'), false));
+});
+
+describe('pushback preemptive patterns', () => { // one positive and one near-miss negative case per phrase
+  it('matches "steelman"', () => assert.ok(matchesAny(pbPreemptivePatterns, 'please steelman this argument')));
+  it('does not match "steel manufacturing" (no whole-word match)', () => assert.equal(matchesAny(pbPreemptivePatterns, 'the steel manufacturing report'), false));
+
+  it("matches \"play devil's advocate\"", () => assert.ok(matchesAny(pbPreemptivePatterns, "can you play devil's advocate here")));
+  it('does not match "play music" (different verb object)', () => assert.equal(matchesAny(pbPreemptivePatterns, 'play music while coding'), false));
+
+  it('matches "argue against this"', () => assert.ok(matchesAny(pbPreemptivePatterns, 'argue against this proposal')));
+  it('does not match "they argue with each other"', () => assert.equal(matchesAny(pbPreemptivePatterns, 'they argue with each other'), false));
+});
+
+describe('domain relationship patterns', () => { // four positives (one per pattern), then four false-positive guards
+  it('matches "my partner won\'t listen"', () => assert.ok(matchesAny(domainRelationshipPatterns, "my partner won't listen")));
+  it('matches "in our relationship"', () => assert.ok(matchesAny(domainRelationshipPatterns, 'in our relationship things changed')));
+  it('matches "considering divorce"', () => assert.ok(matchesAny(domainRelationshipPatterns, 'considering divorce after years')));
+  it('matches "romantically involved"', () => assert.ok(matchesAny(domainRelationshipPatterns, 'we are romantically involved')));
+
+  it('does not match "function relationship between input and output" (technical false-positive)', () => assert.equal(matchesAny(domainRelationshipPatterns, 'function relationship between input and output'), false));
+  it('does not match "database relationship mapping" (technical false-positive)', () => assert.equal(matchesAny(domainRelationshipPatterns, 'database relationship mapping'), false));
+  it('does not match "the data is updating" (no dating word boundary)', () => assert.equal(matchesAny(domainRelationshipPatterns, 'the data is updating in real time'), false));
+  it('does not match "romantic comedy film" (no involved/interested suffix)', () => assert.equal(matchesAny(domainRelationshipPatterns, 'watching a romantic comedy film'), false));
+});