feat(ai-psychosis): add 12 pushback + 4 domain regex patterns + cross-check existing 25
This commit is contained in:
parent
080f2414ad
commit
6b7aee2bf1
2 changed files with 112 additions and 0 deletions
|
|
@ -79,6 +79,34 @@ const valPatterns = [
|
|||
/isn't\s+it/i,
|
||||
];
|
||||
|
||||
// Pushback patterns — REACTIVE tier (Anthropic-validated + academic-validated)
// Source: research/01-pushback-self-advocacy.md
//
// Each entry is a case-insensitive regex; the trailing `validated-by` tag
// records the provenance of the phrase. The list is meant to be scanned with
// `.test()` and treated as "any match counts".
const pbReactivePatterns = [
  // Anchored with `^` (no `m` flag), so this only fires when the prompt
  // starts with the phrase. The trailing `\??` is optional and therefore
  // does not change which prompts match.
  /^are you sure\??/i, // validated-by: anthropic-april-2026 (questioning)
  /\bi'?m not convinced\b/i, // validated-by: anthropic-april-2026 (questioning)
  /\bthat doesn'?t (?:seem|feel) right\b/i, // validated-by: anthropic-april-2026 (questioning)
  /\bthat'?s not (?:quite )?what i meant\b/i, // validated-by: anthropic-april-2026 (clarifying)
  /\blet me add (?:some )?context\b/i, // validated-by: anthropic-april-2026 (clarifying)
  // NOTE(review): the `i` alternative matches any "actually I ..." /
  // "actually, I ..." sentence, not only corrections — confirm this breadth
  // is intended.
  /\bactually,? (?:my situation|i)\b/i, // validated-by: anthropic-april-2026 (clarifying)
  // Only matches at the start of the prompt or right after sentence-ending
  // punctuation followed by whitespace.
  /(?:^|[.!?]\s+)i (?:believe|think) (?:you'?re|that'?s) wrong\b/i, // validated-by: arxiv-2508.02087
  /\bi don'?t agree(?: with you)?\b/i, // validated-by: arxiv-2508.13743
  /\bare you absolutely sure\b/i, // validated-by: arxiv-2508.13743
];
|
||||
// Pushback patterns — PREEMPTIVE tier (community-derived)
//
// Case-insensitive phrases with which a user asks for counter-arguments.
// All three are word-boundary anchored, so they can match anywhere in the
// prompt but not inside a longer word.
const pbPreemptivePatterns = [
  /\bsteelman\b/i, // validated-by: community-multi-source-2025
  /\bplay (?:the )?devil'?s advocate\b/i, // validated-by: community-multi-source-2025
  /\bargue against (?:this|my)\b/i, // validated-by: community-multi-source-2025
];
|
||||
// Domain-context: relationship — uses (?:my|our) prefix to avoid false positives
// on technical "function relationship", "database relationship" etc.
//
// Only the first pattern carries the possessive prefix; the second requires
// the literal phrase "in our relationship", and the last two match on their
// own keywords (word-boundary anchored, case-insensitive).
const domainRelationshipPatterns = [
  /\b(?:my|our) (?:partner|spouse|wife|husband|girlfriend|boyfriend)\b/i,
  /\bin our relationship\b/i,
  // `\b` keeps "dating" from matching inside words such as "updating".
  /\b(?:dating|breakup|divorce)\b/i,
  /\bromantic(?:ally)? (?:involved|interested)\b/i,
];
|
||||
|
||||
// Scan each pattern family against `prompt`. A family's hit flag is set to 1
// on the first regex that matches and the scan of that family stops there;
// when nothing matches, the flag is left as it was.
for (const pattern of depPatterns) {
  if (pattern.test(prompt)) {
    depHit = 1;
    break;
  }
}
for (const pattern of escPatterns) {
  if (pattern.test(prompt)) {
    escHit = 1;
    break;
  }
}
for (const pattern of fatPatterns) {
  if (pattern.test(prompt)) {
    fatHit = 1;
    break;
  }
}
|
||||
|
|
|
|||
|
|
@ -312,3 +312,87 @@ describe('thresholds and cooldowns', () => {
|
|||
assert.ok(out.hookSpecificOutput?.additionalContext?.includes('Validation-seeking pattern'));
|
||||
});
|
||||
});
|
||||
|
||||
// --- v1.1.0 pushback + domain regex (regex-only unit tests) ---
|
||||
// Local copies of patterns in hooks/scripts/prompt-analyzer.mjs.
|
||||
// Step 3 adds integration tests via runPrompt; integration tests catch
|
||||
// pattern divergence between source and tests.
|
||||
|
||||
// Test-local copy of the reactive pushback patterns; keep it identical to the
// list in hooks/scripts/prompt-analyzer.mjs.
const pbReactivePatterns = [
  /^are you sure\??/i,
  /\bi'?m not convinced\b/i,
  /\bthat doesn'?t (?:seem|feel) right\b/i,
  /\bthat'?s not (?:quite )?what i meant\b/i,
  /\blet me add (?:some )?context\b/i,
  /\bactually,? (?:my situation|i)\b/i,
  /(?:^|[.!?]\s+)i (?:believe|think) (?:you'?re|that'?s) wrong\b/i,
  /\bi don'?t agree(?: with you)?\b/i,
  /\bare you absolutely sure\b/i,
];
|
||||
// Test-local copy of the preemptive pushback patterns; keep it identical to
// the list in hooks/scripts/prompt-analyzer.mjs.
const pbPreemptivePatterns = [
  /\bsteelman\b/i,
  /\bplay (?:the )?devil'?s advocate\b/i,
  /\bargue against (?:this|my)\b/i,
];
|
||||
// Test-local copy of the relationship-domain patterns; keep it identical to
// the list in hooks/scripts/prompt-analyzer.mjs.
const domainRelationshipPatterns = [
  /\b(?:my|our) (?:partner|spouse|wife|husband|girlfriend|boyfriend)\b/i,
  /\bin our relationship\b/i,
  /\b(?:dating|breakup|divorce)\b/i,
  /\bromantic(?:ally)? (?:involved|interested)\b/i,
];
|
||||
|
||||
/**
 * Report whether at least one regex in `patterns` matches `text`.
 *
 * Evaluation stops at the first matching pattern. An empty list yields
 * `false`.
 *
 * @param {RegExp[]} patterns - Regexes to try, in order.
 * @param {string} text - The string to test.
 * @returns {boolean} `true` if any pattern matches, otherwise `false`.
 */
function matchesAny(patterns, text) {
  return patterns.some((pattern) => {
    // `test()` on a global or sticky regex resumes from `lastIndex`, so a
    // pattern reused across calls could report a false negative. Start every
    // check from the beginning of `text`.
    if (pattern.global || pattern.sticky) {
      pattern.lastIndex = 0;
    }
    return pattern.test(text);
  });
}
|
||||
|
||||
// One positive and one negative case per reactive pushback regex, in the
// same order as the entries of pbReactivePatterns.
describe('pushback reactive patterns', () => {
  it('matches "are you sure?"', () => assert.ok(matchesAny(pbReactivePatterns, 'are you sure?')));
  it('does not match "tell me what to do" (no pushback)', () => assert.equal(matchesAny(pbReactivePatterns, 'tell me what to do'), false));

  it("matches \"i'm not convinced\"", () => assert.ok(matchesAny(pbReactivePatterns, "i'm not convinced this works")));
  it('does not match "i am convinced" (no negation)', () => assert.equal(matchesAny(pbReactivePatterns, 'i am convinced this works'), false));

  it('matches "that doesn\'t seem right"', () => assert.ok(matchesAny(pbReactivePatterns, "that doesn't seem right to me")));
  it('does not match "that seems right" (positive sense)', () => assert.equal(matchesAny(pbReactivePatterns, 'that seems right to me'), false));

  it('matches "that\'s not what I meant"', () => assert.ok(matchesAny(pbReactivePatterns, "that's not what I meant by that")));
  it('does not match "I meant exactly that"', () => assert.equal(matchesAny(pbReactivePatterns, 'I meant exactly that'), false));

  it('matches "let me add context"', () => assert.ok(matchesAny(pbReactivePatterns, 'let me add context — the issue is X')));
  it('does not match "I added context to the function"', () => assert.equal(matchesAny(pbReactivePatterns, 'I added context to the function'), false));

  it('matches "actually, my situation is different"', () => assert.ok(matchesAny(pbReactivePatterns, 'actually, my situation is different')));
  it('does not match "actually that approach works"', () => assert.equal(matchesAny(pbReactivePatterns, 'actually that approach works'), false));

  it("matches \"I think you're wrong\"", () => assert.ok(matchesAny(pbReactivePatterns, "I think you're wrong about this")));
  it("does not match \"I think we're wrong\" (different pronoun)", () => assert.equal(matchesAny(pbReactivePatterns, "I think we're wrong here"), false));

  it("matches \"I don't agree\"", () => assert.ok(matchesAny(pbReactivePatterns, "I don't agree with that conclusion")));
  it('does not match "I agree with you"', () => assert.equal(matchesAny(pbReactivePatterns, 'I agree with you fully'), false));

  it('matches "are you absolutely sure"', () => assert.ok(matchesAny(pbReactivePatterns, 'are you absolutely sure about that')));
  it('does not match "we are sure of the answer" (no questioning frame)', () => assert.equal(matchesAny(pbReactivePatterns, 'we are sure of the answer'), false));
});
|
||||
|
||||
// One positive and one negative case per preemptive pushback regex.
describe('pushback preemptive patterns', () => {
  it('matches "steelman"', () => assert.ok(matchesAny(pbPreemptivePatterns, 'please steelman this argument')));
  it('does not match "steel manufacturing" (no whole-word match)', () => assert.equal(matchesAny(pbPreemptivePatterns, 'the steel manufacturing report'), false));

  it("matches \"play devil's advocate\"", () => assert.ok(matchesAny(pbPreemptivePatterns, "can you play devil's advocate here")));
  it('does not match "play music" (different verb object)', () => assert.equal(matchesAny(pbPreemptivePatterns, 'play music while coding'), false));

  it('matches "argue against this"', () => assert.ok(matchesAny(pbPreemptivePatterns, 'argue against this proposal')));
  it('does not match "they argue with each other"', () => assert.equal(matchesAny(pbPreemptivePatterns, 'they argue with each other'), false));
});
|
||||
|
||||
// Positive cases cover each of the four relationship regexes; negative cases
// cover technical and unrelated phrasings that must not trigger them.
describe('domain relationship patterns', () => {
  it('matches "my partner won\'t listen"', () => assert.ok(matchesAny(domainRelationshipPatterns, "my partner won't listen")));
  it('matches "in our relationship"', () => assert.ok(matchesAny(domainRelationshipPatterns, 'in our relationship things changed')));
  it('matches "considering divorce"', () => assert.ok(matchesAny(domainRelationshipPatterns, 'considering divorce after years')));
  it('matches "romantically involved"', () => assert.ok(matchesAny(domainRelationshipPatterns, 'we are romantically involved')));

  it('does not match "function relationship between input and output" (technical false-positive)', () => assert.equal(matchesAny(domainRelationshipPatterns, 'function relationship between input and output'), false));
  it('does not match "database relationship mapping" (technical false-positive)', () => assert.equal(matchesAny(domainRelationshipPatterns, 'database relationship mapping'), false));
  it('does not match "the data is updating" (no dating word boundary)', () => assert.equal(matchesAny(domainRelationshipPatterns, 'the data is updating in real time'), false));
  it('does not match "romantic comedy film" (no involved/interested suffix)', () => assert.equal(matchesAny(domainRelationshipPatterns, 'watching a romantic comedy film'), false));
});
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue