From 3041c90115cf152b8ec767b14e917b06c096c216 Mon Sep 17 00:00:00 2001 From: Kjell Tore Guttormsen Date: Fri, 1 May 2026 17:41:30 +0200 Subject: [PATCH] feat(ai-psychosis): /interaction-report adds pushback metrics + reader script --- .../commands/interaction-report.md | 83 +++++++++++++++-- .../hooks/scripts/report-reader.mjs | 83 +++++++++++++++++ .../tests/interaction-report.test.mjs | 90 +++++++++++++++++++ 3 files changed, 251 insertions(+), 5 deletions(-) create mode 100644 plugins/ai-psychosis/hooks/scripts/report-reader.mjs create mode 100644 plugins/ai-psychosis/tests/interaction-report.test.mjs diff --git a/plugins/ai-psychosis/commands/interaction-report.md b/plugins/ai-psychosis/commands/interaction-report.md index 9da5002..cec5926 100644 --- a/plugins/ai-psychosis/commands/interaction-report.md +++ b/plugins/ai-psychosis/commands/interaction-report.md @@ -108,11 +108,15 @@ The file contains two record types interleaved: {"session_id":"abc","start":"2026-04-05T10:00:00Z","hour":10,"is_late_night":false} ``` -**End records** — have `end`, `duration_min`, `tool_count`, `edit_count`, `flags`: +**End records** — have `end`, `duration_min`, `tool_count`, `edit_count`, `flags`, +and (v1.1.0+) `domain_context` at top level plus `pushback` inside `flags`: ```json -{"session_id":"abc","start":"2026-04-05T10:00:00Z","end":"2026-04-05T11:35:00Z","duration_min":95,"tool_count":47,"edit_count":12,"flags":{"dependency":2,"escalation":0,"fatigue":1,"validation":1}} +{"session_id":"abc","start":"2026-04-05T10:00:00Z","end":"2026-04-05T11:35:00Z","duration_min":95,"tool_count":47,"edit_count":12,"domain_context":"relationship","flags":{"dependency":2,"escalation":0,"fatigue":1,"validation":1,"pushback":3}} ``` +Records produced by v1.0.0 omit `domain_context` and `flags.pushback`. +Treat missing values as `null` / `0` — never as `NaN`. + **Error records** — have `note: "no_state_file"`. Ignore these. 
### Filtering @@ -131,13 +135,31 @@ Filter events where `ts` >= cutoff date string. Group by `tool_name` and count. ## Step 6 — Compute statistics -From **end records**: +For session-level aggregates, do NOT recompute totals in the LLM. Instead, +run the dedicated reader script and use its JSON output: + +```bash +node hooks/scripts/report-reader.mjs "${CLAUDE_PLUGIN_DATA}/sessions.jsonl" +``` + +The script outputs a JSON object with the following fields: +- `pushback_total` — sum of `flags.pushback` across all end records +- `relationship_domain_count` — count of records where `domain_context === 'relationship'` +- `null_domain_count`, `other_domain_count` — remaining domain buckets +- `total_end_records` — number of complete sessions +- `flags_total` — totals for dependency / escalation / fatigue / validation / pushback +- `schema_version.v1_0_records` / `v1_1_records` — backward-compat counters + +Use these values directly. The reader handles backward-compatibility with +v1.0.0 records (missing `pushback` / `domain_context`) and never produces NaN. It applies no period cutoff: its totals cover every end record in the file. + +In addition, derive these from the JSONL records you read in Step 4: - Total sessions (count of end records in period) - Average session duration (`sum(duration_min) / count`) - Total tool calls (`sum(tool_count)`) - Average edit ratio (`sum(edit_count) / sum(tool_count) * 100`, as percentage) -- Flag totals: `sum(flags.dependency)`, `sum(flags.escalation)`, `sum(flags.fatigue)`, `sum(flags.validation)` -- Average flags per session for each category +- Average flags per session per category (use `flags_total` from the reader, + divided by `total_end_records`) From **start records**: - Late-night sessions: count where `is_late_night` is true @@ -185,6 +207,46 @@ Output the report as markdown. 
Use this exact structure: | Fatigue signals | {N} | {avg} | | Validation-seeking | {N} | {avg} | +### Pushback (protective signal) + +| Metric | Value | +|--------|-------| +| Total pushback events | {N} | +| Per session | {avg} | +| Sessions with at least one pushback | {N} of {total} | + +User pushback is reported as a *protective signal*, not a problem. Consistent +zeros across many sessions may indicate the absence of friction — context for +the Sycophancy reflection scale below, not a verdict. + +### Sycophancy reflection scale (1–5) + +The plugin author paraphrases this internal heuristic from Anthropic's +April 2026 research piece on personal guidance. It is not a verbatim metric +from any Anthropic publication. + +| Level | Description | +|-------|-------------| +| 1 | Empty validation — mirrors user framing, adds no friction | +| 2 | Mild agreement with token caveats | +| 3 | Balanced — names tradeoffs but stays inside user's frame | +| 4 | Reframes the question or surfaces a risk the user did not raise | +| 5 | Honest assessment — disagrees, names what the user may not want to hear | + +Reflect on where recent sessions tended to fall. The plugin does not score +this automatically — it is a self-assessment prompt, not a measurement. + +### Domain context + +| Domain | Sessions | +|--------|----------| +| Relationship-flavored | {relationship_domain_count} | +| Other / not classified | {null_domain_count + other_domain_count} | + +Domain detection is heuristic and conservative. A "relationship" tag means +patterns associated with relational decision support appeared at least once +during the session, not that the entire session was about relationships. + ### Tool Usage (top 10) | Tool | Count | % | @@ -209,6 +271,17 @@ Output the report as markdown. Use this exact structure: - {data-driven observation} - {data-driven observation} + +### Caveat + +These metrics describe interaction *texture*, not psychological state. 
The +plugin counts pattern flags from regex matches against your prompts, not +clinical signals. Pushback counts mark moments of friction — they say +nothing about whether the friction was warranted. + +For empirical context on AI pushback and sycophancy, see Cheng et al., +"Sycophancy in conversational AI" (Science, 2025), which informed the +"pushback as protective signal" framing used here. ``` ## Step 8 — Tone and privacy rules diff --git a/plugins/ai-psychosis/hooks/scripts/report-reader.mjs b/plugins/ai-psychosis/hooks/scripts/report-reader.mjs new file mode 100644 index 0000000..b4579bc --- /dev/null +++ b/plugins/ai-psychosis/hooks/scripts/report-reader.mjs @@ -0,0 +1,83 @@ +// report-reader.mjs — Aggregates sessions.jsonl into a JSON summary. +// Dual-mode: importable (named exports) or directly executable. +// Backward-compatible with v1.0.0 records that lack pushback / domain_context. + +import { readFileSync, existsSync } from 'fs'; + +export function readSessions(path) { + if (!existsSync(path)) return []; + return readFileSync(path, 'utf8') + .split('\n') + .filter(Boolean) + .map(line => { + try { return JSON.parse(line); } catch { return null; } + }) + .filter(Boolean); +} + +export function aggregateSessions(sessions) { + let pushback_total = 0; + let relationship_domain_count = 0; + let other_domain_count = 0; + let null_domain_count = 0; + let v1_0_records = 0; + let v1_1_records = 0; + + let total_end_records = 0; + let total_dependency = 0; + let total_escalation = 0; + let total_fatigue = 0; + let total_validation = 0; + + for (const rec of sessions) { + if (!rec || rec.note === 'no_state_file') continue; + if (rec.duration_min === undefined) continue; + + total_end_records += 1; + const flags = rec.flags || {}; + + const pushback = flags.pushback; + if (pushback === undefined || pushback === null) v1_0_records += 1; + else v1_1_records += 1; + + pushback_total += Number(pushback) || 0; + total_dependency += Number(flags.dependency) || 0; + 
total_escalation += Number(flags.escalation) || 0; + total_fatigue += Number(flags.fatigue) || 0; + total_validation += Number(flags.validation) || 0; + + const dc = rec.domain_context; + if (dc === null || dc === undefined) null_domain_count += 1; + else if (dc === 'relationship') relationship_domain_count += 1; + else other_domain_count += 1; + } + + return { + pushback_total, + relationship_domain_count, + other_domain_count, + null_domain_count, + total_end_records, + flags_total: { + dependency: total_dependency, + escalation: total_escalation, + fatigue: total_fatigue, + validation: total_validation, + pushback: pushback_total, + }, + schema_version: { + v1_0_records, + v1_1_records, + }, + }; +} + +if (import.meta.url === `file://${process.argv[1]}`) { + const path = process.argv[2]; + if (!path) { + process.stderr.write('Usage: node report-reader.mjs \n'); + process.exit(1); + } + const result = aggregateSessions(readSessions(path)); + process.stdout.write(JSON.stringify(result, null, 2) + '\n'); +} diff --git a/plugins/ai-psychosis/tests/interaction-report.test.mjs b/plugins/ai-psychosis/tests/interaction-report.test.mjs new file mode 100644 index 0000000..a5c8615 --- /dev/null +++ b/plugins/ai-psychosis/tests/interaction-report.test.mjs @@ -0,0 +1,90 @@ +// Tests for hooks/scripts/report-reader.mjs. +// Verifies aggregate computation, domain counting, and backward-compat with +// v1.0.0 records that predate pushback / domain_context fields. 
// Tests for hooks/scripts/report-reader.mjs.
// Each test writes a JSONL fixture to a temp directory, runs the reader as a
// child process, and checks the JSON it prints: aggregate totals, domain
// buckets, and handling of v1.0.0 records without pushback / domain_context.

import { test } from 'node:test';
import { strictEqual } from 'node:assert';
import { execFileSync } from 'child_process';
import { mkdtempSync, rmSync, writeFileSync } from 'fs';
import { join } from 'path';
import { tmpdir } from 'os';
import { fileURLToPath } from 'url';

// Derived from import.meta.url so the suite does not depend on
// import.meta.dirname, which only newer Node releases provide.
const SCRIPT = fileURLToPath(new URL('../hooks/scripts/report-reader.mjs', import.meta.url));

/**
 * Serialise fixture objects as JSONL (one JSON document per line).
 *
 * @param {object[]} records - Fixture records.
 * @returns {string} JSONL text with a trailing newline.
 */
function toJsonl(records) {
  return records.map((rec) => JSON.stringify(rec)).join('\n') + '\n';
}

/**
 * Run the reader script against the given JSONL content.
 *
 * The script is started with execFileSync and the current Node binary, so no
 * shell is involved and paths containing spaces are passed through intact.
 * The temp directory is removed even if writing or running fails.
 *
 * @param {string} jsonlContent - Content written to a temporary sessions.jsonl.
 * @returns {object} The JSON object the script printed on stdout.
 */
function runReader(jsonlContent) {
  const dir = mkdtempSync(join(tmpdir(), 'ia-report-'));
  try {
    const path = join(dir, 'sessions.jsonl');
    writeFileSync(path, jsonlContent);
    const stdout = execFileSync(process.execPath, [SCRIPT, path], {
      encoding: 'utf8',
      timeout: 5000,
    });
    return JSON.parse(stdout);
  } finally {
    rmSync(dir, { recursive: true, force: true });
  }
}

test('pushback_total matches sum across v1.1.0 records', () => {
  const result = runReader(toJsonl([
    { session_id: 'a', start: '2026-04-10T10:00:00Z', end: '2026-04-10T11:00:00Z',
      duration_min: 60, tool_count: 10, edit_count: 2,
      domain_context: null,
      flags: { dependency: 0, escalation: 0, fatigue: 0, validation: 0, pushback: 3 } },
    { session_id: 'b', start: '2026-04-11T10:00:00Z', end: '2026-04-11T11:00:00Z',
      duration_min: 60, tool_count: 5, edit_count: 1,
      domain_context: 'relationship',
      flags: { dependency: 1, escalation: 0, fatigue: 0, validation: 0, pushback: 2 } },
    { session_id: 'c', start: '2026-04-12T10:00:00Z', end: '2026-04-12T11:00:00Z',
      duration_min: 60, tool_count: 5, edit_count: 1,
      domain_context: null,
      flags: { dependency: 0, escalation: 0, fatigue: 0, validation: 0, pushback: 0 } },
  ]));
  strictEqual(result.pushback_total, 5);
  strictEqual(result.flags_total.pushback, 5);
  strictEqual(result.total_end_records, 3);
});

test('relationship_domain_count matches fixture count', () => {
  const noFlags = { dependency: 0, escalation: 0, fatigue: 0, validation: 0, pushback: 0 };
  const result = runReader(toJsonl([
    { session_id: 'a', duration_min: 30, domain_context: 'relationship', flags: { ...noFlags } },
    { session_id: 'b', duration_min: 30, domain_context: 'relationship',
      flags: { ...noFlags, pushback: 1 } },
    { session_id: 'c', duration_min: 30, domain_context: null, flags: { ...noFlags } },
    // No domain_context key at all: counted in the same bucket as null.
    { session_id: 'd', duration_min: 30, flags: { ...noFlags } },
  ]));
  strictEqual(result.relationship_domain_count, 2);
  strictEqual(result.null_domain_count, 2);
  strictEqual(result.other_domain_count, 0);
});

test('backward-compat: v1.0.0 records without pushback/domain do not produce NaN', () => {
  const result = runReader(toJsonl([
    // v1.0.0 — no pushback in flags, no domain_context at top level
    { session_id: 'old', start: '2026-03-01T10:00:00Z', end: '2026-03-01T11:00:00Z',
      duration_min: 60, tool_count: 10, edit_count: 2,
      flags: { dependency: 1, escalation: 0, fatigue: 1, validation: 0 } },
    // v1.1.0 — full schema
    { session_id: 'new', start: '2026-04-10T10:00:00Z', end: '2026-04-10T11:00:00Z',
      duration_min: 60, tool_count: 5, edit_count: 1,
      domain_context: 'relationship',
      flags: { dependency: 0, escalation: 0, fatigue: 0, validation: 0, pushback: 4 } },
    // start-only record (must be skipped)
    { session_id: 'start-only', start: '2026-04-10T09:00:00Z', hour: 9, is_late_night: false },
    // error record (must be skipped)
    { session_id: 'err', end: '2026-04-10T12:00:00Z', note: 'no_state_file' },
  ]));
  strictEqual(result.pushback_total, 4);
  // NaN serialises to null in JSON, so a Number.isNaN check on parsed output
  // can never fail; require finite numbers instead.
  strictEqual(Number.isFinite(result.pushback_total), true);
  for (const value of Object.values(result.flags_total)) {
    strictEqual(Number.isFinite(value), true);
  }
  strictEqual(result.total_end_records, 2);
  strictEqual(result.schema_version.v1_0_records, 1);
  strictEqual(result.schema_version.v1_1_records, 1);
  strictEqual(result.flags_total.dependency, 1);
  strictEqual(result.flags_total.fatigue, 1);
});