feat(graceful-handoff): 2.0 — Stop hook auto-execute + pipeline staging fix [skip-docs]

Step 5 of v2.0 plan + critical pipeline fix. Stop hook (hooks/scripts/stop-context-monitor.mjs): - Estimates context usage from transcript size (chars/3.5 / window_size) - At ≥70%, spawns handoff-pipeline.mjs --auto --no-push synchronously - Reads context_window_size from payload (supports 1M windows) - Lock file at <transcript_dir>/.handoff-lock-<session_id> - Gracefully handles missing CLAUDE_PLUGIN_ROOT, missing transcript Pipeline fix (scripts/handoff-pipeline.mjs): - REMOVED `git add -A` (CLAUDE.md anti-pattern: scoops up unrelated WIP) - Now stages ONLY artifact + REMEMBER.md/TODO.md if present - New regression test 'pipeline never stages unrelated dirty files' Tests: 7 stop-hook tests use stub pipeline (no real git operations); 11 pipeline tests including new regression for explicit staging.
2026-05-01 05:57:41 +02:00 · 2026-05-01 05:57:41 +02:00 · 81aba9a5f5
commit 81aba9a5f5
parent 1efb1b3176
4 changed files with 357 additions and 5 deletions
--- a/plugins/graceful-handoff/tests/hooks/stop-context-monitor.test.mjs
+++ b/plugins/graceful-handoff/tests/hooks/stop-context-monitor.test.mjs
@ -0,0 +1,140 @@
+// stop-context-monitor.test.mjs — Tests for Stop hook auto-execute logic.
+// Uses runHookWithEnv to spawn the script as a subprocess and inspect its
+// behavior via temporary fixture files (real fs) — simpler than mocking imports.
+
+import { test } from 'node:test';
+import { strict as assert } from 'node:assert';
+import { dirname, join } from 'node:path';
+import { fileURLToPath } from 'node:url';
+import { mkdtempSync, writeFileSync, existsSync, rmSync, statSync, mkdirSync } from 'node:fs';
+import { tmpdir } from 'node:os';
+import { runHookWithEnv } from './hook-helper.mjs';
+
+const __dirname = dirname(fileURLToPath(import.meta.url));
+const HOOK = join(__dirname, '..', '..', 'hooks', 'scripts', 'stop-context-monitor.mjs');
+const PLUGIN_ROOT = join(__dirname, '..', '..');
+
+function setup(transcriptSize) {
+  const dir = mkdtempSync(join(tmpdir(), 'stop-hook-'));
+  const transcriptPath = join(dir, 'transcript.jsonl');
+  // Generate transcript content of exact size (chars)
+  writeFileSync(transcriptPath, 'a'.repeat(transcriptSize), 'utf-8');
+  return { dir, transcriptPath };
+}
+
+// Build a stub plugin root with a fake handoff-pipeline.mjs that returns
+// canned JSON. This prevents tests from invoking the real pipeline (which
+// does git operations against whatever repo the test process happens to be in).
+function makeStubPluginRoot() {
+  const dir = mkdtempSync(join(tmpdir(), 'stub-plugin-root-'));
+  const scriptsDir = join(dir, 'scripts');
+  mkdirSync(scriptsDir);
+  const stub = `#!/usr/bin/env node
+process.stdout.write(JSON.stringify({
+  handoff_type: 'plugin-arbeid',
+  write_dir: '/tmp/stub',
+  artifact_path: '/tmp/stub/NEXT-SESSION-PROMPT.local.md',
+  next_steps: [],
+  git_status: { branch: 'main', dirty: false, ahead: 0 },
+  commit_message: '',
+  actions_taken: ['stub-no-op'],
+  errors: [],
+}));
+process.exit(0);
+`;
+  writeFileSync(join(scriptsDir, 'handoff-pipeline.mjs'), stub, 'utf-8');
+  return dir;
+}
+
+function cleanup(dir) {
+  rmSync(dir, { recursive: true, force: true });
+}
+
+test('estimated < 70%: no spawn, no lock file', async () => {
+  // 200k window × 70% threshold = 140k tokens × 3.5 chars = 490k chars
+  // Use 400k chars (~57%) — well under threshold
+  const { dir, transcriptPath } = setup(400_000);
+  const res = await runHookWithEnv(HOOK, {
+    transcript_path: transcriptPath,
+    session_id: 'test-1',
+    context_window: { context_window_size: 200_000 },
+  }, { CLAUDE_PLUGIN_ROOT: PLUGIN_ROOT });
+  assert.equal(res.code, 0);
+  assert.equal(res.stdout.trim(), '', `expected silent, got: ${res.stdout}`);
+  assert.ok(!existsSync(join(dir, '.handoff-lock-test-1')), 'no lock should be written below threshold');
+  cleanup(dir);
+});
+
+test('estimated ≥ 70% + no lock: lock created, stub pipeline spawned', async () => {
+  // 600k chars / 3.5 = 171k tokens / 200k = 86% — well above threshold
+  const { dir, transcriptPath } = setup(600_000);
+  const stubRoot = makeStubPluginRoot();
+  const res = await runHookWithEnv(HOOK, {
+    transcript_path: transcriptPath,
+    session_id: 'test-2',
+    context_window: { context_window_size: 200_000 },
+  }, { CLAUDE_PLUGIN_ROOT: stubRoot });
+  assert.equal(res.code, 0);
+  // Lock file must exist
+  assert.ok(existsSync(join(dir, '.handoff-lock-test-2')), 'lock file should be created');
+  // additionalContext should mention auto-handoff (stub returns no errors → success path)
+  assert.match(res.stdout, /Auto-handoff utført/i);
+  cleanup(dir);
+  cleanup(stubRoot);
+});
+
+test('estimated ≥ 70% + lock exists: no spawn, no output', async () => {
+  const { dir, transcriptPath } = setup(600_000);
+  // Pre-create the lock file
+  writeFileSync(join(dir, '.handoff-lock-test-3'), 'pre-existing', 'utf-8');
+  const res = await runHookWithEnv(HOOK, {
+    transcript_path: transcriptPath,
+    session_id: 'test-3',
+    context_window: { context_window_size: 200_000 },
+  }, { CLAUDE_PLUGIN_ROOT: PLUGIN_ROOT });
+  assert.equal(res.code, 0);
+  assert.equal(res.stdout.trim(), '', `expected silent (lock exists), got: ${res.stdout}`);
+  cleanup(dir);
+});
+
+test('missing transcript_path: silent exit 0', async () => {
+  const res = await runHookWithEnv(HOOK, { session_id: 'test-4' }, { CLAUDE_PLUGIN_ROOT: PLUGIN_ROOT });
+  assert.equal(res.code, 0);
+  assert.equal(res.stdout.trim(), '');
+});
+
+test('non-existent transcript file: silent exit 0', async () => {
+  const res = await runHookWithEnv(HOOK, {
+    transcript_path: '/tmp/does-not-exist-' + Date.now() + '.jsonl',
+    session_id: 'test-5',
+  }, { CLAUDE_PLUGIN_ROOT: PLUGIN_ROOT });
+  assert.equal(res.code, 0);
+  assert.equal(res.stdout.trim(), '');
+});
+
+test('uses context_window_size from payload (not hardcoded 200k)', async () => {
+  // 1M context window × 70% = 700k tokens × 3.5 = 2.45M chars to trigger
+  // 600k chars on a 1M window is only ~17% — should NOT trigger
+  const { dir, transcriptPath } = setup(600_000);
+  const res = await runHookWithEnv(HOOK, {
+    transcript_path: transcriptPath,
+    session_id: 'test-6',
+    context_window: { context_window_size: 1_000_000 },
+  }, { CLAUDE_PLUGIN_ROOT: PLUGIN_ROOT });
+  assert.equal(res.code, 0);
+  assert.equal(res.stdout.trim(), '', `expected silent on 1M window, got: ${res.stdout}`);
+  assert.ok(!existsSync(join(dir, '.handoff-lock-test-6')));
+  cleanup(dir);
+});
+
+test('CLAUDE_PLUGIN_ROOT missing: graceful error message', async () => {
+  const { dir, transcriptPath } = setup(600_000);
+  const res = await runHookWithEnv(HOOK, {
+    transcript_path: transcriptPath,
+    session_id: 'test-7',
+    context_window: { context_window_size: 200_000 },
+  }, {}); // no CLAUDE_PLUGIN_ROOT
+  assert.equal(res.code, 0);
+  assert.match(res.stdout, /CLAUDE_PLUGIN_ROOT not set/);
+  cleanup(dir);
+});
--- a/plugins/graceful-handoff/tests/scripts/handoff-pipeline.test.mjs
+++ b/plugins/graceful-handoff/tests/scripts/handoff-pipeline.test.mjs
@ -97,11 +97,15 @@ test('--no-commit skips git operations even when dirty', async () => {
  rmSync(repo, { recursive: true, force: true });
 });

-test('idempotency: second --auto run on clean state with recent artifact is no-op', async () => {
+test('idempotency: second --auto run on clean tree with recent artifact is no-op', async () => {
  const repo = makeTempRepo();
  writeFileSync(join(repo, 'foo.txt'), 'change\n');
-  // First run: dirty, commits
+  // First run: dirty, writes artifact and commits ONLY the artifact (not foo.txt)
  await runPipeline(repo, ['--auto', '--non-interactive', '--no-push']);
+  // Clean up the unrelated dirty file so second run sees a CLEAN tree.
+  // The pipeline must NEVER auto-stage user's other dirty files (CLAUDE.md
+  // anti-pattern) — the test explicitly removes it to isolate idempotency.
+  rmSync(join(repo, 'foo.txt'));
  // Second run: clean tree, recent artifact exists → idempotent no-op
  const result = await runPipeline(repo, ['--auto', '--non-interactive', '--no-push']);
  const json = JSON.parse(result.stdout);
@ -112,6 +116,28 @@ test('idempotency: second --auto run on clean state with recent artifact is no-o
  rmSync(repo, { recursive: true, force: true });
 });

+test('pipeline never stages unrelated dirty files (no git add -A regression)', async () => {
+  const repo = makeTempRepo();
+  // Two unrelated dirty files — pipeline should NOT commit them
+  writeFileSync(join(repo, 'unrelated-1.txt'), 'user work\n');
+  writeFileSync(join(repo, 'unrelated-2.md'), '# user notes\n');
+  await runPipeline(repo, ['--auto', '--non-interactive', '--no-push']);
+  // After commit, unrelated files must STILL be in working tree (not committed)
+  const { execFileSync } = await import('node:child_process');
+  const lastCommit = execFileSync('git', ['show', '--name-only', '--pretty=', 'HEAD'], {
+    cwd: repo, encoding: 'utf-8',
+  }).trim().split('\n').filter(Boolean);
+  assert.ok(!lastCommit.includes('unrelated-1.txt'), `unrelated-1.txt should NOT be in HEAD commit, got: ${lastCommit}`);
+  assert.ok(!lastCommit.includes('unrelated-2.md'), `unrelated-2.md should NOT be in HEAD commit, got: ${lastCommit}`);
+  // The artifact SHOULD be in HEAD
+  assert.ok(lastCommit.some(f => f.includes('NEXT-SESSION')), `artifact should be in HEAD, got: ${lastCommit}`);
+  // unrelated files still untracked
+  const status = execFileSync('git', ['status', '--porcelain'], { cwd: repo, encoding: 'utf-8' });
+  assert.match(status, /unrelated-1\.txt/);
+  assert.match(status, /unrelated-2\.md/);
+  rmSync(repo, { recursive: true, force: true });
+});
+
 test('detached HEAD is detected and reported (no commit attempted)', async () => {
  const repo = makeTempRepo();
  // Detach HEAD