// stop-context-monitor.mjs — graceful-handoff v2.0 (Hybrid Option C from research/06)
//
// Stop hook: fires after each model response. Estimates context usage from
// the transcript file size; at ≥70% (estimated) it spawns
// handoff-pipeline.mjs --auto --no-push to write the artifact and commit.
// Push remains user-triggered.
//
// Reconciliation with disable-model-invocation: the spawn calls the script
// DIRECTLY, not the skill. The skill stays manual-only.
//
// A lock file at <transcript dir>/.handoff-lock-<session id> prevents repeat
// firing in the same session.
//
// Token estimation: transcript size / 3.5 → approximate tokens, compared
// against context_window_size from the payload (200000 fallback). The
// approximation is known to drift ±10% — the 70% threshold is a conservative
// buffer.

import { existsSync, readFileSync, realpathSync, statSync, writeFileSync } from 'node:fs';
import { dirname, join } from 'node:path';
import { spawnSync } from 'node:child_process';
import { pathToFileURL } from 'node:url';

const THRESHOLD = 0.70;
const FALLBACK_WINDOW = 200_000;
const CHARS_PER_TOKEN = 3.5;
// 25s fits within the Stop hook's 30s timeout budget.
const PIPELINE_TIMEOUT_MS = 25_000;
const STDERR_EXCERPT_LENGTH = 300;

// Test injection: tests can override these by setting on the export.
export const __testHooks = {
  spawn: spawnSync,
  fsRead: readFileSync,
  fsStat: statSync,
  fsWrite: writeFileSync,
  fsExists: existsSync,
};

/**
 * Reads the whole hook payload from stdin (fd 0).
 * @returns {string} The raw payload, or '' when stdin cannot be read.
 */
function readStdin() {
  try {
    return readFileSync(0, 'utf-8');
  } catch {
    return '';
  }
}

/**
 * Picks the context window size to compare against.
 * @param {object} payload - Parsed hook payload.
 * @returns {number} payload.context_window.context_window_size when it is a
 *   positive finite number, otherwise FALLBACK_WINDOW.
 */
function resolveWindowSize(payload) {
  const declared = Number(payload?.context_window?.context_window_size);
  return Number.isFinite(declared) && declared > 0 ? declared : FALLBACK_WINDOW;
}

/**
 * Estimates the fraction of the context window in use from the transcript's
 * size on disk.
 * @param {string} transcriptPath - Path to the session transcript.
 * @param {number} windowSize - Context window size in tokens.
 * @returns {number|null} Estimated fraction (1 = 100%), or null when the
 *   transcript cannot be stat'ed or windowSize is not a positive number.
 */
function estimateUsedPct(transcriptPath, windowSize) {
  if (!(windowSize > 0)) {
    return null;
  }
  try {
    const { size } = __testHooks.fsStat(transcriptPath);
    return size / CHARS_PER_TOKEN / windowSize;
  } catch {
    return null;
  }
}

/**
 * Builds the lock file path next to the transcript. The transcript directory
 * is session-stable, unlike cwd, which can change.
 * @param {string} transcriptPath - Path to the session transcript.
 * @param {string} sessionId - Session id from the payload.
 * @returns {string} Lock file path inside the transcript's directory.
 */
function lockPathFor(transcriptPath, sessionId) {
  // The id becomes part of a file name: replace anything that could act as a
  // path separator or otherwise be unsafe in a file name.
  const safeId = String(sessionId).replace(/[^A-Za-z0-9._-]/g, '_');
  return join(dirname(transcriptPath), `.handoff-lock-${safeId}`);
}

/**
 * Claims the per-session lock before the pipeline is spawned.
 * @param {string} lockPath - Lock file path.
 * @param {string} sessionId - Session id recorded inside the lock file.
 * @returns {boolean} true when this invocation created the lock; false when
 *   the lock already exists or cannot be written.
 */
function acquireLock(lockPath, sessionId) {
  if (__testHooks.fsExists(lockPath)) {
    return false; // already triggered this session
  }
  try {
    // 'wx' fails if the file already exists, so when two Stop hooks fire in
    // quick succession only one of them can create the lock.
    __testHooks.fsWrite(
      lockPath,
      `${sessionId}\n${new Date().toISOString()}\n`,
      { encoding: 'utf-8', flag: 'wx' },
    );
    return true;
  } catch {
    return false; // lost the race or can't lock: give up silently
  }
}

/**
 * Writes a hook output object to stdout as JSON.
 * @param {object} output - Hook output.
 */
function emit(output) {
  process.stdout.write(JSON.stringify(output));
}

/**
 * Emits a Stop-hook message through additionalContext.
 * @param {string} message - Text to report.
 */
function notify(message) {
  emit({
    hookSpecificOutput: {
      hookEventName: 'Stop',
      additionalContext: message,
    },
  });
}

/**
 * Turns the spawn result of the handoff pipeline into the status message.
 * @param {object} result - Return value of spawnSync (error, status, stdout, stderr).
 * @param {number} pct - Estimated context usage in whole percent.
 * @returns {string} Message describing success or the kind of failure.
 */
function describePipelineOutcome(result, pct) {
  if (result.error) {
    return `⚠️ Auto-handoff FAILED at est. ${pct}%: ${result.error.message}. Run /graceful-handoff manually.`;
  }
  if (result.status !== 0) {
    const stderrExcerpt = (result.stderr || '').slice(0, STDERR_EXCERPT_LENGTH);
    return `⚠️ Auto-handoff pipeline exited ${result.status} at est. ${pct}%. stderr: ${stderrExcerpt}. Run /graceful-handoff manually.`;
  }

  let pipelineResult = null;
  try {
    pipelineResult = JSON.parse(result.stdout);
  } catch {
    pipelineResult = null;
  }
  // Valid JSON that is not a plain object (null, array, number, string) is
  // just as unusable as unparseable output.
  const isObject = pipelineResult !== null
    && typeof pipelineResult === 'object'
    && !Array.isArray(pipelineResult);
  if (!isObject) {
    return `⚠️ Auto-handoff at est. ${pct}%: pipeline output unparseable. Run /graceful-handoff manually.`;
  }

  const errors = Array.isArray(pipelineResult.errors) ? pipelineResult.errors : [];
  if (errors.length > 0) {
    return `⚠️ Auto-handoff at est. ${pct}% partially completed with errors: ${errors.join('; ')}. Artifact: ${pipelineResult.artifact_path || 'not written'}. Run git push manually.`;
  }
  return `⚠️ Auto-handoff utført ved estimert ${pct}%: artefakt ${pipelineResult.artifact_path}. Push gjenstår — kjør \`git push\` når du er klar.`;
}

/**
 * Hook entry point: reads the Stop payload from stdin and, once the estimated
 * context usage reaches THRESHOLD, runs the handoff pipeline once per session
 * and reports the outcome on stdout. Every early-out is silent. The function
 * returns instead of calling process.exit() so that the process ends on its
 * own after stdout has been written.
 */
function main() {
  const raw = readStdin();
  if (!raw.trim()) {
    return;
  }
  let payload;
  try {
    payload = JSON.parse(raw);
  } catch {
    return;
  }

  const transcriptPath = payload?.transcript_path;
  if (!transcriptPath) {
    return;
  }
  const sessionId = payload?.session_id || 'unknown';

  const pctRaw = estimateUsedPct(transcriptPath, resolveWindowSize(payload));
  if (pctRaw == null || pctRaw < THRESHOLD) {
    return;
  }
  const pct = Math.round(pctRaw * 100);

  // Take the lock before doing anything else so a second Stop hook firing
  // during the pipeline run does not start a second handoff.
  if (!acquireLock(lockPathFor(transcriptPath, sessionId), sessionId)) {
    return;
  }

  const pluginRoot = process.env.CLAUDE_PLUGIN_ROOT;
  if (!pluginRoot) {
    notify(`⚠️ Auto-handoff aborted at est. ${pct}%: CLAUDE_PLUGIN_ROOT not set, cannot locate handoff-pipeline.mjs.`);
    return;
  }

  // Spawn the pipeline synchronously (NOT detached) so its output can be captured.
  const pipelineScript = join(pluginRoot, 'scripts', 'handoff-pipeline.mjs');
  const result = __testHooks.spawn(
    'node',
    [pipelineScript, '--auto', '--no-push', '--non-interactive'],
    { encoding: 'utf-8', timeout: PIPELINE_TIMEOUT_MS },
  );
  notify(describePipelineOutcome(result, pct));
}

/**
 * Tells whether this module is the script Node was started with, as opposed
 * to being imported (e.g. by tests).
 * @returns {boolean} true when process.argv[1] refers to this file.
 */
function isMainModule() {
  const entry = process.argv[1];
  if (!entry) {
    return false;
  }
  // Compare as file URLs: a hand-built `file://${path}` string does not match
  // import.meta.url for paths that need encoding or on Windows.
  if (import.meta.url === pathToFileURL(entry).href) {
    return true;
  }
  try {
    // import.meta.url may carry the symlink-resolved path while argv[1] does not.
    return import.meta.url === pathToFileURL(realpathSync(entry)).href;
  } catch {
    return false;
  }
}

// Only run main() when invoked as a script, not when imported by tests.
if (isMainModule()) {
  main();
}
// stop-context-monitor.test.mjs — Tests for the Stop hook auto-execute logic.
// Each test runs the hook script through runHookWithEnv (from hook-helper.mjs)
// and inspects the reported exit code and stdout, plus temporary fixture
// files on the real filesystem — simpler than mocking imports.

import { test } from 'node:test';
import { equal, match, ok } from 'node:assert/strict';
import { dirname, join } from 'node:path';
import { fileURLToPath } from 'node:url';
import { mkdtempSync, writeFileSync, existsSync, rmSync, mkdirSync } from 'node:fs';
import { tmpdir } from 'node:os';
import { runHookWithEnv } from './hook-helper.mjs';

const __dirname = dirname(fileURLToPath(import.meta.url));
const HOOK = join(__dirname, '..', '..', 'hooks', 'scripts', 'stop-context-monitor.mjs');
const PLUGIN_ROOT = join(__dirname, '..', '..');

/**
 * Creates a temp directory holding a transcript of the given size.
 * @param {number} transcriptSize - Number of 'a' characters to write.
 * @returns {{dir: string, transcriptPath: string}} Fixture locations.
 */
function setup(transcriptSize) {
  const dir = mkdtempSync(join(tmpdir(), 'stop-hook-'));
  const transcriptPath = join(dir, 'transcript.jsonl');
  // 'a' is one byte in UTF-8, so the file size equals transcriptSize.
  writeFileSync(transcriptPath, 'a'.repeat(transcriptSize), 'utf-8');
  return { dir, transcriptPath };
}

/**
 * Builds a stub plugin root with a fake handoff-pipeline.mjs that returns
 * canned JSON. This prevents tests from invoking the real pipeline (which
 * does git operations against whatever repo the test process happens to be in).
 * @returns {string} Path of the stub plugin root.
 */
function makeStubPluginRoot() {
  const dir = mkdtempSync(join(tmpdir(), 'stub-plugin-root-'));
  const scriptsDir = join(dir, 'scripts');
  mkdirSync(scriptsDir);
  const stub = `#!/usr/bin/env node
process.stdout.write(JSON.stringify({
  handoff_type: 'plugin-arbeid',
  write_dir: '/tmp/stub',
  artifact_path: '/tmp/stub/NEXT-SESSION-PROMPT.local.md',
  next_steps: [],
  git_status: { branch: 'main', dirty: false, ahead: 0 },
  commit_message: '',
  actions_taken: ['stub-no-op'],
  errors: [],
}));
process.exit(0);
`;
  writeFileSync(join(scriptsDir, 'handoff-pipeline.mjs'), stub, 'utf-8');
  return dir;
}

/** Removes a fixture directory and everything in it. */
function cleanup(dir) {
  rmSync(dir, { recursive: true, force: true });
}

/**
 * Runs a test body against a fresh transcript fixture and a stub plugin root,
 * removing both afterwards even when an assertion throws.
 * @param {number} transcriptSize - Transcript size in characters.
 * @param {(fixtures: {dir: string, transcriptPath: string, stubRoot: string}) => Promise<void>} run
 */
async function withFixtures(transcriptSize, run) {
  const { dir, transcriptPath } = setup(transcriptSize);
  const stubRoot = makeStubPluginRoot();
  try {
    await run({ dir, transcriptPath, stubRoot });
  } finally {
    cleanup(dir);
    cleanup(stubRoot);
  }
}

/** Builds a Stop hook payload with an explicit context window size. */
function payloadFor(transcriptPath, sessionId, windowSize) {
  return {
    transcript_path: transcriptPath,
    session_id: sessionId,
    context_window: { context_window_size: windowSize },
  };
}

test('estimated < 70%: no spawn, no lock file', async () => {
  // 200k window × 70% threshold = 140k tokens × 3.5 chars = 490k chars.
  // 400k chars (~57%) is well under the threshold.
  await withFixtures(400_000, async ({ dir, transcriptPath, stubRoot }) => {
    const res = await runHookWithEnv(
      HOOK,
      payloadFor(transcriptPath, 'test-1', 200_000),
      { CLAUDE_PLUGIN_ROOT: stubRoot },
    );
    equal(res.code, 0);
    equal(res.stdout.trim(), '', `expected silent, got: ${res.stdout}`);
    ok(!existsSync(join(dir, '.handoff-lock-test-1')), 'no lock should be written below threshold');
  });
});

test('estimated ≥ 70% + no lock: lock created, stub pipeline spawned', async () => {
  // 600k chars / 3.5 = 171k tokens / 200k = 86% — well above the threshold.
  await withFixtures(600_000, async ({ dir, transcriptPath, stubRoot }) => {
    const res = await runHookWithEnv(
      HOOK,
      payloadFor(transcriptPath, 'test-2', 200_000),
      { CLAUDE_PLUGIN_ROOT: stubRoot },
    );
    equal(res.code, 0);
    ok(existsSync(join(dir, '.handoff-lock-test-2')), 'lock file should be created');
    // The stub reports no errors, so the hook takes its success path.
    match(res.stdout, /Auto-handoff utført/i);
  });
});

test('estimated ≥ 70% + lock exists: no spawn, no output', async () => {
  await withFixtures(600_000, async ({ dir, transcriptPath, stubRoot }) => {
    // Pre-create the lock file.
    writeFileSync(join(dir, '.handoff-lock-test-3'), 'pre-existing', 'utf-8');
    const res = await runHookWithEnv(
      HOOK,
      payloadFor(transcriptPath, 'test-3', 200_000),
      { CLAUDE_PLUGIN_ROOT: stubRoot },
    );
    equal(res.code, 0);
    equal(res.stdout.trim(), '', `expected silent (lock exists), got: ${res.stdout}`);
  });
});

test('missing transcript_path: silent exit 0', async () => {
  const res = await runHookWithEnv(HOOK, { session_id: 'test-4' }, { CLAUDE_PLUGIN_ROOT: PLUGIN_ROOT });
  equal(res.code, 0);
  equal(res.stdout.trim(), '');
});

test('non-existent transcript file: silent exit 0', async () => {
  const res = await runHookWithEnv(HOOK, {
    transcript_path: `/tmp/does-not-exist-${Date.now()}.jsonl`,
    session_id: 'test-5',
  }, { CLAUDE_PLUGIN_ROOT: PLUGIN_ROOT });
  equal(res.code, 0);
  equal(res.stdout.trim(), '');
});

test('uses context_window_size from payload (not hardcoded 200k)', async () => {
  // 1M context window × 70% = 700k tokens × 3.5 = 2.45M chars to trigger.
  // 600k chars on a 1M window is only ~17% — should NOT trigger.
  await withFixtures(600_000, async ({ dir, transcriptPath, stubRoot }) => {
    const res = await runHookWithEnv(
      HOOK,
      payloadFor(transcriptPath, 'test-6', 1_000_000),
      { CLAUDE_PLUGIN_ROOT: stubRoot },
    );
    equal(res.code, 0);
    equal(res.stdout.trim(), '', `expected silent on 1M window, got: ${res.stdout}`);
    ok(!existsSync(join(dir, '.handoff-lock-test-6')));
  });
});

test('CLAUDE_PLUGIN_ROOT missing: graceful error message', async () => {
  const { dir, transcriptPath } = setup(600_000);
  try {
    const res = await runHookWithEnv(
      HOOK,
      payloadFor(transcriptPath, 'test-7', 200_000),
      {}, // no CLAUDE_PLUGIN_ROOT
    );
    equal(res.code, 0);
    match(res.stdout, /CLAUDE_PLUGIN_ROOT not set/);
  } finally {
    cleanup(dir);
  }
});
// Regression guard for the removed `git add -A`: dirty files that are not
// part of the handoff must stay out of the pipeline's commit.
test('pipeline never stages unrelated dirty files (no git add -A regression)', async () => {
  const repo = makeTempRepo();

  // Two dirty files the pipeline has no business committing.
  const userFiles = [
    ['unrelated-1.txt', 'user work\n'],
    ['unrelated-2.md', '# user notes\n'],
  ];
  for (const [fileName, contents] of userFiles) {
    writeFileSync(join(repo, fileName), contents);
  }

  await runPipeline(repo, ['--auto', '--non-interactive', '--no-push']);

  const { execFileSync } = await import('node:child_process');
  const git = (...args) => execFileSync('git', args, { cwd: repo, encoding: 'utf-8' });

  // Paths touched by the HEAD commit, one per line.
  const lastCommit = git('show', '--name-only', '--pretty=', 'HEAD')
    .trim()
    .split('\n')
    .filter(Boolean);
  for (const [fileName] of userFiles) {
    assert.ok(!lastCommit.includes(fileName), `${fileName} should NOT be in HEAD commit, got: ${lastCommit}`);
  }
  // The handoff artifact itself must have been committed.
  assert.ok(lastCommit.some((path) => path.includes('NEXT-SESSION')), `artifact should be in HEAD, got: ${lastCommit}`);

  // Both user files must still show up in the working-tree status.
  const status = git('status', '--porcelain');
  assert.match(status, /unrelated-1\.txt/);
  assert.match(status, /unrelated-2\.md/);

  rmSync(repo, { recursive: true, force: true });
});