feat(graceful-handoff): 2.0 — Stop hook auto-execute + pipeline staging fix [skip-docs]
Step 5 of v2.0 plan + critical pipeline fix.

Stop hook (hooks/scripts/stop-context-monitor.mjs):
- Estimates context usage from transcript size (chars/3.5 / window_size)
- At ≥70%, spawns handoff-pipeline.mjs --auto --no-push synchronously
- Reads context_window_size from payload (supports 1M windows)
- Lock file at <transcript_dir>/.handoff-lock-<session_id>
- Gracefully handles missing CLAUDE_PLUGIN_ROOT, missing transcript

Pipeline fix (scripts/handoff-pipeline.mjs):
- REMOVED `git add -A` (CLAUDE.md anti-pattern: scoops up unrelated WIP)
- Now stages ONLY artifact + REMEMBER.md/TODO.md if present
- New regression test 'pipeline never stages unrelated dirty files'

Tests: 7 stop-hook tests use stub pipeline (no real git operations); 11 pipeline tests including new regression for explicit staging.
This commit is contained in:
parent
1efb1b3176
commit
81aba9a5f5
4 changed files with 357 additions and 5 deletions
178
plugins/graceful-handoff/hooks/scripts/stop-context-monitor.mjs
Normal file
178
plugins/graceful-handoff/hooks/scripts/stop-context-monitor.mjs
Normal file
|
|
@ -0,0 +1,178 @@
|
|||
#!/usr/bin/env node
|
||||
// stop-context-monitor.mjs — graceful-handoff v2.0 (Hybrid Option C from research/06)
|
||||
//
|
||||
// Stop hook fires after each model response. Estimates context usage from
|
||||
// transcript size; at ≥70% (estimated), spawns handoff-pipeline.mjs --auto
|
||||
// --no-push to write artifact + commit. Push remains user-triggered.
|
||||
//
|
||||
// Reconciliation with disable-model-invocation: the spawn calls the script
|
||||
// DIRECTLY, not the skill. The skill stays manual-only.
|
||||
//
|
||||
// Lock file at <transcript_dir>/.handoff-lock-<session_id> prevents repeat
|
||||
// firing in the same session.
|
||||
//
|
||||
// Token estimation: transcript file size in bytes / 3.5 → approximate tokens. Compares against
|
||||
// context_window_size from payload (200000 fallback). Approximation is
|
||||
// known to drift ±10% — 70% threshold is conservative buffer.
|
||||
|
||||
import { readFileSync, statSync, writeFileSync, existsSync } from 'node:fs';
import { dirname, join } from 'node:path';
import { spawnSync } from 'node:child_process';
import { pathToFileURL } from 'node:url';
|
||||
|
||||
// Estimated fraction of the context window at or above which the
// auto-handoff is triggered (main() exits early while the estimate is below it).
const THRESHOLD = 0.70;
|
||||
// Context window size used when the hook payload does not supply
// context_window.context_window_size.
const FALLBACK_WINDOW = 200_000;
|
||||
// Divisor applied to the transcript's size to approximate a token count.
const CHARS_PER_TOKEN = 3.5;
|
||||
|
||||
// Test injection: indirection table for the child_process / fs functions.
// Code that calls through this object (e.g. `__testHooks.fsStat(...)`)
// resolves the property at call time, so a test can replace an entry by
// assigning to it on the export. Each entry defaults to the real Node.js
// function of the same purpose.
|
||||
export const __testHooks = {
|
||||
spawn: spawnSync,
|
||||
fsRead: readFileSync,
|
||||
fsStat: statSync,
|
||||
fsWrite: writeFileSync,
|
||||
fsExists: existsSync,
|
||||
};
|
||||
|
||||
/**
 * Read the entire hook payload from standard input (file descriptor 0).
 *
 * The read goes through `__testHooks.fsRead` (which defaults to
 * `readFileSync`) so that in-process tests can inject a payload the same way
 * they can stub the other fs/spawn calls.
 *
 * @returns {string} Raw stdin contents, or '' when stdin cannot be read.
 */
function readStdin() {
  try {
    return __testHooks.fsRead(0, 'utf-8');
  } catch {
    // Deliberate best effort: an unreadable stdin is treated as "no payload",
    // which the caller handles by exiting quietly.
    return '';
  }
}
|
||||
|
||||
/**
 * Estimate how much of the context window the transcript occupies.
 *
 * The transcript's on-disk size is divided by CHARS_PER_TOKEN to approximate
 * a token count, which is then divided by the window size.
 *
 * @param {string} transcriptPath Path of the transcript file to stat.
 * @param {number} windowSize Context window size in tokens; must be a
 *   positive finite number (numeric strings are accepted).
 * @returns {number|null} Estimated used fraction (1.0 = full window), or null
 *   when the window size is unusable or the file cannot be stat'ed.
 */
function estimateUsedPct(transcriptPath, windowSize) {
  // Without this guard a non-numeric window produces NaN, which is neither
  // `== null` nor `< threshold`, so a caller comparing against a threshold
  // would treat garbage input as "over the limit".
  const windowTokens = Number(windowSize);
  if (!Number.isFinite(windowTokens) || windowTokens <= 0) {
    return null;
  }

  try {
    const { size } = __testHooks.fsStat(transcriptPath);
    const usedFraction = size / CHARS_PER_TOKEN / windowTokens;
    return Number.isFinite(usedFraction) ? usedFraction : null;
  } catch {
    // Missing or unreadable transcript: nothing to estimate.
    return null;
  }
}
|
||||
|
||||
/**
 * Write a hook response to standard output as a single JSON document.
 * No trailing newline is appended.
 *
 * @param {unknown} output Value to serialize with JSON.stringify.
 */
function emit(output) {
  const serialized = JSON.stringify(output);
  process.stdout.write(serialized);
}
|
||||
|
||||
/**
 * Wrap a user-facing message in the Stop-hook output envelope.
 *
 * @param {string} message Text placed in `additionalContext`.
 * @returns {{hookSpecificOutput: {hookEventName: string, additionalContext: string}}}
 */
function stopNotice(message) {
  return {
    hookSpecificOutput: {
      hookEventName: 'Stop',
      additionalContext: message,
    },
  };
}

/**
 * Stop-hook entry point.
 *
 * Flow:
 *  1. Read and parse the JSON payload from stdin; exit 0 silently when it is
 *     empty, malformed, or has no `transcript_path`.
 *  2. Estimate context usage; exit 0 silently below THRESHOLD.
 *  3. Claim the per-session lock file next to the transcript; exit 0 silently
 *     when it already exists or cannot be created.
 *  4. Run `handoff-pipeline.mjs --auto --no-push --non-interactive`
 *     synchronously and report the outcome via `additionalContext`.
 *
 * Every path ends with `process.exit(0)`; failures are reported as messages,
 * never as a non-zero exit code.
 */
function main() {
  const raw = readStdin();
  if (!raw.trim()) {
    process.exit(0);
  }

  let payload;
  try {
    payload = JSON.parse(raw);
  } catch {
    process.exit(0);
  }

  const transcriptPath = payload?.transcript_path;
  const sessionId = payload?.session_id || 'unknown';
  if (!transcriptPath) {
    process.exit(0);
  }

  // Read context_window_size from payload if available (research/03).
  // Anything that is not a positive finite number falls back to the default,
  // so a malformed value cannot turn the estimate into NaN.
  const reportedWindow = Number(payload?.context_window?.context_window_size);
  const windowSize = Number.isFinite(reportedWindow) && reportedWindow > 0
    ? reportedWindow
    : FALLBACK_WINDOW;

  const pctRaw = estimateUsedPct(transcriptPath, windowSize);
  // `!(x >= T)` rather than `x < T` so that NaN also counts as "below".
  if (pctRaw == null || !(pctRaw >= THRESHOLD)) {
    process.exit(0);
  }
  const pct = Math.round(pctRaw * 100);

  // Lock file path: based on transcript directory (session-stable),
  // NOT cwd (which can change). See plan revisions #6.
  const lockPath = join(dirname(transcriptPath), `.handoff-lock-${sessionId}`);
  if (__testHooks.fsExists(lockPath)) {
    process.exit(0); // already triggered this session
  }

  // Claim the lock before doing any work. The 'wx' flag makes creation
  // exclusive: if another invocation created the file between the existence
  // check above and this write, the write fails instead of both proceeding.
  try {
    __testHooks.fsWrite(
      lockPath,
      `${sessionId}\n${new Date().toISOString()}\n`,
      { encoding: 'utf-8', flag: 'wx' },
    );
  } catch {
    process.exit(0); // lock already taken or not writable: give up silently
  }

  const pluginRoot = process.env.CLAUDE_PLUGIN_ROOT;
  if (!pluginRoot) {
    emit(stopNotice(`⚠️ Auto-handoff aborted at est. ${pct}%: CLAUDE_PLUGIN_ROOT not set, cannot locate handoff-pipeline.mjs.`));
    process.exit(0);
  }

  // Spawn pipeline synchronously (NOT detached) so we can capture output.
  // 25s timeout is intended to fit within the Stop hook's 30s budget.
  // process.execPath runs the pipeline with the same Node binary as this
  // hook instead of whatever `node` happens to resolve to on PATH.
  const pipelineScript = join(pluginRoot, 'scripts', 'handoff-pipeline.mjs');
  const result = __testHooks.spawn(
    process.execPath,
    [pipelineScript, '--auto', '--no-push', '--non-interactive'],
    { encoding: 'utf-8', timeout: 25_000 },
  );

  if (result.error) {
    emit(stopNotice(`⚠️ Auto-handoff FAILED at est. ${pct}%: ${result.error.message}. Run /graceful-handoff manually.`));
    process.exit(0);
  }

  if (result.status !== 0) {
    emit(stopNotice(`⚠️ Auto-handoff pipeline exited ${result.status} at est. ${pct}%. stderr: ${(result.stderr || '').slice(0, 300)}. Run /graceful-handoff manually.`));
    process.exit(0);
  }

  // Parse pipeline JSON; report status to user via additionalContext.
  // Valid JSON that is not an object (e.g. `null`) is as useless as invalid
  // JSON, so both are reported as unparseable.
  let pipelineResult;
  try {
    pipelineResult = JSON.parse(result.stdout);
  } catch {
    pipelineResult = null;
  }
  if (pipelineResult === null || typeof pipelineResult !== 'object') {
    emit(stopNotice(`⚠️ Auto-handoff at est. ${pct}%: pipeline output unparseable. Run /graceful-handoff manually.`));
    process.exit(0);
  }

  // Normalize `errors` so a single string (or other truthy non-array) is
  // still reported rather than crashing on `.join`.
  const reportedErrors = pipelineResult.errors;
  let errors = [];
  if (Array.isArray(reportedErrors)) {
    errors = reportedErrors;
  } else if (reportedErrors) {
    errors = [String(reportedErrors)];
  }

  if (errors.length > 0) {
    emit(stopNotice(`⚠️ Auto-handoff at est. ${pct}% partially completed with errors: ${errors.join('; ')}. Artifact: ${pipelineResult.artifact_path || 'not written'}. Run git push manually.`));
    process.exit(0);
  }

  emit(stopNotice(`⚠️ Auto-handoff utført ved estimert ${pct}%: artefakt ${pipelineResult.artifact_path}. Push gjenstår — kjør \`git push\` når du er klar.`));
  process.exit(0);
}
|
||||
|
||||
// Only run main() when this file is the script Node was launched with, not
// when it is imported (e.g. by tests). pathToFileURL() yields the same
// percent-encoded, platform-correct file: URL form that import.meta.url uses;
// a hand-built `file://${path}` string does not match for Windows paths or
// for paths containing spaces or other characters that need escaping.
if (process.argv[1] && import.meta.url === pathToFileURL(process.argv[1]).href) {
  main();
}
|
||||
|
|
@ -296,9 +296,17 @@ async function main() {
|
|||
}
|
||||
if (proceed) {
|
||||
try {
|
||||
// Stage all and run git commit (pre-commit hooks respected; never --no-verify).
|
||||
execSync('git add -A', { cwd, stdio: ['ignore', 'pipe', 'pipe'] });
|
||||
execFileSync('git', ['commit', '-m', commitMessage], { cwd, stdio: ['ignore', 'pipe', 'pipe'] });
|
||||
// CRITICAL: never `git add -A` — that scoops up unrelated work-in-progress.
|
||||
// Stage ONLY the handoff artifact + optional REMEMBER.md/TODO.md if present.
|
||||
// Other dirty files stay in working tree for the user.
|
||||
const stageList = [artifactPath];
|
||||
for (const candidate of ['REMEMBER.md', 'TODO.md']) {
|
||||
const p = join(classification.writeDir, candidate);
|
||||
if (existsSync(p)) stageList.push(p);
|
||||
}
|
||||
execFileSync('git', ['add', '--', ...stageList], { cwd, stdio: ['ignore', 'pipe', 'pipe'] });
|
||||
// git commit with -- pathspec limits commit to those paths from index.
|
||||
execFileSync('git', ['commit', '-m', commitMessage, '--', ...stageList], { cwd, stdio: ['ignore', 'pipe', 'pipe'] });
|
||||
actionsTaken.push('committed');
|
||||
} catch (e) {
|
||||
errors.push(`commit failed: ${(e.stderr || e.message || '').toString().slice(0, 200)}`);
|
||||
|
|
|
|||
|
|
@ -0,0 +1,140 @@
|
|||
// stop-context-monitor.test.mjs — Tests for Stop hook auto-execute logic.
|
||||
// Uses runHookWithEnv to spawn the script as a subprocess and inspect its behavior
|
||||
// via temporary fixture files (real fs) — simpler than mocking imports.
|
||||
|
||||
import { test } from 'node:test';
|
||||
import { strict as assert } from 'node:assert';
|
||||
import { dirname, join } from 'node:path';
|
||||
import { fileURLToPath } from 'node:url';
|
||||
import { mkdtempSync, writeFileSync, existsSync, rmSync, statSync, mkdirSync } from 'node:fs';
|
||||
import { tmpdir } from 'node:os';
|
||||
import { runHookWithEnv } from './hook-helper.mjs';
|
||||
|
||||
const __dirname = dirname(fileURLToPath(import.meta.url));
|
||||
const HOOK = join(__dirname, '..', '..', 'hooks', 'scripts', 'stop-context-monitor.mjs');
|
||||
const PLUGIN_ROOT = join(__dirname, '..', '..');
|
||||
|
||||
/**
 * Create a fresh temporary directory containing `transcript.jsonl` filled
 * with `transcriptSize` copies of the character 'a'.
 *
 * @param {number} transcriptSize Number of characters to write.
 * @returns {{dir: string, transcriptPath: string}} The temp directory and
 *   the transcript file inside it. The caller is responsible for removal.
 */
function setup(transcriptSize) {
  const workDir = mkdtempSync(join(tmpdir(), 'stop-hook-'));
  const fixture = {
    dir: workDir,
    transcriptPath: join(workDir, 'transcript.jsonl'),
  };
  // ASCII filler, so the file's byte size equals the requested character count.
  writeFileSync(fixture.transcriptPath, 'a'.repeat(transcriptSize), 'utf-8');
  return fixture;
}
|
||||
|
||||
// Build a stub plugin root with a fake handoff-pipeline.mjs that returns
|
||||
// canned JSON. This prevents tests from invoking the real pipeline (which
|
||||
// does git operations against whatever repo the test process happens to be in).
|
||||
/**
 * Create a temporary directory laid out like a plugin root, containing
 * `scripts/handoff-pipeline.mjs` with the stub source below.
 *
 * The stub source writes one JSON object to stdout (with an empty `errors`
 * array and a fixed `artifact_path`) and then exits 0.
 *
 * @returns {string} Path of the temporary stub plugin root. The caller is
 *   responsible for removing it.
 */
function makeStubPluginRoot() {
|
||||
const dir = mkdtempSync(join(tmpdir(), 'stub-plugin-root-'));
|
||||
const scriptsDir = join(dir, 'scripts');
|
||||
mkdirSync(scriptsDir);
|
||||
// Source text of the fake pipeline script (runtime string — not edited here).
const stub = `#!/usr/bin/env node
|
||||
process.stdout.write(JSON.stringify({
|
||||
handoff_type: 'plugin-arbeid',
|
||||
write_dir: '/tmp/stub',
|
||||
artifact_path: '/tmp/stub/NEXT-SESSION-PROMPT.local.md',
|
||||
next_steps: [],
|
||||
git_status: { branch: 'main', dirty: false, ahead: 0 },
|
||||
commit_message: '',
|
||||
actions_taken: ['stub-no-op'],
|
||||
errors: [],
|
||||
}));
|
||||
process.exit(0);
|
||||
`;
|
||||
// The file name matches the one the hook joins onto CLAUDE_PLUGIN_ROOT/scripts.
writeFileSync(join(scriptsDir, 'handoff-pipeline.mjs'), stub, 'utf-8');
|
||||
return dir;
|
||||
}
|
||||
|
||||
/**
 * Delete a fixture directory and everything under it.
 * With `force: true`, a path that does not exist is not an error.
 *
 * @param {string} dir Directory to remove.
 */
function cleanup(dir) {
  const removal = { recursive: true, force: true };
  rmSync(dir, removal);
}
|
||||
|
||||
test('estimated < 70%: no spawn, no lock file', async () => {
  // 200k window × 70% threshold = 140k tokens × 3.5 chars = 490k chars.
  // 400k chars (~57%) is well under the threshold.
  const { dir, transcriptPath } = setup(400_000);
  try {
    const res = await runHookWithEnv(HOOK, {
      transcript_path: transcriptPath,
      session_id: 'test-1',
      context_window: { context_window_size: 200_000 },
    }, { CLAUDE_PLUGIN_ROOT: PLUGIN_ROOT });
    assert.equal(res.code, 0);
    assert.equal(res.stdout.trim(), '', `expected silent, got: ${res.stdout}`);
    assert.ok(!existsSync(join(dir, '.handoff-lock-test-1')), 'no lock should be written below threshold');
  } finally {
    // Remove the fixture even when an assertion above throws.
    cleanup(dir);
  }
});
|
||||
|
||||
test('estimated ≥ 70% + no lock: lock created, stub pipeline spawned', async () => {
  // 600k chars / 3.5 = 171k tokens / 200k = 86% — well above threshold.
  const { dir, transcriptPath } = setup(600_000);
  const stubRoot = makeStubPluginRoot();
  try {
    const res = await runHookWithEnv(HOOK, {
      transcript_path: transcriptPath,
      session_id: 'test-2',
      context_window: { context_window_size: 200_000 },
    }, { CLAUDE_PLUGIN_ROOT: stubRoot });
    assert.equal(res.code, 0);
    // Lock file must exist.
    assert.ok(existsSync(join(dir, '.handoff-lock-test-2')), 'lock file should be created');
    // additionalContext should mention auto-handoff (stub returns no errors → success path).
    assert.match(res.stdout, /Auto-handoff utført/i);
  } finally {
    // Remove both temp directories even when an assertion above throws.
    cleanup(dir);
    cleanup(stubRoot);
  }
});
|
||||
|
||||
test('estimated ≥ 70% + lock exists: no spawn, no output', async () => {
  const { dir, transcriptPath } = setup(600_000);
  try {
    // Pre-create the lock file so the hook sees this session as already handled.
    writeFileSync(join(dir, '.handoff-lock-test-3'), 'pre-existing', 'utf-8');
    const res = await runHookWithEnv(HOOK, {
      transcript_path: transcriptPath,
      session_id: 'test-3',
      context_window: { context_window_size: 200_000 },
    }, { CLAUDE_PLUGIN_ROOT: PLUGIN_ROOT });
    assert.equal(res.code, 0);
    assert.equal(res.stdout.trim(), '', `expected silent (lock exists), got: ${res.stdout}`);
  } finally {
    // Remove the fixture even when an assertion above throws.
    cleanup(dir);
  }
});
|
||||
|
||||
test('missing transcript_path: silent exit 0', async () => {
  // The payload deliberately carries no transcript_path.
  const payload = { session_id: 'test-4' };
  const env = { CLAUDE_PLUGIN_ROOT: PLUGIN_ROOT };
  const { code, stdout } = await runHookWithEnv(HOOK, payload, env);
  assert.equal(code, 0);
  assert.equal(stdout.trim(), '');
});
|
||||
|
||||
test('non-existent transcript file: silent exit 0', async () => {
  // Build the missing path under the OS temp directory rather than a
  // hard-coded '/tmp', so the test is meaningful on every platform.
  const missingTranscript = join(tmpdir(), `does-not-exist-${Date.now()}.jsonl`);
  const res = await runHookWithEnv(HOOK, {
    transcript_path: missingTranscript,
    session_id: 'test-5',
  }, { CLAUDE_PLUGIN_ROOT: PLUGIN_ROOT });
  assert.equal(res.code, 0);
  assert.equal(res.stdout.trim(), '');
});
|
||||
|
||||
test('uses context_window_size from payload (not hardcoded 200k)', async () => {
  // 1M context window × 70% = 700k tokens × 3.5 = 2.45M chars to trigger.
  // 600k chars on a 1M window is only ~17% — should NOT trigger.
  const { dir, transcriptPath } = setup(600_000);
  try {
    const res = await runHookWithEnv(HOOK, {
      transcript_path: transcriptPath,
      session_id: 'test-6',
      context_window: { context_window_size: 1_000_000 },
    }, { CLAUDE_PLUGIN_ROOT: PLUGIN_ROOT });
    assert.equal(res.code, 0);
    assert.equal(res.stdout.trim(), '', `expected silent on 1M window, got: ${res.stdout}`);
    assert.ok(!existsSync(join(dir, '.handoff-lock-test-6')));
  } finally {
    // Remove the fixture even when an assertion above throws.
    cleanup(dir);
  }
});
|
||||
|
||||
test('CLAUDE_PLUGIN_ROOT missing: graceful error message', async () => {
  const { dir, transcriptPath } = setup(600_000);
  try {
    // An explicitly empty value is treated as "not set" by the hook (it tests
    // the variable for falsiness). Passing '' instead of omitting the key
    // keeps this test valid whether runHookWithEnv replaces the environment
    // or layers these values over the parent's — NOTE(review): the helper's
    // merge behavior is not visible from this file; confirm.
    const res = await runHookWithEnv(HOOK, {
      transcript_path: transcriptPath,
      session_id: 'test-7',
      context_window: { context_window_size: 200_000 },
    }, { CLAUDE_PLUGIN_ROOT: '' });
    assert.equal(res.code, 0);
    assert.match(res.stdout, /CLAUDE_PLUGIN_ROOT not set/);
  } finally {
    // Remove the fixture even when an assertion above throws.
    cleanup(dir);
  }
});
|
||||
|
|
@ -97,11 +97,15 @@ test('--no-commit skips git operations even when dirty', async () => {
|
|||
rmSync(repo, { recursive: true, force: true });
|
||||
});
|
||||
|
||||
test('idempotency: second --auto run on clean state with recent artifact is no-op', async () => {
|
||||
test('idempotency: second --auto run on clean tree with recent artifact is no-op', async () => {
|
||||
const repo = makeTempRepo();
|
||||
writeFileSync(join(repo, 'foo.txt'), 'change\n');
|
||||
// First run: dirty, commits
|
||||
// First run: dirty, writes artifact and commits ONLY the artifact (not foo.txt)
|
||||
await runPipeline(repo, ['--auto', '--non-interactive', '--no-push']);
|
||||
// Clean up the unrelated dirty file so second run sees a CLEAN tree.
|
||||
// The pipeline must NEVER auto-stage user's other dirty files (CLAUDE.md
|
||||
// anti-pattern) — the test explicitly removes it to isolate idempotency.
|
||||
rmSync(join(repo, 'foo.txt'));
|
||||
// Second run: clean tree, recent artifact exists → idempotent no-op
|
||||
const result = await runPipeline(repo, ['--auto', '--non-interactive', '--no-push']);
|
||||
const json = JSON.parse(result.stdout);
|
||||
|
|
@ -112,6 +116,28 @@ test('idempotency: second --auto run on clean state with recent artifact is no-o
|
|||
rmSync(repo, { recursive: true, force: true });
|
||||
});
|
||||
|
||||
test('pipeline never stages unrelated dirty files (no git add -A regression)', async () => {
  const repo = makeTempRepo();
  try {
    // Two unrelated dirty files — the pipeline should NOT commit them.
    writeFileSync(join(repo, 'unrelated-1.txt'), 'user work\n');
    writeFileSync(join(repo, 'unrelated-2.md'), '# user notes\n');
    await runPipeline(repo, ['--auto', '--non-interactive', '--no-push']);

    // Files touched by the HEAD commit, one path per entry.
    const { execFileSync } = await import('node:child_process');
    const lastCommit = execFileSync('git', ['show', '--name-only', '--pretty=', 'HEAD'], {
      cwd: repo, encoding: 'utf-8',
    }).trim().split('\n').filter(Boolean);
    assert.ok(!lastCommit.includes('unrelated-1.txt'), `unrelated-1.txt should NOT be in HEAD commit, got: ${lastCommit}`);
    assert.ok(!lastCommit.includes('unrelated-2.md'), `unrelated-2.md should NOT be in HEAD commit, got: ${lastCommit}`);
    // The artifact SHOULD be in HEAD.
    assert.ok(lastCommit.some((f) => f.includes('NEXT-SESSION')), `artifact should be in HEAD, got: ${lastCommit}`);

    // The unrelated files must still be untracked: porcelain marks untracked
    // paths with '??', so match the status code and not just the file name
    // (a staged-but-uncommitted file would otherwise also satisfy the check).
    const status = execFileSync('git', ['status', '--porcelain'], { cwd: repo, encoding: 'utf-8' });
    assert.match(status, /^\?\? unrelated-1\.txt$/m);
    assert.match(status, /^\?\? unrelated-2\.md$/m);
  } finally {
    // Remove the temp repo even when an assertion above throws.
    rmSync(repo, { recursive: true, force: true });
  }
});
|
||||
|
||||
test('detached HEAD is detected and reported (no commit attempted)', async () => {
|
||||
const repo = makeTempRepo();
|
||||
// Detach HEAD
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue