ktg-plugin-marketplace/plugins/graceful-handoff/tests/hooks/stop-context-monitor.test.mjs
Kjell Tore Guttormsen 40a82ccdb4 fix(graceful-handoff): model-aware context window detection (v2.1.0)
Stop hook fallback antok 200K-vindu. På Opus 4.7 (faktisk 1M) kunne
auto-handoff fyre 5–7x for tidlig — estimert 70% når reell bruk var
~14%. Erstatter enkel fallback med 4-stegs resolution-kjede:

  1. payload.context_window.used_percentage  (autoritativ)
  2. payload.context_window.context_window_size + transcript-estimat
  3. MODEL_WINDOWS[payload.model.id] + estimat
  4. FALLBACK_WINDOW=1_000_000 + estimat (2026-default)

additionalContext-meldinger inkluderer nå [kilde: <source>] for innsyn.
Brief som kilde-artefakt i docs/brief-context-window-detection.md.
6 nye tester (57 totalt). Ingen regresjoner.
2026-05-01 09:08:24 +02:00

236 lines
10 KiB
JavaScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

// stop-context-monitor.test.mjs — Tests for Stop hook auto-execute logic.
// Uses runHook to spawn the script as a subprocess and inspect its behavior
// via temporary fixture files (real fs) — simpler than mocking imports.
import { test } from 'node:test';
import { strict as assert } from 'node:assert';
import { dirname, join } from 'node:path';
import { fileURLToPath } from 'node:url';
import { mkdtempSync, writeFileSync, existsSync, rmSync, statSync, mkdirSync } from 'node:fs';
import { tmpdir } from 'node:os';
import { runHookWithEnv } from './hook-helper.mjs';
const __dirname = dirname(fileURLToPath(import.meta.url));
const HOOK = join(__dirname, '..', '..', 'hooks', 'scripts', 'stop-context-monitor.mjs');
const PLUGIN_ROOT = join(__dirname, '..', '..');
/**
 * Create a fresh temporary directory holding a `transcript.jsonl` fixture.
 *
 * The transcript consists solely of the ASCII character 'a', so its length
 * in characters equals its UTF-8 size in bytes.
 *
 * @param {number} transcriptSize - Number of characters to write to the transcript.
 * @returns {{ dir: string, transcriptPath: string }} The temp directory and
 *   the path of the transcript file inside it.
 */
function setup(transcriptSize) {
  const workDir = mkdtempSync(join(tmpdir(), 'stop-hook-'));
  const transcriptFile = join(workDir, 'transcript.jsonl');
  const filler = 'a'.repeat(transcriptSize);
  writeFileSync(transcriptFile, filler, 'utf-8');
  return { dir: workDir, transcriptPath: transcriptFile };
}
/**
 * Build a throwaway plugin root containing a fake `scripts/handoff-pipeline.mjs`.
 *
 * The fake script prints a canned JSON result and exits 0. Pointing the hook
 * at this root keeps tests from invoking the real pipeline (which does git
 * operations against whatever repo the test process happens to be in).
 *
 * @returns {string} Path of the temporary stub plugin root.
 */
function makeStubPluginRoot() {
  const stubRoot = mkdtempSync(join(tmpdir(), 'stub-plugin-root-'));
  const scriptsDir = join(stubRoot, 'scripts');
  mkdirSync(scriptsDir);

  // Source of the stub pipeline, one array entry per line. The trailing empty
  // entry yields the final newline.
  const stubSource = [
    '#!/usr/bin/env node',
    'process.stdout.write(JSON.stringify({',
    "handoff_type: 'plugin-arbeid',",
    "write_dir: '/tmp/stub',",
    "artifact_path: '/tmp/stub/NEXT-SESSION-PROMPT.local.md',",
    'next_steps: [],',
    "git_status: { branch: 'main', dirty: false, ahead: 0 },",
    "commit_message: '',",
    "actions_taken: ['stub-no-op'],",
    'errors: [],',
    '}));',
    'process.exit(0);',
    '',
  ].join('\n');

  const pipelinePath = join(scriptsDir, 'handoff-pipeline.mjs');
  writeFileSync(pipelinePath, stubSource, 'utf-8');
  return stubRoot;
}
/**
 * Recursively delete a fixture directory.
 *
 * `force: true` makes this a no-op when the path does not exist.
 *
 * @param {string} dir - Directory to remove.
 */
function cleanup(dir) {
  const removalOptions = { recursive: true, force: true };
  rmSync(dir, removalOptions);
}
// Below-threshold case: the hook must exit 0, print nothing, and leave no
// lock file next to the transcript.
test('estimated < 70%: no spawn, no lock file', async () => {
  // 200k window × 70% threshold = 140k tokens × 3.5 chars = 490k chars
  // Use 400k chars (~57%) — well under threshold
  const { dir, transcriptPath } = setup(400_000);
  try {
    const res = await runHookWithEnv(HOOK, {
      transcript_path: transcriptPath,
      session_id: 'test-1',
      context_window: { context_window_size: 200_000 },
    }, { CLAUDE_PLUGIN_ROOT: PLUGIN_ROOT });
    assert.equal(res.code, 0);
    assert.equal(res.stdout.trim(), '', `expected silent, got: ${res.stdout}`);
    assert.ok(!existsSync(join(dir, '.handoff-lock-test-1')), 'no lock should be written below threshold');
  } finally {
    // Remove the fixture even when an assertion above throws.
    cleanup(dir);
  }
});
// Above-threshold case with no prior lock: the hook must create the lock file
// and report a successful auto-handoff. A stub plugin root is used so the
// real pipeline is never executed.
test('estimated ≥ 70% + no lock: lock created, stub pipeline spawned', async () => {
  // 600k chars / 3.5 = 171k tokens / 200k = 86% — well above threshold
  const { dir, transcriptPath } = setup(600_000);
  let stubRoot;
  try {
    stubRoot = makeStubPluginRoot();
    const res = await runHookWithEnv(HOOK, {
      transcript_path: transcriptPath,
      session_id: 'test-2',
      context_window: { context_window_size: 200_000 },
    }, { CLAUDE_PLUGIN_ROOT: stubRoot });
    assert.equal(res.code, 0);
    // Lock file must exist
    assert.ok(existsSync(join(dir, '.handoff-lock-test-2')), 'lock file should be created');
    // additionalContext should mention auto-handoff (stub returns no errors → success path)
    assert.match(res.stdout, /Auto-handoff utført/i);
  } finally {
    // Remove both temp directories even when an assertion above throws.
    cleanup(dir);
    if (stubRoot) cleanup(stubRoot);
  }
});
// Above-threshold case with a pre-existing lock: the hook must exit 0 and
// stay silent.
test('estimated ≥ 70% + lock exists: no spawn, no output', async () => {
  const { dir, transcriptPath } = setup(600_000);
  try {
    // Pre-create the lock file
    writeFileSync(join(dir, '.handoff-lock-test-3'), 'pre-existing', 'utf-8');
    const res = await runHookWithEnv(HOOK, {
      transcript_path: transcriptPath,
      session_id: 'test-3',
      context_window: { context_window_size: 200_000 },
    }, { CLAUDE_PLUGIN_ROOT: PLUGIN_ROOT });
    assert.equal(res.code, 0);
    assert.equal(res.stdout.trim(), '', `expected silent (lock exists), got: ${res.stdout}`);
  } finally {
    // Remove the fixture even when an assertion above throws.
    cleanup(dir);
  }
});
// A payload without transcript_path must yield exit code 0 and empty stdout.
test('missing transcript_path: silent exit 0', async () => {
  const payload = { session_id: 'test-4' };
  const env = { CLAUDE_PLUGIN_ROOT: PLUGIN_ROOT };
  const { code, stdout } = await runHookWithEnv(HOOK, payload, env);
  assert.equal(code, 0);
  assert.equal(stdout.trim(), '');
});
// A transcript_path pointing at a file that does not exist must yield exit
// code 0 and empty stdout.
test('non-existent transcript file: silent exit 0', async () => {
  // Timestamp suffix makes an accidental collision with a real file unlikely.
  const missingPath = `/tmp/does-not-exist-${Date.now()}.jsonl`;
  const payload = {
    transcript_path: missingPath,
    session_id: 'test-5',
  };
  const { code, stdout } = await runHookWithEnv(HOOK, payload, { CLAUDE_PLUGIN_ROOT: PLUGIN_ROOT });
  assert.equal(code, 0);
  assert.equal(stdout.trim(), '');
});
// The window size supplied in the payload must be honoured: a transcript that
// would exceed the threshold on a 200k window must stay silent on a 1M window.
test('uses context_window_size from payload (not hardcoded 200k)', async () => {
  // 1M context window × 70% = 700k tokens × 3.5 = 2.45M chars to trigger
  // 600k chars on a 1M window is only ~17% — should NOT trigger
  const { dir, transcriptPath } = setup(600_000);
  try {
    const res = await runHookWithEnv(HOOK, {
      transcript_path: transcriptPath,
      session_id: 'test-6',
      context_window: { context_window_size: 1_000_000 },
    }, { CLAUDE_PLUGIN_ROOT: PLUGIN_ROOT });
    assert.equal(res.code, 0);
    assert.equal(res.stdout.trim(), '', `expected silent on 1M window, got: ${res.stdout}`);
    assert.ok(!existsSync(join(dir, '.handoff-lock-test-6')));
  } finally {
    // Remove the fixture even when an assertion above throws.
    cleanup(dir);
  }
});
// When the threshold is exceeded but no CLAUDE_PLUGIN_ROOT is passed to the
// helper, the hook must still exit 0 and print an explanatory message.
test('CLAUDE_PLUGIN_ROOT missing: graceful error message', async () => {
  const { dir, transcriptPath } = setup(600_000);
  try {
    const res = await runHookWithEnv(HOOK, {
      transcript_path: transcriptPath,
      session_id: 'test-7',
      context_window: { context_window_size: 200_000 },
    }, {}); // no CLAUDE_PLUGIN_ROOT
    assert.equal(res.code, 0);
    assert.match(res.stdout, /CLAUDE_PLUGIN_ROOT not set/);
  } finally {
    // Remove the fixture even when an assertion above throws.
    cleanup(dir);
  }
});
// --- v2.1: 4-step context resolution -----------------------------------
// A used_percentage value in the payload takes precedence over the
// transcript-size estimate: 25% must keep the hook silent even though the
// transcript alone would estimate above the threshold.
test('prefers used_percentage from payload over transcript estimate', async () => {
  // Big transcript that would trigger via size-estimate (600k chars / 200k window ≈ 86%),
  // but used_percentage says 25% — direct path must win.
  const { dir, transcriptPath } = setup(600_000);
  try {
    const res = await runHookWithEnv(HOOK, {
      transcript_path: transcriptPath,
      session_id: 'test-8',
      context_window: { context_window_size: 200_000, used_percentage: 25 },
    }, { CLAUDE_PLUGIN_ROOT: PLUGIN_ROOT });
    assert.equal(res.code, 0);
    assert.equal(res.stdout.trim(), '', `expected silent (used_percentage=25), got: ${res.stdout}`);
    assert.ok(!existsSync(join(dir, '.handoff-lock-test-8')), 'no lock should be written when used_percentage is below threshold');
  } finally {
    // Remove the fixture even when an assertion above throws.
    cleanup(dir);
  }
});
// A used_percentage at or above the threshold must trigger the handoff even
// when the transcript is tiny, and the message must name the source "direct".
test('used_percentage triggers above threshold even with tiny transcript', async () => {
  // Tiny transcript would never trigger via size-estimate, but used_percentage=75 must.
  const { dir, transcriptPath } = setup(1_000);
  let stubRoot;
  try {
    stubRoot = makeStubPluginRoot();
    const res = await runHookWithEnv(HOOK, {
      transcript_path: transcriptPath,
      session_id: 'test-9',
      context_window: { context_window_size: 200_000, used_percentage: 75 },
    }, { CLAUDE_PLUGIN_ROOT: stubRoot });
    assert.equal(res.code, 0);
    assert.ok(existsSync(join(dir, '.handoff-lock-test-9')), 'lock file should be created when used_percentage ≥ 70%');
    assert.match(res.stdout, /Auto-handoff utført/i);
    assert.match(res.stdout, /kilde: direct/, 'message should label source as direct');
  } finally {
    // Remove both temp directories even when an assertion above throws.
    cleanup(dir);
    if (stubRoot) cleanup(stubRoot);
  }
});
// With no context_window in the payload, the model id 'claude-opus-4-7' is
// expected to resolve to a 1M window, so a 600k-char transcript stays silent.
test('model-mapping: Opus 4.7 resolves to 1M window (no trigger at 17%)', async () => {
  // 600k chars / 3.5 = 171k tokens / 1M = 17% — well under threshold.
  // No context_window in payload — must fall through to model-map.
  const { dir, transcriptPath } = setup(600_000);
  try {
    const res = await runHookWithEnv(HOOK, {
      transcript_path: transcriptPath,
      session_id: 'test-10',
      model: { id: 'claude-opus-4-7' },
    }, { CLAUDE_PLUGIN_ROOT: PLUGIN_ROOT });
    assert.equal(res.code, 0);
    assert.equal(res.stdout.trim(), '', `expected silent on Opus 4.7 1M window at 17%, got: ${res.stdout}`);
    assert.ok(!existsSync(join(dir, '.handoff-lock-test-10')));
  } finally {
    // Remove the fixture even when an assertion above throws.
    cleanup(dir);
  }
});
// With no context_window in the payload, the Haiku model id is expected to
// resolve to a 200k window, so a 600k-char transcript triggers the handoff and
// the message names the source "model-map".
test('model-mapping: Haiku resolves to 200k window (triggers at 86%)', async () => {
  // 600k chars / 3.5 = 171k tokens / 200k = 86% — above threshold.
  const { dir, transcriptPath } = setup(600_000);
  let stubRoot;
  try {
    stubRoot = makeStubPluginRoot();
    const res = await runHookWithEnv(HOOK, {
      transcript_path: transcriptPath,
      session_id: 'test-11',
      model: { id: 'claude-haiku-4-5-20251001' },
    }, { CLAUDE_PLUGIN_ROOT: stubRoot });
    assert.equal(res.code, 0);
    assert.ok(existsSync(join(dir, '.handoff-lock-test-11')), 'lock should fire on Haiku 200k window at 86%');
    assert.match(res.stdout, /kilde: model-map/, 'message should label source as model-map');
  } finally {
    // Remove both temp directories even when an assertion above throws.
    cleanup(dir);
    if (stubRoot) cleanup(stubRoot);
  }
});
// With neither context_window nor model in the payload, the hook is expected
// to assume a 1M window, so a 600k-char transcript must stay silent.
test('default fallback (1M) when neither used_percentage nor model is in payload', async () => {
  // 600k chars / 3.5 = 171k tokens / 1M = 17% — must NOT trigger with new 1M default.
  const { dir, transcriptPath } = setup(600_000);
  try {
    const res = await runHookWithEnv(HOOK, {
      transcript_path: transcriptPath,
      session_id: 'test-12',
      // intentionally no context_window, no model
    }, { CLAUDE_PLUGIN_ROOT: PLUGIN_ROOT });
    assert.equal(res.code, 0);
    assert.equal(res.stdout.trim(), '', `expected silent on default 1M fallback at 17%, got: ${res.stdout}`);
    assert.ok(!existsSync(join(dir, '.handoff-lock-test-12')));
  } finally {
    // Remove the fixture even when an assertion above throws.
    cleanup(dir);
  }
});
// A null used_percentage must not be read as 0%: the hook has to fall through
// to the size-based estimate and label the source "payload-size".
test('null used_percentage falls through to size-based path', async () => {
  // Early-session payloads may have used_percentage: null. We must NOT treat that
  // as 0 and skip the size-estimate. With size=200k and 600k chars (~86%) we trigger.
  const { dir, transcriptPath } = setup(600_000);
  let stubRoot;
  try {
    stubRoot = makeStubPluginRoot();
    const res = await runHookWithEnv(HOOK, {
      transcript_path: transcriptPath,
      session_id: 'test-13',
      context_window: { context_window_size: 200_000, used_percentage: null },
    }, { CLAUDE_PLUGIN_ROOT: stubRoot });
    assert.equal(res.code, 0);
    assert.ok(existsSync(join(dir, '.handoff-lock-test-13')), 'lock should fire via size-fallback when used_percentage is null');
    assert.match(res.stdout, /kilde: payload-size/, 'message should label source as payload-size');
  } finally {
    // Remove both temp directories even when an assertion above throws.
    cleanup(dir);
    if (stubRoot) cleanup(stubRoot);
  }
});