Stop hook fallback antok 200K-vindu. På Opus 4.7 (faktisk 1M) kunne auto-handoff fyre 5–7x for tidlig — estimert 70% når reell bruk var ~14%. Erstatter enkel fallback med 4-stegs resolution-kjede: 1. payload.context_window.used_percentage (autoritativ) 2. payload.context_window.context_window_size + transcript-estimat 3. MODEL_WINDOWS[payload.model.id] + estimat 4. FALLBACK_WINDOW=1_000_000 + estimat (2026-default) additionalContext-meldinger inkluderer nå [kilde: <source>] for innsyn. Brief som kilde-artefakt i docs/brief-context-window-detection.md. 6 nye tester (57 totalt). Ingen regresjoner.
236 lines
10 KiB
JavaScript
236 lines
10 KiB
JavaScript
// stop-context-monitor.test.mjs — Tests for Stop hook auto-execute logic.
// Uses runHookWithEnv to spawn the script as a subprocess and inspect its
// behavior via temporary fixture files (real fs) — simpler than mocking imports.
|
||
|
||
import { test } from 'node:test';
|
||
import { strict as assert } from 'node:assert';
|
||
import { dirname, join } from 'node:path';
|
||
import { fileURLToPath } from 'node:url';
|
||
import { mkdtempSync, writeFileSync, existsSync, rmSync, statSync, mkdirSync } from 'node:fs';
|
||
import { tmpdir } from 'node:os';
|
||
import { runHookWithEnv } from './hook-helper.mjs';
|
||
|
||
// ES modules have no built-in __dirname; derive it from this file's URL.
const __dirname = dirname(fileURLToPath(import.meta.url));

// Plugin root sits two directory levels above this test file's directory.
const PLUGIN_ROOT = join(__dirname, '..', '..');

// Path to the Stop hook script that every test below spawns.
const HOOK = join(PLUGIN_ROOT, 'hooks', 'scripts', 'stop-context-monitor.mjs');
|
||
|
||
/**
 * Create a fresh temporary fixture directory containing a transcript file
 * of a known size.
 *
 * @param {number} transcriptSize - Number of characters to write to the transcript.
 * @returns {{ dir: string, transcriptPath: string }} The fixture directory and
 *   the path of `transcript.jsonl` inside it. The caller is responsible for
 *   removing `dir` afterwards.
 */
function setup(transcriptSize) {
  const fixtureDir = mkdtempSync(join(tmpdir(), 'stop-hook-'));
  const transcriptFile = join(fixtureDir, 'transcript.jsonl');

  // Single-byte filler, so the file size on disk equals the requested char count.
  const filler = 'a'.repeat(transcriptSize);
  writeFileSync(transcriptFile, filler, 'utf-8');

  return { dir: fixtureDir, transcriptPath: transcriptFile };
}
|
||
|
||
/**
 * Build a stub plugin root whose `scripts/handoff-pipeline.mjs` is a fake
 * pipeline that prints canned JSON and exits 0.
 *
 * This prevents tests from invoking the real pipeline (which does git
 * operations against whatever repo the test process happens to be in).
 *
 * @returns {string} Path of the temporary stub plugin root. The caller is
 *   responsible for removing it afterwards.
 */
function makeStubPluginRoot() {
  // Source of the fake pipeline script, written verbatim to disk below.
  const stubSource = `#!/usr/bin/env node
process.stdout.write(JSON.stringify({
  handoff_type: 'plugin-arbeid',
  write_dir: '/tmp/stub',
  artifact_path: '/tmp/stub/NEXT-SESSION-PROMPT.local.md',
  next_steps: [],
  git_status: { branch: 'main', dirty: false, ahead: 0 },
  commit_message: '',
  actions_taken: ['stub-no-op'],
  errors: [],
}));
process.exit(0);
`;

  const stubRoot = mkdtempSync(join(tmpdir(), 'stub-plugin-root-'));
  const scriptsFolder = join(stubRoot, 'scripts');
  mkdirSync(scriptsFolder);

  const pipelinePath = join(scriptsFolder, 'handoff-pipeline.mjs');
  writeFileSync(pipelinePath, stubSource, 'utf-8');

  return stubRoot;
}
|
||
|
||
/**
 * Remove a fixture directory and everything inside it.
 *
 * @param {string} dir - Directory to delete.
 */
function cleanup(dir) {
  // recursive: delete contents too; force: do not throw if the path is already gone.
  const removeOptions = { recursive: true, force: true };
  rmSync(dir, removeOptions);
}
|
||
|
||
// Below the 70% threshold the hook must stay silent and write no lock file.
test('estimated < 70%: no spawn, no lock file', async () => {
  // 200k window × 70% threshold = 140k tokens × 3.5 chars = 490k chars
  // Use 400k chars (~57%) — well under threshold
  const { dir, transcriptPath } = setup(400_000);
  try {
    const res = await runHookWithEnv(HOOK, {
      transcript_path: transcriptPath,
      session_id: 'test-1',
      context_window: { context_window_size: 200_000 },
    }, { CLAUDE_PLUGIN_ROOT: PLUGIN_ROOT });
    assert.equal(res.code, 0);
    assert.equal(res.stdout.trim(), '', `expected silent, got: ${res.stdout}`);
    assert.ok(!existsSync(join(dir, '.handoff-lock-test-1')), 'no lock should be written below threshold');
  } finally {
    // Remove the fixture even when an assertion above throws.
    cleanup(dir);
  }
});
|
||
|
||
// Above the threshold with no prior lock: the hook writes the lock file and
// reports the (stubbed) handoff in its output.
test('estimated ≥ 70% + no lock: lock created, stub pipeline spawned', async () => {
  // 600k chars / 3.5 = 171k tokens / 200k = 86% — well above threshold
  const { dir, transcriptPath } = setup(600_000);
  let stubRoot;
  try {
    stubRoot = makeStubPluginRoot();
    const res = await runHookWithEnv(HOOK, {
      transcript_path: transcriptPath,
      session_id: 'test-2',
      context_window: { context_window_size: 200_000 },
    }, { CLAUDE_PLUGIN_ROOT: stubRoot });
    assert.equal(res.code, 0);
    // Lock file must exist
    assert.ok(existsSync(join(dir, '.handoff-lock-test-2')), 'lock file should be created');
    // additionalContext should mention auto-handoff (stub returns no errors → success path)
    assert.match(res.stdout, /Auto-handoff utført/i);
  } finally {
    // Remove both temp directories even when an assertion above throws.
    cleanup(dir);
    if (stubRoot) cleanup(stubRoot);
  }
});
|
||
|
||
// Above the threshold but with a lock already present: the hook must stay silent.
test('estimated ≥ 70% + lock exists: no spawn, no output', async () => {
  const { dir, transcriptPath } = setup(600_000);
  try {
    // Pre-create the lock file
    writeFileSync(join(dir, '.handoff-lock-test-3'), 'pre-existing', 'utf-8');
    const res = await runHookWithEnv(HOOK, {
      transcript_path: transcriptPath,
      session_id: 'test-3',
      context_window: { context_window_size: 200_000 },
    }, { CLAUDE_PLUGIN_ROOT: PLUGIN_ROOT });
    assert.equal(res.code, 0);
    assert.equal(res.stdout.trim(), '', `expected silent (lock exists), got: ${res.stdout}`);
  } finally {
    // Remove the fixture even when an assertion above throws.
    cleanup(dir);
  }
});
|
||
|
||
// A payload without transcript_path must make the hook exit 0 with no output.
test('missing transcript_path: silent exit 0', async () => {
  // Payload deliberately omits transcript_path.
  const payload = { session_id: 'test-4' };
  const env = { CLAUDE_PLUGIN_ROOT: PLUGIN_ROOT };

  const { code, stdout } = await runHookWithEnv(HOOK, payload, env);

  assert.equal(code, 0);
  assert.equal(stdout.trim(), '');
});
|
||
|
||
// A transcript_path pointing at a missing file must make the hook exit 0 with no output.
test('non-existent transcript file: silent exit 0', async () => {
  // Timestamp suffix makes it unlikely that the path exists.
  const missingTranscript = `/tmp/does-not-exist-${Date.now()}.jsonl`;
  const payload = {
    transcript_path: missingTranscript,
    session_id: 'test-5',
  };
  const env = { CLAUDE_PLUGIN_ROOT: PLUGIN_ROOT };

  const { code, stdout } = await runHookWithEnv(HOOK, payload, env);

  assert.equal(code, 0);
  assert.equal(stdout.trim(), '');
});
|
||
|
||
// The window size supplied in the payload must drive the estimate: the same
// transcript that exceeds the threshold on 200k stays silent on 1M.
test('uses context_window_size from payload (not hardcoded 200k)', async () => {
  // 1M context window × 70% = 700k tokens × 3.5 = 2.45M chars to trigger
  // 600k chars on a 1M window is only ~17% — should NOT trigger
  const { dir, transcriptPath } = setup(600_000);
  try {
    const res = await runHookWithEnv(HOOK, {
      transcript_path: transcriptPath,
      session_id: 'test-6',
      context_window: { context_window_size: 1_000_000 },
    }, { CLAUDE_PLUGIN_ROOT: PLUGIN_ROOT });
    assert.equal(res.code, 0);
    assert.equal(res.stdout.trim(), '', `expected silent on 1M window, got: ${res.stdout}`);
    assert.ok(!existsSync(join(dir, '.handoff-lock-test-6')));
  } finally {
    // Remove the fixture even when an assertion above throws.
    cleanup(dir);
  }
});
|
||
|
||
// Without CLAUDE_PLUGIN_ROOT in the environment the hook must still exit 0
// and report the missing variable in its output.
test('CLAUDE_PLUGIN_ROOT missing: graceful error message', async () => {
  const { dir, transcriptPath } = setup(600_000);
  try {
    const res = await runHookWithEnv(HOOK, {
      transcript_path: transcriptPath,
      session_id: 'test-7',
      context_window: { context_window_size: 200_000 },
    }, {}); // no CLAUDE_PLUGIN_ROOT
    assert.equal(res.code, 0);
    assert.match(res.stdout, /CLAUDE_PLUGIN_ROOT not set/);
  } finally {
    // Remove the fixture even when an assertion above throws.
    cleanup(dir);
  }
});
|
||
|
||
// --- v2.1: 4-step context resolution -----------------------------------
|
||
|
||
// When the payload carries used_percentage, that value must take precedence
// over the transcript-size estimate.
test('prefers used_percentage from payload over transcript estimate', async () => {
  // Big transcript that would trigger via size-estimate (600k chars / 200k window ≈ 86%),
  // but used_percentage says 25% — direct path must win.
  const { dir, transcriptPath } = setup(600_000);
  try {
    const res = await runHookWithEnv(HOOK, {
      transcript_path: transcriptPath,
      session_id: 'test-8',
      context_window: { context_window_size: 200_000, used_percentage: 25 },
    }, { CLAUDE_PLUGIN_ROOT: PLUGIN_ROOT });
    assert.equal(res.code, 0);
    assert.equal(res.stdout.trim(), '', `expected silent (used_percentage=25), got: ${res.stdout}`);
    assert.ok(!existsSync(join(dir, '.handoff-lock-test-8')), 'no lock should be written when used_percentage is below threshold');
  } finally {
    // Remove the fixture even when an assertion above throws.
    cleanup(dir);
  }
});
|
||
|
||
// used_percentage at or above the threshold must trigger the handoff
// regardless of how small the transcript file is.
test('used_percentage triggers above threshold even with tiny transcript', async () => {
  // Tiny transcript would never trigger via size-estimate, but used_percentage=75 must.
  const { dir, transcriptPath } = setup(1_000);
  let stubRoot;
  try {
    stubRoot = makeStubPluginRoot();
    const res = await runHookWithEnv(HOOK, {
      transcript_path: transcriptPath,
      session_id: 'test-9',
      context_window: { context_window_size: 200_000, used_percentage: 75 },
    }, { CLAUDE_PLUGIN_ROOT: stubRoot });
    assert.equal(res.code, 0);
    assert.ok(existsSync(join(dir, '.handoff-lock-test-9')), 'lock file should be created when used_percentage ≥ 70%');
    assert.match(res.stdout, /Auto-handoff utført/i);
    assert.match(res.stdout, /kilde: direct/, 'message should label source as direct');
  } finally {
    // Remove both temp directories even when an assertion above throws.
    cleanup(dir);
    if (stubRoot) cleanup(stubRoot);
  }
});
|
||
|
||
// With no context_window in the payload, the model id alone must select the
// window size; for this Opus id the test expects a 1M window.
test('model-mapping: Opus 4.7 resolves to 1M window (no trigger at 17%)', async () => {
  // 600k chars / 3.5 = 171k tokens / 1M = 17% — well under threshold.
  // No context_window in payload — must fall through to model-map.
  const { dir, transcriptPath } = setup(600_000);
  try {
    const res = await runHookWithEnv(HOOK, {
      transcript_path: transcriptPath,
      session_id: 'test-10',
      model: { id: 'claude-opus-4-7' },
    }, { CLAUDE_PLUGIN_ROOT: PLUGIN_ROOT });
    assert.equal(res.code, 0);
    assert.equal(res.stdout.trim(), '', `expected silent on Opus 4.7 1M window at 17%, got: ${res.stdout}`);
    assert.ok(!existsSync(join(dir, '.handoff-lock-test-10')));
  } finally {
    // Remove the fixture even when an assertion above throws.
    cleanup(dir);
  }
});
|
||
|
||
// With no context_window in the payload, this Haiku model id is expected to
// resolve to a 200k window, so the same transcript crosses the threshold.
test('model-mapping: Haiku resolves to 200k window (triggers at 86%)', async () => {
  // 600k chars / 3.5 = 171k tokens / 200k = 86% — above threshold.
  const { dir, transcriptPath } = setup(600_000);
  let stubRoot;
  try {
    stubRoot = makeStubPluginRoot();
    const res = await runHookWithEnv(HOOK, {
      transcript_path: transcriptPath,
      session_id: 'test-11',
      model: { id: 'claude-haiku-4-5-20251001' },
    }, { CLAUDE_PLUGIN_ROOT: stubRoot });
    assert.equal(res.code, 0);
    assert.ok(existsSync(join(dir, '.handoff-lock-test-11')), 'lock should fire on Haiku 200k window at 86%');
    assert.match(res.stdout, /kilde: model-map/, 'message should label source as model-map');
  } finally {
    // Remove both temp directories even when an assertion above throws.
    cleanup(dir);
    if (stubRoot) cleanup(stubRoot);
  }
});
|
||
|
||
// With neither context_window nor model in the payload, the hook is expected
// to assume a 1M window and therefore stay silent for this transcript.
test('default fallback (1M) when neither used_percentage nor model is in payload', async () => {
  // 600k chars / 3.5 = 171k tokens / 1M = 17% — must NOT trigger with new 1M default.
  const { dir, transcriptPath } = setup(600_000);
  try {
    const res = await runHookWithEnv(HOOK, {
      transcript_path: transcriptPath,
      session_id: 'test-12',
      // intentionally no context_window, no model
    }, { CLAUDE_PLUGIN_ROOT: PLUGIN_ROOT });
    assert.equal(res.code, 0);
    assert.equal(res.stdout.trim(), '', `expected silent on default 1M fallback at 17%, got: ${res.stdout}`);
    assert.ok(!existsSync(join(dir, '.handoff-lock-test-12')));
  } finally {
    // Remove the fixture even when an assertion above throws.
    cleanup(dir);
  }
});
|
||
|
||
// A null used_percentage must not be read as 0%; the hook is expected to fall
// back to the size-based estimate using the payload's window size.
test('null used_percentage falls through to size-based path', async () => {
  // Early-session payloads may have used_percentage: null. We must NOT treat that
  // as 0 and skip the size-estimate. With size=200k and 600k chars (~86%) we trigger.
  const { dir, transcriptPath } = setup(600_000);
  let stubRoot;
  try {
    stubRoot = makeStubPluginRoot();
    const res = await runHookWithEnv(HOOK, {
      transcript_path: transcriptPath,
      session_id: 'test-13',
      context_window: { context_window_size: 200_000, used_percentage: null },
    }, { CLAUDE_PLUGIN_ROOT: stubRoot });
    assert.equal(res.code, 0);
    assert.ok(existsSync(join(dir, '.handoff-lock-test-13')), 'lock should fire via size-fallback when used_percentage is null');
    assert.match(res.stdout, /kilde: payload-size/, 'message should label source as payload-size');
  } finally {
    // Remove both temp directories even when an assertion above throws.
    cleanup(dir);
    if (stubRoot) cleanup(stubRoot);
  }
});
|