fix(graceful-handoff): model-aware context window detection (v2.1.0)

Stop hook fallback antok 200K-vindu. På Opus 4.7 (faktisk 1M) kunne
auto-handoff fyre 5–7x for tidlig — estimert 70% når reell bruk var
~14%. Erstatter enkel fallback med 4-stegs resolution-kjede:

  1. payload.context_window.used_percentage  (autoritativ)
  2. payload.context_window.context_window_size + transcript-estimat
  3. MODEL_WINDOWS[payload.model.id] + estimat
  4. FALLBACK_WINDOW=1_000_000 + estimat (2026-default)

additionalContext-meldinger inkluderer nå [kilde: <source>] for innsyn.
Brief som kilde-artefakt i docs/brief-context-window-detection.md.
6 nye tester (57 totalt). Ingen regresjoner.
This commit is contained in:
Kjell Tore Guttormsen 2026-05-01 09:08:24 +02:00
commit 40a82ccdb4
10 changed files with 347 additions and 34 deletions

View file

@ -138,3 +138,99 @@ test('CLAUDE_PLUGIN_ROOT missing: graceful error message', async () => {
assert.match(res.stdout, /CLAUDE_PLUGIN_ROOT not set/);
cleanup(dir);
});
// --- v2.1: 4-step context resolution -----------------------------------
// Verifies that a payload-supplied used_percentage takes precedence over the
// transcript-size estimate: the hook must stay silent and write no lock file.
test('prefers used_percentage from payload over transcript estimate', async () => {
  // Big transcript that would trigger via the size estimate
  // (600k chars / 3.5 ≈ 171k tokens / 200k window ≈ 86%),
  // but used_percentage says 25% — direct path must win.
  const { dir, transcriptPath } = setup(600_000);
  try {
    const res = await runHookWithEnv(HOOK, {
      transcript_path: transcriptPath,
      session_id: 'test-8',
      context_window: { context_window_size: 200_000, used_percentage: 25 },
    }, { CLAUDE_PLUGIN_ROOT: PLUGIN_ROOT });
    assert.equal(res.code, 0);
    assert.equal(res.stdout.trim(), '', `expected silent (used_percentage=25), got: ${res.stdout}`);
    assert.ok(!existsSync(join(dir, '.handoff-lock-test-8')), 'no lock should be written when used_percentage is below threshold');
  } finally {
    // Always remove the fixture, even when an assertion above throws.
    cleanup(dir);
  }
});
// Verifies that used_percentage alone can fire the handoff: a lock file is
// written and the message labels its source as "direct".
test('used_percentage triggers above threshold even with tiny transcript', async () => {
  // Tiny transcript would never trigger via size-estimate, but used_percentage=75 must.
  const { dir, transcriptPath } = setup(1_000);
  const stubRoot = makeStubPluginRoot();
  try {
    const res = await runHookWithEnv(HOOK, {
      transcript_path: transcriptPath,
      session_id: 'test-9',
      context_window: { context_window_size: 200_000, used_percentage: 75 },
    }, { CLAUDE_PLUGIN_ROOT: stubRoot });
    assert.equal(res.code, 0);
    assert.ok(existsSync(join(dir, '.handoff-lock-test-9')), 'lock file should be created when used_percentage ≥ 70%');
    assert.match(res.stdout, /Auto-handoff utført/i);
    assert.match(res.stdout, /kilde: direct/, 'message should label source as direct');
  } finally {
    // Always remove both fixtures, even when an assertion above throws.
    cleanup(dir);
    cleanup(stubRoot);
  }
});
// Verifies the model-map step: with only model.id in the payload, the
// Opus 4.7 id must yield a window large enough that the hook stays silent.
test('model-mapping: Opus 4.7 resolves to 1M window (no trigger at 17%)', async () => {
  // 600k chars / 3.5 = 171k tokens / 1M = 17% — well under threshold.
  // No context_window in payload — must fall through to model-map.
  const { dir, transcriptPath } = setup(600_000);
  try {
    const res = await runHookWithEnv(HOOK, {
      transcript_path: transcriptPath,
      session_id: 'test-10',
      model: { id: 'claude-opus-4-7' },
    }, { CLAUDE_PLUGIN_ROOT: PLUGIN_ROOT });
    assert.equal(res.code, 0);
    assert.equal(res.stdout.trim(), '', `expected silent on Opus 4.7 1M window at 17%, got: ${res.stdout}`);
    assert.ok(!existsSync(join(dir, '.handoff-lock-test-10')), 'no lock should be written on Opus 4.7 1M window at 17%');
  } finally {
    // Always remove the fixture, even when an assertion above throws.
    cleanup(dir);
  }
});
// Verifies the model-map step for a 200k-window model: the same transcript
// that is harmless on a 1M window must fire here, labelled "model-map".
test('model-mapping: Haiku resolves to 200k window (triggers at 86%)', async () => {
  // 600k chars / 3.5 = 171k tokens / 200k = 86% — above threshold.
  const { dir, transcriptPath } = setup(600_000);
  const stubRoot = makeStubPluginRoot();
  try {
    const res = await runHookWithEnv(HOOK, {
      transcript_path: transcriptPath,
      session_id: 'test-11',
      model: { id: 'claude-haiku-4-5-20251001' },
    }, { CLAUDE_PLUGIN_ROOT: stubRoot });
    assert.equal(res.code, 0);
    assert.ok(existsSync(join(dir, '.handoff-lock-test-11')), 'lock should fire on Haiku 200k window at 86%');
    assert.match(res.stdout, /kilde: model-map/, 'message should label source as model-map');
  } finally {
    // Always remove both fixtures, even when an assertion above throws.
    cleanup(dir);
    cleanup(stubRoot);
  }
});
// Verifies the last-resort step: with neither context_window nor model in
// the payload, the default window must keep the hook silent.
test('default fallback (1M) when neither used_percentage nor model is in payload', async () => {
  // 600k chars / 3.5 = 171k tokens / 1M = 17% — must NOT trigger with new 1M default.
  const { dir, transcriptPath } = setup(600_000);
  try {
    const res = await runHookWithEnv(HOOK, {
      transcript_path: transcriptPath,
      session_id: 'test-12',
      // intentionally no context_window, no model
    }, { CLAUDE_PLUGIN_ROOT: PLUGIN_ROOT });
    assert.equal(res.code, 0);
    assert.equal(res.stdout.trim(), '', `expected silent on default 1M fallback at 17%, got: ${res.stdout}`);
    assert.ok(!existsSync(join(dir, '.handoff-lock-test-12')), 'no lock should be written on default 1M fallback at 17%');
  } finally {
    // Always remove the fixture, even when an assertion above throws.
    cleanup(dir);
  }
});
// Verifies that used_percentage: null is treated as "absent" rather than 0,
// so resolution continues to the payload-supplied window size.
test('null used_percentage falls through to size-based path', async () => {
  // Early-session payloads may have used_percentage: null. We must NOT treat that
  // as 0 and skip the size-estimate. With size=200k and 600k chars (~86%) we trigger.
  const { dir, transcriptPath } = setup(600_000);
  const stubRoot = makeStubPluginRoot();
  try {
    const res = await runHookWithEnv(HOOK, {
      transcript_path: transcriptPath,
      session_id: 'test-13',
      context_window: { context_window_size: 200_000, used_percentage: null },
    }, { CLAUDE_PLUGIN_ROOT: stubRoot });
    assert.equal(res.code, 0);
    assert.ok(existsSync(join(dir, '.handoff-lock-test-13')), 'lock should fire via size-fallback when used_percentage is null');
    assert.match(res.stdout, /kilde: payload-size/, 'message should label source as payload-size');
  } finally {
    // Always remove both fixtures, even when an assertion above throws.
    cleanup(dir);
    cleanup(stubRoot);
  }
});

View file

@ -1,4 +1,4 @@
// plugin-manifest.test.mjs — verify plugin.json schema for v2.0
// plugin-manifest.test.mjs — verify plugin.json schema for v2.1
import { test } from 'node:test';
import { strict as assert } from 'node:assert';
@ -10,9 +10,17 @@ const __dirname = dirname(fileURLToPath(import.meta.url));
const MANIFEST = join(__dirname, '..', '.claude-plugin', 'plugin.json');
const CHANGELOG = join(__dirname, '..', 'CHANGELOG.md');
// Parses the manifest at MANIFEST as JSON and asserts on its `version` field.
// NOTE(review): this span is taken from a diff hunk and shows both the
// pre-change (2.0.0) and post-change (2.1.0) variants of the opening line and
// of the assertion — confirm which pair is live before editing.
test('plugin.json version is 2.0.0', () => {
test('plugin.json version is 2.1.0', () => {
const m = JSON.parse(readFileSync(MANIFEST, 'utf-8'));
assert.equal(m.version, '2.0.0');
assert.equal(m.version, '2.1.0');
});
// Verifies that CHANGELOG.md has a "## [2.1.0]" section and that this section
// itself (not a neighbouring release) mentions the model-aware fix.
test('CHANGELOG has [2.1.0] entry mentioning model-aware fix', () => {
  const c = readFileSync(CHANGELOG, 'utf-8');
  // The section runs from its heading to the next "## [" release heading, or
  // to end of file. Bounding on any following heading (rather than on a
  // hard-coded "## [2.0.0]") keeps text from a release inserted in between
  // from leaking into the match and satisfying the keyword check by accident.
  const match = c.match(/## \[2\.1\.0\][\s\S]*?(?=\n## \[|$)/);
  assert.ok(match, '[2.1.0] section missing');
  assert.match(match[0], /modell-bevisst|model-aware|resolveContextSource/i);
});
test('plugin.json does NOT include auto_discover (not in documented schema)', () => {