diff --git a/CLAUDE.md b/CLAUDE.md index a201c2a..60ab1aa 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -8,7 +8,7 @@ Open-source Claude Code plugin marketplace. Solo project by Kjell Tore Guttormse plugins/ ai-psychosis/ v1.0.0 — Interaction awareness (sycophancy, reinforcement loops) config-audit/ v3.1.0 — Configuration intelligence (health, opportunities, auto-fix, whats-active) - graceful-handoff/ v1.0.0 — Session handoff in <60s (NEXT-SESSION artifact + commit+push + copy-paste prompt) + graceful-handoff/ v2.1.0 — Auto-trigger handoff via Stop hook (skill + JSON pipeline + 4-step model-aware context resolution) linkedin-thought-leadership/ v1.2.0 — LinkedIn content pipeline + analytics llm-security/ v6.0.0 — Security scanning, auditing, threat modeling ms-ai-architect/ v1.8.0 — Microsoft AI architecture (Cosmo Skyberg persona) diff --git a/README.md b/README.md index 669cbd5..1a8a8b6 100644 --- a/README.md +++ b/README.md @@ -146,7 +146,7 @@ Research-informed thresholds. Alerts are progressive and never blocking. Privacy --- -### [Graceful Handoff](plugins/graceful-handoff/) `v2.0.0` +### [Graceful Handoff](plugins/graceful-handoff/) `v2.1.0` Auto-trigger session handoff at context threshold. Manual `/graceful-handoff` always works as backup. Built for Opus 4.7. diff --git a/plugins/graceful-handoff/.claude-plugin/plugin.json b/plugins/graceful-handoff/.claude-plugin/plugin.json index 49119ea..95359c0 100644 --- a/plugins/graceful-handoff/.claude-plugin/plugin.json +++ b/plugins/graceful-handoff/.claude-plugin/plugin.json @@ -1,6 +1,6 @@ { "name": "graceful-handoff", - "version": "2.0.0", + "version": "2.1.0", "description": "Auto-trigger session handoff at context-threshold (Stop hook + statusLine hint), with manual /graceful-handoff fallback. 
Skill-architecture (disable-model-invocation: true) + JSON pipeline + auto-load on session resume.", "author": { "name": "Kjell Tore Guttormsen" diff --git a/plugins/graceful-handoff/CHANGELOG.md b/plugins/graceful-handoff/CHANGELOG.md index c942536..982c909 100644 --- a/plugins/graceful-handoff/CHANGELOG.md +++ b/plugins/graceful-handoff/CHANGELOG.md @@ -4,6 +4,29 @@ All notable changes to graceful-handoff are documented here. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). +## [2.1.0] - 2026-05-01 + +### Fixed + +- **Modell-bevisst kontekstvindu i Stop hook (`stop-context-monitor.mjs`).** Tidligere fallback antok 200K-vindu. På Opus 4.7-sesjoner (faktisk 1M-vindu) kunne auto-handoff fyre 5–7x for tidlig — estimert 70% når reell bruk var ~14%. Erstatter `windowSize = payload?.context_window?.context_window_size || 200_000` med en 4-stegs resolution-kjede (`resolveContextSource()`): + 1. `payload.context_window.used_percentage` (autoritativ, modell-agnostisk — kilde: `direct`) + 2. `payload.context_window.context_window_size` + transcript-estimat (kilde: `payload-size`) + 3. `MODEL_WINDOWS[payload.model.id]` + estimat (Opus 4.7=1M, Sonnet 4.6=200K, Haiku=200K — kilde: `model-map`) + 4. `FALLBACK_WINDOW = 1_000_000` + estimat (oppdatert 2026-default — kilde: `default-1m`) + +### Changed + +- `additionalContext`-meldinger fra Stop hook inkluderer nå `[kilde: ${source}]` for innsyn i hvilken resolution-path som ble brukt (`direct`/`payload-size`/`model-map`/`default-1m`). +- Inline-kommentar (linje 14–22) og README/CLAUDE.md-seksjoner om auto-trigger-mekanikk oppdatert til å beskrive ny kjede. + +### Tests + +- 6 nye tester i `tests/hooks/stop-context-monitor.test.mjs` dekker hver path: `used_percentage` foretrukket, `used_percentage` trigger med liten transcript, model-map for Opus 4.7 (1M) og Haiku (200K), default 1M-fallback når payload mangler både `context_window` og `model`, og `null used_percentage` faller gjennom til size-pathen. Total: 56 tester (50 + 6). 
+ +### Open + +- Stop-hook payload-schema er ikke offisielt dokumentert. Det er ikke bekreftet at Stop-payload faktisk inneholder `used_percentage` eller `model.id` (statusLine-payload gjør). Hvis ingen av feltene leveres, faller resolveren til `default-1m`. Smoke-test ved første Opus 4.7-sesjon vil avgjøre hvilken kilde som blir primær. + ## [2.0.0] - 2026-05-01 ### BREAKING diff --git a/plugins/graceful-handoff/CLAUDE.md b/plugins/graceful-handoff/CLAUDE.md index 37e045e..8ee7386 100644 --- a/plugins/graceful-handoff/CLAUDE.md +++ b/plugins/graceful-handoff/CLAUDE.md @@ -1,4 +1,4 @@ -# graceful-handoff (v2.0) +# graceful-handoff (v2.1) Auto-trigger sesjonsoverlevering ved kontekst-terskel, med manuell `/graceful-handoff` som backup. Skill-arkitektur (`disable-model-invocation: true`), deterministisk JSON-pipeline, og tre hooks som dekker hint, auto-eksekvering, og auto-load. @@ -29,13 +29,14 @@ Auto-trigger sesjonsoverlevering ved kontekst-terskel, med manuell `/graceful-ha ## Auto-trigger-mekanikk -Claude Code eksponerer ikke real-time kontekst-prosent til hooks (Anthropic har closed feature requests #16988, #27969, #34340). Vi approksimerer: +Claude Code eksponerer ikke real-time kontekst-prosent direkte til Stop hook (Anthropic har closed feature requests #16988, #27969, #34340). v2.1 bruker en **4-stegs resolution-kjede** (`resolveContextSource()` i `stop-context-monitor.mjs`): -1. Stop hook får `transcript_path` i payload -2. `wc -c ` → char count -3. Tokens ≈ chars / 3.5 -4. Sammenlign mot `context_window_size` fra payload (200k default, 1M støttet) -5. Ved ≥ 70% (estimert): spawn pipeline med `--auto --no-push --non-interactive` +1. `payload.context_window.used_percentage` — autoritativ, modell-agnostisk (kilde: `direct`) +2. `payload.context_window.context_window_size` + `chars/3.5`-estimat (kilde: `payload-size`) +3. `MODEL_WINDOWS[payload.model.id]` + estimat — Opus 4.7=1M, Sonnet 4.6=200k, Haiku=200k (kilde: `model-map`) +4. 
`FALLBACK_WINDOW = 1_000_000` + estimat — oppdatert 2026-default (kilde: `default-1m`) + +Ved ≥ 70% (estimert): spawn pipeline med `--auto --no-push --non-interactive`. additionalContext-meldingen inkluderer `[kilde: ${source}]` for innsyn. Lock-fil `/.handoff-lock-` hindrer repeat-firing innen samme sesjon. @@ -61,3 +62,4 @@ node --test plugins/graceful-handoff/tests/ - v1.0.0 (2026-04-19): initial declarative command - v2.0.0 (2026-05-01): skill-arkitektur + JSON-pipeline + 3 hooks + auto-trigger (BREAKING) +- v2.1.0 (2026-05-01): modell-bevisst kontekstvindu — 4-stegs resolution-kjede (used_percentage → payload-size → model-map → 1M default). Fikser for-tidlig auto-handoff på Opus 4.7 diff --git a/plugins/graceful-handoff/README.md b/plugins/graceful-handoff/README.md index f5fb982..bd74137 100644 --- a/plugins/graceful-handoff/README.md +++ b/plugins/graceful-handoff/README.md @@ -80,15 +80,17 @@ hooks/hooks.json # Registrerer alt ## Auto-trigger ved kontekst-terskel — hvordan virker det? -Det finnes ingen Claude Code-primitiv som eksponerer real-time kontekst-prosent til hooks (Anthropic har closed feature requests #16988, #27969, #34340). Vi bruker derfor en **approksimasjon**: +Claude Code har ingen dokumentert primitiv som eksponerer real-time kontekst-prosent til hooks (Anthropic har closed feature requests #16988, #27969, #34340), og det er ikke bekreftet at Stop-hook-payloaden inneholder den. Vi bruker derfor en **4-stegs resolution-kjede** (v2.1) som foretrekker payload-verdien når den finnes, og ellers estimerer: -1. **Stop hook** fyrer etter hver model-respons og får `transcript_path` i payload -2. Hooken leser `wc -c ` for char-count -3. Estimerer tokens som `chars / 3.5` -4. Sammenligner mot `context_window_size` fra payload (default 200k, men respekterer 1M-windows) -5. Ved estimert ≥ 70%: spawner pipeline synkront +1. **Stop hook** fyrer etter hver model-respons og får en payload med `transcript_path` +2. 
Resolver kontekstbruk i prioritert rekkefølge: + - `payload.context_window.used_percentage` — autoritativ, modell-agnostisk (ingen estimering) + - `payload.context_window.context_window_size` + transcript-estimat (`chars / 3.5`) + - Modell-mapping: `payload.model.id` → kjent vindusstørrelse (Opus 4.7=1M, Sonnet 4.6=200k, Haiku=200k) + - Siste fallback: 1M (oppdatert 2026-default — antar moderne modell) +3. Ved estimert ≥ 70%: spawner pipeline synkront -Dette estimatet kan avvike ±10% fra Claude's reelle telling. **70% er konservativt valgt** — gir buffer mot drift. +Estimerings-pathen (de tre siste kildene over: `payload-size`, `model-map`, `default-1m`) kan avvike ±10% fra Claudes reelle telling. **70% er konservativt valgt** — gir buffer mot drift. Direct-pathen (`used_percentage`) har ingen drift. additionalContext-meldingen viser hvilken kilde som ble brukt (`direct`/`payload-size`/`model-map`/`default-1m`) for innsyn. For å hindre repeat-firing innen samme sesjon: lock-fil `/.handoff-lock-` opprettes ved første trigger. diff --git a/plugins/graceful-handoff/docs/brief-context-window-detection.md b/plugins/graceful-handoff/docs/brief-context-window-detection.md new file mode 100644 index 0000000..54b6125 --- /dev/null +++ b/plugins/graceful-handoff/docs/brief-context-window-detection.md @@ -0,0 +1,144 @@ +# Brief: Modell-bevisst kontekstvindu i graceful-handoff + +**Dato:** 2026-05-01 +**Status:** Implementert i v2.1.0 (Alt 3) — opprinnelig skrevet som forslag +**Trigger:** Bruker oppdaget at Opus 4.7 har 1M kontekstvindu, ikke 200K. Plugin antar 200K i fallback. + +## Problem + +`hooks/scripts/stop-context-monitor.mjs:23` definerer: + +```js +const FALLBACK_WINDOW = 200_000; +``` + +Logikken (linje 76-77): + +```js +const windowSize = payload?.context_window?.context_window_size || FALLBACK_WINDOW; +const pctRaw = estimateUsedPct(transcriptPath, windowSize); +``` + +Hvis Stop-hook payload ikke leverer `context_window.context_window_size` — eller leverer `0`/`undefined` — beregner hooken brukt prosent mot 200K. 
På en Opus 4.7-sesjon med faktisk 1M-vindu betyr det: + +- Estimat treffer 70% når faktisk bruk er **~14%** (140K av 1M) +- Auto-handoff fyrer 5-7x for tidlig +- Bruker mister kontinuitet i lange sesjoner + +`statusline-monitor.mjs` har ikke samme problem — den leser `used_percentage` direkte fra payload og er modell-agnostisk. + +## Hvorfor 200K-fallback ble valgt + +Kommentar (linje 14-16): +> Token estimation: char_count / 3.5 → approximate tokens. Compares against +> context_window_size from payload (200000 fallback). Approximation is +> known to drift ±10% — 70% threshold is conservative buffer. + +Antakelsen ved skriving av v2.0: Claude-modeller har 200K-vindu som standard. Det stemmer ikke lenger. + +## Modell-landskap (verifisert 2026-05-01) + +| Modell | Kontekstvindu | +|--------|---------------| +| Opus 4.7 | **1M tokens** (standard, ingen long-context premium) | +| Sonnet 4.6 | 1M tokens (1M tier, beta) eller 200K | +| Haiku 4.5 | 200K tokens | +| Eldre Claude 3.x | 200K tokens | + +Kilder: +- https://platform.claude.com/docs/en/about-claude/models/whats-new-claude-4-7 +- https://platform.claude.com/docs/en/build-with-claude/context-windows + +## Løsningsalternativer + +### Alt 1 — Bedre fallback-detektering (minimal endring) + +Detekter modell fra payload (`payload?.model` eller lignende felt) og map til kontekstvindu: + +```js +const MODEL_WINDOWS = { + 'claude-opus-4-7': 1_000_000, + 'claude-sonnet-4-6': 200_000, // default, kan ha 1M tier + 'claude-haiku-4-5-20251001': 200_000, +}; + +function resolveWindowSize(payload) { + const fromPayload = payload?.context_window?.context_window_size; + if (fromPayload && fromPayload > 0) return fromPayload; + const model = payload?.model || payload?.session?.model; + if (model && MODEL_WINDOWS[model]) return MODEL_WINDOWS[model]; + return 1_000_000; // safer default i 2026 +} +``` + +**Pros:** Minimal kode, dekker 95% av tilfellene. +**Cons:** Hard-kodet modell-tabell må vedlikeholdes. 
Sonnet 4.6 1M-tier er ikke alltid aktiv — kan over-estimere. + +### Alt 2 — Foretrekk `used_percentage` fra payload (foretrukket) + +Hvis Stop-hook payload har `context_window.used_percentage` (slik statusline-payload har), bruk den direkte og hopp over transcript-estimat helt: + +```js +function estimateUsedPct(payload, transcriptPath, windowSize) { + const direct = payload?.context_window?.used_percentage; + if (typeof direct === 'number' && !isNaN(direct)) { + return direct / 100; // already a percent + } + // Fall back to transcript-size estimate + const stat = statSync(transcriptPath); + const tokens = stat.size / CHARS_PER_TOKEN; + return tokens / windowSize; +} +``` + +**Pros:** Bruker autoritativ kilde når tilgjengelig. Modell-agnostisk. +**Cons:** Krever verifisering av Stop-hook payload-schema — usikkert om feltet alltid er der. + +### Alt 3 — Kombinert (anbefalt) + +1. Foretrekk `used_percentage` fra payload (Alt 2) +2. Hvis ikke tilgjengelig, bruk `context_window_size` fra payload + transcript-estimat +3. Hvis heller ikke det, prøv modell-mapping (Alt 1) +4. Siste fallback: 1M (oppdatert default for 2026) + +Behold 70% terskel — den er prosent-basert og fungerer uavhengig av vindusstørrelse. + +## Sekundært designspørsmål + +Er fast 70% terskel optimal for både 200K og 1M? + +- 200K × 70% = 140K brukt → 60K headroom +- 1M × 70% = 700K brukt → 300K headroom + +Det er rimelig argumenterbart at terskelen bør være høyere ved store vinduer (f.eks. 75-80% for 1M-modeller), siden absolutt headroom betyr mer enn relativ. Men auto-compaction og prompt cache TTL er også prosent-baserte fenomener — så en universell 70% er sannsynligvis fortsatt riktig som default. Lavere prioritet enn fallback-fixen. + +## Verifisering + +Etter implementering, test: + +1. **Smoke test:** Opus 4.7-sesjon, kjør til ~50% (statusline viser pct), bekreft at auto-handoff IKKE trigger. +2. 
**Unit test:** Mock payload uten `context_window`, med `model: 'claude-opus-4-7'`, verifiser at `windowSize` resolver til 1M. +3. **Unit test:** Payload med `used_percentage: 75`, verifiser at funksjonen returnerer 0.75 uansett windowSize. +4. **Regresjon:** Eksisterende tester i `tests/` skal fortsatt passere. + +## Scope-vurdering + +- **Innenfor:** Fix av `stop-context-monitor.mjs` fallback. Oppdater inline-kommentar (linje 14-16) og README/CLAUDE.md hvis 200K nevnes der. +- **Utenfor:** Endring av terskel-strategi (70% → variabel). Kan vurderes som separat oppgave. +- **Utenfor:** Endring av `statusline-monitor.mjs` (fungerer allerede modell-agnostisk). + +## Estimat + +- Implementering: ~30 min (én fil + tester) +- Verifisering: ~15 min smoke + 15 min regresjon +- Doc-oppdatering: ~10 min (README, CLAUDE.md, CHANGELOG) +- Total: ~70 min, én sesjon + +## Neste skritt (når godkjent) + +1. Bekreft Stop-hook payload-schema (har den `used_percentage` eller bare `context_window_size`?) +2. Implementer Alt 3 i `stop-context-monitor.mjs` +3. Oppdater fallback-kommentaren +4. Skriv tester for nye fallback-veier +5. Bump til v2.1.0 (minor — bug-fix + behavioral change) +6. Oppdater CHANGELOG, README, CLAUDE.md, rot-README diff --git a/plugins/graceful-handoff/hooks/scripts/stop-context-monitor.mjs b/plugins/graceful-handoff/hooks/scripts/stop-context-monitor.mjs index 7339af3..1141e13 100644 --- a/plugins/graceful-handoff/hooks/scripts/stop-context-monitor.mjs +++ b/plugins/graceful-handoff/hooks/scripts/stop-context-monitor.mjs @@ -11,18 +11,30 @@ // Lock file at /.handoff-lock- prevents repeat // firing in the same session. // -// Token estimation: char_count / 3.5 → approximate tokens. Compares against -// context_window_size from payload (200000 fallback). Approximation is -// known to drift ±10% — 70% threshold is conservative buffer. +// Context resolution (4-step fallback, v2.1): +// 1. payload.context_window.used_percentage → authoritative, model-agnostic +// 2. 
payload.context_window.context_window_size + transcript estimate +// 3. MODEL_WINDOWS[payload.model.id] + transcript estimate +// 4. FALLBACK_WINDOW (1M, 2026 default) + transcript estimate +// Token estimation (steps 2-4): char_count / 3.5. Approximation drifts ±10%; +// 70% threshold is conservative buffer. import { readFileSync, statSync, writeFileSync, existsSync } from 'node:fs'; import { dirname, join } from 'node:path'; import { spawnSync } from 'node:child_process'; const THRESHOLD = 0.70; -const FALLBACK_WINDOW = 200_000; +const FALLBACK_WINDOW = 1_000_000; const CHARS_PER_TOKEN = 3.5; +// Model → context window mapping. Sonnet 4.6 has an opt-in 1M tier that is +// not always active and not exposed in payload — use the safer 200k default. +const MODEL_WINDOWS = { + 'claude-opus-4-7': 1_000_000, + 'claude-sonnet-4-6': 200_000, + 'claude-haiku-4-5-20251001': 200_000, +}; + // Test injection: tests can override these by setting on the export. export const __testHooks = { spawn: spawnSync, @@ -50,6 +62,32 @@ function estimateUsedPct(transcriptPath, windowSize) { } } +// Resolve context usage via the 4-step fallback chain documented above. +// Returns { pct, source } or null if pct cannot be computed. +export function resolveContextSource(payload, transcriptPath) { + const ctx = payload?.context_window; + const direct = ctx?.used_percentage; + if (typeof direct === 'number' && !isNaN(direct) && direct > 0) { + return { pct: direct / 100, source: 'direct' }; + } + + const payloadSize = ctx?.context_window_size; + if (typeof payloadSize === 'number' && payloadSize > 0) { + const pct = estimateUsedPct(transcriptPath, payloadSize); + return pct == null ? null : { pct, source: 'payload-size' }; + } + + const modelId = payload?.model?.id; + const mapped = modelId ? MODEL_WINDOWS[modelId] : undefined; + if (mapped) { + const pct = estimateUsedPct(transcriptPath, mapped); + return pct == null ? 
null : { pct, source: 'model-map' }; + } + + const pct = estimateUsedPct(transcriptPath, FALLBACK_WINDOW); + return pct == null ? null : { pct, source: 'default-1m' }; +} + function emit(output) { process.stdout.write(JSON.stringify(output)); } @@ -72,12 +110,12 @@ function main() { process.exit(0); } - // Read context_window_size from payload if available (research/03) - const windowSize = payload?.context_window?.context_window_size || FALLBACK_WINDOW; - const pctRaw = estimateUsedPct(transcriptPath, windowSize); - if (pctRaw == null) { + // 4-step resolution: used_percentage → payload-size → model-map → 1M fallback + const resolved = resolveContextSource(payload, transcriptPath); + if (resolved == null) { process.exit(0); } + const { pct: pctRaw, source } = resolved; const pct = Math.round(pctRaw * 100); if (pctRaw < THRESHOLD) { @@ -105,7 +143,7 @@ function main() { emit({ hookSpecificOutput: { hookEventName: 'Stop', - additionalContext: `⚠️ Auto-handoff aborted at est. ${pct}%: CLAUDE_PLUGIN_ROOT not set, cannot locate handoff-pipeline.mjs.`, + additionalContext: `⚠️ Auto-handoff aborted at est. ${pct}% [kilde: ${source}]: CLAUDE_PLUGIN_ROOT not set, cannot locate handoff-pipeline.mjs.`, }, }); process.exit(0); @@ -122,7 +160,7 @@ function main() { emit({ hookSpecificOutput: { hookEventName: 'Stop', - additionalContext: `⚠️ Auto-handoff FAILED at est. ${pct}%: ${result.error.message}. Run /graceful-handoff manually.`, + additionalContext: `⚠️ Auto-handoff FAILED at est. ${pct}% [kilde: ${source}]: ${result.error.message}. Run /graceful-handoff manually.`, }, }); process.exit(0); @@ -132,7 +170,7 @@ function main() { emit({ hookSpecificOutput: { hookEventName: 'Stop', - additionalContext: `⚠️ Auto-handoff pipeline exited ${result.status} at est. ${pct}%. stderr: ${(result.stderr || '').slice(0, 300)}. Run /graceful-handoff manually.`, + additionalContext: `⚠️ Auto-handoff pipeline exited ${result.status} at est. ${pct}% [kilde: ${source}]. 
stderr: ${(result.stderr || '').slice(0, 300)}. Run /graceful-handoff manually.`, }, }); process.exit(0); @@ -146,7 +184,7 @@ function main() { emit({ hookSpecificOutput: { hookEventName: 'Stop', - additionalContext: `⚠️ Auto-handoff at est. ${pct}%: pipeline output unparseable. Run /graceful-handoff manually.`, + additionalContext: `⚠️ Auto-handoff at est. ${pct}% [kilde: ${source}]: pipeline output unparseable. Run /graceful-handoff manually.`, }, }); process.exit(0); @@ -157,7 +195,7 @@ function main() { emit({ hookSpecificOutput: { hookEventName: 'Stop', - additionalContext: `⚠️ Auto-handoff at est. ${pct}% partially completed with errors: ${errors.join('; ')}. Artifact: ${pipelineResult.artifact_path || 'not written'}. Run git push manually.`, + additionalContext: `⚠️ Auto-handoff at est. ${pct}% [kilde: ${source}] partially completed with errors: ${errors.join('; ')}. Artifact: ${pipelineResult.artifact_path || 'not written'}. Run git push manually.`, }, }); process.exit(0); @@ -166,7 +204,7 @@ function main() { emit({ hookSpecificOutput: { hookEventName: 'Stop', - additionalContext: `⚠️ Auto-handoff utført ved estimert ${pct}%: artefakt ${pipelineResult.artifact_path}. Push gjenstår — kjør \`git push\` når du er klar.`, + additionalContext: `⚠️ Auto-handoff utført ved estimert ${pct}% [kilde: ${source}]: artefakt ${pipelineResult.artifact_path}. 
Push gjenstår — kjør \`git push\` når du er klar.`, }, }); process.exit(0); diff --git a/plugins/graceful-handoff/tests/hooks/stop-context-monitor.test.mjs b/plugins/graceful-handoff/tests/hooks/stop-context-monitor.test.mjs index edf8115..2ebeb8c 100644 --- a/plugins/graceful-handoff/tests/hooks/stop-context-monitor.test.mjs +++ b/plugins/graceful-handoff/tests/hooks/stop-context-monitor.test.mjs @@ -138,3 +138,99 @@ test('CLAUDE_PLUGIN_ROOT missing: graceful error message', async () => { assert.match(res.stdout, /CLAUDE_PLUGIN_ROOT not set/); cleanup(dir); }); + +// --- v2.1: 4-step context resolution ----------------------------------- + +test('prefers used_percentage from payload over transcript estimate', async () => { + // Big transcript that would trigger via size-estimate (600k chars / 200k window ≈ 86%), + // but used_percentage says 25% — direct path must win. + const { dir, transcriptPath } = setup(600_000); + const res = await runHookWithEnv(HOOK, { + transcript_path: transcriptPath, + session_id: 'test-8', + context_window: { context_window_size: 200_000, used_percentage: 25 }, + }, { CLAUDE_PLUGIN_ROOT: PLUGIN_ROOT }); + assert.equal(res.code, 0); + assert.equal(res.stdout.trim(), '', `expected silent (used_percentage=25), got: ${res.stdout}`); + assert.ok(!existsSync(join(dir, '.handoff-lock-test-8')), 'no lock should be written when used_percentage is below threshold'); + cleanup(dir); +}); + +test('used_percentage triggers above threshold even with tiny transcript', async () => { + // Tiny transcript would never trigger via size-estimate, but used_percentage=75 must. 
+ const { dir, transcriptPath } = setup(1_000); + const stubRoot = makeStubPluginRoot(); + const res = await runHookWithEnv(HOOK, { + transcript_path: transcriptPath, + session_id: 'test-9', + context_window: { context_window_size: 200_000, used_percentage: 75 }, + }, { CLAUDE_PLUGIN_ROOT: stubRoot }); + assert.equal(res.code, 0); + assert.ok(existsSync(join(dir, '.handoff-lock-test-9')), 'lock file should be created when used_percentage ≥ 70%'); + assert.match(res.stdout, /Auto-handoff utført/i); + assert.match(res.stdout, /kilde: direct/, 'message should label source as direct'); + cleanup(dir); + cleanup(stubRoot); +}); + +test('model-mapping: Opus 4.7 resolves to 1M window (no trigger at 17%)', async () => { + // 600k chars / 3.5 = 171k tokens / 1M = 17% — well under threshold. + // No context_window in payload — must fall through to model-map. + const { dir, transcriptPath } = setup(600_000); + const res = await runHookWithEnv(HOOK, { + transcript_path: transcriptPath, + session_id: 'test-10', + model: { id: 'claude-opus-4-7' }, + }, { CLAUDE_PLUGIN_ROOT: PLUGIN_ROOT }); + assert.equal(res.code, 0); + assert.equal(res.stdout.trim(), '', `expected silent on Opus 4.7 1M window at 17%, got: ${res.stdout}`); + assert.ok(!existsSync(join(dir, '.handoff-lock-test-10'))); + cleanup(dir); +}); + +test('model-mapping: Haiku resolves to 200k window (triggers at 86%)', async () => { + // 600k chars / 3.5 = 171k tokens / 200k = 86% — above threshold. 
+ const { dir, transcriptPath } = setup(600_000); + const stubRoot = makeStubPluginRoot(); + const res = await runHookWithEnv(HOOK, { + transcript_path: transcriptPath, + session_id: 'test-11', + model: { id: 'claude-haiku-4-5-20251001' }, + }, { CLAUDE_PLUGIN_ROOT: stubRoot }); + assert.equal(res.code, 0); + assert.ok(existsSync(join(dir, '.handoff-lock-test-11')), 'lock should fire on Haiku 200k window at 86%'); + assert.match(res.stdout, /kilde: model-map/, 'message should label source as model-map'); + cleanup(dir); + cleanup(stubRoot); +}); + +test('default fallback (1M) when neither used_percentage nor model is in payload', async () => { + // 600k chars / 3.5 = 171k tokens / 1M = 17% — must NOT trigger with new 1M default. + const { dir, transcriptPath } = setup(600_000); + const res = await runHookWithEnv(HOOK, { + transcript_path: transcriptPath, + session_id: 'test-12', + // intentionally no context_window, no model + }, { CLAUDE_PLUGIN_ROOT: PLUGIN_ROOT }); + assert.equal(res.code, 0); + assert.equal(res.stdout.trim(), '', `expected silent on default 1M fallback at 17%, got: ${res.stdout}`); + assert.ok(!existsSync(join(dir, '.handoff-lock-test-12'))); + cleanup(dir); +}); + +test('null used_percentage falls through to size-based path', async () => { + // Early-session payloads may have used_percentage: null. We must NOT treat that + // as 0 and skip the size-estimate. With size=200k and 600k chars (~86%) we trigger. 
+ const { dir, transcriptPath } = setup(600_000); + const stubRoot = makeStubPluginRoot(); + const res = await runHookWithEnv(HOOK, { + transcript_path: transcriptPath, + session_id: 'test-13', + context_window: { context_window_size: 200_000, used_percentage: null }, + }, { CLAUDE_PLUGIN_ROOT: stubRoot }); + assert.equal(res.code, 0); + assert.ok(existsSync(join(dir, '.handoff-lock-test-13')), 'lock should fire via size-fallback when used_percentage is null'); + assert.match(res.stdout, /kilde: payload-size/, 'message should label source as payload-size'); + cleanup(dir); + cleanup(stubRoot); +}); diff --git a/plugins/graceful-handoff/tests/plugin-manifest.test.mjs b/plugins/graceful-handoff/tests/plugin-manifest.test.mjs index 1ba3967..ae8347c 100644 --- a/plugins/graceful-handoff/tests/plugin-manifest.test.mjs +++ b/plugins/graceful-handoff/tests/plugin-manifest.test.mjs @@ -1,4 +1,4 @@ -// plugin-manifest.test.mjs — verify plugin.json schema for v2.0 +// plugin-manifest.test.mjs — verify plugin.json schema for v2.1 import { test } from 'node:test'; import { strict as assert } from 'node:assert'; @@ -10,9 +10,17 @@ const __dirname = dirname(fileURLToPath(import.meta.url)); const MANIFEST = join(__dirname, '..', '.claude-plugin', 'plugin.json'); const CHANGELOG = join(__dirname, '..', 'CHANGELOG.md'); -test('plugin.json version is 2.0.0', () => { +test('plugin.json version is 2.1.0', () => { const m = JSON.parse(readFileSync(MANIFEST, 'utf-8')); - assert.equal(m.version, '2.0.0'); + assert.equal(m.version, '2.1.0'); +}); + +test('CHANGELOG has [2.1.0] entry mentioning model-aware fix', () => { + const c = readFileSync(CHANGELOG, 'utf-8'); + assert.match(c, /## \[2\.1\.0\]/); + const match = c.match(/## \[2\.1\.0\][\s\S]*?(?=## \[2\.0\.0\]|$)/); + assert.ok(match, '[2.1.0] section missing'); + assert.match(match[0], /modell-bevisst|model-aware|resolveContextSource/i); }); test('plugin.json does NOT include auto_discover (not in documented schema)', () => {