#!/usr/bin/env node
// Hook:    pre-compact-scan.mjs
// Event:   PreCompact
// Purpose: Scan conversation transcript for injection patterns and credentials
//          BEFORE the harness compacts the context. Prevents poisoned content
//          from surviving compaction in a more compact form where the model
//          can no longer see the surrounding injection context.
//
// Protocol:
//   - Read JSON from stdin: { session_id, transcript_path, cwd, hook_event_name,
//     trigger, compaction_trigger }
//   - Both `trigger` (official docs) and `compaction_trigger` (research-brief)
//     are read defensively.
//   - Read transcript file (JSONL), read at most the last 500 KB for <500ms
//     latency on large transcripts.
//   - Run injection-patterns + a small set of credential regexes against
//     the transcript content.
//   - Mode from env var LLM_SECURITY_PRECOMPACT_MODE:
//       * "off"   → exit 0, no scan
//       * "warn"  → exit 0, write systemMessage JSON to stdout if findings
//       * "block" → exit 2 if findings (reason as JSON on stdout and as plain
//                   text on stderr), exit 0 otherwise
//     Default: warn.
//   - Every failure path (bad stdin, unreadable transcript, scanner error)
//     fails open with exit 0: the hook must never crash the harness.
//
// Exit codes:
//   0 = allow compaction to proceed
//   2 = block compaction (only in block mode with findings)
//
// Env: LLM_SECURITY_PRECOMPACT_MODE=block|warn|off
//      LLM_SECURITY_PRECOMPACT_MAX_BYTES (default 512000)
//
// Provenance: recovered from a whitespace-collapsed git patch
//   (commit e3aba9b, "feat(hooks): add pre-compact-scan hook skeleton").

import { readFileSync, statSync, openSync, readSync, closeSync } from 'node:fs';
import { scanForInjection } from '../../scanners/lib/injection-patterns.mjs';

const VALID_MODES = new Set(['block', 'warn', 'off']);
const modeEnv = process.env.LLM_SECURITY_PRECOMPACT_MODE;
// Unset or unrecognised values fall back to the non-blocking default.
const mode = VALID_MODES.has(modeEnv) ? modeEnv : 'warn';

if (mode === 'off') {
  process.exit(0);
}

// Upper bound on how many trailing transcript bytes are scanned.
const MAX_BYTES = (() => {
  const n = Number.parseInt(process.env.LLM_SECURITY_PRECOMPACT_MAX_BYTES || '', 10);
  return Number.isFinite(n) && n > 0 ? n : 512_000;
})();

// None of these regexes carries the `g`/`y` flag, so `.test()` is stateless
// and the literals can safely be shared across calls.
const SECRET_PATTERNS = [
  { name: 'AWS Access Key ID', pattern: /AKIA[0-9A-Z]{16}/ },
  { name: 'GitHub Token', pattern: /(?:ghp|gho|ghu|ghs|ghr)_[A-Za-z0-9_]{36,}/ },
  { name: 'npm Token', pattern: /npm_[A-Za-z0-9]{36}/ },
  { name: 'Private Key PEM Block', pattern: /-----BEGIN (?:RSA |EC |DSA |OPENSSH )?PRIVATE KEY-----/ },
  { name: 'Generic credential assignment', pattern: /(?:password|passwd|secret|token|api[_-]?key)\s*[=:]\s*['"][^'"]{8,}['"]/i },
  { name: 'Authorization bearer token', pattern: /[Bb]earer [A-Za-z0-9\-._~+/]{20,}/ },
];

/**
 * Read all of stdin (fd 0) synchronously.
 *
 * @returns {string} The stdin contents, or '' if stdin cannot be read.
 */
function readStdinSync() {
  try {
    return readFileSync(0, 'utf8');
  } catch {
    // Deliberate best-effort: the caller treats '' as "no input".
    return '';
  }
}

/**
 * Read a file as UTF-8, limited to its last `maxBytes` bytes.
 *
 * The tail may begin in the middle of a line (or of a multi-byte character);
 * callers must tolerate a garbled first line.
 *
 * @param {string} filePath - File to read.
 * @param {number} maxBytes - Maximum number of trailing bytes to return.
 * @returns {string} Whole file if it fits, otherwise the decoded tail.
 * @throws If the file cannot be stat'ed, opened or read.
 */
function readTailCapped(filePath, maxBytes) {
  const { size } = statSync(filePath);
  if (size <= maxBytes) {
    return readFileSync(filePath, 'utf8');
  }
  const fd = openSync(filePath, 'r');
  try {
    const buf = Buffer.alloc(maxBytes);
    let filled = 0;
    // readSync may return fewer bytes than requested, so keep reading until
    // the buffer is full or EOF (0) is reported, e.g. if the file shrank.
    while (filled < maxBytes) {
      const got = readSync(fd, buf, filled, maxBytes - filled, size - maxBytes + filled);
      if (got === 0) break;
      filled += got;
    }
    // Decode only what was actually read so no NUL padding reaches the scan.
    return buf.toString('utf8', 0, filled);
  } finally {
    closeSync(fd);
  }
}

/**
 * Append every text fragment reachable from a message `content` value.
 *
 * Strings are taken as-is; arrays are walked in order; for objects the
 * `text` field is taken and a nested `content` field is followed. The
 * transcript schema is not defined in this file, so nested `content` is
 * followed generically (presumably this is where tool-result payloads live;
 * verify against real transcripts).
 *
 * Iterative on purpose: nesting depth is attacker-controlled and must not be
 * able to overflow the call stack.
 *
 * @param {unknown} content - Parsed `content` value of one transcript entry.
 * @param {string[]} out - Collector the fragments are pushed onto.
 */
function collectText(content, out) {
  const pending = [content];
  while (pending.length > 0) {
    const node = pending.pop();
    if (typeof node === 'string') {
      out.push(node);
    } else if (Array.isArray(node)) {
      // Push in reverse so items are popped in document order.
      for (let i = node.length - 1; i >= 0; i -= 1) {
        pending.push(node[i]);
      }
    } else if (node !== null && typeof node === 'object') {
      if (typeof node.text === 'string') out.push(node.text);
      if (node.content != null) pending.push(node.content);
    }
  }
}

/**
 * Flatten a JSONL transcript into one newline-joined string for scanning.
 *
 * Each non-empty line is parsed as JSON and the text under `message.content`
 * (falling back to `content`) is collected. Lines that fail to parse, such
 * as a first line truncated by the tail read, are scanned verbatim instead
 * of being dropped.
 *
 * @param {string} raw - Raw transcript text.
 * @returns {string} All collected text, joined with '\n'.
 */
function extractTextFromTranscript(raw) {
  const chunks = [];
  for (const line of raw.split(/\r?\n/)) {
    if (!line) continue;
    try {
      const entry = JSON.parse(line);
      collectText(entry?.message?.content ?? entry?.content, chunks);
    } catch {
      chunks.push(line);
    }
  }
  return chunks.join('\n');
}

/**
 * Test the text against each credential pattern.
 *
 * @param {string} text - Text to scan.
 * @returns {{type: 'secret', label: string}[]} One finding per matching
 *   pattern; only the pattern name is reported, never the matched value.
 */
function scanForSecrets(text) {
  return SECRET_PATTERNS
    .filter(({ pattern }) => pattern.test(text))
    .map(({ name }) => ({ type: 'secret', label: name }));
}

/**
 * Coerce the injection scanner's return value into a flat array.
 *
 * The scanner lives in another module and its return shape is not visible
 * here, so both a plain array and an object whose values are arrays are
 * accepted; anything else counts as "no findings".
 *
 * @param {unknown} result - Value returned by scanForInjection.
 * @returns {unknown[]} Flat list of findings.
 */
function normalizeFindings(result) {
  if (Array.isArray(result)) return result;
  if (result !== null && typeof result === 'object') {
    return Object.values(result).filter(Array.isArray).flat();
  }
  return [];
}

/**
 * Write an object to stdout as a single JSON document.
 *
 * @param {object} obj - Payload for the harness.
 */
function emit(obj) {
  try {
    process.stdout.write(JSON.stringify(obj));
  } catch {
    // swallow — hook must never crash harness
  }
}

// ---------------------------------------------------------------------------
// main
// ---------------------------------------------------------------------------
let input;
try {
  input = JSON.parse(readStdinSync() || '{}');
} catch {
  // Malformed hook input: nothing to scan, let compaction proceed.
  process.exit(0);
}

const transcriptPath = input?.transcript_path;
const trigger = input?.trigger ?? input?.compaction_trigger ?? 'unknown';

if (!transcriptPath) {
  process.exit(0);
}

let transcriptText = '';
try {
  const raw = readTailCapped(transcriptPath, MAX_BYTES);
  transcriptText = extractTextFromTranscript(raw);
} catch {
  // Missing or unreadable transcript: fail open.
  process.exit(0);
}

let injectionFindings = [];
try {
  injectionFindings = normalizeFindings(scanForInjection(transcriptText));
} catch {
  // A failing injection scanner must not take the hook down, nor suppress
  // the independent credential scan below.
  injectionFindings = [];
}
const secretFindings = scanForSecrets(transcriptText);
const allFindings = [...injectionFindings, ...secretFindings];

if (allFindings.length === 0) {
  process.exit(0);
}

// Report at most 8 labels. Entries without a label/type (including bare
// strings) are shown as 'finding' so unknown scanner output is never echoed.
const labels = allFindings
  .slice(0, 8)
  .map((f) => f?.label || f?.type || 'finding')
  .join(', ');
const summary =
  `pre-compact-scan (${trigger}): ${allFindings.length} finding(s) in transcript. ` +
  `Compaction may preserve poisoned content in condensed form. Top: ${labels}.`;

if (mode === 'block') {
  emit({ decision: 'block', reason: summary });
  try {
    // NOTE(review): hook runners that follow the exit-code-2 convention are
    // expected to surface stderr rather than stdout — confirm against the
    // harness hooks reference. stdout JSON above is kept unchanged.
    process.stderr.write(`${summary}\n`);
  } catch {
    // swallow — hook must never crash harness
  }
  process.exit(2);
}

// warn mode
emit({ systemMessage: summary });
process.exit(0);