feat(hooks): add pre-compact-scan hook skeleton

This commit is contained in:
Kjell Tore Guttormsen 2026-04-17 14:40:50 +02:00
commit e3aba9bab5

View file

@ -0,0 +1,164 @@
#!/usr/bin/env node
// Hook: pre-compact-scan.mjs
// Event: PreCompact
// Purpose: Scan conversation transcript for injection patterns and credentials
// BEFORE the harness compacts the context. Prevents poisoned content
// from surviving compaction in a more compact form where the model
// can no longer see the surrounding injection context.
//
// Protocol:
// - Read JSON from stdin: { session_id, transcript_path, cwd, hook_event_name,
// trigger, compaction_trigger }
// - Both `trigger` (official docs) and `compaction_trigger` (research-brief)
// are read defensively.
// - Read the transcript file (JSONL), capped at the last 500 KB by default
// (override with LLM_SECURITY_PRECOMPACT_MAX_BYTES) to keep latency under
// 500 ms on large transcripts.
// - Run injection-patterns + a small set of credential regexes against
// the transcript content.
// - Mode from env var LLM_SECURITY_PRECOMPACT_MODE:
// * "off" → exit 0, no scan
// * "warn" → exit 0, write systemMessage JSON to stdout if findings
// * "block" → exit 2 if findings, exit 0 otherwise
// Default: warn.
//
// Exit codes:
// 0 = allow compaction to proceed
// 2 = block compaction (only in block mode with findings)
//
// Env: LLM_SECURITY_PRECOMPACT_MODE=block|warn|off
// LLM_SECURITY_PRECOMPACT_MAX_BYTES (default 512000)
import { readFileSync, statSync, openSync, readSync, closeSync } from 'node:fs';
import { scanForInjection } from '../../scanners/lib/injection-patterns.mjs';
// Recognised scan modes; any other value falls back to "warn".
const VALID_MODES = new Set(['block', 'warn', 'off']);

/**
 * Resolve the scan mode from a raw environment value.
 *
 * The value is trimmed and lower-cased before lookup so that settings such as
 * "BLOCK" or "block " are honoured instead of silently degrading to "warn".
 *
 * @param {string | undefined} raw - Raw value of LLM_SECURITY_PRECOMPACT_MODE.
 * @returns {'block' | 'warn' | 'off'} A member of VALID_MODES; "warn" when the
 *   value is missing or unrecognised.
 */
function resolveMode(raw) {
  const normalized = typeof raw === 'string' ? raw.trim().toLowerCase() : '';
  return VALID_MODES.has(normalized) ? normalized : 'warn';
}

const modeEnv = process.env.LLM_SECURITY_PRECOMPACT_MODE;
const mode = resolveMode(modeEnv);

// "off" disables the hook entirely: exit before reading stdin or the transcript.
if (mode === 'off') {
  process.exit(0);
}
// Upper bound on how many trailing transcript bytes are scanned.
// Taken from LLM_SECURITY_PRECOMPACT_MAX_BYTES when that parses (base 10) to a
// finite positive number; otherwise 512000.
const MAX_BYTES = (() => {
  const fallback = 512_000;
  const rawValue = process.env.LLM_SECURITY_PRECOMPACT_MAX_BYTES || '';
  const parsed = parseInt(rawValue, 10);
  if (!Number.isFinite(parsed) || parsed <= 0) {
    return fallback;
  }
  return parsed;
})();
// Credential detectors, declared as [label, regex] pairs and expanded into
// { name, pattern } records. None of the regexes carry the g/y flag, so
// repeated .test() calls are stateless.
const SECRET_PATTERNS = [
  // "AKIA" followed by 16 upper-case alphanumerics.
  ['AWS Access Key ID', /AKIA[0-9A-Z]{16}/],
  // ghp_/gho_/ghu_/ghs_/ghr_ prefix followed by 36+ token characters.
  ['GitHub Token', /(?:ghp|gho|ghu|ghs|ghr)_[A-Za-z0-9_]{36,}/],
  // "npm_" followed by 36 alphanumerics.
  ['npm Token', /npm_[A-Za-z0-9]{36}/],
  // PEM private-key header, optionally RSA/EC/DSA/OPENSSH.
  ['Private Key PEM Block', /-----BEGIN (?:RSA |EC |DSA |OPENSSH )?PRIVATE KEY-----/],
  // keyword, "=" or ":", then a quoted value of at least 8 characters (case-insensitive).
  ['Generic credential assignment', /(?:password|passwd|secret|token|api[_-]?key)\s*[=:]\s*['"][^'"]{8,}['"]/i],
  // "Bearer"/"bearer", a space, then 20+ token characters.
  ['Authorization bearer token', /[Bb]earer [A-Za-z0-9\-._~+/]{20,}/],
].map(([name, pattern]) => ({ name, pattern }));
/**
 * Synchronously read everything available on stdin (file descriptor 0).
 *
 * @returns {string} The UTF-8 decoded stdin contents, or an empty string when
 *   the read throws for any reason.
 */
function readStdinSync() {
  let payload = '';
  try {
    payload = readFileSync(0, 'utf8');
  } catch {
    // Best effort: an unreadable stdin is treated as "no input".
  }
  return payload;
}
/**
 * Read a file as UTF-8 text, returning at most its last `maxBytes` bytes.
 *
 * Files no larger than `maxBytes` are returned whole. For larger files only
 * the trailing window is read; the window may therefore begin in the middle
 * of a line.
 *
 * @param {string} filePath - Path of the file to read.
 * @param {number} maxBytes - Maximum number of trailing bytes to return.
 * @returns {string} Decoded file content (whole file or trailing window).
 * @throws Propagates any error raised by statSync/readFileSync/openSync/readSync.
 */
function readTailCapped(filePath, maxBytes) {
  const { size } = statSync(filePath);
  if (size <= maxBytes) {
    return readFileSync(filePath, 'utf8');
  }

  const fd = openSync(filePath, 'r');
  try {
    const buf = Buffer.alloc(maxBytes);
    const windowStart = size - maxBytes;

    // readSync may return fewer bytes than requested; keep reading until the
    // window is full or EOF is hit (e.g. the file shrank after statSync).
    let filled = 0;
    while (filled < maxBytes) {
      const bytesRead = readSync(fd, buf, filled, maxBytes - filled, windowStart + filled);
      if (bytesRead === 0) break;
      filled += bytesRead;
    }

    // The window can start inside a multi-byte UTF-8 sequence. Skip up to three
    // leading continuation bytes (10xxxxxx) so decoding does not begin with a
    // U+FFFD replacement character.
    let begin = 0;
    while (begin < filled && begin < 3 && (buf[begin] & 0xc0) === 0x80) {
      begin += 1;
    }

    // Decode only the bytes actually read, never the zero-filled remainder.
    return buf.toString('utf8', begin, filled);
  } finally {
    closeSync(fd);
  }
}
/**
 * Append every piece of text found in a transcript `content` value to `chunks`.
 *
 * Handles a plain string, or an array whose items are strings or objects.
 * For object items the `text` field is collected when it is a string, and a
 * nested `content` field (string or array) is descended into, so text wrapped
 * inside another block is scanned as well.
 *
 * @param {unknown} content - A `content` value from a transcript entry or block.
 * @param {string[]} chunks - Accumulator that receives the extracted strings.
 * @returns {void}
 */
function collectContentText(content, chunks) {
  if (typeof content === 'string') {
    chunks.push(content);
    return;
  }
  if (!Array.isArray(content)) return;

  for (const block of content) {
    if (typeof block === 'string') {
      chunks.push(block);
      continue;
    }
    if (typeof block?.text === 'string') chunks.push(block.text);
    // NOTE(review): presumably tool-result style blocks carry their payload in
    // a nested `content` field — confirm against the transcript schema.
    if (block?.content != null) collectContentText(block.content, chunks);
  }
}

/**
 * Flatten a JSONL transcript into one newline-joined string of scannable text.
 *
 * Each non-empty line is parsed as JSON and the text under `message.content`
 * (or, when that is null/undefined, `content`) is collected. Lines that fail
 * to parse are kept verbatim so they are still scanned.
 *
 * @param {string} raw - Raw transcript text, one JSON document per line.
 * @returns {string} Extracted text chunks joined with "\n".
 */
function extractTextFromTranscript(raw) {
  const chunks = [];
  for (const line of raw.split(/\r?\n/)) {
    if (!line) continue;
    try {
      const entry = JSON.parse(line);
      collectContentText(entry?.message?.content ?? entry?.content, chunks);
    } catch {
      // Not valid JSON (e.g. a line cut by the tail window): scan it as-is.
      chunks.push(line);
    }
  }
  return chunks.join('\n');
}
/**
 * Run every SECRET_PATTERNS detector against the given text.
 *
 * @param {string} text - Text to scan.
 * @returns {{ type: 'secret', label: string }[]} One finding per detector that
 *   matched, in SECRET_PATTERNS order; empty when nothing matched.
 */
function scanForSecrets(text) {
  return SECRET_PATTERNS
    .filter((detector) => detector.pattern.test(text))
    .map((detector) => ({ type: 'secret', label: detector.name }));
}
/**
 * Serialize a value as JSON and write it to stdout.
 *
 * Any failure (serialization or write) is ignored so that the hook never
 * crashes the harness.
 *
 * @param {unknown} obj - Value to serialize.
 * @returns {void}
 */
function emit(obj) {
  try {
    const serialized = JSON.stringify(obj);
    process.stdout.write(serialized);
  } catch {
    // Intentionally ignored — see function description.
  }
}
// ---------------------------------------------------------------------------
// main
// ---------------------------------------------------------------------------
// Parse the hook payload from stdin. Malformed JSON fails open (exit 0) so a
// broken payload never prevents compaction.
let input;
try {
  input = JSON.parse(readStdinSync() || '{}');
} catch {
  process.exit(0);
}

const transcriptPath = input?.transcript_path;
// Either field name may carry the trigger; fall back to "unknown".
const trigger = input?.trigger ?? input?.compaction_trigger ?? 'unknown';

// Without a transcript there is nothing to scan.
if (!transcriptPath) {
  process.exit(0);
}

// Load the (tail-capped) transcript; any read or extraction error fails open.
let transcriptText = '';
try {
  const raw = readTailCapped(transcriptPath, MAX_BYTES);
  transcriptText = extractTextFromTranscript(raw);
} catch {
  process.exit(0);
}

// NOTE(review): assumes scanForInjection returns an array of findings (or a
// falsy value) — confirm against scanners/lib/injection-patterns.mjs.
const injectionFindings = scanForInjection(transcriptText) || [];
const secretFindings = scanForSecrets(transcriptText);
const allFindings = [...injectionFindings, ...secretFindings];

if (allFindings.length === 0) {
  process.exit(0);
}

// Summarize using at most the first eight findings.
const labels = allFindings.slice(0, 8).map((f) => f.label || f.type || 'finding').join(', ');
const summary =
  `pre-compact-scan (${trigger}): ${allFindings.length} finding(s) in transcript. ` +
  `Compaction may preserve poisoned content in condensed form. Top: ${labels}.`;

// From here on output has been written, so set process.exitCode and let the
// script end naturally instead of calling process.exit(): stdout writes can be
// asynchronous on some platforms and a forced exit may truncate them.
if (mode === 'block') {
  emit({ decision: 'block', reason: summary });
  // NOTE(review): hook runners typically surface stderr (not stdout) when a
  // hook exits with code 2 — confirm against the harness documentation.
  try {
    process.stderr.write(`${summary}\n`);
  } catch {
    // Best effort, same policy as emit(): never crash the harness.
  }
  process.exitCode = 2;
} else {
  // warn mode
  emit({ systemMessage: summary });
  process.exitCode = 0;
}