164 lines
5 KiB
JavaScript
164 lines
5 KiB
JavaScript
#!/usr/bin/env node
|
|
// Hook: pre-compact-scan.mjs
|
|
// Event: PreCompact
|
|
// Purpose: Scan conversation transcript for injection patterns and credentials
|
|
// BEFORE the harness compacts the context. Prevents poisoned content
|
|
// from surviving compaction in a more compact form where the model
|
|
// can no longer see the surrounding injection context.
|
|
//
|
|
// Protocol:
|
|
// - Read JSON from stdin: { session_id, transcript_path, cwd, hook_event_name,
|
|
// trigger, compaction_trigger }
|
|
// - Both `trigger` (official docs) and `compaction_trigger` (research-brief)
|
|
// are read defensively.
|
|
// - Read transcript file (JSONL), read at most the last 500 KB for <500ms
|
|
// latency on large transcripts.
|
|
// - Run injection-patterns + a small set of credential regexes against
|
|
// the transcript content.
|
|
// - Mode from env var LLM_SECURITY_PRECOMPACT_MODE:
|
|
// * "off" → exit 0, no scan
|
|
// * "warn" → exit 0, write systemMessage JSON to stdout if findings
|
|
// * "block" → exit 2 if findings, exit 0 otherwise
|
|
// Default: warn.
|
|
//
|
|
// Exit codes:
|
|
// 0 = allow compaction to proceed
|
|
// 2 = block compaction (only in block mode with findings)
|
|
//
|
|
// Env: LLM_SECURITY_PRECOMPACT_MODE=block|warn|off
|
|
// LLM_SECURITY_PRECOMPACT_MAX_BYTES (default 512000)
|
|
|
|
import { readFileSync, statSync, openSync, readSync, closeSync } from 'node:fs';
|
|
import { scanForInjection } from '../../scanners/lib/injection-patterns.mjs';
|
|
|
|
// Modes accepted via LLM_SECURITY_PRECOMPACT_MODE.
const VALID_MODES = new Set(['block', 'warn', 'off']);

/**
 * Resolve the scan mode from a raw environment value.
 *
 * Surrounding whitespace and letter case are ignored so that values such as
 * "Block" or "block " are honoured instead of silently degrading to the
 * default. Anything unrecognised (including an unset variable) yields 'warn'.
 *
 * @param {string | undefined} rawValue - Raw value of the environment variable.
 * @returns {'block' | 'warn' | 'off'} The effective mode.
 */
function resolveMode(rawValue) {
  const normalized = typeof rawValue === 'string' ? rawValue.trim().toLowerCase() : '';
  return VALID_MODES.has(normalized) ? normalized : 'warn';
}

const modeEnv = process.env.LLM_SECURITY_PRECOMPACT_MODE;
const mode = resolveMode(modeEnv);

// "off" short-circuits before any stdin or transcript I/O happens.
if (mode === 'off') {
  process.exit(0);
}
|
|
|
|
// Upper bound on how many trailing transcript bytes are read. Taken from
// LLM_SECURITY_PRECOMPACT_MAX_BYTES when that parses to a positive finite
// integer; otherwise 512000.
const MAX_BYTES = (() => {
  const fallback = 512_000;
  const parsed = Number.parseInt(process.env.LLM_SECURITY_PRECOMPACT_MAX_BYTES ?? '', 10);
  if (!Number.isFinite(parsed) || parsed <= 0) {
    return fallback;
  }
  return parsed;
})();
|
|
|
|
// Credential detectors applied to the transcript text. Each entry pairs a
// human-readable name (used as the finding label) with the regex that detects it.
const SECRET_PATTERNS = [
  ['AWS Access Key ID', /AKIA[0-9A-Z]{16}/],
  ['GitHub Token', /(?:ghp|gho|ghu|ghs|ghr)_[A-Za-z0-9_]{36,}/],
  ['npm Token', /npm_[A-Za-z0-9]{36}/],
  ['Private Key PEM Block', /-----BEGIN (?:RSA |EC |DSA |OPENSSH )?PRIVATE KEY-----/],
  ['Generic credential assignment', /(?:password|passwd|secret|token|api[_-]?key)\s*[=:]\s*['"][^'"]{8,}['"]/i],
  ['Authorization bearer token', /[Bb]earer [A-Za-z0-9\-._~+/]{20,}/],
].map(([name, pattern]) => ({ name, pattern }));
|
|
|
|
/**
 * Synchronously read everything available on stdin (file descriptor 0).
 *
 * @returns {string} The stdin contents decoded as UTF-8, or '' if the read throws.
 */
function readStdinSync() {
  const STDIN_FD = 0;
  let data = '';
  try {
    data = readFileSync(STDIN_FD, 'utf8');
  } catch {
    // Best effort: an unreadable stdin is treated the same as an empty one.
  }
  return data;
}
|
|
|
|
/**
 * Read a file as UTF-8, returning at most its last `maxBytes` bytes.
 *
 * Files no larger than `maxBytes` are returned whole. For larger files only the
 * trailing window is read. The window starts at an arbitrary byte offset, so
 * the first line of the result is usually a partial line.
 *
 * @param {string} filePath - Path of the file to read.
 * @param {number} maxBytes - Maximum number of trailing bytes to read.
 * @returns {string} Decoded text of the whole file or of its tail.
 * @throws Propagates any error raised by the underlying fs calls.
 */
function readTailCapped(filePath, maxBytes) {
  const { size } = statSync(filePath);
  if (size <= maxBytes) {
    return readFileSync(filePath, 'utf8');
  }

  const fd = openSync(filePath, 'r');
  try {
    const buf = Buffer.alloc(maxBytes);
    const windowStart = size - maxBytes;

    // readSync may return fewer bytes than requested; keep reading until the
    // buffer is full or EOF so unfilled bytes are never decoded as NULs.
    let filled = 0;
    while (filled < maxBytes) {
      const bytesRead = readSync(fd, buf, filled, maxBytes - filled, windowStart + filled);
      if (bytesRead === 0) break;
      filled += bytesRead;
    }

    // The window can begin in the middle of a multi-byte UTF-8 sequence. Skip
    // leading continuation bytes (10xxxxxx; at most three can follow a lead
    // byte) so the text does not start with U+FFFD replacement characters.
    let start = 0;
    while (start < filled && start < 3 && (buf[start] & 0xc0) === 0x80) {
      start += 1;
    }

    return buf.toString('utf8', start, filled);
  } finally {
    closeSync(fd);
  }
}
|
|
|
|
/**
 * Pull scannable text out of a JSONL transcript.
 *
 * Each non-empty line is parsed as JSON and its `message.content` (or, when
 * that is absent, its top-level `content`) is collected:
 *   - a string is taken as-is;
 *   - an array contributes every string element, every block's string `text`,
 *     and, recursively, whatever a block nests under its own `content`
 *     (tool-result-style blocks carry their payload there, as a string or as
 *     a further array of blocks).
 * Lines that are not valid JSON are kept verbatim so that a truncated first
 * line of a tail read is still scanned.
 *
 * @param {string} raw - Raw transcript text, one JSON document per line.
 * @returns {string} All collected text fragments joined with '\n'.
 */
function extractTextFromTranscript(raw) {
  const chunks = [];

  // Collect text from a content value (string or array of blocks).
  const collect = (content) => {
    if (typeof content === 'string') {
      chunks.push(content);
      return;
    }
    if (!Array.isArray(content)) return;

    for (const block of content) {
      if (typeof block === 'string') {
        chunks.push(block);
        continue;
      }
      if (typeof block?.text === 'string') chunks.push(block.text);
      // Nested payloads would otherwise never reach the scanners.
      if (block?.content != null) collect(block.content);
    }
  };

  for (const line of raw.split(/\r?\n/)) {
    if (!line) continue;
    try {
      const obj = JSON.parse(line);
      collect(obj?.message?.content ?? obj?.content);
    } catch {
      // Unparseable line (or any failure while walking it): scan the raw text.
      chunks.push(line);
    }
  }

  return chunks.join('\n');
}
|
|
|
|
/**
 * Test the text against every entry of SECRET_PATTERNS.
 *
 * @param {string} text - Text to scan.
 * @returns {{ type: 'secret', label: string }[]} One finding per pattern that
 *   matches at least once, in SECRET_PATTERNS order.
 */
function scanForSecrets(text) {
  return SECRET_PATTERNS
    .filter(({ pattern }) => pattern.test(text))
    .map(({ name }) => ({ type: 'secret', label: name }));
}
|
|
|
|
/**
 * Write an object to stdout as a single JSON document.
 *
 * Serialisation and write failures are both swallowed: the hook must never
 * crash the harness just because it could not report.
 *
 * @param {unknown} obj - Value to serialise and print.
 * @returns {void}
 */
function emit(obj) {
  let payload;
  try {
    payload = JSON.stringify(obj);
  } catch {
    return;
  }
  try {
    process.stdout.write(payload);
  } catch {
    // swallow — hook must never crash harness
  }
}
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// main
|
|
// ---------------------------------------------------------------------------
|
|
// Parse the hook payload from stdin. Empty stdin is treated as `{}`; malformed
// JSON fails open (exit 0) so a broken payload never stops compaction.
let input;
try {
  input = JSON.parse(readStdinSync() || '{}');
} catch {
  process.exit(0);
}

const transcriptPath = input?.transcript_path;
// Either field name may carry the compaction trigger.
const trigger = input?.trigger ?? input?.compaction_trigger ?? 'unknown';

// Nothing to scan without a transcript path.
if (!transcriptPath) {
  process.exit(0);
}

let transcriptText = '';
try {
  const raw = readTailCapped(transcriptPath, MAX_BYTES);
  transcriptText = extractTextFromTranscript(raw);
} catch {
  // Missing or unreadable transcript: fail open.
  process.exit(0);
}

const injectionFindings = scanForInjection(transcriptText) || [];
const secretFindings = scanForSecrets(transcriptText);
const allFindings = [...injectionFindings, ...secretFindings];

if (allFindings.length === 0) {
  process.exit(0);
}

// Only the first eight labels are listed; the total count is always reported.
const labels = allFindings.slice(0, 8).map((f) => f.label || f.type || 'finding').join(', ');
const summary =
  `pre-compact-scan (${trigger}): ${allFindings.length} finding(s) in transcript. ` +
  `Compaction may preserve poisoned content in condensed form. Top: ${labels}.`;

if (mode === 'block') {
  emit({ decision: 'block', reason: summary });
  // NOTE(review): the Claude Code hook docs describe stderr as the channel
  // surfaced on exit code 2, so the reason is mirrored there as well; the
  // stdout JSON is kept unchanged for consumers that read it. Confirm against
  // the harness version in use.
  try {
    process.stderr.write(`${summary}\n`);
  } catch {
    // swallow — hook must never crash harness
  }
  process.exit(2);
}

// warn mode
emit({ systemMessage: summary });
process.exit(0);
|