#!/usr/bin/env node
// auto-cleaner.mjs — Deterministic remediation engine for security findings
// Zero external dependencies. Reuses scanners/lib/ shared library.
//
// CLI: node auto-cleaner.mjs <target> --findings <json-file> [--dry-run]
//
// Fix operations are pure functions (content in → content out).
// Atomic writes: write to .clean-tmp, validate, rename over original.
// Content-based matching (not line-number based) for robustness.

import { execFileSync, execSync } from 'node:child_process';
import { writeFileSync, unlinkSync } from 'node:fs';
import { chmod, readFile, writeFile, rename, unlink, stat } from 'node:fs/promises';
import { resolve, extname, join, dirname, relative, isAbsolute, sep } from 'node:path';
import { fileURLToPath } from 'node:url';

import { fixResult, cleanEnvelope } from './lib/output.mjs';

// ---------------------------------------------------------------------------
// Classification: finding → tier
// ---------------------------------------------------------------------------

/**
 * Classify a finding into a remediation tier.
 *
 * Matching is done on the lower-cased title, description and file path, so
 * the rules are case-insensitive. Non-string fields are coerced with String()
 * instead of throwing.
 *
 * @param {object} f - Scanner finding object (scanner, title, description, file, severity)
 * @returns {'auto'|'semi_auto'|'manual'|'skip'}
 */
function classifyFinding(f) {
  const lower = (value) => String(value || '').toLowerCase();

  const scanner = f.scanner || '';
  const title = lower(f.title);
  const file = lower(f.file);
  const combined = `${title} ${lower(f.description)}`;

  const hasAll = (...needles) => needles.every((needle) => combined.includes(needle));
  const hasAny = (...needles) => needles.some((needle) => combined.includes(needle));

  switch (scanner) {
    // --- UNI findings ---
    case 'UNI': {
      const invisible = ['zero-width', 'unicode tag', 'steganograph', 'bidi'];
      if (invisible.some((keyword) => title.includes(keyword))) return 'auto';
      if (title.includes('homoglyph')) {
        // Code files → auto, everything else (e.g. markdown) → semi_auto
        const codeExts = ['.js', '.mjs', '.cjs', '.ts', '.mts', '.py', '.jsx', '.tsx'];
        return codeExts.some((ext) => file.endsWith(ext)) ? 'auto' : 'semi_auto';
      }
      return 'semi_auto';
    }

    // --- ENT findings ---
    case 'ENT':
      return 'semi_auto';

    // --- PRM findings ---
    case 'PRM': {
      if (title.includes('haiku') && combined.includes('sensitive')) return 'auto';
      const needsReview =
        title.includes('ghost hook') ||
        combined.includes('script not found') ||
        hasAll('read-only', 'write') ||
        hasAll('dangerous', 'triple');
      return needsReview ? 'semi_auto' : 'manual';
    }

    // --- DEP findings ---
    case 'DEP':
      // A CVE without a known fix cannot be remediated by a version bump.
      return combined.includes('cve') && !combined.includes('fix available')
        ? 'manual'
        : 'semi_auto';

    // --- TNT findings ---
    case 'TNT':
      return 'manual';

    // --- GIT findings ---
    case 'GIT':
      if (hasAll('suspicious domain', 'post-commit')) return 'auto';
      if (hasAll('hook', 'network')) return 'semi_auto';
      return 'skip';

    // --- NET findings ---
    case 'NET':
      if (f.severity === 'high' && combined.includes('suspicious')) return 'auto';
      if (hasAny('loopback', '127.0.0.1')) return 'auto';
      // Everything else (including IP-based URLs) is semi-auto unless it is
      // purely informational.
      return f.severity === 'info' ? 'manual' : 'semi_auto';

    // --- LLM-detected findings (from skill-scanner-agent) ---
    case 'SKL':
    case 'MCP': {
      const autoFixable =
        hasAny('html comment injection', '<!-- agent') ||
        hasAll('system:', 'header') ||
        hasAny('persistence', 'cron', 'launchagent', 'zshrc') ||
        hasAny('privilege escalation', 'hooks.json', 'settings.json') ||
        hasAll('registry', 'redirect') ||
        hasAll('injection', 'frontmatter') ||
        hasAny('exfiltration', 'suspicious') ||
        hasAll('credential', 'env') ||
        hasAny('self-modif', 'self-update');
      if (autoFixable) return 'auto';

      const needsReview =
        combined.includes('credential access') ||
        hasAll('unannounced', 'install') ||
        combined.includes('hidden directive');
      return needsReview ? 'semi_auto' : 'manual';
    }

    default:
      return 'manual';
  }
}

// ---------------------------------------------------------------------------
// Fix operations — pure functions: content in → content out
// ---------------------------------------------------------------------------

/**
 * Zero-width / invisible code points to strip: ZWSP, ZWNJ, ZWJ, BOM and the
 * soft hyphen. (stripZeroWidth keeps a BOM at the very start of the file.)
 */
const ZERO_WIDTH = new Set([0x200B, 0x200C, 0x200D, 0xFEFF, 0x00AD]);

/** Unicode Tags block U+E0001–U+E007F (inclusive bounds) */
const TAG_START = 0xE0001;
const TAG_END = 0xE007F;

/** BIDI control code points (embeddings, overrides, isolates and their terminators) */
const BIDI = new Set([
  0x202A, 0x202B, 0x202C, 0x202D, 0x202E,
  0x2066, 0x2067, 0x2068, 0x2069,
]);

/** Cyrillic → Latin confusable mapping (code point → replacement letter) */
const CYRILLIC_TO_LATIN = new Map([
  // lower case
  [0x0430, 'a'], [0x0435, 'e'], [0x043E, 'o'], [0x0441, 'c'],
  [0x0440, 'p'], [0x0443, 'y'], [0x0445, 'x'],
  // upper case
  [0x0410, 'A'], [0x0415, 'E'], [0x041E, 'O'], [0x0421, 'C'],
  [0x0420, 'P'], [0x0425, 'X'],
]);

/**
 * Suspicious domains to strip. Entries are lower-case and are matched as
 * substrings of a lower-cased line.
 */
const EXFIL_DOMAINS = [
  'webhook.site', 'ngrok', 'requestbin', 'pipedream.net',
  'pastebin.com', 'bit.ly', 'hookbin.com', 'beeceptor.com',
  'smee.io', 'transfer.sh', 'file.io', 'paste.ee', 'hastebin.com',
  'tinyurl.com', 'is.gd', 'goo.gl', 'cutt.ly',
];

/**
 * Strip zero-width characters (members of ZERO_WIDTH) from content.
 * A BOM (U+FEFF) is preserved only when it is the very first character.
 *
 * @param {string} content - File content
 * @returns {string|null} Cleaned content, or null when nothing was removed
 */
function stripZeroWidth(content) {
  let kept = '';
  let offset = 0; // UTF-16 offset of the current code point
  let changed = false;

  // for...of walks code points, so astral characters are never split.
  for (const char of content) {
    const cp = char.codePointAt(0);
    const isLeadingBom = cp === 0xFEFF && offset === 0;
    if (ZERO_WIDTH.has(cp) && !isLeadingBom) {
      changed = true;
    } else {
      kept += char;
    }
    offset += char.length;
  }

  return changed ? kept : null;
}

/**
 * Strip Unicode Tag block code points (TAG_START..TAG_END, inclusive),
 * which can carry hidden (steganographic) text.
 *
 * @param {string} content - File content
 * @returns {string|null} Cleaned content, or null when nothing was removed
 */
function stripUnicodeTags(content) {
  let kept = '';
  let changed = false;

  for (const char of content) {
    const cp = char.codePointAt(0);
    if (cp >= TAG_START && cp <= TAG_END) {
      changed = true;
      continue;
    }
    kept += char;
  }

  return changed ? kept : null;
}

/**
 * Strip BIDI control characters (members of BIDI).
 *
 * @param {string} content - File content
 * @returns {string|null} Cleaned content, or null when nothing was removed
 */
function stripBidi(content) {
  let kept = '';
  let changed = false;

  for (const char of content) {
    if (BIDI.has(char.codePointAt(0))) {
      changed = true;
      continue;
    }
    kept += char;
  }

  return changed ? kept : null;
}

/**
 * Normalize Cyrillic confusables to their Latin equivalents using
 * CYRILLIC_TO_LATIN. The FIX_OPS registry marks this operation as code-only
 * (.js, .ts, .mjs, .py, etc.).
 *
 * @param {string} content - File content
 * @returns {string|null} Normalized content, or null when nothing was replaced
 */
function normalizeHomoglyphs(content) {
  let normalized = '';
  let changed = false;

  for (const char of content) {
    const latin = CYRILLIC_TO_LATIN.get(char.codePointAt(0));
    if (latin) {
      changed = true;
      normalized += latin;
    } else {
      normalized += char;
    }
  }

  return changed ? normalized : null;
}

/**
 * Strip HTML comment injections (<!-- AGENT:..., <!-- HIDDEN:..., <!-- SYSTEM:...).
 *
 * The keyword is matched case-insensitively so that `<!-- agent: ... -->`
 * cannot slip past a check written for upper-case markers. Comments may span
 * multiple lines.
 *
 * @param {string} content - File content
 * @returns {string|null} Cleaned content, or null when nothing was removed
 */
function stripHtmlCommentInjections(content) {
  const injectedComment = /<!--\s*(?:AGENT|HIDDEN|SYSTEM)\s*:[\s\S]*?-->/gi;
  const cleaned = content.replace(injectedComment, '');
  return cleaned === content ? null : cleaned;
}

/**
 * Strip spoofed "# SYSTEM:" headers (not inside ``` code fences).
 *
 * Any heading level (# … ######) and up to three leading spaces are
 * recognised, so "## SYSTEM:" or an indented heading is removed as well.
 *
 * @param {string} content - File content
 * @returns {string|null} Cleaned content, or null when nothing was removed
 */
function stripSystemHeaders(content) {
  const spoofedHeader = /^ {0,3}#{1,6}\s*SYSTEM\s*:/i;
  const kept = [];
  let inCodeFence = false;
  let changed = false;

  for (const line of content.split('\n')) {
    if (line.trimStart().startsWith('```')) {
      inCodeFence = !inCodeFence;
    }
    if (!inCodeFence && spoofedHeader.test(line)) {
      changed = true; // drop the whole header line
      continue;
    }
    kept.push(line);
  }

  return changed ? kept.join('\n') : null;
}

/**
 * Strip persistence mechanisms (crontab, LaunchAgent, systemctl, shell-rc writes).
 *
 * - Inside a ``` code fence: the whole fenced block (opening fence, every
 *   line, closing fence) is removed as soon as any line matches.
 * - Outside a fence: only the matching line is removed.
 *
 * @param {string} content - File content
 * @returns {string|null} Cleaned content, or null when nothing was removed
 */
function stripPersistence(content) {
  const PERSISTENCE_PATTERNS = [
    /crontab\s+-/,
    /LaunchAgent/i,
    /systemctl\s+(enable|start|restart)/,
    />>?\s*~\/\.(?:zshrc|bashrc|profile|bash_profile)/,
    /Library\/LaunchAgents/,
  ];
  const isPersistence = (line) => PERSISTENCE_PATTERNS.some((p) => p.test(line));

  const result = [];
  let inCodeFence = false;
  let fenceStart = -1; // index in `result` of the current block's opening fence
  let inMaliciousBlock = false;
  let changed = false;

  for (const line of content.split('\n')) {
    if (line.trimStart().startsWith('```')) {
      if (!inCodeFence) {
        inCodeFence = true;
        fenceStart = result.length;
        result.push(line);
      } else {
        inCodeFence = false;
        if (inMaliciousBlock) {
          inMaliciousBlock = false; // swallow the closing fence too
        } else {
          result.push(line);
        }
      }
      continue;
    }

    if (inMaliciousBlock) continue; // still inside a block being removed

    if (isPersistence(line)) {
      changed = true;
      if (inCodeFence) {
        // Drop everything emitted for this block so far, including the
        // opening fence, wherever in the block the match occurred. Popping
        // a single line would leave an unbalanced fence behind.
        result.length = fenceStart;
        inMaliciousBlock = true;
      }
      continue;
    }

    result.push(line);
  }

  return changed ? result.join('\n') : null;
}

/**
 * Strip privilege escalation writes (to hooks.json, settings.json, CLAUDE.md).
 *
 * A line is removed when it names one of the sensitive targets AND the rest
 * of the line contains a modification verb. The target path is removed
 * before looking for verbs; otherwise "set" would always match inside
 * "settings.json" and every mention of that file would be deleted.
 *
 * @param {string} content - File content
 * @returns {string|null} Cleaned content, or null when nothing was removed
 */
function stripEscalation(content) {
  // Global flags so replace() removes every occurrence on a line.
  const ESCALATION_TARGETS = [
    /hooks\/hooks\.json/g,
    /(?:~\/)?\.claude\/settings\.json/g,
    /CLAUDE\.md/gi,
  ];
  const MODIFICATION_VERBS =
    /modif|write|update|overwrite|create|set|add|push|insert|append|config/i;

  const kept = [];
  let changed = false;

  for (const line of content.split('\n')) {
    const withoutTargets = ESCALATION_TARGETS.reduce(
      (text, target) => text.replace(target, ''),
      line,
    );
    const namesTarget = withoutTargets !== line;
    if (namesTarget && MODIFICATION_VERBS.test(withoutTargets)) {
      changed = true;
      continue;
    }
    kept.push(line);
  }

  return changed ? kept.join('\n') : null;
}

/**
 * Strip non-standard registry redirections (npm config set registry,
 * --index-url, --extra-index-url).
 *
 * A line is removed when the option is followed by anything other than the
 * official host. The official URL must end at a path separator, whitespace,
 * quote or end of line, so look-alike hosts such as
 * "registry.npmjs.org.evil.io" are not treated as official. Both
 * "option value" and "option=value" spellings are recognised.
 *
 * @param {string} content - File content
 * @returns {string|null} Cleaned content, or null when nothing was removed
 */
function stripRegistryRedirect(content) {
  // The leading [\s=] alternative inside each negative lookahead stops the
  // regex engine from backtracking onto a shorter separator and then
  // "not seeing" the official URL (e.g. when two spaces precede it).
  const REDIRECT_PATTERNS = [
    /npm\s+config\s+set\s+registry[\s=]+(?![\s=]|https:\/\/registry\.npmjs\.org(?=[/\s"']|$))/,
    /--index-url[\s=]+(?![\s=]|https:\/\/pypi\.org(?=[/\s"']|$))/,
    /--extra-index-url[\s=]+(?![\s=]|https?:\/\/pypi\.org(?=[/\s"']|$))/,
  ];

  const kept = [];
  let changed = false;

  for (const line of content.split('\n')) {
    if (REDIRECT_PATTERNS.some((p) => p.test(line))) {
      changed = true;
      continue;
    }
    kept.push(line);
  }

  return changed ? kept.join('\n') : null;
}

/**
 * Strip lines containing suspicious exfiltration domain URLs.
 *
 * A line is removed when it contains an http(s) scheme and any entry of
 * EXFIL_DOMAINS. Both checks run on the lower-cased line, so
 * "HTTPS://WEBHOOK.SITE/..." is caught as well.
 *
 * @param {string} content - File content
 * @returns {string|null} Cleaned content, or null when nothing was removed
 */
function stripSuspiciousUrls(content) {
  const kept = [];
  let changed = false;

  for (const line of content.split('\n')) {
    const lower = line.toLowerCase();
    const hasUrl = /https?:\/\//.test(lower);
    if (hasUrl && EXFIL_DOMAINS.some((domain) => lower.includes(domain))) {
      changed = true;
      continue;
    }
    kept.push(line);
  }

  return changed ? kept.join('\n') : null;
}

/**
 * Normalize loopback IPs to localhost ("http://127.0.0.1" → "http://localhost").
 *
 * The address must end right after the final ".1": a following digit
 * (127.0.0.100) or a further host label (127.0.0.1.nip.io) means it is a
 * different host and is left untouched.
 *
 * @param {string} content - File content
 * @returns {string|null} Normalized content, or null when nothing was replaced
 */
function normalizeLoopback(content) {
  const loopbackUrl = /http:\/\/127\.0\.0\.1(?!\d|\.\w)/g;
  const normalized = content.replace(loopbackUrl, 'http://localhost');
  return normalized === content ? null : normalized;
}

/**
 * Upgrade haiku model to sonnet in YAML frontmatter.
 *
 * Only a `model:` key at the start of a frontmatter line is rewritten, so the
 * phrase "model: haiku" inside another field (e.g. a description) is left
 * alone. The document is reassembled by slicing rather than with
 * String.prototype.replace, because a replacement string would interpret `$`
 * sequences found in the frontmatter.
 *
 * @param {string} content - File content
 * @returns {string|null} Updated content, or null when there is no
 *   frontmatter or no haiku model line in it
 */
function upgradeHaikuModel(content) {
  const fmMatch = content.match(/^(---\r?\n[\s\S]*?\r?\n---)/);
  if (!fmMatch) return null;

  const frontmatter = fmMatch[1];
  const upgraded = frontmatter.replace(/^([ \t]*)model:[ \t]*haiku/im, '$1model: sonnet');
  if (upgraded === frontmatter) return null;

  return upgraded + content.slice(frontmatter.length);
}

/**
 * Strip injection phrases from the YAML frontmatter.
 *
 * Every phrase is matched case-insensitively anywhere inside the leading
 * `---` … `---` block; the body after the frontmatter is never touched.
 *
 * @param {string} content - File content
 * @returns {string|null} Cleaned content, or null when there is no
 *   frontmatter or nothing was removed
 */
function stripInjectionFrontmatter(content) {
  const fmMatch = content.match(/^(---\r?\n)([\s\S]*?)(\r?\n---)/);
  if (!fmMatch) return null;

  const INJECTION_PHRASES = [
    /<!--\s*(?:AGENT|HIDDEN|SYSTEM)\s*:[\s\S]*?-->/gi,
    /ignore\s+(?:previous|above|all)\s+instructions/gi,
    /you\s+are\s+now\s+(?:a|an)\s+/gi,
    /override\s+safety\s+constraints/gi,
    /unrestricted\s+(?:diagnostic\s+)?mode/gi,
    /pre-authorized/gi,
    /elevated\s+permissions/gi,
  ];

  const [whole, opener, original, closer] = fmMatch;
  const cleaned = INJECTION_PHRASES.reduce(
    (text, phrase) => text.replace(phrase, ''),
    original,
  );
  if (cleaned === original) return null;

  return `${opener}${cleaned}${closer}${content.slice(whole.length)}`;
}

/**
 * Move MCP credential values from args to env in JSON config.
 *
 * For every server under `mcpServers` / `mcp_servers`, each argument that
 * looks credential-related is removed from `args` and stored in `env`:
 *   - `--flag=value` / `NAME=value`: the part before `=` names the env key
 *     and the part after it is the value (only that one arg is removed);
 *   - otherwise, when another arg follows, that arg is taken as the value and
 *     both are removed;
 *   - otherwise the arg itself is stored as the value.
 * The env key is the name with every non-alphanumeric character replaced by
 * `_`, upper-cased. `env` is only created on servers that actually change.
 *
 * @param {string} content - JSON text of the config file
 * @returns {string|null} Re-serialized JSON (2-space indent), or null when the
 *   input is not JSON, has no server map, or nothing was moved
 */
function moveMcpCredsToEnv(content) {
  let parsed;
  try {
    parsed = JSON.parse(content);
  } catch {
    return null; // not JSON — nothing this operation can do
  }

  const servers = parsed?.mcpServers || parsed?.mcp_servers;
  if (!servers || typeof servers !== 'object') return null;

  const CRED_PATTERNS = [
    /api[_-]?key/i, /secret/i, /token/i, /password/i,
    /credential/i, /auth/i, /bearer/i,
  ];
  const KEY_VALUE = /^(-{0,2}[A-Za-z_][\w.-]*)=([\s\S]*)$/;
  const toEnvKey = (name) => name.replace(/[^A-Z0-9_]/gi, '_').toUpperCase();

  let changed = false;

  for (const config of Object.values(servers)) {
    if (!config || typeof config !== 'object' || !Array.isArray(config.args)) continue;
    const { args } = config;

    // Walk backwards so splicing never shifts an index we still have to visit.
    for (let i = args.length - 1; i >= 0; i--) {
      const arg = String(args[i]);
      if (!CRED_PATTERNS.some((p) => p.test(arg))) continue;

      let name = arg;
      let value = arg;
      let removeCount = 1;

      const keyValue = KEY_VALUE.exec(arg);
      if (keyValue) {
        // Self-contained pair: never let the secret leak into the key name
        // and never consume the unrelated argument that follows.
        [, name, value] = keyValue;
      } else if (i + 1 < args.length) {
        value = String(args[i + 1]);
        removeCount = 2;
      }

      if (!config.env) config.env = {};
      config.env[toEnvKey(name)] = value;
      args.splice(i, removeCount);
      changed = true;
    }
  }

  return changed ? JSON.stringify(parsed, null, 2) : null;
}

/**
|
||
* Strip writeFile calls targeting MCP/Claude config paths.
|
||
*/
|
||
function stripSelfModification(content) {
|
||
const lines = content.split('\n');
|
||
const result = [];
|
||
let changed = false;
|
||
|
||
const SELF_MOD_PATTERNS = [
|
||
/writeFile.*\.claude/i,
|
||
/writeFile.*hooks\.json/i,
|
||
/writeFile.*settings\.json/i,
|
||
/writeFile.*\.mcp\.json/i,
|
||
/writeFile.*plugin\.json/i,
|
||
/fs\.write.*\.claude/i,
|
||
/fs\.write.*hooks\.json/i,
|
||
];
|
||
|
||
for (const line of lines) {
|
||
if (SELF_MOD_PATTERNS.some(p => p.test(line))) {
|
||
changed = true;
|
||
continue;
|
||
}
|
||
result.push(line);
|
||
}
|
||
return changed ? result.join('\n') : null;
|
||
}
|
||
|
||
/**
 * Strip npm/pip/git self-update commands and pipe-to-shell installers.
 *
 * - When the matching line is the first line of a ``` fenced block, the
 *   whole block (opening fence through closing fence) is removed.
 * - Otherwise only the matching line is removed.
 *
 * Fence state is tracked explicitly: a match that merely follows a *closing*
 * fence must not be mistaken for the start of a block, which would swallow
 * everything up to the next fence.
 *
 * @param {string} content - File content
 * @returns {string|null} Cleaned content, or null when nothing was removed
 */
function stripSelfUpdate(content) {
  const SELF_UPDATE = [
    /npm\s+(install|update)\s+(-g\s+)?.*self/i,
    /pip\s+install\s+--upgrade\s+.*self/i,
    /git\s+pull\s+.*origin/i,
    /curl.*\|\s*(sh|bash)/,
    /wget.*\|\s*(sh|bash)/,
  ];

  const result = [];
  let inFence = false;
  let fenceOpenIdx = -1; // index in `result` of the current opening fence
  let inSelfUpdate = false; // true while skipping a block being removed
  let changed = false;

  for (const line of content.split('\n')) {
    const isFence = line.trimStart().startsWith('```');

    if (inSelfUpdate) {
      if (isFence) {
        // Closing fence of the removed block.
        inSelfUpdate = false;
        inFence = false;
        changed = true;
      }
      continue;
    }

    if (SELF_UPDATE.some((p) => p.test(line))) {
      changed = true;
      const opensBlock = inFence && fenceOpenIdx === result.length - 1;
      if (opensBlock) {
        result.pop(); // remove the opening fence we already emitted
        inSelfUpdate = true;
      }
      continue;
    }

    if (isFence) {
      inFence = !inFence;
      fenceOpenIdx = inFence ? result.length : -1;
    }
    result.push(line);
  }

  return changed ? result.join('\n') : null;
}

// ---------------------------------------------------------------------------
// Fix operation registry
// ---------------------------------------------------------------------------

/**
 * Map of operation names → fix functions + metadata.
 *
 * Each entry has:
 *   fn       - pure fix function (content → new content, or null when unchanged)
 *   desc     - human-readable description used in fix reports
 *   codeOnly - (optional) when true the operation is skipped for non-code files
 */
const FIX_OPS = {
  strip_zero_width: {
    fn: stripZeroWidth,
    desc: 'Remove zero-width invisible characters',
  },
  strip_unicode_tags: {
    fn: stripUnicodeTags,
    desc: 'Remove Unicode Tag steganography codepoints',
  },
  strip_bidi: {
    fn: stripBidi,
    desc: 'Remove BIDI override characters',
  },
  normalize_homoglyphs: {
    fn: normalizeHomoglyphs,
    desc: 'Normalize Cyrillic confusables to Latin equivalents',
    codeOnly: true,
  },
  strip_html_comment_injections: {
    fn: stripHtmlCommentInjections,
    desc: 'Remove <!-- AGENT/HIDDEN/SYSTEM --> comment injections',
  },
  strip_system_headers: {
    fn: stripSystemHeaders,
    desc: 'Remove spoofed # SYSTEM: headers',
  },
  strip_persistence: {
    fn: stripPersistence,
    desc: 'Remove persistence mechanisms (crontab, LaunchAgent, zshrc)',
  },
  strip_escalation: {
    fn: stripEscalation,
    desc: 'Remove privilege escalation writes to hooks/settings',
  },
  strip_registry_redirect: {
    fn: stripRegistryRedirect,
    desc: 'Remove non-standard package registry redirections',
  },
  strip_suspicious_urls: {
    fn: stripSuspiciousUrls,
    desc: 'Remove lines with suspicious exfiltration domain URLs',
  },
  normalize_loopback: {
    fn: normalizeLoopback,
    desc: 'Replace 127.0.0.1 with localhost',
  },
  upgrade_haiku_model: {
    fn: upgradeHaikuModel,
    desc: 'Upgrade model: haiku to model: sonnet in frontmatter',
  },
  strip_injection_frontmatter: {
    fn: stripInjectionFrontmatter,
    desc: 'Remove injection phrases from frontmatter fields',
  },
  move_mcp_creds_to_env: {
    fn: moveMcpCredsToEnv,
    desc: 'Move credentials from MCP args to env block',
  },
  strip_self_modification: {
    fn: stripSelfModification,
    desc: 'Remove writeFile calls targeting config paths',
  },
  strip_self_update: {
    fn: stripSelfUpdate,
    desc: 'Remove self-update mechanisms (pipe-to-shell, etc.)',
  },
};

// ---------------------------------------------------------------------------
// Finding → fix operation mapping
// ---------------------------------------------------------------------------

/**
 * Determine which fix operations to apply for a given finding.
 *
 * Matching is done on the lower-cased title and description; non-string
 * fields are coerced with String() instead of throwing. Deterministic
 * scanners (UNI, PRM, NET, GIT) map to at most one operation; LLM-detected
 * findings (SKL, MCP, or no scanner id) may map to several.
 *
 * @param {object} f - Finding object
 * @returns {string[]} - Array of operation names from FIX_OPS (possibly empty)
 */
function opsForFinding(f) {
  const lower = (value) => String(value || '').toLowerCase();

  const scanner = f.scanner || '';
  const title = lower(f.title);
  const combined = `${title} ${lower(f.description)}`;
  const has = (needle) => combined.includes(needle);

  switch (scanner) {
    case 'UNI':
      if (title.includes('zero-width')) return ['strip_zero_width'];
      if (title.includes('unicode tag') || title.includes('steganograph')) return ['strip_unicode_tags'];
      if (title.includes('bidi')) return ['strip_bidi'];
      if (title.includes('homoglyph')) return ['normalize_homoglyphs'];
      return [];

    case 'PRM':
      return title.includes('haiku') ? ['upgrade_haiku_model'] : [];

    case 'NET':
    case 'GIT':
      if (has('suspicious') && has('domain')) return ['strip_suspicious_urls'];
      if (has('loopback') || has('127.0.0.1')) return ['normalize_loopback'];
      return [];

    // LLM-detected findings
    case 'SKL':
    case 'MCP':
    case '': {
      // [operation, applies] — evaluated in this order, all matches are kept.
      const rules = [
        ['strip_html_comment_injections', has('html comment injection') || has('<!-- agent')],
        ['strip_system_headers', has('system:') && has('header')],
        ['strip_persistence', has('persistence') || has('cron') || has('launchagent') || has('zshrc')],
        ['strip_escalation', has('privilege escalation') || has('write to hooks') || has('write to settings')],
        ['strip_registry_redirect', has('registry') && has('redirect')],
        ['strip_suspicious_urls', has('exfiltration') || has('suspicious url')],
        ['strip_injection_frontmatter', has('injection') && has('frontmatter')],
        ['move_mcp_creds_to_env', has('credential') && has('env')],
        ['strip_self_modification', has('self-modif')],
        ['strip_self_update', has('self-update')],
      ];
      return rules.filter(([, applies]) => applies).map(([opName]) => opName);
    }

    default:
      return [];
  }
}

// ---------------------------------------------------------------------------
// File validation
// ---------------------------------------------------------------------------

/**
 * Validate file content after modification.
 *
 * - .json / .jsonc: must parse with JSON.parse.
 * - .js / .mjs / .cjs: written to a sibling "<name>.clean-check<ext>" file
 *   and checked with `node --check`; the temporary file is always removed.
 * - Everything else (including .md / .mdx, with or without frontmatter) is
 *   accepted as-is.
 *
 * @param {string} absPath - Absolute file path
 * @param {string} content - Modified content
 * @returns {{ valid: boolean, error?: string }}
 */
function validateContent(absPath, content) {
  const ext = extname(absPath).toLowerCase();

  // JSON files must parse
  if (ext === '.json' || ext === '.jsonc') {
    try {
      JSON.parse(content);
      return { valid: true };
    } catch (e) {
      return { valid: false, error: `JSON parse failed: ${e.message}` };
    }
  }

  // JavaScript files — syntax validation via `node --check`.
  // The temp file keeps the original extension so ESM/CJS detection works.
  if (ext === '.mjs' || ext === '.js' || ext === '.cjs') {
    const tmpPath = absPath.replace(/(\.\w+)$/, '.clean-check$1');
    try {
      writeFileSync(tmpPath, content);
      // execFileSync passes the path as an argv entry, so quotes or shell
      // metacharacters in a (finding-supplied) file name are never
      // interpreted by a shell.
      execFileSync(process.execPath, ['--check', tmpPath], { stdio: 'pipe', timeout: 5000 });
      return { valid: true };
    } catch (e) {
      return { valid: false, error: `Syntax check failed: ${e.message}` };
    } finally {
      try { unlinkSync(tmpPath); } catch { /* best-effort cleanup */ }
    }
  }

  // Markdown and all other files — assume valid
  return { valid: true };
}

// ---------------------------------------------------------------------------
// Core engine: apply fixes to files
// ---------------------------------------------------------------------------

/**
 * Report whether absPath is the target itself or lies beneath it.
 * Purely lexical — symlinks are not resolved.
 */
function isInsideTarget(targetPath, absPath) {
  const rel = relative(targetPath, absPath);
  return rel === '' || (rel !== '..' && !rel.startsWith(`..${sep}`) && !isAbsolute(rel));
}

/**
 * Apply all auto-tier fixes to the target.
 *
 * Findings are classified once. Auto-tier findings are grouped by file, every
 * mapped operation is applied to the in-memory content, and a changed file is
 * validated and then replaced atomically (temp file + rename) with its
 * original permission bits preserved. Non-auto findings are reported as
 * skipped for context. In dry-run mode nothing is written, but fixes that
 * would change content are still reported as 'applied'.
 *
 * Findings whose `file` resolves outside `targetPath` are never touched; they
 * are reported as skipped and recorded in `errors`.
 *
 * @param {string} targetPath - Absolute target directory
 * @param {object[]} findings - Scanner findings array
 * @param {boolean} dryRun
 * @returns {Promise<{ fixes: object[], errors: object[] }>}
 */
async function applyFixes(targetPath, findings, dryRun) {
  const fixes = [];
  const errors = [];
  const codeExts = ['.js', '.mjs', '.cjs', '.ts', '.mts', '.py', '.jsx', '.tsx'];

  // Flip every fix already reported as applied for this file to failed.
  const markAppliedAsFailed = (relPath, message) => {
    for (const fix of fixes) {
      if (fix.file === relPath && fix.status === 'applied') {
        fix.status = 'failed';
        fix.error = message;
      }
    }
  };

  // Step 1: Classify each finding exactly once
  const tiered = findings.map((finding) => ({ finding, tier: classifyFinding(finding) }));

  // Step 2: Group auto-tier findings by file
  const fileGroups = new Map(); // relPath → { findings: [], absPath: string }
  for (const { finding: f, tier } of tiered) {
    if (tier !== 'auto') continue;

    if (!f.file) {
      fixes.push(fixResult({
        finding_id: f.id,
        file: f.file || 'unknown',
        operation: 'skip',
        status: 'skipped',
        description: 'No file path in finding',
      }));
      continue;
    }

    const absPath = resolve(targetPath, f.file);
    if (!isInsideTarget(targetPath, absPath)) {
      // Findings are external input: "../x" or an absolute path must not
      // let the cleaner rewrite files outside the scanned target.
      fixes.push(fixResult({
        finding_id: f.id,
        file: f.file,
        operation: 'skip',
        status: 'skipped',
        description: 'File path resolves outside the target directory',
      }));
      errors.push({
        finding_id: f.id,
        file: f.file,
        error: 'Refusing to modify a path outside the target directory',
      });
      continue;
    }

    if (!fileGroups.has(f.file)) {
      fileGroups.set(f.file, { findings: [], absPath });
    }
    fileGroups.get(f.file).findings.push(f);
  }

  // Step 3: Process each file
  for (const [relPath, group] of fileGroups) {
    let content;
    try {
      content = await readFile(group.absPath, 'utf-8');
    } catch (e) {
      for (const f of group.findings) {
        errors.push({ finding_id: f.id, file: relPath, error: `Cannot read file: ${e.message}` });
      }
      continue;
    }

    const originalContent = content;
    const appliedOps = new Set();
    const isCodeFile = codeExts.includes(extname(group.absPath).toLowerCase());

    // Run every operation mapped to every finding on this file
    for (const f of group.findings) {
      const ops = opsForFinding(f);

      if (ops.length === 0) {
        fixes.push(fixResult({
          finding_id: f.id,
          file: relPath,
          operation: 'unmapped',
          status: 'skipped',
          description: 'No auto-fix operation mapped for this finding type',
        }));
        continue;
      }

      for (const opName of ops) {
        const op = FIX_OPS[opName];
        if (!op) continue;

        // Skip code-only ops on non-code files
        if (op.codeOnly && !isCodeFile) {
          fixes.push(fixResult({
            finding_id: f.id,
            file: relPath,
            operation: opName,
            status: 'skipped',
            description: `${op.desc} — skipped for non-code file`,
          }));
          continue;
        }

        // Apply the operation unless it already changed this file
        if (!appliedOps.has(opName)) {
          const result = op.fn(content);
          if (result !== null) {
            content = result;
            appliedOps.add(opName);
          }
        }

        const applied = appliedOps.has(opName);
        fixes.push(fixResult({
          finding_id: f.id,
          file: relPath,
          operation: opName,
          status: applied ? 'applied' : 'skipped',
          description: applied ? op.desc : `${op.desc} — no change needed`,
        }));
      }
    }

    // Step 4: Write if changed (dry-run reports 'applied' without writing)
    if (content === originalContent || dryRun) continue;

    // Validate before writing
    const validation = validateContent(group.absPath, content);
    if (!validation.valid) {
      markAppliedAsFailed(relPath, `Validation failed: ${validation.error}`);
      errors.push({
        finding_id: group.findings[0]?.id,
        file: relPath,
        error: `Post-fix validation failed: ${validation.error}. File not modified.`,
      });
      continue;
    }

    // Atomic write: temp file → rename
    const tmpPath = `${group.absPath}.clean-tmp`;
    try {
      // rename() replaces the original inode, so copy its permission bits
      // (e.g. the executable bit of a hook script) onto the temp file first.
      const { mode } = await stat(group.absPath);
      await writeFile(tmpPath, content, 'utf-8');
      await chmod(tmpPath, mode & 0o7777);
      await rename(tmpPath, group.absPath);
    } catch (e) {
      try { await unlink(tmpPath); } catch { /* temp file may not exist */ }
      markAppliedAsFailed(relPath, `Write failed: ${e.message}`);
      errors.push({ finding_id: group.findings[0]?.id, file: relPath, error: `Write failed: ${e.message}` });
    }
  }

  // Also report non-auto findings for context
  for (const { finding: f, tier } of tiered) {
    if (tier === 'auto') continue;
    fixes.push(fixResult({
      finding_id: f.id,
      file: f.file || 'unknown',
      operation: `tier:${tier}`,
      status: 'skipped',
      description: `Classified as ${tier} — not auto-fixable`,
    }));
  }

  return { fixes, errors };
}

// ---------------------------------------------------------------------------
// CLI entry point
// ---------------------------------------------------------------------------

/**
 * Flatten a findings document into a plain array.
 * Accepts a scanner envelope ({ scanners: { name: { findings: [] } } }),
 * an object with a top-level `findings` array, or a bare array.
 */
function collectFindings(envelope) {
  if (Array.isArray(envelope)) return envelope;
  if (!envelope || typeof envelope !== 'object') return [];
  if (envelope.scanners) {
    return Object.values(envelope.scanners).flatMap((scanner) =>
      (Array.isArray(scanner?.findings) ? scanner.findings : []));
  }
  return Array.isArray(envelope.findings) ? envelope.findings : [];
}

/**
 * CLI entry point.
 *
 * Usage: node auto-cleaner.mjs <target> --findings <json-file> [--dry-run]
 *
 * Loads findings (from the given file, or by running scan-orchestrator.mjs
 * next to this script when --findings is omitted), applies auto-tier fixes,
 * prints the result envelope as JSON on stdout and progress on stderr.
 * The exit code is 1 on usage/load errors or when any fix reported an error,
 * otherwise 0.
 */
async function main() {
  const USAGE = 'Usage: node auto-cleaner.mjs <target> --findings <json-file> [--dry-run]';
  const args = process.argv.slice(2);

  // Parse arguments
  let targetArg = null;
  let findingsPath = null;
  let dryRun = false;

  for (let i = 0; i < args.length; i++) {
    const arg = args[i];
    if (arg === '--findings') {
      if (i + 1 >= args.length) {
        // Without this check the flag itself would be taken as the target.
        console.error('--findings requires a file path');
        console.error(USAGE);
        process.exit(1);
      }
      findingsPath = args[++i];
    } else if (arg === '--dry-run') {
      dryRun = true;
    } else if (!targetArg) {
      targetArg = arg;
    }
  }

  if (!targetArg) {
    console.error(USAGE);
    process.exit(1);
  }

  const targetPath = resolve(targetArg);

  // Read findings JSON
  let findings;
  if (findingsPath) {
    try {
      const raw = await readFile(resolve(findingsPath), 'utf-8');
      findings = collectFindings(JSON.parse(raw));
    } catch (e) {
      console.error(`Failed to read findings file: ${e.message}`);
      process.exit(1);
    }
  } else {
    // If no findings file, run the orchestrator inline
    console.error('[auto-cleaner] No --findings provided. Running scan-orchestrator...');
    try {
      const orchestratorPath = join(dirname(fileURLToPath(import.meta.url)), 'scan-orchestrator.mjs');
      // Arguments are passed as argv entries — no shell is involved, so a
      // target path containing quotes or metacharacters is handled safely.
      const result = execFileSync(process.execPath, [orchestratorPath, targetPath], {
        encoding: 'utf-8',
        timeout: 60000,
        stdio: ['pipe', 'pipe', 'pipe'],
      });
      findings = collectFindings(JSON.parse(result));
    } catch (e) {
      console.error(`Orchestrator failed: ${e.message}`);
      process.exit(1);
    }
  }

  process.stderr.write(
    `[auto-cleaner] ${findings.length} findings loaded. ` +
    `Mode: ${dryRun ? 'DRY-RUN' : 'LIVE'}. Target: ${targetPath}\n`
  );

  // Classify and count tiers
  const tiers = { auto: 0, semi_auto: 0, manual: 0, skip: 0 };
  for (const f of findings) {
    tiers[classifyFinding(f)]++;
  }
  process.stderr.write(
    `[auto-cleaner] Classification: ${tiers.auto} auto, ${tiers.semi_auto} semi-auto, ` +
    `${tiers.manual} manual, ${tiers.skip} skip\n`
  );

  // Apply fixes
  const startMs = Date.now();
  const { fixes, errors } = await applyFixes(targetPath, findings, dryRun);
  const durationMs = Date.now() - startMs;

  // Build output envelope
  const output = cleanEnvelope(targetPath, dryRun, fixes, errors, durationMs);

  // JSON to stdout
  process.stdout.write(JSON.stringify(output, null, 2) + '\n');

  // Summary to stderr
  const s = output.summary;
  process.stderr.write(
    `\n[auto-cleaner] === COMPLETE ===\n` +
    `[auto-cleaner] Applied: ${s.fixes_applied} | Skipped: ${s.fixes_skipped} | ` +
    `Failed: ${s.fixes_failed} | Files modified: ${s.files_modified}\n` +
    `[auto-cleaner] Duration: ${durationMs}ms\n`
  );

  // Set the exit code instead of calling process.exit(): the JSON written to
  // stdout above may still be buffered when stdout is a pipe, and a forced
  // exit could truncate it.
  process.exitCode = errors.length > 0 ? 1 : 0;
}

// Only run CLI when executed directly, not when imported for testing
|
||
const isMain = process.argv[1] &&
|
||
(process.argv[1].endsWith('auto-cleaner.mjs') || process.argv[1] === new URL(import.meta.url).pathname);
|
||
|
||
if (isMain) {
|
||
main().catch(err => {
|
||
console.error(`Fatal error: ${err.message}`);
|
||
process.exit(1);
|
||
});
|
||
}
|
||
|
||
// Export for testing
|
||
export { classifyFinding, FIX_OPS, opsForFinding, applyFixes };
|