ktg-plugin-marketplace/plugins/llm-security/scanners/auto-cleaner.mjs

1036 lines
31 KiB
JavaScript
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env node
// auto-cleaner.mjs — Deterministic remediation engine for security findings
// Zero external dependencies. Reuses scanners/lib/ shared library.
//
// CLI: node auto-cleaner.mjs <target> --findings <json-file> [--dry-run]
//
// Fix operations are pure functions (content in → content out).
// Atomic writes: validate the new content, write it to .clean-tmp, then rename over the original.
// Content-based matching (not line-number based) for robustness.
import { readFile, writeFile, rename, unlink, stat } from 'node:fs/promises';
import { writeFileSync, unlinkSync } from 'node:fs';
import { resolve, extname, join, dirname, relative, isAbsolute } from 'node:path';
import { fileURLToPath } from 'node:url';
import { execSync, execFileSync } from 'node:child_process';
import { fixResult, cleanEnvelope } from './lib/output.mjs';
// ---------------------------------------------------------------------------
// Classification: finding → tier
// ---------------------------------------------------------------------------
/**
 * Decide which remediation tier a scanner finding belongs to.
 *
 * The decision is keyword-driven: the finding's `scanner` code selects a rule
 * set, and the lower-cased `title` / `description` (and, for homoglyphs, the
 * `file` extension) are matched against fixed substrings.
 *
 * @param {object} f - Scanner finding object (reads scanner, title, description, file, severity)
 * @returns {'auto'|'semi_auto'|'manual'|'skip'}
 */
function classifyFinding(f) {
  const scanner = f.scanner || '';
  const title = (f.title || '').toLowerCase();
  const description = (f.description || '').toLowerCase();
  const filePath = (f.file || '').toLowerCase();
  const text = `${title} ${description}`;

  // Substring helpers over the combined title + description text.
  const any = (...needles) => needles.some((needle) => text.includes(needle));
  const all = (...needles) => needles.every((needle) => text.includes(needle));
  const titleHas = (needle) => title.includes(needle);

  switch (scanner) {
    // --- Unicode scanner ---
    case 'UNI': {
      if (titleHas('zero-width') || titleHas('unicode tag') ||
          titleHas('steganograph') || titleHas('bidi')) {
        return 'auto';
      }
      if (titleHas('homoglyph')) {
        // Homoglyphs are only rewritten automatically in code files.
        const codeExts = ['.js', '.mjs', '.cjs', '.ts', '.mts', '.py', '.jsx', '.tsx'];
        return codeExts.some((ext) => filePath.endsWith(ext)) ? 'auto' : 'semi_auto';
      }
      return 'semi_auto';
    }

    case 'ENT':
      return 'semi_auto';

    // --- Permission findings ---
    case 'PRM': {
      if (titleHas('haiku') && any('sensitive')) return 'auto';
      const needsReview =
        titleHas('ghost hook') ||
        any('script not found') ||
        all('read-only', 'write') ||
        all('dangerous', 'triple');
      return needsReview ? 'semi_auto' : 'manual';
    }

    // --- Dependency findings ---
    case 'DEP':
      return any('cve') && !any('fix available') ? 'manual' : 'semi_auto';

    case 'TNT':
      return 'manual';

    // --- Git findings ---
    case 'GIT':
      if (all('suspicious domain', 'post-commit')) return 'auto';
      if (all('hook', 'network')) return 'semi_auto';
      return 'skip';

    // --- Network findings ---
    case 'NET':
      if (f.severity === 'high' && any('suspicious')) return 'auto';
      if (any('loopback', '127.0.0.1')) return 'auto';
      // Everything left (including non-info "ip-based url") is semi_auto,
      // except info-severity findings, which are manual.
      return f.severity === 'info' ? 'manual' : 'semi_auto';

    // --- LLM-detected findings (from skill-scanner-agent) ---
    case 'SKL':
    case 'MCP': {
      const autoFixable =
        any('html comment injection', '<!-- agent') ||
        all('system:', 'header') ||
        any('persistence', 'cron', 'launchagent', 'zshrc') ||
        any('privilege escalation', 'hooks.json', 'settings.json') ||
        all('registry', 'redirect') ||
        all('injection', 'frontmatter') ||
        any('exfiltration', 'suspicious') ||
        all('credential', 'env') ||
        any('self-modif', 'self-update');
      if (autoFixable) return 'auto';

      const needsReview =
        any('credential access', 'hidden directive') ||
        all('unannounced', 'install');
      return needsReview ? 'semi_auto' : 'manual';
    }

    default:
      return 'manual';
  }
}
// ---------------------------------------------------------------------------
// Fix operations — pure functions: content in → content out
// ---------------------------------------------------------------------------
/**
 * Invisible code points removed by stripZeroWidth:
 * U+200B zero width space, U+200C zero width non-joiner, U+200D zero width joiner,
 * U+FEFF zero width no-break space / BOM, U+00AD soft hyphen.
 * stripZeroWidth keeps a U+FEFF that is the very first character of the file.
 */
const ZERO_WIDTH = new Set([0x200B, 0x200C, 0x200D, 0xFEFF, 0x00AD]);
/** Inclusive bounds of the Unicode Tags range U+E0001 through U+E007F (used by stripUnicodeTags). */
const TAG_START = 0xE0001;
const TAG_END = 0xE007F;
/** BIDI control code points removed by stripBidi: U+202A through U+202E and U+2066 through U+2069. */
const BIDI = new Set([0x202A, 0x202B, 0x202C, 0x202D, 0x202E, 0x2066, 0x2067, 0x2068, 0x2069]);
/** Cyrillic → Latin confusable mapping (Cyrillic code point → look-alike Latin letter), used by normalizeHomoglyphs. */
const CYRILLIC_TO_LATIN = new Map([
// Lowercase look-alikes
[0x0430, 'a'], [0x0435, 'e'], [0x043E, 'o'], [0x0441, 'c'],
[0x0440, 'p'], [0x0443, 'y'], [0x0445, 'x'],
// Uppercase look-alikes
[0x0410, 'A'], [0x0415, 'E'], [0x041E, 'O'], [0x0421, 'C'],
[0x0420, 'P'], [0x0425, 'X'],
]);
/**
 * Domain fragments treated as suspicious by stripSuspiciousUrls.
 * Entries are matched as plain lower-case substrings of a line, not as parsed hostnames.
 */
const EXFIL_DOMAINS = [
'webhook.site', 'ngrok', 'requestbin', 'pipedream.net',
'pastebin.com', 'bit.ly', 'hookbin.com', 'beeceptor.com',
'smee.io', 'transfer.sh', 'file.io', 'paste.ee', 'hastebin.com',
'tinyurl.com', 'is.gd', 'goo.gl', 'cutt.ly',
];
/**
 * Remove every code point listed in ZERO_WIDTH from the text.
 *
 * A U+FEFF that is the very first character of the file is treated as a BOM
 * and kept. Line structure is preserved: a line that becomes empty stays as
 * an empty line.
 *
 * @param {string} content - File content
 * @returns {string|null} Cleaned content, or null when nothing was removed
 */
function stripZeroWidth(content) {
  let removedAny = false;

  const cleanedLines = content.split('\n').map((line, lineIdx) => {
    let kept = '';
    let offset = 0; // UTF-16 offset of the current character within the line
    for (const ch of line) {
      const cp = ch.codePointAt(0);
      const isLeadingBom = cp === 0xFEFF && lineIdx === 0 && offset === 0;
      if (!isLeadingBom && ZERO_WIDTH.has(cp)) {
        removedAny = true;
      } else {
        kept += ch;
      }
      offset += ch.length;
    }
    return kept;
  });

  return removedAny ? cleanedLines.join('\n') : null;
}
/**
 * Remove code points in the Unicode Tags range (TAG_START..TAG_END, inclusive),
 * which can carry hidden (steganographic) text.
 *
 * @param {string} content - File content
 * @returns {string|null} Cleaned content, or null when no tag code point was present
 */
function stripUnicodeTags(content) {
  let output = '';
  let removedAny = false;

  // for...of walks the string by code point, so astral characters stay intact.
  for (const ch of content) {
    const cp = ch.codePointAt(0);
    if (cp < TAG_START || cp > TAG_END) {
      output += ch;
    } else {
      removedAny = true;
    }
  }

  return removedAny ? output : null;
}
/**
 * Remove the bidirectional control characters listed in BIDI.
 *
 * @param {string} content - File content
 * @returns {string|null} Cleaned content, or null when no BIDI control was present
 */
function stripBidi(content) {
  const visible = Array.from(content).filter((ch) => !BIDI.has(ch.codePointAt(0)));
  const cleaned = visible.join('');
  // Characters are only ever removed, so a shorter string means something was stripped.
  return cleaned.length !== content.length ? cleaned : null;
}
/**
 * Replace Cyrillic look-alike letters with their Latin equivalents, using the
 * CYRILLIC_TO_LATIN table. Characters not in the table are left untouched.
 * The operation registry marks this fix as code-only.
 *
 * @param {string} content - File content
 * @returns {string|null} Normalized content, or null when nothing was replaced
 */
function normalizeHomoglyphs(content) {
  let output = '';
  let replacedAny = false;

  for (const ch of content) {
    const latin = CYRILLIC_TO_LATIN.get(ch.codePointAt(0));
    if (latin) {
      output += latin;
      replacedAny = true;
    } else {
      output += ch;
    }
  }

  return replacedAny ? output : null;
}
/**
 * Delete HTML comments whose body starts with an upper-case AGENT:, HIDDEN:
 * or SYSTEM: marker. The match is non-greedy and may span several lines.
 *
 * @param {string} content - File content
 * @returns {string|null} Cleaned content, or null when no such comment exists
 */
function stripHtmlCommentInjections(content) {
  const INJECTED_COMMENT = /<!--\s*(?:AGENT|HIDDEN|SYSTEM)\s*:[\s\S]*?-->/g;
  const cleaned = content.replace(INJECTED_COMMENT, '');
  if (cleaned === content) {
    return null;
  }
  return cleaned;
}
/**
 * Drop lines that begin with a spoofed "# SYSTEM:" header (case-insensitive),
 * leaving lines inside ``` code fences untouched.
 *
 * @param {string} content - File content
 * @returns {string|null} Cleaned content, or null when no header line was removed
 */
function stripSystemHeaders(content) {
  const allLines = content.split('\n');
  let insideFence = false;

  const keptLines = allLines.filter((line) => {
    // Any line whose first non-blank characters are ``` toggles the fence state.
    if (line.trimStart().startsWith('```')) {
      insideFence = !insideFence;
    }
    const isSpoofedHeader = /^#\s*SYSTEM\s*:/i.test(line);
    return insideFence || !isSpoofedHeader;
  });

  return keptLines.length < allLines.length ? keptLines.join('\n') : null;
}
/**
 * Strip persistence mechanisms (crontab, LaunchAgent, systemctl, shell-rc appends).
 *
 * - Outside ``` code fences, each line matching a persistence pattern is removed.
 * - Inside a code fence, the first matching line causes the WHOLE fenced block
 *   (opening fence, every line in it, closing fence) to be removed.
 *   If such a fence is never closed, everything up to end of input is dropped.
 *
 * Fix over the previous version: the opening fence's position is recorded, so a
 * match that is not the first line of the block no longer leaves a dangling
 * opening fence behind (which unbalanced the fences in the output).
 *
 * @param {string} content - File content
 * @returns {string|null} Cleaned content, or null when nothing was removed
 */
function stripPersistence(content) {
  const PERSISTENCE_PATTERNS = [
    /crontab\s+-/,
    /LaunchAgent/i,
    /systemctl\s+(enable|start|restart)/,
    />>?\s*~\/\.(?:zshrc|bashrc|profile|bash_profile)/,
    /Library\/LaunchAgents/,
  ];
  const isPersistence = (line) => PERSISTENCE_PATTERNS.some((p) => p.test(line));

  const kept = [];
  let inCodeFence = false;
  let fenceStart = -1; // index in `kept` of the currently open fence's opening line
  let droppingBlock = false; // true while skipping the rest of a malicious fenced block
  let changed = false;

  for (const line of content.split('\n')) {
    if (line.trimStart().startsWith('```')) {
      if (!inCodeFence) {
        inCodeFence = true;
        fenceStart = kept.length;
        kept.push(line);
      } else {
        inCodeFence = false;
        if (droppingBlock) {
          droppingBlock = false; // swallow the closing fence of the removed block
        } else {
          kept.push(line);
        }
      }
      continue;
    }

    if (droppingBlock) continue;

    if (isPersistence(line)) {
      changed = true;
      if (inCodeFence) {
        // Discard the opening fence and every line of the block kept so far.
        kept.length = fenceStart;
        droppingBlock = true;
      }
      continue;
    }

    kept.push(line);
  }

  return changed ? kept.join('\n') : null;
}
/**
 * Remove lines that mention a privileged config target (hooks/hooks.json,
 * .claude/settings.json, CLAUDE.md) together with a write-like keyword.
 *
 * NOTE(review): the keyword regex is an unanchored substring match, so its
 * `set` alternative also matches inside "settings"; any line naming
 * .claude/settings.json is therefore removed regardless of verb.
 *
 * @param {string} content - File content
 * @returns {string|null} Cleaned content, or null when no line was removed
 */
function stripEscalation(content) {
  const ESCALATION_TARGETS = [
    /hooks\/hooks\.json/,
    /~\/\.claude\/settings\.json/,
    /\.claude\/settings\.json/,
    /CLAUDE\.md/i,
  ];
  const WRITE_KEYWORD = /modif|write|update|overwrite|create|set|add|push|insert|append|config/i;

  const isEscalation = (line) =>
    ESCALATION_TARGETS.some((target) => target.test(line)) && WRITE_KEYWORD.test(line);

  const allLines = content.split('\n');
  const safeLines = allLines.filter((line) => !isEscalation(line));

  return safeLines.length === allLines.length ? null : safeLines.join('\n');
}
/**
 * Remove lines that point a package manager at a non-default registry:
 * `npm config set registry <not registry.npmjs.org>`, `--index-url <not pypi.org>`
 * and `--extra-index-url http(s)://<not pypi.org>`.
 *
 * @param {string} content - File content
 * @returns {string|null} Cleaned content, or null when no line was removed
 */
function stripRegistryRedirect(content) {
  const REDIRECTS = [
    /npm\s+config\s+set\s+registry\s+(?!https:\/\/registry\.npmjs\.org)/,
    /--index-url\s+(?!https:\/\/pypi\.org)/,
    /--extra-index-url\s+https?:\/\/(?!pypi\.org)/,
  ];
  const redirectsRegistry = (line) => REDIRECTS.some((re) => re.test(line));

  const kept = [];
  let dropped = 0;
  for (const line of content.split('\n')) {
    if (redirectsRegistry(line)) {
      dropped += 1;
    } else {
      kept.push(line);
    }
  }

  return dropped > 0 ? kept.join('\n') : null;
}
/**
 * Remove every line that contains both an http(s):// URL scheme and one of the
 * EXFIL_DOMAINS fragments. The domain check is a case-insensitive substring
 * match; the scheme check is case-sensitive.
 *
 * @param {string} content - File content
 * @returns {string|null} Cleaned content, or null when no line was removed
 */
function stripSuspiciousUrls(content) {
  const HAS_URL_SCHEME = /https?:\/\//;

  const isSuspicious = (line) => {
    const lowered = line.toLowerCase();
    return EXFIL_DOMAINS.some((domain) => lowered.includes(domain)) && HAS_URL_SCHEME.test(line);
  };

  const allLines = content.split('\n');
  const keptLines = allLines.filter((line) => !isSuspicious(line));

  if (keptLines.length === allLines.length) {
    return null;
  }
  return keptLines.join('\n');
}
/**
 * Rewrite plain-http loopback URLs (`http://127.0.0.1`) to `http://localhost`.
 *
 * The host must end right after `127.0.0.1`: the lookahead rejects a following
 * word character, hyphen, or ".label", so look-alike hosts such as
 * `127.0.0.10` or `127.0.0.1.example.com` are left untouched (the previous
 * unanchored pattern turned them into `localhost0` / `localhost.example.com`).
 * Only the `http://` scheme is rewritten.
 *
 * @param {string} content - File content
 * @returns {string|null} Rewritten content, or null when nothing matched
 */
function normalizeLoopback(content) {
  const LOOPBACK_URL = /http:\/\/127\.0\.0\.1(?![\w-]|\.[\w-])/g;
  const result = content.replace(LOOPBACK_URL, 'http://localhost');
  return result !== content ? result : null;
}
/**
 * Upgrade `model: haiku` to `model: sonnet` in the leading YAML frontmatter.
 *
 * Only the first frontmatter block (starting at offset 0) is inspected, and
 * only the first `model: haiku` occurrence in it is rewritten.
 *
 * The result is assembled by slicing rather than `content.replace(fm, upgraded)`:
 * a string replacement interprets `$$`, `$&`, etc. in the replacement text, which
 * silently corrupted frontmatter containing `$` characters.
 *
 * @param {string} content - File content
 * @returns {string|null} Updated content, or null when there is no frontmatter
 *   or no haiku model line in it
 */
function upgradeHaikuModel(content) {
  const fmMatch = content.match(/^(---\r?\n[\s\S]*?\r?\n---)/);
  if (!fmMatch) return null;

  const fm = fmMatch[1];
  const upgraded = fm.replace(/model:\s*haiku/i, 'model: sonnet');
  if (upgraded === fm) return null;

  // The regex is anchored at the start of the input, so `fm` is a prefix of `content`.
  return upgraded + content.slice(fm.length);
}
/**
 * Delete known prompt-injection phrases from the leading YAML frontmatter.
 * Text after the frontmatter is never modified.
 *
 * @param {string} content - File content
 * @returns {string|null} Updated content, or null when there is no frontmatter
 *   or it contains none of the phrases
 */
function stripInjectionFrontmatter(content) {
  const fmMatch = content.match(/^(---\r?\n)([\s\S]*?)(\r?\n---)/);
  if (fmMatch === null) return null;

  const [whole, opener, body, closer] = fmMatch;

  const INJECTION_PHRASES = [
    /<!--\s*(?:AGENT|HIDDEN|SYSTEM)\s*:[\s\S]*?-->/g,
    /ignore\s+(?:previous|above|all)\s+instructions/gi,
    /you\s+are\s+now\s+(?:a|an)\s+/gi,
    /override\s+safety\s+constraints/gi,
    /unrestricted\s+(?:diagnostic\s+)?mode/gi,
    /pre-authorized/gi,
    /elevated\s+permissions/gi,
  ];

  // Apply each pattern in order to the running result.
  const scrubbed = INJECTION_PHRASES.reduce((text, phrase) => text.replace(phrase, ''), body);

  // Every replacement only deletes text, so an unchanged string means no phrase matched.
  if (scrubbed === body) return null;

  const remainder = content.slice(whole.length);
  return `${opener}${scrubbed}${closer}${remainder}`;
}
/**
 * Move credential-looking command-line arguments of MCP servers from `args`
 * into the server's `env` block of a JSON config.
 *
 * For every entry of `mcpServers` (or `mcp_servers`) with an `args` array, each
 * argument matching a credential keyword is removed from `args` and stored in
 * `env` under a key derived from the argument name (non-alphanumerics → `_`,
 * upper-cased):
 *   - `--token=abc`            → env.__TOKEN = "abc"
 *   - `--api-key`, `value`     → env.__API_KEY = "value" (both args removed)
 *   - a credential-looking arg with no usable value is stored as its own value.
 *
 * Fixes over the previous version:
 *   - `key=value` arguments are split at the first `=` instead of consuming the
 *     NEXT, unrelated argument as the value and leaking the secret into the key;
 *   - arguments are scanned left to right, so a flag's value is consumed with
 *     its flag even when the value itself looks like a credential;
 *   - a following argument that is itself a flag (starts with `-`) is not
 *     taken as a value;
 *   - `null` documents / server entries no longer throw, and `env` is only
 *     created on servers that actually had something moved.
 *
 * @param {string} content - JSON text of the config file
 * @returns {string|null} Re-serialized JSON (2-space indent), or null when the
 *   input is not JSON, has no server map, or nothing was moved
 */
function moveMcpCredsToEnv(content) {
  let parsed;
  try {
    parsed = JSON.parse(content);
  } catch {
    return null;
  }
  if (parsed === null || typeof parsed !== 'object') return null;

  // Look for mcpServers pattern
  const servers = parsed.mcpServers || parsed.mcp_servers;
  if (!servers || typeof servers !== 'object') return null;

  const CRED_PATTERNS = [
    /api[_-]?key/i, /secret/i, /token/i, /password/i,
    /credential/i, /auth/i, /bearer/i,
  ];
  const looksLikeCredential = (text) => CRED_PATTERNS.some((p) => p.test(text));
  const toEnvKey = (name) => name.replace(/[^A-Z0-9_]/gi, '_').toUpperCase();

  let changed = false;

  for (const config of Object.values(servers)) {
    if (!config || typeof config !== 'object' || !Array.isArray(config.args)) continue;

    const { args } = config;
    const keptArgs = [];
    const moved = [];

    for (let i = 0; i < args.length; i++) {
      const arg = String(args[i]);
      if (!looksLikeCredential(arg)) {
        keptArgs.push(args[i]);
        continue;
      }

      let name = arg;
      let value = arg;
      const eq = arg.indexOf('=');
      if (eq > 0) {
        // key=value in a single argument
        name = arg.slice(0, eq);
        value = arg.slice(eq + 1);
      } else if (i + 1 < args.length && !String(args[i + 1]).startsWith('-')) {
        // flag followed by its value: consume both
        value = String(args[i + 1]);
        i++;
      }
      moved.push([toEnvKey(name), value]);
    }

    if (moved.length === 0) continue;

    if (!config.env) config.env = {};
    for (const [key, value] of moved) {
      config.env[key] = value;
    }
    config.args = keptArgs;
    changed = true;
  }

  return changed ? JSON.stringify(parsed, null, 2) : null;
}
/**
 * Remove lines where a writeFile / fs.write* call appears on the same line as
 * a Claude/MCP configuration path (.claude, hooks.json, settings.json,
 * .mcp.json, plugin.json).
 *
 * @param {string} content - File content
 * @returns {string|null} Cleaned content, or null when no line was removed
 */
function stripSelfModification(content) {
  const SELF_MOD_PATTERNS = [
    /writeFile.*\.claude/i,
    /writeFile.*hooks\.json/i,
    /writeFile.*settings\.json/i,
    /writeFile.*\.mcp\.json/i,
    /writeFile.*plugin\.json/i,
    /fs\.write.*\.claude/i,
    /fs\.write.*hooks\.json/i,
  ];
  const writesConfig = (line) => SELF_MOD_PATTERNS.some((re) => re.test(line));

  const allLines = content.split('\n');
  const keptLines = allLines.filter((line) => !writesConfig(line));

  return keptLines.length !== allLines.length ? keptLines.join('\n') : null;
}
/**
 * Strip self-update mechanisms (npm/pip self-install, `git pull origin`,
 * curl|sh / wget|sh pipes).
 *
 * - Outside ``` code fences, each matching line is removed on its own.
 * - Inside a code fence, a matching line causes the whole fenced block
 *   (opening fence through closing fence) to be removed. If that fence is
 *   never closed, everything up to end of input is dropped.
 *
 * Fix over the previous version: fence state is tracked explicitly. The old
 * code inferred "inside a fence" from the previously kept line starting with
 * ```, so a match directly after a CLOSING fence popped that fence and then
 * discarded unrelated text up to the next fence line.
 *
 * @param {string} content - File content
 * @returns {string|null} Cleaned content, or null when nothing was removed
 */
function stripSelfUpdate(content) {
  const SELF_UPDATE = [
    /npm\s+(install|update)\s+(-g\s+)?.*self/i,
    /pip\s+install\s+--upgrade\s+.*self/i,
    /git\s+pull\s+.*origin/i,
    /curl.*\|\s*(sh|bash)/,
    /wget.*\|\s*(sh|bash)/,
  ];
  const isSelfUpdate = (line) => SELF_UPDATE.some((p) => p.test(line));

  const kept = [];
  let inCodeFence = false;
  let fenceStart = -1; // index in `kept` of the currently open fence's opening line
  let droppingBlock = false; // true while skipping the rest of a removed fenced block
  let changed = false;

  for (const line of content.split('\n')) {
    if (line.trimStart().startsWith('```')) {
      if (!inCodeFence) {
        inCodeFence = true;
        fenceStart = kept.length;
        kept.push(line);
      } else {
        inCodeFence = false;
        if (droppingBlock) {
          droppingBlock = false; // swallow the closing fence of the removed block
        } else {
          kept.push(line);
        }
      }
      continue;
    }

    if (droppingBlock) continue;

    if (isSelfUpdate(line)) {
      changed = true;
      if (inCodeFence) {
        // Discard the opening fence and every line of the block kept so far.
        kept.length = fenceStart;
        droppingBlock = true;
      }
      continue;
    }

    kept.push(line);
  }

  return changed ? kept.join('\n') : null;
}
// ---------------------------------------------------------------------------
// Fix operation registry
// ---------------------------------------------------------------------------
/**
 * Map of operation names → fix functions + metadata.
 *
 * Each entry has:
 *   - fn:   the fix function (content string in → new content string, or null when unchanged)
 *   - desc: human-readable description used in fix reports
 *   - codeOnly (optional): when true, applyFixes skips the operation for files
 *     whose extension is not in its list of code extensions
 *
 * The keys are the operation names returned by opsForFinding.
 */
const FIX_OPS = {
// --- Unicode clean-ups ---
strip_zero_width: {
fn: stripZeroWidth,
desc: 'Remove zero-width invisible characters',
},
strip_unicode_tags: {
fn: stripUnicodeTags,
desc: 'Remove Unicode Tag steganography codepoints',
},
strip_bidi: {
fn: stripBidi,
desc: 'Remove BIDI override characters',
},
normalize_homoglyphs: {
fn: normalizeHomoglyphs,
desc: 'Normalize Cyrillic confusables to Latin equivalents',
// The only operation restricted to code files.
codeOnly: true,
},
// --- Line/comment removal in prompt and script content ---
strip_html_comment_injections: {
fn: stripHtmlCommentInjections,
desc: 'Remove <!-- AGENT/HIDDEN/SYSTEM --> comment injections',
},
strip_system_headers: {
fn: stripSystemHeaders,
desc: 'Remove spoofed # SYSTEM: headers',
},
strip_persistence: {
fn: stripPersistence,
desc: 'Remove persistence mechanisms (crontab, LaunchAgent, zshrc)',
},
strip_escalation: {
fn: stripEscalation,
desc: 'Remove privilege escalation writes to hooks/settings',
},
strip_registry_redirect: {
fn: stripRegistryRedirect,
desc: 'Remove non-standard package registry redirections',
},
strip_suspicious_urls: {
fn: stripSuspiciousUrls,
desc: 'Remove lines with suspicious exfiltration domain URLs',
},
// --- In-place rewrites ---
normalize_loopback: {
fn: normalizeLoopback,
desc: 'Replace 127.0.0.1 with localhost',
},
upgrade_haiku_model: {
fn: upgradeHaikuModel,
desc: 'Upgrade model: haiku to model: sonnet in frontmatter',
},
strip_injection_frontmatter: {
fn: stripInjectionFrontmatter,
desc: 'Remove injection phrases from frontmatter fields',
},
move_mcp_creds_to_env: {
fn: moveMcpCredsToEnv,
desc: 'Move credentials from MCP args to env block',
},
// --- Self-modification / self-update removal ---
strip_self_modification: {
fn: stripSelfModification,
desc: 'Remove writeFile calls targeting config paths',
},
strip_self_update: {
fn: stripSelfUpdate,
desc: 'Remove self-update mechanisms (pipe-to-shell, etc.)',
},
};
// ---------------------------------------------------------------------------
// Finding → fix operation mapping
// ---------------------------------------------------------------------------
/**
 * Map a finding to the names of the fix operations that should be tried for it.
 *
 * Scanner-specific findings (UNI, PRM, NET, GIT) map to at most one operation.
 * LLM-detected findings (SKL, MCP, or an empty scanner code) may map to several;
 * they are returned in a fixed rule order.
 *
 * @param {object} f - Finding object (reads scanner, title, description)
 * @returns {string[]} Operation names (keys of FIX_OPS); empty when nothing applies
 */
function opsForFinding(f) {
  const scanner = f.scanner || '';
  const title = (f.title || '').toLowerCase();
  const description = (f.description || '').toLowerCase();
  const text = `${title} ${description}`;

  const any = (...needles) => needles.some((needle) => text.includes(needle));
  const all = (...needles) => needles.every((needle) => text.includes(needle));

  if (scanner === 'UNI') {
    if (title.includes('zero-width')) return ['strip_zero_width'];
    if (title.includes('unicode tag') || title.includes('steganograph')) return ['strip_unicode_tags'];
    if (title.includes('bidi')) return ['strip_bidi'];
    if (title.includes('homoglyph')) return ['normalize_homoglyphs'];
    return [];
  }

  if (scanner === 'PRM') {
    return title.includes('haiku') ? ['upgrade_haiku_model'] : [];
  }

  if (scanner === 'NET' || scanner === 'GIT') {
    if (all('suspicious', 'domain')) return ['strip_suspicious_urls'];
    if (any('loopback', '127.0.0.1')) return ['normalize_loopback'];
    return [];
  }

  // Only LLM-detected findings (or findings without a scanner code) go further.
  if (scanner !== 'SKL' && scanner !== 'MCP' && scanner !== '') {
    return [];
  }

  // [condition, operation] pairs; the output keeps this order.
  const rules = [
    [any('html comment injection', '<!-- agent'), 'strip_html_comment_injections'],
    [all('system:', 'header'), 'strip_system_headers'],
    [any('persistence', 'cron', 'launchagent', 'zshrc'), 'strip_persistence'],
    [any('privilege escalation', 'write to hooks', 'write to settings'), 'strip_escalation'],
    [all('registry', 'redirect'), 'strip_registry_redirect'],
    [any('exfiltration', 'suspicious url'), 'strip_suspicious_urls'],
    [all('injection', 'frontmatter'), 'strip_injection_frontmatter'],
    [all('credential', 'env'), 'move_mcp_creds_to_env'],
    [any('self-modif'), 'strip_self_modification'],
    [any('self-update'), 'strip_self_update'],
  ];

  return rules.filter(([matched]) => matched).map(([, opName]) => opName);
}
// ---------------------------------------------------------------------------
// File validation
// ---------------------------------------------------------------------------
/**
 * Validate file content after modification, based on the file extension.
 *
 * - `.json` / `.jsonc`: must parse with JSON.parse.
 * - `.mjs` / `.js` / `.cjs`: the content is written to a sibling
 *   `<name>.clean-check<ext>` file (same extension, so Node's module-type
 *   detection still applies) and checked with `node --check`; the temp file is
 *   always removed afterwards.
 * - Everything else, including Markdown with or without frontmatter, is
 *   accepted as-is.
 *
 * The syntax check runs the current Node binary via execFileSync with an
 * argument array, so the path is never parsed by a shell. File names come from
 * the scanned (untrusted) tree; with the previous shell string a name
 * containing `"`, `$` or backticks broke the command or could inject one.
 *
 * @param {string} absPath - Absolute file path
 * @param {string} content - Modified content
 * @returns {{ valid: boolean, error?: string }}
 */
function validateContent(absPath, content) {
  const ext = extname(absPath).toLowerCase();

  // JSON files must parse
  if (ext === '.json' || ext === '.jsonc') {
    try {
      JSON.parse(content);
      return { valid: true };
    } catch (e) {
      return { valid: false, error: `JSON parse failed: ${e.message}` };
    }
  }

  // JavaScript files — syntax validation via `node --check`
  if (ext === '.mjs' || ext === '.js' || ext === '.cjs') {
    const tmpPath = absPath.replace(/(\.\w+)$/, '.clean-check$1');
    try {
      writeFileSync(tmpPath, content);
      execFileSync(process.execPath, ['--check', tmpPath], { stdio: 'pipe', timeout: 5000 });
      return { valid: true };
    } catch (e) {
      return { valid: false, error: `Syntax check failed: ${e.message}` };
    } finally {
      // Best-effort cleanup; the temp file may not exist if the write itself failed.
      try { unlinkSync(tmpPath); } catch { /* ignore */ }
    }
  }

  // All other files (Markdown included) — assume valid
  return { valid: true };
}
// ---------------------------------------------------------------------------
// Core engine: apply fixes to files
// ---------------------------------------------------------------------------
/**
 * Apply all auto-tier fixes to the target.
 *
 * Flow:
 *   1. Classify every finding once; only 'auto' findings are processed.
 *   2. Group auto findings by their `file` value. Findings without a file are
 *      reported as skipped; findings whose path resolves OUTSIDE `targetPath`
 *      are rejected with an error and never read or written.
 *   3. Per file: read it, run each mapped operation at most once on the running
 *      content, and record one fix result per (finding, operation).
 *   4. If the content changed and this is not a dry run: validate it, then write
 *      to `<file>.clean-tmp` and rename over the original. The original file's
 *      permission bits are requested for the temp file, so the replacement
 *      keeps them (subject to the process umask) — e.g. an executable script
 *      stays executable.
 *   5. Append a 'skipped' result for every non-auto finding, for context.
 *
 * In dry-run mode changed operations are reported as 'applied' but nothing is
 * validated or written.
 *
 * @param {string} targetPath - Absolute target directory
 * @param {object[]} findings - Scanner findings array
 * @param {boolean} dryRun
 * @returns {Promise<{ fixes: object[], errors: object[] }>}
 */
async function applyFixes(targetPath, findings, dryRun) {
  const fixes = [];
  const errors = [];
  const CODE_EXTS = ['.js', '.mjs', '.cjs', '.ts', '.mts', '.py', '.jsx', '.tsx'];
  const rootPath = resolve(targetPath);

  // Flip every 'applied' result recorded for a file to 'failed'.
  const failAppliedFixes = (relPath, message) => {
    for (const fix of fixes) {
      if (fix.file === relPath && fix.status === 'applied') {
        fix.status = 'failed';
        fix.error = message;
      }
    }
  };

  // Step 1: Classify each finding exactly once.
  const tiered = findings.map((finding) => ({ finding, tier: classifyFinding(finding) }));

  // Step 2: Group auto-tier findings by file.
  const fileGroups = new Map(); // relPath → { findings: [], absPath: string }
  for (const { finding: f, tier } of tiered) {
    if (tier !== 'auto') continue;

    if (!f.file) {
      fixes.push(fixResult({
        finding_id: f.id,
        file: f.file || 'unknown',
        operation: 'skip',
        status: 'skipped',
        description: 'No file path in finding',
      }));
      continue;
    }

    const absPath = resolve(rootPath, f.file);

    // Findings come from a JSON file; never follow a path that leaves the target.
    const relToRoot = relative(rootPath, absPath);
    if (/^\.\.(?:[\\/]|$)/.test(relToRoot) || isAbsolute(relToRoot)) {
      errors.push({
        finding_id: f.id,
        file: f.file,
        error: 'File path resolves outside the target directory. File not modified.',
      });
      continue;
    }

    if (!fileGroups.has(f.file)) {
      fileGroups.set(f.file, { findings: [], absPath });
    }
    fileGroups.get(f.file).findings.push(f);
  }

  // Step 3: Process each file.
  for (const [relPath, group] of fileGroups) {
    let content;
    try {
      content = await readFile(group.absPath, 'utf-8');
    } catch (e) {
      for (const f of group.findings) {
        errors.push({ finding_id: f.id, file: relPath, error: `Cannot read file: ${e.message}` });
      }
      continue;
    }

    const originalContent = content;
    const appliedOps = new Set(); // operations that actually changed this file
    const isCodeFile = CODE_EXTS.includes(extname(group.absPath).toLowerCase());

    for (const f of group.findings) {
      const ops = opsForFinding(f);
      if (ops.length === 0) {
        fixes.push(fixResult({
          finding_id: f.id,
          file: relPath,
          operation: 'unmapped',
          status: 'skipped',
          description: 'No auto-fix operation mapped for this finding type',
        }));
        continue;
      }

      for (const opName of ops) {
        const op = FIX_OPS[opName];
        if (!op) continue;

        // Skip code-only ops on non-code files.
        if (op.codeOnly && !isCodeFile) {
          fixes.push(fixResult({
            finding_id: f.id,
            file: relPath,
            operation: opName,
            status: 'skipped',
            description: `${op.desc} — skipped for non-code file`,
          }));
          continue;
        }

        // Run each operation at most once per file; later findings that map to
        // the same operation simply share its outcome.
        if (!appliedOps.has(opName)) {
          const result = op.fn(content);
          if (result !== null) {
            content = result;
            appliedOps.add(opName);
          }
        }

        const applied = appliedOps.has(opName);
        fixes.push(fixResult({
          finding_id: f.id,
          file: relPath,
          operation: opName,
          status: applied ? 'applied' : 'skipped',
          description: applied ? op.desc : `${op.desc} — no change needed`,
        }));
      }
    }

    // Step 4: Write if changed.
    if (content === originalContent) continue;
    // In dry-run, results stay marked as applied but nothing is written.
    if (dryRun) continue;

    // Validate before writing.
    const validation = validateContent(group.absPath, content);
    if (!validation.valid) {
      failAppliedFixes(relPath, `Validation failed: ${validation.error}`);
      errors.push({
        finding_id: group.findings[0]?.id,
        file: relPath,
        error: `Post-fix validation failed: ${validation.error}. File not modified.`,
      });
      continue;
    }

    // Atomic write: temp file → rename.
    const tmpPath = group.absPath + '.clean-tmp';
    try {
      // Carry over the original permission bits; rename would otherwise leave
      // the file with the temp file's default mode.
      const { mode } = await stat(group.absPath);
      await writeFile(tmpPath, content, { encoding: 'utf-8', mode: mode & 0o777 });
      await rename(tmpPath, group.absPath);
    } catch (e) {
      try { await unlink(tmpPath); } catch { /* ignore: temp file may not exist */ }
      failAppliedFixes(relPath, `Write failed: ${e.message}`);
      errors.push({ finding_id: group.findings[0]?.id, file: relPath, error: `Write failed: ${e.message}` });
    }
  }

  // Step 5: Also report non-auto findings for context.
  for (const { finding: f, tier } of tiered) {
    if (tier === 'auto') continue;
    fixes.push(fixResult({
      finding_id: f.id,
      file: f.file || 'unknown',
      operation: `tier:${tier}`,
      status: 'skipped',
      description: `Classified as ${tier} — not auto-fixable`,
    }));
  }

  return { fixes, errors };
}
// ---------------------------------------------------------------------------
// CLI entry point
// ---------------------------------------------------------------------------
/**
 * CLI entry point: `node auto-cleaner.mjs <target> --findings <json-file> [--dry-run]`.
 *
 * Loads findings (from the --findings file, or by running scan-orchestrator.mjs
 * next to this script when no file is given), applies auto-tier fixes, prints the
 * result envelope as JSON on stdout and progress/summary lines on stderr.
 * Exits with status 1 on usage errors, load failures, or when any fix reported
 * an error; otherwise 0.
 *
 * The orchestrator is started with execFileSync and an argument array, so the
 * target path is never interpreted by a shell.
 */
async function main() {
  const USAGE = 'Usage: node auto-cleaner.mjs <target> --findings <json-file> [--dry-run]';
  const args = process.argv.slice(2);

  // Flatten `{ scannerName: { findings: [...] } }` into one findings array.
  const collectScannerFindings = (scanners) => {
    const collected = [];
    for (const scanner of Object.values(scanners)) {
      if (Array.isArray(scanner.findings)) {
        collected.push(...scanner.findings);
      }
    }
    return collected;
  };

  // Parse arguments
  let targetArg = null;
  let findingsPath = null;
  let dryRun = false;
  for (let i = 0; i < args.length; i++) {
    const arg = args[i];
    if (arg === '--findings') {
      if (i + 1 >= args.length) {
        // Previously a trailing --findings was silently treated as the target path.
        console.error('Missing value for --findings');
        console.error(USAGE);
        process.exit(1);
      }
      findingsPath = args[++i];
    } else if (arg === '--dry-run') {
      dryRun = true;
    } else if (!targetArg) {
      targetArg = arg;
    }
  }

  if (!targetArg) {
    console.error(USAGE);
    process.exit(1);
  }
  const targetPath = resolve(targetArg);

  // Read findings JSON
  let findings;
  if (findingsPath) {
    try {
      const raw = await readFile(resolve(findingsPath), 'utf-8');
      const envelope = JSON.parse(raw);
      // Accepted shapes: scanner envelope, { findings: [...] }, or a bare array.
      if (envelope.scanners) {
        findings = collectScannerFindings(envelope.scanners);
      } else if (Array.isArray(envelope.findings)) {
        findings = envelope.findings;
      } else if (Array.isArray(envelope)) {
        findings = envelope;
      } else {
        findings = [];
      }
    } catch (e) {
      console.error(`Failed to read findings file: ${e.message}`);
      process.exit(1);
    }
  } else {
    // If no findings file, run the orchestrator inline
    console.error('[auto-cleaner] No --findings provided. Running scan-orchestrator...');
    try {
      const orchestratorPath = join(dirname(fileURLToPath(import.meta.url)), 'scan-orchestrator.mjs');
      const result = execFileSync(process.execPath, [resolve(orchestratorPath), targetPath], {
        encoding: 'utf-8',
        timeout: 60000,
        stdio: ['pipe', 'pipe', 'pipe'],
        // Scan output can be large; raise the limit well above the 1 MiB default.
        maxBuffer: 64 * 1024 * 1024,
      });
      const envelope = JSON.parse(result);
      findings = collectScannerFindings(envelope.scanners || {});
    } catch (e) {
      console.error(`Orchestrator failed: ${e.message}`);
      process.exit(1);
    }
  }

  process.stderr.write(
    `[auto-cleaner] ${findings.length} findings loaded. ` +
    `Mode: ${dryRun ? 'DRY-RUN' : 'LIVE'}. Target: ${targetPath}\n`
  );

  // Classify and count tiers
  const tiers = { auto: 0, semi_auto: 0, manual: 0, skip: 0 };
  for (const f of findings) {
    tiers[classifyFinding(f)]++;
  }
  process.stderr.write(
    `[auto-cleaner] Classification: ${tiers.auto} auto, ${tiers.semi_auto} semi-auto, ` +
    `${tiers.manual} manual, ${tiers.skip} skip\n`
  );

  // Apply fixes
  const startMs = Date.now();
  const { fixes, errors } = await applyFixes(targetPath, findings, dryRun);
  const durationMs = Date.now() - startMs;

  // Build output envelope
  const output = cleanEnvelope(targetPath, dryRun, fixes, errors, durationMs);

  // JSON to stdout
  process.stdout.write(JSON.stringify(output, null, 2) + '\n');

  // Summary to stderr
  const s = output.summary;
  process.stderr.write(
    `\n[auto-cleaner] === COMPLETE ===\n` +
    `[auto-cleaner] Applied: ${s.fixes_applied} | Skipped: ${s.fixes_skipped} | ` +
    `Failed: ${s.fixes_failed} | Files modified: ${s.files_modified}\n` +
    `[auto-cleaner] Duration: ${durationMs}ms\n`
  );

  process.exit(errors.length > 0 ? 1 : 0);
}
// Only run the CLI when this file is executed directly, not when it is imported for testing.
// The module's own path is derived with fileURLToPath: unlike URL#pathname it decodes
// percent-escapes (e.g. spaces) and yields a native path on Windows, so it can be compared
// with process.argv[1]. The file-name suffix check is kept as a fallback.
const isMain = process.argv[1] &&
  (process.argv[1].endsWith('auto-cleaner.mjs') || process.argv[1] === fileURLToPath(import.meta.url));
if (isMain) {
  main().catch((err) => {
    console.error(`Fatal error: ${err.message}`);
    process.exit(1);
  });
}
// Export for testing
export { classifyFinding, FIX_OPS, opsForFinding, applyFixes };