ktg-plugin-marketplace/plugins/llm-security/scanners/auto-cleaner.mjs

#!/usr/bin/env node
// auto-cleaner.mjs — Deterministic remediation engine for security findings
// Zero external dependencies. Reuses scanners/lib/ shared library.
//
// CLI: node auto-cleaner.mjs <target> --findings <json-file> [--dry-run]
//
// Fix operations are pure functions (content in → content out).
// Atomic writes: validate the new content, write it to .clean-tmp, then rename over the original.
// Content-based matching (not line-number based) for robustness.

import { chmod, readFile, writeFile, rename, unlink, stat } from 'node:fs/promises';
import { writeFileSync, unlinkSync } from 'node:fs';
import { resolve, extname, join, dirname } from 'node:path';
import { fileURLToPath } from 'node:url';
import { execFileSync, execSync } from 'node:child_process';
import { fixResult, cleanEnvelope } from './lib/output.mjs';

// ---------------------------------------------------------------------------
// Classification: finding → tier
// ---------------------------------------------------------------------------

/**
 * Decide how a finding may be remediated.
 *
 * The decision is keyword-driven: the scanner id selects a rule set, and the
 * lower-cased title/description (plus file extension and severity for some
 * scanners) select the tier. Rules are evaluated in order; first hit wins.
 *
 * @param {object} f - Scanner finding (reads `scanner`, `title`, `description`, `file`, `severity`)
 * @returns {'auto'|'semi_auto'|'manual'|'skip'}
 */
function classifyFinding(f) {
  const scanner = f.scanner || '';
  const title = (f.title || '').toLowerCase();
  const description = (f.description || '').toLowerCase();
  const filePath = (f.file || '').toLowerCase();
  const combined = `${title} ${description}`;

  // Small predicates so each rule below reads as a sentence.
  const inTitle = (needle) => title.includes(needle);
  const mentions = (needle) => combined.includes(needle);
  const mentionsAny = (...needles) => needles.some((needle) => mentions(needle));
  const mentionsAll = (...needles) => needles.every((needle) => mentions(needle));

  switch (scanner) {
    case 'UNI': {
      const invisibleKinds = ['zero-width', 'unicode tag', 'steganograph', 'bidi'];
      if (invisibleKinds.some((kind) => inTitle(kind))) return 'auto';
      if (!inTitle('homoglyph')) return 'semi_auto';
      // Homoglyphs: rewriting is safe in code files, needs review in prose.
      const codeExts = ['.js', '.mjs', '.cjs', '.ts', '.mts', '.py', '.jsx', '.tsx'];
      return codeExts.some((ext) => filePath.endsWith(ext)) ? 'auto' : 'semi_auto';
    }

    case 'ENT':
      return 'semi_auto';

    case 'PRM': {
      if (inTitle('haiku') && mentions('sensitive')) return 'auto';
      const reviewable =
        inTitle('ghost hook') ||
        mentions('script not found') ||
        mentionsAll('read-only', 'write') ||
        mentionsAll('dangerous', 'triple');
      return reviewable ? 'semi_auto' : 'manual';
    }

    case 'DEP':
      // A CVE without a published fix cannot be remediated mechanically.
      return mentions('cve') && !mentions('fix available') ? 'manual' : 'semi_auto';

    case 'TNT':
      return 'manual';

    case 'GIT':
      if (mentionsAll('suspicious domain', 'post-commit')) return 'auto';
      return mentionsAll('hook', 'network') ? 'semi_auto' : 'skip';

    case 'NET':
      if (f.severity === 'high' && mentions('suspicious')) return 'auto';
      if (mentionsAny('loopback', '127.0.0.1')) return 'auto';
      // Everything else is reviewable unless it is purely informational.
      return f.severity === 'info' ? 'manual' : 'semi_auto';

    // LLM-detected findings (from skill-scanner-agent)
    case 'SKL':
    case 'MCP': {
      const autoFixable =
        mentionsAny('html comment injection', '<!-- agent') ||
        mentionsAll('system:', 'header') ||
        mentionsAny('persistence', 'cron', 'launchagent', 'zshrc') ||
        mentionsAny('privilege escalation', 'hooks.json', 'settings.json') ||
        mentionsAll('registry', 'redirect') ||
        mentionsAll('injection', 'frontmatter') ||
        mentionsAny('exfiltration', 'suspicious') ||
        mentionsAll('credential', 'env') ||
        mentionsAny('self-modif', 'self-update');
      if (autoFixable) return 'auto';

      const needsReview =
        mentions('credential access') ||
        mentionsAll('unannounced', 'install') ||
        mentions('hidden directive');
      return needsReview ? 'semi_auto' : 'manual';
    }

    default:
      return 'manual';
  }
}

// ---------------------------------------------------------------------------
// Fix operations — pure functions: content in → content out
// ---------------------------------------------------------------------------

/**
 * Invisible code points removed by stripZeroWidth:
 * U+200B zero width space, U+200C zero width non-joiner, U+200D zero width
 * joiner, U+FEFF zero width no-break space / BOM, U+00AD soft hyphen.
 * (stripZeroWidth keeps a U+FEFF that sits at the very start of the file.)
 */
const ZERO_WIDTH = new Set([0x200B, 0x200C, 0x200D, 0xFEFF, 0x00AD]);

/** Inclusive range of Unicode Tags code points treated as steganography (U+E0001–U+E007F). */
const TAG_START = 0xE0001;
const TAG_END = 0xE007F;

/**
 * BIDI control code points: embeddings/overrides and their terminator
 * (U+202A–U+202E) and isolates with their terminator (U+2066–U+2069).
 */
const BIDI = new Set([0x202A, 0x202B, 0x202C, 0x202D, 0x202E, 0x2066, 0x2067, 0x2068, 0x2069]);

/**
 * Cyrillic → Latin confusable mapping, keyed by Cyrillic code point.
 * First two rows are lowercase letters, last two rows are uppercase letters.
 */
const CYRILLIC_TO_LATIN = new Map([
  [0x0430, 'a'], [0x0435, 'e'], [0x043E, 'o'], [0x0441, 'c'],
  [0x0440, 'p'], [0x0443, 'y'], [0x0445, 'x'],
  [0x0410, 'A'], [0x0415, 'E'], [0x041E, 'O'], [0x0421, 'C'],
  [0x0420, 'P'], [0x0425, 'X'],
]);

/**
 * Suspicious domains to strip.
 * Entries are lowercase substrings; stripSuspiciousUrls matches them against
 * the lower-cased line, so 'ngrok' matches any host containing that text.
 */
const EXFIL_DOMAINS = [
  'webhook.site', 'ngrok', 'requestbin', 'pipedream.net',
  'pastebin.com', 'bit.ly', 'hookbin.com', 'beeceptor.com',
  'smee.io', 'transfer.sh', 'file.io', 'paste.ee', 'hastebin.com',
  'tinyurl.com', 'is.gd', 'goo.gl', 'cutt.ly',
];

/**
 * Remove every code point listed in ZERO_WIDTH from the text.
 *
 * The single exception is a U+FEFF sitting at offset 0, which is a legitimate
 * byte-order mark and is kept. Line breaks are never touched, so the line
 * structure of the file is preserved.
 *
 * @param {string} content - File text
 * @returns {string|null} Cleaned text, or null when nothing was removed
 */
function stripZeroWidth(content) {
  let cleaned = '';
  let offset = 0; // UTF-16 offset of the current code point in the original text

  for (const ch of content) {
    const isLeadingBom = offset === 0 && ch.codePointAt(0) === 0xFEFF;
    if (isLeadingBom || !ZERO_WIDTH.has(ch.codePointAt(0))) {
      cleaned += ch;
    }
    offset += ch.length;
  }

  // Characters are only ever dropped, so a shorter string means something was stripped.
  return cleaned.length === content.length ? null : cleaned;
}

/**
 * Drop code points in the Unicode Tags range [TAG_START, TAG_END], which can
 * carry hidden (steganographic) text.
 *
 * @param {string} content - File text
 * @returns {string|null} Cleaned text, or null when no tag code point was present
 */
function stripUnicodeTags(content) {
  let visible = '';
  let removedAny = false;

  // for...of walks code points, so astral characters arrive as one unit.
  for (const ch of content) {
    const codePoint = ch.codePointAt(0);
    const isTag = codePoint >= TAG_START && codePoint <= TAG_END;
    if (isTag) {
      removedAny = true;
    } else {
      visible += ch;
    }
  }

  return removedAny ? visible : null;
}

/**
 * Remove bidirectional control characters (the code points in BIDI).
 *
 * @param {string} content - File text
 * @returns {string|null} Cleaned text, or null when no BIDI control was present
 */
function stripBidi(content) {
  const codePoints = Array.from(content);
  const visible = codePoints.filter((ch) => !BIDI.has(ch.codePointAt(0)));

  // Filtering only removes entries, so equal lengths mean nothing matched.
  return visible.length === codePoints.length ? null : visible.join('');
}

/**
 * Replace Cyrillic look-alike letters with the Latin letters they imitate,
 * using the CYRILLIC_TO_LATIN table.
 *
 * The function itself is extension-agnostic; the caller restricts it to code
 * files via the `codeOnly` flag in the operation registry.
 *
 * @param {string} content - File text
 * @returns {string|null} Normalized text, or null when no confusable was present
 */
function normalizeHomoglyphs(content) {
  let normalized = '';
  let replacedAny = false;

  for (const ch of content) {
    const latin = CYRILLIC_TO_LATIN.get(ch.codePointAt(0));
    if (latin) {
      normalized += latin;
      replacedAny = true;
    } else {
      normalized += ch;
    }
  }

  return replacedAny ? normalized : null;
}

/**
 * Strip HTML comment injections (<!-- AGENT: ... -->, <!-- HIDDEN: ... -->,
 * <!-- SYSTEM: ... -->), including multi-line comments.
 *
 * The keyword is matched case-insensitively: a model reading the file does not
 * care about case, so `<!-- agent: ... -->` must not slip past the cleaner.
 * Ordinary HTML comments (no keyword followed by a colon) are left alone.
 *
 * @param {string} content - File text
 * @returns {string|null} Cleaned text, or null when nothing matched
 */
function stripHtmlCommentInjections(content) {
  const pattern = /<!--\s*(?:AGENT|HIDDEN|SYSTEM)\s*:[\s\S]*?-->/gi;
  const result = content.replace(pattern, '');
  return result !== content ? result : null;
}

/**
 * Remove spoofed "# SYSTEM:" heading lines.
 *
 * Lines inside ``` code fences are left untouched so that examples quoting
 * such a header survive. A line is removed only when it starts (at column 0)
 * with `#`, optional whitespace, and `SYSTEM:` in any letter case.
 *
 * @param {string} content - File text
 * @returns {string|null} Cleaned text, or null when no header was removed
 */
function stripSystemHeaders(content) {
  const SPOOFED_HEADER = /^#\s*SYSTEM\s*:/i;
  const sourceLines = content.split('\n');
  let insideFence = false;

  const kept = sourceLines.filter((line) => {
    // A fence marker flips the state before the line itself is judged.
    if (line.trimStart().startsWith('```')) {
      insideFence = !insideFence;
    }
    return insideFence || !SPOOFED_HEADER.test(line);
  });

  return kept.length === sourceLines.length ? null : kept.join('\n');
}

/**
 * Strip persistence mechanisms (crontab, LaunchAgent, systemctl, shell-rc writes).
 *
 * - Outside a ``` code fence, a matching line is removed on its own.
 * - Inside a code fence, the first matching line condemns the whole fenced
 *   block: the opening fence, every line of the block, and the closing fence
 *   are all removed.
 *
 * The position of the opening fence is remembered explicitly. Popping "the
 * previous line" is only correct when the match is the first line of the
 * block; otherwise it deletes an innocent code line and leaves a dangling,
 * unclosed fence behind.
 *
 * @param {string} content - File text
 * @returns {string|null} Cleaned text, or null when nothing was removed
 */
function stripPersistence(content) {
  const PERSISTENCE_PATTERNS = [
    /crontab\s+-/,
    /LaunchAgent/i,
    /systemctl\s+(enable|start|restart)/,
    />>?\s*~\/\.(?:zshrc|bashrc|profile|bash_profile)/,
    /Library\/LaunchAgents/,
  ];
  const isPersistence = (line) => PERSISTENCE_PATTERNS.some((p) => p.test(line));

  const result = [];
  let inCodeFence = false;
  let fenceStart = 0; // index in `result` of the current block's opening fence
  let inMaliciousBlock = false;
  let changed = false;

  for (const line of content.split('\n')) {
    if (line.trimStart().startsWith('```')) {
      if (!inCodeFence) {
        inCodeFence = true;
        fenceStart = result.length;
        result.push(line);
      } else {
        inCodeFence = false;
        if (inMaliciousBlock) {
          inMaliciousBlock = false; // closing fence of a removed block is dropped too
        } else {
          result.push(line);
        }
      }
      continue;
    }

    // Remainder of a block already condemned.
    if (inMaliciousBlock) continue;

    if (isPersistence(line)) {
      changed = true;
      if (inCodeFence) {
        // Discard the opening fence and everything kept from this block so far.
        result.length = fenceStart;
        inMaliciousBlock = true;
      }
      continue;
    }

    result.push(line);
  }

  return changed ? result.join('\n') : null;
}

/**
 * Strip privilege escalation writes (to hooks.json, settings.json, CLAUDE.md).
 *
 * A line is removed when it both names one of the protected files and contains
 * a write-like verb. The verb check is run on the line with the file paths
 * masked out; otherwise "set" matches inside "settings.json" itself and every
 * line that merely mentions the settings file would be deleted.
 *
 * @param {string} content - File text
 * @returns {string|null} Cleaned text, or null when nothing was removed
 */
function stripEscalation(content) {
  // Global flags so that String#replace masks every occurrence on the line.
  const ESCALATION_TARGETS = [
    /hooks\/hooks\.json/g,
    /~\/\.claude\/settings\.json/g,
    /\.claude\/settings\.json/g,
    /CLAUDE\.md/gi,
  ];
  const WRITE_VERBS = /modif|write|update|overwrite|create|set|add|push|insert|append|config/i;

  const lines = content.split('\n');
  const kept = lines.filter((line) => {
    const withoutTargets = ESCALATION_TARGETS.reduce(
      (text, target) => text.replace(target, ' '),
      line,
    );
    const mentionsTarget = withoutTargets !== line;
    return !(mentionsTarget && WRITE_VERBS.test(withoutTargets));
  });

  return kept.length === lines.length ? null : kept.join('\n');
}

/**
 * Strip lines that redirect package installs to a non-standard registry
 * (`npm config set registry ...`, pip `--index-url` / `--extra-index-url`).
 *
 * The official hosts are recognised by exact host name, not by prefix:
 * `https://pypi.org.evil.example` and `https://pypi.org@evil.example` are
 * treated as redirects. The `(?!\s)` guard stops the regex engine from
 * backtracking into a run of whitespace and then "seeing" a space instead of
 * the official URL, which would strip a legitimate line.
 *
 * @param {string} content - File text
 * @returns {string|null} Cleaned text, or null when nothing was removed
 */
function stripRegistryRedirect(content) {
  // After an official host: no further host-name character, no ".label", no "@".
  const patterns = [
    /npm\s+config\s+set\s+registry\s+(?!\s)(?!https:\/\/registry\.npmjs\.org(?![\w@-]|\.[\w-]))/,
    /--index-url\s+(?!\s)(?!https:\/\/pypi\.org(?![\w@-]|\.[\w-]))/,
    /--extra-index-url\s+https?:\/\/(?!pypi\.org(?![\w@-]|\.[\w-]))/,
  ];

  const lines = content.split('\n');
  const kept = lines.filter((line) => !patterns.some((p) => p.test(line)));
  return kept.length === lines.length ? null : kept.join('\n');
}

/**
 * Drop every line that carries an http(s) URL and mentions one of the
 * EXFIL_DOMAINS entries.
 *
 * Domain matching is a case-insensitive substring test on the whole line; the
 * URL scheme test is case-sensitive (`http://` or `https://`).
 *
 * @param {string} content - File text
 * @returns {string|null} Cleaned text, or null when nothing was removed
 */
function stripSuspiciousUrls(content) {
  const URL_SCHEME = /https?:\/\//;
  const allLines = content.split('\n');

  const kept = allLines.filter((line) => {
    if (!URL_SCHEME.test(line)) return true;
    const lowered = line.toLowerCase();
    return !EXFIL_DOMAINS.some((domain) => lowered.includes(domain));
  });

  return kept.length === allLines.length ? null : kept.join('\n');
}

/**
 * Normalize `http://127.0.0.1` URLs to `http://localhost`.
 *
 * The match is bounded on the right so that only the exact address is
 * rewritten: `http://127.0.0.10` (a different loopback address) and
 * `http://127.0.0.1.nip.io` (a DNS name) are left untouched instead of being
 * corrupted into `http://localhost0` / `http://localhost.nip.io`.
 *
 * @param {string} content - File text
 * @returns {string|null} Rewritten text, or null when nothing matched
 */
function normalizeLoopback(content) {
  // Not followed by another digit, nor by ".<label>" continuing the host name.
  const pattern = /http:\/\/127\.0\.0\.1(?!\d|\.[\w-])/g;
  const result = content.replace(pattern, 'http://localhost');
  return result !== content ? result : null;
}

/**
 * Upgrade `model: haiku` to `model: sonnet` in the leading YAML frontmatter.
 *
 * Only the frontmatter block at the very start of the file is inspected; the
 * body is never modified. The result is assembled by slicing rather than by
 * `content.replace(fm, upgraded)`, because String#replace interprets `$$`,
 * `$&`, `` $` `` and `$'` inside the replacement string and would corrupt any
 * frontmatter that happens to contain a dollar sign.
 *
 * @param {string} content - File text
 * @returns {string|null} Updated text, or null when there is no frontmatter or no haiku model
 */
function upgradeHaikuModel(content) {
  const fmMatch = content.match(/^(---\r?\n[\s\S]*?\r?\n---)/);
  if (!fmMatch) return null;

  const fm = fmMatch[1];
  const upgraded = fm.replace(/model:\s*haiku/i, 'model: sonnet');
  if (upgraded === fm) return null;

  // The pattern is anchored at offset 0, so the frontmatter is exactly the prefix.
  return upgraded + content.slice(fm.length);
}

/**
 * Strip prompt-injection phrases from the leading YAML frontmatter.
 *
 * The whole frontmatter block is scanned (not only name/description), and the
 * body after the closing `---` is returned untouched. Every phrase, including
 * the HTML-comment form, is matched case-insensitively so that
 * `<!-- system: ... -->` is treated like `<!-- SYSTEM: ... -->`.
 *
 * @param {string} content - File text
 * @returns {string|null} Cleaned text, or null when there is no frontmatter or nothing matched
 */
function stripInjectionFrontmatter(content) {
  const fmMatch = content.match(/^(---\r?\n)([\s\S]*?)(\r?\n---)/);
  if (!fmMatch) return null;

  const INJECTION_PHRASES = [
    /<!--\s*(?:AGENT|HIDDEN|SYSTEM)\s*:[\s\S]*?-->/gi,
    /ignore\s+(?:previous|above|all)\s+instructions/gi,
    /you\s+are\s+now\s+(?:a|an)\s+/gi,
    /override\s+safety\s+constraints/gi,
    /unrestricted\s+(?:diagnostic\s+)?mode/gi,
    /pre-authorized/gi,
    /elevated\s+permissions/gi,
  ];

  const [whole, opener, originalFm, closer] = fmMatch;
  const cleanedFm = INJECTION_PHRASES.reduce(
    (text, phrase) => text.replace(phrase, ''),
    originalFm,
  );
  if (cleanedFm === originalFm) return null;

  return `${opener}${cleanedFm}${closer}${content.slice(whole.length)}`;
}

/**
 * Move credential-looking command-line arguments of MCP servers into the
 * server's `env` block.
 *
 * For every server under `mcpServers` / `mcp_servers` whose `args` is an array,
 * arguments are scanned left to right:
 *   - `--name=value` (credential-looking)  → env[NAME] = value
 *   - `--name value` (name credential-looking) → env[NAME] = value, both args removed
 *   - a credential-looking final argument   → env[NAME] = the argument itself
 * NAME is the flag text with every non-alphanumeric character replaced by `_`
 * and upper-cased (so `--api-key` becomes `__API_KEY`).
 *
 * Scanning forwards matters: once a flag has consumed its value, the value is
 * never re-examined, so a value such as `mytoken123` cannot be mistaken for a
 * flag and swallow the unrelated argument that follows it. Servers that need no
 * change are left exactly as they were (no empty `env` is added).
 *
 * @param {string} content - JSON text of the config file
 * @returns {string|null} Re-serialized JSON (2-space indent), or null when the
 *   text is not JSON, has no server map, or contains nothing to move
 */
function moveMcpCredsToEnv(content) {
  let parsed;
  try {
    parsed = JSON.parse(content);
  } catch {
    return null;
  }

  // Look for mcpServers pattern
  const servers = parsed?.mcpServers || parsed?.mcp_servers;
  if (!servers || typeof servers !== 'object') return null;

  const CRED_PATTERNS = [
    /api[_-]?key/i, /secret/i, /token/i, /password/i,
    /credential/i, /auth/i, /bearer/i,
  ];
  const looksLikeCredential = (text) => CRED_PATTERNS.some((p) => p.test(text));
  const toEnvKey = (name) => name.replace(/[^A-Z0-9_]/gi, '_').toUpperCase();

  let changed = false;

  for (const config of Object.values(servers)) {
    if (!config || typeof config !== 'object' || !Array.isArray(config.args)) continue;

    const { args } = config;
    const keptArgs = [];
    const moved = {};

    for (let i = 0; i < args.length; i++) {
      const arg = String(args[i]);
      if (!looksLikeCredential(arg)) {
        keptArgs.push(args[i]);
        continue;
      }

      const eq = arg.indexOf('=');
      if (eq > 0) {
        // --flag=value: name the variable after the flag only, never after the secret.
        moved[toEnvKey(arg.slice(0, eq))] = arg.slice(eq + 1);
      } else if (i + 1 < args.length) {
        moved[toEnvKey(arg)] = String(args[i + 1]);
        i += 1; // the value has been consumed
      } else {
        moved[toEnvKey(arg)] = arg;
      }
    }

    if (keptArgs.length === args.length) continue;

    config.args = keptArgs;
    config.env = { ...config.env, ...moved };
    changed = true;
  }

  return changed ? JSON.stringify(parsed, null, 2) : null;
}

/**
 * Remove lines whose code writes to MCP/Claude configuration files
 * (`writeFile*` / `fs.write*` calls mentioning `.claude`, `hooks.json`,
 * `settings.json`, `.mcp.json` or `plugin.json` on the same line).
 *
 * @param {string} content - File text
 * @returns {string|null} Cleaned text, or null when nothing was removed
 */
function stripSelfModification(content) {
  const SELF_MOD_PATTERNS = [
    /writeFile.*\.claude/i,
    /writeFile.*hooks\.json/i,
    /writeFile.*settings\.json/i,
    /writeFile.*\.mcp\.json/i,
    /writeFile.*plugin\.json/i,
    /fs\.write.*\.claude/i,
    /fs\.write.*hooks\.json/i,
  ];
  const writesToConfig = (line) => SELF_MOD_PATTERNS.some((re) => re.test(line));

  const sourceLines = content.split('\n');
  const kept = sourceLines.filter((line) => !writesToConfig(line));

  return kept.length < sourceLines.length ? kept.join('\n') : null;
}

/**
 * Strip self-update mechanisms (npm/pip self installs, `git pull ... origin`,
 * pipe-to-shell downloads).
 *
 * - Outside a ``` code fence, a matching line is removed on its own.
 * - Inside a code fence, a match removes the entire fenced block, fences
 *   included.
 *
 * Fence state is tracked explicitly. Inferring "inside a fence" from the
 * previous kept line is unsafe: when that line is the *closing* fence of an
 * earlier block, it would be popped and everything up to the next fence would
 * be silently deleted.
 *
 * @param {string} content - File text
 * @returns {string|null} Cleaned text, or null when nothing was removed
 */
function stripSelfUpdate(content) {
  const SELF_UPDATE = [
    /npm\s+(install|update)\s+(-g\s+)?.*self/i,
    /pip\s+install\s+--upgrade\s+.*self/i,
    /git\s+pull\s+.*origin/i,
    /curl.*\|\s*(sh|bash)/,
    /wget.*\|\s*(sh|bash)/,
  ];
  const isSelfUpdate = (line) => SELF_UPDATE.some((p) => p.test(line));

  const result = [];
  let inCodeFence = false;
  let fenceStart = 0; // index in `result` of the current block's opening fence
  let inSelfUpdate = false;
  let changed = false;

  for (const line of content.split('\n')) {
    if (line.trimStart().startsWith('```')) {
      if (!inCodeFence) {
        inCodeFence = true;
        fenceStart = result.length;
        result.push(line);
      } else {
        inCodeFence = false;
        if (inSelfUpdate) {
          inSelfUpdate = false; // drop the closing fence of a removed block
        } else {
          result.push(line);
        }
      }
      continue;
    }

    if (inSelfUpdate) continue;

    if (isSelfUpdate(line)) {
      changed = true;
      if (inCodeFence) {
        // Discard the opening fence and any lines of this block kept so far.
        result.length = fenceStart;
        inSelfUpdate = true;
      }
      continue;
    }

    result.push(line);
  }

  return changed ? result.join('\n') : null;
}

// ---------------------------------------------------------------------------
// Fix operation registry
// ---------------------------------------------------------------------------

/**
 * Map of operation names → fix functions + metadata.
 *
 * Each entry has:
 *   - `fn`:   the fix function; called with the file content, returns the new
 *             content or null when it made no change
 *   - `desc`: human-readable description used in fix reports
 *   - `codeOnly` (optional): when true, applyFixes skips the operation for
 *             files whose extension is not in its code-extension list
 *
 * The keys are the operation names returned by opsForFinding.
 */
const FIX_OPS = {
  strip_zero_width: {
    fn: stripZeroWidth,
    desc: 'Remove zero-width invisible characters',
  },
  strip_unicode_tags: {
    fn: stripUnicodeTags,
    desc: 'Remove Unicode Tag steganography codepoints',
  },
  strip_bidi: {
    fn: stripBidi,
    desc: 'Remove BIDI override characters',
  },
  normalize_homoglyphs: {
    fn: normalizeHomoglyphs,
    desc: 'Normalize Cyrillic confusables to Latin equivalents',
    codeOnly: true,
  },
  strip_html_comment_injections: {
    fn: stripHtmlCommentInjections,
    desc: 'Remove <!-- AGENT/HIDDEN/SYSTEM --> comment injections',
  },
  strip_system_headers: {
    fn: stripSystemHeaders,
    desc: 'Remove spoofed # SYSTEM: headers',
  },
  strip_persistence: {
    fn: stripPersistence,
    desc: 'Remove persistence mechanisms (crontab, LaunchAgent, zshrc)',
  },
  strip_escalation: {
    fn: stripEscalation,
    desc: 'Remove privilege escalation writes to hooks/settings',
  },
  strip_registry_redirect: {
    fn: stripRegistryRedirect,
    desc: 'Remove non-standard package registry redirections',
  },
  strip_suspicious_urls: {
    fn: stripSuspiciousUrls,
    desc: 'Remove lines with suspicious exfiltration domain URLs',
  },
  normalize_loopback: {
    fn: normalizeLoopback,
    desc: 'Replace 127.0.0.1 with localhost',
  },
  upgrade_haiku_model: {
    fn: upgradeHaikuModel,
    desc: 'Upgrade model: haiku to model: sonnet in frontmatter',
  },
  strip_injection_frontmatter: {
    fn: stripInjectionFrontmatter,
    desc: 'Remove injection phrases from frontmatter fields',
  },
  move_mcp_creds_to_env: {
    fn: moveMcpCredsToEnv,
    desc: 'Move credentials from MCP args to env block',
  },
  strip_self_modification: {
    fn: stripSelfModification,
    desc: 'Remove writeFile calls targeting config paths',
  },
  strip_self_update: {
    fn: stripSelfUpdate,
    desc: 'Remove self-update mechanisms (pipe-to-shell, etc.)',
  },
};

// ---------------------------------------------------------------------------
// Finding → fix operation mapping
// ---------------------------------------------------------------------------

/**
 * Map a finding to the names of the fix operations that address it.
 *
 * Deterministic scanners (UNI, PRM, NET, GIT) map to at most one operation.
 * LLM-detected findings (SKL, MCP, or a finding with no scanner id) may map to
 * several; they are returned in a fixed rule order.
 *
 * @param {object} f - Finding object (reads `scanner`, `title`, `description`)
 * @returns {string[]} Operation names (keys of FIX_OPS); empty when nothing applies
 */
function opsForFinding(f) {
  const scanner = f.scanner || '';
  const title = (f.title || '').toLowerCase();
  const description = (f.description || '').toLowerCase();
  const combined = `${title} ${description}`;

  const inTitle = (needle) => title.includes(needle);
  const mentions = (needle) => combined.includes(needle);

  switch (scanner) {
    case 'UNI':
      if (inTitle('zero-width')) return ['strip_zero_width'];
      if (inTitle('unicode tag') || inTitle('steganograph')) return ['strip_unicode_tags'];
      if (inTitle('bidi')) return ['strip_bidi'];
      if (inTitle('homoglyph')) return ['normalize_homoglyphs'];
      return [];

    case 'PRM':
      return inTitle('haiku') ? ['upgrade_haiku_model'] : [];

    case 'NET':
    case 'GIT':
      if (mentions('suspicious') && mentions('domain')) return ['strip_suspicious_urls'];
      if (mentions('loopback') || mentions('127.0.0.1')) return ['normalize_loopback'];
      return [];

    // LLM-detected findings
    case 'SKL':
    case 'MCP':
    case '': {
      // [operation, applies?] pairs; order here is the order of the returned list.
      const rules = [
        ['strip_html_comment_injections',
          mentions('html comment injection') || mentions('<!-- agent')],
        ['strip_system_headers',
          mentions('system:') && mentions('header')],
        ['strip_persistence',
          mentions('persistence') || mentions('cron') ||
          mentions('launchagent') || mentions('zshrc')],
        ['strip_escalation',
          mentions('privilege escalation') || mentions('write to hooks') ||
          mentions('write to settings')],
        ['strip_registry_redirect',
          mentions('registry') && mentions('redirect')],
        ['strip_suspicious_urls',
          mentions('exfiltration') || mentions('suspicious url')],
        ['strip_injection_frontmatter',
          mentions('injection') && mentions('frontmatter')],
        ['move_mcp_creds_to_env',
          mentions('credential') && mentions('env')],
        ['strip_self_modification', mentions('self-modif')],
        ['strip_self_update', mentions('self-update')],
      ];
      return rules.filter(([, applies]) => applies).map(([opName]) => opName);
    }

    default:
      return [];
  }
}

// ---------------------------------------------------------------------------
// File validation
// ---------------------------------------------------------------------------

/**
 * Validate file content after modification, before it is written back.
 *
 * - `.json` / `.jsonc`: must parse with JSON.parse.
 * - `.mjs` / `.js` / `.cjs`: written to a sibling `<name>.clean-check<ext>`
 *   file (same extension, so Node picks the same module format) and checked
 *   with `node --check`. The temp file is always removed afterwards.
 * - Everything else (including markdown, with or without frontmatter) is
 *   accepted as-is.
 *
 * The syntax check is spawned without a shell. File paths come from the
 * repository being cleaned and may contain quotes, `$(...)` or backticks;
 * interpolating them into a shell command line would allow command injection.
 *
 * @param {string} absPath - Absolute file path (used for the extension and temp-file location)
 * @param {string} content - Modified content
 * @returns {{ valid: boolean, error?: string }}
 */
function validateContent(absPath, content) {
  const ext = extname(absPath).toLowerCase();

  // JSON files must parse
  if (ext === '.json' || ext === '.jsonc') {
    try {
      JSON.parse(content);
      return { valid: true };
    } catch (e) {
      return { valid: false, error: `JSON parse failed: ${e.message}` };
    }
  }

  // JavaScript files — syntax validation via `node --check`
  if (ext === '.mjs' || ext === '.js' || ext === '.cjs') {
    const tmpPath = absPath.replace(/(\.\w+)$/, '.clean-check$1');
    try {
      writeFileSync(tmpPath, content);
      // Argument array, no shell: the path is passed verbatim to the child process.
      execFileSync(process.execPath, ['--check', tmpPath], { stdio: 'pipe', timeout: 5000 });
      return { valid: true };
    } catch (e) {
      return { valid: false, error: `Syntax check failed: ${e.message}` };
    } finally {
      // Best-effort cleanup; the file may not exist if the write itself failed.
      try { unlinkSync(tmpPath); } catch { /* ignore */ }
    }
  }

  // Markdown and all other files — no structural validation is performed
  return { valid: true };
}

// ---------------------------------------------------------------------------
// Core engine: apply fixes to files
// ---------------------------------------------------------------------------

/**
 * Apply all auto-tier fixes to the target.
 *
 * Pipeline:
 *   1. Classify every finding once; only tier `auto` is acted upon.
 *   2. Group auto findings by their `file` (findings without a file are
 *      reported as skipped).
 *   3. Per file: read it, run each mapped operation at most once, and record
 *      one fix result per (finding, operation).
 *   4. If the content changed and this is not a dry run: validate, then write
 *      to `<file>.clean-tmp` and rename over the original. The original file
 *      mode is copied onto the temp file first, so executable scripts and git
 *      hooks keep their permission bits after the rename.
 *   5. Append a skipped result for every non-auto finding, for context.
 *
 * In dry-run mode nothing is written; operations that would change the file
 * are still reported with status `applied`.
 *
 * @param {string} targetPath - Absolute target directory; finding paths are resolved against it
 * @param {object[]} findings - Scanner findings array
 * @param {boolean} dryRun - When true, compute and report fixes without touching files
 * @returns {Promise<{ fixes: object[], errors: object[] }>}
 */
async function applyFixes(targetPath, findings, dryRun) {
  const fixes = [];
  const errors = [];
  const CODE_EXTS = ['.js', '.mjs', '.cjs', '.ts', '.mts', '.py', '.jsx', '.tsx'];

  // Flip every fix already recorded as applied for this file to failed.
  const markAppliedAsFailed = (relPath, message) => {
    for (const fix of fixes) {
      if (fix.file === relPath && fix.status === 'applied') {
        fix.status = 'failed';
        fix.error = message;
      }
    }
  };

  // Step 1: Classify each finding exactly once
  const classified = findings.map((f) => ({ f, tier: classifyFinding(f) }));

  // Step 2: Group auto-tier findings by file
  const fileGroups = new Map(); // relPath → { findings: [], absPath: string }
  for (const { f, tier } of classified) {
    if (tier !== 'auto') continue;

    if (!f.file) {
      fixes.push(fixResult({
        finding_id: f.id,
        file: f.file || 'unknown',
        operation: 'skip',
        status: 'skipped',
        description: 'No file path in finding',
      }));
      continue;
    }

    if (!fileGroups.has(f.file)) {
      fileGroups.set(f.file, { findings: [], absPath: resolve(targetPath, f.file) });
    }
    fileGroups.get(f.file).findings.push(f);
  }

  // Step 3: Process each file
  for (const [relPath, group] of fileGroups) {
    let content;
    try {
      content = await readFile(group.absPath, 'utf-8');
    } catch (e) {
      for (const f of group.findings) {
        errors.push({ finding_id: f.id, file: relPath, error: `Cannot read file: ${e.message}` });
      }
      continue;
    }

    const originalContent = content;
    const appliedOps = new Set(); // operations that actually changed this file
    const isCodeFile = CODE_EXTS.includes(extname(group.absPath).toLowerCase());

    for (const f of group.findings) {
      const ops = opsForFinding(f);

      if (ops.length === 0) {
        fixes.push(fixResult({
          finding_id: f.id,
          file: relPath,
          operation: 'unmapped',
          status: 'skipped',
          description: 'No auto-fix operation mapped for this finding type',
        }));
        continue;
      }

      for (const opName of ops) {
        const op = FIX_OPS[opName];
        if (!op) continue;

        // Skip code-only ops on non-code files
        if (op.codeOnly && !isCodeFile) {
          fixes.push(fixResult({
            finding_id: f.id,
            file: relPath,
            operation: opName,
            status: 'skipped',
            description: `${op.desc} — skipped for non-code file`,
          }));
          continue;
        }

        // Run each operation at most once per file; later findings that map to
        // the same operation share its outcome.
        if (!appliedOps.has(opName)) {
          const result = op.fn(content);
          if (result !== null) {
            content = result;
            appliedOps.add(opName);
          }
        }

        const applied = appliedOps.has(opName);
        fixes.push(fixResult({
          finding_id: f.id,
          file: relPath,
          operation: opName,
          status: applied ? 'applied' : 'skipped',
          description: applied ? op.desc : `${op.desc} — no change needed`,
        }));
      }
    }

    // Step 4: Write if changed (dry-run reports "applied" but never writes)
    if (content === originalContent || dryRun) continue;

    // Validate before writing
    const validation = validateContent(group.absPath, content);
    if (!validation.valid) {
      markAppliedAsFailed(relPath, `Validation failed: ${validation.error}`);
      errors.push({
        finding_id: group.findings[0]?.id,
        file: relPath,
        error: `Post-fix validation failed: ${validation.error}. File not modified.`,
      });
      continue;
    }

    // Atomic write: temp file → rename
    const tmpPath = `${group.absPath}.clean-tmp`;
    try {
      const { mode } = await stat(group.absPath);
      await writeFile(tmpPath, content, 'utf-8');
      // rename() replaces the inode, so carry the permission bits over explicitly.
      await chmod(tmpPath, mode & 0o7777);
      await rename(tmpPath, group.absPath);
    } catch (e) {
      try { await unlink(tmpPath); } catch { /* temp file may not exist */ }
      markAppliedAsFailed(relPath, `Write failed: ${e.message}`);
      errors.push({ finding_id: group.findings[0]?.id, file: relPath, error: `Write failed: ${e.message}` });
    }
  }

  // Step 5: Also report non-auto findings for context
  for (const { f, tier } of classified) {
    if (tier === 'auto') continue;
    fixes.push(fixResult({
      finding_id: f.id,
      file: f.file || 'unknown',
      operation: `tier:${tier}`,
      status: 'skipped',
      description: `Classified as ${tier} — not auto-fixable`,
    }));
  }

  return { fixes, errors };
}

// ---------------------------------------------------------------------------
// CLI entry point
// ---------------------------------------------------------------------------

/**
 * Flatten the supported findings JSON shapes into one array.
 *
 * Accepted shapes: a scanner envelope (`{ scanners: { <id>: { findings: [] } } }`),
 * an object with a top-level `findings` array, or a bare array of findings.
 * Anything else yields an empty list.
 *
 * @param {*} envelope - Parsed JSON
 * @returns {object[]}
 */
function extractFindings(envelope) {
  if (envelope?.scanners) {
    return Object.values(envelope.scanners).flatMap(
      (scanner) => (Array.isArray(scanner?.findings) ? scanner.findings : []),
    );
  }
  if (Array.isArray(envelope?.findings)) return envelope.findings;
  if (Array.isArray(envelope)) return envelope;
  return [];
}

/**
 * CLI entry point.
 *
 * Usage: node auto-cleaner.mjs <target> --findings <json-file> [--dry-run]
 *
 * Loads findings from the given file, or runs scan-orchestrator.mjs (next to
 * this script) against the target when no file is given. Prints the result
 * envelope as JSON on stdout and progress/summary on stderr. The exit code is
 * 1 when any error was recorded (or on usage/load failure), 0 otherwise.
 */
async function main() {
  const USAGE = 'Usage: node auto-cleaner.mjs <target> --findings <json-file> [--dry-run]';
  const args = process.argv.slice(2);

  // Parse arguments
  let targetArg = null;
  let findingsPath = null;
  let dryRun = false;

  for (let i = 0; i < args.length; i++) {
    if (args[i] === '--findings') {
      if (i + 1 >= args.length) {
        // Without this guard the flag itself would be taken as the target path.
        console.error('--findings requires a JSON file path');
        console.error(USAGE);
        process.exit(1);
      }
      findingsPath = args[++i];
    } else if (args[i] === '--dry-run') {
      dryRun = true;
    } else if (!targetArg) {
      targetArg = args[i];
    }
  }

  if (!targetArg) {
    console.error(USAGE);
    process.exit(1);
  }

  const targetPath = resolve(targetArg);

  // Read findings JSON
  let findings;
  if (findingsPath) {
    try {
      const raw = await readFile(resolve(findingsPath), 'utf-8');
      findings = extractFindings(JSON.parse(raw));
    } catch (e) {
      console.error(`Failed to read findings file: ${e.message}`);
      process.exit(1);
    }
  } else {
    // If no findings file, run the orchestrator inline
    console.error('[auto-cleaner] No --findings provided. Running scan-orchestrator...');
    try {
      const orchestratorPath = join(dirname(fileURLToPath(import.meta.url)), 'scan-orchestrator.mjs');
      // No shell: the target path is user input and must not be interpreted.
      const result = execFileSync(process.execPath, [resolve(orchestratorPath), targetPath], {
        encoding: 'utf-8',
        timeout: 60000,
        stdio: ['pipe', 'pipe', 'pipe'],
        // Scan envelopes for large trees can exceed the default output cap.
        maxBuffer: 64 * 1024 * 1024,
      });
      findings = extractFindings(JSON.parse(result));
    } catch (e) {
      console.error(`Orchestrator failed: ${e.message}`);
      process.exit(1);
    }
  }

  process.stderr.write(
    `[auto-cleaner] ${findings.length} findings loaded. ` +
    `Mode: ${dryRun ? 'DRY-RUN' : 'LIVE'}. Target: ${targetPath}\n`
  );

  // Classify and count tiers
  const tiers = { auto: 0, semi_auto: 0, manual: 0, skip: 0 };
  for (const f of findings) {
    tiers[classifyFinding(f)]++;
  }
  process.stderr.write(
    `[auto-cleaner] Classification: ${tiers.auto} auto, ${tiers.semi_auto} semi-auto, ` +
    `${tiers.manual} manual, ${tiers.skip} skip\n`
  );

  // Apply fixes
  const startMs = Date.now();
  const { fixes, errors } = await applyFixes(targetPath, findings, dryRun);
  const durationMs = Date.now() - startMs;

  // Build output envelope
  const output = cleanEnvelope(targetPath, dryRun, fixes, errors, durationMs);

  // JSON to stdout
  process.stdout.write(JSON.stringify(output, null, 2) + '\n');

  // Summary to stderr
  const s = output.summary;
  process.stderr.write(
    `\n[auto-cleaner] === COMPLETE ===\n` +
    `[auto-cleaner] Applied: ${s.fixes_applied} | Skipped: ${s.fixes_skipped} | ` +
    `Failed: ${s.fixes_failed} | Files modified: ${s.files_modified}\n` +
    `[auto-cleaner] Duration: ${durationMs}ms\n`
  );

  // Set the exit code and let the process end naturally: process.exit() can
  // cut off a stdout write that is still pending when stdout is a pipe.
  process.exitCode = errors.length > 0 ? 1 : 0;
}

// Only run CLI when executed directly, not when imported for testing
const isMain = process.argv[1] &&
  (process.argv[1].endsWith('auto-cleaner.mjs') || process.argv[1] === new URL(import.meta.url).pathname);

if (isMain) {
  main().catch(err => {
    console.error(`Fatal error: ${err.message}`);
    process.exit(1);
  });
}

// Export for testing
export { classifyFinding, FIX_OPS, opsForFinding, applyFixes };