#!/usr/bin/env node
// scan-orchestrator.mjs — Entry point for deterministic deep-scan
// Single Node.js process. Imports every scanner listed in SCANNERS, runs them
// sequentially, shares file discovery, outputs JSON envelope to stdout.
// Zero external dependencies.

import { resolve, join, dirname } from 'node:path';
import { existsSync, readFileSync, writeFileSync, appendFileSync } from 'node:fs';
import { fileURLToPath } from 'node:url';
import { tmpdir } from 'node:os';
import { discoverFiles } from './lib/file-discovery.mjs';
import { envelope, resetCounter } from './lib/output.mjs';
import { saveBaseline, diffAgainstBaseline, extractFindings } from './lib/diff-engine.mjs';
import { toSARIF } from './lib/sarif-formatter.mjs';
import { loadPolicy } from './lib/policy-loader.mjs';

// Scanner modules (kept in their original relative order so module evaluation
// order is unchanged).
import { scan as unicodeScan } from './unicode-scanner.mjs';
import { scan as entropyScan } from './entropy-scanner.mjs';
import { scan as permissionScan } from './permission-mapper.mjs';
import { scan as depScan } from './dep-auditor.mjs';
import { scan as taintScan } from './taint-tracer.mjs';
import { scan as gitScan } from './git-forensics.mjs';
import { scan as networkScan } from './network-mapper.mjs';
import { scan as memoryScan } from './memory-poisoning-scanner.mjs';
import { scan as supplyChainScan } from './supply-chain-recheck.mjs';
import { scan as workflowScan } from './workflow-scanner.mjs';
import { scan as tfaScan } from './toxic-flow-analyzer.mjs';

// Severities accepted by --fail-on, ordered from most to least severe.
// The exit-code logic in main() relies on this ordering.
const FAIL_ON_LEVELS = ['critical', 'high', 'medium', 'low'];

// ---------------------------------------------------------------------------
// .llm-security-ignore support
// Format: one rule per line. Blank lines and # comments ignored.
//   SCANNER:glob  — ignore findings from SCANNER matching file glob
//   glob          — ignore findings from ALL scanners matching file glob
// Globs use minimatch-style: * matches within path segment, ** across segments.
// ---------------------------------------------------------------------------

/**
 * Read and parse `<targetPath>/.llm-security-ignore`.
 *
 * @param {string} targetPath - Directory expected to contain the ignore file.
 * @returns {{scanner: (string|null), pattern: string}[]} Parsed rules, in file
 *   order. `scanner` is null for rules that apply to every scanner. Returns an
 *   empty array when the file does not exist.
 */
function loadIgnoreRules(targetPath) {
  const ignoreFile = join(targetPath, '.llm-security-ignore');
  if (!existsSync(ignoreFile)) return [];

  const rules = [];
  for (const raw of readFileSync(ignoreFile, 'utf8').split('\n')) {
    const line = raw.trim();
    if (!line || line.startsWith('#')) continue;

    const colonIdx = line.indexOf(':');
    // A scanner prefix is 1–3 uppercase letters immediately followed by ':'.
    const hasScannerPrefix =
      colonIdx > 0 && colonIdx <= 3 && /^[A-Z]+$/.test(line.slice(0, colonIdx));

    if (hasScannerPrefix) {
      rules.push({
        scanner: line.slice(0, colonIdx),
        // Trim so that "UNI: docs/**" behaves like "UNI:docs/**".
        pattern: line.slice(colonIdx + 1).trim(),
      });
    } else {
      rules.push({ scanner: null, pattern: line });
    }
  }
  return rules;
}

/**
 * Compile a glob into an anchored regular expression.
 *
 * Supported syntax:
 *   `**` followed by `/` — zero or more whole leading path segments
 *   `**`                 — any characters, including `/`
 *   `*`                  — any characters except `/`
 *   `?`                  — exactly one character except `/`
 * Regex metacharacters are escaped; everything else matches literally.
 *
 * @param {string} glob - Glob pattern.
 * @returns {RegExp} Regex that must match the whole path.
 */
function globToRegex(glob) {
  let regex = '^';
  let i = 0;
  while (i < glob.length) {
    const c = glob[i];
    if (c === '*' && glob[i + 1] === '*') {
      i += 2;
      if (glob[i] === '/') {
        // "**/" must match on a segment boundary. Emitting a bare ".*" here
        // would let "**/foo.js" also match "barfoo.js".
        regex += '(?:.*/)?';
        i++;
      } else {
        regex += '.*';
      }
    } else if (c === '*') {
      regex += '[^/]*';
      i++;
    } else if (c === '?') {
      regex += '[^/]';
      i++;
    } else if ('.+^${}()|[]\\'.includes(c)) {
      regex += '\\' + c;
      i++;
    } else {
      regex += c;
      i++;
    }
  }
  regex += '$';
  return new RegExp(regex);
}

/**
 * Remove findings matched by ignore rules, mutating `scannerResults` in place.
 *
 * A finding's scanner prefix is `finding.scanner` when set, otherwise the first
 * three characters of the upper-cased result key. A finding's path is
 * `finding.file`, or the empty string when absent. When at least one finding is
 * removed from a result, that result's `counts` are rebuilt from what remains.
 *
 * @param {Object<string, {findings: Object[], counts: Object}>} scannerResults
 *   Scanner results keyed by scanner name.
 * @param {{scanner: (string|null), pattern: string}[]} rules - Parsed rules.
 * @returns {number} Total number of findings suppressed.
 */
function applyIgnoreRules(scannerResults, rules) {
  if (rules.length === 0) return 0;

  const compiled = rules.map((r) => ({ scanner: r.scanner, regex: globToRegex(r.pattern) }));
  let suppressed = 0;

  for (const [name, result] of Object.entries(scannerResults)) {
    // Nothing to filter on a result that carries no findings array.
    if (!result || !Array.isArray(result.findings)) continue;

    const before = result.findings.length;
    result.findings = result.findings.filter((f) => {
      const file = f.file || '';
      const findingPrefix = f.scanner || name.toUpperCase().slice(0, 3);
      const ignored = compiled.some(
        (rule) => (!rule.scanner || rule.scanner === findingPrefix) && rule.regex.test(file)
      );
      return !ignored;
    });

    const removed = before - result.findings.length;
    suppressed += removed;

    // Recount severities
    if (removed > 0) {
      result.counts = { critical: 0, high: 0, medium: 0, low: 0, info: 0 };
      for (const f of result.findings) {
        result.counts[f.severity] = (result.counts[f.severity] || 0) + 1;
      }
    }
  }
  return suppressed;
}

// Scanners run in this order. An entry with requiresPriorResults receives the
// results accumulated so far as its third argument.
const SCANNERS = [
  { name: 'unicode', fn: unicodeScan },
  { name: 'entropy', fn: entropyScan },
  { name: 'permission', fn: permissionScan },
  { name: 'dep', fn: depScan },
  { name: 'taint', fn: taintScan },
  { name: 'git', fn: gitScan },
  { name: 'network', fn: networkScan },
  { name: 'memory', fn: memoryScan },
  { name: 'supply-chain', fn: supplyChainScan },
  { name: 'workflow', fn: workflowScan },
  { name: 'toxic-flow', fn: tfaScan, requiresPriorResults: true },
];

// ---------------------------------------------------------------------------
// CLI arg parsing — supports --log-file, --output-file, --format, --baseline,
// --save-baseline, --fail-on and --compact, plus one positional target path.
// ---------------------------------------------------------------------------

/**
 * Parse command-line arguments.
 *
 * The first token that does not start with `--` and is not consumed as a flag
 * value becomes the target; later positional tokens are ignored. Tokens that
 * start with `--` are never taken as the target, so a value flag given without
 * a value (e.g. a trailing `--log-file`) is skipped rather than mistaken for
 * the path to scan.
 *
 * @param {string[]} argv - Full argument vector; parsing starts at index 2.
 * @returns {{target: (string|null), logFile: (string|null),
 *   outputFile: (string|null), baseline: boolean, saveBaseline: boolean,
 *   format: string, failOn: (string|null), compact: boolean}}
 */
function parseArgs(argv) {
  const args = {
    target: null,
    logFile: null,
    outputFile: null,
    baseline: false,
    saveBaseline: false,
    format: 'json',
    failOn: null,
    compact: false,
  };

  for (let i = 2; i < argv.length; i++) {
    const arg = argv[i];
    const hasValue = Boolean(argv[i + 1]);

    if (arg === '--log-file' && hasValue) {
      args.logFile = argv[++i];
    } else if (arg === '--output-file' && hasValue) {
      args.outputFile = argv[++i];
    } else if (arg === '--format' && hasValue) {
      args.format = argv[++i];
    } else if (arg === '--baseline') {
      args.baseline = true;
    } else if (arg === '--save-baseline') {
      args.saveBaseline = true;
    } else if (arg === '--fail-on' && hasValue) {
      args.failOn = argv[++i].toLowerCase();
    } else if (arg === '--compact') {
      args.compact = true;
    } else if (typeof arg === 'string' && arg.startsWith('--')) {
      // Unknown flag, or a value flag with its value missing: not a target.
      continue;
    } else if (!args.target) {
      args.target = arg;
    }
  }
  return args;
}

/**
 * Write one `[SEVERITY] scanner: title (file:line)` line per finding.
 *
 * @param {Object<string, {findings: Object[]}>} results - Scanner results.
 * @param {{write: function(string): *}} stream - Destination stream.
 */
function writeCompactFindings(results, stream) {
  for (const r of Object.values(results)) {
    for (const f of r.findings) {
      const loc = f.file ? ` (${f.file}${f.line ? ':' + f.line : ''})` : '';
      stream.write(`[${f.severity.toUpperCase()}] ${f.scanner}: ${f.title}${loc}\n`);
    }
  }
}

/**
 * Orchestrate a full scan: parse arguments, apply policy fallbacks, discover
 * files once, run every scanner in SCANNERS sequentially, apply
 * .llm-security-ignore rules, optionally diff against / save a baseline, emit
 * the report, and set the process exit code.
 *
 * Exit codes: with a fail-on level, 1 when any finding at or above that
 * severity exists, otherwise 0. Without one, 2 for verdict BLOCK, 1 for
 * WARNING, otherwise 0. Argument and setup errors exit with 1.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const args = parseArgs(process.argv);

  // Policy fallback for CI settings (CLI args take precedence)
  try {
    const policyRoot = args.target ? resolve(args.target) : process.cwd();
    const policy = loadPolicy(policyRoot);
    if (args.failOn === null && policy.ci && policy.ci.failOn) {
      // Normalise like the CLI value so "High" in a policy file is accepted.
      args.failOn = String(policy.ci.failOn).toLowerCase();
    }
    if (!args.compact && policy.ci && policy.ci.compact) {
      args.compact = true;
    }
  } catch {
    /* policy loading is best-effort */
  }

  // Validate --fail-on value
  if (args.failOn !== null && !FAIL_ON_LEVELS.includes(args.failOn)) {
    console.error(`--fail-on must be one of: ${FAIL_ON_LEVELS.join(', ')} (got: ${args.failOn})`);
    process.exit(1);
  }

  if (!args.target) {
    console.error(
      'Usage: node scan-orchestrator.mjs <target> [--log-file <path>] [--output-file <path>] ' +
        '[--format json|sarif] [--baseline] [--save-baseline] [--fail-on <level>] [--compact]'
    );
    process.exit(1);
  }

  const targetPath = resolve(args.target);
  if (!existsSync(targetPath)) {
    console.error(`Target path does not exist: ${targetPath}`);
    process.exit(1);
  }

  // Set up cross-platform log file (writes to both stderr and file)
  const logFilePath = args.logFile || join(tmpdir(), `llm-security-scan-${Date.now()}.log`);
  writeFileSync(logFilePath, ''); // create/truncate
  function log(msg) {
    process.stderr.write(msg);
    appendFileSync(logFilePath, msg);
  }

  const totalStart = Date.now();

  // Shared file discovery — done once, passed to all scanners
  let discovery;
  try {
    discovery = await discoverFiles(targetPath);
    // Log discovery summary to stderr (stdout is reserved for JSON)
    log(
      `[deep-scan] Discovered ${discovery.files.length} files` +
        ` (${discovery.skipped} skipped${discovery.truncated ? ', TRUNCATED' : ''})\n`
    );
  } catch (err) {
    console.error(`File discovery failed: ${err.message}`);
    process.exit(1);
  }

  // Run each scanner sequentially, catching errors per-scanner.
  // Scanners with requiresPriorResults receive accumulated results as 3rd arg.
  const results = {};
  for (const { name, fn, requiresPriorResults } of SCANNERS) {
    resetCounter(); // Reset finding counter per scanner for clean IDs
    log(`[deep-scan] Running ${name} scanner...\n`);
    try {
      results[name] = requiresPriorResults
        ? await fn(targetPath, discovery, results)
        : await fn(targetPath, discovery);
      const r = results[name];
      log(
        `[deep-scan] ${name}: ${r.status} — ${r.findings.length} findings in ${r.duration_ms}ms\n`
      );
    } catch (err) {
      // A failing scanner is recorded as an error result; the run continues.
      results[name] = {
        scanner: `${name}-scanner`,
        status: 'error',
        files_scanned: 0,
        duration_ms: 0,
        findings: [],
        counts: { critical: 0, high: 0, medium: 0, low: 0, info: 0 },
        error: err.message,
      };
      log(`[deep-scan] ${name}: ERROR — ${err.message}\n`);
    }
  }

  // Apply .llm-security-ignore rules
  const ignoreRules = loadIgnoreRules(targetPath);
  const suppressed = applyIgnoreRules(results, ignoreRules);
  if (suppressed > 0) {
    log(`[deep-scan] Suppressed ${suppressed} finding(s) via .llm-security-ignore\n`);
  }

  const totalDuration = Date.now() - totalStart;
  const output = envelope(targetPath, results, totalDuration);
  if (suppressed > 0) output.suppressed = suppressed;

  // Include log file path in JSON output (cross-platform — no shell redirect needed)
  output.log_file = logFilePath;

  // ---------------------------------------------------------------------------
  // Baseline diffing — compare against stored baseline and/or save new one
  // ---------------------------------------------------------------------------
  const pluginRoot = dirname(dirname(fileURLToPath(import.meta.url)));
  const baselinesDir = join(pluginRoot, 'reports', 'baselines');

  if (args.baseline) {
    const diff = diffAgainstBaseline(baselinesDir, targetPath, output);
    if (diff) {
      output.diff = diff;
      log(
        `[deep-scan] Baseline diff: ${diff.summary.new} new, ${diff.summary.resolved} resolved, ` +
          `${diff.summary.unchanged} unchanged, ${diff.summary.moved} moved ` +
          `(baseline from ${diff.summary.baseline_timestamp})\n`
      );
    } else {
      log(`[deep-scan] No baseline found for this target. Use --save-baseline to create one.\n`);
      output.diff = null;
    }
  }

  if (args.saveBaseline) {
    const savedPath = saveBaseline(baselinesDir, targetPath, output);
    output.baseline_saved = savedPath;
    log(`[deep-scan] Baseline saved: ${savedPath}\n`);
  }

  // Output: SARIF or JSON, to file (--output-file) or stdout
  const finalOutput = args.format === 'sarif' ? toSARIF(output) : output;
  const jsonStr = JSON.stringify(finalOutput, null, 2) + '\n';

  if (args.outputFile) {
    writeFileSync(args.outputFile, jsonStr);
    // Set after serialisation, so the written report does not contain it.
    output.output_file = args.outputFile;
    if (args.compact) {
      // stdout carries the one-line JSON summary below; findings go to stderr.
      writeCompactFindings(results, process.stderr);
    }
    process.stdout.write(
      JSON.stringify({ aggregate: output.aggregate, output_file: args.outputFile }) + '\n'
    );
  } else if (args.compact && args.format === 'json') {
    writeCompactFindings(results, process.stdout);
    const a = output.aggregate;
    process.stdout.write(
      `---\nVerdict: ${a.verdict} | Risk: ${a.risk_score}/100 | ` +
        `Findings: ${a.total_findings} (${a.counts.critical}C ${a.counts.high}H ${a.counts.medium}M ${a.counts.low}L ${a.counts.info}I) | ` +
        `Duration: ${totalDuration}ms\n`
    );
  } else {
    process.stdout.write(jsonStr);
  }

  // Summary banner to stderr + log file
  const agg = output.aggregate;
  log(
    `\n[deep-scan] === COMPLETE ===\n` +
      `[deep-scan] Verdict: ${agg.verdict} | Risk Score: ${agg.risk_score}/100\n` +
      `[deep-scan] Findings: ${agg.total_findings} total ` +
      `(${agg.counts.critical}C ${agg.counts.high}H ${agg.counts.medium}M ${agg.counts.low}L ${agg.counts.info}I)\n` +
      `[deep-scan] Scanners: ${agg.scanners_ok} ok, ${agg.scanners_error} error, ${agg.scanners_skipped} skipped\n` +
      `[deep-scan] Duration: ${totalDuration}ms\n`
  );

  // Exit code — use exitCode instead of exit() to allow stdout pipe buffers
  // to drain fully (process.exit() truncates >64KB on macOS)
  if (args.failOn !== null) {
    // FAIL_ON_LEVELS is ordered most → least severe, so the slice holds every
    // severity at or above the requested threshold.
    const threshold = FAIL_ON_LEVELS.indexOf(args.failOn);
    const exceeded = FAIL_ON_LEVELS.slice(0, threshold + 1).some(
      (sev) => (agg.counts[sev] || 0) > 0
    );
    process.exitCode = exceeded ? 1 : 0;
  } else if (agg.verdict === 'BLOCK') {
    process.exitCode = 2;
  } else if (agg.verdict === 'WARNING') {
    process.exitCode = 1;
  } else {
    process.exitCode = 0;
  }
}

main().catch((err) => {
  console.error(`Fatal error: ${err.message}`);
  process.exit(1);
});