New sarif-formatter.mjs converts scan envelope to OASIS SARIF 2.1.0 standard. Maps severity to SARIF levels, findings to results with locations and rules. scan-orchestrator accepts --format sarif|json (default: json). Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
283 lines
10 KiB
JavaScript
#!/usr/bin/env node
|
|
// scan-orchestrator.mjs — Entry point for deterministic deep-scan
|
|
// Single Node.js process. Imports all registered scanners, runs them sequentially,
|
|
// shares file discovery, outputs JSON envelope to stdout.
|
|
// Zero external dependencies.
|
|
|
|
import { resolve, join, dirname } from 'node:path';
|
|
import { existsSync, readFileSync, writeFileSync, appendFileSync } from 'node:fs';
|
|
import { fileURLToPath } from 'node:url';
|
|
import { tmpdir } from 'node:os';
|
|
import { discoverFiles } from './lib/file-discovery.mjs';
|
|
import { envelope, resetCounter } from './lib/output.mjs';
|
|
import { saveBaseline, diffAgainstBaseline, extractFindings } from './lib/diff-engine.mjs';
|
|
import { toSARIF } from './lib/sarif-formatter.mjs';
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// .llm-security-ignore support
|
|
// Format: one rule per line. Blank lines and # comments ignored.
|
|
// SCANNER:glob — ignore findings from SCANNER matching file glob
|
|
// glob — ignore findings from ALL scanners matching file glob
|
|
// Globs use minimatch-style: * matches within path segment, ** across segments.
|
|
// ---------------------------------------------------------------------------
|
|
/**
 * Parse `.llm-security-ignore` in the target directory into rule objects.
 *
 * Each non-blank, non-comment line becomes one rule:
 *   - `SCANNER:glob` (1–3 uppercase letters before the colon) scopes the
 *     glob to a single scanner prefix.
 *   - a bare glob applies to all scanners (`scanner: null`).
 *
 * @param {string} targetPath - Directory to look for the ignore file in.
 * @returns {Array<{scanner: ?string, pattern: string}>} Parsed rules;
 *   empty array when the file does not exist.
 */
function loadIgnoreRules(targetPath) {
  const ignorePath = join(targetPath, '.llm-security-ignore');
  if (!existsSync(ignorePath)) return [];
  const rules = [];
  for (const rawLine of readFileSync(ignorePath, 'utf8').split('\n')) {
    const entry = rawLine.trim();
    if (!entry || entry.startsWith('#')) continue;
    const sep = entry.indexOf(':');
    // A scanner scope is 1–3 uppercase letters immediately before the colon.
    const hasScannerScope =
      sep > 0 && sep <= 3 && /^[A-Z]+$/.test(entry.slice(0, sep));
    rules.push(
      hasScannerScope
        ? { scanner: entry.slice(0, sep), pattern: entry.slice(sep + 1) }
        : { scanner: null, pattern: entry }
    );
  }
  return rules;
}
|
|
|
|
/**
 * Compile a minimatch-style glob into an anchored RegExp.
 *
 * Rules: `**` matches across path segments (a `/` immediately after `**`
 * is absorbed so `**` + `/x` also matches a bare `x`), `*` matches within
 * a single segment, `?` matches exactly one non-slash character, and all
 * regex metacharacters are escaped to match literally.
 *
 * @param {string} glob - Glob pattern from an ignore rule.
 * @returns {RegExp} Full-match (anchored ^…$) regular expression.
 */
function globToRegex(glob) {
  const META = '.+^${}()|[]\\';
  let source = '^';
  for (let pos = 0; pos < glob.length; ) {
    const ch = glob[pos];
    if (ch === '*' && glob[pos + 1] === '*') {
      source += '.*';
      pos += 2;
      if (glob[pos] === '/') pos += 1; // absorb the slash in '**/'
    } else if (ch === '*') {
      source += '[^/]*';
      pos += 1;
    } else if (ch === '?') {
      source += '[^/]';
      pos += 1;
    } else {
      source += META.includes(ch) ? `\\${ch}` : ch;
      pos += 1;
    }
  }
  return new RegExp(source + '$');
}
|
|
|
|
/**
 * Remove findings matched by ignore rules, mutating `scannerResults` in place.
 *
 * A finding is suppressed when some rule's glob matches its `file` and the
 * rule is either unscoped (`scanner: null`) or scoped to the finding's
 * scanner prefix (the finding's own `scanner` field, or the first three
 * letters of the result key, upper-cased). Severity counts are rebuilt for
 * any result that lost findings.
 *
 * @param {Object<string, {findings: Array, counts: Object}>} scannerResults
 * @param {Array<{scanner: ?string, pattern: string}>} rules
 * @returns {number} Total number of findings suppressed across all scanners.
 */
function applyIgnoreRules(scannerResults, rules) {
  if (rules.length === 0) return 0;
  const compiled = rules.map(({ scanner, pattern }) => ({
    scanner,
    regex: globToRegex(pattern),
  }));
  let suppressed = 0;
  for (const [name, result] of Object.entries(scannerResults)) {
    const total = result.findings.length;
    result.findings = result.findings.filter((finding) => {
      const file = finding.file || '';
      const prefix = finding.scanner || name.toUpperCase().slice(0, 3);
      const matched = compiled.some(
        (rule) =>
          (!rule.scanner || rule.scanner === prefix) && rule.regex.test(file)
      );
      return !matched;
    });
    const removed = total - result.findings.length;
    suppressed += removed;
    // Severity counts only need rebuilding when something was dropped.
    if (removed > 0) {
      const counts = { critical: 0, high: 0, medium: 0, low: 0, info: 0 };
      for (const { severity } of result.findings) {
        counts[severity] = (counts[severity] || 0) + 1;
      }
      result.counts = counts;
    }
  }
  return suppressed;
}
|
|
|
|
// Import all scanners
|
|
import { scan as unicodeScan } from './unicode-scanner.mjs';
|
|
import { scan as entropyScan } from './entropy-scanner.mjs';
|
|
import { scan as permissionScan } from './permission-mapper.mjs';
|
|
import { scan as depScan } from './dep-auditor.mjs';
|
|
import { scan as taintScan } from './taint-tracer.mjs';
|
|
import { scan as gitScan } from './git-forensics.mjs';
|
|
import { scan as networkScan } from './network-mapper.mjs';
|
|
import { scan as memoryScan } from './memory-poisoning-scanner.mjs';
|
|
import { scan as supplyChainScan } from './supply-chain-recheck.mjs';
|
|
import { scan as tfaScan } from './toxic-flow-analyzer.mjs';
|
|
|
|
// Registry of all scanners, executed sequentially in this order.
// Each `fn` is awaited as fn(targetPath, discovery) — or
// fn(targetPath, discovery, priorResults) when `requiresPriorResults`
// is set, so such entries must stay after the scanners they consume.
const SCANNERS = [
  { name: 'unicode', fn: unicodeScan },
  { name: 'entropy', fn: entropyScan },
  { name: 'permission', fn: permissionScan },
  { name: 'dep', fn: depScan },
  { name: 'taint', fn: taintScan },
  { name: 'git', fn: gitScan },
  { name: 'network', fn: networkScan },
  { name: 'memory', fn: memoryScan },
  { name: 'supply-chain', fn: supplyChainScan },
  // Cross-scanner analysis: receives the accumulated `results` object.
  { name: 'toxic-flow', fn: tfaScan, requiresPriorResults: true },
];
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// CLI arg parsing — supports <target>, --log-file <path>, --output-file <path>,
// --format <json|sarif>, --baseline, --save-baseline
|
|
// ---------------------------------------------------------------------------
|
|
/**
 * Parse process argv into orchestrator options.
 *
 * Value flags (`--log-file`, `--output-file`, `--format`) consume the next
 * argument only when one is present and non-empty; otherwise the flag token
 * itself falls through to positional handling, matching prior behavior.
 * The first non-flag token becomes the scan target; extras are ignored.
 *
 * @param {string[]} argv - Full argv (indices 0–1 are node + script path).
 * @returns {{target: ?string, logFile: ?string, outputFile: ?string,
 *            baseline: boolean, saveBaseline: boolean, format: string}}
 */
function parseArgs(argv) {
  const parsed = {
    target: null,
    logFile: null,
    outputFile: null,
    baseline: false,
    saveBaseline: false,
    format: 'json',
  };
  const valueFlags = new Map([
    ['--log-file', 'logFile'],
    ['--output-file', 'outputFile'],
    ['--format', 'format'],
  ]);
  const boolFlags = new Map([
    ['--baseline', 'baseline'],
    ['--save-baseline', 'saveBaseline'],
  ]);
  for (let i = 2; i < argv.length; i++) {
    const arg = argv[i];
    if (valueFlags.has(arg) && argv[i + 1]) {
      parsed[valueFlags.get(arg)] = argv[++i];
    } else if (boolFlags.has(arg)) {
      parsed[boolFlags.get(arg)] = true;
    } else if (!parsed.target) {
      parsed.target = arg;
    }
  }
  return parsed;
}
|
|
|
|
/**
 * Orchestrator entry point.
 *
 * Pipeline: parse args → validate target → discover files once → run every
 * scanner sequentially (crash-isolated) → apply ignore rules → build the
 * JSON envelope → optional baseline diff/save → emit JSON or SARIF to
 * stdout or --output-file → print a summary banner to stderr/log.
 *
 * Exit codes: 0 = clean, 1 = WARNING (also usage / missing-target errors),
 * 2 = BLOCK. Stdout carries ONLY the machine-readable payload; all human
 * progress output goes to stderr and the log file.
 */
async function main() {
  const args = parseArgs(process.argv);
  if (!args.target) {
    // NOTE(review): usage text omits --output-file, --format, --baseline,
    // --save-baseline — consider listing every supported flag.
    console.error('Usage: node scan-orchestrator.mjs <target-path> [--log-file <path>]');
    process.exit(1);
  }

  const targetPath = resolve(args.target);
  if (!existsSync(targetPath)) {
    console.error(`Target path does not exist: ${targetPath}`);
    process.exit(1);
  }

  // Set up cross-platform log file (writes to both stderr and file).
  // Defaults to a timestamped file in the OS temp dir when --log-file is absent.
  const logFilePath = args.logFile || join(tmpdir(), `llm-security-scan-${Date.now()}.log`);
  writeFileSync(logFilePath, ''); // create/truncate
  // Mirror each progress message to stderr AND the log file; stdout stays
  // reserved for the JSON/SARIF payload.
  function log(msg) {
    process.stderr.write(msg);
    appendFileSync(logFilePath, msg);
  }

  const totalStart = Date.now();

  // Shared file discovery — done once, passed to all scanners
  let discovery;
  try {
    discovery = await discoverFiles(targetPath);
    // Log discovery summary to stderr (stdout is reserved for JSON)
    log(
      `[deep-scan] Discovered ${discovery.files.length} files` +
        ` (${discovery.skipped} skipped${discovery.truncated ? ', TRUNCATED' : ''})\n`
    );
  } catch (err) {
    console.error(`File discovery failed: ${err.message}`);
    process.exit(1);
  }

  // Run each scanner sequentially, catching errors per-scanner.
  // Scanners with requiresPriorResults receive accumulated results as 3rd arg.
  const results = {};
  for (const { name, fn, requiresPriorResults } of SCANNERS) {
    resetCounter(); // Reset finding counter per scanner for clean IDs
    log(`[deep-scan] Running ${name} scanner...\n`);
    try {
      results[name] = requiresPriorResults
        ? await fn(targetPath, discovery, results)
        : await fn(targetPath, discovery);
      const r = results[name];
      log(
        `[deep-scan] ${name}: ${r.status} — ${r.findings.length} findings in ${r.duration_ms}ms\n`
      );
    } catch (err) {
      // A single scanner crash must not abort the run: substitute a
      // synthetic error result with the same shape as a successful scan.
      results[name] = {
        scanner: `${name}-scanner`,
        status: 'error',
        files_scanned: 0,
        duration_ms: 0,
        findings: [],
        counts: { critical: 0, high: 0, medium: 0, low: 0, info: 0 },
        error: err.message,
      };
      log(`[deep-scan] ${name}: ERROR — ${err.message}\n`);
    }
  }

  // Apply .llm-security-ignore rules (mutates `results` in place)
  const ignoreRules = loadIgnoreRules(targetPath);
  const suppressed = applyIgnoreRules(results, ignoreRules);
  if (suppressed > 0) {
    log(`[deep-scan] Suppressed ${suppressed} finding(s) via .llm-security-ignore\n`);
  }

  const totalDuration = Date.now() - totalStart;
  const output = envelope(targetPath, results, totalDuration);
  if (suppressed > 0) output.suppressed = suppressed;

  // Include log file path in JSON output (cross-platform — no shell redirect needed)
  output.log_file = logFilePath;

  // ---------------------------------------------------------------------------
  // Baseline diffing — compare against stored baseline and/or save new one.
  // Baselines live under <plugin-root>/reports/baselines, where plugin root
  // is two directories above this module file.
  // ---------------------------------------------------------------------------
  const pluginRoot = dirname(dirname(fileURLToPath(import.meta.url)));
  const baselinesDir = join(pluginRoot, 'reports', 'baselines');

  if (args.baseline) {
    const diff = diffAgainstBaseline(baselinesDir, targetPath, output);
    if (diff) {
      output.diff = diff;
      log(
        `[deep-scan] Baseline diff: ${diff.summary.new} new, ${diff.summary.resolved} resolved, ` +
          `${diff.summary.unchanged} unchanged, ${diff.summary.moved} moved ` +
          `(baseline from ${diff.summary.baseline_timestamp})\n`
      );
    } else {
      // No stored baseline: report null so callers can distinguish
      // "not compared" from "compared, no differences".
      log(`[deep-scan] No baseline found for this target. Use --save-baseline to create one.\n`);
      output.diff = null;
    }
  }

  if (args.saveBaseline) {
    const savedPath = saveBaseline(baselinesDir, targetPath, output);
    output.baseline_saved = savedPath;
    log(`[deep-scan] Baseline saved: ${savedPath}\n`);
  }

  // Output: SARIF or JSON, to file (--output-file) or stdout.
  // NOTE(review): any --format value other than 'sarif' silently falls
  // back to JSON — confirm this fallback is intended or validate upstream.
  const finalOutput = args.format === 'sarif' ? toSARIF(output) : output;
  const jsonStr = JSON.stringify(finalOutput, null, 2) + '\n';
  if (args.outputFile) {
    writeFileSync(args.outputFile, jsonStr);
    output.output_file = args.outputFile;
    // Stdout gets only the compact aggregate (keeps caller context small)
    process.stdout.write(JSON.stringify({ aggregate: output.aggregate, output_file: args.outputFile }) + '\n');
  } else {
    process.stdout.write(jsonStr);
  }

  // Summary banner to stderr + log file
  const agg = output.aggregate;
  log(
    `\n[deep-scan] === COMPLETE ===\n` +
      `[deep-scan] Verdict: ${agg.verdict} | Risk Score: ${agg.risk_score}/100\n` +
      `[deep-scan] Findings: ${agg.total_findings} total ` +
      `(${agg.counts.critical}C ${agg.counts.high}H ${agg.counts.medium}M ${agg.counts.low}L ${agg.counts.info}I)\n` +
      `[deep-scan] Scanners: ${agg.scanners_ok} ok, ${agg.scanners_error} error, ${agg.scanners_skipped} skipped\n` +
      `[deep-scan] Duration: ${totalDuration}ms\n`
  );

  // Exit code based on verdict: 2 blocks, 1 warns, 0 passes — lets CI
  // callers gate on the process exit status alone.
  if (agg.verdict === 'BLOCK') process.exit(2);
  if (agg.verdict === 'WARNING') process.exit(1);
  process.exit(0);
}
|
|
|
|
// Top-level invocation: report any unhandled rejection from main() to
// stderr and exit non-zero so CI callers see the failure. (main() normally
// terminates itself via process.exit with a verdict-based code.)
main().catch(err => {
  console.error(`Fatal error: ${err.message}`);
  process.exit(1);
});
|