ktg-plugin-marketplace/plugins/llm-security/scanners/scan-orchestrator.mjs

279 lines
10 KiB
JavaScript

#!/usr/bin/env node
// scan-orchestrator.mjs — Entry point for deterministic deep-scan
// Single Node.js process. Imports all 10 scanners, runs them sequentially,
// shares file discovery, outputs JSON envelope to stdout.
// Zero external dependencies.
import { resolve, join, dirname } from 'node:path';
import { existsSync, readFileSync, writeFileSync, appendFileSync } from 'node:fs';
import { fileURLToPath } from 'node:url';
import { tmpdir } from 'node:os';
import { discoverFiles } from './lib/file-discovery.mjs';
import { envelope, resetCounter } from './lib/output.mjs';
import { saveBaseline, diffAgainstBaseline, extractFindings } from './lib/diff-engine.mjs';
// ---------------------------------------------------------------------------
// .llm-security-ignore support
// Format: one rule per line. Blank lines and # comments ignored.
// SCANNER:glob — ignore findings from SCANNER matching file glob
// glob — ignore findings from ALL scanners matching file glob
// Globs use minimatch-style: * matches within path segment, ** across segments.
// ---------------------------------------------------------------------------
/**
 * Load suppression rules from `<targetPath>/.llm-security-ignore`.
 *
 * Every non-blank line that does not start with `#` becomes one rule:
 *   - `SCANNER:glob` — scoped to one scanner; the prefix is 1–3 uppercase
 *     letters immediately followed by a colon.
 *   - `glob`         — applies to every scanner (`scanner` is `null`).
 *
 * @param {string} targetPath - Directory expected to contain the ignore file.
 * @returns {{scanner: (string|null), pattern: string}[]} Parsed rules, in file
 *   order. Empty when the ignore file does not exist.
 */
function loadIgnoreRules(targetPath) {
  const ignoreFile = join(targetPath, '.llm-security-ignore');
  if (!existsSync(ignoreFile)) return [];

  const rules = [];
  for (const raw of readFileSync(ignoreFile, 'utf8').split('\n')) {
    // trim() also strips the '\r' left behind by CRLF line endings.
    const line = raw.trim();
    if (!line || line.startsWith('#')) continue;

    const colonIdx = line.indexOf(':');
    const hasScannerPrefix =
      colonIdx > 0 && colonIdx <= 3 && /^[A-Z]+$/.test(line.slice(0, colonIdx));

    if (!hasScannerPrefix) {
      rules.push({ scanner: null, pattern: line });
      continue;
    }

    // Tolerate whitespace after the colon ("UNI: src/**"); without the trim the
    // leading space would become part of the glob and the rule could never match.
    const pattern = line.slice(colonIdx + 1).trim();

    // A scanner prefix with no glob is malformed. Skip it instead of compiling
    // an empty pattern, which would match (and hide) every finding with no file.
    if (!pattern) continue;

    rules.push({ scanner: line.slice(0, colonIdx), pattern });
  }
  return rules;
}
/**
 * Compile an ignore-file glob into an anchored regular expression.
 *
 * Supported syntax:
 *   - `**`  matches any characters, including `/`.
 *   - A `**` followed by `/` at the start of a path segment matches zero or
 *     more whole directories, so the name after it must begin its own segment
 *     (`** /x.js` without the space matches `x.js` and `a/b/x.js`, not `a/yx.js`).
 *   - `*`   matches any run of characters within a single path segment.
 *   - `?`   matches exactly one character other than `/`.
 *   - Regex metacharacters are escaped and matched literally.
 *
 * @param {string} glob - Glob pattern from an ignore rule.
 * @returns {RegExp} Regex that must match the whole path.
 */
function globToRegex(glob) {
  const REGEX_SPECIALS = '.+^${}()|[]\\';
  let source = '^';
  let i = 0;

  while (i < glob.length) {
    const c = glob[i];

    if (c === '*' && glob[i + 1] === '*') {
      const atSegmentStart = i === 0 || glob[i - 1] === '/';
      i += 2;
      if (glob[i] === '/') {
        i++; // consume the slash that follows **
        // A whole-segment globstar stands for zero or more directories. Emitting
        // a bare `.*` here would let the following name match as a suffix of a
        // longer file name and suppress unrelated findings.
        source += atSegmentStart ? '(?:.*/)?' : '.*';
      } else {
        source += '.*';
      }
    } else if (c === '*') {
      source += '[^/]*';
      i++;
    } else if (c === '?') {
      source += '[^/]';
      i++;
    } else if (REGEX_SPECIALS.includes(c)) {
      source += '\\' + c;
      i++;
    } else {
      source += c;
      i++;
    }
  }

  return new RegExp(source + '$');
}
/**
 * Drop findings that match any ignore rule, mutating `scannerResults` in place.
 *
 * A rule applies to a finding when the rule has no scanner prefix, or its
 * prefix equals the finding's `scanner` field (falling back to the first three
 * upper-cased characters of the result key). The finding is removed when the
 * rule's glob matches the finding's `file` (empty string when absent).
 * Severity counts are rebuilt only for results that actually lost findings.
 *
 * @param {Object<string, {findings: Object[], counts: Object}>} scannerResults
 *   Results keyed by scanner name.
 * @param {{scanner: (string|null), pattern: string}[]} rules - Parsed ignore rules.
 * @returns {number} Total number of findings removed across all scanners.
 */
function applyIgnoreRules(scannerResults, rules) {
  if (!rules.length) return 0;

  const matchers = rules.map(({ scanner, pattern }) => ({
    scanner,
    regex: globToRegex(pattern),
  }));

  let total = 0;
  for (const key of Object.keys(scannerResults)) {
    const result = scannerResults[key];
    const original = result.findings;
    const fallbackPrefix = key.toUpperCase().slice(0, 3);

    const kept = original.filter((finding) => {
      const path = finding.file || '';
      const prefix = finding.scanner || fallbackPrefix;
      const ignored = matchers.some(
        (m) => (!m.scanner || m.scanner === prefix) && m.regex.test(path)
      );
      return !ignored;
    });
    result.findings = kept;

    const dropped = original.length - kept.length;
    if (dropped <= 0) continue;
    total += dropped;

    // Rebuild the severity tally from the surviving findings.
    const tally = { critical: 0, high: 0, medium: 0, low: 0, info: 0 };
    for (const { severity } of kept) {
      tally[severity] = (tally[severity] || 0) + 1;
    }
    result.counts = tally;
  }
  return total;
}
// Import all scanners
import { scan as unicodeScan } from './unicode-scanner.mjs';
import { scan as entropyScan } from './entropy-scanner.mjs';
import { scan as permissionScan } from './permission-mapper.mjs';
import { scan as depScan } from './dep-auditor.mjs';
import { scan as taintScan } from './taint-tracer.mjs';
import { scan as gitScan } from './git-forensics.mjs';
import { scan as networkScan } from './network-mapper.mjs';
import { scan as memoryScan } from './memory-poisoning-scanner.mjs';
import { scan as supplyChainScan } from './supply-chain-recheck.mjs';
import { scan as tfaScan } from './toxic-flow-analyzer.mjs';
// Scanner registry. main() walks this array in order and awaits each `fn`
// before starting the next, so the order here is the execution order.
// `name` is used as the key in the results object and in log lines.
// Entries flagged `requiresPriorResults` are called with the results collected
// so far as a third argument.
const SCANNERS = [
{ name: 'unicode', fn: unicodeScan },
{ name: 'entropy', fn: entropyScan },
{ name: 'permission', fn: permissionScan },
{ name: 'dep', fn: depScan },
{ name: 'taint', fn: taintScan },
{ name: 'git', fn: gitScan },
{ name: 'network', fn: networkScan },
{ name: 'memory', fn: memoryScan },
{ name: 'supply-chain', fn: supplyChainScan },
// Kept last: the accumulated results it receives then include every scanner above.
{ name: 'toxic-flow', fn: tfaScan, requiresPriorResults: true },
];
// ---------------------------------------------------------------------------
// CLI arg parsing — supports --log-file <path>, --output-file <path>, --baseline, --save-baseline
// ---------------------------------------------------------------------------
/**
 * Parse the orchestrator's command-line arguments.
 *
 * Recognized options:
 *   --log-file <path>     path stored in `logFile`
 *   --output-file <path>  path stored in `outputFile`
 *   --baseline            sets `baseline` to true
 *   --save-baseline       sets `saveBaseline` to true
 * The first argument that is not one of the above becomes `target`; later
 * unrecognized arguments are ignored.
 *
 * @param {string[]} argv - Full argv array; parsing starts at index 2.
 * @returns {{target: (string|null), logFile: (string|null), outputFile: (string|null),
 *            baseline: boolean, saveBaseline: boolean}}
 */
function parseArgs(argv) {
  const args = { target: null, logFile: null, outputFile: null, baseline: false, saveBaseline: false };

  for (let i = 2; i < argv.length; i++) {
    const arg = argv[i];
    switch (arg) {
      // Value-taking options: a dangling flag with no value is ignored. It must
      // never fall through and be mistaken for the target path.
      case '--log-file':
        if (argv[i + 1]) args.logFile = argv[++i];
        break;
      case '--output-file':
        if (argv[i + 1]) args.outputFile = argv[++i];
        break;
      case '--baseline':
        args.baseline = true;
        break;
      case '--save-baseline':
        args.saveBaseline = true;
        break;
      default:
        if (!args.target) args.target = arg;
    }
  }
  return args;
}
/**
 * CLI entry point for the deep scan.
 *
 * Steps, in order:
 *   1. Parse argv and validate that the target path exists.
 *   2. Create/truncate the log file; progress goes to both stderr and that file.
 *   3. Run shared file discovery once.
 *   4. Run every entry of SCANNERS sequentially; a scanner that throws is
 *      recorded as an `error` result and does not stop the run.
 *   5. Apply `.llm-security-ignore` suppression rules to the results.
 *   6. Build the output via envelope(), optionally diff against / save a baseline.
 *   7. Emit JSON (full document to stdout, or full document to --output-file
 *      plus a compact aggregate on stdout).
 *   8. Exit with 2 for verdict BLOCK, 1 for WARNING, 0 otherwise.
 *
 * Usage and validation failures, and file-discovery failure, exit with code 1.
 *
 * @returns {Promise<void>} Does not resolve in practice — the process exits.
 */
async function main() {
  const args = parseArgs(process.argv);
  if (!args.target) {
    console.error(
      'Usage: node scan-orchestrator.mjs <target-path> [--log-file <path>] ' +
        '[--output-file <path>] [--baseline] [--save-baseline]'
    );
    process.exit(1);
  }

  const targetPath = resolve(args.target);
  if (!existsSync(targetPath)) {
    console.error(`Target path does not exist: ${targetPath}`);
    process.exit(1);
  }

  // Set up cross-platform log file (writes to both stderr and file)
  const logFilePath = args.logFile || join(tmpdir(), `llm-security-scan-${Date.now()}.log`);
  writeFileSync(logFilePath, ''); // create/truncate
  const log = (msg) => {
    process.stderr.write(msg);
    appendFileSync(logFilePath, msg);
  };

  const totalStart = Date.now();

  // Shared file discovery — done once, passed to all scanners
  let discovery;
  try {
    discovery = await discoverFiles(targetPath);
    // Log discovery summary to stderr (stdout is reserved for JSON)
    log(
      `[deep-scan] Discovered ${discovery.files.length} files` +
        ` (${discovery.skipped} skipped${discovery.truncated ? ', TRUNCATED' : ''})\n`
    );
  } catch (err) {
    console.error(`File discovery failed: ${err.message}`);
    process.exit(1);
  }

  // Run each scanner sequentially, catching errors per-scanner.
  // Scanners with requiresPriorResults receive accumulated results as 3rd arg.
  const results = {};
  for (const { name, fn, requiresPriorResults } of SCANNERS) {
    resetCounter(); // Reset finding counter per scanner for clean IDs
    log(`[deep-scan] Running ${name} scanner...\n`);
    try {
      results[name] = requiresPriorResults
        ? await fn(targetPath, discovery, results)
        : await fn(targetPath, discovery);
      const r = results[name];
      // Separator between status and count: without it the line reads "ok3 findings".
      log(
        `[deep-scan] ${name}: ${r.status} — ${r.findings.length} findings in ${r.duration_ms}ms\n`
      );
    } catch (err) {
      // Replace whatever was stored with a well-formed error result so later
      // steps can rely on `findings` and `counts` being present.
      results[name] = {
        scanner: `${name}-scanner`,
        status: 'error',
        files_scanned: 0,
        duration_ms: 0,
        findings: [],
        counts: { critical: 0, high: 0, medium: 0, low: 0, info: 0 },
        error: err.message,
      };
      log(`[deep-scan] ${name}: ERROR — ${err.message}\n`);
    }
  }

  // Apply .llm-security-ignore rules
  const ignoreRules = loadIgnoreRules(targetPath);
  const suppressed = applyIgnoreRules(results, ignoreRules);
  if (suppressed > 0) {
    log(`[deep-scan] Suppressed ${suppressed} finding(s) via .llm-security-ignore\n`);
  }

  const totalDuration = Date.now() - totalStart;
  const output = envelope(targetPath, results, totalDuration);
  if (suppressed > 0) output.suppressed = suppressed;
  // Include log file path in JSON output (cross-platform — no shell redirect needed)
  output.log_file = logFilePath;

  // ---------------------------------------------------------------------------
  // Baseline diffing — compare against stored baseline and/or save new one
  // ---------------------------------------------------------------------------
  // Baselines live under <plugin root>/reports/baselines, where the plugin root
  // is the parent of the directory containing this file.
  const pluginRoot = dirname(dirname(fileURLToPath(import.meta.url)));
  const baselinesDir = join(pluginRoot, 'reports', 'baselines');

  if (args.baseline) {
    const diff = diffAgainstBaseline(baselinesDir, targetPath, output);
    if (diff) {
      output.diff = diff;
      log(
        `[deep-scan] Baseline diff: ${diff.summary.new} new, ${diff.summary.resolved} resolved, ` +
          `${diff.summary.unchanged} unchanged, ${diff.summary.moved} moved ` +
          `(baseline from ${diff.summary.baseline_timestamp})\n`
      );
    } else {
      log(`[deep-scan] No baseline found for this target. Use --save-baseline to create one.\n`);
      output.diff = null;
    }
  }

  if (args.saveBaseline) {
    const savedPath = saveBaseline(baselinesDir, targetPath, output);
    output.baseline_saved = savedPath;
    log(`[deep-scan] Baseline saved: ${savedPath}\n`);
  }

  // Output JSON: to file (--output-file) or stdout
  const jsonStr = JSON.stringify(output, null, 2) + '\n';
  let stdoutPayload = jsonStr;
  if (args.outputFile) {
    writeFileSync(args.outputFile, jsonStr);
    output.output_file = args.outputFile;
    // Stdout gets only the compact aggregate (keeps caller context small)
    stdoutPayload =
      JSON.stringify({ aggregate: output.aggregate, output_file: args.outputFile }) + '\n';
  }
  // Wait for the write callback before going on to process.exit(). stdout
  // writes may be asynchronous depending on the platform and on what stdout is
  // attached to, and process.exit() does not wait for pending writes, so a
  // large JSON document could otherwise be truncated.
  await new Promise((done) => process.stdout.write(stdoutPayload, done));

  // Summary banner to stderr + log file
  const agg = output.aggregate;
  log(
    `\n[deep-scan] === COMPLETE ===\n` +
      `[deep-scan] Verdict: ${agg.verdict} | Risk Score: ${agg.risk_score}/100\n` +
      `[deep-scan] Findings: ${agg.total_findings} total ` +
      `(${agg.counts.critical}C ${agg.counts.high}H ${agg.counts.medium}M ${agg.counts.low}L ${agg.counts.info}I)\n` +
      `[deep-scan] Scanners: ${agg.scanners_ok} ok, ${agg.scanners_error} error, ${agg.scanners_skipped} skipped\n` +
      `[deep-scan] Duration: ${totalDuration}ms\n`
  );

  // Exit code based on verdict
  if (agg.verdict === 'BLOCK') process.exit(2);
  if (agg.verdict === 'WARNING') process.exit(1);
  process.exit(0);
}
// Last-resort handler: any rejection escaping main() is reported on stderr
// and the process exits with code 1.
main().catch((fatal) => {
  console.error(`Fatal error: ${fatal.message}`);
  process.exit(1);
});