// sarif-formatter.mjs — Converts scan-orchestrator envelope to SARIF 2.1.0
// OASIS SARIF standard: https://docs.oasis-open.org/sarif/sarif/v2.1.0/
// Zero external dependencies.

const SARIF_SCHEMA = 'https://raw.githubusercontent.com/oasis-tcs/sarif-spec/main/sarif-2.1/schema/sarif-schema-2.1.0.json';
const SARIF_VERSION = '2.1.0';
const TOOL_NAME = 'llm-security';
const TOOL_URI = 'https://git.fromaitochitta.com/open/claude-code-llm-security';

// Placeholders for findings that arrive without the fields a rule id is built from.
const UNKNOWN_SCANNER = 'unknown';
const UNTITLED_FINDING = 'Untitled finding';

/**
 * Map finding severity to SARIF level.
 * Matching is case-insensitive; anything unrecognised (including non-strings)
 * falls back to the least severe level.
 * @param {string} severity - critical|high|medium|low|info
 * @returns {string} SARIF level: error|warning|note
 */
function toLevel(severity) {
  const key = typeof severity === 'string' ? severity.toLowerCase() : severity;
  switch (key) {
    case 'critical':
    case 'high':
      return 'error';
    case 'medium':
      return 'warning';
    case 'low':
    case 'info':
    default:
      return 'note';
  }
}

/**
 * Return the finding's title, or a placeholder when it is missing, empty,
 * or not a string (so callers can safely call string methods on it).
 * @param {object} finding
 * @returns {string}
 */
function titleOf(finding) {
  const { title } = finding;
  return typeof title === 'string' && title !== '' ? title : UNTITLED_FINDING;
}

/**
 * Derive the rule id `<scanner>/<title-slug>` for a finding.
 * The slug is the title with whitespace runs replaced by `-`, lower-cased.
 * Single source of truth for both the rules table and the results.
 * @param {object} finding
 * @returns {string}
 */
function toRuleId(finding) {
  const scanner = finding.scanner ?? UNKNOWN_SCANNER;
  const slug = titleOf(finding).replace(/\s+/g, '-').toLowerCase();
  return `${scanner}/${slug}`;
}

/**
 * Normalise a finding's line to a value usable as SARIF `region.startLine`.
 * Accepts numbers and numeric strings; returns undefined for anything that
 * is not an integer >= 1 (null, 0, negatives, fractions, non-numeric text).
 * @param {*} line
 * @returns {number|undefined}
 */
function toStartLine(line) {
  const isNumber = typeof line === 'number';
  const isNonBlankString = typeof line === 'string' && line.trim() !== '';
  if (!isNumber && !isNonBlankString) {
    return undefined;
  }
  const value = Number(line);
  return Number.isInteger(value) && value >= 1 ? value : undefined;
}

/**
 * Flatten the findings of every scanner in the envelope into one array.
 * Scanner entries that are null or whose `findings` is not an array are
 * skipped, as are individual findings that are not objects.
 * @param {object} envelopeData
 * @returns {object[]}
 */
function collectFindings(envelopeData) {
  const collected = [];
  const scanners = envelopeData?.scanners;
  if (scanners === null || typeof scanners !== 'object') {
    return collected;
  }

  for (const scannerResult of Object.values(scanners)) {
    if (!Array.isArray(scannerResult?.findings)) {
      continue;
    }
    // Push one at a time: spreading a very large array into push() can
    // exceed the engine's argument-count limit and throw a RangeError.
    for (const finding of scannerResult.findings) {
      if (finding !== null && typeof finding === 'object') {
        collected.push(finding);
      }
    }
  }

  return collected;
}

/**
 * Build SARIF rules array from unique finding scanner+title combos.
 * The first finding seen for a rule id supplies that rule's description,
 * default level and tags.
 * @param {object[]} findings - finding objects
 * @returns {{ rules: object[], ruleIndex: Map }} rules in first-seen order and
 *   a map from rule id to its position in `rules`
 */
function buildRules(findings) {
  const ruleIndex = new Map();
  const rules = [];

  for (const f of findings) {
    const ruleId = toRuleId(f);
    if (ruleIndex.has(ruleId)) {
      continue;
    }
    const title = titleOf(f);
    ruleIndex.set(ruleId, rules.length);
    rules.push({
      id: ruleId,
      name: title,
      shortDescription: { text: title },
      fullDescription: { text: f.description || title },
      defaultConfiguration: { level: toLevel(f.severity) },
      properties: {
        tags: f.owasp ? [f.owasp] : [],
      },
    });
  }

  return { rules, ruleIndex };
}

/**
 * Convert scan-orchestrator envelope JSON to SARIF 2.1.0 format.
 *
 * Every finding under `envelopeData.scanners[*].findings` becomes one SARIF
 * result. A location is attached only when the finding has a `file`; a region
 * is attached only when its `line` is a positive integer (numeric strings are
 * coerced). An envelope without scanners yields a run with no results.
 *
 * @param {object} envelopeData - The full scan-orchestrator output
 * @param {string} [version='6.0.0'] - Tool version
 * @returns {object} SARIF 2.1.0 JSON
 */
export function toSARIF(envelopeData, version = '6.0.0') {
  const allFindings = collectFindings(envelopeData);
  const { rules, ruleIndex } = buildRules(allFindings);

  const results = allFindings.map((f) => {
    const ruleId = toRuleId(f);
    const result = {
      ruleId,
      ruleIndex: ruleIndex.get(ruleId),
      level: toLevel(f.severity),
      // Fall back to the (possibly placeholder) title so text is never undefined.
      message: { text: f.description || titleOf(f) },
      properties: {},
    };

    if (f.owasp) {
      result.properties.tags = [f.owasp];
    }

    if (f.recommendation) {
      result.properties.recommendation = f.recommendation;
    }

    if (f.file) {
      const physicalLocation = { artifactLocation: { uri: f.file } };
      const startLine = toStartLine(f.line);
      if (startLine !== undefined) {
        physicalLocation.region = { startLine };
      }
      result.locations = [{ physicalLocation }];
    }

    return result;
  });

  return {
    $schema: SARIF_SCHEMA,
    version: SARIF_VERSION,
    runs: [{
      tool: {
        driver: {
          name: TOOL_NAME,
          version,
          informationUri: TOOL_URI,
          rules,
        },
      },
      results,
    }],
  };
}
diffAgainstBaseline, extractFindings } from './lib/diff-engine.mjs'; +import { toSARIF } from './lib/sarif-formatter.mjs'; // --------------------------------------------------------------------------- // .llm-security-ignore support @@ -122,12 +123,14 @@ const SCANNERS = [ // CLI arg parsing — supports --log-file // --------------------------------------------------------------------------- function parseArgs(argv) { - const args = { target: null, logFile: null, outputFile: null, baseline: false, saveBaseline: false }; + const args = { target: null, logFile: null, outputFile: null, baseline: false, saveBaseline: false, format: 'json' }; for (let i = 2; i < argv.length; i++) { if (argv[i] === '--log-file' && argv[i + 1]) { args.logFile = argv[++i]; } else if (argv[i] === '--output-file' && argv[i + 1]) { args.outputFile = argv[++i]; + } else if (argv[i] === '--format' && argv[i + 1]) { + args.format = argv[++i]; } else if (argv[i] === '--baseline') { args.baseline = true; } else if (argv[i] === '--save-baseline') { @@ -245,8 +248,9 @@ async function main() { log(`[deep-scan] Baseline saved: ${savedPath}\n`); } - // Output JSON: to file (--output-file) or stdout - const jsonStr = JSON.stringify(output, null, 2) + '\n'; + // Output: SARIF or JSON, to file (--output-file) or stdout + const finalOutput = args.format === 'sarif' ? 
toSARIF(output) : output; + const jsonStr = JSON.stringify(finalOutput, null, 2) + '\n'; if (args.outputFile) { writeFileSync(args.outputFile, jsonStr); output.output_file = args.outputFile; diff --git a/plugins/llm-security/tests/scanners/sarif.test.mjs b/plugins/llm-security/tests/scanners/sarif.test.mjs new file mode 100644 index 0000000..6dac009 --- /dev/null +++ b/plugins/llm-security/tests/scanners/sarif.test.mjs @@ -0,0 +1,169 @@ +// sarif.test.mjs — Tests for SARIF 2.1.0 output formatter + +import { describe, it } from 'node:test'; +import assert from 'node:assert/strict'; +import { toSARIF } from '../../scanners/lib/sarif-formatter.mjs'; + +// --------------------------------------------------------------------------- +// Fixture: minimal scan-orchestrator envelope +// --------------------------------------------------------------------------- + +const EMPTY_ENVELOPE = { + meta: { target: '/tmp/test', timestamp: '2026-04-10T12:00:00.000Z' }, + scanners: {}, + aggregate: { total_findings: 0, counts: { critical: 0, high: 0, medium: 0, low: 0, info: 0 } }, +}; + +const ENVELOPE_WITH_FINDINGS = { + meta: { target: '/tmp/test', timestamp: '2026-04-10T12:00:00.000Z' }, + scanners: { + unicode: { + scanner: 'unicode', + status: 'ok', + findings: [ + { + id: 'DS-UNI-001', + scanner: 'UNI', + severity: 'critical', + title: 'Invisible Unicode characters detected', + description: 'File contains zero-width joiners that may hide malicious intent.', + file: 'src/hook.mjs', + line: 42, + owasp: 'LLM01', + recommendation: 'Remove invisible characters.', + }, + { + id: 'DS-UNI-002', + scanner: 'UNI', + severity: 'medium', + title: 'Homoglyph characters detected', + description: 'Cyrillic characters mixed with Latin.', + file: 'src/config.mjs', + line: null, + owasp: 'LLM01', + recommendation: 'Replace with ASCII equivalents.', + }, + ], + counts: { critical: 1, high: 0, medium: 1, low: 0, info: 0 }, + }, + entropy: { + scanner: 'entropy', + status: 'ok', + findings: [ + { 
+ id: 'DS-ENT-001', + scanner: 'ENT', + severity: 'high', + title: 'High-entropy string detected', + description: 'Possible hardcoded API key.', + file: '.env.example', + line: 5, + owasp: 'LLM03', + recommendation: 'Move secrets to environment variables.', + }, + ], + counts: { critical: 0, high: 1, medium: 0, low: 0, info: 0 }, + }, + }, + aggregate: { total_findings: 3 }, +}; + +// --------------------------------------------------------------------------- +// SARIF structure tests +// --------------------------------------------------------------------------- + +describe('sarif-formatter: structure', () => { + it('produces valid SARIF 2.1.0 shell', () => { + const sarif = toSARIF(EMPTY_ENVELOPE); + assert.equal(sarif.version, '2.1.0'); + assert.ok(sarif.$schema.includes('sarif-schema-2.1.0')); + assert.ok(Array.isArray(sarif.runs)); + assert.equal(sarif.runs.length, 1); + }); + + it('has tool driver info', () => { + const sarif = toSARIF(EMPTY_ENVELOPE); + const driver = sarif.runs[0].tool.driver; + assert.equal(driver.name, 'llm-security'); + assert.ok(driver.version); + assert.ok(driver.informationUri); + }); + + it('empty findings produce empty results array', () => { + const sarif = toSARIF(EMPTY_ENVELOPE); + assert.deepEqual(sarif.runs[0].results, []); + assert.deepEqual(sarif.runs[0].tool.driver.rules, []); + }); +}); + +// --------------------------------------------------------------------------- +// Findings conversion tests +// --------------------------------------------------------------------------- + +describe('sarif-formatter: findings', () => { + it('converts all findings to results', () => { + const sarif = toSARIF(ENVELOPE_WITH_FINDINGS); + assert.equal(sarif.runs[0].results.length, 3); + }); + + it('maps critical/high severity to error level', () => { + const sarif = toSARIF(ENVELOPE_WITH_FINDINGS); + const critResult = sarif.runs[0].results[0]; // critical + const highResult = sarif.runs[0].results[2]; // high + 
assert.equal(critResult.level, 'error'); + assert.equal(highResult.level, 'error'); + }); + + it('maps medium severity to warning level', () => { + const sarif = toSARIF(ENVELOPE_WITH_FINDINGS); + const medResult = sarif.runs[0].results[1]; // medium + assert.equal(medResult.level, 'warning'); + }); + + it('includes file as artifact location URI', () => { + const sarif = toSARIF(ENVELOPE_WITH_FINDINGS); + const result = sarif.runs[0].results[0]; + assert.equal(result.locations[0].physicalLocation.artifactLocation.uri, 'src/hook.mjs'); + }); + + it('includes line number as region startLine', () => { + const sarif = toSARIF(ENVELOPE_WITH_FINDINGS); + const result = sarif.runs[0].results[0]; + assert.equal(result.locations[0].physicalLocation.region.startLine, 42); + }); + + it('omits region when line is null', () => { + const sarif = toSARIF(ENVELOPE_WITH_FINDINGS); + const result = sarif.runs[0].results[1]; // line: null + assert.ok(result.locations[0].physicalLocation.artifactLocation); + assert.equal(result.locations[0].physicalLocation.region, undefined); + }); + + it('includes OWASP tags in properties', () => { + const sarif = toSARIF(ENVELOPE_WITH_FINDINGS); + const result = sarif.runs[0].results[0]; + assert.deepEqual(result.properties.tags, ['LLM01']); + }); + + it('generates unique rules from findings', () => { + const sarif = toSARIF(ENVELOPE_WITH_FINDINGS); + const rules = sarif.runs[0].tool.driver.rules; + assert.equal(rules.length, 3); // 3 unique title+scanner combos + assert.ok(rules[0].id.startsWith('UNI/')); + assert.ok(rules[2].id.startsWith('ENT/')); + }); + + it('results reference correct rule index', () => { + const sarif = toSARIF(ENVELOPE_WITH_FINDINGS); + for (const result of sarif.runs[0].results) { + assert.ok(typeof result.ruleIndex === 'number'); + assert.ok(result.ruleIndex >= 0); + assert.ok(result.ruleIndex < sarif.runs[0].tool.driver.rules.length); + } + }); + + it('accepts custom version', () => { + const sarif = 
toSARIF(EMPTY_ENVELOPE, '7.0.0'); + assert.equal(sarif.runs[0].tool.driver.version, '7.0.0'); + }); +});