feat(scanner): add --benchmark mode to attack-simulator with structured reporting
This commit is contained in:
parent
e2c8924074
commit
0765a5595e
2 changed files with 164 additions and 0 deletions
|
|
@ -629,6 +629,58 @@ function formatAdaptiveJson(fixedResults, adaptiveResults, durationMs) {
|
|||
return base;
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Benchmark report formatting (v6.0)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Build the structured report emitted by `--benchmark` mode.
 *
 * @param {Array<{category: string, passed: boolean}>} fixedResults
 *   One entry per scenario from the fixed run. A truthy `passed` is counted
 *   as "blocked", anything else as "bypassed".
 * @param {Array<{bypassed: boolean}>} adaptiveResults
 *   Entries from the adaptive run; only those with a truthy `bypassed` are counted.
 * @param {number} durationMs - Reported verbatim as `meta.duration_ms`.
 * @param {number} [adaptiveRounds=5]
 *   Mutation rounds assumed per blocked scenario when computing the adaptive
 *   denominator (`blocked * adaptiveRounds`).
 * @returns {{meta: object, summary: object, categories: object, methodology: string}}
 *   Report object. Summary rates are rounded to three decimals; per-category
 *   `block_rate` values are left unrounded.
 */
function formatBenchmarkJson(fixedResults, adaptiveResults, durationMs, adaptiveRounds = 5) {
  const total = fixedResults.length;
  const blocked = fixedResults.filter((r) => r.passed).length;
  const bypassed = total - blocked;
  const blockRate = total > 0 ? blocked / total : 0;

  // Per-category breakdown. Accumulate in a Map so that category names that
  // collide with Object.prototype members (e.g. "constructor") are counted
  // like any other name instead of resolving to an inherited property.
  const perCategory = new Map();
  for (const r of fixedResults) {
    const key = String(r.category);
    let stats = perCategory.get(key);
    if (!stats) {
      stats = { scenarios: 0, blocked: 0, bypassed: 0, block_rate: 0 };
      perCategory.set(key, stats);
    }
    stats.scenarios++;
    if (r.passed) stats.blocked++;
    else stats.bypassed++;
  }
  for (const stats of perCategory.values()) {
    stats.block_rate = stats.scenarios > 0 ? stats.blocked / stats.scenarios : 0;
  }
  // Object.fromEntries defines own data properties, so every key survives.
  const categories = Object.fromEntries(perCategory);

  // Adaptive stats
  const adaptiveBypasses = adaptiveResults.filter((r) => r.bypassed).length;
  const adaptiveTotal = blocked * adaptiveRounds;
  const rawAdaptiveRate = adaptiveTotal > 0 ? 1 - adaptiveBypasses / adaptiveTotal : 1;
  // More bypass entries than assumed attempts would push the rate below zero;
  // keep the reported value inside [0, 1].
  const adaptiveBlockRate = Math.min(1, Math.max(0, rawAdaptiveRate));

  return {
    meta: {
      timestamp: new Date().toISOString(),
      version: '6.0.0',
      node_version: process.version,
      scenarios_total: total,
      adaptive_rounds: adaptiveRounds,
      duration_ms: durationMs,
      mode: 'benchmark',
    },
    summary: {
      block_rate: Math.round(blockRate * 1000) / 1000,
      adaptive_block_rate: Math.round(adaptiveBlockRate * 1000) / 1000,
      total_blocked: blocked,
      total_bypassed: bypassed,
      adaptive_bypasses: adaptiveBypasses,
    },
    categories,
    methodology: `Data-driven simulation using ${total} scenarios across ${Object.keys(categories).length} categories. ` +
      `Fixed mode tests each scenario with original payloads. Adaptive mode applies ${adaptiveRounds} mutation rounds ` +
      '(homoglyph, encoding, zero-width, case alternation, synonym) to each blocked scenario. ' +
      'Block rate = blocked / total. Adaptive block rate = 1 - (adaptive_bypasses / (blocked * rounds)).',
  };
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Cleanup & CLI
|
||||
// ---------------------------------------------------------------------------
|
||||
|
|
@ -650,6 +702,7 @@ async function main() {
|
|||
const jsonMode = args.includes('--json');
|
||||
const verbose = args.includes('--verbose');
|
||||
const adaptive = args.includes('--adaptive');
|
||||
const benchmark = args.includes('--benchmark');
|
||||
|
||||
const valid = ['secrets', 'destructive', 'supply-chain', 'prompt-injection',
|
||||
'pathguard', 'mcp-output', 'session-trifecta', 'hybrid',
|
||||
|
|
@ -662,6 +715,46 @@ async function main() {
|
|||
const scenarios = loadScenarios(category);
|
||||
if (!scenarios.length) { process.stderr.write('No scenarios found.\n'); process.exit(1); }
|
||||
|
||||
// Benchmark mode: run all scenarios in fixed + adaptive, produce structured report
|
||||
if (benchmark) {
|
||||
if (!jsonMode) process.stderr.write(`Benchmark: running ${scenarios.length} scenarios (fixed + adaptive)...\n`);
|
||||
const start = Date.now();
|
||||
cleanupSessionState();
|
||||
|
||||
// Fixed run
|
||||
const fixedResults = [];
|
||||
for (const s of scenarios) {
|
||||
const r = await runScenario(s);
|
||||
fixedResults.push(r);
|
||||
}
|
||||
|
||||
// Adaptive run on blocked scenarios
|
||||
const adaptiveResults = [];
|
||||
const blockedScenarios = scenarios.filter((_, i) => fixedResults[i].passed);
|
||||
if (blockedScenarios.length > 0) {
|
||||
const { adaptiveResults: ar } = await runAdaptive(blockedScenarios, false, true);
|
||||
adaptiveResults.push(...ar);
|
||||
}
|
||||
|
||||
cleanupSessionState();
|
||||
const dur = Date.now() - start;
|
||||
const report = formatBenchmarkJson(fixedResults, adaptiveResults, dur);
|
||||
|
||||
if (jsonMode) {
|
||||
process.stdout.write(JSON.stringify(report, null, 2) + '\n');
|
||||
} else {
|
||||
// Human-readable summary to stderr, JSON to stdout
|
||||
const s = report.summary;
|
||||
process.stderr.write(`\nBenchmark Results:\n`);
|
||||
process.stderr.write(` Block rate (fixed): ${(s.block_rate * 100).toFixed(1)}% (${s.total_blocked}/${scenarios.length})\n`);
|
||||
process.stderr.write(` Block rate (adaptive): ${(s.adaptive_block_rate * 100).toFixed(1)}%\n`);
|
||||
process.stderr.write(` Adaptive bypasses: ${s.adaptive_bypasses}\n`);
|
||||
process.stderr.write(` Duration: ${dur}ms\n`);
|
||||
process.stdout.write(JSON.stringify(report, null, 2) + '\n');
|
||||
}
|
||||
process.exit(0);
|
||||
}
|
||||
|
||||
if (adaptive) {
|
||||
if (!jsonMode) process.stderr.write(`Running ${scenarios.length} attack scenarios in adaptive mode...\n`);
|
||||
const start = Date.now();
|
||||
|
|
@ -712,6 +805,8 @@ export {
|
|||
mutateHomoglyph, mutateEncoding, mutateZeroWidth, mutateCaseAlternation, mutateSynonym,
|
||||
MUTATION_FNS, applyMutationDeep, runAdaptiveMutations, loadMutationRules,
|
||||
formatAdaptiveReport, formatAdaptiveJson,
|
||||
// Benchmark export (v6.0)
|
||||
formatBenchmarkJson,
|
||||
};
|
||||
|
||||
const isDirectRun = process.argv[1] && resolve(process.argv[1]) === __filename;
|
||||
|
|
|
|||
69
plugins/llm-security/tests/scanners/benchmark.test.mjs
Normal file
69
plugins/llm-security/tests/scanners/benchmark.test.mjs
Normal file
|
|
@ -0,0 +1,69 @@
|
|||
// benchmark.test.mjs — Tests for --benchmark mode in attack-simulator.mjs
|
||||
// Verifies: flag parsing, report schema, block rates are valid numbers
|
||||
|
||||
import { describe, it } from 'node:test';
|
||||
import assert from 'node:assert/strict';
|
||||
import { resolve } from 'node:path';
|
||||
import { execFile } from 'node:child_process';
|
||||
import { fileURLToPath } from 'node:url';
|
||||
import { formatBenchmarkJson } from '../../scanners/attack-simulator.mjs';
|
||||
|
||||
const __dirname = fileURLToPath(new URL('.', import.meta.url));
|
||||
const SIMULATOR = resolve(__dirname, '../../scanners/attack-simulator.mjs');
|
||||
|
||||
/**
 * Run the attack simulator as a child process and collect its output.
 *
 * The returned promise never rejects: a failed run is reported through
 * `err` and a non-zero `code` so tests can assert on it directly.
 *
 * @param {string[]} args - CLI arguments appended after the simulator path.
 * @param {{timeout?: number}} [options] - `timeout` in milliseconds (default 120000).
 * @returns {Promise<{err: Error|null, stdout: string, stderr: string, code: number|string}>}
 */
function run(args, { timeout = 120_000 } = {}) {
  // The callback is named `done` so it does not shadow the `resolve`
  // imported from node:path at the top of this file.
  return new Promise((done) => {
    // process.execPath runs the child with the same Node binary as the tests,
    // independent of whatever `node` happens to be first on PATH.
    execFile(process.execPath, [SIMULATOR, ...args], { timeout }, (err, stdout, stderr) => {
      // A child terminated by a signal (e.g. killed on timeout) yields an
      // error whose `code` is null; report that as a failure, not as 0.
      const code = err ? (err.code ?? 1) : 0;
      done({ err, stdout, stderr, code });
    });
  });
}
|
||||
|
||||
// Covers the --benchmark CLI path end to end (schema of the JSON written to
// stdout) and the pure formatBenchmarkJson() formatter with mock results.
describe('attack-simulator --benchmark', () => {
  it('--benchmark --json produces valid JSON with required fields', async () => {
    const { err, stdout } = await run(['--benchmark', '--json']);
    // Fail with the child's own error instead of an opaque JSON.parse
    // exception when the simulator crashed or timed out.
    assert.ifError(err);
    const d = JSON.parse(stdout);

    // Meta fields
    assert.ok(d.meta, 'Should have meta');
    assert.ok(d.meta.timestamp, 'Should have timestamp');
    assert.ok(d.meta.version, 'Should have version');
    assert.ok(d.meta.node_version, 'Should have node_version');
    assert.equal(typeof d.meta.scenarios_total, 'number');

    // Summary fields
    assert.ok(d.summary, 'Should have summary');
    assert.equal(typeof d.summary.block_rate, 'number');
    assert.ok(d.summary.block_rate >= 0 && d.summary.block_rate <= 1, 'block_rate should be 0-1');
    assert.equal(typeof d.summary.adaptive_block_rate, 'number');
    assert.ok(d.summary.adaptive_block_rate >= 0 && d.summary.adaptive_block_rate <= 1);
    assert.equal(typeof d.summary.total_blocked, 'number');
    assert.equal(typeof d.summary.total_bypassed, 'number');

    // Categories
    assert.ok(d.categories, 'Should have categories');
    assert.equal(typeof d.categories, 'object');

    // Methodology
    assert.ok(d.methodology, 'Should have methodology string');
    assert.equal(typeof d.methodology, 'string');
  });

  it('formatBenchmarkJson returns valid structure from mock data', () => {
    const mockFixed = [
      { id: 'S01', name: 'test', category: 'secrets', passed: true, detail: '' },
      { id: 'S02', name: 'test2', category: 'secrets', passed: false, detail: 'gap' },
    ];
    const mockAdaptive = [
      { scenarioId: 'S01', mutation: 'homoglyph', round: 2, bypassed: true },
    ];
    const result = formatBenchmarkJson(mockFixed, mockAdaptive, 1000);

    assert.equal(result.meta.mode, 'benchmark');
    assert.equal(result.summary.block_rate, 0.5);
    assert.equal(result.summary.total_blocked, 1);
    assert.equal(result.summary.total_bypassed, 1);
    assert.equal(result.summary.adaptive_bypasses, 1);
    // 1 blocked scenario * 5 rounds = 5 attempts, 1 bypass => 1 - 1/5.
    assert.equal(typeof result.summary.adaptive_block_rate, 'number');
    assert.equal(result.summary.adaptive_block_rate, 0.8);
    assert.ok(result.categories.secrets);
    assert.equal(result.categories.secrets.scenarios, 2);
    assert.equal(result.categories.secrets.blocked, 1);
    assert.equal(result.categories.secrets.bypassed, 1);
    assert.equal(result.categories.secrets.block_rate, 0.5);
  });
});
|
||||
Loading…
Add table
Add a link
Reference in a new issue