ktg-plugin-marketplace/plugins/llm-security/scanners/lib/severity.mjs
Kjell Tore Guttormsen 4aa5318bcb fix(llm-security): A2 batch — JSDoc arithmetic + co-monotonicity test + CaMeL tone-down
Closes A2 of v7.1.0 critical-review patch (docs/critical-review-2026-04-20.md):

- B4 (severity JSDoc): 4 critical = 93, not 90. Fixed in scanners/lib/severity.mjs:23
  and CHANGELOG.md v7.0.0 tier description. The actual computation has always been
  93 (70 + log2(5)*10 = 93.22 → round); only the docs were wrong.

- §5.4 co-monotonicity: new sweep test in tests/lib/severity.test.mjs over 15
  representative count vectors. Asserts that (verdict, riskBand) agree under the
  v7.0.0 contract for every case — catches future drift between riskScore tiers,
  verdict cutoffs, and riskBand cutoffs. Includes a B4 anchor test (riskScore
  {critical: 4} === 93) so doc/code drift fails loudly.

- B8 (CaMeL claims toned down): post-session-guard.mjs:646 comment block and
  CLAUDE.md:184 Defense Philosophy bullet now describe the implementation
  honestly — opportunistic byte-matching of truncated output fingerprints
  (first 200 bytes, SHA-256/16-hex), not semantic data-flow tracking.
  Trivially bypassed by mutation, summarisation, or re-encoding. Inspired by
  CaMeL (DeepMind 2025), but not a CaMeL capability-tracking implementation.

Tests: 1495 → 1511 (+16: 15 sweep cases + 1 B4 anchor). All green.
2026-04-29 11:49:08 +02:00

224 lines
7.2 KiB
JavaScript

// severity.mjs — Constants, risk score calculation, verdict logic
// Zero dependencies. Used by all scanners and the orchestrator.
/**
 * Canonical severity labels.
 *
 * Keys are the upper-case constant names; values are the lower-case strings
 * that appear in findings and in severity count objects. The object is frozen,
 * so entries cannot be added, removed or reassigned at runtime.
 */
export const SEVERITY = Object.freeze({
  CRITICAL: 'critical',
  HIGH: 'high',
  MEDIUM: 'medium',
  LOW: 'low',
  INFO: 'info',
});
// Legacy per-finding weights of the v1 sum-and-cap model.
// Only riskScoreV1() reads them; they are kept as a backwards-compat reference.
const SEVERITY_WEIGHTS_V1 = {
  critical: 25,
  high: 10,
  medium: 4,
  low: 1,
  info: 0,
};
/**
 * Calculate aggregate risk score from severity counts (v2 model — v7.0.0+).
 *
 * Severity-dominated, log-scaled within tier: only the highest severity with
 * a positive count contributes; lower severities never raise the score.
 * Replaces the v1 sum-and-cap formula which collapsed every non-trivial scan
 * to 100/Extreme regardless of actual risk distribution.
 *
 * Tiers (floor + min(cap, log2(n + 1) * factor), then rounded):
 *   Critical present → 70-95  (1=80, 2=86, 4=93, 10=95)
 *   High only        → 40-65  (1=48, 5=61, 17=65)
 *   Medium only      → 15-35  (1=20, 5=28, 50=35)
 *   Low only         → 1-11   (1=4, 10=11)
 *   None             → 0
 *
 * `info` findings do not influence the score. Missing, non-numeric or
 * non-positive counts are treated as zero.
 *
 * @param {{ critical: number, high: number, medium: number, low: number, info: number }} counts
 * @returns {number} 0-100 risk score (integer)
 */
export function riskScore(counts) {
  // Coerce before doing arithmetic: a numeric string such as '4' would
  // otherwise turn `count + 1` into string concatenation ('41') and inflate
  // the score. Anything that is not a usable number collapses to 0.
  const toCount = (value) => Number(value) || 0;

  const critical = toCount(counts.critical);
  const high = toCount(counts.high);
  const medium = toCount(counts.medium);
  const low = toCount(counts.low);

  let base;
  if (critical > 0) base = 70 + Math.min(25, Math.log2(critical + 1) * 10);
  else if (high > 0) base = 40 + Math.min(25, Math.log2(high + 1) * 8);
  else if (medium > 0) base = 15 + Math.min(20, Math.log2(medium + 1) * 5);
  else if (low > 0) base = 1 + Math.min(10, Math.log2(low + 1) * 3);
  else base = 0;

  // The tier caps already keep the value at or below 95; the outer clamp is a
  // safety net should the tier constants ever change.
  return Math.round(Math.min(100, base));
}
/**
 * Legacy v1 risk score (sum-and-cap model), retained for diff/comparison and
 * as a reference when re-calibrating CI thresholds.
 *
 * Each severity count is multiplied by its SEVERITY_WEIGHTS_V1 weight, the
 * products are summed, and the total is capped at 100. Missing or falsy
 * counts contribute nothing.
 *
 * @param {{ critical: number, high: number, medium: number, low: number, info: number }} counts
 * @returns {number} weighted sum, capped at 100
 */
export function riskScoreV1(counts) {
  const weightedTotal = Object.entries(SEVERITY_WEIGHTS_V1).reduce(
    (sum, [level, weight]) => sum + (counts[level] || 0) * weight,
    0,
  );
  return Math.min(weightedTotal, 100);
}
/**
 * Derive the gate verdict for a scan from its severity counts (v7.0.0
 * thresholds). The score is obtained from riskScore(counts); the cutoffs
 * mirror the v2 riskBand boundaries so verdict and band move together:
 *
 *   BLOCK   — at least one critical finding, or score >= 65 (Critical/Extreme band)
 *   WARNING — at least one high finding, or score >= 15 (Medium/High band)
 *   ALLOW   — everything else (Low band)
 *
 * @param {{ critical: number, high: number, medium: number, low: number, info: number }} counts
 * @returns {'BLOCK' | 'WARNING' | 'ALLOW'}
 */
export function verdict(counts) {
  const score = riskScore(counts);

  const hasCritical = (counts.critical || 0) >= 1;
  if (hasCritical || score >= 65) {
    return 'BLOCK';
  }

  const hasHigh = (counts.high || 0) >= 1;
  return hasHigh || score >= 15 ? 'WARNING' : 'ALLOW';
}
/**
 * Translate a 0-100 risk score into its human-readable band.
 * Band boundaries follow the v2 riskScore() tier structure (v7.0.0+):
 *
 *   0-14    Low       (no findings, or low-tier only)
 *   15-39   Medium    (medium-tier dominant)
 *   40-64   High      (high-tier dominant)
 *   65-84   Critical  (1 critical, or many high)
 *   85-100  Extreme   (multiple critical findings)
 *
 * Each band is selected by an inclusive upper bound, so a value that passes
 * none of the bounds (anything above 84, or NaN) is reported as 'Extreme'.
 *
 * @param {number} score - 0-100 risk score
 * @returns {'Low' | 'Medium' | 'High' | 'Critical' | 'Extreme'}
 */
export function riskBand(score) {
  // Inclusive ceilings in ascending order; the first one that fits wins.
  const ceilings = [
    [14, 'Low'],
    [39, 'Medium'],
    [64, 'High'],
    [84, 'Critical'],
  ];
  for (const [ceiling, label] of ceilings) {
    if (score <= ceiling) return label;
  }
  return 'Extreme';
}
/**
 * Convert a posture/audit pass rate into an A-F letter grade.
 *
 * Rules, evaluated in order:
 *   F — pass rate below 0.33, or three or more Critical findings
 *   A — pass rate >= 0.89 with no critical-category FAILs and no Critical findings
 *   B — pass rate >= 0.72 with no Critical findings
 *   C — pass rate >= 0.56
 *   D — pass rate >= 0.33
 *   F — anything left over (only reachable when passRate is NaN)
 *
 * @param {number} passRate - 0.0 to 1.0
 * @param {number} failsInCritCats - Number of FAIL results in critical categories (1, 2, 5)
 * @param {number} critCount - Number of Critical-severity findings
 * @returns {'A' | 'B' | 'C' | 'D' | 'F'}
 */
export function gradeFromPassRate(passRate, failsInCritCats = 0, critCount = 0) {
  if (critCount >= 3 || passRate < 0.33) {
    return 'F';
  }

  // The two top grades are only available when there are no Critical findings.
  if (critCount === 0) {
    if (failsInCritCats === 0 && passRate >= 0.89) return 'A';
    if (passRate >= 0.72) return 'B';
  }

  if (passRate >= 0.56) {
    return 'C';
  }
  return passRate >= 0.33 ? 'D' : 'F';
}
/**
 * Scanner prefix → OWASP LLM Top 10 categories.
 *
 * Keys are scanner prefixes; each value lists the LLMxx category IDs attached
 * to findings from that scanner. owaspCategorize() falls back to this table
 * when a finding carries no usable `owasp` field.
 *
 * Only the outer object is frozen — the category arrays are ordinary arrays.
 *
 * @type {Readonly<Record<string, string[]>>}
 */
export const OWASP_MAP = Object.freeze({
  UNI: ['LLM01'],
  ENT: ['LLM01', 'LLM03'],
  PRM: ['LLM06'],
  DEP: ['LLM03'],
  TNT: ['LLM01', 'LLM02'],
  GIT: ['LLM03'],
  NET: ['LLM02', 'LLM03'],
  TFA: ['LLM01', 'LLM02', 'LLM06'],
  MCI: ['LLM01', 'LLM02'],
  MEM: ['LLM01'],
  SCR: ['LLM03'],
  PST: ['LLM01', 'LLM06'],
});
/**
 * Scanner prefix → OWASP Agentic AI Top 10 (ASI) categories.
 *
 * Keys are scanner prefixes; each value lists the ASIxx category IDs
 * associated with that scanner.
 *
 * Only the outer object is frozen — the category arrays are ordinary arrays.
 *
 * @type {Readonly<Record<string, string[]>>}
 */
export const OWASP_AGENTIC_MAP = Object.freeze({
  UNI: ['ASI01'],
  ENT: ['ASI01', 'ASI04'],
  PRM: ['ASI02', 'ASI03'],
  DEP: ['ASI04'],
  TNT: ['ASI01', 'ASI05'],
  GIT: ['ASI04'],
  NET: ['ASI02', 'ASI05'],
  TFA: ['ASI01', 'ASI02', 'ASI05'],
  MCI: ['ASI01', 'ASI04'],
  MEM: ['ASI01', 'ASI02'],
  SCR: ['ASI04'],
  PST: ['ASI02', 'ASI03', 'ASI04', 'ASI05'],
});
/**
 * Scanner prefix → OWASP Skills Top 10 (AST) categories.
 *
 * Keys are scanner prefixes; each value lists the ASTxx category IDs
 * associated with that scanner.
 *
 * Only the outer object is frozen — the category arrays are ordinary arrays.
 *
 * @type {Readonly<Record<string, string[]>>}
 */
export const OWASP_SKILLS_MAP = Object.freeze({
  UNI: ['AST05'],
  ENT: ['AST02', 'AST05'],
  PRM: ['AST03'],
  DEP: ['AST06'],
  TNT: ['AST01', 'AST02'],
  GIT: ['AST06'],
  NET: ['AST02'],
  TFA: ['AST01', 'AST02', 'AST03'],
  MCI: ['AST01', 'AST02'],
  MEM: ['AST01', 'AST05'],
  SCR: ['AST06'],
  PST: ['AST01', 'AST03'],
});
/**
 * Scanner prefix → OWASP MCP Top 10 categories.
 *
 * Keys are scanner prefixes; each value lists the MCPxx category IDs
 * associated with that scanner.
 *
 * Only the outer object is frozen — the category arrays are ordinary arrays.
 *
 * @type {Readonly<Record<string, string[]>>}
 */
export const OWASP_MCP_MAP = Object.freeze({
  UNI: ['MCP06'],
  ENT: ['MCP01', 'MCP06'],
  PRM: ['MCP02', 'MCP07'],
  DEP: ['MCP04'],
  TNT: ['MCP05', 'MCP06'],
  GIT: ['MCP04'],
  NET: ['MCP02', 'MCP10'],
  TFA: ['MCP03', 'MCP06'],
  MCI: ['MCP03', 'MCP06', 'MCP09'],
  MEM: ['MCP05', 'MCP06'],
  SCR: ['MCP04'],
  PST: ['MCP02', 'MCP07'],
});
/**
 * Regex that extracts OWASP category IDs from free text: one of the framework
 * prefixes LLM, ASI, AST or MCP followed by one or two digits
 * (e.g. LLM01, ASI10, AST03, MCP1).
 *
 * The `g` flag makes `String.prototype.match` return every occurrence rather
 * than only the first.
 *
 * NOTE: the pattern is unanchored and does not range-check the number, so
 * single-digit forms are accepted for every prefix (not just MCP), values
 * outside 01-10 such as LLM00 or LLM99 also match, and an ID embedded in a
 * longer token is still picked up.
 */
const OWASP_PREFIX_RE = /(?:LLM|ASI|AST|MCP)\d{1,2}/g;
/**
 * Group findings by OWASP category across all frameworks.
 *
 * Category resolution per finding, in order:
 *   1. Every LLM/ASI/AST/MCP ID found in the finding's `owasp` field
 *      (a string, or anything whose string form contains IDs — e.g. an array
 *      of IDs).
 *   2. Otherwise, the OWASP_MAP entry for the finding's `scanner` prefix.
 *   3. Otherwise, the single bucket 'Unmapped'.
 *
 * A finding that resolves to several categories is counted once in each.
 * Within a bucket, `count` is the number of findings and the per-severity
 * tallies are only incremented for the five known severity names; any other
 * `severity` value still counts towards `count` but towards no tally.
 *
 * @param {object[]} findings - Array of finding objects with scanner, owasp, and severity fields
 * @returns {Record<string, { count: number, critical: number, high: number, medium: number, low: number, info: number }>}
 */
export function owaspCategorize(findings) {
  const hasOwn = Object.prototype.hasOwnProperty;
  const cats = {};

  for (const f of findings) {
    const categories = [];

    if (f.owasp) {
      // String() keeps the original behaviour for strings and additionally
      // tolerates non-string values instead of throwing on a missing .match.
      const match = String(f.owasp).match(OWASP_PREFIX_RE);
      if (match) categories.push(...match);
    }

    // Own-property check: a plain `OWASP_MAP[f.scanner]` lookup would also
    // resolve inherited members (e.g. 'constructor', '__proto__') and then
    // fail when spreading a non-iterable value.
    if (categories.length === 0 && hasOwn.call(OWASP_MAP, f.scanner)) {
      categories.push(...OWASP_MAP[f.scanner]);
    }

    if (categories.length === 0) categories.push('Unmapped');

    for (const cat of categories) {
      if (!cats[cat]) cats[cat] = { count: 0, critical: 0, high: 0, medium: 0, low: 0, info: 0 };
      const bucket = cats[cat];
      bucket.count++;

      // Only real severity tallies may be bumped: 'count' would double-count
      // the finding, and inherited names such as 'toString' would add junk
      // properties to the bucket.
      if (f.severity !== 'count' && hasOwn.call(bucket, f.severity)) {
        bucket[f.severity]++;
      }
    }
  }

  return cats;
}