ktg-plugin-marketplace/plugins/llm-security/scanners/lib/severity.mjs
Kjell Tore Guttormsen 6f86de937a feat(llm-security)!: v7.0.0 commit 6 — tests, docs, version bump
Final commit in the trustworthy-scoring series. Bundles verdict cutoff
alignment, the last suite of tests, and all documentation touch-points
that quote version numbers or describe v7.0.0 behaviour.

Verdict/band co-monotonicity
- `scanners/lib/severity.mjs` — verdict cutoffs moved from 61/21 to 65/15
  so that `BLOCK >= 65` and `WARNING >= 15` lock onto the v2 riskBand()
  boundaries. Prevents "BLOCK / Medium band" contradictions under the v2 formula.

Scanner hardening (bug fixes from v7.0.0 testing)
- `scanners/entropy-scanner.mjs` — `policy_source` now uses
  `existsSync('.llm-security/policy.json')` instead of value-based check.
  Old heuristic always reported 'policy.json' because DEFAULT_POLICY now
  carries an `entropy.thresholds` section.
- `scanners/lib/file-discovery.mjs` — `.sass` and GPU shader extensions
  (`.glsl, .frag, .vert, .shader, .wgsl`) added to TEXT_EXTENSIONS. Without
  this, shader files were invisible to file-discovery, so they were never
  counted as skipped by the entropy-scanner extension filter.

Tests
- `tests/scanners/entropy-context.test.mjs` (new, 24 tests) — A. File-ext
  skip (4), B. Line-level rules 11-17 (8), C. Policy overrides (3).
  Fixtures generate 80-char base64 payloads at runtime via
  `crypto.randomBytes` to dodge the plugin's own pre-edit credential hook
  on the test source.
- `tests/lib/severity.test.mjs` — rewritten with v2 scoring table (70
  tests total, was 52).
- `tests/lib/output.test.mjs:243` — "1 critical = score 80" under v2
  (was 25 under v1).
- Full suite: 1485/1485 green (was 1461).

Docs
- `CHANGELOG.md` — v7.0.0 entry with BREAKING CHANGES section.
- `README.md` (plugin + marketplace root) — version badge, history table,
  plugin-card version string, test count.
- `CLAUDE.md` — header version, "v7.0.0 — Trustworthy scoring" summary
  paragraph at the top.
- `docs/security-hardening-guide.md` — new section 6 "Calibration & false
  positives" documenting v2 formula, context-aware entropy scanner,
  typosquat allowlist, and §6.4 tuning workflow. Existing "Recommended
  baseline" section renumbered to §7.

Version bump
- `6.6.0 -> 7.0.0` across package.json, .claude-plugin/plugin.json,
  scanners/ide-extension-scanner.mjs VERSION const, README badge,
  CLAUDE.md header, marketplace root README card.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-04-19 22:26:35 +02:00

224 lines
7.2 KiB
JavaScript

// severity.mjs — Constants, risk score calculation, verdict logic
// Zero dependencies. Used by all scanners and the orchestrator.
/**
 * Canonical severity labels, listed from most to least severe.
 * The lowercase values are the same strings used as keys in the severity
 * count objects consumed by riskScore() and verdict() in this module.
 * Frozen so the label set cannot be modified at runtime.
 */
export const SEVERITY = Object.freeze({
CRITICAL: 'critical',
HIGH: 'high',
MEDIUM: 'medium',
LOW: 'low',
INFO: 'info',
});
// Legacy per-finding weights — used only by riskScoreV1() for backwards-compat reference.
// Frozen like the other constant tables in this module so the reference values
// cannot be altered at runtime.
const SEVERITY_WEIGHTS_V1 = Object.freeze({ critical: 25, high: 10, medium: 4, low: 1, info: 0 });
/**
 * Aggregate risk score from severity counts (v2 model — v7.0.0+).
 *
 * Only the most severe tier that has at least one finding contributes: the
 * score is that tier's floor plus a log2-scaled bonus that is capped per tier.
 * Counts in less severe tiers (and `info`) do not change the result.
 *
 * Tier          floor  cap   worked examples (count=score)
 *   Critical     70    +25   1=80, 2=86, 4=93, 5+=95
 *   High         40    +25   1=48, 5=61, 8+=65
 *   Medium       15    +20   1=20, 5=28, 15+=35
 *   Low           1    +10   1=4, 10=11
 *   None                     0
 *
 * @param {{ critical: number, high: number, medium: number, low: number, info: number }} counts
 * @returns {number} 0-100 risk score (rounded to the nearest integer)
 */
export function riskScore(counts) {
  // [findings in tier, floor, bonus cap, log2 multiplier], most severe first.
  const tiers = [
    [counts.critical || 0, 70, 25, 10],
    [counts.high || 0, 40, 25, 8],
    [counts.medium || 0, 15, 20, 5],
    [counts.low || 0, 1, 10, 3],
  ];

  for (const [found, floor, bonusCap, multiplier] of tiers) {
    if (found > 0) {
      const bonus = Math.min(bonusCap, Math.log2(found + 1) * multiplier);
      return Math.round(Math.min(100, floor + bonus));
    }
  }

  // No critical/high/medium/low findings at all.
  return 0;
}
/**
 * Legacy v1 risk score (sum-and-cap model) — kept for diff/comparison and
 * re-calibration reference only; the v2 riskScore() is the current formula.
 *
 * Each finding contributes its SEVERITY_WEIGHTS_V1 weight and the total is
 * capped at 100.
 *
 * @param {{ critical: number, high: number, medium: number, low: number, info: number }} counts
 * @returns {number} Weighted sum of the counts, capped at 100
 */
export function riskScoreV1(counts) {
  let weightedSum = 0;
  for (const level of ['critical', 'high', 'medium', 'low', 'info']) {
    // Missing or falsy counts are treated as zero findings.
    weightedSum += (counts[level] || 0) * SEVERITY_WEIGHTS_V1[level];
  }
  return Math.min(weightedSum, 100);
}
/**
 * Derive the verdict from severity counts (v7.0.0 thresholds).
 *
 * The score cutoffs are the same values riskBand() uses for its band edges,
 * so verdict and band move together:
 *   BLOCK    critical >= 1 OR score >= 65   (Critical/Extreme band)
 *   WARNING  high >= 1 OR score >= 15       (Medium/High band)
 *   ALLOW    otherwise                      (Low band)
 *
 * @param {{ critical: number, high: number, medium: number, low: number, info: number }} counts
 * @returns {'BLOCK' | 'WARNING' | 'ALLOW'}
 */
export function verdict(counts) {
  const score = riskScore(counts);
  const hasCritical = (counts.critical || 0) >= 1;
  const hasHigh = (counts.high || 0) >= 1;

  if (hasCritical || score >= 65) {
    return 'BLOCK';
  }
  return hasHigh || score >= 15 ? 'WARNING' : 'ALLOW';
}
/**
 * Map a 0-100 risk score to a human-readable risk band.
 * Cutoffs aligned to the v2 riskScore() tier structure (v7.0.0+):
 *   score < 15        Low       (no findings, or low-tier only)
 *   15 <= score < 40  Medium    (medium-tier dominant)
 *   40 <= score < 65  High      (high-tier dominant)
 *   65 <= score < 85  Critical  (1 critical, or many high)
 *   score >= 85       Extreme   (multiple critical findings)
 *
 * Bands are half-open intervals on the same cutoffs verdict() compares
 * against (`>= 15`, `>= 65`), so a fractional score such as 14.5 lands in the
 * band below the cutoff instead of the one above it. Integer scores map
 * exactly as the 0-14 / 15-39 / 40-64 / 65-84 / 85-100 ranges.
 *
 * @param {number} score - 0-100 risk score
 * @returns {'Low' | 'Medium' | 'High' | 'Critical' | 'Extreme'}
 */
export function riskBand(score) {
  if (score < 15) return 'Low';
  if (score < 40) return 'Medium';
  if (score < 65) return 'High';
  if (score < 85) return 'Critical';
  return 'Extreme';
}
/**
 * Calculate an A-F grade from a posture/audit pass rate.
 *
 * Rules, evaluated in order:
 *   F  pass rate below 0.33, or 3+ Critical findings
 *   A  pass rate >= 0.89 with no critical-category FAILs and no Critical findings
 *   B  pass rate >= 0.72 with no Critical findings
 *   C  pass rate >= 0.56
 *   D  pass rate >= 0.33
 *   F  anything else (e.g. a NaN pass rate)
 *
 * @param {number} passRate - 0.0 to 1.0
 * @param {number} failsInCritCats - Number of FAIL results in critical categories (1, 2, 5)
 * @param {number} critCount - Number of Critical-severity findings
 * @returns {'A' | 'B' | 'C' | 'D' | 'F'}
 */
export function gradeFromPassRate(passRate, failsInCritCats = 0, critCount = 0) {
  if (critCount >= 3 || passRate < 0.33) {
    return 'F';
  }

  // A and B are only reachable when there are no Critical findings at all.
  if (critCount === 0) {
    if (passRate >= 0.89 && failsInCritCats === 0) return 'A';
    if (passRate >= 0.72) return 'B';
  }

  return passRate >= 0.56 ? 'C' : passRate >= 0.33 ? 'D' : 'F';
}
/**
 * Scanner prefix to OWASP LLM Top 10 category mapping.
 *
 * Object.freeze is shallow, so each category array is frozen as well as the
 * table itself; a consumer of this shared export cannot alter the mapping for
 * everyone else by mutating an array in place.
 */
export const OWASP_MAP = Object.freeze({
  UNI: Object.freeze(['LLM01']),
  ENT: Object.freeze(['LLM01', 'LLM03']),
  PRM: Object.freeze(['LLM06']),
  DEP: Object.freeze(['LLM03']),
  TNT: Object.freeze(['LLM01', 'LLM02']),
  GIT: Object.freeze(['LLM03']),
  NET: Object.freeze(['LLM02', 'LLM03']),
  TFA: Object.freeze(['LLM01', 'LLM02', 'LLM06']),
  MCI: Object.freeze(['LLM01', 'LLM02']),
  MEM: Object.freeze(['LLM01']),
  SCR: Object.freeze(['LLM03']),
  PST: Object.freeze(['LLM01', 'LLM06']),
});
/**
 * Scanner prefix to OWASP Agentic AI Top 10 (ASI) category mapping.
 *
 * Object.freeze is shallow, so each category array is frozen as well as the
 * table itself; a consumer of this shared export cannot alter the mapping for
 * everyone else by mutating an array in place.
 */
export const OWASP_AGENTIC_MAP = Object.freeze({
  UNI: Object.freeze(['ASI01']),
  ENT: Object.freeze(['ASI01', 'ASI04']),
  PRM: Object.freeze(['ASI02', 'ASI03']),
  DEP: Object.freeze(['ASI04']),
  TNT: Object.freeze(['ASI01', 'ASI05']),
  GIT: Object.freeze(['ASI04']),
  NET: Object.freeze(['ASI02', 'ASI05']),
  TFA: Object.freeze(['ASI01', 'ASI02', 'ASI05']),
  MCI: Object.freeze(['ASI01', 'ASI04']),
  MEM: Object.freeze(['ASI01', 'ASI02']),
  SCR: Object.freeze(['ASI04']),
  PST: Object.freeze(['ASI02', 'ASI03', 'ASI04', 'ASI05']),
});
/**
 * Scanner prefix to OWASP Skills Top 10 (AST) category mapping.
 *
 * Object.freeze is shallow, so each category array is frozen as well as the
 * table itself; a consumer of this shared export cannot alter the mapping for
 * everyone else by mutating an array in place.
 */
export const OWASP_SKILLS_MAP = Object.freeze({
  UNI: Object.freeze(['AST05']),
  ENT: Object.freeze(['AST02', 'AST05']),
  PRM: Object.freeze(['AST03']),
  DEP: Object.freeze(['AST06']),
  TNT: Object.freeze(['AST01', 'AST02']),
  GIT: Object.freeze(['AST06']),
  NET: Object.freeze(['AST02']),
  TFA: Object.freeze(['AST01', 'AST02', 'AST03']),
  MCI: Object.freeze(['AST01', 'AST02']),
  MEM: Object.freeze(['AST01', 'AST05']),
  SCR: Object.freeze(['AST06']),
  PST: Object.freeze(['AST01', 'AST03']),
});
/**
 * Scanner prefix to OWASP MCP Top 10 category mapping.
 *
 * Object.freeze is shallow, so each category array is frozen as well as the
 * table itself; a consumer of this shared export cannot alter the mapping for
 * everyone else by mutating an array in place.
 */
export const OWASP_MCP_MAP = Object.freeze({
  UNI: Object.freeze(['MCP06']),
  ENT: Object.freeze(['MCP01', 'MCP06']),
  PRM: Object.freeze(['MCP02', 'MCP07']),
  DEP: Object.freeze(['MCP04']),
  TNT: Object.freeze(['MCP05', 'MCP06']),
  GIT: Object.freeze(['MCP04']),
  NET: Object.freeze(['MCP02', 'MCP10']),
  TFA: Object.freeze(['MCP03', 'MCP06']),
  MCI: Object.freeze(['MCP03', 'MCP06', 'MCP09']),
  MEM: Object.freeze(['MCP05', 'MCP06']),
  SCR: Object.freeze(['MCP04']),
  PST: Object.freeze(['MCP02', 'MCP07']),
});
/**
 * Regex matching all supported OWASP framework prefixes:
 * LLM01-LLM10, ASI01-ASI10, AST01-AST10, MCP01-MCP10 (MCP1-MCP10 also accepted).
 * Any one- or two-digit suffix matches; the number itself is not range-checked.
 */
const OWASP_PREFIX_RE = /(?:LLM|ASI|AST|MCP)\d{1,2}/g;

/**
 * Group findings by OWASP category across all frameworks.
 *
 * Categories for a finding are taken from its `owasp` field (every LLM/ASI/AST/MCP
 * identifier found in it); when none are found, from OWASP_MAP by scanner prefix;
 * and otherwise the finding is filed under 'Unmapped'.
 *
 * Each finding is counted at most once per category, even if its `owasp` field
 * repeats the same identifier. A non-string `owasp` value (e.g. an array of
 * identifiers) is stringified before matching rather than throwing.
 *
 * @param {object[]} findings - Array of finding objects with scanner, owasp, and severity fields
 * @returns {Record<string, { count: number, critical: number, high: number, medium: number, low: number, info: number }>}
 */
export function owaspCategorize(findings) {
  // Only these severities have a tally slot. Checking against an explicit list
  // keeps values such as 'count' or inherited names like 'toString' from
  // touching the tally object.
  const severityKeys = ['critical', 'high', 'medium', 'low', 'info'];
  const cats = {};

  for (const f of findings) {
    // A Set de-duplicates repeated identifiers while keeping first-seen order.
    const categories = new Set();

    if (f.owasp) {
      const ids = String(f.owasp).match(OWASP_PREFIX_RE) || [];
      for (const id of ids) categories.add(id);
    }

    // Own-property check so a scanner value like 'constructor' cannot resolve
    // to something inherited from Object.prototype.
    if (
      categories.size === 0 &&
      f.scanner &&
      Object.prototype.hasOwnProperty.call(OWASP_MAP, f.scanner)
    ) {
      for (const id of OWASP_MAP[f.scanner] || []) categories.add(id);
    }

    if (categories.size === 0) categories.add('Unmapped');

    for (const cat of categories) {
      if (!cats[cat]) {
        cats[cat] = { count: 0, critical: 0, high: 0, medium: 0, low: 0, info: 0 };
      }
      const tally = cats[cat];
      tally.count++;
      if (severityKeys.includes(f.severity)) tally[f.severity]++;
    }
  }

  return cats;
}