From a9e377570c6a3cbc770e008ffb56f6b1019664d0 Mon Sep 17 00:00:00 2001 From: Kjell Tore Guttormsen Date: Sun, 19 Apr 2026 22:02:52 +0200 Subject: [PATCH] =?UTF-8?q?feat(llm-security):=20v7.0.0=20commit=203=20?= =?UTF-8?q?=E2=80=94=20policy-driven=20entropy=20thresholds?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds entropy section to DEFAULT_POLICY and wires it into entropy-scanner. Users can now tune false-positive tradeoffs without forking the scanner. Policy shape (.llm-security/policy.json): entropy: thresholds.{critical,high,medium}.{entropy,minLen} — numeric overrides suppress_extensions[] — additive ext skip suppress_line_patterns[] — additional regex suppress_paths[] — relPath substrings Wiring: entropy-scanner calls loadPolicy(targetPath) at scan entry (not orchestrator-passed — avoids signature churn across 10 scanners). Module- level state is reset per scan invocation. Scanner envelope now includes calibration.{policy_source, thresholds, files_skipped_by_*} for synthesizer transparency (Commit 5). Malformed user regex silently skipped. Missing policy.json → built-in defaults (backwards-compatible). entropy.test.mjs: 9/9 still green. 
Co-Authored-By: Claude Opus 4.7 --- .../llm-security/scanners/entropy-scanner.mjs | 119 +++++++++++++++++- .../scanners/lib/policy-loader.mjs | 13 ++ 2 files changed, 129 insertions(+), 3 deletions(-) diff --git a/plugins/llm-security/scanners/entropy-scanner.mjs b/plugins/llm-security/scanners/entropy-scanner.mjs index d521135..36c4017 100644 --- a/plugins/llm-security/scanners/entropy-scanner.mjs +++ b/plugins/llm-security/scanners/entropy-scanner.mjs @@ -14,6 +14,7 @@ import { readTextFile } from './lib/file-discovery.mjs'; import { finding, scannerResult } from './lib/output.mjs'; import { SEVERITY } from './lib/severity.mjs'; import { shannonEntropy, extractStringLiterals, isBase64Like, isHexBlob, redact } from './lib/string-utils.mjs'; +import { loadPolicy } from './lib/policy-loader.mjs'; // --------------------------------------------------------------------------- // File-extension suppression (context-aware, v7.0.0+) @@ -37,7 +38,23 @@ const ENTROPY_SKIP_EXTENSIONS = new Set([ function shouldSkipByExtension(fileInfo) { const lowerPath = (fileInfo.relPath || '').toLowerCase(); if (lowerPath.endsWith('.min.js') || lowerPath.endsWith('.min.css')) return true; - return ENTROPY_SKIP_EXTENSIONS.has((fileInfo.ext || '').toLowerCase()); + const ext = (fileInfo.ext || '').toLowerCase(); + if (ENTROPY_SKIP_EXTENSIONS.has(ext)) return true; + if (USER_SUPPRESS_EXTENSIONS.has(ext)) return true; + return false; +} + +/** + * @param {{ relPath: string }} fileInfo + * @returns {boolean} true if the file's relative path matches any user-policy skip-path substring. 
+ */ +function shouldSkipByPath(fileInfo) { + if (USER_SUPPRESS_PATHS.length === 0) return false; + const rel = fileInfo.relPath || ''; + for (const needle of USER_SUPPRESS_PATHS) { + if (typeof needle === 'string' && needle.length > 0 && rel.includes(needle)) return true; + } + return false; } // --------------------------------------------------------------------------- @@ -62,7 +79,7 @@ function shouldSkipByExtension(fileInfo) { * of some false positives that the analyst reviews. The false-positive suppression rules * above handle the most common benign cases. */ -const THRESHOLDS = { +const DEFAULT_THRESHOLDS = { // Large random-looking blob: very likely encoded/encrypted payload CRITICAL: { entropy: 5.4, minLen: 128 }, // Medium-sized high-entropy string: likely encoded secret or payload fragment @@ -71,6 +88,35 @@ const THRESHOLDS = { MEDIUM: { entropy: 4.7, minLen: 40 }, }; +/** + * Merge policy.entropy.thresholds over defaults. Policy keys are lowercase + * (critical/high/medium) to match other policy sections; defaults use uppercase + * internally. + * + * @param {object|undefined} policyThresholds + * @returns {typeof DEFAULT_THRESHOLDS} + */ +function resolveThresholds(policyThresholds) { + if (!policyThresholds) return DEFAULT_THRESHOLDS; + return { + CRITICAL: { ...DEFAULT_THRESHOLDS.CRITICAL, ...(policyThresholds.critical || {}) }, + HIGH: { ...DEFAULT_THRESHOLDS.HIGH, ...(policyThresholds.high || {}) }, + MEDIUM: { ...DEFAULT_THRESHOLDS.MEDIUM, ...(policyThresholds.medium || {}) }, + }; +} + +// Effective thresholds after policy-merge (set at scan() entry, read by classifyEntropy). +let THRESHOLDS = DEFAULT_THRESHOLDS; + +/** User-extensible line-level regex patterns compiled from policy. Set per scan. */ +let USER_SUPPRESS_LINE_PATTERNS = []; + +/** User-extensible relative-path substrings to skip entirely. Set per scan. */ +let USER_SUPPRESS_PATHS = []; + +/** User-extensible extension suppress list (merged with built-in). Set per scan. 
*/ +let USER_SUPPRESS_EXTENSIONS = new Set(); + /** Known hash/checksum filename patterns — false positive suppression. */ const LOCK_FILE_PATTERN = /(?:package-lock\.json|yarn\.lock|pnpm-lock\.yaml|\.lock)$/i; @@ -200,9 +246,33 @@ function isFalsePositive(str, line, absPath) { // 17. Error-message templates (throw new Error("...")) if (ERROR_TEMPLATE.test(line)) return true; + // 18. User-policy regex patterns from .llm-security/policy.json + for (const pattern of USER_SUPPRESS_LINE_PATTERNS) { + if (pattern.test(line)) return true; + } + return false; } +/** + * Compile a list of regex sources (strings) into RegExp objects. + * Invalid patterns are silently skipped (policy is best-effort). + * + * @param {string[]} sources + * @returns {RegExp[]} + */ +function compilePatterns(sources) { + if (!Array.isArray(sources)) return []; + const compiled = []; + for (const src of sources) { + if (typeof src !== 'string' || src.length === 0) continue; + try { + compiled.push(new RegExp(src)); + } catch { /* malformed regex — skip */ } + } + return compiled; +} + // --------------------------------------------------------------------------- // Severity classification // --------------------------------------------------------------------------- @@ -365,7 +435,37 @@ export async function scan(targetPath, discovery) { const allFindings = []; let filesScanned = 0; + // Load policy for this target and apply overrides to module-level state. + // Best-effort — on any error we fall back to built-in defaults. + let policySource = 'defaults'; + try { + const policy = loadPolicy(targetPath); + const ent = policy?.entropy || {}; + THRESHOLDS = resolveThresholds(ent.thresholds); + USER_SUPPRESS_LINE_PATTERNS = compilePatterns(ent.suppress_line_patterns); + USER_SUPPRESS_PATHS = Array.isArray(ent.suppress_paths) ? ent.suppress_paths.slice() : []; + USER_SUPPRESS_EXTENSIONS = new Set( + (Array.isArray(ent.suppress_extensions) ? 
ent.suppress_extensions : []) + .filter((e) => typeof e === 'string') + .map((e) => e.toLowerCase()), + ); + if ( + ent.thresholds || + (ent.suppress_line_patterns && ent.suppress_line_patterns.length > 0) || + (ent.suppress_paths && ent.suppress_paths.length > 0) || + (ent.suppress_extensions && ent.suppress_extensions.length > 0) + ) { + policySource = 'policy.json'; + } + } catch { + THRESHOLDS = DEFAULT_THRESHOLDS; + USER_SUPPRESS_LINE_PATTERNS = []; + USER_SUPPRESS_PATHS = []; + USER_SUPPRESS_EXTENSIONS = new Set(); + } + let filesSkippedByExtension = 0; + let filesSkippedByPath = 0; try { for (const fileInfo of discovery.files) { @@ -376,6 +476,12 @@ export async function scan(targetPath, discovery) { continue; } + // User-policy path-substring skip (additive, for project-specific noise). + if (shouldSkipByPath(fileInfo)) { + filesSkippedByPath++; + continue; + } + const content = await readTextFile(fileInfo.absPath); // readTextFile returns null for binary files or unreadable paths — skip silently @@ -391,10 +497,17 @@ export async function scan(targetPath, discovery) { const status = 'ok'; const result = scannerResult('entropy-scanner', status, allFindings, filesScanned, durationMs); - // Calibration stats for synthesizer — how many files the ext-policy excluded. + // Calibration stats for synthesizer — suppression & policy provenance. 
result.calibration = { files_skipped_by_extension: filesSkippedByExtension, + files_skipped_by_path: filesSkippedByPath, skip_extensions: [...ENTROPY_SKIP_EXTENSIONS, '.min.js', '.min.css'], + policy_source: policySource, + thresholds: { + critical: { entropy: THRESHOLDS.CRITICAL.entropy, minLen: THRESHOLDS.CRITICAL.minLen }, + high: { entropy: THRESHOLDS.HIGH.entropy, minLen: THRESHOLDS.HIGH.minLen }, + medium: { entropy: THRESHOLDS.MEDIUM.entropy, minLen: THRESHOLDS.MEDIUM.minLen }, + }, }; return result; } catch (err) { diff --git a/plugins/llm-security/scanners/lib/policy-loader.mjs b/plugins/llm-security/scanners/lib/policy-loader.mjs index 1cae6b1..56a8d93 100644 --- a/plugins/llm-security/scanners/lib/policy-loader.mjs +++ b/plugins/llm-security/scanners/lib/policy-loader.mjs @@ -50,6 +50,19 @@ const DEFAULT_POLICY = Object.freeze({ failOn: null, compact: false, }, + entropy: { + thresholds: { + critical: { entropy: 5.4, minLen: 128 }, + high: { entropy: 5.1, minLen: 64 }, + medium: { entropy: 4.7, minLen: 40 }, + }, + // User-extensible extension skip list — merged with built-in defaults. + suppress_extensions: [], + // Additional line-level regex sources — array of pattern strings; the entropy scanner compiles them at scan entry and skips malformed ones. + suppress_line_patterns: [], + // Substring matches against relative path — plain contains, no glob. + suppress_paths: [], + }, }); // Cache loaded policy per project root