feat(llm-security): v7.0.0 commit 3 — policy-driven entropy thresholds

Adds entropy section to DEFAULT_POLICY and wires it into entropy-scanner.
Users can now tune false-positive tradeoffs without forking the scanner.

Policy shape (.llm-security/policy.json):
  entropy:
    thresholds.{critical,high,medium}.{entropy,minLen} — numeric overrides
    suppress_extensions[] — additive ext skip
    suppress_line_patterns[] — additional regex
    suppress_paths[] — relPath substrings

Wiring: entropy-scanner calls loadPolicy(targetPath) at scan entry (not
orchestrator-passed — avoids signature churn across 10 scanners). Module-level
state is reset per scan invocation.

Scanner envelope now includes calibration.{policy_source, thresholds,
files_skipped_by_*} for synthesizer transparency (Commit 5).

Malformed user regexes are silently skipped. Missing policy.json → built-in
defaults (backwards-compatible).

entropy.test.mjs: 9/9 still green.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-04-19 22:02:52 +02:00 · 2026-04-19 22:02:52 +02:00 · a9e377570c
commit a9e377570c
parent e7f7df0fc8
2 changed files with 129 additions and 3 deletions
--- a/plugins/llm-security/scanners/entropy-scanner.mjs
+++ b/plugins/llm-security/scanners/entropy-scanner.mjs
@ -14,6 +14,7 @@ import { readTextFile } from './lib/file-discovery.mjs';
 import { finding, scannerResult } from './lib/output.mjs';
 import { SEVERITY } from './lib/severity.mjs';
 import { shannonEntropy, extractStringLiterals, isBase64Like, isHexBlob, redact } from './lib/string-utils.mjs';
+import { loadPolicy } from './lib/policy-loader.mjs';

 // ---------------------------------------------------------------------------
 // File-extension suppression (context-aware, v7.0.0+)
@ -37,7 +38,23 @@ const ENTROPY_SKIP_EXTENSIONS = new Set([
 function shouldSkipByExtension(fileInfo) {
  const lowerPath = (fileInfo.relPath || '').toLowerCase();
  if (lowerPath.endsWith('.min.js') || lowerPath.endsWith('.min.css')) return true;
-  return ENTROPY_SKIP_EXTENSIONS.has((fileInfo.ext || '').toLowerCase());
+  const ext = (fileInfo.ext || '').toLowerCase();
+  if (ENTROPY_SKIP_EXTENSIONS.has(ext)) return true;
+  if (USER_SUPPRESS_EXTENSIONS.has(ext)) return true;
+  return false;
+}
+
+/** Path-based skip driven by user policy: plain, case-sensitive substring match on relPath (no glob).
+ * @param {{ relPath: string }} fileInfo - a missing/empty relPath is treated as '' and never matches.
+ * @returns {boolean} true if the file's relative path matches any user-policy skip-path substring.
+ */
+function shouldSkipByPath(fileInfo) {
+  if (USER_SUPPRESS_PATHS.length === 0) return false; // no user path rules configured
+  const rel = fileInfo.relPath || '';
+  for (const needle of USER_SUPPRESS_PATHS) {
+    if (typeof needle === 'string' && needle.length > 0 && rel.includes(needle)) return true; // non-string / empty entries are ignored
+  }
+  return false;
 }

 // ---------------------------------------------------------------------------
@ -62,7 +79,7 @@ function shouldSkipByExtension(fileInfo) {
 *  of some false positives that the analyst reviews. The false-positive suppression rules
 *  above handle the most common benign cases.
 */
-const THRESHOLDS = {
+const DEFAULT_THRESHOLDS = {
  // Large random-looking blob: very likely encoded/encrypted payload
  CRITICAL: { entropy: 5.4, minLen: 128 },
  // Medium-sized high-entropy string: likely encoded secret or payload fragment
@ -71,6 +88,35 @@ const THRESHOLDS = {
  MEDIUM:   { entropy: 4.7, minLen: 40 },
 };

+/**
+ * Merge policy.entropy.thresholds over defaults, level by level. Policy keys are lowercase
+ * (critical/high/medium) to match other policy sections; defaults use uppercase
+ * internally. Override fields are spread in as-is — values are not type-checked here.
+ *
+ * @param {object|undefined} policyThresholds - falsy → DEFAULT_THRESHOLDS itself is returned (same reference)
+ * @returns {typeof DEFAULT_THRESHOLDS} a new object when overrides are given; a missing level keeps its defaults
+ */
+function resolveThresholds(policyThresholds) {
+  if (!policyThresholds) return DEFAULT_THRESHOLDS; // no overrides: share the defaults object
+  return {
+    CRITICAL: { ...DEFAULT_THRESHOLDS.CRITICAL, ...(policyThresholds.critical || {}) },
+    HIGH:     { ...DEFAULT_THRESHOLDS.HIGH,     ...(policyThresholds.high     || {}) },
+    MEDIUM:   { ...DEFAULT_THRESHOLDS.MEDIUM,   ...(policyThresholds.medium   || {}) },
+  };
+}
+
+// Effective thresholds after policy-merge (set at scan() entry, read by classifyEntropy). NOTE(review): this and the lists below are module-level and reassigned by every scan() call — overlapping async scans would overwrite each other's settings; confirm scans run one at a time.
+let THRESHOLDS = DEFAULT_THRESHOLDS;
+
+/** User-supplied line-level suppression patterns, compiled to RegExp from policy. Reassigned per scan. */
+let USER_SUPPRESS_LINE_PATTERNS = [];
+
+/** User-supplied relative-path substrings; a file whose relPath contains one is skipped entirely. Reassigned per scan. */
+let USER_SUPPRESS_PATHS = [];
+
+/** User-supplied extensions to skip (lowercased), checked in addition to the built-in ENTROPY_SKIP_EXTENSIONS. Reassigned per scan. */
+let USER_SUPPRESS_EXTENSIONS = new Set();
+
 /** Known hash/checksum filename patterns — false positive suppression. */
 const LOCK_FILE_PATTERN = /(?:package-lock\.json|yarn\.lock|pnpm-lock\.yaml|\.lock)$/i;

@ -200,9 +246,33 @@ function isFalsePositive(str, line, absPath) {
  // 17. Error-message templates (throw new Error("<html>...</html>"))
  if (ERROR_TEMPLATE.test(line)) return true;

+  // 18. User-policy regex patterns from .llm-security/policy.json
+  for (const pattern of USER_SUPPRESS_LINE_PATTERNS) {
+    if (pattern.test(line)) return true;
+  }
+
  return false;
 }

+/**
+ * Compile a list of regex sources (strings) into RegExp objects (constructed without flags).
+ * Invalid patterns, empty strings and non-string entries are silently skipped (policy is best-effort).
+ * NOTE(review): a non-array argument (e.g. a single pattern string) yields [] — confirm that is the intended policy contract.
+ * @param {string[]} sources
+ * @returns {RegExp[]}
+ */
+function compilePatterns(sources) {
+  if (!Array.isArray(sources)) return [];
+  const compiled = [];
+  for (const src of sources) {
+    if (typeof src !== 'string' || src.length === 0) continue;
+    try {
+      compiled.push(new RegExp(src));
+    } catch { /* malformed regex — skip */ }
+  }
+  return compiled;
+}
+
 // ---------------------------------------------------------------------------
 // Severity classification
 // ---------------------------------------------------------------------------
@ -365,7 +435,37 @@ export async function scan(targetPath, discovery) {
  const allFindings = [];
  let filesScanned = 0;

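+  // Load policy for this target and apply overrides to module-level state. Best-effort — on any error we fall back to built-in defaults.
+  // NOTE(review): DEFAULT_POLICY now ships entropy.thresholds; if loadPolicy() returns merged defaults, ent.thresholds is always truthy and policy_source is always 'policy.json' — confirm.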
+  let policySource = 'defaults';
+  try {
+    const policy = loadPolicy(targetPath);
+    const ent = policy?.entropy || {};
+    THRESHOLDS = resolveThresholds(ent.thresholds);
+    USER_SUPPRESS_LINE_PATTERNS = compilePatterns(ent.suppress_line_patterns);
+    USER_SUPPRESS_PATHS = Array.isArray(ent.suppress_paths) ? ent.suppress_paths.slice() : [];
+    USER_SUPPRESS_EXTENSIONS = new Set(
+      (Array.isArray(ent.suppress_extensions) ? ent.suppress_extensions : [])
+        .filter((e) => typeof e === 'string')
+        .map((e) => e.toLowerCase()),
+    );
+    if (
+      ent.thresholds ||
+      (ent.suppress_line_patterns && ent.suppress_line_patterns.length > 0) ||
+      (ent.suppress_paths && ent.suppress_paths.length > 0) ||
+      (ent.suppress_extensions && ent.suppress_extensions.length > 0)
+    ) {
+      policySource = 'policy.json';
+    }
+  } catch {
+    THRESHOLDS = DEFAULT_THRESHOLDS;
+    USER_SUPPRESS_LINE_PATTERNS = [];
+    USER_SUPPRESS_PATHS = [];
+    USER_SUPPRESS_EXTENSIONS = new Set();
+  }
+
  let filesSkippedByExtension = 0;
+  let filesSkippedByPath = 0;

  try {
    for (const fileInfo of discovery.files) {
@ -376,6 +476,12 @@ export async function scan(targetPath, discovery) {
        continue;
      }

+      // User-policy path-substring skip (additive, for project-specific noise).
+      if (shouldSkipByPath(fileInfo)) {
+        filesSkippedByPath++;
+        continue;
+      }
+
      const content = await readTextFile(fileInfo.absPath);

      // readTextFile returns null for binary files or unreadable paths — skip silently
@ -391,10 +497,17 @@ export async function scan(targetPath, discovery) {
    const status = 'ok';

    const result = scannerResult('entropy-scanner', status, allFindings, filesScanned, durationMs);
-    // Calibration stats for synthesizer — how many files the ext-policy excluded.
+    // Calibration stats for synthesizer — suppression & policy provenance.
    result.calibration = {
      files_skipped_by_extension: filesSkippedByExtension,
+      files_skipped_by_path: filesSkippedByPath,
      skip_extensions: [...ENTROPY_SKIP_EXTENSIONS, '.min.js', '.min.css'],
+      policy_source: policySource,
+      thresholds: {
+        critical: { entropy: THRESHOLDS.CRITICAL.entropy, minLen: THRESHOLDS.CRITICAL.minLen },
+        high:     { entropy: THRESHOLDS.HIGH.entropy,     minLen: THRESHOLDS.HIGH.minLen     },
+        medium:   { entropy: THRESHOLDS.MEDIUM.entropy,   minLen: THRESHOLDS.MEDIUM.minLen   },
+      },
    };
    return result;
  } catch (err) {
--- a/plugins/llm-security/scanners/lib/policy-loader.mjs
+++ b/plugins/llm-security/scanners/lib/policy-loader.mjs
@ -50,6 +50,19 @@ const DEFAULT_POLICY = Object.freeze({
    failOn: null,
    compact: false,
  },
+  entropy: {
+    thresholds: {
+      critical: { entropy: 5.4, minLen: 128 },
+      high:     { entropy: 5.1, minLen: 64 },
+      medium:   { entropy: 4.7, minLen: 40 },
+    },
+    // User-extensible extension skip list — merged with built-in defaults.
+    suppress_extensions: [],
+    // Additional line-level regex sources — an array of pattern strings; the entropy scanner compiles them at scan entry and skips invalid ones.
+    suppress_line_patterns: [],
+    // Substring matches against relative path — plain contains, no glob.
+    suppress_paths: [],
+  },
 });

 // Cache loaded policy per project root