feat(llm-security): v7.0.0 commit 3 — policy-driven entropy thresholds

Adds entropy section to DEFAULT_POLICY and wires it into entropy-scanner.
Users can now tune false-positive tradeoffs without forking the scanner.

Policy shape (.llm-security/policy.json):
  entropy:
    thresholds.{critical,high,medium}.{entropy,minLen}  — numeric overrides
    suppress_extensions[]                               — additive ext skip
    suppress_line_patterns[]                            — additional regex
    suppress_paths[]                                    — relPath substrings

Wiring: entropy-scanner calls loadPolicy(targetPath) at scan entry (not
orchestrator-passed — avoids signature churn across 10 scanners). Module-
level state is reset per scan invocation. Scanner envelope now includes
calibration.{policy_source, thresholds, files_skipped_by_*} for
synthesizer transparency (Commit 5).

Malformed user regex silently skipped. Missing policy.json → built-in
defaults (backwards-compatible).

entropy.test.mjs: 9/9 still green.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
Kjell Tore Guttormsen 2026-04-19 22:02:52 +02:00
commit a9e377570c
2 changed files with 129 additions and 3 deletions

View file

@ -14,6 +14,7 @@ import { readTextFile } from './lib/file-discovery.mjs';
import { finding, scannerResult } from './lib/output.mjs';
import { SEVERITY } from './lib/severity.mjs';
import { shannonEntropy, extractStringLiterals, isBase64Like, isHexBlob, redact } from './lib/string-utils.mjs';
import { loadPolicy } from './lib/policy-loader.mjs';
// ---------------------------------------------------------------------------
// File-extension suppression (context-aware, v7.0.0+)
@ -37,7 +38,23 @@ const ENTROPY_SKIP_EXTENSIONS = new Set([
function shouldSkipByExtension(fileInfo) {
  // Decide whether a file is excluded from entropy scanning by its name/extension.
  // Returns true when the file should be skipped, false otherwise.

  // Minified bundles are matched on the full (lower-cased) relative path because
  // their final extension alone (.js / .css) is not distinctive.
  const lowerPath = (fileInfo.relPath || '').toLowerCase();
  if (lowerPath.endsWith('.min.js') || lowerPath.endsWith('.min.css')) return true;

  // The built-in skip list and the user-policy skip list are both consulted.
  // A leftover single-line `return ENTROPY_SKIP_EXTENSIONS.has(...)` used to sit
  // here and made the user-policy check below unreachable.
  const ext = (fileInfo.ext || '').toLowerCase();
  return ENTROPY_SKIP_EXTENSIONS.has(ext) || USER_SUPPRESS_EXTENSIONS.has(ext);
}
/**
 * Check a file's relative path against the user-policy path-substring skip list.
 * Matching is a plain substring test; entries that are not non-empty strings
 * are ignored.
 *
 * @param {{ relPath: string }} fileInfo
 * @returns {boolean} true when relPath contains at least one configured substring.
 */
function shouldSkipByPath(fileInfo) {
  const needles = USER_SUPPRESS_PATHS;
  // Nothing configured: answer before touching fileInfo at all.
  if (needles.length === 0) return false;

  const relativePath = fileInfo.relPath || '';
  return needles.some(
    (candidate) =>
      typeof candidate === 'string' &&
      candidate.length > 0 &&
      relativePath.includes(candidate),
  );
}
// ---------------------------------------------------------------------------
@ -62,7 +79,7 @@ function shouldSkipByExtension(fileInfo) {
* of some false positives that the analyst reviews. The false-positive suppression rules
* above handle the most common benign cases.
*/
const THRESHOLDS = {
const DEFAULT_THRESHOLDS = {
// Large random-looking blob: very likely encoded/encrypted payload
CRITICAL: { entropy: 5.4, minLen: 128 },
// Medium-sized high-entropy string: likely encoded secret or payload fragment
@ -71,6 +88,35 @@ const THRESHOLDS = {
MEDIUM: { entropy: 4.7, minLen: 40 },
};
/**
 * Merge policy.entropy.thresholds over defaults. Policy keys are lowercase
 * (critical/high/medium) to match other policy sections; defaults use uppercase
 * internally.
 *
 * Only finite numeric `entropy` / `minLen` values override a default. Anything
 * else (strings, null, NaN, a non-object level entry, unknown keys) is ignored,
 * so a malformed policy cannot replace a numeric threshold with a value that
 * breaks the severity comparisons.
 *
 * @param {object|undefined} policyThresholds
 * @returns {typeof DEFAULT_THRESHOLDS} DEFAULT_THRESHOLDS itself when no policy
 *   thresholds are given, otherwise a fresh object with per-level copies.
 */
function resolveThresholds(policyThresholds) {
  if (!policyThresholds) return DEFAULT_THRESHOLDS;

  // Overlay one severity level: start from a copy of the defaults and accept
  // only finite numbers for the two known fields.
  const overlay = (defaults, override) => {
    const merged = { ...defaults };
    if (override !== null && typeof override === 'object') {
      for (const key of ['entropy', 'minLen']) {
        if (Number.isFinite(override[key])) merged[key] = override[key];
      }
    }
    return merged;
  };

  return {
    CRITICAL: overlay(DEFAULT_THRESHOLDS.CRITICAL, policyThresholds.critical),
    HIGH: overlay(DEFAULT_THRESHOLDS.HIGH, policyThresholds.high),
    MEDIUM: overlay(DEFAULT_THRESHOLDS.MEDIUM, policyThresholds.medium),
  };
}
// Effective thresholds after policy-merge (set at scan() entry, read by classifyEntropy).
// Starts as the built-in defaults; scan() reassigns it on every invocation.
// NOTE(review): this and the three bindings below are shared module-level state
// written by the async scan(); overlapping scan() calls for different targets
// would presumably see each other's values — confirm scans are never concurrent.
let THRESHOLDS = DEFAULT_THRESHOLDS;
/**
 * User-extensible line-level regex patterns compiled from policy. Set per scan.
 * Tested against the source line in isFalsePositive(); any match suppresses the finding.
 */
let USER_SUPPRESS_LINE_PATTERNS = [];
/**
 * User-extensible relative-path substrings to skip entirely. Set per scan.
 * Consulted by shouldSkipByPath() using a plain substring match.
 */
let USER_SUPPRESS_PATHS = [];
/**
 * User-extensible extension suppress list (merged with built-in). Set per scan.
 * Entries are lower-cased when scan() builds the set; consulted by shouldSkipByExtension().
 */
let USER_SUPPRESS_EXTENSIONS = new Set();
/** Known hash/checksum filename patterns — false positive suppression. */
const LOCK_FILE_PATTERN = /(?:package-lock\.json|yarn\.lock|pnpm-lock\.yaml|\.lock)$/i;
@ -200,9 +246,33 @@ function isFalsePositive(str, line, absPath) {
// 17. Error-message templates (throw new Error("<html>...</html>"))
if (ERROR_TEMPLATE.test(line)) return true;
// 18. User-policy regex patterns from .llm-security/policy.json
for (const pattern of USER_SUPPRESS_LINE_PATTERNS) {
if (pattern.test(line)) return true;
}
return false;
}
/**
 * Turn regex source strings into RegExp instances (no flags).
 * Policy input is best-effort: a non-array argument yields an empty list, and
 * entries that are not non-empty strings or that fail to compile are dropped
 * without raising.
 *
 * @param {string[]} sources
 * @returns {RegExp[]}
 */
function compilePatterns(sources) {
  if (!Array.isArray(sources)) return [];
  return sources.flatMap((source) => {
    if (typeof source !== 'string' || source === '') return [];
    try {
      return [new RegExp(source)];
    } catch {
      // malformed regex — drop it
      return [];
    }
  });
}
// ---------------------------------------------------------------------------
// Severity classification
// ---------------------------------------------------------------------------
@ -365,7 +435,37 @@ export async function scan(targetPath, discovery) {
const allFindings = [];
let filesScanned = 0;
// Load policy for this target and apply overrides to module-level state.
// Best-effort — on any error we fall back to built-in defaults.
let policySource = 'defaults';
try {
const policy = loadPolicy(targetPath);
const ent = policy?.entropy || {};
THRESHOLDS = resolveThresholds(ent.thresholds);
USER_SUPPRESS_LINE_PATTERNS = compilePatterns(ent.suppress_line_patterns);
USER_SUPPRESS_PATHS = Array.isArray(ent.suppress_paths) ? ent.suppress_paths.slice() : [];
USER_SUPPRESS_EXTENSIONS = new Set(
(Array.isArray(ent.suppress_extensions) ? ent.suppress_extensions : [])
.filter((e) => typeof e === 'string')
.map((e) => e.toLowerCase()),
);
if (
ent.thresholds ||
(ent.suppress_line_patterns && ent.suppress_line_patterns.length > 0) ||
(ent.suppress_paths && ent.suppress_paths.length > 0) ||
(ent.suppress_extensions && ent.suppress_extensions.length > 0)
) {
policySource = 'policy.json';
}
} catch {
THRESHOLDS = DEFAULT_THRESHOLDS;
USER_SUPPRESS_LINE_PATTERNS = [];
USER_SUPPRESS_PATHS = [];
USER_SUPPRESS_EXTENSIONS = new Set();
}
let filesSkippedByExtension = 0;
let filesSkippedByPath = 0;
try {
for (const fileInfo of discovery.files) {
@ -376,6 +476,12 @@ export async function scan(targetPath, discovery) {
continue;
}
// User-policy path-substring skip (additive, for project-specific noise).
if (shouldSkipByPath(fileInfo)) {
filesSkippedByPath++;
continue;
}
const content = await readTextFile(fileInfo.absPath);
// readTextFile returns null for binary files or unreadable paths — skip silently
@ -391,10 +497,17 @@ export async function scan(targetPath, discovery) {
const status = 'ok';
const result = scannerResult('entropy-scanner', status, allFindings, filesScanned, durationMs);
// Calibration stats for synthesizer — how many files the ext-policy excluded.
// Calibration stats for synthesizer — suppression & policy provenance.
result.calibration = {
files_skipped_by_extension: filesSkippedByExtension,
files_skipped_by_path: filesSkippedByPath,
skip_extensions: [...ENTROPY_SKIP_EXTENSIONS, '.min.js', '.min.css'],
policy_source: policySource,
thresholds: {
critical: { entropy: THRESHOLDS.CRITICAL.entropy, minLen: THRESHOLDS.CRITICAL.minLen },
high: { entropy: THRESHOLDS.HIGH.entropy, minLen: THRESHOLDS.HIGH.minLen },
medium: { entropy: THRESHOLDS.MEDIUM.entropy, minLen: THRESHOLDS.MEDIUM.minLen },
},
};
return result;
} catch (err) {

View file

@ -50,6 +50,19 @@ const DEFAULT_POLICY = Object.freeze({
failOn: null,
compact: false,
},
entropy: {
thresholds: {
critical: { entropy: 5.4, minLen: 128 },
high: { entropy: 5.1, minLen: 64 },
medium: { entropy: 4.7, minLen: 40 },
},
// User-extensible extension skip list — merged with built-in defaults.
suppress_extensions: [],
// Additional line-level regex sources — an array of pattern strings, compiled by entropy-scanner at scan entry (malformed patterns are skipped).
suppress_line_patterns: [],
// Substring matches against relative path — plain contains, no glob.
suppress_paths: [],
},
});
// Cache loaded policy per project root