feat(llm-security): v7.0.0 commit 3 — policy-driven entropy thresholds
Adds entropy section to DEFAULT_POLICY and wires it into entropy-scanner.
Users can now tune false-positive tradeoffs without forking the scanner.
Policy shape (.llm-security/policy.json):
entropy:
thresholds.{critical,high,medium}.{entropy,minLen} — numeric overrides
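suppress_extensions[] — extra file extensions to skip, added to the built-in skip list
suppress_line_patterns[] — additional regex sources; a finding is suppressed when its line matches one
suppress_paths[] — relPath substrings; a file whose relative path contains one is skipped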
Wiring: entropy-scanner calls loadPolicy(targetPath) at scan entry (not
orchestrator-passed — avoids signature churn across 10 scanners). Module-
level state is reset per scan invocation. Scanner envelope now includes
calibration.{policy_source, thresholds, files_skipped_by_*} for
synthesizer transparency (Commit 5).
Malformed user regexes are silently skipped. A missing policy.json falls back
to the built-in defaults (backwards-compatible).
entropy.test.mjs: 9/9 still green.
Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
parent
e7f7df0fc8
commit
a9e377570c
2 changed files with 129 additions and 3 deletions
|
|
@ -14,6 +14,7 @@ import { readTextFile } from './lib/file-discovery.mjs';
|
|||
import { finding, scannerResult } from './lib/output.mjs';
|
||||
import { SEVERITY } from './lib/severity.mjs';
|
||||
import { shannonEntropy, extractStringLiterals, isBase64Like, isHexBlob, redact } from './lib/string-utils.mjs';
|
||||
import { loadPolicy } from './lib/policy-loader.mjs';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// File-extension suppression (context-aware, v7.0.0+)
|
||||
|
|
@ -37,7 +38,23 @@ const ENTROPY_SKIP_EXTENSIONS = new Set([
|
|||
function shouldSkipByExtension(fileInfo) {
  // Minified bundles are matched on the path suffix rather than fileInfo.ext.
  const lowerPath = (fileInfo.relPath || '').toLowerCase();
  if (lowerPath.endsWith('.min.js') || lowerPath.endsWith('.min.css')) return true;

  // Extension match is case-insensitive. The built-in skip list and the
  // per-scan user list from policy are both consulted; the user list is
  // additive and can never un-skip a built-in extension.
  const ext = (fileInfo.ext || '').toLowerCase();
  if (ENTROPY_SKIP_EXTENSIONS.has(ext)) return true;
  if (USER_SUPPRESS_EXTENSIONS.has(ext)) return true;
  return false;
}
|
||||
|
||||
/**
 * User-policy path filter: checks the file's relative path against the
 * per-scan list of skip-path substrings (plain `includes`, no globbing).
 *
 * @param {{ relPath: string }} fileInfo
 * @returns {boolean} true when the relative path contains at least one
 *   non-empty string entry of USER_SUPPRESS_PATHS; false otherwise,
 *   including when the list is empty.
 */
function shouldSkipByPath(fileInfo) {
  // Nothing configured: bail out before touching fileInfo at all.
  if (USER_SUPPRESS_PATHS.length === 0) return false;

  const relPath = fileInfo.relPath || '';
  // Non-string and empty entries are ignored rather than treated as matches.
  return USER_SUPPRESS_PATHS.some(
    (fragment) => typeof fragment === 'string' && fragment.length > 0 && relPath.includes(fragment),
  );
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
|
|
@ -62,7 +79,7 @@ function shouldSkipByExtension(fileInfo) {
|
|||
* of some false positives that the analyst reviews. The false-positive suppression rules
|
||||
* above handle the most common benign cases.
|
||||
*/
|
||||
const THRESHOLDS = {
|
||||
const DEFAULT_THRESHOLDS = {
|
||||
// Large random-looking blob: very likely encoded/encrypted payload
|
||||
CRITICAL: { entropy: 5.4, minLen: 128 },
|
||||
// Medium-sized high-entropy string: likely encoded secret or payload fragment
|
||||
|
|
@ -71,6 +88,35 @@ const THRESHOLDS = {
|
|||
MEDIUM: { entropy: 4.7, minLen: 40 },
|
||||
};
|
||||
|
||||
/**
 * Merge policy.entropy.thresholds over defaults. Policy keys are lowercase
 * (critical/high/medium) to match other policy sections; defaults use uppercase
 * internally.
 *
 * Only finite numeric `entropy` / `minLen` overrides are applied. Anything else
 * (strings, NaN, Infinity, null, a non-object level) is ignored and the default
 * for that field is kept, so a typo in policy.json cannot silently disable a
 * severity tier through NaN comparisons.
 *
 * @param {object|undefined} policyThresholds
 * @returns {typeof DEFAULT_THRESHOLDS} DEFAULT_THRESHOLDS itself when no usable
 *   policy object is given, otherwise a fresh object (defaults are not mutated).
 */
function resolveThresholds(policyThresholds) {
  if (!policyThresholds || typeof policyThresholds !== 'object') return DEFAULT_THRESHOLDS;

  // Copy one severity level's defaults and overlay the validated overrides.
  const mergeLevel = (defaults, override) => {
    const merged = { ...defaults };
    if (override && typeof override === 'object') {
      for (const key of ['entropy', 'minLen']) {
        const value = override[key];
        if (typeof value === 'number' && Number.isFinite(value)) merged[key] = value;
      }
    }
    return merged;
  };

  return {
    CRITICAL: mergeLevel(DEFAULT_THRESHOLDS.CRITICAL, policyThresholds.critical),
    HIGH: mergeLevel(DEFAULT_THRESHOLDS.HIGH, policyThresholds.high),
    MEDIUM: mergeLevel(DEFAULT_THRESHOLDS.MEDIUM, policyThresholds.medium),
  };
}
|
||||
|
||||
// Effective thresholds after policy-merge (set at scan() entry, read by classifyEntropy).
|
||||
let THRESHOLDS = DEFAULT_THRESHOLDS;
|
||||
|
||||
/** User-extensible line-level regex patterns compiled from policy. Set per scan. */
|
||||
let USER_SUPPRESS_LINE_PATTERNS = [];
|
||||
|
||||
/** User-extensible relative-path substrings to skip entirely. Set per scan. */
|
||||
let USER_SUPPRESS_PATHS = [];
|
||||
|
||||
/** User-extensible extension suppress list (merged with built-in). Set per scan. */
|
||||
let USER_SUPPRESS_EXTENSIONS = new Set();
|
||||
|
||||
/** Known hash/checksum filename patterns — false positive suppression. */
|
||||
const LOCK_FILE_PATTERN = /(?:package-lock\.json|yarn\.lock|pnpm-lock\.yaml|\.lock)$/i;
|
||||
|
||||
|
|
@ -200,9 +246,33 @@ function isFalsePositive(str, line, absPath) {
|
|||
// 17. Error-message templates (throw new Error("<html>...</html>"))
|
||||
if (ERROR_TEMPLATE.test(line)) return true;
|
||||
|
||||
// 18. User-policy regex patterns from .llm-security/policy.json
|
||||
for (const pattern of USER_SUPPRESS_LINE_PATTERNS) {
|
||||
if (pattern.test(line)) return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
 * Compile regex sources into RegExp objects.
 *
 * Accepts either an array of source strings or a single source string, which
 * is treated as a one-element list. Any other input yields an empty list.
 * Non-string and empty entries are ignored, and patterns that fail to compile
 * are silently skipped (policy is best-effort).
 *
 * Patterns are compiled without flags, so the resulting RegExp objects carry
 * no `lastIndex` state between `.test()` calls.
 *
 * @param {string|string[]} sources
 * @returns {RegExp[]} compiled patterns, in input order
 */
function compilePatterns(sources) {
  const list = typeof sources === 'string' ? [sources] : sources;
  if (!Array.isArray(list)) return [];

  const compiled = [];
  for (const src of list) {
    if (typeof src !== 'string' || src.length === 0) continue;
    try {
      compiled.push(new RegExp(src));
    } catch {
      // Malformed regex — skip it; one bad entry must not disable the rest.
    }
  }
  return compiled;
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Severity classification
|
||||
// ---------------------------------------------------------------------------
|
||||
|
|
@ -365,7 +435,37 @@ export async function scan(targetPath, discovery) {
|
|||
const allFindings = [];
|
||||
let filesScanned = 0;
|
||||
|
||||
// Load policy for this target and apply overrides to module-level state.
|
||||
// Best-effort — on any error we fall back to built-in defaults.
|
||||
let policySource = 'defaults';
|
||||
try {
|
||||
const policy = loadPolicy(targetPath);
|
||||
const ent = policy?.entropy || {};
|
||||
THRESHOLDS = resolveThresholds(ent.thresholds);
|
||||
USER_SUPPRESS_LINE_PATTERNS = compilePatterns(ent.suppress_line_patterns);
|
||||
USER_SUPPRESS_PATHS = Array.isArray(ent.suppress_paths) ? ent.suppress_paths.slice() : [];
|
||||
USER_SUPPRESS_EXTENSIONS = new Set(
|
||||
(Array.isArray(ent.suppress_extensions) ? ent.suppress_extensions : [])
|
||||
.filter((e) => typeof e === 'string')
|
||||
.map((e) => e.toLowerCase()),
|
||||
);
|
||||
if (
|
||||
ent.thresholds ||
|
||||
(ent.suppress_line_patterns && ent.suppress_line_patterns.length > 0) ||
|
||||
(ent.suppress_paths && ent.suppress_paths.length > 0) ||
|
||||
(ent.suppress_extensions && ent.suppress_extensions.length > 0)
|
||||
) {
|
||||
policySource = 'policy.json';
|
||||
}
|
||||
} catch {
|
||||
THRESHOLDS = DEFAULT_THRESHOLDS;
|
||||
USER_SUPPRESS_LINE_PATTERNS = [];
|
||||
USER_SUPPRESS_PATHS = [];
|
||||
USER_SUPPRESS_EXTENSIONS = new Set();
|
||||
}
|
||||
|
||||
let filesSkippedByExtension = 0;
|
||||
let filesSkippedByPath = 0;
|
||||
|
||||
try {
|
||||
for (const fileInfo of discovery.files) {
|
||||
|
|
@ -376,6 +476,12 @@ export async function scan(targetPath, discovery) {
|
|||
continue;
|
||||
}
|
||||
|
||||
// User-policy path-substring skip (additive, for project-specific noise).
|
||||
if (shouldSkipByPath(fileInfo)) {
|
||||
filesSkippedByPath++;
|
||||
continue;
|
||||
}
|
||||
|
||||
const content = await readTextFile(fileInfo.absPath);
|
||||
|
||||
// readTextFile returns null for binary files or unreadable paths — skip silently
|
||||
|
|
@ -391,10 +497,17 @@ export async function scan(targetPath, discovery) {
|
|||
const status = 'ok';
|
||||
|
||||
const result = scannerResult('entropy-scanner', status, allFindings, filesScanned, durationMs);
|
||||
// Calibration stats for synthesizer — how many files the ext-policy excluded.
|
||||
// Calibration stats for synthesizer — suppression & policy provenance.
|
||||
result.calibration = {
|
||||
files_skipped_by_extension: filesSkippedByExtension,
|
||||
files_skipped_by_path: filesSkippedByPath,
|
||||
skip_extensions: [...ENTROPY_SKIP_EXTENSIONS, '.min.js', '.min.css'],
|
||||
policy_source: policySource,
|
||||
thresholds: {
|
||||
critical: { entropy: THRESHOLDS.CRITICAL.entropy, minLen: THRESHOLDS.CRITICAL.minLen },
|
||||
high: { entropy: THRESHOLDS.HIGH.entropy, minLen: THRESHOLDS.HIGH.minLen },
|
||||
medium: { entropy: THRESHOLDS.MEDIUM.entropy, minLen: THRESHOLDS.MEDIUM.minLen },
|
||||
},
|
||||
};
|
||||
return result;
|
||||
} catch (err) {
|
||||
|
|
|
|||
|
|
@ -50,6 +50,19 @@ const DEFAULT_POLICY = Object.freeze({
|
|||
failOn: null,
|
||||
compact: false,
|
||||
},
|
||||
entropy: {
|
||||
thresholds: {
|
||||
critical: { entropy: 5.4, minLen: 128 },
|
||||
high: { entropy: 5.1, minLen: 64 },
|
||||
medium: { entropy: 4.7, minLen: 40 },
|
||||
},
|
||||
// User-extensible extension skip list — merged with built-in defaults.
|
||||
suppress_extensions: [],
|
||||
// Additional line-level regex sources (string or array of strings compiled at load).
|
||||
suppress_line_patterns: [],
|
||||
// Substring matches against relative path — plain contains, no glob.
|
||||
suppress_paths: [],
|
||||
},
|
||||
});
|
||||
|
||||
// Cache loaded policy per project root
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue