feat(llm-security)!: v7.0.0 commit 2 — context-aware entropy scanner
Observed 70% false-positive rate on renderer/shader codebases (hyperframes): GLSL, CSS-in-JS, inline HTML/SVG, ffmpeg filter-strings, hardcoded User-Agent strings all matched base64-like entropy thresholds. This commit adds two suppression layers before classification. Layer A — file-extension skip: .glsl/.frag/.vert/.shader/.wgsl (shaders), .css/.scss/.sass/.less (stylesheets), .svg (markup), .min.js/.min.css (minified bundles). Tracked via new calibration.files_skipped_by_extension field on scanner envelope for synthesizer stats. Layer B — seven new line-level suppression rules in isFalsePositive() (rules 11-17): GLSL/WGSL keywords, CSS-in-JS (styled/emotion/@keyframes), inline HTML/SVG markup, ffmpeg filter-graph syntax, browser User-Agent, SQL DDL/DML, error-message templates with embedded HTML. Existing entropy.test.mjs: 9/9 still green — known bad base64 payload in telemetry.mjs fixture still detected. Policy-driven thresholds wired in Commit 3. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
parent
d83424a782
commit
e7f7df0fc8
1 changed files with 83 additions and 1 deletions
|
|
@ -15,6 +15,31 @@ import { finding, scannerResult } from './lib/output.mjs';
|
|||
import { SEVERITY } from './lib/severity.mjs';
|
||||
import { shannonEntropy, extractStringLiterals, isBase64Like, isHexBlob, redact } from './lib/string-utils.mjs';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// File-extension suppression (context-aware, v7.0.0+)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * File extensions the entropy scanner skips outright: GPU shaders,
 * stylesheets, and SVG markup. Their contents are almost always benign
 * high-entropy noise, and scanning them produces massive false-positive
 * rates (observed 70% FP on the hyperframes renderer codebase).
 *
 * Entries are lowercase and include the leading dot; lookups in
 * shouldSkipByExtension() lowercase the candidate before calling `.has()`.
 * Minified bundles (`.min.js`, `.min.css`) are NOT listed here — they are
 * matched against the full path in shouldSkipByExtension().
 *
 * Insertion order is kept stable because scan() spreads this set into the
 * `calibration.skip_extensions` array of its result.
 */
const ENTROPY_SKIP_EXTENSIONS = new Set([
  '.glsl', '.frag', '.vert', '.shader', '.wgsl',  // GPU shaders
  '.css', '.scss', '.sass', '.less',              // stylesheets
  '.svg',                                         // SVG markup
]);
|
||||
|
||||
/**
 * Decide, from the file name alone, whether the entropy scanner should
 * ignore a file.
 *
 * A file is skipped when its path ends in a minified-bundle suffix
 * (`.min.js` / `.min.css`) or when its extension is a member of
 * ENTROPY_SKIP_EXTENSIONS. Both comparisons are case-insensitive.
 *
 * @param {{ relPath: string, ext: string }} fileInfo - discovery entry for the file
 * @returns {boolean} true if the file should be skipped entirely
 */
function shouldSkipByExtension(fileInfo) {
  // Minified bundles are recognized by a compound suffix on the path itself
  // (presumably `ext` carries only the final extension — confirm against discovery).
  const lowerPath = (fileInfo.relPath || '').toLowerCase();
  if (lowerPath.endsWith('.min.js') || lowerPath.endsWith('.min.css')) {
    return true;
  }

  // A missing `ext` normalizes to '', which is never a member of the set.
  const lowerExt = (fileInfo.ext || '').toLowerCase();
  return ENTROPY_SKIP_EXTENSIONS.has(lowerExt);
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Constants
|
||||
// ---------------------------------------------------------------------------
|
||||
|
|
@ -75,6 +100,27 @@ const UUID_PATTERN = /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{
|
|||
/** Pure lowercase hex that could be a hash digest (not obfuscated code). */
|
||||
const HEX_HASH_PATTERN = /^[a-f0-9]{32,128}$/i;
|
||||
|
||||
/** GLSL/WGSL shader keywords — suppress inline shader source (gl_Position, vec3, uniform, ...). */
|
||||
// Case-sensitive, whole-word match (`\b` on both sides): `vec3` matches, `vec3f` does not.
// isFalsePositive() tests this against `line` and suppresses the finding on any hit.
// NOTE(review): `uniform`, `varying`, `attribute`, `mix` and `clamp` are ordinary
// words/identifiers, so a non-shader line containing one of them is suppressed too —
// confirm this breadth is acceptable for a security scanner.
const GLSL_KEYWORDS = /\b(?:gl_(?:Position|FragColor|FragCoord|PointSize|PointCoord)|vec[234]|mat[234]|uniform|varying|attribute|precision\s+(?:high|medium|low)p|smoothstep|mix|clamp|texture2D|textureCube|sampler[123]D)\b/;
|
||||
|
||||
/** CSS-in-JS patterns (styled-components, emotion, vanilla-extract, @keyframes). */
|
||||
// Three top-level alternatives:
//   1. a `styled.<lowercase-tag>` or `css` tag directly before a backtick (optional whitespace between),
//   2. an `@keyframes` / `@media` / `@supports` at-rule followed by whitespace,
//   3. a colon, optional whitespace, then one of the listed pseudo-class/element names.
// NOTE(review): alternative 3 also matches non-CSS text such as `state: active` or
// `cond ? x : before` — confirm this breadth is intended.
const CSS_IN_JS_PATTERN = /\b(?:styled\.[a-z]+|css)\s*`|@(?:keyframes|media|supports)\s|:\s*(?:hover|focus|active|before|after|visited|root)\b/;
|
||||
|
||||
/** Inline HTML/SVG markup in source (tags with attributes on the same line). */
|
||||
// Case-insensitive (`i` flag). Short tag names (g, rect, circle, line, use, symbol,
// template) must be followed by whitespace; `div` / `span` additionally require an
// attribute-like token after the whitespace; `style>` / `script>` match only
// attribute-less opening tags.
// NOTE(review): the remaining names (svg, path, defs, polygon, ...) are prefix matches,
// so text like `<pathname` also satisfies the pattern.
const INLINE_MARKUP = /<(?:svg|path|defs|g\s|rect\s|circle\s|polygon|polyline|ellipse|line\s|use\s|symbol\s|clipPath|linearGradient|radialGradient|div\s+[a-z-]+|span\s+[a-z-]+|style>|script>|template\s)/i;
|
||||
|
||||
/** ffmpeg filter-graph syntax (stream selectors + filter chains). */
|
||||
// Matches either a stream selector such as `[0:v]`, `[1:a]`, `[2:s]`, or one of the
// listed filter/option names followed by optional whitespace and `=`. Case-sensitive.
// NOTE(review): there is no word boundary before the name, so longer names ending in a
// listed one (e.g. `asetpts=`, `atrim=`) match as well — and so do plain assignments
// such as `format = ...` or `reformat=`. Tighten only after checking which of these
// matches are relied on.
const FFMPEG_SYNTAX = /\[\d+:[avs]\]|(?:scale|crop|concat|overlay|psnr|drawtext|setpts|atempo|filter_complex|format|pad|trim|setdar|setsar)\s*=/;
|
||||
|
||||
/** Browser User-Agent strings (hardcoded in source — long but structured, not encoded). */
|
||||
// Unanchored and case-sensitive: a single product token anywhere in the tested text
// (e.g. `Mozilla/5`, `AppleWebKit`, `Chrome/120`, `Edg/119`) is enough to match.
// `AppleWebKit` needs no version suffix; `Mozilla/` needs one digit; the others need `/` plus digits.
const USER_AGENT_PATTERN = /Mozilla\/\d|AppleWebKit|Chrome\/\d+|Safari\/\d+|Firefox\/\d+|Edg\/\d+|OPR\/\d+/;
|
||||
|
||||
/** SQL DDL/DML statements (long structured strings, not encoded payloads). */
|
||||
// Anchored to the start of the tested text (after optional leading whitespace) and
// case-insensitive; the keyword must be followed by at least one whitespace character.
// isFalsePositive() tests this against `line`, so — assuming `line` is the full source
// line — `const q = "SELECT ..."` does not match; only lines that begin with the keyword do.
// NOTE(review): JavaScript statements beginning with `delete ` or `with ` also satisfy
// this pattern because of the `i` flag.
const SQL_STATEMENT = /^\s*(?:SELECT|INSERT|UPDATE|DELETE|CREATE|ALTER|WITH|DROP|TRUNCATE|GRANT|REVOKE)\s+/i;
|
||||
|
||||
/** Error-message templates with embedded HTML/markup (throw new Error("<div>...</div>")). */
|
||||
// Matches the start of an Error construction whose first argument is a string or
// template literal: `throw new <Error|TypeError|RangeError|SyntaxError>(` or a bare
// `new Error(`, followed by optional whitespace and an opening quote/backtick.
// The `\s*\(` sits outside the alternation so the typed `throw new ...` forms require
// the opening parenthesis too; previously only the `new Error(` branch consumed it,
// which made `throw new TypeError("...")` and friends impossible to match.
// NOTE(review): the literal's content is not inspected, so any string passed to these
// constructors is suppressed, with or without embedded markup.
const ERROR_TEMPLATE = /(?:throw\s+new\s+(?:Error|TypeError|RangeError|SyntaxError)|new\s+Error)\s*\(\s*[`'"]/;
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// False-positive suppression helpers
|
||||
// ---------------------------------------------------------------------------
|
||||
|
|
@ -133,6 +179,27 @@ function isFalsePositive(str, line, absPath) {
|
|||
if (str.startsWith(prefix)) return true;
|
||||
}
|
||||
|
||||
// 11. GLSL/WGSL shader keywords on the line — inline shader source
|
||||
if (GLSL_KEYWORDS.test(line)) return true;
|
||||
|
||||
// 12. CSS-in-JS (styled-components, emotion, vanilla-extract)
|
||||
if (CSS_IN_JS_PATTERN.test(line)) return true;
|
||||
|
||||
// 13. Inline HTML/SVG markup — React/Vue components, email templates
|
||||
if (INLINE_MARKUP.test(line)) return true;
|
||||
|
||||
// 14. ffmpeg filter-graph syntax — long structured strings, not encoded
|
||||
if (FFMPEG_SYNTAX.test(line)) return true;
|
||||
|
||||
// 15. Browser User-Agent strings — hardcoded but structured, not a payload
|
||||
if (USER_AGENT_PATTERN.test(line)) return true;
|
||||
|
||||
// 16. SQL DDL/DML — long SELECT/INSERT/... lines
|
||||
if (SQL_STATEMENT.test(line)) return true;
|
||||
|
||||
// 17. Error-message templates (throw new Error("<html>...</html>"))
|
||||
if (ERROR_TEMPLATE.test(line)) return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
|
|
@ -298,8 +365,17 @@ export async function scan(targetPath, discovery) {
|
|||
const allFindings = [];
|
||||
let filesScanned = 0;
|
||||
|
||||
let filesSkippedByExtension = 0;
|
||||
|
||||
try {
|
||||
for (const fileInfo of discovery.files) {
|
||||
// Context-aware skip: GPU shaders, stylesheets, SVG, minified bundles.
|
||||
// These file types produce ~70% false-positive rate on real codebases.
|
||||
if (shouldSkipByExtension(fileInfo)) {
|
||||
filesSkippedByExtension++;
|
||||
continue;
|
||||
}
|
||||
|
||||
const content = await readTextFile(fileInfo.absPath);
|
||||
|
||||
// readTextFile returns null for binary files or unreadable paths — skip silently
|
||||
|
|
@ -314,7 +390,13 @@ export async function scan(targetPath, discovery) {
|
|||
const durationMs = Date.now() - startMs;
|
||||
const status = 'ok';
|
||||
|
||||
return scannerResult('entropy-scanner', status, allFindings, filesScanned, durationMs);
|
||||
const result = scannerResult('entropy-scanner', status, allFindings, filesScanned, durationMs);
|
||||
// Calibration stats for synthesizer — how many files the ext-policy excluded.
|
||||
result.calibration = {
|
||||
files_skipped_by_extension: filesSkippedByExtension,
|
||||
skip_extensions: [...ENTROPY_SKIP_EXTENSIONS, '.min.js', '.min.css'],
|
||||
};
|
||||
return result;
|
||||
} catch (err) {
|
||||
const durationMs = Date.now() - startMs;
|
||||
return scannerResult(
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue