diff --git a/plugins/llm-security/scanners/entropy-scanner.mjs b/plugins/llm-security/scanners/entropy-scanner.mjs index f69684f..d521135 100644 --- a/plugins/llm-security/scanners/entropy-scanner.mjs +++ b/plugins/llm-security/scanners/entropy-scanner.mjs @@ -15,6 +15,31 @@ import { finding, scannerResult } from './lib/output.mjs'; import { SEVERITY } from './lib/severity.mjs'; import { shannonEntropy, extractStringLiterals, isBase64Like, isHexBlob, redact } from './lib/string-utils.mjs'; +// --------------------------------------------------------------------------- +// File-extension suppression (context-aware, v7.0.0+) +// --------------------------------------------------------------------------- + +/** + * Extensions (lower-case, with leading dot) whose contents are almost always benign high-entropy noise: + * GPU shaders, stylesheets, SVG markup. Scanning these produces massive + * false-positive rates (observed 70% FP on hyperframes renderer codebase). + */ +const ENTROPY_SKIP_EXTENSIONS = new Set([ + '.glsl', '.frag', '.vert', '.shader', '.wgsl', // GPU shaders + '.css', '.scss', '.sass', '.less', // stylesheets + '.svg', // SVG markup +]); + +/** Decide whether a file is excluded from entropy scanning: relPath ending in .min.js / .min.css, or ext listed in ENTROPY_SKIP_EXTENSIONS. Both checks are case-insensitive. + * @param {{ relPath: string, ext: string }} fileInfo - a missing or empty relPath / ext is treated as ''; fileInfo itself must be non-null + * @returns {boolean} true if the file should be skipped entirely + */ +function shouldSkipByExtension(fileInfo) { + const lowerPath = (fileInfo.relPath || '').toLowerCase(); + if (lowerPath.endsWith('.min.js') || lowerPath.endsWith('.min.css')) return true; // minified bundles are matched on the full path, not on ext + return ENTROPY_SKIP_EXTENSIONS.has((fileInfo.ext || '').toLowerCase()); +} + // --------------------------------------------------------------------------- // Constants // --------------------------------------------------------------------------- @@ -75,6 +100,27 @@ const UUID_PATTERN = /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{ /** Pure lowercase hex that could be a hash digest (not obfuscated code). 
*/ const HEX_HASH_PATTERN = /^[a-f0-9]{32,128}$/i; +/** GLSL/WGSL shader keywords — suppress inline shader source (gl_Position, vec3, uniform, ...). Case-sensitive, whole-word match anywhere in the string. */ +const GLSL_KEYWORDS = /\b(?:gl_(?:Position|FragColor|FragCoord|PointSize|PointCoord)|vec[234]|mat[234]|uniform|varying|attribute|precision\s+(?:high|medium|low)p|smoothstep|mix|clamp|texture2D|textureCube|sampler[123]D)\b/; + +/** CSS-in-JS patterns (styled-components, emotion, vanilla-extract, @keyframes): a styled.x or css tag followed by a backtick, an @keyframes/@media/@supports at-rule followed by whitespace, or a listed pseudo-selector (hover, focus, ...) after a colon. Case-sensitive. */ +const CSS_IN_JS_PATTERN = /\b(?:styled\.[a-z]+|css)\s*`|@(?:keyframes|media|supports)\s|:\s*(?:hover|focus|active|before|after|visited|root)\b/; + +/** Inline HTML/SVG markup in source: an opening SVG element tag, a div/span tag with at least one attribute, a bare style or script tag, or a template tag followed by whitespace. Case-insensitive. */ +const INLINE_MARKUP = /<(?:svg|path|defs|g\s|rect\s|circle\s|polygon|polyline|ellipse|line\s|use\s|symbol\s|clipPath|linearGradient|radialGradient|div\s+[a-z-]+|span\s+[a-z-]+|style>|script>|template\s)/i; + +/** ffmpeg filter-graph syntax: a stream selector such as [0:v], or a listed filter/option name (scale, crop, format, ...) followed by '='. NOTE(review): the names are not word-anchored, so generic ones like format/pad/trim also match ordinary assignments inside a string — confirm this breadth is intended. */ +const FFMPEG_SYNTAX = /\[\d+:[avs]\]|(?:scale|crop|concat|overlay|psnr|drawtext|setpts|atempo|filter_complex|format|pad|trim|setdar|setsar)\s*=/; + +/** Browser User-Agent strings (hardcoded in source — long but structured, not encoded). Matches any one of the listed product tokens, case-sensitively. */ +const USER_AGENT_PATTERN = /Mozilla\/\d|AppleWebKit|Chrome\/\d+|Safari\/\d+|Firefox\/\d+|Edg\/\d+|OPR\/\d+/; + +/** SQL statements — DDL/DML plus WITH, GRANT and REVOKE (long structured strings, not encoded payloads). Anchored at the start of the string after optional whitespace; case-insensitive. */ +const SQL_STATEMENT = /^\s*(?:SELECT|INSERT|UPDATE|DELETE|CREATE|ALTER|WITH|DROP|TRUNCATE|GRANT|REVOKE)\s+/i; + +/** Error-message templates with embedded HTML/markup (throw new Error("