From e7f7df0fc8a18dfe65e3bd3e0d525b102267d664 Mon Sep 17 00:00:00 2001 From: Kjell Tore Guttormsen Date: Sun, 19 Apr 2026 22:00:42 +0200 Subject: [PATCH] =?UTF-8?q?feat(llm-security)!:=20v7.0.0=20commit=202=20?= =?UTF-8?q?=E2=80=94=20context-aware=20entropy=20scanner?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Observed 70% false-positive rate on renderer/shader codebases (hyperframes): GLSL, CSS-in-JS, inline HTML/SVG, ffmpeg filter-strings, hardcoded User-Agent strings all matched base64-like entropy thresholds. This commit adds two suppression layers before classification. Layer A — file-extension skip: .glsl/.frag/.vert/.shader/.wgsl (shaders), .css/.scss/.sass/.less (stylesheets), .svg (markup), .min.js/.min.css (minified bundles). Tracked via new calibration.files_skipped_by_extension field on scanner envelope for synthesizer stats. Layer B — seven new line-level suppression rules in isFalsePositive() (rules 11-17): GLSL/WGSL keywords, CSS-in-JS (styled/emotion/@keyframes), inline HTML/SVG markup, ffmpeg filter-graph syntax, browser User-Agent, SQL DDL/DML, error-message templates with embedded HTML. Existing entropy.test.mjs: 9/9 still green — known bad base64 payload in telemetry.mjs fixture still detected. Policy-driven thresholds wired in Commit 3. 
Co-Authored-By: Claude Opus 4.7 --- .../llm-security/scanners/entropy-scanner.mjs | 84 ++++++++++++++++++- 1 file changed, 83 insertions(+), 1 deletion(-) diff --git a/plugins/llm-security/scanners/entropy-scanner.mjs b/plugins/llm-security/scanners/entropy-scanner.mjs index f69684f..d521135 100644 --- a/plugins/llm-security/scanners/entropy-scanner.mjs +++ b/plugins/llm-security/scanners/entropy-scanner.mjs @@ -15,6 +15,31 @@ import { finding, scannerResult } from './lib/output.mjs'; import { SEVERITY } from './lib/severity.mjs'; import { shannonEntropy, extractStringLiterals, isBase64Like, isHexBlob, redact } from './lib/string-utils.mjs'; +// --------------------------------------------------------------------------- +// File-extension suppression (context-aware, v7.0.0+) +// --------------------------------------------------------------------------- + +/** + * Extensions whose contents are almost always benign high-entropy noise: + * GPU shaders, stylesheets, SVG markup. Scanning these produces massive + * false-positive rates (observed 70% FP on hyperframes renderer codebase). 
+ */ +const ENTROPY_SKIP_EXTENSIONS = new Set([ + '.glsl', '.frag', '.vert', '.shader', '.wgsl', // GPU shaders + '.css', '.scss', '.sass', '.less', // stylesheets + '.svg', // SVG markup +]); + +/** + * @param {{ relPath: string, ext: string }} fileInfo + * @returns {boolean} true if the file should be skipped entirely + */ +function shouldSkipByExtension(fileInfo) { + const lowerPath = (fileInfo.relPath || '').toLowerCase(); + if (lowerPath.endsWith('.min.js') || lowerPath.endsWith('.min.css')) return true; + return ENTROPY_SKIP_EXTENSIONS.has((fileInfo.ext || '').toLowerCase()); +} + // --------------------------------------------------------------------------- // Constants // --------------------------------------------------------------------------- @@ -75,6 +100,27 @@ const UUID_PATTERN = /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{ /** Pure lowercase hex that could be a hash digest (not obfuscated code). */ const HEX_HASH_PATTERN = /^[a-f0-9]{32,128}$/i; +/** GLSL/WGSL shader keywords — suppress inline shader source (gl_Position, vec3, uniform, ...). */ +const GLSL_KEYWORDS = /\b(?:gl_(?:Position|FragColor|FragCoord|PointSize|PointCoord)|vec[234]|mat[234]|uniform|varying|attribute|precision\s+(?:high|medium|low)p|smoothstep|mix|clamp|texture2D|textureCube|sampler[123]D)\b/; + +/** CSS-in-JS patterns (styled-components, emotion, vanilla-extract, @keyframes). */ +const CSS_IN_JS_PATTERN = /\b(?:styled\.[a-z]+|css)\s*`|@(?:keyframes|media|supports)\s|:\s*(?:hover|focus|active|before|after|visited|root)\b/; + +/** Inline HTML/SVG markup in source (tags with attributes on the same line). */ +const INLINE_MARKUP = /<(?:svg|path|defs|g\s|rect\s|circle\s|polygon|polyline|ellipse|line\s|use\s|symbol\s|clipPath|linearGradient|radialGradient|div\s+[a-z-]+|span\s+[a-z-]+|style>|script>|template\s)/i; + +/** ffmpeg filter-graph syntax (stream selectors + filter chains). 
*/ +const FFMPEG_SYNTAX = /\[\d+:[avs]\]|(?:scale|crop|concat|overlay|psnr|drawtext|setpts|atempo|filter_complex|format|pad|trim|setdar|setsar)\s*=/; + +/** Browser User-Agent strings (hardcoded in source — long but structured, not encoded). */ +const USER_AGENT_PATTERN = /Mozilla\/\d|AppleWebKit|Chrome\/\d+|Safari\/\d+|Firefox\/\d+|Edg\/\d+|OPR\/\d+/; + +/** SQL DDL/DML statements (long structured strings, not encoded payloads). */ +const SQL_STATEMENT = /^\s*(?:SELECT|INSERT|UPDATE|DELETE|CREATE|ALTER|WITH|DROP|TRUNCATE|GRANT|REVOKE)\s+/i; + +/** Error-message templates with embedded HTML/markup (throw new Error("
<div>...</div>
")). */ +const ERROR_TEMPLATE = /(?:throw\s+new\s+(?:Error|TypeError|RangeError|SyntaxError)|new\s+Error\s*\()\s*[`'"]/; + // --------------------------------------------------------------------------- // False-positive suppression helpers // --------------------------------------------------------------------------- @@ -133,6 +179,27 @@ function isFalsePositive(str, line, absPath) { if (str.startsWith(prefix)) return true; } + // 11. GLSL/WGSL shader keywords on the line — inline shader source + if (GLSL_KEYWORDS.test(line)) return true; + + // 12. CSS-in-JS (styled-components, emotion, vanilla-extract) + if (CSS_IN_JS_PATTERN.test(line)) return true; + + // 13. Inline HTML/SVG markup — React/Vue components, email templates + if (INLINE_MARKUP.test(line)) return true; + + // 14. ffmpeg filter-graph syntax — long structured strings, not encoded + if (FFMPEG_SYNTAX.test(line)) return true; + + // 15. Browser User-Agent strings — hardcoded but structured, not a payload + if (USER_AGENT_PATTERN.test(line)) return true; + + // 16. SQL DDL/DML — long SELECT/INSERT/... lines + if (SQL_STATEMENT.test(line)) return true; + + // 17. Error-message templates (throw new Error("...")) + if (ERROR_TEMPLATE.test(line)) return true; + return false; } @@ -298,8 +365,17 @@ export async function scan(targetPath, discovery) { const allFindings = []; let filesScanned = 0; + let filesSkippedByExtension = 0; + try { for (const fileInfo of discovery.files) { + // Context-aware skip: GPU shaders, stylesheets, SVG, minified bundles. + // These file types produce ~70% false-positive rate on real codebases. 
+ if (shouldSkipByExtension(fileInfo)) { + filesSkippedByExtension++; + continue; + } + const content = await readTextFile(fileInfo.absPath); // readTextFile returns null for binary files or unreadable paths — skip silently @@ -314,7 +390,13 @@ export async function scan(targetPath, discovery) { const durationMs = Date.now() - startMs; const status = 'ok'; - return scannerResult('entropy-scanner', status, allFindings, filesScanned, durationMs); + const result = scannerResult('entropy-scanner', status, allFindings, filesScanned, durationMs); + // Calibration stats for synthesizer — how many files the ext-policy excluded. + result.calibration = { + files_skipped_by_extension: filesSkippedByExtension, + skip_extensions: [...ENTROPY_SKIP_EXTENSIONS, '.min.js', '.min.css'], + }; + return result; } catch (err) { const durationMs = Date.now() - startMs; return scannerResult(