feat(llm-security)!: v7.0.0 commit 2 — context-aware entropy scanner
Observed 70% false-positive rate on renderer/shader codebases (hyperframes): GLSL, CSS-in-JS, inline HTML/SVG, ffmpeg filter-strings, hardcoded User-Agent strings all matched base64-like entropy thresholds. This commit adds two suppression layers before classification. Layer A — file-extension skip: .glsl/.frag/.vert/.shader/.wgsl (shaders), .css/.scss/.sass/.less (stylesheets), .svg (markup), .min.js/.min.css (minified bundles). Tracked via new calibration.files_skipped_by_extension field on scanner envelope for synthesizer stats. Layer B — seven new line-level suppression rules in isFalsePositive() (rules 11-17): GLSL/WGSL keywords, CSS-in-JS (styled/emotion/@keyframes), inline HTML/SVG markup, ffmpeg filter-graph syntax, browser User-Agent, SQL DDL/DML, error-message templates with embedded HTML. Existing entropy.test.mjs: 9/9 still green — known bad base64 payload in telemetry.mjs fixture still detected. Policy-driven thresholds wired in Commit 3. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
parent
d83424a782
commit
e7f7df0fc8
1 changed files with 83 additions and 1 deletions
|
|
@ -15,6 +15,31 @@ import { finding, scannerResult } from './lib/output.mjs';
|
||||||
import { SEVERITY } from './lib/severity.mjs';
|
import { SEVERITY } from './lib/severity.mjs';
|
||||||
import { shannonEntropy, extractStringLiterals, isBase64Like, isHexBlob, redact } from './lib/string-utils.mjs';
|
import { shannonEntropy, extractStringLiterals, isBase64Like, isHexBlob, redact } from './lib/string-utils.mjs';
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// File-extension suppression (context-aware, v7.0.0+)
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
/**
 * Extensions whose contents are almost always benign high-entropy noise:
 * GPU shaders, stylesheets, SVG markup. Scanning these produces massive
 * false-positive rates (observed 70% FP on hyperframes renderer codebase).
 *
 * Entries are lowercase and include the leading dot.
 */
const ENTROPY_SKIP_EXTENSIONS = new Set([
  '.glsl', '.frag', '.vert', '.shader', '.wgsl', // GPU shaders
  '.css', '.scss', '.sass', '.less',             // stylesheets
  '.svg',                                        // SVG markup
]);

/**
 * Compound suffixes identifying minified bundles. These cannot live in
 * ENTROPY_SKIP_EXTENSIONS because the final extension alone ('.js', '.css')
 * does not distinguish a minified bundle from hand-written source, so they
 * are matched against the whole relative path instead.
 */
const MINIFIED_BUNDLE_SUFFIXES = ['.min.js', '.min.css'];

/**
 * Decide whether the entropy scanner should skip a file outright, based
 * purely on its name.
 *
 * The check is case-insensitive. `ext` is used when present (with or without
 * its leading dot); when it is missing or empty the extension is derived from
 * the last path segment of `relPath`, so a discovery entry that carries only
 * a path is still classified correctly instead of silently falling through.
 *
 * @param {{ relPath: string, ext: string }} fileInfo
 * @returns {boolean} true if the file should be skipped entirely
 */
function shouldSkipByExtension(fileInfo) {
  // Tolerate a missing descriptor: nothing to match means "do not skip".
  const info = fileInfo || {};
  const lowerPath = String(info.relPath || '').toLowerCase();

  if (MINIFIED_BUNDLE_SUFFIXES.some((suffix) => lowerPath.endsWith(suffix))) {
    return true;
  }

  let ext = String(info.ext || '').toLowerCase();
  if (ext === '') {
    // Only look at the final path segment so a dot inside a directory name
    // ("assets.css/readme") is never mistaken for a file extension.
    const lastSeparator = Math.max(lowerPath.lastIndexOf('/'), lowerPath.lastIndexOf('\\'));
    const baseName = lowerPath.slice(lastSeparator + 1);
    const dotIndex = baseName.lastIndexOf('.');
    // dotIndex === 0 is a dotfile (".svg"), which has no extension.
    ext = dotIndex > 0 ? baseName.slice(dotIndex) : '';
  } else if (!ext.startsWith('.')) {
    ext = `.${ext}`;
  }

  return ENTROPY_SKIP_EXTENSIONS.has(ext);
}
|
||||||
|
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
// Constants
|
// Constants
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
|
|
@ -75,6 +100,27 @@ const UUID_PATTERN = /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{
|
||||||
/** Pure lowercase hex that could be a hash digest (not obfuscated code). */
|
/** Pure lowercase hex that could be a hash digest (not obfuscated code). */
|
||||||
const HEX_HASH_PATTERN = /^[a-f0-9]{32,128}$/i;
|
const HEX_HASH_PATTERN = /^[a-f0-9]{32,128}$/i;
|
||||||
|
|
||||||
|
/**
 * GLSL/WGSL shader source embedded in a host-language line.
 *
 * Tested against the whole source line by isFalsePositive(); a match
 * suppresses the entropy finding for that line. Tokens that are also ordinary
 * English/JavaScript identifiers (uniform, varying, attribute, mix, clamp,
 * smoothstep, ...) are therefore only accepted in shader-shaped context:
 *   - storage qualifiers must open a declaration: `uniform float uTime;`
 *   - WGSL address space form: `var<uniform>`
 *   - built-in functions must be called: `mix(`, `clamp(`
 * Without that context a line such as `const attribute = "<payload>"` would
 * be silently dropped from the scan.
 */
const GLSL_KEYWORDS = /\b(?:gl_(?:Position|FragColor|FragCoord|PointSize|PointCoord)\b|(?:vec|mat)[234]\b|sampler[123]D\b|precision\s+(?:high|medium|low)p\b|(?:uniform|varying|attribute)\s+(?:(?:high|medium|low)p\s+)?\w+\s+\w+\s*[;\[]|var\s*<\s*uniform\s*>|(?:smoothstep|mix|clamp|texture2D|textureCube)\s*\()/;

/**
 * CSS-in-JS patterns (styled-components, emotion, vanilla-extract, @keyframes).
 *
 * The pseudo-class alternative requires the name to follow the colon directly
 * (`&:hover`, `::before`, `:root`), as CSS selectors are written. Allowing
 * whitespace there would also match object literals (`state: active`) and
 * ternaries (`cond ? a : after`).
 */
const CSS_IN_JS_PATTERN = /\b(?:styled\.[a-z]+|css)\s*`|@(?:keyframes|media|supports)\s|:(?:hover|focus|active|before|after|visited|root)\b/;

/** Inline HTML/SVG markup in source (tags with attributes on the same line). */
const INLINE_MARKUP = /<(?:svg|path|defs|g\s|rect\s|circle\s|polygon|polyline|ellipse|line\s|use\s|symbol\s|clipPath|linearGradient|radialGradient|div\s+[a-z-]+|span\s+[a-z-]+|style>|script>|template\s)/i;

/**
 * ffmpeg filter-graph syntax (stream selectors + filter chains).
 *
 * Matches either a numeric stream selector (`[0:v]`) or a `filter=value`
 * pair in filter-graph position: at the start of the line, right after a
 * quote/backtick, `,`, `;` or a closing `]` label, or after a filter flag
 * (`-vf`, `-af`, `-filter_complex`, `-lavfi`). The value must start
 * immediately after `=` with a word character or `-`, so plain assignments
 * such as `const format = "..."` or `opts.pad="..."` are not treated as
 * ffmpeg syntax.
 */
const FFMPEG_SYNTAX = /\[\d+:[avs]\]|(?:^\s*|["'`,;\]]\s*|-(?:vf|af|filter_complex|lavfi)\s+)(?:scale|crop|concat|overlay|psnr|drawtext|setpts|atempo|filter_complex|format|pad|trim|setdar|setsar)=[\w-]/;

/** Browser User-Agent strings (hardcoded in source — long but structured, not encoded). */
const USER_AGENT_PATTERN = /Mozilla\/\d|AppleWebKit|Chrome\/\d+|Safari\/\d+|Firefox\/\d+|Edg\/\d+|OPR\/\d+/;

/**
 * SQL DDL/DML statements (long structured strings, not encoded payloads).
 * Anchored to the start of the tested text and case-insensitive.
 */
const SQL_STATEMENT = /^\s*(?:SELECT|INSERT|UPDATE|DELETE|CREATE|ALTER|WITH|DROP|TRUNCATE|GRANT|REVOKE)\s+/i;

/** Error-message templates with embedded HTML/markup (throw new Error("<div>...</div>")). */
const ERROR_TEMPLATE = /(?:throw\s+new\s+(?:Error|TypeError|RangeError|SyntaxError)|new\s+Error\s*\()\s*[`'"]/;
|
||||||
|
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
// False-positive suppression helpers
|
// False-positive suppression helpers
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
|
|
@ -133,6 +179,27 @@ function isFalsePositive(str, line, absPath) {
|
||||||
if (str.startsWith(prefix)) return true;
|
if (str.startsWith(prefix)) return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// 11. GLSL/WGSL shader keywords on the line — inline shader source
|
||||||
|
if (GLSL_KEYWORDS.test(line)) return true;
|
||||||
|
|
||||||
|
// 12. CSS-in-JS (styled-components, emotion, vanilla-extract)
|
||||||
|
if (CSS_IN_JS_PATTERN.test(line)) return true;
|
||||||
|
|
||||||
|
// 13. Inline HTML/SVG markup — React/Vue components, email templates
|
||||||
|
if (INLINE_MARKUP.test(line)) return true;
|
||||||
|
|
||||||
|
// 14. ffmpeg filter-graph syntax — long structured strings, not encoded
|
||||||
|
if (FFMPEG_SYNTAX.test(line)) return true;
|
||||||
|
|
||||||
|
// 15. Browser User-Agent strings — hardcoded but structured, not a payload
|
||||||
|
if (USER_AGENT_PATTERN.test(line)) return true;
|
||||||
|
|
||||||
|
// 16. SQL DDL/DML — long SELECT/INSERT/... lines
|
||||||
|
if (SQL_STATEMENT.test(line)) return true;
|
||||||
|
|
||||||
|
// 17. Error-message templates (throw new Error("<html>...</html>"))
|
||||||
|
if (ERROR_TEMPLATE.test(line)) return true;
|
||||||
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -298,8 +365,17 @@ export async function scan(targetPath, discovery) {
|
||||||
const allFindings = [];
|
const allFindings = [];
|
||||||
let filesScanned = 0;
|
let filesScanned = 0;
|
||||||
|
|
||||||
|
let filesSkippedByExtension = 0;
|
||||||
|
|
||||||
try {
|
try {
|
||||||
for (const fileInfo of discovery.files) {
|
for (const fileInfo of discovery.files) {
|
||||||
|
// Context-aware skip: GPU shaders, stylesheets, SVG, minified bundles.
|
||||||
|
// These file types produce ~70% false-positive rate on real codebases.
|
||||||
|
if (shouldSkipByExtension(fileInfo)) {
|
||||||
|
filesSkippedByExtension++;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
const content = await readTextFile(fileInfo.absPath);
|
const content = await readTextFile(fileInfo.absPath);
|
||||||
|
|
||||||
// readTextFile returns null for binary files or unreadable paths — skip silently
|
// readTextFile returns null for binary files or unreadable paths — skip silently
|
||||||
|
|
@ -314,7 +390,13 @@ export async function scan(targetPath, discovery) {
|
||||||
const durationMs = Date.now() - startMs;
|
const durationMs = Date.now() - startMs;
|
||||||
const status = 'ok';
|
const status = 'ok';
|
||||||
|
|
||||||
return scannerResult('entropy-scanner', status, allFindings, filesScanned, durationMs);
|
const result = scannerResult('entropy-scanner', status, allFindings, filesScanned, durationMs);
|
||||||
|
// Calibration stats for synthesizer — how many files the ext-policy excluded.
|
||||||
|
result.calibration = {
|
||||||
|
files_skipped_by_extension: filesSkippedByExtension,
|
||||||
|
skip_extensions: [...ENTROPY_SKIP_EXTENSIONS, '.min.js', '.min.css'],
|
||||||
|
};
|
||||||
|
return result;
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
const durationMs = Date.now() - startMs;
|
const durationMs = Date.now() - startMs;
|
||||||
return scannerResult(
|
return scannerResult(
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue