refactor(entropy): B5 — two-stage context-classified suppression pipeline
The v7.0.0 entropy-scanner ran rules 11-13 (GLSL/CSS-in-JS/inline-markup
line-proximity suppressions) for every line regardless of file type. A
polyglot `.ts` file with an embedded fragment-shader template literal
could therefore mask a real high-entropy credential when the credential
literal happened to share a line with a GLSL keyword. Critical-review
B5 documented the false-negative class.
Refactor:
* New `classifyFileContext(absPath, lines)` returns
`'shader-dominant' | 'markup-dominant' | 'code-dominant' | 'mixed'`,
keyed off the file extension, with a content-density fallback for
code-extension files: the first 50 non-blank lines are sampled, and
if ≥50% of them match GLSL/inline-markup the file is downgraded to
`mixed`.
* `isFalsePositive(str, line, absPath, context)` gates rules 11-13
on `context !== 'code-dominant'`. Rules 1-10 and 14-19 still run
unconditionally, so URL/path/test-fixture/ffmpeg/UA/SQL/error-
template suppression behaves identically.
* `scanFileContent` computes `fileContext` once per file and threads
it through every per-string suppression check.
Conservative defaults to keep the regression surface minimal:
* Files with `<5` sampled non-blank lines fall back to `mixed`
(preserves the existing rule-11/12/13 behaviour for the single-
line .js fixtures used by entropy-context.test.mjs).
* Unknown extensions fall back to `mixed`.
* Code-extension files densely populated with shader/markup
content fall back to `mixed`.
Net effect: a `.ts` file with an embedded GLSL block but mostly TS
code on the surrounding lines now surfaces credentials that the
v7.0.0 line-proximity heuristic suppressed. Pure shader/markup
files are unaffected: they are either skipped at the extension gate
or classified `shader-dominant`/`markup-dominant`, for which rules
11-13 still apply.
New fixture: tests/fixtures/entropy/polyglot-ts-with-glsl.ts (with
runtime placeholder so it does not commit a high-entropy literal).
+3 tests in tests/scanners/entropy-context.test.mjs (26 → 29).
Existing entropy.test.mjs and entropy-context.test.mjs all remain
green. Full suite 1658 → 1661.
Refs: Batch B Wave 5 / Step 12 / v7.2.0
critical-review-2026-04-20.md §B5
This commit is contained in:
parent
d441abba20
commit
04f1593df3
3 changed files with 197 additions and 8 deletions
|
|
@ -176,6 +176,69 @@ const ERROR_TEMPLATE = /(?:throw\s+new\s+(?:Error|TypeError|RangeError|SyntaxErr
|
||||||
*/
|
*/
|
||||||
const MARKDOWN_IMAGE = /!\[[^\]]*\]\(\s*https?:\/\//;
|
const MARKDOWN_IMAGE = /!\[[^\]]*\]\(\s*https?:\/\//;
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// File-context classification (B5, v7.2.0)
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
/** File extensions treated as pure shader/markup/code by classifyFileContext. */
|
||||||
|
const SHADER_EXTENSIONS = new Set(['.glsl', '.frag', '.vert', '.shader', '.wgsl']);
|
||||||
|
const MARKUP_EXTENSIONS = new Set(['.html', '.htm', '.svg', '.xml', '.md', '.markdown', '.mdx']);
|
||||||
|
const CODE_EXTENSIONS = new Set([
|
||||||
|
'.ts', '.tsx', '.js', '.jsx', '.mjs', '.cjs',
|
||||||
|
'.py', '.go', '.rs', '.rb', '.java', '.cs',
|
||||||
|
'.kt', '.scala', '.swift', '.cpp', '.c', '.h', '.hpp', '.php',
|
||||||
|
]);
|
||||||
|
|
||||||
|
/**
 * Classify a file as shader-dominant, markup-dominant, code-dominant, or mixed.
 *
 * Used by isFalsePositive() to gate rules 11-13 (GLSL/CSS-in-JS/inline-markup
 * line-proximity suppressions). Those rules fire only when
 * context !== 'code-dominant', which prevents the v7.0.0 polyglot
 * false-negative (a real credential on a line with an inline GLSL keyword
 * being suppressed).
 *
 * Decision order:
 *   1. Extension in SHADER_EXTENSIONS → 'shader-dominant'
 *   2. Extension in MARKUP_EXTENSIONS → 'markup-dominant'
 *   3. Extension not in CODE_EXTENSIONS (including no extension) → 'mixed'
 *   4. Code extension: sample up to the first 50 non-blank lines;
 *        - fewer than 5 sampled lines → 'mixed'
 *        - ≥50% of sampled lines match GLSL_KEYWORDS or INLINE_MARKUP → 'mixed'
 *        - otherwise → 'code-dominant'
 *
 * 'mixed' is the conservative default: every suppression rule still applies.
 * Malformed input (non-string path, missing or non-array `lines`, non-string
 * entries) also lands on that default instead of throwing.
 *
 * @param {string} absPath - File path; only the part after the last '/' or '\\' is inspected.
 * @param {string[]} lines - File content split into lines.
 * @returns {'shader-dominant'|'markup-dominant'|'code-dominant'|'mixed'}
 */
function classifyFileContext(absPath, lines) {
  // Thresholds for the content-density check on code-extension files.
  const MAX_SAMPLED_LINES = 50;
  const MIN_SAMPLED_LINES = 5;
  const MIXED_HIT_RATIO = 0.5;

  // Extension = everything from the last dot of the base name, lower-cased.
  // For compound names such as `foo.test.ts` this yields `.ts`.
  const lower = typeof absPath === 'string' ? absPath.toLowerCase() : '';
  const slashIdx = Math.max(lower.lastIndexOf('/'), lower.lastIndexOf('\\'));
  const baseName = lower.slice(slashIdx + 1); // slashIdx === -1 keeps the whole string
  const dotIdx = baseName.lastIndexOf('.');
  const ext = dotIdx >= 0 ? baseName.slice(dotIdx) : '';

  if (SHADER_EXTENSIONS.has(ext)) return 'shader-dominant';
  if (MARKUP_EXTENSIONS.has(ext)) return 'markup-dominant';

  // Unknown (or missing) extension → conservative default.
  if (!CODE_EXTENSIONS.has(ext)) return 'mixed';

  // NOTE(review): only the head of the file is sampled, so shader/markup
  // content that starts after the first 50 non-blank lines does not influence
  // the classification.
  // NOTE(review): assumes GLSL_KEYWORDS and INLINE_MARKUP carry no `g`/`y`
  // flag; a stateful regex would make repeated .test() calls unreliable.
  let sampled = 0;
  let suppressionHits = 0;
  for (const rawLine of Array.isArray(lines) ? lines : []) {
    if (sampled >= MAX_SAMPLED_LINES) break;
    if (typeof rawLine !== 'string') continue;
    const trimmed = rawLine.trim();
    if (trimmed.length === 0) continue;
    sampled++;
    if (GLSL_KEYWORDS.test(trimmed) || INLINE_MARKUP.test(trimmed)) {
      suppressionHits++;
    }
  }

  // Too few non-blank lines to classify confidently → conservative default.
  if (sampled < MIN_SAMPLED_LINES) return 'mixed';

  // Mostly shader/markup despite the code extension → conservative default.
  if (suppressionHits / sampled >= MIXED_HIT_RATIO) return 'mixed';

  return 'code-dominant';
}
|
||||||
|
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
// False-positive suppression helpers
|
// False-positive suppression helpers
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
|
|
@ -183,12 +246,20 @@ const MARKDOWN_IMAGE = /!\[[^\]]*\]\(\s*https?:\/\//;
|
||||||
/**
|
/**
|
||||||
* Decide whether a candidate string should be suppressed (likely a false positive).
|
* Decide whether a candidate string should be suppressed (likely a false positive).
|
||||||
*
|
*
|
||||||
|
* v7.2.0 (B5): rules 11-13 (GLSL/CSS-in-JS/inline-markup line-proximity) are
|
||||||
|
* gated on `context !== 'code-dominant'`. In code-dominant files, an inline
|
||||||
|
* shader keyword next to a credential-shaped string is no longer a reason
|
||||||
|
* to suppress — that was the v7.0.0 polyglot false-negative (e.g. a `.ts`
|
||||||
|
* file with embedded GLSL block hiding a real secret on the next line).
|
||||||
|
*
|
||||||
* @param {string} str - The extracted string literal value
|
* @param {string} str - The extracted string literal value
|
||||||
* @param {string} line - The full source line it came from
|
* @param {string} line - The full source line it came from
|
||||||
* @param {string} absPath - Absolute file path
|
* @param {string} absPath - Absolute file path
|
||||||
|
* @param {'shader-dominant'|'markup-dominant'|'code-dominant'|'mixed'} [context='mixed']
|
||||||
|
* File-level classification from classifyFileContext.
|
||||||
* @returns {boolean} - true if this string should be skipped
|
* @returns {boolean} - true if this string should be skipped
|
||||||
*/
|
*/
|
||||||
function isFalsePositive(str, line, absPath) {
|
function isFalsePositive(str, line, absPath, context = 'mixed') {
|
||||||
// 1. URLs — entropy is misleading for long query strings / JWTs in URLs
|
// 1. URLs — entropy is misleading for long query strings / JWTs in URLs
|
||||||
if (str.startsWith('http://') || str.startsWith('https://')) return true;
|
if (str.startsWith('http://') || str.startsWith('https://')) return true;
|
||||||
|
|
||||||
|
|
@ -234,14 +305,19 @@ function isFalsePositive(str, line, absPath) {
|
||||||
if (str.startsWith(prefix)) return true;
|
if (str.startsWith(prefix)) return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
// 11. GLSL/WGSL shader keywords on the line — inline shader source
|
// Rules 11-13 (v7.2.0 B5): line-proximity suppressions for shader/CSS/markup.
|
||||||
if (GLSL_KEYWORDS.test(line)) return true;
|
// Gated on context !== 'code-dominant' so that a credential adjacent to an
|
||||||
|
// inline GLSL keyword in a `.ts` file is no longer suppressed.
|
||||||
|
if (context !== 'code-dominant') {
|
||||||
|
// 11. GLSL/WGSL shader keywords on the line — inline shader source
|
||||||
|
if (GLSL_KEYWORDS.test(line)) return true;
|
||||||
|
|
||||||
// 12. CSS-in-JS (styled-components, emotion, vanilla-extract)
|
// 12. CSS-in-JS (styled-components, emotion, vanilla-extract)
|
||||||
if (CSS_IN_JS_PATTERN.test(line)) return true;
|
if (CSS_IN_JS_PATTERN.test(line)) return true;
|
||||||
|
|
||||||
// 13. Inline HTML/SVG markup — React/Vue components, email templates
|
// 13. Inline HTML/SVG markup — React/Vue components, email templates
|
||||||
if (INLINE_MARKUP.test(line)) return true;
|
if (INLINE_MARKUP.test(line)) return true;
|
||||||
|
}
|
||||||
|
|
||||||
// 14. ffmpeg filter-graph syntax — long structured strings, not encoded
|
// 14. ffmpeg filter-graph syntax — long structured strings, not encoded
|
||||||
if (FFMPEG_SYNTAX.test(line)) return true;
|
if (FFMPEG_SYNTAX.test(line)) return true;
|
||||||
|
|
@ -337,6 +413,9 @@ function maxSeverity(a, b) {
|
||||||
function scanFileContent(content, absPath, relPath) {
|
function scanFileContent(content, absPath, relPath) {
|
||||||
const findings = [];
|
const findings = [];
|
||||||
const lines = content.split('\n');
|
const lines = content.split('\n');
|
||||||
|
// v7.2.0 (B5): classify the file once per scan; rules 11-13 inside
|
||||||
|
// isFalsePositive are gated on this context.
|
||||||
|
const fileContext = classifyFileContext(absPath, lines);
|
||||||
|
|
||||||
// De-duplicate: track (line, evidence) pairs to avoid reporting the same
|
// De-duplicate: track (line, evidence) pairs to avoid reporting the same
|
||||||
// string twice when it appears in both extractStringLiterals and assignment
|
// string twice when it appears in both extractStringLiterals and assignment
|
||||||
|
|
@ -364,7 +443,7 @@ function scanFileContent(content, absPath, relPath) {
|
||||||
if (!str || str.length < 10) continue;
|
if (!str || str.length < 10) continue;
|
||||||
|
|
||||||
// False positive suppression
|
// False positive suppression
|
||||||
if (isFalsePositive(str, line, absPath)) continue;
|
if (isFalsePositive(str, line, absPath, fileContext)) continue;
|
||||||
|
|
||||||
const H = shannonEntropy(str);
|
const H = shannonEntropy(str);
|
||||||
let severity = classifyEntropy(H, str.length);
|
let severity = classifyEntropy(H, str.length);
|
||||||
|
|
|
||||||
32
plugins/llm-security/tests/fixtures/entropy/polyglot-ts-with-glsl.ts
vendored
Normal file
32
plugins/llm-security/tests/fixtures/entropy/polyglot-ts-with-glsl.ts
vendored
Normal file
|
|
@ -0,0 +1,32 @@
|
||||||
|
// Polyglot TypeScript fixture for the entropy-scanner B5 regression.
|
||||||
|
//
|
||||||
|
// Pre-B5 behaviour: rule 11 (GLSL_KEYWORDS line-proximity) suppressed any
|
||||||
|
// high-entropy string that happened to share a line with shader keywords.
|
||||||
|
// In a `.ts` file with an embedded fragment-shader template literal, a real
|
||||||
|
// credential on the closing brace line would be silently dismissed.
|
||||||
|
//
|
||||||
|
// Post-B5 behaviour: classifyFileContext returns 'code-dominant' for `.ts`
|
||||||
|
// files (unless the file is overwhelmingly shader/markup), which disables
|
||||||
|
// rules 11-13. The credential below is therefore detected.
|
||||||
|
//
|
||||||
|
// The placeholder __ENTROPY_PAYLOAD_PLACEHOLDER__ is replaced at test time
|
||||||
|
// with a randomly generated high-entropy string. The static fixture stays
|
||||||
|
// out of the pre-edit-secrets hook because no real high-entropy literal is
|
||||||
|
// committed to disk.
|
||||||
|
|
||||||
|
const fragmentShader = `
|
||||||
|
precision highp float;
|
||||||
|
uniform vec3 u_resolution;
|
||||||
|
uniform float u_time;
|
||||||
|
varying vec2 v_uv;
|
||||||
|
void main() {
|
||||||
|
vec3 color = vec3(v_uv, sin(u_time));
|
||||||
|
gl_FragColor = vec4(color, 1.0);
|
||||||
|
}
|
||||||
|
`;
|
||||||
|
|
||||||
|
// The next line carries the placeholder AND a trailing `uniform vec3` comment
|
||||||
|
// — exactly the kind of GLSL-adjacent line that rule 11 used to suppress.
|
||||||
|
const placeholder = "__ENTROPY_PAYLOAD_PLACEHOLDER__"; // uniform vec3 normal;
|
||||||
|
|
||||||
|
export { fragmentShader, placeholder };
|
||||||
|
|
@ -262,4 +262,82 @@ describe('entropy-scanner context suppression (v7.0.0+)', () => {
|
||||||
await rm(fx, { recursive: true, force: true });
|
await rm(fx, { recursive: true, force: true });
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
describe('D. B5 file-context classification (v7.2.0)', () => {
|
||||||
|
it('B5 regression: code-dominant .ts file with embedded GLSL — credential adjacent to shader is detected', async () => {
|
||||||
|
// Polyglot TS file: many code lines, a few GLSL lines inside a template
|
||||||
|
// literal, and a credential-shaped string on a line that happens to
|
||||||
|
// contain GLSL keyword tokens. Pre-B5 rule 11 line-proximity suppressed
|
||||||
|
// this. Post-B5 classifyFileContext returns 'code-dominant' (sample is
|
||||||
|
// mostly TS code, <50% GLSL/markup), rules 11-13 are gated off, and
|
||||||
|
// the credential is detected.
|
||||||
|
const fx = await newRoot('ent-b5-polyglot-');
|
||||||
|
const fixtureContent = [
|
||||||
|
'import { Renderer } from "./renderer";',
|
||||||
|
'',
|
||||||
|
'const fragmentShader = `',
|
||||||
|
' precision highp float;',
|
||||||
|
' uniform vec3 u_resolution;',
|
||||||
|
' varying vec2 v_uv;',
|
||||||
|
'`;',
|
||||||
|
'',
|
||||||
|
'// Adjacent line carries GLSL tokens AND the credential payload.',
|
||||||
|
'const blob = "' + PAYLOAD + '"; // uniform vec3 normal;',
|
||||||
|
'',
|
||||||
|
'export { fragmentShader, blob };',
|
||||||
|
].join('\n');
|
||||||
|
await writeFixture(fx, 'shader-app.ts', fixtureContent);
|
||||||
|
resetCounter();
|
||||||
|
const discovery = await discoverFiles(fx);
|
||||||
|
const result = await scan(fx, discovery);
|
||||||
|
assert.ok(
|
||||||
|
result.findings.length >= 1,
|
||||||
|
'expected B5 to surface credential in code-dominant .ts despite GLSL neighbour; got ' + result.findings.length
|
||||||
|
);
|
||||||
|
await rm(fx, { recursive: true, force: true });
|
||||||
|
});
|
||||||
|
|
||||||
|
it('B5 control: legitimate .glsl file with high-entropy hash in shader source is still suppressed (extension skip)', async () => {
|
||||||
|
// A pure-shader file is skipped at the file-extension gate, never
|
||||||
|
// reaching classifyFileContext. This control confirms the extension
|
||||||
|
// skip still works (B5 only changed line-level rule gating).
|
||||||
|
const fx = await newRoot('ent-b5-glsl-');
|
||||||
|
await writeFixture(fx, 'noise.glsl',
|
||||||
|
'uniform vec3 u_seed;\nvec3 rand = vec3(' + PAYLOAD + ');\n');
|
||||||
|
resetCounter();
|
||||||
|
const discovery = await discoverFiles(fx);
|
||||||
|
const result = await scan(fx, discovery);
|
||||||
|
assert.equal(result.findings.length, 0, '.glsl files remain extension-skipped');
|
||||||
|
await rm(fx, { recursive: true, force: true });
|
||||||
|
});
|
||||||
|
|
||||||
|
it('B5 control: shader-dominant .ts file with ≥50% GLSL lines downgrades to mixed and suppresses', async () => {
|
||||||
|
// A code-extension file that is *mostly* shader template content —
|
||||||
|
// rule 11 should still fire because classifyFileContext downgrades it
|
||||||
|
// to 'mixed' (≥50% sampled lines match GLSL/INLINE_MARKUP).
|
||||||
|
const fx = await newRoot('ent-b5-shader-ts-');
|
||||||
|
const fixtureContent = [
|
||||||
|
'uniform vec3 u_resolution;',
|
||||||
|
'uniform vec3 u_camera_pos;',
|
||||||
|
'uniform float u_time;',
|
||||||
|
'varying vec2 v_uv;',
|
||||||
|
'varying vec3 v_normal;',
|
||||||
|
'attribute vec3 position;',
|
||||||
|
'attribute vec2 uv;',
|
||||||
|
'precision highp float;',
|
||||||
|
'gl_Position = vec4(position, 1.0);',
|
||||||
|
'gl_FragColor = vec4(1.0);',
|
||||||
|
'const blob = "' + PAYLOAD + '"; // uniform vec3 normal;',
|
||||||
|
].join('\n');
|
||||||
|
await writeFixture(fx, 'shader-heavy.ts', fixtureContent);
|
||||||
|
resetCounter();
|
||||||
|
const discovery = await discoverFiles(fx);
|
||||||
|
const result = await scan(fx, discovery);
|
||||||
|
assert.equal(
|
||||||
|
result.findings.length, 0,
|
||||||
|
'expected shader-dense .ts (≥50% GLSL lines) to downgrade to mixed and suppress; got ' + result.findings.length
|
||||||
|
);
|
||||||
|
await rm(fx, { recursive: true, force: true });
|
||||||
|
});
|
||||||
|
});
|
||||||
});
|
});
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue