/**
 * T9 — Substitute single-level variable assignments into ${VAR} and $VAR
 * references. Defeats split-and-eval evasion (X=rm; eval "$X" -rf /).
 *
 * Resolution rules:
 *   - Assignments are recognised only at the start of the string or right
 *     after a `;`, `&` or `|` separator. The value is the unquoted token up
 *     to the next whitespace or separator.
 *   - A reference resolves to the MOST RECENT assignment of that name that
 *     ends at or before the reference (shell forward-flow). This prevents a
 *     decoy first assignment from shadowing the real one:
 *       X=ls; X=rm; eval "$X" -rf /  ->  X=ls; X=rm; eval "rm" -rf /
 *   - A reference that has no earlier assignment falls back to the first
 *     assignment of that name anywhere in the string, so a command with a
 *     single assignment is substituted globally.
 *   - Substitution is a single pass over the input: substituted text is
 *     never rescanned, so resolution is exactly one level. Chained vars
 *     (X=Y; Y=rm; eval "$X") are intentionally not followed.
 *
 * Limitations (documented for adversarial review):
 *   - Quoted values (X="rm -rf") are not parsed — value capture stops at
 *     whitespace. Unquoted single-token values are the common evasion idiom.
 *   - Substitution is global within the string, not scoped to eval. The
 *     caller runs this before T3, which handles remaining ${VAR} forms.
 *
 * @param {string} cmd - Command string to normalize.
 * @returns {string} The command with known $VAR / ${VAR} references replaced
 *   by their assigned literal value; returned unchanged when it contains no
 *   assignments (or is not a string).
 */
function decodeEvalViaVariable(cmd) {
  if (typeof cmd !== 'string') return cmd;

  const ASSIGN_RE = /(?:^|[;&|])\s*([A-Za-z_]\w*)=([^\s;&|]+)/g;
  const REF_RE = /\$\{([A-Za-z_]\w*)\}|\$([A-Za-z_]\w*)\b/g;

  // name -> assignment sites in source order; `end` is the index just past
  // the assigned value, so references inside a value only see earlier sites.
  const sitesByName = new Map();
  for (const match of cmd.matchAll(ASSIGN_RE)) {
    const [whole, name, value] = match;
    const site = { end: match.index + whole.length, value };
    const sites = sitesByName.get(name);
    if (sites) {
      sites.push(site);
    } else {
      sitesByName.set(name, [site]);
    }
  }
  if (sitesByName.size === 0) return cmd;

  // Function replacer: the value is inserted literally (no `$&`-style
  // replacement patterns are interpreted) and the match offset is available.
  return cmd.replace(REF_RE, (ref, curlyName, bareName, offset) => {
    const sites = sitesByName.get(curlyName ?? bareName);
    if (!sites) return ref; // unknown variable: leave for later stages

    // Default to the first assignment (covers references that precede every
    // assignment), then advance to the latest one that ends before `offset`.
    let chosen = sites[0];
    for (const site of sites) {
      if (site.end > offset) break;
      chosen = site;
    }
    return chosen.value;
  });
}
// bash-normalize-t7-t9.test.mjs — Tests for the T9 (eval-via-variable)
// normalization added in v7.3.0 (Batch C).
//
// T9 lives in bash-normalize.mjs; these tests exercise it through
// normalizeBashExpansion. T7 (process substitution) and T8
// (base64-pipe-shell) live in adjacent layers (workflow-scanner /
// pre-bash-destructive, per the bash-normalize.mjs header) and are not
// exercised here.
//
// Includes false-positive probes to guard against over-broad expansion.

import { describe, it } from 'node:test';
import assert from 'node:assert/strict';
import { normalizeBashExpansion } from '../../scanners/lib/bash-normalize.mjs';

// NOTE: the positive tests assert on the eval TARGET, not on a bare /\brm\b/.
// The input itself contains `X=rm`, so a bare match would pass even if T9
// performed no substitution at all.
const EVAL_TARGET_IS_RM = /eval "?rm"?\s+-rf/;

describe('bash-normalize T9 — eval-via-variable evasion', () => {
  it('substitutes "$X" reference after X=rm assignment', () => {
    // Attacker splits the destructive command name across an assignment
    // and an eval. Without T9, downstream regex sees only `eval "$X"`.
    const input = 'X=rm; eval "$X" -rf /';
    const normalized = normalizeBashExpansion(input);
    assert.match(normalized, EVAL_TARGET_IS_RM, `expected eval target 'rm' in normalized output: ${normalized}`);
  });

  it('substitutes ${X} curly form: X=rm; eval "${X}" -rf', () => {
    const input = 'X=rm; eval "${X}" -rf /';
    const normalized = normalizeBashExpansion(input);
    assert.match(normalized, EVAL_TARGET_IS_RM, `expected eval target 'rm' in normalized output: ${normalized}`);
  });

  it('substitutes bare $X form (no quotes): X=rm; eval $X -rf', () => {
    const input = 'X=rm; eval $X -rf /';
    const normalized = normalizeBashExpansion(input);
    assert.match(normalized, EVAL_TARGET_IS_RM, `expected eval target 'rm' in normalized output: ${normalized}`);
  });

  it('one-level only — does NOT follow chained vars (Y=X; X=rm; eval "$Y")', () => {
    // Multi-level chained vars are explicitly NOT followed in T9.
    // Y resolves to literal "X", not to "rm". This is a documented limit;
    // the test guards against accidental recursion.
    const input = 'Y=X; X=rm; eval "$Y" -rf /';
    const normalized = normalizeBashExpansion(input);
    // After substitution: Y=X; X=rm; eval "X" -rf /
    assert.match(normalized, /eval "?X"?\b/, `expected one-level eval target = literal 'X': ${normalized}`);
  });

  it('leaves unrelated $UNKNOWN_VAR alone (handled by T3)', () => {
    // No assignment exists for $TARGET, so T9 is a no-op for it. The bare
    // $TARGET form is expected to survive the rest of the pipeline as well
    // (T3 only handles ${...} forms).
    const input = 'eval "$TARGET" -rf /';
    const normalized = normalizeBashExpansion(input);
    assert.match(normalized, /\$TARGET/, `expected unresolved $TARGET: ${normalized}`);
  });
});

describe('bash-normalize T9 — false-positive probes', () => {
  it('does not substitute inside single-quoted literals: echo \'$X\' stays as-is', () => {
    // Single-quoted literals are masked before T9 runs, so $X inside
    // them is preserved. Guards the destructive-name detector from
    // corrupting legitimate strings that mention bash variables.
    const input = "X=rm; echo '$X is dangerous'";
    const normalized = normalizeBashExpansion(input);
    assert.match(normalized, /'\$X is dangerous'/,
      `expected single-quoted literal preserved: ${normalized}`);
  });

  it('handles command with no assignments — pure passthrough', () => {
    const input = 'eval "$X" -rf /';
    const normalized = normalizeBashExpansion(input);
    // No X=... seen, so $X is left alone. (Consumer of this layer
    // sees the unresolved reference and decides what to do.)
    assert.match(normalized, /\$X/, `expected $X unresolved: ${normalized}`);
  });
});