feat(bash-normalize): T9 — one-level variable substitution (E10)

Defeats split-and-substitute evasion, where an attacker splits a destructive command name across an assignment and a variable reference (X=rm; later $X) so that downstream regex gates miss the literal command name. T9 collects prefix assignments (VAR=value at the start of the string or after ; & |) and replaces ${VAR} / $VAR references with the captured value. One-level forward-flow only — chained vars are not followed. Limits documented in the JSDoc: (1) quoted assignments (X="rm -rf") are not parsed, because value capture stops at whitespace; (2) substitution is global within the string, not scoped, which is acceptable because T3 strips unknown ${VAR} to '' afterwards. Single-quoted literals are masked before T9 runs, so legitimate strings are preserved (FP probe in tests). 7 new tests in bash-normalize-t7-t9.test.mjs. Closes E10 in critical-review-2026-04-20.md.
2026-04-30 15:12:02 +02:00 · 2026-04-30 15:12:02 +02:00 · 037b9644f3
commit 037b9644f3
parent 0a0c1fc412
2 changed files with 121 additions and 1 deletions
--- a/plugins/llm-security/scanners/lib/bash-normalize.mjs
+++ b/plugins/llm-security/scanners/lib/bash-normalize.mjs
@ -16,6 +16,9 @@
 //   T4 — backslash-between-words:   c\u\r\l          -> curl
 //   T5 — IFS word-splitting:        rm${IFS}-rf${IFS}/ -> rm -rf /
 //   T6 — ANSI-C hex quoting:        $'\x72\x6d' -rf / -> rm -rf /
+//   T9 — eval-via-variable:         X=rm; eval "$X" -> X=rm; eval rm
+//        (one-level forward-flow; T7 process-substitution + T8 base64-pipe-shell
+//        live in adjacent layers, see workflow-scanner / pre-bash-destructive)
 //
 // Execution order:
 //   1. Strip empty single-quote pairs (T1) so c''u''rl -> curl before masking.
@ -43,6 +46,40 @@ function decodeAnsiCHex(cmd) {
  );
 }

+/**
+ * T9 — Replace $VAR / ${VAR} references with the value of a VAR=value
+ * assignment found in the same string (X=rm; eval "$X" -rf / exposes rm).
+ *
+ * Assignments are collected in one scan, only at the start of the string or
+ * right after ; & |. The value is the run of characters up to whitespace or
+ * ; & |, so X="rm -rf" yields the value "rm (opening quote included). The
+ * first assignment of a name wins. Replacement is global within the string,
+ * not scoped to eval or to text after the assignment; function replacers
+ * keep any $ sequences inside a value literal.
+ * NOTE(review): names are replaced one after another over the whole string,
+ * so Y=$X; X=rm; eval "$Y" ends as rm, while Y=X; X=rm; eval "$Y" stays X.
+ * First-wins means X=ls; X=rm; $X yields ls — confirm this is intended.
+ * @param {string} cmd - Command string to scan and rewrite.
+ * @returns {string} Rewritten string, or cmd itself if nothing is assigned.
+ */
+function decodeEvalViaVariable(cmd) {
+  const assignments = new Map();
+  const ASSIGN_RE = /(?:^|[;&|])\s*([A-Za-z_]\w*)=([^\s;&|]+)/g;
+  let m;
+  while ((m = ASSIGN_RE.exec(cmd)) !== null) {
+    if (!assignments.has(m[1])) assignments.set(m[1], m[2]);
+  }
+  if (assignments.size === 0) return cmd;
+  let result = cmd;
+  for (const [name, value] of assignments) {
+    const curlyRe = new RegExp(`\\$\\{${name}\\}`, 'g');
+    result = result.replace(curlyRe, () => value);
+    const bareRe = new RegExp(`\\$${name}\\b`, 'g');
+    result = result.replace(bareRe, () => value);
+  }
+  return result;
+}
+
 /**
 * Mask non-empty single-quoted regions with placeholders. Empty '' is NOT
 * masked — T1 already stripped them in the previous pass.
@ -66,7 +103,7 @@ function unmaskSingleQuoted(str, placeholders) {
 /**
 * Normalize bash parameter expansion and quoting evasion in a command string.
 *
- * Strips / rewrites (T1-T6):
+ * Strips / rewrites (T1-T6 + T9):
 *   - T1 Empty single quotes: ''                      (e.g., w''get -> wget)
 *   - T2 Empty double quotes: ""                      (e.g., r""m -> rm)
 *   - T3 Single-char parameter expansion: ${x} -> x   (c${u}rl -> curl)
@ -74,6 +111,7 @@ function unmaskSingleQuoted(str, placeholders) {
 *   - T4 Backslash escapes between word chars, iteratively (c\u\r\l -> curl)
 *   - T5 IFS word-splitting: ${IFS} / ${IFS:0:1} / $IFS -> ' '
 *   - T6 ANSI-C hex quoting inside $'...' -> decoded bytes
+ *   - T9 Eval-via-variable: X=rm; eval "$X" -> X=rm; eval rm
 *   - Backtick subshell with empty/whitespace content
 *
 * Does NOT rewrite:
@ -107,6 +145,10 @@ export function normalizeBashExpansion(cmd) {
    .replace(/\$\{IFS\}/g, ' ')
    .replace(/\$IFS\b/g, ' ');

+  // T9 — substitute one-level VAR=value assignments into ${VAR}/$VAR
+  // references. Must run BEFORE T3 (which strips unknown ${VAR} to '').
+  result = decodeEvalViaVariable(result);
+
  result = result
    // T2 Strip empty double quotes: r""m -> rm
    .replace(/""/g, '')
--- a/plugins/llm-security/tests/scanners/bash-normalize-t7-t9.test.mjs
+++ b/plugins/llm-security/tests/scanners/bash-normalize-t7-t9.test.mjs
@ -0,0 +1,78 @@
+// bash-normalize-t7-t9.test.mjs — Tests for the T9 (eval-via-variable)
+// normalization added in v7.3.0 (Batch C).
+//
+// T9 lives in bash-normalize.mjs and is exercised here through
+// normalizeBashExpansion. This file currently contains no T7 (process
+// substitution) cases. T8 (base64-pipe-shell) lives in
+// pre-bash-destructive.mjs and is covered by that hook's test file.
+//
+// Includes false-positive probes to guard against over-broad expansion.
+
+import { describe, it } from 'node:test';
+import assert from 'node:assert/strict';
+import { normalizeBashExpansion } from '../../scanners/lib/bash-normalize.mjs';
+
+describe('bash-normalize T9 — eval-via-variable evasion', () => {
+  it('substitutes "$X" reference after X=rm assignment', () => {
+    // The attacker splits the destructive command name across an assignment
+    // and an eval, so the literal name never appears next to its arguments.
+    const input = 'X=rm; eval "$X" -rf /';
+    const normalized = normalizeBashExpansion(input);
+    assert.match(normalized, /\brm\b/, `expected 'rm' in normalized output: ${normalized}`);
+  });
+
+  it('substitutes ${X} curly form: X=rm; eval "${X}" -rf', () => {
+    const input = 'X=rm; eval "${X}" -rf /';
+    const normalized = normalizeBashExpansion(input);
+    assert.match(normalized, /\brm\b/, `expected 'rm' in normalized output: ${normalized}`);
+  });
+
+  it('substitutes bare $X form (no quotes): X=rm; eval $X -rf', () => {
+    const input = 'X=rm; eval $X -rf /';
+    const normalized = normalizeBashExpansion(input);
+    assert.match(normalized, /\brm\b/, `expected 'rm' in normalized output: ${normalized}`);
+  });
+
+  it('one-level only — does NOT follow chained vars (Y=X; X=rm; eval "$Y")', () => {
+    // Y holds the bare name X (no $ sign), so $Y must become the literal
+    // text X and must not be resolved a second time to rm. This guards
+    // against accidentally making the substitution recursive.
+    const input = 'Y=X; X=rm; eval "$Y" -rf /';
+    const normalized = normalizeBashExpansion(input);
+    // Expected shape after T9: Y=X; X=rm; eval "X" -rf /
+    // The pattern below tolerates the double quotes being kept or removed,
+    // so it does not pin whether a later pass strips them.
+    assert.match(normalized, /eval "?X"?\b/, `expected one-level eval target = literal 'X': ${normalized}`);
+  });
+
+  it('leaves unrelated $UNKNOWN_VAR alone (handled by T3)', () => {
+    // No assignment for TARGET exists, so T9 has nothing to substitute.
+    // NOTE(review): this also relies on T3 leaving the bare $TARGET form
+    // untouched (T3 is said to handle only ${...} forms) — confirm.
+    const input = 'eval "$TARGET" -rf /';
+    const normalized = normalizeBashExpansion(input);
+    // The output must still contain the literal $TARGET reference, i.e.
+    // no pass in the pipeline replaced or removed it.
+    assert.match(normalized, /\$TARGET/, `expected unresolved $TARGET: ${normalized}`);
+  });
+});
+
+describe('bash-normalize T9 — false-positive probes', () => {
+  it('does not substitute inside single-quoted literals: echo \'$X\' stays as-is', () => {
+    // X=rm is assigned, so T9 is active here; the single-quoted literal
+    // must still come out unchanged. Per the commit notes, single-quoted
+    // regions are masked before T9 runs — that is what this probe guards.
+    const input = "X=rm; echo '$X is dangerous'";
+    const normalized = normalizeBashExpansion(input);
+    assert.match(normalized, /'\$X is dangerous'/,
+      `expected single-quoted literal preserved: ${normalized}`);
+  });
+
+  it('handles command with no assignments — pure passthrough', () => {
+    const input = 'eval "$X" -rf /';
+    const normalized = normalizeBashExpansion(input);
+    // No X=... assignment is present, so $X must survive. Only the
+    // presence of $X is asserted, not byte-for-byte equality with input.
+    assert.match(normalized, /\$X/, `expected $X unresolved: ${normalized}`);
+  });
+});