feat(bash-normalize): T9 — one-level variable substitution (E10)

Defeats split-and-substitute evasion where attackers split a destructive
command name across an assignment and a variable reference (X=rm; later
$X) so downstream regex gates miss the literal command name. T9 collects
prefix assignments (VAR=value at start of string or after ; & |) and
substitutes ${VAR} / $VAR forms with the captured value. One-level
forward-flow only — chained vars are not followed.

Documented limits in JSDoc:
- Quoted assignments (X="rm -rf") not parsed (whitespace stops capture)
- Substitution is global within string, not scoped. Acceptable because
  T3 strips unknown ${VAR} to '' afterwards.

Single-quoted literals are masked before T9 runs, so legitimate
strings are preserved (FP probe in tests).

7 new tests in bash-normalize-t7-t9.test.mjs.
Closes E10 in critical-review-2026-04-20.md.
This commit is contained in:
Kjell Tore Guttormsen 2026-04-30 15:12:02 +02:00
commit 037b9644f3
2 changed files with 121 additions and 1 deletions

View file

@ -16,6 +16,9 @@
// T4 — backslash-between-words: c\u\r\l -> curl
// T5 — IFS word-splitting: rm${IFS}-rf${IFS}/ -> rm -rf /
// T6 — ANSI-C hex quoting: $'\x72\x6d' -rf / -> rm -rf /
// T9 — eval-via-variable: X=rm; eval "$X" -> X=rm; eval rm
// (one-level forward-flow; T7 process-substitution + T8 base64-pipe-shell
// live in adjacent layers, see workflow-scanner / pre-bash-destructive)
//
// Execution order:
// 1. Strip empty single-quote pairs (T1) so c''u''rl -> curl before masking.
@ -43,6 +46,40 @@ function decodeAnsiCHex(cmd) {
);
}
/**
 * T9 — Substitute single-level variable assignments into ${VAR} and $VAR
 * references. Defeats split-and-eval evasion (X=rm; eval "$X" -rf /).
 *
 * Assignments are recognised only at the prefix of a command segment: at the
 * start of the string or right after one of `;`, `&`, `|` or a newline
 * (optionally followed by whitespace). Each reference is replaced with the
 * value of the latest assignment to that name that textually precedes it, so
 * re-assignment (X=ls; X=rm; $X) resolves to the value the shell would see.
 *
 * One-level only: substituted values are not re-scanned, so chained vars
 * (X=Y; Y=rm; eval "$X") are intentionally not followed.
 *
 * Limitations (documented for adversarial review):
 *   - Quoted values (X="rm -rf") are not parsed — value capture stops at
 *     whitespace. Unquoted single-token values are the common evasion idiom.
 *   - Substitution is not scoped to eval, and a reference that appears before
 *     any assignment to its name is still replaced (with the first captured
 *     value). This errs towards exposing the command name to later gates.
 *   - References to names that are never assigned are left untouched here.
 *
 * @param {string} cmd - Command string to normalize.
 * @returns {string} `cmd` with known ${VAR} / $VAR references replaced by
 *   their assigned values; returned unchanged when no assignment is found.
 */
function decodeEvalViaVariable(cmd) {
  // Newline is included because it separates commands just like `;` does.
  const ASSIGN_RE = /(?:^|[;&|\n])\s*([A-Za-z_]\w*)=([^\s;&|]+)/g;

  // name -> assignments in textual order, each with the index where it ends.
  const history = new Map();
  for (const match of cmd.matchAll(ASSIGN_RE)) {
    const [whole, name, value] = match;
    const entry = { end: match.index + whole.length, value };
    const entries = history.get(name);
    if (entries) {
      entries.push(entry);
    } else {
      history.set(name, [entry]);
    }
  }
  if (history.size === 0) return cmd;

  // Single pass over every ${NAME} / $NAME reference. The greedy \w* plus \b
  // means $XY is read as the name XY and never as $X followed by Y.
  const REF_RE = /\$\{([A-Za-z_]\w*)\}|\$([A-Za-z_]\w*)\b/g;
  return cmd.replace(REF_RE, (ref, curlyName, bareName, offset) => {
    const entries = history.get(curlyName || bareName);
    if (!entries) return ref;

    // Default to the first assignment so references that precede every
    // assignment are still substituted (conservative, global behaviour).
    let chosen = entries[0];
    for (const entry of entries) {
      if (entry.end > offset) break;
      chosen = entry;
    }
    // Returned from a replacer function, so `$&`-style sequences inside the
    // value are inserted literally rather than interpreted.
    return chosen.value;
  });
}
/**
* Mask non-empty single-quoted regions with placeholders. Empty '' is NOT
* masked — T1 already stripped them in the previous pass.
@ -66,7 +103,7 @@ function unmaskSingleQuoted(str, placeholders) {
/**
* Normalize bash parameter expansion and quoting evasion in a command string.
*
* Strips / rewrites (T1-T6):
* Strips / rewrites (T1-T6 + T9):
* - T1 Empty single quotes: '' (e.g., w''get -> wget)
* - T2 Empty double quotes: "" (e.g., r""m -> rm)
* - T3 Single-char parameter expansion: ${x} -> x (c${u}rl -> curl)
@ -74,6 +111,7 @@ function unmaskSingleQuoted(str, placeholders) {
* - T4 Backslash escapes between word chars, iteratively (c\u\r\l -> curl)
* - T5 IFS word-splitting: ${IFS} / ${IFS:0:1} / $IFS -> ' '
* - T6 ANSI-C hex quoting inside $'...' -> decoded bytes
* - T9 Eval-via-variable: X=rm; eval "$X" -> X=rm; eval rm
* - Backtick subshell with empty/whitespace content
*
* Does NOT rewrite:
@ -107,6 +145,10 @@ export function normalizeBashExpansion(cmd) {
.replace(/\$\{IFS\}/g, ' ')
.replace(/\$IFS\b/g, ' ');
// T9 — substitute one-level VAR=value assignments into ${VAR}/$VAR
// references. Must run BEFORE T3 (which strips unknown ${VAR} to '').
result = decodeEvalViaVariable(result);
result = result
// T2 Strip empty double quotes: r""m -> rm
.replace(/""/g, '')