// bash-normalize.mjs — Normalize bash parameter expansion evasion techniques.
//
// Attackers can evade command-name matching by inserting shell metacharacters
// that are transparent to bash but break regex patterns.
//
// This module strips these constructs from command names so that downstream
// pattern matching sees the canonical form.
//
// Exported as a shared module — used by pre-bash-destructive.mjs and
// pre-install-supply-chain.mjs.
//
// Pipeline (defense-in-depth layer above Claude Code 2.1.98+ harness fixes):
//   T1 — empty single quotes:        w''get                -> wget
//   T2 — empty double quotes:        r""m                  -> rm
//   T3 — parameter expansion:        ${x} / ${FOO}         -> x / ''
//   T4 — backslash-between-words:    c\u\r\l               -> curl
//   T5 — IFS word-splitting:         rm${IFS}-rf${IFS}/    -> rm -rf /
//   T6 — ANSI-C numeric escapes:     $'\x72\x6d' -rf /     -> rm -rf /
//
// T5 and T6 run before T1-T4 so their outputs feed the rest of the pipeline
// in canonical form. T5 leaves quoted literals alone (`echo '${IFS}'` passes
// through T5 unchanged); note that T3 is not quote-aware and will still strip
// a braced expansion that survives T5.

/** IFS expansions that T5 rewrites to a single space: ${IFS}, ${IFS:0:1}, $IFS. */
const IFS_EXPANSION = /\$(?:\{IFS(?::0:1)?\}|IFS\b)/g;

/** A complete $'...' word; backslash pairs are consumed so \' does not close it. */
const ANSI_C_QUOTED = /\$'((?:[^'\\]|\\[\s\S])*)'/g;

/**
 * One backslash escape inside $'...'. Numeric forms are captured
 * (hex, octal, \u, \U); the final alternative consumes any other escaped
 * character so that `\\x72` is read as `\\` + `x72`, not as a hex escape.
 */
const ANSI_C_ESCAPE =
  /\\(?:x([0-9a-fA-F]{1,2})|([0-7]{1,3})|u([0-9a-fA-F]{1,4})|U([0-9a-fA-F]{1,8})|[\s\S])/g;

/**
 * Find the single quote that closes the quote opened at `open`.
 *
 * @param {string} cmd - Command string being scanned
 * @param {number} open - Index of the opening `'`
 * @param {boolean} honorsBackslash - true for $'...' (where \' is an escaped
 *   quote), false for plain '...' (where backslash is an ordinary character)
 * @returns {number} Index of the closing quote, or -1 if unterminated
 */
function findClosingQuote(cmd, open, honorsBackslash) {
  for (let j = open + 1; j < cmd.length; j += 1) {
    if (honorsBackslash && cmd[j] === '\\') {
      j += 1; // skip the escaped character as well
      continue;
    }
    if (cmd[j] === "'") return j;
  }
  return -1;
}

/**
 * T5 — replace IFS-based word splitting with a single space, except where the
 * shell would treat the text literally.
 *
 * Patterns matched: ${IFS}, ${IFS:0:1}, $IFS. The command is scanned left to
 * right and split into literal spans (copied verbatim) and expandable text
 * (where the patterns are replaced). Literal spans are:
 *   - a backslash and the character it escapes (so `\'` never opens a quote);
 *   - '...' and $'...' words that start outside double quotes.
 * An apostrophe inside double quotes is ordinary text, so `"it's" ... "it's"`
 * cannot hide an expansion. Unterminated quotes are treated as ordinary text.
 *
 * Limitation: quoting that nests through $(...) or backticks is not modelled;
 * only a flat toggle on `"` is tracked.
 *
 * @param {string} cmd - Raw command string
 * @returns {string} Command with IFS expansions outside literals replaced
 */
function normalizeIFS(cmd) {
  let out = '';
  let expandable = '';
  let inDouble = false;
  let i = 0;

  // Flush pending expandable text (with IFS rewritten), then copy a literal.
  const emitLiteral = (literal) => {
    out += expandable.replace(IFS_EXPANSION, ' ') + literal;
    expandable = '';
  };

  while (i < cmd.length) {
    const ch = cmd[i];

    // Escaped character: literal, and never a quote delimiter.
    if (ch === '\\' && i + 1 < cmd.length) {
      emitLiteral(cmd.slice(i, i + 2));
      i += 2;
      continue;
    }

    if (!inDouble) {
      const isAnsiC = ch === '$' && cmd[i + 1] === "'";
      if (isAnsiC || ch === "'") {
        const open = isAnsiC ? i + 1 : i;
        const close = findClosingQuote(cmd, open, isAnsiC);
        if (close !== -1) {
          emitLiteral(cmd.slice(i, close + 1));
          i = close + 1;
          continue;
        }
        // Unterminated quote: fall through and treat as ordinary text.
      }
    }

    if (ch === '"') inDouble = !inDouble;
    expandable += ch;
    i += 1;
  }

  return out + expandable.replace(IFS_EXPANSION, ' ');
}

/**
 * T6 — decode ANSI-C numeric escapes inside `$'...'` contexts only.
 *
 * Shell treats $'\x72\x6d' (and $'\162\155', $'\u0072\u006d') as the
 * characters r and m. Attackers use this to hide command names from regex
 * gates. Inside the $'...' wrapper this decodes:
 *   \xH / \xHH      one or two hex digits
 *   \n / \nn / \nnn one to three octal digits (value masked to 8 bits)
 *   \uH..HHHH       one to four hex digits
 *   \UH..HHHHHHHH   one to eight hex digits (left as-is above U+10FFFF)
 * Other escapes (\n, \t, \\, \' ...) are left as written. The `$'` and
 * closing `'` are dropped. Regular single-quoted strings '...' are not
 * touched.
 *
 * @param {string} cmd - Command string (after T5)
 * @returns {string} Command with $'...' words replaced by their decoded content
 */
function normalizeAnsiCHex(cmd) {
  return cmd.replace(ANSI_C_QUOTED, (_, content) =>
    content.replace(ANSI_C_ESCAPE, (escape, hex, octal, u4, u8) => {
      if (hex !== undefined) return String.fromCharCode(parseInt(hex, 16));
      if (octal !== undefined) {
        return String.fromCharCode(parseInt(octal, 8) & 0xff);
      }
      const unicode = u4 !== undefined ? u4 : u8;
      if (unicode === undefined) return escape; // non-numeric escape
      const codePoint = parseInt(unicode, 16);
      // String.fromCodePoint throws above U+10FFFF; keep the escape verbatim.
      return codePoint <= 0x10ffff ? String.fromCodePoint(codePoint) : escape;
    }),
  );
}

/**
 * Normalize bash parameter expansion and quoting evasion in a command string.
 *
 * Strips (T1-T6):
 *   - T1 Empty single quotes: '' (e.g., w''get -> wget)
 *   - T2 Empty double quotes: "" (e.g., r""m -> rm)
 *   - T3 Single-char parameter expansion: ${x} -> x (evasion: attacker sets x=x)
 *   - T3 Multi-char parameter expansion: ${ANYTHING} -> '' (unknown value)
 *   - T4 Backslash escapes between word chars, iteratively (c\u\r\l -> curl)
 *   - T5 IFS word-splitting: ${IFS}, ${IFS:0:1}, $IFS -> ' '
 *   - T6 ANSI-C numeric escapes inside $'...' -> decoded characters
 *   - Backtick subshell with empty/whitespace content
 *
 * Does NOT strip:
 *   - Quotes around arguments (only targets empty quotes that split command names)
 *   - $VAR without braces outside IFS (not an evasion pattern)
 *   - Backslashes before non-word chars
 *
 * Quote awareness applies to T5 only: T5 leaves `'${IFS}'` alone, but T3 runs
 * on the whole string afterwards and still removes the braced expansion
 * (`echo '${IFS}'` -> `echo ''`).
 *
 * @param {string} cmd - Raw command string
 * @returns {string} Normalized command string. Falsy input yields ''; a
 *   truthy non-string value is returned unchanged.
 */
export function normalizeBashExpansion(cmd) {
  if (!cmd || typeof cmd !== 'string') return cmd || '';

  // T5 + T6 run first so their outputs feed the rest of the pipeline in
  // canonical form.
  let result = normalizeIFS(cmd);
  result = normalizeAnsiCHex(result);

  result = result
    // T1 Strip empty single quotes: w''get -> wget
    .replace(/''/g, '')
    // T2 Strip empty double quotes: r""m -> rm
    .replace(/""/g, '')
    // T3 Single-char ${x} -> x (evasion: c${u}rl -> curl, assumes x=x)
    .replace(/\$\{(\w)\}/g, '$1')
    // T3 Multi-char ${ANYTHING} -> '' (unknown value, strip entirely)
    .replace(/\$\{[^}]*\}/g, '')
    // Strip backtick subshell with empty/whitespace content
    .replace(/`\s*`/g, '');

  // T4 Iteratively strip backslash between word chars. A single pass cannot
  // reuse a word char consumed by the previous match, so c\u\r\l needs two.
  let prev;
  do {
    prev = result;
    result = result.replace(/(\w)\\(\w)/g, '$1$2');
  } while (result !== prev);

  return result;
}