/**
 * T5 — strip IFS-based word splitting outside single-quoted regions.
 *
 * Patterns matched: ${IFS}, ${IFS:0:1}, $IFS. Each occurrence is replaced
 * with a single space. Text inside '...' is copied through verbatim, so the
 * literal string `'${IFS}'` is never rewritten.
 *
 * The scan is a single left-to-right pass: at every position a complete
 * single-quoted region is tried first and, when found, consumed unchanged.
 * No placeholder/sentinel characters are inserted into the string, so input
 * that already contains NUL bytes followed by digits cannot be mistaken for
 * a masked region and corrupted on the way out.
 *
 * @param {string} cmd - Command string to normalize
 * @returns {string} Command with IFS expansions outside '...' turned into spaces
 */
function normalizeIFS(cmd) {
  return cmd.replace(
    // Alternative 1 keeps quoted literals; alternatives 2-3 are the IFS forms.
    // `\b` after $IFS keeps longer variable names such as $IFSX untouched.
    /'[^']*'|\$\{IFS(?::0:1)?\}|\$IFS\b/g,
    (match) => (match.startsWith("'") ? match : ' '),
  );
}
/**
 * T6 — decode ANSI-C numeric escapes inside `$'...'` contexts only.
 *
 * The shell treats $'\x72\x6d' as the bytes r and m, and attackers use this
 * to hide command names from regex gates. The `$'...'` wrapper is removed
 * and the following escapes in its content are decoded:
 *   - \xH / \xHH     one or two hex digits
 *   - \N .. \NNN     one to three octal digits (value masked to eight bits)
 *   - \uH .. \uHHHH  one to four hex digits
 *   - \UH .. \UHHHHHHHH  one to eight hex digits; values above U+10FFFF are
 *     left as written because they are not valid code points
 *
 * Any other escape (\n, \t, \\, ...) is left as written. Regular
 * single-quoted strings '...' (no leading `$`) are not touched.
 *
 * @param {string} cmd - Command string to normalize
 * @returns {string} Command with `$'...'` segments replaced by decoded content
 */
function normalizeAnsiCHex(cmd) {
  const numericEscape =
    /\\(?:x([0-9a-fA-F]{1,2})|([0-7]{1,3})|u([0-9a-fA-F]{1,4})|U([0-9a-fA-F]{1,8}))/g;

  return cmd.replace(/\$'([^']*)'/g, (_, content) =>
    content.replace(numericEscape, (escape, hex, octal, u4, u8) => {
      if (octal !== undefined) {
        // Octal escapes denote a single eight-bit character.
        return String.fromCharCode(parseInt(octal, 8) & 0xff);
      }
      // Exactly one of the hex-style groups is defined and non-empty here.
      const codePoint = parseInt(hex || u4 || u8, 16);
      // String.fromCodePoint throws above U+10FFFF; keep such escapes as-is.
      return codePoint > 0x10ffff ? escape : String.fromCodePoint(codePoint);
    }),
  );
}
Order inside T5/T6 is internal; externally we label + // the full pipeline T1-T6. + let result = normalizeIFS(cmd); + result = normalizeAnsiCHex(result); + + result = result + // T1 Strip empty single quotes: w''get -> wget .replace(/''/g, '') - // Strip empty double quotes: r""m -> rm + // T2 Strip empty double quotes: r""m -> rm .replace(/""/g, '') - // Single-char ${x} -> x (evasion: c${u}rl -> curl, assumes x=x) + // T3 Single-char ${x} -> x (evasion: c${u}rl -> curl, assumes x=x) .replace(/\$\{(\w)\}/g, '$1') - // Multi-char ${ANYTHING} -> '' (unknown value, strip entirely) + // T3 Multi-char ${ANYTHING} -> '' (unknown value, strip entirely) .replace(/\$\{[^}]*\}/g, '') // Strip backtick subshell with empty/whitespace content .replace(/`\s*`/g, ''); - // Iteratively strip backslash between word chars (c\u\r\l needs 2 passes) + // T4 Iteratively strip backslash between word chars (c\u\r\l needs 2 passes) let prev; do { prev = result;