feat(scanners): extend bash-normalize with T5 IFS + T6 ANSI-C hex quoting
This commit is contained in:
parent
6d0e798b01
commit
05aaee0fcc
1 changed files with 74 additions and 13 deletions
|
|
@ -8,22 +8,77 @@
|
||||||
//
|
//
|
||||||
// Exported as a shared module — used by pre-bash-destructive.mjs and
|
// Exported as a shared module — used by pre-bash-destructive.mjs and
|
||||||
// pre-install-supply-chain.mjs.
|
// pre-install-supply-chain.mjs.
|
||||||
|
//
|
||||||
|
// Pipeline (defense-in-depth layer above Claude Code 2.1.98+ harness fixes):
|
||||||
|
// T1 — empty single quotes: w''get -> wget
|
||||||
|
// T2 — empty double quotes: r""m -> rm
|
||||||
|
// T3 — parameter expansion: ${x} / ${FOO} -> x / ''
|
||||||
|
// T4 — backslash-between-words: c\u\r\l -> curl
|
||||||
|
// T5 — IFS word-splitting: rm${IFS}-rf${IFS}/ -> rm -rf /
|
||||||
|
// T6 — ANSI-C hex quoting: $'\x72\x6d' -rf / -> rm -rf /
|
||||||
|
//
|
||||||
|
// T5 and T6 run before T1-T4 so their outputs feed the rest of the pipeline
|
||||||
|
// in canonical form. Both preserve single-quoted literals (false-positive
|
||||||
|
// probe: `echo '${IFS}'` stays untouched).
|
||||||
|
|
||||||
|
/**
 * T5 — neutralize IFS-based word splitting outside single-quoted regions.
 *
 * Each `${IFS:0:1}`, `${IFS}` and bare `$IFS` (when not followed by another
 * word character, so `$IFSX` is left alone) is replaced with a single space.
 * Text inside `'...'` is copied through verbatim, so the literal string
 * `'${IFS}'` is never rewritten.
 *
 * Quoted regions and IFS references are consumed in one left-to-right pass
 * rather than by masking quotes with sentinel placeholders. That way input
 * which already contains NUL bytes followed by digits can never be mistaken
 * for a placeholder and "restored" to unrelated text.
 *
 * @param {string} cmd - Command string to normalize
 * @returns {string} Command string with IFS references replaced by spaces
 */
function normalizeIFS(cmd) {
  // A quoted region starts with `'` and an IFS reference starts with `$`, so
  // the two alternatives can never begin at the same index; whichever begins
  // first in the scan wins and its whole span is consumed.
  const quotedOrIfs = /'[^']*'|\$\{IFS(?::0:1)?\}|\$IFS\b/g;

  return cmd.replace(quotedOrIfs, (match) =>
    match.startsWith("'") ? match : ' ',
  );
}
|
||||||
|
|
||||||
|
/**
 * T6 — decode ANSI-C hex escapes inside `$'...'` and drop the wrapper.
 *
 * The shell treats `$'\x72\x6d'` as the characters `r` and `m`, which lets a
 * command name hide from regex gates. Every `$'...'` span (from `$'` up to
 * the next `'`) is replaced by its content with each `\xH` / `\xHH` escape
 * decoded. Bash accepts one or two hex digits after `\x`, so both forms are
 * decoded here. Other escape kinds inside the span are left as written.
 *
 * Plain `'...'` strings that are not preceded by `$` are not matched.
 * The span ends at the first `'`, so an escaped quote (`\'`) inside `$'...'`
 * terminates the match early.
 *
 * @param {string} cmd - Command string to normalize
 * @returns {string} Command string with `$'...'` spans unwrapped and decoded
 */
function normalizeAnsiCHex(cmd) {
  // Greedy {1,2} mirrors the shell: take two hex digits when available,
  // otherwise one.
  const decodeHexEscape = (_, hex) =>
    String.fromCharCode(Number.parseInt(hex, 16));

  return cmd.replace(/\$'([^']*)'/g, (_, content) =>
    content.replace(/\\x([0-9a-fA-F]{1,2})/g, decodeHexEscape),
  );
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Normalize bash parameter expansion and quoting evasion in a command string.
|
* Normalize bash parameter expansion and quoting evasion in a command string.
|
||||||
*
|
*
|
||||||
* Strips:
|
* Strips (T1-T6):
|
||||||
* - Empty single quotes: '' (e.g., w''get -> wget)
|
* - T1 Empty single quotes: '' (e.g., w''get -> wget)
|
||||||
* - Empty double quotes: "" (e.g., r""m -> rm)
|
* - T2 Empty double quotes: "" (e.g., r""m -> rm)
|
||||||
* - Single-char parameter expansion: ${x} -> x (evasion: attacker sets x=x)
|
* - T3 Single-char parameter expansion: ${x} -> x (evasion: attacker sets x=x)
|
||||||
* - Multi-char parameter expansion: ${ANYTHING} -> '' (unknown value)
|
* - T3 Multi-char parameter expansion: ${ANYTHING} -> '' (unknown value)
|
||||||
* - Backslash escapes between word chars, iteratively (c\u\r\l -> curl)
|
* - T4 Backslash escapes between word chars, iteratively (c\u\r\l -> curl)
|
||||||
|
* - T5 IFS word-splitting: ${IFS}, ${IFS:0:1}, $IFS -> ' '
|
||||||
|
* - T6 ANSI-C hex quoting inside $'...' -> decoded bytes
|
||||||
* - Backtick subshell with empty/whitespace content
|
* - Backtick subshell with empty/whitespace content
|
||||||
*
|
*
|
||||||
* Does NOT strip:
|
* Does NOT strip:
|
||||||
* - Quotes around arguments (only targets empty quotes that split command names)
|
* - Quotes around arguments (only targets empty quotes that split command names)
|
||||||
* - $VAR without braces (not an evasion pattern)
|
* - $VAR without braces, other than $IFS (not an evasion pattern)
|
||||||
* - Backslashes before non-word chars (\n, \t, etc.)
|
* - Backslashes before non-word chars (\n, \t, etc.)
|
||||||
|
* - Content inside single-quoted regions (T5 preserves them; `echo '${IFS}'` untouched)
|
||||||
*
|
*
|
||||||
* @param {string} cmd - Raw command string
|
* @param {string} cmd - Raw command string
|
||||||
* @returns {string} Normalized command string
|
* @returns {string} Normalized command string
|
||||||
|
|
@ -31,19 +86,25 @@
|
||||||
export function normalizeBashExpansion(cmd) {
|
export function normalizeBashExpansion(cmd) {
|
||||||
if (!cmd || typeof cmd !== 'string') return cmd || '';
|
if (!cmd || typeof cmd !== 'string') return cmd || '';
|
||||||
|
|
||||||
let result = cmd
|
// T5 + T6 run first so their outputs feed the rest of the pipeline in
|
||||||
// Strip empty single quotes: w''get -> wget
|
// canonical form. Order inside T5/T6 is internal; externally we label
|
||||||
|
// the full pipeline T1-T6.
|
||||||
|
let result = normalizeIFS(cmd);
|
||||||
|
result = normalizeAnsiCHex(result);
|
||||||
|
|
||||||
|
result = result
|
||||||
|
// T1 Strip empty single quotes: w''get -> wget
|
||||||
.replace(/''/g, '')
|
.replace(/''/g, '')
|
||||||
// Strip empty double quotes: r""m -> rm
|
// T2 Strip empty double quotes: r""m -> rm
|
||||||
.replace(/""/g, '')
|
.replace(/""/g, '')
|
||||||
// Single-char ${x} -> x (evasion: c${u}rl -> curl, assumes x=x)
|
// T3 Single-char ${x} -> x (evasion: c${u}rl -> curl, assumes x=x)
|
||||||
.replace(/\$\{(\w)\}/g, '$1')
|
.replace(/\$\{(\w)\}/g, '$1')
|
||||||
// Multi-char ${ANYTHING} -> '' (unknown value, strip entirely)
|
// T3 Multi-char ${ANYTHING} -> '' (unknown value, strip entirely)
|
||||||
.replace(/\$\{[^}]*\}/g, '')
|
.replace(/\$\{[^}]*\}/g, '')
|
||||||
// Strip backtick subshell with empty/whitespace content
|
// Strip backtick subshell with empty/whitespace content
|
||||||
.replace(/`\s*`/g, '');
|
.replace(/`\s*`/g, '');
|
||||||
|
|
||||||
// Iteratively strip backslash between word chars (c\u\r\l needs 2 passes)
|
// T4 Iteratively strip backslash between word chars (c\u\r\l needs 2 passes)
|
||||||
let prev;
|
let prev;
|
||||||
do {
|
do {
|
||||||
prev = result;
|
prev = result;
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue