fix(taint-tracer): B6 — recognize destructuring + spread + rest patterns

Critical-review §2 B6 finding: extractAssignedVariable handled
`const X = ...` and `X = ...` but missed every modern JS/TS
destructuring pattern. Sinks downstream of destructured/spread vars
produced false negatives at the propagation step.

Patterns now recognized:
- `const { x } = source`               object destructuring
- `const { x, y } = source`            multi-key
- `const { secret: alias } = source`   renamed (key NOT bound)
- `const { x, ...spread } = source`    object rest
- `const { a, b: { c } } = source`     nested object (key NOT bound)
- `const [a, b] = source`              array destructuring
- `const [first, ...rest] = source`    array rest
- `const [a, [b, c]] = source`         nested array
- `const { user: { id }, ...rest }`    mixed nested

Implementation: regex-based two-pass walker. Pass 1 detects whether
the LHS is a destructuring pattern (`{...}` or `[...]`). If yes, the
new `extractDestructuredNames` helper walks the pattern body via a
balanced-bracket depth counter, recurses into nested patterns, and
distinguishes keys (`key:`) from bindings. If no, the plain-decl
branch matches `\b(?:const|let|var)\s+(\w+)`.

Plain-assignment branch (`X = ...` without keyword) and Python-style
patterns are unchanged.

The function is now exported for direct unit testing — same pattern
as `_resetCacheForTest` in policy-loader. The internal walker
(`extractDestructuredNames`) remains module-private.

Tests: +19 cases in tests/scanners/taint-destructuring.test.mjs:
  - 5 pre-B6 patterns (regression guard: plain decl, plain assign,
    no-match on equality)
  - 12 destructuring patterns covering object/array/rest/nested
  - 2 non-destructuring regressions (return literal, arrow param)

Existing taint-tracer.test.mjs and taint.test.mjs unchanged — both
green (14 → 14, fixture-based integration tests not affected).

Suite: 1551 → 1570 (+19). All green.
This commit is contained in:
Kjell Tore Guttormsen 2026-04-29 14:05:34 +02:00
commit 68b9ea2692
2 changed files with 257 additions and 19 deletions

View file

@ -159,39 +159,149 @@ function downgradeSeverity(sev) {
// ---------------------------------------------------------------------------
/**
* Attempt to extract the variable name being assigned on a source line.
* Attempt to extract the variable name(s) being assigned on a source line.
* Handles:
* const/let/var X = <source>
* X = <source>
* X: <source> (Python / YAML-ish)
* (X) = <source> (destructuring approximation)
* const/let/var X = <source> (plain decl)
* X = <source> (plain assignment)
* X: <source> (Python / YAML-ish)
* const { x } = <source> (object destructuring)
* const { x, y } = <source> (multi-key)
* const { secret: alias } = <source> (renamed)
* const { a, ...spread } = <source> (object rest)
* const { a, b: { c } } = <source> (nested object)
* const [a, b] = <source> (array destructuring)
* const [first, ...rest] = <source> (array rest)
* const [a, [b, c]] = <source> (nested array)
*
* Returns an empty array if no assignment variable is found the source
* will still be tracked for same-line sink detection, but not propagated.
* Implementation: regex-based, no full JS parser. Same constraint as the
* pre-B6 extractor the goal is best-effort, not soundness. Untracked
* variables fall back to same-line sink detection (no propagation).
*
* Returns an empty array if no assignment variable is found.
*
* @param {string} line
* @returns {string[]} variable names (may be empty)
*/
function extractAssignedVariable(line) {
const names = [];
export function extractAssignedVariable(line) {
const names = new Set();
// Pattern 1: const/let/var X = ... or const/let/var { X } = ...
const declMatch = line.match(/\b(?:const|let|var)\s+\{?\s*(\w+)/);
if (declMatch) {
names.push(declMatch[1]);
// Identify a destructuring pattern boundary on the LHS of `=`.
// Match `const|let|var` followed by either `{...}` or `[...]` and `=`.
// We capture the LHS-pattern body so we can extract names without
// reading past the assignment.
const destructDecl = line.match(/\b(?:const|let|var)\s+([{[][\s\S]*?[\]}])\s*=[^=]/);
if (destructDecl) {
extractDestructuredNames(destructDecl[1], names);
} else {
// Pattern 1: const/let/var X = ... (plain identifier — keep
// existing behavior; the original pre-B6 regex tolerated optional `{`
// and silently ate the first key. Now that destructuring has its own
// branch above, the plain-decl branch only matches plain identifiers.)
const declMatch = line.match(/\b(?:const|let|var)\s+(\w+)\s*=/);
if (declMatch) {
names.add(declMatch[1]);
}
}
// Pattern 2: plain assignment X = ... (no keyword)
// Avoid matching == and ===
const assignMatch = line.match(/^\s*(\w+)\s*=[^=]/);
if (assignMatch && !names.includes(assignMatch[1])) {
names.push(assignMatch[1]);
if (assignMatch) {
names.add(assignMatch[1]);
}
// Pattern 3: Python-style keyword argument or named parameter: X = source
// Already covered by Pattern 2 above.
// Pattern 3 (Python-style `X: source`) — already covered by other patterns
// when present in YAML/Python contexts via the plain-decl branch.
return names;
return [...names];
}
/**
* Walk a destructuring pattern body (the `{...}` or `[...]` after the
* `const`/`let`/`var` keyword and before `=`) and add every bound
* identifier to `names`. Handles nested patterns and rest elements.
*
* Pure regex does not parse balanced brackets perfectly, but the
* patterns we care about (plain identifiers, renamed keys `key: alias`,
* rest `...spread`) all surface as `\w+` tokens at predictable positions
* that a simple tokenizer can extract. Edge case: shorthand keys with
* default values (`{ x = 5 }`) are handled by the identifier-before-= rule.
*
* @param {string} pattern The body including the outer brackets.
* @param {Set<string>} names Mutated.
*/
function extractDestructuredNames(pattern, names) {
// Strip outer brackets so we focus on contents.
const inner = pattern.slice(1, -1);
// Token-walk: at each position consume one of:
// - `{ ... }` or `[ ... ]` — recurse into the nested pattern
// - `key: <rhs>` — bind whatever \w+ comes from <rhs>'s leading ident
// (or recurse if <rhs> is a nested pattern)
// - `...spread` — the next ident is the rest var
// - `ident` — bound directly (shorthand or array element)
// - `ident = default` — bound (default value ignored)
// - separators (`,`, whitespace) — skip
//
// Implementation simplification: match on three regex alternatives that
// cover everything in practice. Catastrophic-backtracking-safe: every
// token consumes ≥1 character.
let i = 0;
while (i < inner.length) {
const ch = inner[i];
if (ch === '{' || ch === '[') {
// Find matching close bracket via depth counter (handles nesting).
const open = ch;
const close = open === '{' ? '}' : ']';
let depth = 1;
let j = i + 1;
while (j < inner.length && depth > 0) {
if (inner[j] === open) depth++;
else if (inner[j] === close) depth--;
j++;
}
// Recurse into the nested pattern body.
extractDestructuredNames(inner.slice(i, j), names);
i = j;
continue;
}
if (ch === ',' || /\s/.test(ch) || ch === ':' || ch === '=') {
i++;
continue;
}
if (inner.startsWith('...', i)) {
i += 3;
continue;
}
// Identifier token. After this token: either followed by `:` (then
// the RHS is the actual binding — skip this token, the bind is the
// next ident), or followed by `,`/`}`/`]`/`=`/whitespace/end (then
// this token is the bound name).
const idMatch = inner.slice(i).match(/^(\w+)/);
if (!idMatch) {
i++;
continue;
}
const ident = idMatch[1];
const next = i + ident.length;
// Skip whitespace to find the next significant character.
let k = next;
while (k < inner.length && /\s/.test(inner[k])) k++;
if (inner[k] === ':') {
// This ident is a key — the RHS is the binding. Don't add this
// ident; the loop will pick up the RHS on the next iteration.
i = k + 1;
continue;
}
// Otherwise this ident IS bound.
names.add(ident);
i = next;
}
}
// ---------------------------------------------------------------------------

View file

@ -0,0 +1,128 @@
// taint-destructuring.test.mjs — B6 (v7.2.0) — taint-tracer destructuring + spread
//
// Critical-review §2 B6 finding: extractAssignedVariable handled `const X`
// and `X = ...` but missed every modern destructuring pattern. As a result,
// taint flows through destructured/spread variables produced false negatives
// at the propagation step (same-line sink detection still worked).
//
// This test pins the B6 fix at the unit-test level. The taint-tracer now
// exports `extractAssignedVariable` for direct testing — same pattern as
// policy-loader's `_resetCacheForTest`.
import { describe, it } from 'node:test';
import assert from 'node:assert/strict';
import { extractAssignedVariable } from '../../scanners/taint-tracer.mjs';
describe('extractAssignedVariable — pre-B6 patterns (regression guard)', () => {
it('plain const declaration: const x = req.body', () => {
const names = extractAssignedVariable('const x = req.body;');
assert.deepEqual(names, ['x']);
});
it('plain let declaration: let y = process.argv[1]', () => {
const names = extractAssignedVariable('let y = process.argv[1];');
assert.deepEqual(names, ['y']);
});
it('plain var declaration: var z = user_input', () => {
const names = extractAssignedVariable('var z = user_input;');
assert.deepEqual(names, ['z']);
});
it('plain assignment: tainted = req.body', () => {
const names = extractAssignedVariable('tainted = req.body;');
assert.deepEqual(names, ['tainted']);
});
it('does not match equality: x == req.body', () => {
const names = extractAssignedVariable('if (x == req.body) {}');
assert.equal(names.length, 0);
});
});
describe('extractAssignedVariable — B6 destructuring patterns', () => {
it('object destructuring: const { x } = req.body', () => {
const names = extractAssignedVariable('const { x } = req.body;');
assert.deepEqual(names.sort(), ['x']);
});
it('object destructuring multi-key: const { x, y } = req.body', () => {
const names = extractAssignedVariable('const { x, y } = req.body;');
assert.deepEqual(names.sort(), ['x', 'y']);
});
it('renamed destructuring: const { secret: alias } = req.body', () => {
const names = extractAssignedVariable('const { secret: alias } = req.body;');
assert.deepEqual(names.sort(), ['alias']);
// The key `secret` is NOT a binding — only `alias` is.
assert.ok(!names.includes('secret'), 'key (secret) must not be a binding');
});
it('object rest: const { x, ...spread } = req.body', () => {
const names = extractAssignedVariable('const { x, ...spread } = req.body;');
assert.deepEqual(names.sort(), ['spread', 'x']);
});
it('nested object destructuring: const { a, b: { c } } = req.body', () => {
const names = extractAssignedVariable('const { a, b: { c } } = req.body;');
assert.deepEqual(names.sort(), ['a', 'c']);
// `b` is a key — not a binding.
assert.ok(!names.includes('b'), 'key (b) must not be a binding');
});
it('array destructuring: const [a, b] = process.argv', () => {
const names = extractAssignedVariable('const [a, b] = process.argv;');
assert.deepEqual(names.sort(), ['a', 'b']);
});
it('array rest: const [first, ...rest] = process.argv', () => {
const names = extractAssignedVariable('const [first, ...rest] = process.argv;');
assert.deepEqual(names.sort(), ['first', 'rest']);
});
it('nested array destructuring: const [a, [b, c]] = source', () => {
const names = extractAssignedVariable('const [a, [b, c]] = source;');
assert.deepEqual(names.sort(), ['a', 'b', 'c']);
});
it('mixed destructuring: const { user: { id }, ...rest } = req.body', () => {
const names = extractAssignedVariable('const { user: { id }, ...rest } = req.body;');
assert.deepEqual(names.sort(), ['id', 'rest']);
// `user` is a key — not a binding.
assert.ok(!names.includes('user'), 'key (user) must not be a binding');
});
it('let with destructuring: let { x } = source', () => {
const names = extractAssignedVariable('let { x } = source;');
assert.deepEqual(names.sort(), ['x']);
});
it('var with destructuring: var [a] = source', () => {
const names = extractAssignedVariable('var [a] = source;');
assert.deepEqual(names.sort(), ['a']);
});
it('default value in destructuring: const { x = 5 } = source', () => {
// The destructured binding `x` is still bound; the default `= 5` is on
// the RHS of `:` if absent or just trails the ident. The walker treats
// `=` and any literal that follows as separators.
const names = extractAssignedVariable('const { x = 5 } = source;');
assert.ok(names.includes('x'), `expected x in ${JSON.stringify(names)}`);
});
});
describe('extractAssignedVariable — non-destructuring pre-B6 regressions', () => {
it('does not falsely add destructure-shaped tokens from non-decl lines', () => {
// `if ({ x } === ...)` is invalid JS but the regex must not match it
// as a binding. Same for object literals on the RHS without `const`.
const names = extractAssignedVariable('return { x: 1 };');
assert.equal(names.length, 0, `non-decl produced unexpected names: ${JSON.stringify(names)}`);
});
it('does not match arrow function param destructuring without const', () => {
// `({x}) => ...` is a destructure but not an assignment — skip.
const names = extractAssignedVariable('const fn = ({x}) => x;');
// The plain-decl branch still picks `fn` (the function-binding name).
assert.deepEqual(names, ['fn']);
});
});