fix(taint-tracer): B6 — recognize destructuring + spread + rest patterns
Critical-review §2 B6 finding: extractAssignedVariable handled
`const X = ...` and `X = ...` but missed every modern JS/TS
destructuring pattern. Sinks downstream of destructured/spread vars
produced false negatives at the propagation step.
Patterns now recognized:
- `const { x } = source` object destructuring
- `const { x, y } = source` multi-key
- `const { secret: alias } = source` renamed (key NOT bound)
- `const { x, ...spread } = source` object rest
- `const { a, b: { c } } = source` nested object (key NOT bound)
- `const [a, b] = source` array destructuring
- `const [first, ...rest] = source` array rest
- `const [a, [b, c]] = source` nested array
- `const { user: { id }, ...rest }` mixed nested
Implementation: regex-based two-pass walker. Pass 1 detects whether
the LHS is a destructuring pattern (`{...}` or `[...]`). If yes, the
new `extractDestructuredNames` helper walks the pattern body via a
balanced-bracket depth counter, recurses into nested patterns, and
distinguishes keys (`key:`) from bindings. If no, the plain-decl
branch matches `\b(?:const|let|var)\s+(\w+)`.
Plain-assignment branch (`X = ...` without keyword) and Python-style
patterns are unchanged.
The function is now exported for direct unit testing — same pattern
as `_resetCacheForTest` in policy-loader. The internal walker
(`extractDestructuredNames`) remains module-private.
Tests: +19 cases in tests/scanners/taint-destructuring.test.mjs:
- 5 pre-B6 patterns (regression guard: plain decl, plain assign,
no-match on equality)
- 12 destructuring patterns covering object/array/rest/nested
- 2 non-destructuring regressions (return literal, arrow param)
Existing taint-tracer.test.mjs and taint.test.mjs unchanged — both
green (14 → 14, fixture-based integration tests not affected).
Suite: 1551 → 1570 (+19). All green.
This commit is contained in:
parent
d3b1157a08
commit
68b9ea2692
2 changed files with 257 additions and 19 deletions
|
|
@ -159,39 +159,149 @@ function downgradeSeverity(sev) {
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Attempt to extract the variable name being assigned on a source line.
|
* Attempt to extract the variable name(s) being assigned on a source line.
|
||||||
* Handles:
|
* Handles:
|
||||||
* const/let/var X = <source>
|
* const/let/var X = <source> (plain decl)
|
||||||
* X = <source>
|
* X = <source> (plain assignment)
|
||||||
* X: <source> (Python / YAML-ish)
|
* X: <source> (Python / YAML-ish)
|
||||||
* (X) = <source> (destructuring approximation)
|
* const { x } = <source> (object destructuring)
|
||||||
|
* const { x, y } = <source> (multi-key)
|
||||||
|
* const { secret: alias } = <source> (renamed)
|
||||||
|
* const { a, ...spread } = <source> (object rest)
|
||||||
|
* const { a, b: { c } } = <source> (nested object)
|
||||||
|
* const [a, b] = <source> (array destructuring)
|
||||||
|
* const [first, ...rest] = <source> (array rest)
|
||||||
|
* const [a, [b, c]] = <source> (nested array)
|
||||||
*
|
*
|
||||||
* Returns an empty array if no assignment variable is found — the source
|
* Implementation: regex-based, no full JS parser. Same constraint as the
|
||||||
* will still be tracked for same-line sink detection, but not propagated.
|
* pre-B6 extractor — the goal is best-effort, not soundness. Untracked
|
||||||
|
* variables fall back to same-line sink detection (no propagation).
|
||||||
|
*
|
||||||
|
* Returns an empty array if no assignment variable is found.
|
||||||
*
|
*
|
||||||
* @param {string} line
|
* @param {string} line
|
||||||
* @returns {string[]} variable names (may be empty)
|
* @returns {string[]} variable names (may be empty)
|
||||||
*/
|
*/
|
||||||
function extractAssignedVariable(line) {
|
export function extractAssignedVariable(line) {
|
||||||
const names = [];
|
const names = new Set();
|
||||||
|
|
||||||
// Pattern 1: const/let/var X = ... or const/let/var { X } = ...
|
// Identify a destructuring pattern boundary on the LHS of `=`.
|
||||||
const declMatch = line.match(/\b(?:const|let|var)\s+\{?\s*(\w+)/);
|
// Match `const|let|var` followed by either `{...}` or `[...]` and `=`.
|
||||||
if (declMatch) {
|
// We capture the LHS-pattern body so we can extract names without
|
||||||
names.push(declMatch[1]);
|
// reading past the assignment.
|
||||||
|
const destructDecl = line.match(/\b(?:const|let|var)\s+([{[][\s\S]*?[\]}])\s*=[^=]/);
|
||||||
|
if (destructDecl) {
|
||||||
|
extractDestructuredNames(destructDecl[1], names);
|
||||||
|
} else {
|
||||||
|
// Pattern 1: const/let/var X = ... (plain identifier — keep
|
||||||
|
// existing behavior; the original pre-B6 regex tolerated optional `{`
|
||||||
|
// and silently ate the first key. Now that destructuring has its own
|
||||||
|
// branch above, the plain-decl branch only matches plain identifiers.)
|
||||||
|
const declMatch = line.match(/\b(?:const|let|var)\s+(\w+)\s*=/);
|
||||||
|
if (declMatch) {
|
||||||
|
names.add(declMatch[1]);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Pattern 2: plain assignment X = ... (no keyword)
|
// Pattern 2: plain assignment X = ... (no keyword)
|
||||||
// Avoid matching == and ===
|
// Avoid matching == and ===
|
||||||
const assignMatch = line.match(/^\s*(\w+)\s*=[^=]/);
|
const assignMatch = line.match(/^\s*(\w+)\s*=[^=]/);
|
||||||
if (assignMatch && !names.includes(assignMatch[1])) {
|
if (assignMatch) {
|
||||||
names.push(assignMatch[1]);
|
names.add(assignMatch[1]);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Pattern 3: Python-style keyword argument or named parameter: X = source
|
// Pattern 3 (Python-style `X: source`) — already covered by other patterns
|
||||||
// Already covered by Pattern 2 above.
|
// when present in YAML/Python contexts via the plain-decl branch.
|
||||||
|
|
||||||
return names;
|
return [...names];
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Walk a destructuring pattern body (the `{...}` or `[...]` after the
|
||||||
|
* `const`/`let`/`var` keyword and before `=`) and add every bound
|
||||||
|
* identifier to `names`. Handles nested patterns and rest elements.
|
||||||
|
*
|
||||||
|
* Pure regex — does not parse balanced brackets perfectly, but the
|
||||||
|
* patterns we care about (plain identifiers, renamed keys `key: alias`,
|
||||||
|
* rest `...spread`) all surface as `\w+` tokens at predictable positions
|
||||||
|
* that a simple tokenizer can extract. Edge case: shorthand keys with
|
||||||
|
* default values (`{ x = 5 }`) are handled by the identifier-before-= rule.
|
||||||
|
*
|
||||||
|
* @param {string} pattern The body including the outer brackets.
|
||||||
|
* @param {Set<string>} names Mutated.
|
||||||
|
*/
|
||||||
|
function extractDestructuredNames(pattern, names) {
|
||||||
|
// Strip outer brackets so we focus on contents.
|
||||||
|
const inner = pattern.slice(1, -1);
|
||||||
|
|
||||||
|
// Token-walk: at each position consume one of:
|
||||||
|
// - `{ ... }` or `[ ... ]` — recurse into the nested pattern
|
||||||
|
// - `key: <rhs>` — bind whatever \w+ comes from <rhs>'s leading ident
|
||||||
|
// (or recurse if <rhs> is a nested pattern)
|
||||||
|
// - `...spread` — the next ident is the rest var
|
||||||
|
// - `ident` — bound directly (shorthand or array element)
|
||||||
|
// - `ident = default` — bound (default value ignored)
|
||||||
|
// - separators (`,`, whitespace) — skip
|
||||||
|
//
|
||||||
|
// Implementation simplification: match on three regex alternatives that
|
||||||
|
// cover everything in practice. Catastrophic-backtracking-safe: every
|
||||||
|
// token consumes ≥1 character.
|
||||||
|
|
||||||
|
let i = 0;
|
||||||
|
while (i < inner.length) {
|
||||||
|
const ch = inner[i];
|
||||||
|
|
||||||
|
if (ch === '{' || ch === '[') {
|
||||||
|
// Find matching close bracket via depth counter (handles nesting).
|
||||||
|
const open = ch;
|
||||||
|
const close = open === '{' ? '}' : ']';
|
||||||
|
let depth = 1;
|
||||||
|
let j = i + 1;
|
||||||
|
while (j < inner.length && depth > 0) {
|
||||||
|
if (inner[j] === open) depth++;
|
||||||
|
else if (inner[j] === close) depth--;
|
||||||
|
j++;
|
||||||
|
}
|
||||||
|
// Recurse into the nested pattern body.
|
||||||
|
extractDestructuredNames(inner.slice(i, j), names);
|
||||||
|
i = j;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ch === ',' || /\s/.test(ch) || ch === ':' || ch === '=') {
|
||||||
|
i++;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (inner.startsWith('...', i)) {
|
||||||
|
i += 3;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Identifier token. After this token: either followed by `:` (then
|
||||||
|
// the RHS is the actual binding — skip this token, the bind is the
|
||||||
|
// next ident), or followed by `,`/`}`/`]`/`=`/whitespace/end (then
|
||||||
|
// this token is the bound name).
|
||||||
|
const idMatch = inner.slice(i).match(/^(\w+)/);
|
||||||
|
if (!idMatch) {
|
||||||
|
i++;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
const ident = idMatch[1];
|
||||||
|
const next = i + ident.length;
|
||||||
|
// Skip whitespace to find the next significant character.
|
||||||
|
let k = next;
|
||||||
|
while (k < inner.length && /\s/.test(inner[k])) k++;
|
||||||
|
if (inner[k] === ':') {
|
||||||
|
// This ident is a key — the RHS is the binding. Don't add this
|
||||||
|
// ident; the loop will pick up the RHS on the next iteration.
|
||||||
|
i = k + 1;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
// Otherwise this ident IS bound.
|
||||||
|
names.add(ident);
|
||||||
|
i = next;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
|
|
|
||||||
128
plugins/llm-security/tests/scanners/taint-destructuring.test.mjs
Normal file
128
plugins/llm-security/tests/scanners/taint-destructuring.test.mjs
Normal file
|
|
@ -0,0 +1,128 @@
|
||||||
|
// taint-destructuring.test.mjs — B6 (v7.2.0) — taint-tracer destructuring + spread
|
||||||
|
//
|
||||||
|
// Critical-review §2 B6 finding: extractAssignedVariable handled `const X`
|
||||||
|
// and `X = ...` but missed every modern destructuring pattern. As a result,
|
||||||
|
// taint flows through destructured/spread variables produced false negatives
|
||||||
|
// at the propagation step (same-line sink detection still worked).
|
||||||
|
//
|
||||||
|
// This test pins the B6 fix at the unit-test level. The taint-tracer now
|
||||||
|
// exports `extractAssignedVariable` for direct testing — same pattern as
|
||||||
|
// policy-loader's `_resetCacheForTest`.
|
||||||
|
|
||||||
|
import { describe, it } from 'node:test';
|
||||||
|
import assert from 'node:assert/strict';
|
||||||
|
import { extractAssignedVariable } from '../../scanners/taint-tracer.mjs';
|
||||||
|
|
||||||
|
describe('extractAssignedVariable — pre-B6 patterns (regression guard)', () => {
|
||||||
|
it('plain const declaration: const x = req.body', () => {
|
||||||
|
const names = extractAssignedVariable('const x = req.body;');
|
||||||
|
assert.deepEqual(names, ['x']);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('plain let declaration: let y = process.argv[1]', () => {
|
||||||
|
const names = extractAssignedVariable('let y = process.argv[1];');
|
||||||
|
assert.deepEqual(names, ['y']);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('plain var declaration: var z = user_input', () => {
|
||||||
|
const names = extractAssignedVariable('var z = user_input;');
|
||||||
|
assert.deepEqual(names, ['z']);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('plain assignment: tainted = req.body', () => {
|
||||||
|
const names = extractAssignedVariable('tainted = req.body;');
|
||||||
|
assert.deepEqual(names, ['tainted']);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('does not match equality: x == req.body', () => {
|
||||||
|
const names = extractAssignedVariable('if (x == req.body) {}');
|
||||||
|
assert.equal(names.length, 0);
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
describe('extractAssignedVariable — B6 destructuring patterns', () => {
|
||||||
|
it('object destructuring: const { x } = req.body', () => {
|
||||||
|
const names = extractAssignedVariable('const { x } = req.body;');
|
||||||
|
assert.deepEqual(names.sort(), ['x']);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('object destructuring multi-key: const { x, y } = req.body', () => {
|
||||||
|
const names = extractAssignedVariable('const { x, y } = req.body;');
|
||||||
|
assert.deepEqual(names.sort(), ['x', 'y']);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('renamed destructuring: const { secret: alias } = req.body', () => {
|
||||||
|
const names = extractAssignedVariable('const { secret: alias } = req.body;');
|
||||||
|
assert.deepEqual(names.sort(), ['alias']);
|
||||||
|
// The key `secret` is NOT a binding — only `alias` is.
|
||||||
|
assert.ok(!names.includes('secret'), 'key (secret) must not be a binding');
|
||||||
|
});
|
||||||
|
|
||||||
|
it('object rest: const { x, ...spread } = req.body', () => {
|
||||||
|
const names = extractAssignedVariable('const { x, ...spread } = req.body;');
|
||||||
|
assert.deepEqual(names.sort(), ['spread', 'x']);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('nested object destructuring: const { a, b: { c } } = req.body', () => {
|
||||||
|
const names = extractAssignedVariable('const { a, b: { c } } = req.body;');
|
||||||
|
assert.deepEqual(names.sort(), ['a', 'c']);
|
||||||
|
// `b` is a key — not a binding.
|
||||||
|
assert.ok(!names.includes('b'), 'key (b) must not be a binding');
|
||||||
|
});
|
||||||
|
|
||||||
|
it('array destructuring: const [a, b] = process.argv', () => {
|
||||||
|
const names = extractAssignedVariable('const [a, b] = process.argv;');
|
||||||
|
assert.deepEqual(names.sort(), ['a', 'b']);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('array rest: const [first, ...rest] = process.argv', () => {
|
||||||
|
const names = extractAssignedVariable('const [first, ...rest] = process.argv;');
|
||||||
|
assert.deepEqual(names.sort(), ['first', 'rest']);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('nested array destructuring: const [a, [b, c]] = source', () => {
|
||||||
|
const names = extractAssignedVariable('const [a, [b, c]] = source;');
|
||||||
|
assert.deepEqual(names.sort(), ['a', 'b', 'c']);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('mixed destructuring: const { user: { id }, ...rest } = req.body', () => {
|
||||||
|
const names = extractAssignedVariable('const { user: { id }, ...rest } = req.body;');
|
||||||
|
assert.deepEqual(names.sort(), ['id', 'rest']);
|
||||||
|
// `user` is a key — not a binding.
|
||||||
|
assert.ok(!names.includes('user'), 'key (user) must not be a binding');
|
||||||
|
});
|
||||||
|
|
||||||
|
it('let with destructuring: let { x } = source', () => {
|
||||||
|
const names = extractAssignedVariable('let { x } = source;');
|
||||||
|
assert.deepEqual(names.sort(), ['x']);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('var with destructuring: var [a] = source', () => {
|
||||||
|
const names = extractAssignedVariable('var [a] = source;');
|
||||||
|
assert.deepEqual(names.sort(), ['a']);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('default value in destructuring: const { x = 5 } = source', () => {
|
||||||
|
// The destructured binding `x` is still bound; the default `= 5` is on
|
||||||
|
// the RHS of `:` if absent or just trails the ident. The walker treats
|
||||||
|
// `=` and any literal that follows as separators.
|
||||||
|
const names = extractAssignedVariable('const { x = 5 } = source;');
|
||||||
|
assert.ok(names.includes('x'), `expected x in ${JSON.stringify(names)}`);
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
describe('extractAssignedVariable — non-destructuring pre-B6 regressions', () => {
|
||||||
|
it('does not falsely add destructure-shaped tokens from non-decl lines', () => {
|
||||||
|
// `if ({ x } === ...)` is invalid JS but the regex must not match it
|
||||||
|
// as a binding. Same for object literals on the RHS without `const`.
|
||||||
|
const names = extractAssignedVariable('return { x: 1 };');
|
||||||
|
assert.equal(names.length, 0, `non-decl produced unexpected names: ${JSON.stringify(names)}`);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('does not match arrow function param destructuring without const', () => {
|
||||||
|
// `({x}) => ...` is a destructure but not an assignment — skip.
|
||||||
|
const names = extractAssignedVariable('const fn = ({x}) => x;');
|
||||||
|
// The plain-decl branch still picks `fn` (the function-binding name).
|
||||||
|
assert.deepEqual(names, ['fn']);
|
||||||
|
});
|
||||||
|
});
|
||||||
Loading…
Add table
Add a link
Reference in a new issue