// taint-tracer.mjs — Deterministic taint analysis: traces untrusted data from sources to dangerous sinks
// Zero dependencies (Node.js builtins only via lib helpers).
//
// LIMITATIONS (read before interpreting results):
//   ~70% recall, ~50-70% precision for medium findings.
//   - No scope awareness: a variable named `input` in one function taints all uses across the file.
//   - No cross-file tracing: taint does not propagate across module boundaries.
//   - No closure / callback analysis: reassignment inside closures is not tracked.
//   - No data-flow through arrays or object properties (e.g., `obj.field = userInput`).
//   - Sanitization suppression is keyword-based; adversarial code can evade it.
//   - Shell variable pattern ($VAR) is very broad in .sh/.bash/.zsh files — expect FPs.
//   - Same-line source+sink detection is approximate; unrelated code on the same line may trigger.
//
// References:
//   - OWASP LLM01 (Prompt Injection — injection sinks: eval, exec, SQL queries)
//   - OWASP LLM02 (Sensitive Info Disclosure — exfiltration sinks: fetch, .post, .send)
//   - skill-threat-patterns.md: toolchain manipulation, persistence patterns
|
|
|
|
import { readTextFile } from './lib/file-discovery.mjs';
|
|
import { finding, scannerResult } from './lib/output.mjs';
|
|
import { SEVERITY } from './lib/severity.mjs';
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// File extension filter — only scan code files, not config/docs
|
|
// JVM-language support (.kt, .kts, .groovy, .gradle, .scala) is required for
|
|
// JetBrains plugin scanning — plugin source lives in these languages.
|
|
// ---------------------------------------------------------------------------
|
|
|
|
// Shell dialects. Files with these extensions additionally get the broad
// `$VAR` source pattern applied during source detection.
const SHELL_EXTENSIONS = new Set(['.sh', '.bash', '.zsh']);

// Every extension the scanner reads; files with any other extension are skipped.
const CODE_EXTENSIONS = new Set([
  // JavaScript / TypeScript
  '.js',
  '.mjs',
  '.cjs',
  '.ts',
  '.mts',
  '.cts',
  '.jsx',
  '.tsx',
  // Python
  '.py',
  '.pyw',
  // Ruby / PHP
  '.rb',
  '.php',
  // Go / Rust
  '.go',
  '.rs',
  // Java / C#
  '.java',
  '.cs',
  // Other JVM languages (Kotlin, Groovy/Gradle, Scala)
  '.kt',
  '.kts',
  '.groovy',
  '.gradle',
  '.scala',
  // Shell
  ...SHELL_EXTENSIONS,
]);
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Source patterns — untrusted / externally controlled data origins
|
|
// ---------------------------------------------------------------------------
|
|
|
|
// Source patterns applied to every scanned file, regardless of language.
// The shell `$VAR` pattern is deliberately NOT part of this list: it is only
// added for SHELL_EXTENSIONS files, because applying it to JS/TS would produce
// massive false-positive rates.
// Order matters: the first match on a line supplies the label recorded for
// any variable tainted on that line.
const SOURCES_COMMON = [
  // Process environment and command line (Node.js)
  { label: 'process.env', pattern: /process\.env\[?/g },
  { label: 'process.argv', pattern: /process\.argv/g },
  // HTTP request objects (`req.*`, `request.*`)
  { label: 'req.body', pattern: /req\.body/g },
  { label: 'req.query', pattern: /req\.query/g },
  { label: 'req.params', pattern: /req\.params/g },
  { label: 'req.headers', pattern: /req\.headers/g },
  { label: 'request.body', pattern: /request\.body/g },
  { label: 'request.form', pattern: /request\.form/g },
  // Tool / user supplied input and stdin
  { label: 'tool_input', pattern: /tool_input/g },
  { label: 'user_input', pattern: /user_input/g },
  { label: '$ARGUMENTS', pattern: /\$ARGUMENTS/g },
  { label: 'stdin', pattern: /\bstdin\b/g },
  // Python
  { label: 'os.environ', pattern: /os\.environ/g },
  { label: 'sys.argv', pattern: /sys\.argv/g },
  { label: 'input()', pattern: /\binput\s*\(/g },
  { label: 'request.args', pattern: /request\.args/g },
  { label: 'request.json', pattern: /request\.json/g },
];
|
|
|
|
// Shell-only source: any `$NAME` / `${NAME}` reference. Matches that
// isShellSafeVar() recognises as well-known variables are dropped by
// detectSources() before they are reported.
const SOURCE_SHELL = {
  label: 'shell variable',
  pattern: /\$\{?\w+\}?/g,
};

// Shell variables that are virtually always safe — used to suppress false positives.
const SHELL_SAFE_VARS = new Set([
  // Bare form
  '$HOME',
  '$PATH',
  '$USER',
  '$PWD',
  '$SHELL',
  '$IFS',
  '$0',
  '$#',
  // Braced form
  '${HOME}',
  '${PATH}',
  '${USER}',
  '${PWD}',
  '${SHELL}',
]);
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Sink patterns — dangerous operations that could lead to injection/exfiltration
|
|
// ---------------------------------------------------------------------------
|
|
|
|
// Sink table. Every entry has:
//   label   — name shown in findings
//   risk    — short description of what can go wrong
//   owasp   — preferred OWASP mapping (injection → LLM01, exfiltration → LLM02)
//   pattern — global regex that locates the sink on a single line
const SINKS = [
  // Code / command execution (injection risk → LLM01)
  { label: 'eval()', risk: 'code execution', owasp: 'LLM01', pattern: /\beval\s*\(/g },
  { label: 'exec()', risk: 'command execution', owasp: 'LLM01', pattern: /\bexec\s*\(/g },
  { label: 'execSync()', risk: 'command execution', owasp: 'LLM01', pattern: /\bexecSync\s*\(/g },
  { label: 'spawn()', risk: 'command execution', owasp: 'LLM01', pattern: /\bspawn\s*\(/g },
  { label: 'spawnSync()', risk: 'command execution', owasp: 'LLM01', pattern: /\bspawnSync\s*\(/g },
  { label: 'child_process', risk: 'command execution', owasp: 'LLM01', pattern: /child_process/g },
  { label: 'new Function()', risk: 'code execution', owasp: 'LLM01', pattern: /new\s+Function\s*\(/g },
  { label: 'subprocess', risk: 'command execution', owasp: 'LLM01', pattern: /\bsubprocess\./g },
  { label: 'os.system()', risk: 'command execution', owasp: 'LLM01', pattern: /os\.system\s*\(/g },
  { label: 'os.popen()', risk: 'command execution', owasp: 'LLM01', pattern: /os\.popen\s*\(/g },

  // File system writes (could be used to persist injected content)
  { label: 'writeFile()', risk: 'file write', owasp: 'LLM01', pattern: /writeFile\s*\(/g },
  { label: 'writeFileSync()', risk: 'file write', owasp: 'LLM01', pattern: /writeFileSync\s*\(/g },
  { label: 'appendFile()', risk: 'file write', owasp: 'LLM01', pattern: /\bappendFile/g },
  { label: 'createWriteStream()', risk: 'file write', owasp: 'LLM01', pattern: /createWriteStream/g },
  { label: 'open(w)', risk: 'file write', owasp: 'LLM01', pattern: /open\s*\(.*['"]w/g },

  // Network / exfiltration (data leaving the process → LLM02)
  { label: 'fetch()', risk: 'network request', owasp: 'LLM02', pattern: /\bfetch\s*\(/g },
  { label: '.send()', risk: 'data exfiltration', owasp: 'LLM02', pattern: /\.send\s*\(/g },
  { label: '.post()', risk: 'data exfiltration', owasp: 'LLM02', pattern: /\.post\s*\(/g },
  { label: 'XMLHttpRequest', risk: 'network request', owasp: 'LLM02', pattern: /XMLHttpRequest/g },
  { label: 'WebSocket', risk: 'network connection', owasp: 'LLM02', pattern: /WebSocket/g },

  // Database (SQL injection → LLM01)
  { label: '.query()', risk: 'SQL injection', owasp: 'LLM01', pattern: /\.query\s*\(/g },
  { label: '.execute()', risk: 'SQL injection', owasp: 'LLM01', pattern: /\.execute\s*\(/g },
  { label: '.raw()', risk: 'raw query', owasp: 'LLM01', pattern: /\.raw\s*\(/g },

  // HTML / DOM injection (XSS → LLM01 in agentic browser contexts)
  { label: 'innerHTML', risk: 'XSS', owasp: 'LLM01', pattern: /innerHTML\s*=/g },
  { label: 'document.write()', risk: 'XSS', owasp: 'LLM01', pattern: /document\.write\s*\(/g },
  { label: 'dangerouslySetInnerHTML', risk: 'XSS', owasp: 'LLM01', pattern: /dangerouslySetInnerHTML/g },
];
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Sanitization suppression keywords
|
|
// ---------------------------------------------------------------------------
|
|
// Keyword heuristic: when any alternative below appears on a line between a
// source and a sink (inclusive), the finding's severity is downgraded by one
// level. It is only a heuristic — skilled attackers can bypass it by naming
// variables after safe functions. Matching is case-insensitive.
const SANITIZER_PATTERN = new RegExp(
  [
    'sanitize',
    'escape',
    'validate',
    'parseInt',
    String.raw`Number\s*\(`,
    String.raw`path\.resolve`,
    String.raw`path\.join`,
    'encodeURI',
    'encodeURIComponent',
    'DOMPurify',
    String.raw`\.strip\s*\(`,
    String.raw`\.clean\s*\(`,
    String.raw`\.filter\s*\(`,
    'whitelist',
    'allowlist',
  ].join('|'),
  'i'
);
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Severity ordering utilities
|
|
// ---------------------------------------------------------------------------
|
|
|
|
// Severity levels ordered from most to least severe. downgradeSeverity()
// moves one position towards the end of this list.
const SEVERITY_ORDER = ['CRITICAL', 'HIGH', 'MEDIUM', 'LOW', 'INFO'].map(
  (level) => SEVERITY[level]
);
|
|
|
|
/**
 * Step a severity one level down the SEVERITY_ORDER ladder.
 *
 * The last entry of SEVERITY_ORDER is the floor and is returned unchanged.
 * A value that is not in SEVERITY_ORDER is also returned unchanged.
 *
 * @param {string} sev - Severity to lower.
 * @returns {string} The next-lower severity, or `sev` itself when it cannot be lowered.
 */
function downgradeSeverity(sev) {
  const rank = SEVERITY_ORDER.indexOf(sev);
  if (rank === -1) {
    return sev;
  }

  const lowestRank = SEVERITY_ORDER.length - 1;
  return rank < lowestRank ? SEVERITY_ORDER[rank + 1] : SEVERITY_ORDER[lowestRank];
}
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Variable name extraction helpers
|
|
// ---------------------------------------------------------------------------
|
|
|
|
/**
 * Attempt to extract the variable name(s) being assigned on a source line.
 *
 * Recognised forms (single line only):
 *   const/let/var X = ...                  → X
 *   const/let/var { A, p: B, C = 1, ...R } = ...   → A, B, C, R
 *   const/let/var [A, B, ...R] = ...       → A, B, R
 *   X = ...   (no keyword, at start of line; also covers Python-style
 *              assignment)                 → X
 *
 * Comparisons (`==`, `===`) and arrow functions (`x => ...`) are not treated
 * as assignments. Nested or multi-line destructuring is not parsed; such
 * lines fall back to capturing the first identifier after the keyword.
 *
 * Returns an empty array if no assignment variable is found — the source
 * will still be tracked for same-line sink detection, but not propagated.
 *
 * @param {string} line
 * @returns {string[]} variable names, de-duplicated, in order of appearance (may be empty)
 */
function extractAssignedVariable(line) {
  const names = [];
  const addName = (name) => {
    if (name && !names.includes(name)) names.push(name);
  };

  // Pattern 1a: flat object/array destructuring — every bound name is tainted,
  // not just the first one.
  const destructureMatch = line.match(
    /\b(?:const|let|var)\s*(?:\{([^{}]*)\}|\[([^[\]]*)\])\s*=(?![=>])/
  );

  if (destructureMatch) {
    const bindingList = destructureMatch[1] ?? destructureMatch[2];
    for (const element of bindingList.split(',')) {
      // Drop a default value (`c = 1`), then keep the local side of a rename
      // (`prop: local`) because that is the name later code will use.
      const target = element.split('=')[0].split(':').pop();
      // Require the whole fragment to be one identifier (optionally `...rest`)
      // so pieces of a default expression split on ',' are not mistaken for bindings.
      const binding = target.match(/^\s*(?:\.\.\.)?\s*([A-Za-z_]\w*)\s*$/);
      if (binding) addName(binding[1]);
    }
  } else {
    // Pattern 1b: const/let/var X = ...  (also the fallback for destructuring
    // shapes Pattern 1a does not parse).
    const declMatch = line.match(/\b(?:const|let|var)\s+\{?\s*(\w+)/);
    if (declMatch) addName(declMatch[1]);
  }

  // Pattern 2: plain assignment X = ... at the start of the line.
  // The lookahead rejects `==`, `===` and the arrow `=>`.
  const assignMatch = line.match(/^\s*(\w+)\s*=(?![=>])/);
  if (assignMatch) addName(assignMatch[1]);

  return names;
}
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Shell file safety check
|
|
// ---------------------------------------------------------------------------
|
|
|
|
/**
 * Decide whether a matched shell variable token refers to one of the
 * well-known variables listed in SHELL_SAFE_VARS.
 *
 * The token is checked verbatim first, then in a normalised bare `$NAME`
 * form so that `${NAME}`, `${NAME:-default}` and a truncated `${NAME`
 * all resolve to the same entry.
 *
 * @param {string} token - e.g. "$HOME" or "${USER}"
 * @returns {boolean} true when the token is considered safe
 */
function isShellSafeVar(token) {
  if (SHELL_SAFE_VARS.has(token)) {
    return true;
  }

  // `${VAR:-default}` → `${VAR}`: keep only the name inside the braces.
  const collapsed = token.replace(/\{(\w+)[^}]*\}/, '{$1}');
  // Remove the first opening and first closing brace, if present.
  const unbraced = collapsed.replace(/\{/, '').replace(/\}/, '');
  // Guarantee exactly one leading `$`.
  const bareForm = '$'.concat(unbraced.replace(/^\$/, ''));

  return SHELL_SAFE_VARS.has(bareForm);
}
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Per-line source/sink detection
|
|
// ---------------------------------------------------------------------------
|
|
|
|
/**
 * Find every source-pattern occurrence on one line.
 *
 * SOURCES_COMMON is always applied; SOURCE_SHELL is appended for shell files,
 * and its matches are dropped when isShellSafeVar() accepts the token.
 * Results are grouped by pattern (in list order), then by position.
 *
 * @param {string} line - A single line of file text.
 * @param {boolean} isShell - Whether the file is a shell script.
 * @returns {Array<{ label: string, position: number }>} One entry per occurrence.
 */
function detectSources(line, isShell) {
  const activeSources = isShell ? [...SOURCES_COMMON, SOURCE_SHELL] : [...SOURCES_COMMON];
  const hits = [];

  for (const source of activeSources) {
    const filterSafeVars = isShell && source === SOURCE_SHELL;
    // Work on a private copy so the shared global regex never carries
    // `lastIndex` state from one line to the next.
    const freshPattern = new RegExp(source.pattern.source, source.pattern.flags);

    for (const occurrence of line.matchAll(freshPattern)) {
      if (filterSafeVars && isShellSafeVar(occurrence[0])) {
        continue;
      }
      hits.push({ label: source.label, position: occurrence.index });
    }
  }

  return hits;
}
|
|
|
|
/**
 * Find every sink-pattern occurrence on one line.
 *
 * Each entry of SINKS is applied in list order; every occurrence yields one
 * result carrying the sink's metadata and the match offset within the line.
 *
 * @param {string} line - A single line of file text.
 * @returns {Array<{ label: string, risk: string, owasp: string, position: number }>}
 */
function detectSinks(line) {
  return SINKS.flatMap(({ pattern, label, risk, owasp }) => {
    // Private copy: the shared global regex must not retain `lastIndex`.
    const freshPattern = new RegExp(pattern.source, pattern.flags);
    return Array.from(line.matchAll(freshPattern), (occurrence) => ({
      label,
      risk,
      owasp,
      position: occurrence.index,
    }));
  });
}
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Sanitization check in a line range
|
|
// ---------------------------------------------------------------------------
|
|
|
|
/**
 * Report whether any line in the inclusive, 0-based range [fromIdx, toIdx]
 * matches SANITIZER_PATTERN. The range is clamped to the bounds of `lines`;
 * an empty range yields false. Callers use a true result to downgrade severity.
 *
 * @param {string[]} lines - All lines of the file.
 * @param {number} fromIdx - 0-based inclusive start
 * @param {number} toIdx - 0-based inclusive end
 * @returns {boolean} true when a sanitization keyword is present in the range
 */
function hasSanitizationBetween(lines, fromIdx, toIdx) {
  const lastIdx = Math.min(lines.length - 1, toIdx);
  let cursor = Math.max(0, fromIdx);

  while (cursor <= lastIdx) {
    if (SANITIZER_PATTERN.test(lines[cursor])) {
      return true;
    }
    cursor += 1;
  }

  return false;
}
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Proximity-based severity
|
|
// ---------------------------------------------------------------------------
|
|
|
|
/**
 * Translate the line distance between a source and a sink into a base severity.
 *
 *   distance 0 (same line) → CRITICAL
 *   up to 10 lines         → HIGH
 *   up to 50 lines         → MEDIUM
 *   anything further       → LOW
 *
 * @param {number} distance - number of lines between source and sink (0 = same line)
 * @returns {string} One of the SEVERITY values.
 */
function distanceToSeverity(distance) {
  if (distance === 0) {
    return SEVERITY.CRITICAL;
  }

  // Tiers are checked nearest-first; the first upper bound that holds wins.
  const tiers = [
    [10, SEVERITY.HIGH],
    [50, SEVERITY.MEDIUM],
  ];
  for (const [maxDistance, level] of tiers) {
    if (distance <= maxDistance) {
      return level;
    }
  }

  return SEVERITY.LOW;
}
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Tainted variable tracking
|
|
// ---------------------------------------------------------------------------
|
|
|
|
/**
|
|
* @typedef {{ name: string, sourceLine: number, sourceLabel: string }} TaintedVar
|
|
*/
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Per-file scan
|
|
// ---------------------------------------------------------------------------
|
|
|
|
/**
 * Run the 3-pass taint analysis on a single file.
 *
 * Pass 1 — Source Detection: find lines with source patterns and record the
 *          variable name(s) assigned on those lines as tainted.
 * Pass 2 — Same-line Flow: a source and a sink on the same line → CRITICAL
 *          finding (performed in the same sweep as Pass 1).
 * Pass 3 — Variable-to-Sink: for each tainted variable, search the lines after
 *          its source line for the variable name on a line that also contains
 *          a sink → severity by line distance.
 *
 * In Pass 2 and Pass 3, a sanitization keyword anywhere from the source line
 * through the sink line lowers the severity by one level, and the finding's
 * description says so.
 *
 * @param {string} content - File text
 * @param {string} absPath - Absolute path; accepted for interface
 *   compatibility but not read by this function.
 * @param {string} relPath - Relative path (used for shell detection and in findings)
 * @returns {ReturnType<typeof import('./lib/output.mjs').finding>[]}
 */
function scanFileContent(content, absPath, relPath) {
  const recommendation =
    'Validate/sanitize data before passing to sink. Consider using parameterized queries, allowlists, or safe APIs.';

  const lines = content.split('\n');
  const isShell = SHELL_EXTENSIONS.has(
    (relPath.match(/\.[^.]+$/) || [''])[0].toLowerCase()
  );
  const fileFindings = [];

  // Dedup key: prevent reporting the same source+sink pair multiple times
  const reportedPairs = new Set();

  // ---- Pass 1: Source Detection ----
  // Collect tainted variables and same-line sink candidates in a single sweep.

  /** @type {TaintedVar[]} */
  const taintedVars = [];

  for (let lineIdx = 0; lineIdx < lines.length; lineIdx++) {
    const line = lines[lineIdx];
    const sourceMatches = detectSources(line, isShell);
    if (sourceMatches.length === 0) continue;

    // Variables assigned on a source line become tainted. The first source
    // match on the line supplies the label.
    for (const varName of extractAssignedVariable(line)) {
      // Skip single-character names: they would match far too much in Pass 3.
      if (varName.length < 2) continue;
      taintedVars.push({ name: varName, sourceLine: lineIdx, sourceLabel: sourceMatches[0].label });
    }

    // ---- Pass 2: Same-line Source + Sink ----
    const sinkMatches = detectSinks(line);
    if (sinkMatches.length === 0) continue;

    // The sanitizer check depends only on the line, so do it once per line.
    const sanitized = hasSanitizationBetween(lines, lineIdx, lineIdx);
    const severity = sanitized ? downgradeSeverity(SEVERITY.CRITICAL) : SEVERITY.CRITICAL;
    // Keep the description truthful about whether a sanitizer keyword was seen.
    const assessment = sanitized
      ? 'A sanitization keyword appears on the same line, so severity was downgraded one level; ' +
        'confirm that it is actually applied to this data.'
      : 'Same-line flow is a strong indicator of unsanitized data reaching a dangerous operation.';

    for (const src of sourceMatches) {
      for (const sink of sinkMatches) {
        const pairKey = `sameline:${lineIdx}:${src.label}:${sink.label}`;
        if (reportedPairs.has(pairKey)) continue;
        reportedPairs.add(pairKey);

        fileFindings.push(
          finding({
            scanner: 'TNT',
            severity,
            title: `Taint: ${src.label} flows directly to ${sink.label} (same line)`,
            description:
              `Untrusted data from source \`${src.label}\` appears on the same line as ` +
              `dangerous sink \`${sink.label}\` (${sink.risk}). ` +
              assessment,
            file: relPath,
            line: lineIdx + 1,
            evidence: `source \`${src.label}\` at line ${lineIdx + 1} flows to \`${sink.label}\` at line ${lineIdx + 1} (same-line)`,
            owasp: sink.owasp,
            recommendation,
          })
        );
      }
    }
  }

  // ---- Pass 3: Variable-to-Sink ----
  // For each tainted variable, scan every line after its source line for:
  //   (a) the tainted variable name as a word token, AND
  //   (b) a sink pattern on the same line.
  // This catches the most common real-world shape: the variable passed as an
  // argument to a sink call.

  for (const taintedVar of taintedVars) {
    // Word-boundary match so that e.g. "cmd" does not match inside "cmdLine".
    const varRe = new RegExp(`\\b${escapeRegex(taintedVar.name)}\\b`);

    for (let lineIdx = taintedVar.sourceLine + 1; lineIdx < lines.length; lineIdx++) {
      const line = lines[lineIdx];
      if (!varRe.test(line)) continue;

      const sinkMatches = detectSinks(line);
      if (sinkMatches.length === 0) continue;

      // Distance, sanitizer presence and severity are the same for every sink
      // on this line, so compute them once.
      const distance = lineIdx - taintedVar.sourceLine;
      const sanitized = hasSanitizationBetween(lines, taintedVar.sourceLine, lineIdx);
      const baseSeverity = distanceToSeverity(distance);
      const severity = sanitized ? downgradeSeverity(baseSeverity) : baseSeverity;
      const assessment = sanitized
        ? 'A sanitization keyword was detected between source and sink, so severity was downgraded ' +
          'one level; verify that it is actually applied to this variable.'
        : 'No recognized sanitization was detected between source and sink.';

      for (const sink of sinkMatches) {
        const pairKey = `var:${relPath}:${taintedVar.name}:${taintedVar.sourceLine}:${sink.label}:${lineIdx}`;
        if (reportedPairs.has(pairKey)) continue;
        reportedPairs.add(pairKey);

        fileFindings.push(
          finding({
            scanner: 'TNT',
            severity,
            title: `Taint: ${taintedVar.sourceLabel} → ${taintedVar.name} → ${sink.label}`,
            description:
              `Variable \`${taintedVar.name}\` is assigned from untrusted source ` +
              `\`${taintedVar.sourceLabel}\` at line ${taintedVar.sourceLine + 1} ` +
              `and flows into dangerous sink \`${sink.label}\` (${sink.risk}) ` +
              `at line ${lineIdx + 1} (${distance} line${distance === 1 ? '' : 's'} away). ` +
              assessment,
            file: relPath,
            line: lineIdx + 1,
            evidence:
              `source \`${taintedVar.sourceLabel}\` at line ${taintedVar.sourceLine + 1} ` +
              `flows to \`${sink.label}\` at line ${lineIdx + 1} ` +
              `via variable \`${taintedVar.name}\``,
            owasp: sink.owasp,
            recommendation,
          })
        );
      }
    }
  }

  return fileFindings;
}
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Utility: escape regex special characters in a variable name
|
|
// ---------------------------------------------------------------------------
|
|
|
|
/**
 * Backslash-escape every regex metacharacter in `str` so the result can be
 * embedded in a RegExp and match the original text literally.
 *
 * @param {string} str - Literal text, e.g. a variable name.
 * @returns {string} `str` with each of . * + ? ^ $ { } ( ) | [ ] \ prefixed by a backslash.
 */
function escapeRegex(str) {
  const metacharacters = /[.*+?^${}()|[\]\\]/g;
  return str.replace(metacharacters, (character) => `\\${character}`);
}
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Public scanner entry point
|
|
// ---------------------------------------------------------------------------
|
|
|
|
/**
 * Scan a target path for taint flows from untrusted sources to dangerous sinks.
 *
 * Only files whose extension is listed in CODE_EXTENSIONS are processed; the
 * comparison is case-insensitive, so `.JS` is treated like `.js`. All other
 * files in the discovery set are skipped silently, as are files for which
 * readTextFile returns null (binary files or unreadable paths).
 *
 * Any exception thrown while scanning stops the loop; the findings collected
 * up to that point are returned in an 'error' result together with the message.
 *
 * @param {string} targetPath - Absolute path to scan (file or directory root).
 *   Not read by this function; the file list comes from `discovery`.
 * @param {{ files: Array<{ absPath: string, relPath: string, ext: string, size: number }> }} discovery
 *   Pre-computed file discovery result from the orchestrator.
 * @returns {Promise<object>} Scanner result envelope (see lib/output.mjs::scannerResult)
 */
export async function scan(targetPath, discovery) {
  const startMs = Date.now();
  const allFindings = [];
  let filesScanned = 0;

  try {
    for (const fileInfo of discovery.files) {
      // Only scan code files. Lower-case first so the filter agrees with the
      // case-insensitive shell detection done in scanFileContent.
      const extension = String(fileInfo.ext ?? '').toLowerCase();
      if (!CODE_EXTENSIONS.has(extension)) continue;

      const content = await readTextFile(fileInfo.absPath);

      // readTextFile returns null for binary files or unreadable paths
      if (content === null) continue;

      filesScanned++;

      // Append one by one: spreading a very large findings array into push()
      // can exceed the engine's argument limit.
      for (const fileFinding of scanFileContent(content, fileInfo.absPath, fileInfo.relPath)) {
        allFindings.push(fileFinding);
      }
    }

    return scannerResult('taint-tracer', 'ok', allFindings, filesScanned, Date.now() - startMs);
  } catch (err) {
    return scannerResult(
      'taint-tracer',
      'error',
      allFindings,
      filesScanned,
      Date.now() - startMs,
      String(err?.message || err)
    );
  }
}
|