// dep-auditor.mjs — Deterministic dependency security scanner // Detects CVEs (npm/pip audit), typosquatting, malicious install scripts, // and unpinned versions. Zero external dependencies — Node.js builtins only. // // OWASP coverage: LLM03 (Supply Chain) import { finding, scannerResult } from './lib/output.mjs'; import { SEVERITY } from './lib/severity.mjs'; import { levenshtein, tokenize, tokenOverlap, TYPOSQUAT_SUSPICIOUS_TOKENS } from './lib/string-utils.mjs'; import { readFile } from 'node:fs/promises'; import { join, dirname } from 'node:path'; import { existsSync } from 'node:fs'; import { execSync } from 'node:child_process'; import { fileURLToPath } from 'node:url'; // --------------------------------------------------------------------------- // Top-package knowledge base loader // --------------------------------------------------------------------------- const __dirname = dirname(fileURLToPath(import.meta.url)); /** @type {{ npm: string[], pypi: string[] } | null} */ let _topPackages = null; let _typosquatAllowlist = null; /** * Load top-packages.json from the knowledge directory. * Result is cached after first load. 
/**
 * Load the popular-package lists from knowledge/top-packages.json.
 *
 * The result is cached in the module-level `_topPackages` after the first
 * call. A missing, unreadable or malformed file degrades to empty lists, which
 * leaves typosquatting detection with nothing to compare against instead of
 * failing the scan. A member that is not an array is replaced by an empty
 * list and non-string entries are dropped, so callers can safely call string
 * methods on every entry.
 *
 * @returns {Promise<{ npm: string[], pypi: string[] }>}
 */
async function loadTopPackages() {
  if (_topPackages) return _topPackages;

  const knowledgePath = join(__dirname, '..', 'knowledge', 'top-packages.json');
  const onlyStrings = (value) =>
    (Array.isArray(value) ? value.filter((entry) => typeof entry === 'string') : []);

  // readJson() yields null on any read/parse error, which maps to empty lists.
  const data = await readJson(knowledgePath);
  _topPackages = { npm: onlyStrings(data?.npm), pypi: onlyStrings(data?.pypi) };
  return _topPackages;
}

/**
 * Load the typosquat allowlist from knowledge/typosquat-allowlist.json.
 *
 * Names are lower-cased and every `_`, `.` or `-` character is replaced by
 * `-` before being stored. The result is cached in `_typosquatAllowlist`.
 * A missing or malformed file yields two empty sets; a single non-string
 * entry is skipped rather than discarding the whole list.
 *
 * @returns {Promise<{ npm: Set, pypi: Set }>} sets of normalised names
 */
async function loadTyposquatAllowlist() {
  if (_typosquatAllowlist) return _typosquatAllowlist;

  const allowPath = join(__dirname, '..', 'knowledge', 'typosquat-allowlist.json');
  const toNameSet = (value) =>
    new Set(
      (Array.isArray(value) ? value : [])
        .filter((entry) => typeof entry === 'string')
        .map((entry) => entry.toLowerCase().replace(/[_.-]/g, '-')),
    );

  const data = await readJson(allowPath);
  _typosquatAllowlist = { npm: toNameSet(data?.npm), pypi: toNameSet(data?.pypi) };
  return _typosquatAllowlist;
}

// ---------------------------------------------------------------------------
// File reading helpers
// ---------------------------------------------------------------------------

/**
 * Drop a leading byte-order mark (U+FEFF). readFile(..., 'utf8') keeps it,
 * and JSON.parse rejects it.
 * @param {string} text
 * @returns {string}
 */
function stripBom(text) {
  return text.charCodeAt(0) === 0xfeff ? text.slice(1) : text;
}

/**
 * Read and parse a JSON file. Returns null on any read or parse error.
 * A leading byte-order mark is tolerated.
 * @param {string} absPath
 * @returns {Promise} resolves to the parsed value, or null on error
 */
async function readJson(absPath) {
  try {
    const raw = await readFile(absPath, 'utf8');
    return JSON.parse(stripBom(raw));
  } catch {
    return null;
  }
}

/**
 * Read a text file and split it into lines. Returns an empty array on error.
 * A leading byte-order mark and the trailing `\r` of CRLF line endings are removed.
 * @param {string} absPath
 * @returns {Promise} resolves to an array of line strings
 */
async function readLines(absPath) {
  try {
    const raw = await readFile(absPath, 'utf8');
    return stripBom(raw)
      .split('\n')
      .map((line) => line.replace(/\r$/, ''));
  } catch {
    return [];
  }
}

// ---------------------------------------------------------------------------
// Category 1: CVE Detection via npm/pip audit
// ---------------------------------------------------------------------------
/**
 * Map an npm audit severity string to our SEVERITY constants.
 * 'moderate' maps to MEDIUM; 'low' and any unrecognised value map to LOW.
 * @param {string} npmSev
 * @returns {*} one of the SEVERITY constants
 */
function npmSeverityToOurs(npmSev) {
  switch (npmSev) {
    case 'critical': return SEVERITY.CRITICAL;
    case 'high': return SEVERITY.HIGH;
    case 'moderate': return SEVERITY.MEDIUM;
    case 'low':
    default: return SEVERITY.LOW;
  }
}

/**
 * Upper bound for captured audit output. execSync defaults to 1 MiB and
 * fails once the child writes more; the partial output then no longer parses
 * as JSON and every finding would be lost.
 */
const AUDIT_MAX_BUFFER_BYTES = 64 * 1024 * 1024;

/**
 * Audit commands tried in order. `pip-audit` is the executable name of the
 * pip-audit tool; the second entry is the invocation this scanner used
 * historically and is kept only as a fallback.
 * NOTE(review): without `-r`, pip-audit appears to inspect the active Python
 * environment rather than requirements.txt — confirm this is intended.
 */
const PIP_AUDIT_COMMANDS = ['pip-audit --format json', 'pip audit --format json'];

/**
 * Run an audit command and return whatever it wrote to stdout.
 * Audit tools exit non-zero when they find vulnerabilities, in which case
 * execSync throws and the output is taken from the error object instead.
 * @param {string} command
 * @param {string} cwd
 * @returns {string|null} captured stdout, or null when none is available
 */
function runAuditCommand(command, cwd) {
  try {
    return execSync(command, {
      cwd,
      timeout: 30_000,
      maxBuffer: AUDIT_MAX_BUFFER_BYTES,
      stdio: ['ignore', 'pipe', 'ignore'],
    }).toString();
  } catch (err) {
    return err?.stdout ? err.stdout.toString() : null;
  }
}

/**
 * Parse audit output as JSON.
 * @param {string|null} raw
 * @returns {*} the parsed value, or null for empty or unparseable output
 */
function parseAuditOutput(raw) {
  if (!raw || raw.trim().length === 0) return null;
  try {
    return JSON.parse(raw);
  } catch {
    return null;
  }
}

/**
 * Collect the distinct GHSA / CVE identifiers found in the `url` fields of an
 * npm audit `via` chain. Entries that are not objects with a string `url`
 * (package-name strings, null) are ignored.
 * @param {*} via
 * @returns {string[]} identifiers in first-seen order
 */
function collectAdvisoryIds(via) {
  if (!Array.isArray(via)) return [];
  const ids = new Set();
  for (const entry of via) {
    if (entry === null || typeof entry !== 'object' || typeof entry.url !== 'string') continue;
    const match = entry.url.match(/GHSA-[\w-]+|CVE-\d{4}-\d+/i);
    if (match) ids.add(match[0]);
  }
  return [...ids];
}

/**
 * Run npm audit --json in targetPath and return findings.
 * Gracefully handles: command not found, timeout, parse errors, non-zero exit,
 * and output without a `vulnerabilities` object (all yield no findings).
 * @param {string} targetPath
 * @returns {object[]} findings
 */
function runNpmAudit(targetPath) {
  const findings = [];
  const parsed = parseAuditOutput(runAuditCommand('npm audit --json', targetPath));

  // npm audit v2 format: { vulnerabilities: { pkgName: { severity, via, ... } } }
  const vulns = parsed?.vulnerabilities;
  if (!vulns || typeof vulns !== 'object') return findings;

  for (const [pkgName, vuln] of Object.entries(vulns)) {
    if (!vuln || typeof vuln !== 'object') continue;

    const severity = npmSeverityToOurs(vuln.severity);
    const cveIds = collectAdvisoryIds(vuln.via);
    const cveRef = cveIds.length > 0 ? ` (${cveIds.join(', ')})` : '';

    // fixAvailable is either falsy, `true`, or an object naming the upgrade.
    let fixAvailable = ' No automatic fix available — review manually.';
    if (vuln.fixAvailable) {
      fixAvailable =
        typeof vuln.fixAvailable === 'object'
          ? ` Fix: upgrade to ${vuln.fixAvailable.name}@${vuln.fixAvailable.version}.`
          : ' A fix is available — run `npm audit fix`.';
    }

    findings.push(
      finding({
        scanner: 'DEP',
        severity,
        title: `Vulnerable npm dependency: ${pkgName}${cveRef}`,
        description:
          `npm audit reports a ${vuln.severity} severity vulnerability in "${pkgName}".` +
          (vuln.range ? ` Affected range: ${vuln.range}.` : '') +
          fixAvailable,
        file: 'package.json',
        evidence: cveIds.length > 0 ? cveIds.join(', ') : `${pkgName} @ ${vuln.range || 'unknown'}`,
        owasp: 'LLM03',
        recommendation:
          `Run \`npm audit fix\` or manually upgrade "${pkgName}" to a patched version. ` +
          'Review the advisory for workarounds if no fix is available.',
      }),
    );
  }

  return findings;
}

/**
 * Run pip-audit with JSON output in targetPath and return findings.
 * Gracefully handles the tool not being installed, timeouts and parse errors
 * (all yield no findings).
 * @param {string} targetPath
 * @returns {object[]} findings
 */
function runPipAudit(targetPath) {
  const findings = [];

  let parsed = null;
  for (const command of PIP_AUDIT_COMMANDS) {
    parsed = parseAuditOutput(runAuditCommand(command, targetPath));
    if (parsed !== null) break;
  }
  if (parsed === null) return findings;

  // JSON output is either an array of { name, version, vulns: [...] } or an
  // object wrapping that array under `dependencies`.
  let packages = [];
  if (Array.isArray(parsed)) {
    packages = parsed;
  } else if (Array.isArray(parsed.dependencies)) {
    packages = parsed.dependencies;
  }

  for (const pkg of packages) {
    if (!pkg || !Array.isArray(pkg.vulns) || pkg.vulns.length === 0) continue;

    for (const vuln of pkg.vulns) {
      if (!vuln) continue;
      const fixes =
        Array.isArray(vuln.fix_versions) && vuln.fix_versions.length > 0
          ? ` Fix in version(s): ${vuln.fix_versions.join(', ')}.`
          : ' No fix version reported.';

      findings.push(
        finding({
          scanner: 'DEP',
          severity: SEVERITY.HIGH, // pip audit does not expose severity; default HIGH
          title: `Vulnerable Python dependency: ${pkg.name} (${vuln.id})`,
          description:
            `pip audit reports vulnerability ${vuln.id} in "${pkg.name}" v${pkg.version}.` +
            (vuln.description ? ` ${vuln.description}` : '') +
            fixes,
          file: 'requirements.txt',
          evidence: `${vuln.id} — ${pkg.name}@${pkg.version}`,
          owasp: 'LLM03',
          recommendation:
            `Upgrade "${pkg.name}" to a patched version.${fixes} ` +
            'Run `pip-audit` after upgrading to verify resolution.',
        }),
      );
    }
  }

  return findings;
}
` ${vuln.description}` : '') + fixes, file: 'requirements.txt', evidence: `${vuln.id} — ${pkg.name}@${pkg.version}`, owasp: 'LLM03', recommendation: `Upgrade "${pkg.name}" to a patched version.${fixes} ` + 'Run `pip audit` after upgrading to verify resolution.', }), ); } } return findings; } // --------------------------------------------------------------------------- // Category 2: Typosquatting Detection // --------------------------------------------------------------------------- /** * Extract package names from requirements.txt lines. * Handles: pkg==1.0, pkg>=1.0, pkg~=1.0, pkg, # comments, -r includes, blanks. * @param {string[]} lines * @returns {string[]} */ function parseRequirementsTxt(lines) { const names = []; for (const line of lines) { const stripped = line.trim(); // Skip blanks, comments, options, includes if (!stripped || stripped.startsWith('#') || stripped.startsWith('-')) continue; // Extract package name: everything before first [>= 2 if (Math.abs(declaredName.length - topPkg.length) > 2) continue; const dist = levenshtein(declaredName, topPkg); if (dist < closestDist || (dist === closestDist && i < closestIdx)) { closestDist = dist; closestPkg = topPkg; closestIdx = i; } } if (closestPkg === null) return null; // Flag distance 1 always; distance 2 only if target is in top 200 (top200Cutoff) if (closestDist === 1) { return finding({ scanner: 'DEP', severity: SEVERITY.HIGH, title: `Possible typosquatting: "${declaredName}" vs "${closestPkg}" (edit distance 1)`, description: `The declared ${ecosystem} package "${declaredName}" is 1 character away from the ` + `popular package "${closestPkg}". This is a strong typosquatting indicator. ` + `Typosquatting packages impersonate popular libraries to execute malicious install scripts.`, file: sourceFile, evidence: `"${declaredName}" → closest match "${closestPkg}" (Levenshtein distance: 1)`, owasp: 'LLM03', recommendation: `Verify that "${declaredName}" is the intended package. 
If you meant "${closestPkg}", ` + `correct the dependency name. If "${declaredName}" is intentional, add an inline comment ` + `confirming this to suppress future alerts.`, }); } if (closestDist === 2 && closestIdx < top200Cutoff) { return finding({ scanner: 'DEP', severity: SEVERITY.MEDIUM, title: `Potential typosquatting: "${declaredName}" vs "${closestPkg}" (edit distance 2)`, description: `The declared ${ecosystem} package "${declaredName}" is 2 characters away from the ` + `highly popular package "${closestPkg}" (top ${top200Cutoff} by downloads). ` + `While less certain than distance-1 matches, this warrants manual verification.`, file: sourceFile, evidence: `"${declaredName}" → closest match "${closestPkg}" (Levenshtein distance: 2)`, owasp: 'LLM03', recommendation: `Confirm "${declaredName}" is the correct and intended package name. ` + `Check the package's publish date, author, and download count on the registry.`, }); } // B7 (v7.2.0): token-overlap heuristic. Flags typosquats that Levenshtein // misses because the attacker added a suspicious suffix token (e.g. // `lodash-utils` vs `lodash`, edit distance 6). Conservative — requires // BOTH a high token-overlap (entire popular name's tokens are a subset // of the declared name) AND at least one suspicious suffix token. const tokenOverlapFinding = checkTyposquatTokenOverlap( declaredName, topList, top200Cutoff, ecosystem, sourceFile, ); if (tokenOverlapFinding) return tokenOverlapFinding; return null; } /** * B7 token-overlap heuristic — complementary to Levenshtein. Returns a * MEDIUM finding when: * 1. The declared name's tokens contain ALL of a popular package's tokens * (popular package's tokens ⊆ declared name's tokens), AND * 2. The declared name has at least one suspicious suffix token * (`-utils`, `-helper`, `-pro`, `-cli`, `-wrapper`, etc.), AND * 3. The popular package is in the top200Cutoff window * * Returns null if no match. 
/**
 * B7 token-overlap heuristic — complementary to Levenshtein. Returns a
 * MEDIUM finding when all of the following hold:
 *   1. The declared name tokenizes into at least two distinct tokens and
 *      contains ALL tokens of a popular package, with at least one extra token.
 *   2. At least one declared token is listed in TYPOSQUAT_SUSPICIOUS_TOKENS
 *      (the token's position inside the name is not checked).
 *   3. The popular package lies within the first `top200Cutoff` entries of
 *      `topList`.
 *   4. tokenOverlap(declaredName, popularName) is at least 0.66.
 *
 * A declared name that itself appears anywhere in `topList` is never flagged.
 * This guard runs before the scan so the outcome does not depend on whether
 * the exact entry or a shorter popular name comes first in the list.
 *
 * Allowlist handling is not performed here; per the original note it is
 * enforced by the caller before this function is reached.
 *
 * @param {string} declaredName - normalised dependency name
 * @param {string[]} topList - popular package names, most popular first
 * @param {number} top200Cutoff - number of leading topList entries to compare against
 * @param {string} ecosystem - ecosystem label used in messages (e.g. 'npm', 'pypi')
 * @param {string} sourceFile - manifest file reported in the finding
 * @returns {object|null} a finding, or null when nothing matches
 */
function checkTyposquatTokenOverlap(declaredName, topList, top200Cutoff, ecosystem, sourceFile) {
  const declaredTokens = new Set(tokenize(declaredName));
  if (declaredTokens.size < 2) return null;

  const suspiciousSuffixes = TYPOSQUAT_SUSPICIOUS_TOKENS.filter((token) => declaredTokens.has(token));
  if (suspiciousSuffixes.length === 0) return null;

  // A name that is itself a known popular package is not a typosquat.
  if (topList.includes(declaredName)) return null;

  const limit = Math.min(top200Cutoff, topList.length);
  for (let i = 0; i < limit; i++) {
    const topPkg = topList[i];
    const topTokens = new Set(tokenize(topPkg));
    if (topTokens.size === 0) continue;

    const allContained = [...topTokens].every((token) => declaredTokens.has(token));
    if (!allContained) continue;
    if (topTokens.size === declaredTokens.size) continue; // exact-token-set is not a typosquat

    const overlap = tokenOverlap(declaredName, topPkg);
    if (overlap < 0.66) continue;

    return finding({
      scanner: 'DEP',
      severity: SEVERITY.MEDIUM,
      title: `Possible typosquatting via token-overlap: "${declaredName}" vs "${topPkg}"`,
      description:
        `The declared ${ecosystem} package "${declaredName}" contains all tokens of the ` +
        `popular package "${topPkg}" plus a suspicious suffix (${suspiciousSuffixes.join(', ')}). ` +
        `This is a common typosquat pattern: attackers register popular-name-plus-suffix ` +
        `packages to capture installs from users misremembering the canonical name.`,
      file: sourceFile,
      evidence: `"${declaredName}" tokens ⊃ "${topPkg}" tokens; suffix=${suspiciousSuffixes.join(',')}; overlap=${overlap.toFixed(2)}`,
      owasp: 'LLM03',
      recommendation:
        `Verify that "${declaredName}" is intentional. If you meant "${topPkg}", ` +
        `correct the dependency name. If "${declaredName}" is a legitimate utility ` +
        `package, add it to knowledge/typosquat-allowlist.json under "${ecosystem}".`,
    });
  }

  return null;
}

// ---------------------------------------------------------------------------
// Category 3: Malicious Install Scripts
// ---------------------------------------------------------------------------

/**
 * Patterns in install script values that indicate network/exec behaviour.
 * Each entry pairs a regular expression with the label shown in findings.
 */
const MALICIOUS_SCRIPT_PATTERNS = [
  { pattern: /\bcurl\b/, label: 'curl (network fetch)' },
  { pattern: /\bwget\b/, label: 'wget (network fetch)' },
  { pattern: /\bfetch\b/, label: 'fetch (network request)' },
  { pattern: /https?:\/\//, label: 'HTTP URL' },
  { pattern: /\beval\b/, label: 'eval (code execution)' },
  { pattern: /\bexec\b/, label: 'exec (process execution)' },
  { pattern: /child_process/, label: 'child_process (subprocess)' },
  { pattern: /net\.connect\b/, label: 'net.connect (raw TCP)' },
  { pattern: /\bdgram\b/, label: 'dgram (UDP socket)' },
];

/** npm lifecycle hooks that run automatically on install. */
const INSTALL_HOOKS = ['preinstall', 'install', 'postinstall'];
/**
 * Check package.json scripts for malicious install script patterns.
 *
 * Every hook listed in INSTALL_HOOKS whose command matches at least one entry
 * of MALICIOUS_SCRIPT_PATTERNS produces one HIGH finding. URLs in the
 * evidence are replaced by "[URL]" and the evidence is capped at 120
 * characters; "..." is appended only when the redacted text was actually cut.
 *
 * @param {object} pkgJson - Parsed package.json object
 * @returns {object[]} - findings
 */
function checkInstallScripts(pkgJson) {
  const EVIDENCE_MAX_CHARS = 120;
  const findings = [];

  const scripts = pkgJson?.scripts;
  if (!scripts || typeof scripts !== 'object') return findings;

  for (const hook of INSTALL_HOOKS) {
    const script = scripts[hook];
    if (!script || typeof script !== 'string') continue;

    const matched = MALICIOUS_SCRIPT_PATTERNS.filter(({ pattern }) => pattern.test(script));
    if (matched.length === 0) continue;

    const labels = matched.map((m) => m.label).join(', ');

    // Redact any URLs in the evidence to avoid leaking sensitive paths in reports.
    // Truncation is decided on the redacted text: redaction can shorten a long
    // command below the cap, in which case no ellipsis must be shown.
    const redacted = script.replace(/https?:\/\/[^\s"']+/g, '[URL]');
    const preview =
      redacted.length > EVIDENCE_MAX_CHARS ? `${redacted.slice(0, EVIDENCE_MAX_CHARS)}...` : redacted;

    findings.push(
      finding({
        scanner: 'DEP',
        severity: SEVERITY.HIGH,
        title: `Suspicious npm install hook: scripts.${hook} contains network/exec patterns`,
        description:
          `The package.json "scripts.${hook}" field runs automatically during \`npm install\` ` +
          `and contains suspicious patterns: ${labels}. ` +
          `Malicious packages use install hooks to exfiltrate data, download payloads, or establish persistence.`,
        file: 'package.json',
        evidence: `scripts.${hook}: "${preview}"`,
        owasp: 'LLM03',
        recommendation:
          `Review the scripts.${hook} command carefully. If this package is a dependency ` +
          `(not your own), consider whether this behaviour is expected. Use \`npm install --ignore-scripts\` ` +
          `if install hooks are not needed. File a report at https://www.npmjs.com/support if malicious.`,
      }),
    );
  }

  return findings;
}

// ---------------------------------------------------------------------------
// Category 4: Unpinned Versions
// ---------------------------------------------------------------------------

/**
 * Matches npm version specifiers that are treated as unpinned. The test is
 * anchored at the start of the (trimmed) specifier and accepts:
 *   - the empty string, which npm treats like `*`
 *   - `*`, `latest`, or a leading `x` / `X` wildcard
 *   - `>` or `>=` followed by a digit, with optional whitespace in between
 */
const UNPINNED_NPM_RE = /^(?:$|\*|latest|[xX]|>=?\s*\d)/;
/**
 * Check package.json dependencies for unpinned version specifiers.
 *
 * Looks at `dependencies` and `devDependencies`; every string specifier that
 * matches UNPINNED_NPM_RE (after trimming) yields one LOW finding. Sections
 * that are missing or not objects, and non-string specifiers, are skipped.
 *
 * @param {object} pkgJson - Parsed package.json object
 * @returns {object[]} findings
 */
function checkUnpinnedNpm(pkgJson) {
  const findings = [];

  for (const sectionName of ['dependencies', 'devDependencies']) {
    const deps = pkgJson[sectionName];
    if (!deps || typeof deps !== 'object') continue;

    for (const [name, version] of Object.entries(deps)) {
      if (typeof version !== 'string') continue;
      if (!UNPINNED_NPM_RE.test(version.trim())) continue;

      findings.push(
        finding({
          scanner: 'DEP',
          severity: SEVERITY.LOW,
          title: `Unpinned npm dependency: ${name}@${version}`,
          description:
            `The package "${name}" in ${sectionName} uses an unpinned version specifier "${version}". ` +
            `Unpinned dependencies can silently pull in a compromised version on the next install.`,
          file: 'package.json',
          evidence: `${sectionName}.${name}: "${version}"`,
          owasp: 'LLM03',
          recommendation:
            `Pin "${name}" to an exact version (e.g., "${name}": "x.y.z") or use a lockfile ` +
            `(\`package-lock.json\` or \`yarn.lock\`) and commit it. Run \`npm ci\` in CI instead of \`npm install\`.`,
        }),
      );
    }
  }

  return findings;
}

/**
 * Check requirements.txt lines for unpinned packages (missing == pin).
 *
 * Blank lines, comment lines and option lines (starting with `-`) are skipped.
 * Before looking for version operators, the parts of a line that are not the
 * version specifier are removed: inline comments (whitespace followed by `#`),
 * environment markers (everything after `;`) and per-requirement options
 * (whitespace followed by `--`). Otherwise operators inside those parts would
 * be mistaken for a pin or a range.
 *
 * Each remaining requirement is classified as:
 *   - pinned (contains `==`): no finding
 *   - no operator at all: "Unpinned" LOW finding
 *   - any other operator: "Loosely pinned" LOW finding
 *
 * @param {string[]} lines - raw lines of requirements.txt
 * @returns {object[]} findings; `line` is the 1-based line number
 */
function checkUnpinnedPypi(lines) {
  const findings = [];

  for (let i = 0; i < lines.length; i++) {
    const line = lines[i].trim();
    if (!line || line.startsWith('#') || line.startsWith('-')) continue;

    const requirement = line
      .replace(/(^|\s)#.*$/, '') // inline comment
      .replace(/;.*$/, '') // environment marker
      .replace(/\s--.*$/, '') // per-requirement options such as --hash=...
      .trim();

    if (/==/.test(requirement)) continue; // strict pin

    const match = requirement.match(/^([A-Za-z0-9][A-Za-z0-9._-]*)/);
    const name = match ? match[1] : line;

    if (!/[><=~!]/.test(requirement)) {
      // No version at all
      findings.push(
        finding({
          scanner: 'DEP',
          severity: SEVERITY.LOW,
          title: `Unpinned Python dependency: ${name} (no version specifier)`,
          description:
            `"${name}" in requirements.txt has no version pin. ` +
            `Without pinning, \`pip install\` may resolve to a future compromised version.`,
          file: 'requirements.txt',
          line: i + 1,
          evidence: line,
          owasp: 'LLM03',
          // The example previously ended in a bare `==`; show a complete pin.
          recommendation:
            `Pin to an exact version: \`${name}==x.y.z\`. ` +
            `Use \`pip freeze > requirements.txt\` to capture current versions, ` +
            `or use \`pip-compile\` (pip-tools) for reproducible builds.`,
        }),
      );
    } else {
      // Has >= or ~= but no == — floating upper bound
      findings.push(
        finding({
          scanner: 'DEP',
          severity: SEVERITY.LOW,
          title: `Loosely pinned Python dependency: ${name}`,
          description:
            `"${name}" in requirements.txt uses a range specifier without a strict == pin. ` +
            `Range specifiers allow unexpected version upgrades that may introduce vulnerabilities.`,
          file: 'requirements.txt',
          line: i + 1,
          evidence: line,
          owasp: 'LLM03',
          recommendation:
            `Prefer exact version pinning (\`${name}==x.y.z\`) for reproducible installs. ` +
            `If you need flexibility, use a lockfile approach (\`pip-compile\`).`,
        }),
      );
    }
  }

  return findings;
}

// ---------------------------------------------------------------------------
// Main scanner export
// ---------------------------------------------------------------------------
/**
 * Scan targetPath for dependency security issues.
 *
 * Which checks run depends on the manifest files found directly in
 * `targetPath`:
 *   - package.json: npm audit, typosquatting against the first 200 popular
 *     npm names, install-hook inspection, unpinned specifiers
 *   - requirements.txt or pyproject.toml: pip audit
 *   - requirements.txt: typosquatting against the first 100 popular PyPI
 *     names, unpinned specifiers
 * setup.py only marks the project as a Python project; no check reads it.
 *
 * The result status is 'skipped' when no manifest exists, 'ok' on completion,
 * and 'error' (with the findings gathered so far and the error message) when
 * a check throws.
 *
 * @param {string} targetPath - Absolute root path being scanned
 * @param {object} discovery - Unused (dep-auditor reads files by convention, not discovery list)
 * @returns {Promise} - scannerResult envelope
 */
export async function scan(targetPath, discovery) {
  const startedAt = Date.now();
  const elapsedMs = () => Date.now() - startedAt;
  const toDashed = (name) => name.toLowerCase().replace(/_/g, '-');

  const findings = [];
  let filesScanned = 0;

  // Run the typosquat check for each name and keep whatever it reports.
  const collectTyposquats = (names, topList, cutoff, ecosystem, sourceFile, allowed) => {
    for (const name of names) {
      const hit = checkTyposquatting(name, topList, cutoff, ecosystem, sourceFile, allowed);
      if (hit) findings.push(hit);
    }
  };

  // Manifest locations that decide which ecosystems are present
  const pkgJsonPath = join(targetPath, 'package.json');
  const requirementsTxt = join(targetPath, 'requirements.txt');
  const setupPy = join(targetPath, 'setup.py');
  const pyprojectToml = join(targetPath, 'pyproject.toml');

  const hasNpm = existsSync(pkgJsonPath);
  const hasPypi = existsSync(requirementsTxt) || existsSync(setupPy) || existsSync(pyprojectToml);

  if (!(hasNpm || hasPypi)) {
    // Neither ecosystem detected: report the scanner as skipped
    return scannerResult('dep-auditor', 'skipped', [], 0, elapsedMs());
  }

  try {
    // ---------------------------------------------------------------------
    // npm ecosystem
    // ---------------------------------------------------------------------
    if (hasNpm) {
      filesScanned += 1;
      const pkgJson = await readJson(pkgJsonPath);

      if (pkgJson) {
        // 1a. CVE via npm audit
        findings.push(...runNpmAudit(targetPath));

        // 2a. Typosquatting — npm
        const [topPkgs, allowlist] = await Promise.all([loadTopPackages(), loadTyposquatAllowlist()]);
        const npmTop = topPkgs.npm.map(toDashed);
        const declaredNpm = Object.keys({
          ...pkgJson.dependencies,
          ...pkgJson.devDependencies,
        }).map(toDashed);
        collectTyposquats(declaredNpm, npmTop, 200, 'npm', 'package.json', allowlist.npm);

        // 3. Malicious install scripts
        findings.push(...checkInstallScripts(pkgJson));

        // 4a. Unpinned versions
        findings.push(...checkUnpinnedNpm(pkgJson));
      }
    }

    // ---------------------------------------------------------------------
    // PyPI ecosystem
    // ---------------------------------------------------------------------
    if (hasPypi) {
      // 1b. CVE via pip audit (only if requirements.txt or pyproject.toml present)
      const auditable = existsSync(requirementsTxt) || existsSync(pyprojectToml);
      if (auditable) {
        findings.push(...runPipAudit(targetPath));
      }

      // 2b. Typosquatting — PyPI (only if requirements.txt present)
      if (existsSync(requirementsTxt)) {
        filesScanned += 1;
        const reqLines = await readLines(requirementsTxt);
        const pypiKnowledge = await loadTopPackages();
        const pypiAllowlist = await loadTyposquatAllowlist();
        const pypiTop = pypiKnowledge.pypi.map(toDashed);
        const declaredPypi = parseRequirementsTxt(reqLines);
        collectTyposquats(declaredPypi, pypiTop, 100, 'pypi', 'requirements.txt', pypiAllowlist.pypi);

        // 4b. Unpinned versions
        findings.push(...checkUnpinnedPypi(reqLines));
      }
    }

    return scannerResult('dep-auditor', 'ok', findings, filesScanned, elapsedMs());
  } catch (err) {
    return scannerResult('dep-auditor', 'error', findings, filesScanned, elapsedMs(), err.message);
  }
}