Critical-review §2 B7 finding: pure Levenshtein <=2 misses the most common
modern typosquat pattern — popular-name + token-injection suffix. Examples:
lodash → lodash-utils (edit distance 6, not flagged pre-B7)
react → react-helper (edit distance 7, not flagged pre-B7)
express → express-wrapper (edit distance 8, not flagged pre-B7)
Three coordinated edits:
scanners/lib/string-utils.mjs
- Adds tokenize(name): string[] — splits on -/_, lowercases
- Adds tokenOverlap(a, b): number — intersection.size / min(|a|,|b|)
- Adds TYPOSQUAT_SUSPICIOUS_TOKENS frozen list of common typosquat
suffixes. Excludes language-extension tokens (js, jsx, ts, tsx) — the
v7.0.0 allowlist contains `tsx` as a legit package and including the
same token in the suspicious set creates a contradiction. Caught by
the new allowlist-intersection-guard test. Also excludes 'pro'
(legitimate edition marker).
scanners/dep-auditor.mjs + scanners/supply-chain-recheck.mjs
- New checkTyposquatTokenOverlap() helper — fires AFTER Levenshtein 1/2
branches, only when:
1. popular package's tokens ⊊ declared name's tokens (declared name is a strict superset)
2. declared name has at least one suspicious suffix
3. popular package is in topCutoff window
All three conditions required — conservative by design. Allowlist
precedence preserved (existing 22 npm + 13 PyPI entries always pass).
MEDIUM severity, NOT block. New finding title prefix:
"Possible typosquatting via token-overlap".
Tests: +21 cases across two new files
- tests/lib/string-utils-tokens.test.mjs (15) — tokenize, tokenOverlap,
TYPOSQUAT_SUSPICIOUS_TOKENS frozen contract, allowlist-intersection
guard (caught the tsx conflict on first run)
- tests/scanners/dep-token-overlap.test.mjs (7) — integration via
in-memory tmpdir fixtures: lodash-utils flagged, react-helper flagged,
express-wrapper flagged, lodash exact NOT flagged, allowlist tools
(knip/tsx/nx/rimraf) NOT flagged, react-router-dom (no suspicious
suffix) NOT flagged, react itself (equal token set, not superset)
NOT flagged.
Existing dep.test.mjs and supply-chain-recheck.test.mjs unchanged —
all green (149 → 149 regression guard).
Suite: 1570 → 1591 (+21). All green.
700 lines
26 KiB
JavaScript
700 lines
26 KiB
JavaScript
// dep-auditor.mjs — Deterministic dependency security scanner
|
|
// Detects CVEs (npm/pip audit), typosquatting, malicious install scripts,
|
|
// and unpinned versions. Zero external dependencies — Node.js builtins only.
|
|
//
|
|
// OWASP coverage: LLM03 (Supply Chain)
|
|
|
|
import { finding, scannerResult } from './lib/output.mjs';
|
|
import { SEVERITY } from './lib/severity.mjs';
|
|
import { levenshtein, tokenize, tokenOverlap, TYPOSQUAT_SUSPICIOUS_TOKENS } from './lib/string-utils.mjs';
|
|
import { readFile } from 'node:fs/promises';
|
|
import { join, dirname } from 'node:path';
|
|
import { existsSync } from 'node:fs';
|
|
import { execSync } from 'node:child_process';
|
|
import { fileURLToPath } from 'node:url';
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Top-package knowledge base loader
|
|
// ---------------------------------------------------------------------------
|
|
|
|
// Directory containing this module, derived from import.meta.url. Used below
// to resolve the sibling `knowledge/` directory.
const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
|
|
/** @type {{ npm: string[], pypi: string[] } | null} */
|
|
// Cache for loadTopPackages(); stays null until the first load has finished.
let _topPackages = null;
|
|
// Cache for loadTyposquatAllowlist(); stays null until the first load has finished.
let _typosquatAllowlist = null;
|
|
|
|
/**
 * Load the popular-package lists from `knowledge/top-packages.json`
 * (resolved relative to this module's directory).
 *
 * The result is cached in the module-level `_topPackages`, so the file is
 * read at most once per process.
 *
 * The returned object always has both an `npm` and a `pypi` array containing
 * only strings: a missing or non-array section becomes `[]` and non-string
 * entries are dropped. Callers call string methods on every entry, so a
 * malformed knowledge file must not be able to crash the scan.
 *
 * If the file is missing or is not valid JSON, both lists are empty and
 * typosquatting detection is effectively skipped.
 *
 * @returns {Promise<{ npm: string[], pypi: string[] }>}
 */
async function loadTopPackages() {
  if (_topPackages) return _topPackages;

  const knowledgePath = join(__dirname, '..', 'knowledge', 'top-packages.json');

  // Coerce one section of the knowledge file into a clean string array.
  const onlyStrings = (value) =>
    Array.isArray(value) ? value.filter((entry) => typeof entry === 'string') : [];

  try {
    const data = JSON.parse(await readFile(knowledgePath, 'utf8'));
    _topPackages = {
      npm: onlyStrings(data?.npm),
      pypi: onlyStrings(data?.pypi),
    };
  } catch {
    // Graceful fallback: empty lists — typosquatting detection skipped
    _topPackages = { npm: [], pypi: [] };
  }

  return _topPackages;
}
|
|
|
|
/**
 * Load the typosquat allowlist from `knowledge/typosquat-allowlist.json`
 * (resolved relative to this module's directory).
 *
 * Names are stored lowercased with `_` and `.` mapped to `-`. The result is
 * cached in the module-level `_typosquatAllowlist`.
 *
 * A missing or non-array section yields an empty set, and non-string entries
 * are skipped individually, so one bad entry no longer discards the whole
 * allowlist. If the file is missing or is not valid JSON, both sets are empty.
 *
 * @returns {Promise<{ npm: Set<string>, pypi: Set<string> }>}
 */
async function loadTyposquatAllowlist() {
  if (_typosquatAllowlist) return _typosquatAllowlist;

  const allowPath = join(__dirname, '..', 'knowledge', 'typosquat-allowlist.json');

  // Build a normalized name set from one section of the allowlist file.
  const toNameSet = (value) =>
    new Set(
      (Array.isArray(value) ? value : [])
        .filter((entry) => typeof entry === 'string')
        .map((entry) => entry.toLowerCase().replace(/[_.-]/g, '-')),
    );

  try {
    const data = JSON.parse(await readFile(allowPath, 'utf8'));
    _typosquatAllowlist = {
      npm: toNameSet(data?.npm),
      pypi: toNameSet(data?.pypi),
    };
  } catch {
    _typosquatAllowlist = { npm: new Set(), pypi: new Set() };
  }

  return _typosquatAllowlist;
}
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// File reading helpers
|
|
// ---------------------------------------------------------------------------
|
|
|
|
/**
 * Read a file as UTF-8 and parse it as JSON.
 *
 * Any failure (unreadable file, invalid JSON) results in `null` rather than
 * an exception.
 *
 * @param {string} absPath - Path of the JSON file to read
 * @returns {Promise<object|null>} Parsed value, or null on any error
 */
async function readJson(absPath) {
  let parsed = null;
  try {
    const text = await readFile(absPath, 'utf8');
    parsed = JSON.parse(text);
  } catch {
    parsed = null;
  }
  return parsed;
}
|
|
|
|
/**
 * Read a UTF-8 text file and return its lines.
 *
 * The content is split on `\n`, and a single trailing `\r` is removed from
 * each piece so CRLF files yield clean lines. An unreadable file yields an
 * empty array.
 *
 * @param {string} absPath - Path of the text file to read
 * @returns {Promise<string[]>} Lines of the file, or [] on read error
 */
async function readLines(absPath) {
  let contents;
  try {
    contents = await readFile(absPath, 'utf8');
  } catch {
    return [];
  }

  const lines = [];
  for (const piece of contents.split('\n')) {
    lines.push(piece.endsWith('\r') ? piece.slice(0, -1) : piece);
  }
  return lines;
}
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Category 1: CVE Detection via npm/pip audit
|
|
// ---------------------------------------------------------------------------
|
|
|
|
/**
 * Translate an npm audit severity label into one of our SEVERITY constants.
 *
 * `critical`, `high` and `moderate` map to CRITICAL, HIGH and MEDIUM; every
 * other value (including `low`) maps to LOW.
 *
 * @param {string} npmSev - Severity string as reported by npm audit
 * @returns {*} The matching SEVERITY constant
 */
function npmSeverityToOurs(npmSev) {
  if (npmSev === 'critical') return SEVERITY.CRITICAL;
  if (npmSev === 'high') return SEVERITY.HIGH;
  if (npmSev === 'moderate') return SEVERITY.MEDIUM;
  return SEVERITY.LOW;
}
|
|
|
|
/**
 * Run `npm audit --json` in targetPath and convert the report into findings.
 *
 * Failure modes that produce an empty result instead of an exception:
 * command not found, timeout, non-zero exit without output, unparsable
 * output, and output that is valid JSON but not an object.
 *
 * @param {string} targetPath - Directory in which to run npm audit
 * @returns {object[]} findings — one per entry in the report's `vulnerabilities` map
 */
function runNpmAudit(targetPath) {
  const findings = [];
  let raw;
  try {
    raw = execSync('npm audit --json', {
      cwd: targetPath,
      timeout: 30_000,
      // The default 1 MiB buffer truncates large audit reports, which would
      // then fail to parse and silently yield no findings.
      maxBuffer: 64 * 1024 * 1024,
      // Allow non-zero exit (npm audit exits 1 when vulnerabilities found)
      stdio: ['ignore', 'pipe', 'ignore'],
    }).toString();
  } catch (err) {
    // execSync throws on non-zero exit; the stdout is still on err.stdout
    raw = err.stdout ? err.stdout.toString() : null;
  }

  if (!raw || raw.trim().length === 0) return findings;

  let parsed;
  try {
    parsed = JSON.parse(raw);
  } catch {
    return findings;
  }
  // `null`, numbers and strings are valid JSON but carry no report.
  if (parsed === null || typeof parsed !== 'object') return findings;

  // npm audit v2 format: { vulnerabilities: { pkgName: { severity, via, ... } } }
  const vulns = parsed.vulnerabilities || {};
  for (const [pkgName, vuln] of Object.entries(vulns)) {
    if (vuln === null || typeof vuln !== 'object') continue;

    const severity = npmSeverityToOurs(vuln.severity);

    // Collect CVE / GHSA IDs from the via chain. A Set keeps first-seen order
    // and avoids repeating an ID that several via entries reference.
    const idSet = new Set();
    if (Array.isArray(vuln.via)) {
      for (const v of vuln.via) {
        // String entries name another package; only advisory objects have a URL.
        if (v === null || typeof v !== 'object' || typeof v.url !== 'string') continue;
        const match = v.url.match(/GHSA-[\w-]+|CVE-\d{4}-\d+/i);
        if (match) idSet.add(match[0]);
      }
    }
    const cveIds = [...idSet];

    const cveRef = cveIds.length > 0 ? ` (${cveIds.join(', ')})` : '';

    let fixAvailable;
    if (!vuln.fixAvailable) {
      fixAvailable = ' No automatic fix available — review manually.';
    } else if (typeof vuln.fixAvailable === 'object') {
      fixAvailable = ` Fix: upgrade to ${vuln.fixAvailable.name}@${vuln.fixAvailable.version}.`;
    } else {
      fixAvailable = ' A fix is available — run `npm audit fix`.';
    }

    findings.push(
      finding({
        scanner: 'DEP',
        severity,
        title: `Vulnerable npm dependency: ${pkgName}${cveRef}`,
        description:
          `npm audit reports a ${vuln.severity} severity vulnerability in "${pkgName}".` +
          (vuln.range ? ` Affected range: ${vuln.range}.` : '') +
          fixAvailable,
        file: 'package.json',
        evidence: cveIds.length > 0 ? cveIds.join(', ') : `${pkgName} @ ${vuln.range || 'unknown'}`,
        owasp: 'LLM03',
        recommendation:
          `Run \`npm audit fix\` or manually upgrade "${pkgName}" to a patched version. ` +
          'Review the advisory for workarounds if no fix is available.',
      }),
    );
  }

  return findings;
}
|
|
|
|
/**
 * Run `pip audit --format json` in targetPath and convert the report into findings.
 *
 * Failure modes that produce an empty result instead of an exception:
 * command not available, timeout, non-zero exit without output, unparsable
 * output, and output that is valid JSON but not an array or object.
 *
 * NOTE(review): the PyPA auditing tool is normally installed as the
 * `pip-audit` executable — confirm that `pip audit` resolves to it in the
 * environments this scanner targets.
 *
 * @param {string} targetPath - Directory in which to run the audit
 * @returns {object[]} findings — one per reported vulnerability
 */
function runPipAudit(targetPath) {
  const findings = [];
  let raw;
  try {
    raw = execSync('pip audit --format json', {
      cwd: targetPath,
      timeout: 30_000,
      // The default 1 MiB buffer truncates large reports, which would then
      // fail to parse and silently yield no findings.
      maxBuffer: 64 * 1024 * 1024,
      stdio: ['ignore', 'pipe', 'ignore'],
    }).toString();
  } catch (err) {
    // On non-zero exit the captured stdout is still available on the error.
    raw = err.stdout ? err.stdout.toString() : null;
  }

  if (!raw || raw.trim().length === 0) return findings;

  let parsed;
  try {
    parsed = JSON.parse(raw);
  } catch {
    return findings;
  }
  // `null`, numbers and strings are valid JSON but carry no report.
  if (parsed === null || typeof parsed !== 'object') return findings;

  // pip audit JSON format: array of { name, version, vulns: [{ id, fix_versions, description }] }
  // or an object wrapping that array under `dependencies`.
  let packages = [];
  if (Array.isArray(parsed)) {
    packages = parsed;
  } else if (Array.isArray(parsed.dependencies)) {
    packages = parsed.dependencies;
  }

  for (const pkg of packages) {
    if (pkg === null || typeof pkg !== 'object') continue;
    if (!Array.isArray(pkg.vulns) || pkg.vulns.length === 0) continue;

    for (const vuln of pkg.vulns) {
      if (vuln === null || typeof vuln !== 'object') continue;

      const fixes = Array.isArray(vuln.fix_versions) && vuln.fix_versions.length > 0
        ? ` Fix in version(s): ${vuln.fix_versions.join(', ')}.`
        : ' No fix version reported.';

      findings.push(
        finding({
          scanner: 'DEP',
          severity: SEVERITY.HIGH, // pip audit does not expose severity; default HIGH
          title: `Vulnerable Python dependency: ${pkg.name} (${vuln.id})`,
          description:
            `pip audit reports vulnerability ${vuln.id} in "${pkg.name}" v${pkg.version}.` +
            (vuln.description ? ` ${vuln.description}` : '') +
            fixes,
          file: 'requirements.txt',
          evidence: `${vuln.id} — ${pkg.name}@${pkg.version}`,
          owasp: 'LLM03',
          recommendation:
            `Upgrade "${pkg.name}" to a patched version.${fixes} ` +
            'Run `pip audit` after upgrading to verify resolution.',
        }),
      );
    }
  }

  return findings;
}
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Category 2: Typosquatting Detection
|
|
// ---------------------------------------------------------------------------
|
|
|
|
/**
 * Pull the declared package names out of requirements.txt lines.
 *
 * Blank lines, `#` comment lines and lines starting with `-` (options such
 * as `-r` / `-e`) are ignored. For every other line the leading name is
 * taken (letters, digits, `.`, `_`, `-`, starting and ending with a letter or
 * digit), lowercased, and has `_` replaced with `-`.
 *
 * @param {string[]} lines - Raw lines of a requirements file
 * @returns {string[]} Normalized package names, in file order
 */
function parseRequirementsTxt(lines) {
  const leadingName = /^([A-Za-z0-9]([A-Za-z0-9._-]*[A-Za-z0-9])?)/;

  return Array.from(lines)
    .map((rawLine) => rawLine.trim())
    .filter((entry) => entry !== '' && !entry.startsWith('#') && !entry.startsWith('-'))
    .map((entry) => leadingName.exec(entry))
    .filter((hit) => hit !== null)
    .map((hit) => hit[1].toLowerCase().replace(/_/g, '-'));
}
|
|
|
|
/**
 * Check one declared package name against the top-packages list for typosquatting.
 *
 * Order of checks:
 *   1. Allowlisted names and exact matches against the top list are legitimate → null.
 *   2. Levenshtein distance 1 to any top package → HIGH finding.
 *   3. Levenshtein distance 2 to a package inside the cutoff window → MEDIUM finding.
 *   4. Otherwise the token-overlap heuristic (`checkTyposquatTokenOverlap`) decides.
 *
 * Candidates whose length differs by more than 2 are skipped before the
 * distance computation, because their distance is necessarily greater than 2.
 * Ties on distance are resolved in favour of the earlier list entry.
 *
 * The token-overlap heuristic runs even when no top package is within the
 * ±2 length window: names such as `express-wrapper-utils` are far longer than
 * the package they imitate, so they have no Levenshtein candidate at all.
 *
 * @param {string} declaredName - Normalized (lowercase, hyphens) declared package name
 * @param {string[]} topList - Top package names (same normalization)
 * @param {number} top200Cutoff - Index cutoff for "very popular" (top 200 for npm, top 100 for PyPI)
 * @param {string} ecosystem - 'npm' or 'pypi'
 * @param {string} sourceFile - 'package.json' or 'requirements.txt'
 * @param {Set<string>} [allowlist] - Known-legitimate names; any object with a `has` method works
 * @returns {object|null} A finding, or null when the name looks legitimate
 */
function checkTyposquatting(declaredName, topList, top200Cutoff, ecosystem, sourceFile, allowlist) {
  // Skip known legitimate packages. The allowlist in this file is stored with
  // `_` and `.` mapped to `-`, so the declared name is probed in that form as
  // well — otherwise dotted names could never match their entry.
  if (allowlist) {
    const allowKey = declaredName.replace(/[_.]/g, '-');
    if (allowlist.has(declaredName) || allowlist.has(allowKey)) return null;
  }

  let closestDist = Infinity;
  let closestPkg = null;
  let closestIdx = Infinity;

  for (let i = 0; i < topList.length; i++) {
    const topPkg = topList[i];

    // Exact match — legitimate package, skip
    if (declaredName === topPkg) return null;

    // Pre-filter: skip if length difference > 2
    if (Math.abs(declaredName.length - topPkg.length) > 2) continue;

    const dist = levenshtein(declaredName, topPkg);

    if (dist < closestDist || (dist === closestDist && i < closestIdx)) {
      closestDist = dist;
      closestPkg = topPkg;
      closestIdx = i;
    }
  }

  // When no candidate survived the pre-filter, closestDist is still Infinity
  // and both Levenshtein branches below are skipped naturally.

  // Flag distance 1 always; distance 2 only if target is in top 200 (top200Cutoff)
  if (closestDist === 1) {
    return finding({
      scanner: 'DEP',
      severity: SEVERITY.HIGH,
      title: `Possible typosquatting: "${declaredName}" vs "${closestPkg}" (edit distance 1)`,
      description:
        `The declared ${ecosystem} package "${declaredName}" is 1 character away from the ` +
        `popular package "${closestPkg}". This is a strong typosquatting indicator. ` +
        `Typosquatting packages impersonate popular libraries to execute malicious install scripts.`,
      file: sourceFile,
      evidence: `"${declaredName}" → closest match "${closestPkg}" (Levenshtein distance: 1)`,
      owasp: 'LLM03',
      recommendation:
        `Verify that "${declaredName}" is the intended package. If you meant "${closestPkg}", ` +
        `correct the dependency name. If "${declaredName}" is intentional, add an inline comment ` +
        `confirming this to suppress future alerts.`,
    });
  }

  if (closestDist === 2 && closestIdx < top200Cutoff) {
    return finding({
      scanner: 'DEP',
      severity: SEVERITY.MEDIUM,
      title: `Potential typosquatting: "${declaredName}" vs "${closestPkg}" (edit distance 2)`,
      description:
        `The declared ${ecosystem} package "${declaredName}" is 2 characters away from the ` +
        `highly popular package "${closestPkg}" (top ${top200Cutoff} by downloads). ` +
        `While less certain than distance-1 matches, this warrants manual verification.`,
      file: sourceFile,
      evidence: `"${declaredName}" → closest match "${closestPkg}" (Levenshtein distance: 2)`,
      owasp: 'LLM03',
      recommendation:
        `Confirm "${declaredName}" is the correct and intended package name. ` +
        `Check the package's publish date, author, and download count on the registry.`,
    });
  }

  // B7 (v7.2.0): token-overlap heuristic. Flags typosquats that Levenshtein
  // misses because the attacker added a suspicious suffix token (e.g.
  // `lodash-utils` vs `lodash`, edit distance 6). Conservative — requires
  // BOTH a high token-overlap (entire popular name's tokens are a subset
  // of the declared name) AND at least one suspicious suffix token.
  const tokenOverlapFinding = checkTyposquatTokenOverlap(
    declaredName,
    topList,
    top200Cutoff,
    ecosystem,
    sourceFile,
  );
  if (tokenOverlapFinding) return tokenOverlapFinding;

  return null;
}
|
|
|
|
/**
 * B7 token-overlap heuristic — the complement to the Levenshtein checks.
 *
 * Produces a MEDIUM finding for the first package in the cutoff window for
 * which all of the following hold:
 *   1. every token of the popular package also appears in the declared name,
 *      and the declared name has a different number of distinct tokens;
 *   2. the declared name contains at least one token listed in
 *      TYPOSQUAT_SUSPICIOUS_TOKENS (e.g. `utils`, `helper`, `wrapper`);
 *   3. `tokenOverlap(declaredName, popular)` is at least 0.66.
 *
 * Returns null when the declared name has fewer than two distinct tokens,
 * contains no suspicious token, exactly equals a package inside the window,
 * or when no package qualifies. This function does not consult the
 * allowlist; callers are expected to do that first.
 *
 * @param {string} declaredName - Normalized declared package name
 * @param {string[]} topList - Top package names (same normalization)
 * @param {number} top200Cutoff - Number of leading topList entries to consider
 * @param {string} ecosystem - 'npm' or 'pypi'
 * @param {string} sourceFile - File the dependency was declared in
 * @returns {object|null}
 */
function checkTyposquatTokenOverlap(declaredName, topList, top200Cutoff, ecosystem, sourceFile) {
  const nameTokens = new Set(tokenize(declaredName));
  if (nameTokens.size < 2) return null;

  const flaggedTokens = TYPOSQUAT_SUSPICIOUS_TOKENS.filter((token) => nameTokens.has(token));
  if (flaggedTokens.length === 0) return null;

  const windowEnd = Math.min(top200Cutoff, topList.length);
  let idx = 0;
  while (idx < windowEnd) {
    const popular = topList[idx];
    idx += 1;

    // The declared name is itself a popular package — nothing to report.
    if (popular === declaredName) return null;

    const popularTokens = tokenize(popular);
    const isCandidate =
      popularTokens.length !== 0 &&
      popularTokens.every((token) => nameTokens.has(token)) &&
      // Same token count means the same token set, which is not a typosquat.
      popularTokens.length !== nameTokens.size;
    if (!isCandidate) continue;

    const overlap = tokenOverlap(declaredName, popular);
    if (overlap < 0.66) continue;

    const title = `Possible typosquatting via token-overlap: "${declaredName}" vs "${popular}"`;
    const description =
      `The declared ${ecosystem} package "${declaredName}" contains all tokens of the ` +
      `popular package "${popular}" plus a suspicious suffix (${flaggedTokens.join(', ')}). ` +
      `This is a common typosquat pattern: attackers register popular-name-plus-suffix ` +
      `packages to capture installs from users misremembering the canonical name.`;
    const evidence = `"${declaredName}" tokens ⊃ "${popular}" tokens; suffix=${flaggedTokens.join(',')}; overlap=${overlap.toFixed(2)}`;
    const recommendation =
      `Verify that "${declaredName}" is intentional. If you meant "${popular}", ` +
      `correct the dependency name. If "${declaredName}" is a legitimate utility ` +
      `package, add it to knowledge/typosquat-allowlist.json under "${ecosystem}".`;

    return finding({
      scanner: 'DEP',
      severity: SEVERITY.MEDIUM,
      title,
      description,
      file: sourceFile,
      evidence,
      owasp: 'LLM03',
      recommendation,
    });
  }

  return null;
}
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Category 3: Malicious Install Scripts
|
|
// ---------------------------------------------------------------------------
|
|
|
|
/**
 * Patterns that, when found in an install-hook script, indicate network or
 * process-execution behaviour. Each entry pairs a regular expression with the
 * human-readable label used in findings.
 */
const MALICIOUS_SCRIPT_PATTERNS = [
  [/\bcurl\b/, 'curl (network fetch)'],
  [/\bwget\b/, 'wget (network fetch)'],
  [/\bfetch\b/, 'fetch (network request)'],
  [/https?:\/\//, 'HTTP URL'],
  [/\beval\b/, 'eval (code execution)'],
  [/\bexec\b/, 'exec (process execution)'],
  [/child_process/, 'child_process (subprocess)'],
  [/net\.connect\b/, 'net.connect (raw TCP)'],
  [/\bdgram\b/, 'dgram (UDP socket)'],
].map(([pattern, label]) => ({ pattern, label }));

/** npm lifecycle hooks that npm runs automatically during install. */
const INSTALL_HOOKS = [
  'preinstall',
  'install',
  'postinstall',
];
|
|
|
|
/**
 * Check package.json install hooks for patterns that suggest network access
 * or process execution.
 *
 * Each hook named in INSTALL_HOOKS is tested against every entry of
 * MALICIOUS_SCRIPT_PATTERNS; a hook that matches at least one pattern yields
 * one HIGH finding listing all matched labels.
 *
 * The evidence shows the script with URLs replaced by `[URL]`, cut to 120
 * characters. The trailing `...` is added only when the redacted text itself
 * was cut, so a short redacted preview is not shown as truncated.
 *
 * @param {object} pkgJson - Parsed package.json object
 * @returns {object[]} - findings
 */
function checkInstallScripts(pkgJson) {
  const findings = [];
  const scripts = pkgJson.scripts || {};

  for (const hook of INSTALL_HOOKS) {
    const script = scripts[hook];
    if (!script || typeof script !== 'string') continue;

    const matched = MALICIOUS_SCRIPT_PATTERNS.filter(({ pattern }) => pattern.test(script));
    if (matched.length === 0) continue;

    const labels = matched.map((m) => m.label).join(', ');

    // Redact any URLs in the evidence to avoid leaking sensitive paths in reports
    const redacted = script.replace(/https?:\/\/[^\s"']+/g, '[URL]');
    // Decide truncation from the redacted text: redaction may already have
    // shortened a long script below the limit.
    const preview = redacted.length > 120 ? `${redacted.slice(0, 120)}...` : redacted;

    findings.push(
      finding({
        scanner: 'DEP',
        severity: SEVERITY.HIGH,
        title: `Suspicious npm install hook: scripts.${hook} contains network/exec patterns`,
        description:
          `The package.json "scripts.${hook}" field runs automatically during \`npm install\` ` +
          `and contains suspicious patterns: ${labels}. ` +
          `Malicious packages use install hooks to exfiltrate data, download payloads, or establish persistence.`,
        file: 'package.json',
        evidence: `scripts.${hook}: "${preview}"`,
        owasp: 'LLM03',
        recommendation:
          `Review the scripts.${hook} command carefully. If this package is a dependency ` +
          `(not your own), consider whether this behaviour is expected. Use \`npm install --ignore-scripts\` ` +
          `if install hooks are not needed. File a report at https://www.npmjs.com/support if malicious.`,
      }),
    );
  }

  return findings;
}
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Category 4: Unpinned Versions
|
|
// ---------------------------------------------------------------------------
|
|
|
|
/** Flags for unpinned npm dependency specifiers. */
|
|
// Matches version specifiers that START with `*`, `latest`, `x`, or a
// `>=` / `>` immediately followed by a digit. The pattern is anchored at the
// start only, so anything after the matched prefix is accepted.
const UNPINNED_NPM_RE = /^(\*|latest|x|>=\d|>\d)/;
|
|
|
|
/**
 * Report npm dependencies whose version specifier is unpinned.
 *
 * Looks at `dependencies` and `devDependencies` (in that order). A section
 * that is missing or not an object is skipped, as are entries whose version
 * is not a string. A LOW finding is produced for every remaining entry whose
 * trimmed version matches UNPINNED_NPM_RE.
 *
 * @param {object} pkgJson - Parsed package.json object
 * @returns {object[]} findings, in section order then declaration order
 */
function checkUnpinnedNpm(pkgJson) {
  const sectionNames = ['dependencies', 'devDependencies'];

  return sectionNames.flatMap((sectionName) => {
    const deps = pkgJson[sectionName];
    if (!deps || typeof deps !== 'object') return [];

    return Object.entries(deps)
      .filter(([, version]) => typeof version === 'string' && UNPINNED_NPM_RE.test(version.trim()))
      .map(([name, version]) =>
        finding({
          scanner: 'DEP',
          severity: SEVERITY.LOW,
          title: `Unpinned npm dependency: ${name}@${version}`,
          description:
            `The package "${name}" in ${sectionName} uses an unpinned version specifier "${version}". ` +
            `Unpinned dependencies can silently pull in a compromised version on the next install.`,
          file: 'package.json',
          evidence: `${sectionName}.${name}: "${version}"`,
          owasp: 'LLM03',
          recommendation:
            `Pin "${name}" to an exact version (e.g., "${name}": "x.y.z") or use a lockfile ` +
            `(\`package-lock.json\` or \`yarn.lock\`) and commit it. Run \`npm ci\` in CI instead of \`npm install\`.`,
        }),
      );
  });
}
|
|
|
|
/**
 * Check requirements.txt lines for packages that are not pinned with `==`.
 *
 * Blank lines, `#` comment lines and lines starting with `-` are skipped.
 * For the rest, only the requirement itself is inspected: a trailing inline
 * comment (whitespace followed by `#`) and any environment marker (text after
 * `;`) are removed first, so operators inside them cannot make a requirement
 * look pinned or loosely pinned.
 *
 *   - no version operator at all → "Unpinned Python dependency" (LOW)
 *   - operators present but no `==` → "Loosely pinned Python dependency" (LOW)
 *   - `==` present → no finding
 *
 * The finding's evidence is the full trimmed line, and `line` is its 1-based
 * position in `lines`.
 *
 * @param {string[]} lines - Raw lines of a requirements file
 * @returns {object[]} findings
 */
function checkUnpinnedPypi(lines) {
  const findings = [];

  for (let i = 0; i < lines.length; i++) {
    const line = lines[i].trim();
    if (!line || line.startsWith('#') || line.startsWith('-')) continue;

    // Isolate the requirement: drop the inline comment, then the env marker.
    const requirement = line.replace(/\s#.*$/, '').split(';')[0];

    // A strict == pin needs no finding.
    if (/==/.test(requirement)) continue;

    const hasSpecifier = /[><=~!]/.test(requirement);
    const match = line.match(/^([A-Za-z0-9][A-Za-z0-9._-]*)/);
    const name = match ? match[1] : line;

    if (!hasSpecifier) {
      // No version at all
      findings.push(
        finding({
          scanner: 'DEP',
          severity: SEVERITY.LOW,
          title: `Unpinned Python dependency: ${name} (no version specifier)`,
          description:
            `"${name}" in requirements.txt has no version pin. ` +
            `Without pinning, \`pip install\` may resolve to a future compromised version.`,
          file: 'requirements.txt',
          line: i + 1,
          evidence: line,
          owasp: 'LLM03',
          recommendation:
            `Pin to an exact version: \`${name}==<version>\`. ` +
            `Use \`pip freeze > requirements.txt\` to capture current versions, ` +
            `or use \`pip-compile\` (pip-tools) for reproducible builds.`,
        }),
      );
    } else {
      // Has >= or ~= but no == — floating upper bound
      findings.push(
        finding({
          scanner: 'DEP',
          severity: SEVERITY.LOW,
          title: `Loosely pinned Python dependency: ${name}`,
          description:
            `"${name}" in requirements.txt uses a range specifier without a strict == pin. ` +
            `Range specifiers allow unexpected version upgrades that may introduce vulnerabilities.`,
          file: 'requirements.txt',
          line: i + 1,
          evidence: line,
          owasp: 'LLM03',
          recommendation:
            `Prefer exact version pinning (\`${name}==x.y.z\`) for reproducible installs. ` +
            `If you need flexibility, use a lockfile approach (\`pip-compile\`).`,
        }),
      );
    }
  }

  return findings;
}
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Main scanner export
|
|
// ---------------------------------------------------------------------------
|
|
|
|
/**
 * Scan targetPath for dependency security issues.
 *
 * Detection categories:
 *   1. CVE Detection via npm audit / pip audit (CRITICAL / HIGH)
 *   2. Typosquatting against top-200 npm / top-100 PyPI (HIGH / MEDIUM)
 *   3. Malicious install scripts in package.json (HIGH)
 *   4. Unpinned version specifiers (LOW)
 *
 * Ecosystems are detected by the presence of `package.json` (npm) and
 * `requirements.txt` / `setup.py` / `pyproject.toml` (PyPI). When none exist
 * the result has status 'skipped'. An exception during scanning is reported
 * as status 'error' together with the findings collected so far.
 *
 * PyPI names (both declared and top-list) are compared after lowercasing and
 * mapping `_` and `.` to `-`. PyPI treats those separators as equivalent, so
 * without this `zope.interface` vs `zope-interface` would be reported as a
 * distance-1 typosquat.
 *
 * @param {string} targetPath - Absolute root path being scanned
 * @param {object} discovery - Unused (dep-auditor reads files by convention, not discovery list)
 * @returns {Promise<object>} - scannerResult envelope
 */
export async function scan(targetPath, discovery) {
  const startMs = Date.now();
  const findings = [];
  let filesScanned = 0;

  // Detect which ecosystems are present
  const pkgJsonPath = join(targetPath, 'package.json');
  const requirementsTxt = join(targetPath, 'requirements.txt');
  const setupPy = join(targetPath, 'setup.py');
  const pyprojectToml = join(targetPath, 'pyproject.toml');

  const hasNpm = existsSync(pkgJsonPath);
  const hasPypi = existsSync(requirementsTxt) || existsSync(setupPy) || existsSync(pyprojectToml);

  // Nothing to scan
  if (!hasNpm && !hasPypi) {
    return scannerResult('dep-auditor', 'skipped', [], 0, Date.now() - startMs);
  }

  try {
    // -----------------------------------------------------------------------
    // npm ecosystem
    // -----------------------------------------------------------------------
    if (hasNpm) {
      filesScanned++;
      const pkgJson = await readJson(pkgJsonPath);

      if (pkgJson) {
        // 1a. CVE via npm audit
        findings.push(...runNpmAudit(targetPath));

        // 2a. Typosquatting — npm
        const [topPkgs, allowlist] = await Promise.all([loadTopPackages(), loadTyposquatAllowlist()]);
        const npmTop = topPkgs.npm.map((n) => n.toLowerCase().replace(/_/g, '-'));
        const allDeps = {
          ...pkgJson.dependencies,
          ...pkgJson.devDependencies,
        };
        for (const dep of Object.keys(allDeps)) {
          const normalized = dep.toLowerCase().replace(/_/g, '-');
          const f = checkTyposquatting(normalized, npmTop, 200, 'npm', 'package.json', allowlist.npm);
          if (f) findings.push(f);
        }

        // 3. Malicious install scripts
        findings.push(...checkInstallScripts(pkgJson));

        // 4a. Unpinned versions
        findings.push(...checkUnpinnedNpm(pkgJson));
      }
    }

    // -----------------------------------------------------------------------
    // PyPI ecosystem
    // -----------------------------------------------------------------------
    if (hasPypi) {
      // 1b. CVE via pip audit (only if requirements.txt or pyproject.toml present)
      if (existsSync(requirementsTxt) || existsSync(pyprojectToml)) {
        findings.push(...runPipAudit(targetPath));
      }

      // 2b. Typosquatting — PyPI (only if requirements.txt present)
      if (existsSync(requirementsTxt)) {
        filesScanned++;

        // The three loads are independent, so run them concurrently.
        const [reqLines, topPkgs, allowlist] = await Promise.all([
          readLines(requirementsTxt),
          loadTopPackages(),
          loadTyposquatAllowlist(),
        ]);

        // `_`, `.` and `-` are interchangeable in PyPI names; map them to `-`
        // on both sides so separator variants compare as exact matches.
        const normalizePypiName = (name) => name.toLowerCase().replace(/[_.]/g, '-');
        const pypiTop = topPkgs.pypi.map(normalizePypiName);

        for (const dep of parseRequirementsTxt(reqLines)) {
          const f = checkTyposquatting(
            normalizePypiName(dep),
            pypiTop,
            100,
            'pypi',
            'requirements.txt',
            allowlist.pypi,
          );
          if (f) findings.push(f);
        }

        // 4b. Unpinned versions
        findings.push(...checkUnpinnedPypi(reqLines));
      }
    }

    const durationMs = Date.now() - startMs;
    return scannerResult('dep-auditor', 'ok', findings, filesScanned, durationMs);
  } catch (err) {
    const durationMs = Date.now() - startMs;
    return scannerResult(
      'dep-auditor',
      'error',
      findings,
      filesScanned,
      durationMs,
      err.message,
    );
  }
}
|