// supply-chain-recheck.mjs — Periodic re-audit of installed dependencies
// Parses lockfiles (package-lock.json, yarn.lock, requirements.txt, Pipfile.lock)
// and checks against blocklists, OSV.dev batch API, and typosquat detection.
//
// Unlike pre-install-supply-chain.mjs (hook, checks at install time),
// this scanner checks what's ALREADY installed — catching deps that became
// compromised after installation.
//
// Scanner prefix: SCR
// OWASP coverage: LLM03 (Supply Chain), ASI04, AST06, MCP04
// Zero external dependencies — Node.js builtins only.

import { readFile } from 'node:fs/promises';
import { join, dirname } from 'node:path';
import { existsSync, readFileSync } from 'node:fs';
import { fileURLToPath } from 'node:url';
import { finding, scannerResult } from './lib/output.mjs';
import { SEVERITY } from './lib/severity.mjs';
import { levenshtein, tokenize, tokenOverlap, TYPOSQUAT_SUSPICIOUS_TOKENS } from './lib/string-utils.mjs';
import {
  NPM_COMPROMISED,
  PIP_COMPROMISED,
  CARGO_COMPROMISED,
  GEM_COMPROMISED,
  isCompromised,
  extractOSVSeverity,
  queryOSVBatch,
  OSV_ECOSYSTEM_MAP,
} from './lib/supply-chain-data.mjs';

const __dirname = dirname(fileURLToPath(import.meta.url));

/**
 * Normalize a package name for comparison: lower-case it and map every
 * `_`, `.` and `-` character to `-`.
 * @param {string} name
 * @returns {string}
 */
function normalizePackageName(name) {
  return name.toLowerCase().replace(/[_.-]/g, '-');
}

// ---------------------------------------------------------------------------
// Top-package knowledge base loader (for typosquat detection)
// ---------------------------------------------------------------------------

let _topPackages = null;
let _typosquatAllowlist = null;

/**
 * Load and cache knowledge/top-packages.json (resolved relative to this file).
 * Best-effort: if the file cannot be read or parsed, empty npm/pypi lists are
 * cached instead, which effectively disables typosquat detection.
 * @returns {Promise<{ npm: string[], pypi: string[] }>}
 */
async function loadTopPackages() {
  if (_topPackages) return _topPackages;
  const knowledgePath = join(__dirname, '..', 'knowledge', 'top-packages.json');
  try {
    const raw = await readFile(knowledgePath, 'utf8');
    _topPackages = JSON.parse(raw);
  } catch {
    // Deliberate best-effort: a missing knowledge base must not fail the scan.
    _topPackages = { npm: [], pypi: [] };
  }
  return _topPackages;
}

/**
 * Load and cache knowledge/typosquat-allowlist.json as sets of normalized names.
 * Best-effort: if the file cannot be read or parsed, empty sets are cached.
 * @returns {Promise<{ npm: Set<string>, pypi: Set<string> }>}
 */
async function loadTyposquatAllowlist() {
  if (_typosquatAllowlist) return _typosquatAllowlist;
  const allowPath = join(__dirname, '..', 'knowledge', 'typosquat-allowlist.json');
  try {
    const raw = await readFile(allowPath, 'utf8');
    const data = JSON.parse(raw);
    _typosquatAllowlist = {
      npm: new Set((data.npm || []).map(normalizePackageName)),
      pypi: new Set((data.pypi || []).map(normalizePackageName)),
    };
  } catch {
    // Deliberate best-effort: no allowlist simply means nothing is exempted.
    _typosquatAllowlist = { npm: new Set(), pypi: new Set() };
  }
  return _typosquatAllowlist;
}

// ---------------------------------------------------------------------------
// Lockfile parsers — extract { name, version, ecosystem } tuples
// ---------------------------------------------------------------------------

const NODE_MODULES_SEGMENT = 'node_modules/';

/**
 * Parse package-lock.json.
 *
 * Reads the `packages` object (v2/v3) first and falls back to the
 * `dependencies` tree (v1) when `packages` yields nothing. The package name is
 * taken from the text after the LAST `node_modules/` segment of each key, so
 * nested installs such as `node_modules/a/node_modules/b` are reported as `b`.
 * Identical name@version pairs are reported once.
 *
 * @param {string} filePath - Absolute path to package-lock.json
 * @returns {Promise<{ name: string, version: string, ecosystem: string }[]>}
 *   Empty (or partial) list when the file cannot be read or parsed.
 */
async function parsePackageLock(filePath) {
  const deps = [];
  const seen = new Set();
  const addDep = (name, version) => {
    const id = `${name}@${version}`;
    if (seen.has(id)) return;
    seen.add(id);
    deps.push({ name, version, ecosystem: 'npm' });
  };

  try {
    const raw = await readFile(filePath, 'utf8');
    const lock = JSON.parse(raw);

    // v2/v3 format: packages object keyed by install path
    const packages = lock.packages || {};
    for (const [key, info] of Object.entries(packages)) {
      if (key === '') continue; // Root package
      const segmentAt = key.lastIndexOf(NODE_MODULES_SEGMENT);
      // Keys without a node_modules segment are kept verbatim, as before.
      const name = segmentAt === -1 ? key : key.slice(segmentAt + NODE_MODULES_SEGMENT.length);
      if (name && info?.version) addDep(name, info.version);
    }

    // v1 fallback: dependencies tree (entries may nest their own `dependencies`)
    if (deps.length === 0 && lock.dependencies) {
      const pending = [lock.dependencies];
      while (pending.length > 0) {
        const group = pending.pop();
        for (const [name, info] of Object.entries(group)) {
          if (info?.version) addDep(name, info.version);
          if (info?.dependencies && typeof info.dependencies === 'object') {
            pending.push(info.dependencies);
          }
        }
      }
    }
  } catch {
    /* read/parse error — skip this lockfile */
  }
  return deps;
}

/**
 * Parse yarn.lock (v1 format).
 * Extracts the package name from each unindented header line and pairs it with
 * the first indented `version "x.y.z"` line that follows.
 * @param {string} filePath - Absolute path to yarn.lock
 * @returns {Promise<{ name: string, version: string, ecosystem: string }[]>}
 *   Empty list when the file cannot be read.
 */
async function parseYarnLock(filePath) {
  const deps = [];
  try {
    const raw = await readFile(filePath, 'utf8');
    let currentPkg = null;

    for (const line of raw.split('\n')) {
      // Package header: "pkg@^1.0.0", "pkg@1.0.0:" or "@scope/pkg@^1.0.0":
      const isHeader = !line.startsWith(' ') && !line.startsWith('#') && line.includes('@');
      if (isHeader) {
        const header = line.replace(/[":]/g, '').trim();
        if (header.startsWith('@')) {
          // Scoped: @scope/pkg@version — skip the leading '@' when looking for the separator
          const rest = header.slice(1);
          const atIdx = rest.indexOf('@');
          if (atIdx > 0) currentPkg = '@' + rest.slice(0, atIdx);
        } else {
          const atIdx = header.indexOf('@');
          if (atIdx > 0) currentPkg = header.slice(0, atIdx);
        }
      }

      // Version line: `  version "1.2.3"`
      const versionMatch = line.match(/^\s+version\s+"([^"]+)"/);
      if (versionMatch && currentPkg) {
        deps.push({ name: currentPkg, version: versionMatch[1], ecosystem: 'npm' });
        currentPkg = null; // one version per header block
      }
    }
  } catch {
    /* read error — skip this lockfile */
  }
  return deps;
}

/**
 * Parse requirements.txt (pip format).
 *
 * Blank lines, comment lines and option lines (starting with `-`) are skipped.
 * For each remaining line the trailing inline comment, environment marker
 * (`; ...`), direct URL (`@ ...`), per-requirement options (` --hash=...`) and
 * line-continuation backslash are dropped before the name and the exact pin
 * (`==` / `===`) are extracted. Extras (`pkg[extra]`) are not part of the name.
 * Requirements without an exact pin are returned with `version: null`.
 *
 * @param {string} filePath - Absolute path to requirements.txt
 * @returns {Promise<{ name: string, version: string|null, ecosystem: string }[]>}
 *   Empty list when the file cannot be read.
 */
async function parseRequirementsTxt(filePath) {
  const deps = [];
  try {
    const raw = await readFile(filePath, 'utf8');
    for (const rawLine of raw.split('\n')) {
      const line = rawLine.trim();
      if (!line || line.startsWith('#') || line.startsWith('-')) continue;

      // Keep only the "name[extras] specifiers" part of the requirement.
      const spec = line.split(/\s+#|;|@|\s+--|\s*\\$/, 1)[0].trim();
      const nameMatch = spec.match(/^([a-zA-Z0-9_.-]+)/);
      if (!nameMatch) continue;

      const pinMatch = spec.slice(nameMatch[0].length).match(/={2,3}\s*([^\s,]+)/);
      deps.push({
        name: nameMatch[1],
        version: pinMatch ? pinMatch[1] : null,
        ecosystem: 'pip',
      });
    }
  } catch {
    /* read error — skip this file */
  }
  return deps;
}

/**
 * Parse Pipfile.lock (JSON format).
 * Reads the `default` and `develop` sections; entries without a string
 * `version` are ignored. A leading `==` is stripped from the version.
 * @param {string} filePath - Absolute path to Pipfile.lock
 * @returns {Promise<{ name: string, version: string, ecosystem: string }[]>}
 *   Empty list when the file cannot be read or parsed.
 */
async function parsePipfileLock(filePath) {
  const deps = [];
  try {
    const raw = await readFile(filePath, 'utf8');
    const lock = JSON.parse(raw);
    for (const section of ['default', 'develop']) {
      const packages = lock[section] || {};
      for (const [name, info] of Object.entries(packages)) {
        // `info` may be null or lack a string version; skip such entries
        // instead of letting a TypeError truncate the result.
        const pinned = typeof info?.version === 'string' ? info.version.replace(/^==/, '') : null;
        if (pinned) deps.push({ name, version: pinned, ecosystem: 'pip' });
      }
    }
  } catch {
    /* read/parse error — skip this lockfile */
  }
  return deps;
}

// ---------------------------------------------------------------------------
// Checks
// ---------------------------------------------------------------------------

/**
 * Check all dependencies against the per-ecosystem blocklists.
 * Dependencies whose ecosystem has no blocklist are ignored.
 * @param {{ name: string, version: string|null, ecosystem: string }[]} deps
 * @param {string} lockfile - Source lockfile name for finding references
 * @returns {object[]} One CRITICAL finding per dependency that `isCompromised` flags
 */
function checkBlocklists(deps, lockfile) {
  const results = [];
  const lists = {
    npm: NPM_COMPROMISED,
    pip: PIP_COMPROMISED,
    cargo: CARGO_COMPROMISED,
    gem: GEM_COMPROMISED,
  };

  for (const dep of deps) {
    const blocklist = lists[dep.ecosystem];
    if (!blocklist) continue;
    if (!isCompromised(blocklist, dep.name, dep.version)) continue;

    results.push(finding({
      scanner: 'SCR',
      severity: SEVERITY.CRITICAL,
      title: `Compromised dependency: ${dep.name}@${dep.version || '*'}`,
      description:
        `"${dep.name}"${dep.version ? '@' + dep.version : ''} in ${lockfile} is on the known-compromised blocklist. ` +
        `This package/version is associated with supply chain attacks (malware, data exfiltration, or sabotage).`,
      file: lockfile,
      evidence: `${dep.name}@${dep.version || 'any'} in ${dep.ecosystem} blocklist`,
      owasp: 'LLM03',
      recommendation:
        `Remove "${dep.name}" immediately. If this was a transitive dependency, find and remove ` +
        `the parent package that requires it. Audit your system for signs of compromise.`,
    }));
  }
  return results;
}

/**
 * Check dependencies against the OSV.dev batch API for known vulnerabilities.
 *
 * Only dependencies with a version and an ecosystem present in
 * `OSV_ECOSYSTEM_MAP` are queried. The batch result is read positionally:
 * `results[i]` is taken to describe the i-th queried dependency.
 *
 * @param {{ name: string, version: string, ecosystem: string, lockfile?: string }[]} deps
 *   When a dependency carries its own `lockfile` tag, that value is used as the
 *   finding's `file`; otherwise the `lockfile` argument is used.
 * @param {string} lockfile - Fallback source label, also used in the description text
 * @returns {Promise<{ findings: object[], offline: boolean }>}
 *   `offline: true` (with no findings) when the batch query reports it was offline.
 */
async function checkOSV(deps, lockfile) {
  // Only query deps that have a version (OSV requires version)
  const queryable = deps.filter(d => d.version && OSV_ECOSYSTEM_MAP[d.ecosystem]);
  if (queryable.length === 0) return { findings: [], offline: false };

  const { results, offline } = await queryOSVBatch(queryable);
  if (offline) return { findings: [], offline: true };

  const findings = [];
  for (let i = 0; i < results.length; i++) {
    const vulns = results[i]?.vulns || [];
    if (vulns.length === 0) continue;

    const dep = queryable[i];
    const sourceFile = dep.lockfile || lockfile;

    for (const vuln of vulns) {
      const severity = extractOSVSeverity(vuln);
      // Anything that is not CRITICAL or HIGH is reported as MEDIUM.
      let sevConst = SEVERITY.MEDIUM;
      if (severity === 'CRITICAL') sevConst = SEVERITY.CRITICAL;
      else if (severity === 'HIGH') sevConst = SEVERITY.HIGH;

      findings.push(finding({
        scanner: 'SCR',
        severity: sevConst,
        title: `Known vulnerability: ${dep.name}@${dep.version} (${vuln.id})`,
        description:
          `${vuln.id}: ${(vuln.summary || vuln.details || 'No description').slice(0, 200)}. ` +
          `Found in ${lockfile}.`,
        file: sourceFile,
        evidence: `${vuln.id} — ${dep.name}@${dep.version}`,
        owasp: 'LLM03',
        recommendation: `Upgrade "${dep.name}" to a patched version. Check ${vuln.id} for fix details.`,
      }));
    }
  }
  return { findings, offline: false };
}

/**
 * Check dependencies of one ecosystem for typosquatting against top packages.
 *
 * Per unique normalized name:
 *  - allowlisted names and exact matches with a top package are skipped;
 *  - edit distance 1 to the closest top package → HIGH;
 *  - edit distance 2 to a package ranked inside `topCutoff` → MEDIUM;
 *  - otherwise (including when no top package has a similar length) the
 *    token-overlap fallback is consulted.
 *
 * @param {{ name: string, version: string|null, ecosystem: string }[]} deps
 * @param {string[]} topList - Normalized top package names, in ranking order
 * @param {number} topCutoff - Top N for stricter matching
 * @param {string} ecosystem - Only deps with this ecosystem are checked
 * @param {string} lockfile - Source lockfile name for finding references
 * @param {Set<string>} [allowlist] - Normalized names that are never flagged
 * @returns {object[]} findings
 */
function checkTyposquatting(deps, topList, topCutoff, ecosystem, lockfile, allowlist) {
  const results = [];
  const checked = new Set();

  for (const dep of deps) {
    if (dep.ecosystem !== ecosystem) continue;

    const normalized = normalizePackageName(dep.name);
    if (checked.has(normalized)) continue;
    checked.add(normalized);

    // Skip known legitimate packages
    if (allowlist && allowlist.has(normalized)) continue;

    let isTopPackage = false;
    let closestDist = Infinity;
    let closestPkg = null;
    let closestIdx = Infinity;

    for (let i = 0; i < topList.length; i++) {
      const topPkg = topList[i];
      if (normalized === topPkg) {
        isTopPackage = true; // Exact match — legit
        break;
      }
      // Names differing by more than 2 in length cannot be within distance 2.
      if (Math.abs(normalized.length - topPkg.length) > 2) continue;

      const dist = levenshtein(normalized, topPkg);
      // Ties are resolved in favour of the higher-ranked (lower index) package.
      if (dist < closestDist || (dist === closestDist && i < closestIdx)) {
        closestDist = dist;
        closestPkg = topPkg;
        closestIdx = i;
      }
    }

    // Kept separate from "no similarly sized candidate" so that the latter
    // still reaches the token-overlap fallback below.
    if (isTopPackage) continue;

    if (closestPkg && closestDist === 1) {
      results.push(finding({
        scanner: 'SCR',
        severity: SEVERITY.HIGH,
        title: `Possible typosquatting: "${dep.name}" vs "${closestPkg}" (edit distance 1)`,
        description:
          `"${dep.name}" in ${lockfile} is 1 character away from the popular ${ecosystem} package "${closestPkg}". ` +
          `Typosquatting packages impersonate popular libraries to execute malicious code.`,
        file: lockfile,
        evidence: `"${dep.name}" → "${closestPkg}" (Levenshtein: 1)`,
        owasp: 'LLM03',
        recommendation: `Verify "${dep.name}" is the intended package. If you meant "${closestPkg}", correct the dependency.`,
      }));
    } else if (closestPkg && closestDist === 2 && closestIdx < topCutoff) {
      results.push(finding({
        scanner: 'SCR',
        severity: SEVERITY.MEDIUM,
        title: `Potential typosquatting: "${dep.name}" vs "${closestPkg}" (edit distance 2)`,
        description:
          `"${dep.name}" in ${lockfile} is 2 characters away from the popular ${ecosystem} package "${closestPkg}" ` +
          `(top ${topCutoff} by downloads).`,
        file: lockfile,
        evidence: `"${dep.name}" → "${closestPkg}" (Levenshtein: 2)`,
        owasp: 'LLM03',
        recommendation: `Confirm "${dep.name}" is the correct package. Check publish date and author on the registry.`,
      }));
    } else {
      // B7 (v7.2.0): token-overlap fallback. Catches typosquats with edit
      // distance >= 3 that contain all tokens of a popular package plus a
      // suspicious suffix (e.g. `lodash-utils` vs `lodash`).
      const tokenFinding = checkTyposquatTokenOverlap(dep, normalized, topList, topCutoff, ecosystem, lockfile);
      if (tokenFinding) results.push(tokenFinding);
    }
  }
  return results;
}

/**
 * Token-overlap typosquat heuristic for one dependency.
 *
 * Flags the dependency when its name has at least two distinct tokens, at least
 * one of them is listed in `TYPOSQUAT_SUSPICIOUS_TOKENS`, and some package in
 * the first `topCutoff` entries of `topList` has all of its tokens contained in
 * the dependency's tokens (without being the same token count) with a
 * `tokenOverlap` score of at least 0.66. The first such package wins.
 *
 * @param {{ name: string }} dep - Dependency being checked (name used for messages)
 * @param {string} normalized - Normalized form of `dep.name`
 * @param {string[]} topList - Normalized top package names, in ranking order
 * @param {number} topCutoff - Only the first N entries of `topList` are considered
 * @param {string} ecosystem - Ecosystem label used in messages
 * @param {string} lockfile - Source lockfile name for finding references
 * @returns {object|null} A MEDIUM finding, or null when nothing matches
 */
function checkTyposquatTokenOverlap(dep, normalized, topList, topCutoff, ecosystem, lockfile) {
  const declaredTokens = new Set(tokenize(normalized));
  if (declaredTokens.size < 2) return null;

  const suspiciousSuffixes = TYPOSQUAT_SUSPICIOUS_TOKENS.filter(t => declaredTokens.has(t));
  if (suspiciousSuffixes.length === 0) return null;

  const limit = Math.min(topCutoff, topList.length);
  for (let i = 0; i < limit; i++) {
    const topPkg = topList[i];
    if (normalized === topPkg) return null; // the dependency IS a top package

    const topTokens = tokenize(topPkg);
    if (topTokens.length === 0) continue;
    if (!topTokens.every(t => declaredTokens.has(t))) continue;
    // Same token count means nothing extra was appended to the popular name.
    if (topTokens.length === declaredTokens.size) continue;

    const overlap = tokenOverlap(normalized, topPkg);
    if (overlap < 0.66) continue;

    return finding({
      scanner: 'SCR',
      severity: SEVERITY.MEDIUM,
      title: `Possible typosquatting via token-overlap: "${dep.name}" vs "${topPkg}"`,
      description:
        `"${dep.name}" in ${lockfile} contains all tokens of the popular ${ecosystem} package ` +
        `"${topPkg}" plus a suspicious suffix (${suspiciousSuffixes.join(', ')}). ` +
        `This is a common typosquat pattern: attackers register popular-name-plus-suffix ` +
        `packages to capture installs.`,
      file: lockfile,
      evidence: `"${dep.name}" tokens ⊃ "${topPkg}" tokens; suffix=${suspiciousSuffixes.join(',')}; overlap=${overlap.toFixed(2)}`,
      owasp: 'LLM03',
      recommendation:
        `Verify "${dep.name}" is intentional. If you meant "${topPkg}", correct the dependency. ` +
        `If "${dep.name}" is a legitimate utility, add it to knowledge/typosquat-allowlist.json under "${ecosystem}".`,
    });
  }
  return null;
}

// ---------------------------------------------------------------------------
// Main scanner export
// ---------------------------------------------------------------------------

/**
 * Scan targetPath lockfiles for supply chain issues.
 *
 * Detection categories:
 *   1. Blocklist matches (known compromised packages) — CRITICAL
 *   2. OSV.dev CVE/advisory detection (batch API) — CRITICAL/HIGH/MEDIUM
 *   3. Typosquatting against top packages — HIGH/MEDIUM
 *
 * Lockfiles parsed: package-lock.json, yarn.lock, requirements.txt, Pipfile.lock,
 * plus requirements-dev.txt, requirements-prod.txt and requirements.lock.
 *
 * When none of these files exist the result status is 'skipped'. When OSV.dev
 * is unreachable an INFO finding is added and `osv_offline` is set on the
 * result. Any exception during the scan yields status 'error' together with the
 * findings collected so far.
 *
 * @param {string} targetPath - Absolute root path being scanned
 * @param {object} discovery - Unused (scanner reads lockfiles by convention)
 * @returns {Promise<object>} - scannerResult envelope
 */
export async function scan(targetPath, discovery) {
  const startMs = Date.now();
  const allFindings = [];
  let filesScanned = 0;
  let osvOffline = false;

  // Discover lockfiles (well-known names first, then requirements variants)
  const candidates = [
    { name: 'package-lock.json', parser: parsePackageLock, ecosystem: 'npm' },
    { name: 'yarn.lock', parser: parseYarnLock, ecosystem: 'npm' },
    { name: 'requirements.txt', parser: parseRequirementsTxt, ecosystem: 'pip' },
    { name: 'Pipfile.lock', parser: parsePipfileLock, ecosystem: 'pip' },
    { name: 'requirements-dev.txt', parser: parseRequirementsTxt, ecosystem: 'pip' },
    { name: 'requirements-prod.txt', parser: parseRequirementsTxt, ecosystem: 'pip' },
    { name: 'requirements.lock', parser: parseRequirementsTxt, ecosystem: 'pip' },
  ];
  const presentLockfiles = candidates
    .map(candidate => ({ ...candidate, path: join(targetPath, candidate.name) }))
    .filter(candidate => existsSync(candidate.path));

  if (presentLockfiles.length === 0) {
    return scannerResult('supply-chain-recheck', 'skipped', [], 0, Date.now() - startMs);
  }

  try {
    // Load top packages and allowlist for typosquat detection
    const [topPkgs, allowlist] = await Promise.all([loadTopPackages(), loadTyposquatAllowlist()]);
    // Tolerate a knowledge file that lacks one of the ecosystem keys.
    const npmTop = (topPkgs?.npm ?? []).map(normalizePackageName);
    const pypiTop = (topPkgs?.pypi ?? []).map(normalizePackageName);

    // Parse all lockfiles
    const allDeps = [];
    for (const lockfile of presentLockfiles) {
      filesScanned++;
      const deps = await lockfile.parser(lockfile.path);

      // 1. Blocklist check
      allFindings.push(...checkBlocklists(deps, lockfile.name));

      // 3. Typosquat check
      if (lockfile.ecosystem === 'npm') {
        allFindings.push(...checkTyposquatting(deps, npmTop, 200, 'npm', lockfile.name, allowlist.npm));
      } else if (lockfile.ecosystem === 'pip') {
        allFindings.push(...checkTyposquatting(deps, pypiTop, 100, 'pip', lockfile.name, allowlist.pypi));
      }

      // Tag each dep with its source so OSV findings point at the right file.
      allDeps.push(...deps.map(d => ({ ...d, lockfile: lockfile.name })));
    }

    // 2. OSV.dev batch check (all deps from all lockfiles at once)
    const osvDeps = allDeps.filter(d => d.version);
    if (osvDeps.length > 0) {
      const osvResult = await checkOSV(osvDeps, 'lockfiles');
      if (osvResult.offline) {
        osvOffline = true;
        allFindings.push(finding({
          scanner: 'SCR',
          severity: SEVERITY.INFO,
          title: 'OSV.dev unreachable — CVE check skipped',
          description:
            'Could not reach the OSV.dev API. Blocklist and typosquat checks were performed, ' +
            'but known vulnerability (CVE) detection was skipped. Re-run when network is available.',
          owasp: 'LLM03',
          recommendation: 'Re-run the scanner when network connectivity is restored.',
        }));
      } else {
        // checkOSV already attributes each finding to its dep's lockfile tag.
        allFindings.push(...osvResult.findings);
      }
    }

    const durationMs = Date.now() - startMs;
    const result = scannerResult('supply-chain-recheck', 'ok', allFindings, filesScanned, durationMs);
    if (osvOffline) result.osv_offline = true;
    return result;
  } catch (err) {
    const durationMs = Date.now() - startMs;
    return scannerResult('supply-chain-recheck', 'error', allFindings, filesScanned, durationMs, err.message);
  }
}