// ktg-plugin-marketplace/plugins/llm-security/scanners/dep-auditor.mjs
//
// 634 lines
// 23 KiB
// JavaScript

// dep-auditor.mjs — Deterministic dependency security scanner
// Detects CVEs (npm/pip audit), typosquatting, malicious install scripts,
// and unpinned versions. Zero external dependencies — Node.js builtins only.
//
// OWASP coverage: LLM03 (Supply Chain)
import { finding, scannerResult } from './lib/output.mjs';
import { SEVERITY } from './lib/severity.mjs';
import { levenshtein } from './lib/string-utils.mjs';
import { readFile } from 'node:fs/promises';
import { join, dirname } from 'node:path';
import { existsSync } from 'node:fs';
import { execSync } from 'node:child_process';
import { fileURLToPath } from 'node:url';
// ---------------------------------------------------------------------------
// Top-package knowledge base loader
// ---------------------------------------------------------------------------
// Directory of this module file; the knowledge JSON files are resolved relative to it.
const __dirname = dirname(fileURLToPath(import.meta.url));
/**
 * Cache for the parsed top-packages.json. Stays null until loadTopPackages()
 * has run once.
 * @type {{ npm: string[], pypi: string[] } | null}
 */
let _topPackages = null;
/**
 * Cache for the typosquat allowlist. Stays null until loadTyposquatAllowlist()
 * has run once, after which it holds one Set of names per ecosystem.
 */
let _typosquatAllowlist = null;
/**
 * Load the popular-package lists from ../knowledge/top-packages.json.
 *
 * The result is cached in module state, so the file is read at most once per
 * process. A read or parse failure, and any missing or malformed `npm` /
 * `pypi` key, degrades to an empty list for that ecosystem, so callers can
 * always rely on both properties being arrays of strings.
 *
 * @returns {Promise<{ npm: string[], pypi: string[] }>}
 */
async function loadTopPackages() {
  if (_topPackages) return _topPackages;
  const knowledgePath = join(__dirname, '..', 'knowledge', 'top-packages.json');
  // Keep only string entries: callers lowercase every name, which would throw
  // on numbers, nulls or nested objects.
  const toNameList = (value) =>
    Array.isArray(value) ? value.filter((name) => typeof name === 'string') : [];
  let data = null;
  try {
    data = JSON.parse(await readFile(knowledgePath, 'utf8'));
  } catch {
    // Graceful fallback: empty lists — typosquatting detection skipped
  }
  // Build a fresh object instead of trusting the parsed shape; `data` may be
  // null, a primitive, or an object without one of the two keys.
  _topPackages = { npm: toNameList(data?.npm), pypi: toNameList(data?.pypi) };
  return _topPackages;
}
/**
 * Load ../knowledge/typosquat-allowlist.json as one Set of names per
 * ecosystem. Every name is lowercased and each `_`, `.` or `-` character is
 * replaced by `-`. The result is cached after the first call; a read, parse
 * or shape error yields two empty Sets.
 *
 * @returns {Promise<{ npm: Set<string>, pypi: Set<string> }>}
 */
async function loadTyposquatAllowlist() {
  if (_typosquatAllowlist) return _typosquatAllowlist;
  const allowPath = join(__dirname, '..', 'knowledge', 'typosquat-allowlist.json');
  const normalizeName = (name) => name.toLowerCase().replace(/[_.-]/g, '-');
  const toNameSet = (names) => new Set((names || []).map(normalizeName));
  let loaded;
  try {
    const parsed = JSON.parse(await readFile(allowPath, 'utf8'));
    loaded = { npm: toNameSet(parsed.npm), pypi: toNameSet(parsed.pypi) };
  } catch {
    // Any failure above (missing file, bad JSON, unexpected shape) disables the allowlist.
    loaded = { npm: new Set(), pypi: new Set() };
  }
  _typosquatAllowlist = loaded;
  return _typosquatAllowlist;
}
// ---------------------------------------------------------------------------
// File reading helpers
// ---------------------------------------------------------------------------
/**
 * Read a file as UTF-8 and parse it as JSON.
 * @param {string} absPath - Path of the JSON file
 * @returns {Promise<object|null>} The parsed value, or null when the file
 *   cannot be read or does not contain valid JSON
 */
async function readJson(absPath) {
  let text;
  try {
    text = await readFile(absPath, 'utf8');
  } catch {
    // Unreadable or missing file
    return null;
  }
  try {
    return JSON.parse(text);
  } catch {
    // Malformed JSON
    return null;
  }
}
/**
 * Read a UTF-8 text file and split it into lines on `\n`, dropping one
 * trailing `\r` from each line so CRLF files yield clean lines.
 * @param {string} absPath - Path of the text file
 * @returns {Promise<string[]>} The lines, or an empty array when the file
 *   cannot be read
 */
async function readLines(absPath) {
  let text;
  try {
    text = await readFile(absPath, 'utf8');
  } catch {
    return [];
  }
  const lines = [];
  for (const piece of text.split('\n')) {
    lines.push(piece.endsWith('\r') ? piece.slice(0, -1) : piece);
  }
  return lines;
}
// ---------------------------------------------------------------------------
// Category 1: CVE Detection via npm/pip audit
// ---------------------------------------------------------------------------
/**
 * Translate an npm audit severity string into the matching SEVERITY constant.
 * @param {string} npmSev - Severity as reported by npm audit
 * @returns {*} The corresponding SEVERITY member
 */
function npmSeverityToOurs(npmSev) {
  if (npmSev === 'critical') return SEVERITY.CRITICAL;
  if (npmSev === 'high') return SEVERITY.HIGH;
  if (npmSev === 'moderate') return SEVERITY.MEDIUM;
  // 'low' and every unrecognized value share the lowest level.
  return SEVERITY.LOW;
}
/**
 * Run `npm audit --json` in targetPath and turn each reported vulnerability
 * into a finding.
 *
 * Audit problems never throw: a missing npm binary, a timeout, empty output
 * or unparseable output all yield an empty array. A non-zero exit is expected
 * (npm audit exits 1 when vulnerabilities are found), so stdout is recovered
 * from the thrown error in that case.
 *
 * @param {string} targetPath - Directory containing the package.json to audit
 * @returns {object[]} findings
 */
function runNpmAudit(targetPath) {
  const findings = [];
  let raw;
  try {
    raw = execSync('npm audit --json', {
      cwd: targetPath,
      timeout: 30_000,
      // execSync caps stdout at 1 MiB by default and truncates beyond that.
      // A truncated report fails JSON.parse below and would silently hide
      // every vulnerability, so allow a much larger report.
      maxBuffer: 64 * 1024 * 1024,
      // Allow non-zero exit (npm audit exits 1 when vulnerabilities found)
      stdio: ['ignore', 'pipe', 'ignore'],
    }).toString();
  } catch (err) {
    // execSync throws on non-zero exit; the stdout is still on err.stdout
    raw = err.stdout ? err.stdout.toString() : null;
  }
  if (!raw || raw.trim().length === 0) return findings;
  let parsed;
  try {
    parsed = JSON.parse(raw);
  } catch {
    return findings;
  }
  // npm audit v2 format: { vulnerabilities: { pkgName: { severity, via, ... } } }
  const vulns = (parsed && parsed.vulnerabilities) || {};
  for (const [pkgName, vuln] of Object.entries(vulns)) {
    if (!vuln || typeof vuln !== 'object') continue;
    const severity = npmSeverityToOurs(vuln.severity);
    // Collect advisory IDs from the via chain. Entries may be plain package
    // names (strings) rather than advisory objects, and the same advisory can
    // appear more than once, hence the type checks and the Set.
    const advisoryIds = new Set();
    if (Array.isArray(vuln.via)) {
      for (const v of vuln.via) {
        if (v && typeof v === 'object' && typeof v.url === 'string') {
          // Extract CVE or advisory ID from URL
          const match = v.url.match(/GHSA-[\w-]+|CVE-\d{4}-\d+/i);
          if (match) advisoryIds.add(match[0]);
        }
      }
    }
    const cveIds = [...advisoryIds];
    const cveRef = cveIds.length > 0 ? ` (${cveIds.join(', ')})` : '';
    // fixAvailable is either an object naming the upgrade, a truthy flag, or falsy.
    const fixAvailable = vuln.fixAvailable
      ? typeof vuln.fixAvailable === 'object'
        ? ` Fix: upgrade to ${vuln.fixAvailable.name}@${vuln.fixAvailable.version}.`
        : ' A fix is available — run `npm audit fix`.'
      : ' No automatic fix available — review manually.';
    findings.push(
      finding({
        scanner: 'DEP',
        severity,
        title: `Vulnerable npm dependency: ${pkgName}${cveRef}`,
        description:
          `npm audit reports a ${vuln.severity} severity vulnerability in "${pkgName}".` +
          (vuln.range ? ` Affected range: ${vuln.range}.` : '') +
          fixAvailable,
        file: 'package.json',
        evidence: cveIds.length > 0 ? cveIds.join(', ') : `${pkgName} @ ${vuln.range || 'unknown'}`,
        owasp: 'LLM03',
        recommendation:
          `Run \`npm audit fix\` or manually upgrade "${pkgName}" to a patched version. ` +
          'Review the advisory for workarounds if no fix is available.',
      }),
    );
  }
  return findings;
}
/**
 * Run `pip-audit --format json` in targetPath and turn each reported
 * vulnerability into a finding.
 *
 * Audit problems never throw: pip-audit not being installed, a timeout, empty
 * output or unparseable output all yield an empty array. stdout is recovered
 * from the thrown error when the command exits non-zero.
 *
 * NOTE(review): no `-r <file>` or project path is passed, so pip-audit
 * presumably audits whichever Python environment is active in the scanning
 * process rather than the target's declared requirements — confirm this is
 * the intended scope.
 *
 * @param {string} targetPath - Directory used as the working directory
 * @returns {object[]} findings
 */
function runPipAudit(targetPath) {
  const findings = [];
  let raw;
  try {
    // The auditing tool ships as the standalone `pip-audit` executable;
    // pip itself has no `audit` subcommand.
    raw = execSync('pip-audit --format json', {
      cwd: targetPath,
      timeout: 30_000,
      // Avoid execSync's 1 MiB default cap, which truncates large reports
      // and makes them unparseable.
      maxBuffer: 64 * 1024 * 1024,
      stdio: ['ignore', 'pipe', 'ignore'],
    }).toString();
  } catch (err) {
    raw = err.stdout ? err.stdout.toString() : null;
  }
  if (!raw || raw.trim().length === 0) return findings;
  let parsed;
  try {
    parsed = JSON.parse(raw);
  } catch {
    return findings;
  }
  // Accept both report shapes: a bare array of packages, or an object with a
  // `dependencies` array. Each package is { name, version, vulns: [{ id, fix_versions, description }] }.
  const packages = Array.isArray(parsed) ? parsed : ((parsed && parsed.dependencies) || []);
  for (const pkg of packages) {
    if (!pkg || !Array.isArray(pkg.vulns) || pkg.vulns.length === 0) continue;
    for (const vuln of pkg.vulns) {
      const fixes = vuln.fix_versions && vuln.fix_versions.length > 0
        ? ` Fix in version(s): ${vuln.fix_versions.join(', ')}.`
        : ' No fix version reported.';
      findings.push(
        finding({
          scanner: 'DEP',
          severity: SEVERITY.HIGH, // pip audit does not expose severity; default HIGH
          title: `Vulnerable Python dependency: ${pkg.name} (${vuln.id})`,
          description:
            `pip audit reports vulnerability ${vuln.id} in "${pkg.name}" v${pkg.version}.` +
            (vuln.description ? ` ${vuln.description}` : '') +
            fixes,
          file: 'requirements.txt',
          // Separate the advisory ID from the package coordinate so both stay readable.
          evidence: `${vuln.id} in ${pkg.name}@${pkg.version}`,
          owasp: 'LLM03',
          recommendation:
            `Upgrade "${pkg.name}" to a patched version.${fixes} ` +
            'Run `pip audit` after upgrading to verify resolution.',
        }),
      );
    }
  }
  return findings;
}
// ---------------------------------------------------------------------------
// Category 2: Typosquatting Detection
// ---------------------------------------------------------------------------
/**
 * Pull the package names out of requirements.txt lines.
 *
 * Blank lines, `#` comments and lines starting with `-` (options and
 * includes such as `-r other.txt`) are ignored. For every other line the
 * leading name token is taken, so version specifiers, extras and markers
 * after the name are dropped. Names are lowercased and `_` becomes `-`.
 *
 * @param {string[]} lines - Raw lines of a requirements file
 * @returns {string[]} Normalized package names, in file order
 */
function parseRequirementsTxt(lines) {
  const LEADING_NAME = /^([A-Za-z0-9]([A-Za-z0-9._-]*[A-Za-z0-9])?)/;
  return Array.from(lines, (line) => line.trim())
    .filter((entry) => entry !== '' && !entry.startsWith('#') && !entry.startsWith('-'))
    .map((entry) => LEADING_NAME.exec(entry))
    .filter((hit) => hit !== null)
    .map((hit) => hit[1].toLowerCase().replace(/_/g, '-'));
}
/**
 * Check one declared package name against the top-packages list for typosquatting.
 * Pre-filter by length difference to avoid computing the full edit distance
 * for pairs that cannot be within distance 2.
 * Returns a finding object or null.
 *
 * A name is reported when its nearest top package is at edit distance 1
 * (HIGH), or at distance 2 while that top package sits before `top200Cutoff`
 * in the list (MEDIUM). Exact matches and allowlisted names are never reported.
 *
 * @param {string} declaredName - Normalized (lowercase, hyphens) declared package name
 * @param {string[]} topList - Top package names (same normalization)
 * @param {number} top200Cutoff - Index cutoff for "very popular" (top 200 for npm, top 100 for PyPI)
 * @param {string} ecosystem - 'npm' or 'pypi'
 * @param {string} sourceFile - 'package.json' or 'requirements.txt'
 * @param {Set<string>} [allowlist] - Known-legitimate names that must not be flagged
 * @returns {object|null}
 */
function checkTyposquatting(declaredName, topList, top200Cutoff, ecosystem, sourceFile, allowlist) {
  // Skip known legitimate packages. loadTyposquatAllowlist() stores entries
  // with `_`, `.` and `-` all folded to `-`, whereas declaredName has only
  // `_` folded. Look up the folded form as well, otherwise dotted names such
  // as "socket.io" could never match their allowlist entry.
  if (allowlist) {
    const allowKey = declaredName.replace(/[_.-]/g, '-');
    if (allowlist.has(declaredName) || allowlist.has(allowKey)) return null;
  }
  let closestDist = Infinity;
  let closestPkg = null;
  let closestIdx = Infinity;
  for (let i = 0; i < topList.length; i++) {
    const topPkg = topList[i];
    // Exact match — legitimate package, skip
    if (declaredName === topPkg) return null;
    // Pre-filter: skip if length difference > 2
    if (Math.abs(declaredName.length - topPkg.length) > 2) continue;
    const dist = levenshtein(declaredName, topPkg);
    // On equal distance prefer the earlier (more popular) list entry.
    if (dist < closestDist || (dist === closestDist && i < closestIdx)) {
      closestDist = dist;
      closestPkg = topPkg;
      closestIdx = i;
    }
  }
  if (closestPkg === null) return null;
  // Flag distance 1 always; distance 2 only if target is in top 200 (top200Cutoff)
  if (closestDist === 1) {
    return finding({
      scanner: 'DEP',
      severity: SEVERITY.HIGH,
      title: `Possible typosquatting: "${declaredName}" vs "${closestPkg}" (edit distance 1)`,
      description:
        `The declared ${ecosystem} package "${declaredName}" is 1 character away from the ` +
        `popular package "${closestPkg}". This is a strong typosquatting indicator. ` +
        `Typosquatting packages impersonate popular libraries to execute malicious install scripts.`,
      file: sourceFile,
      evidence: `"${declaredName}" → closest match "${closestPkg}" (Levenshtein distance: 1)`,
      owasp: 'LLM03',
      recommendation:
        `Verify that "${declaredName}" is the intended package. If you meant "${closestPkg}", ` +
        `correct the dependency name. If "${declaredName}" is intentional, add an inline comment ` +
        `confirming this to suppress future alerts.`,
    });
  }
  if (closestDist === 2 && closestIdx < top200Cutoff) {
    return finding({
      scanner: 'DEP',
      severity: SEVERITY.MEDIUM,
      title: `Potential typosquatting: "${declaredName}" vs "${closestPkg}" (edit distance 2)`,
      description:
        `The declared ${ecosystem} package "${declaredName}" is 2 characters away from the ` +
        `highly popular package "${closestPkg}" (top ${top200Cutoff} by downloads). ` +
        `While less certain than distance-1 matches, this warrants manual verification.`,
      file: sourceFile,
      evidence: `"${declaredName}" → closest match "${closestPkg}" (Levenshtein distance: 2)`,
      owasp: 'LLM03',
      recommendation:
        `Confirm "${declaredName}" is the correct and intended package name. ` +
        `Check the package's publish date, author, and download count on the registry.`,
    });
  }
  return null;
}
// ---------------------------------------------------------------------------
// Category 3: Malicious Install Scripts
// ---------------------------------------------------------------------------
/** Regexes that flag network access or code/process execution inside an install script, with a display label each. */
const MALICIOUS_SCRIPT_PATTERNS = [
  { pattern: /\bcurl\b/, label: 'curl (network fetch)' },
  { pattern: /\bwget\b/, label: 'wget (network fetch)' },
  { pattern: /\bfetch\b/, label: 'fetch (network request)' },
  { pattern: /https?:\/\//, label: 'HTTP URL' },
  { pattern: /\beval\b/, label: 'eval (code execution)' },
  { pattern: /\bexec\b/, label: 'exec (process execution)' },
  { pattern: /child_process/, label: 'child_process (subprocess)' },
  { pattern: /net\.connect\b/, label: 'net.connect (raw TCP)' },
  { pattern: /\bdgram\b/, label: 'dgram (UDP socket)' },
];
/** The package.json script names inspected by checkInstallScripts. */
const INSTALL_HOOKS = ['preinstall', 'install', 'postinstall'];
/**
 * Inspect the install hooks of a package.json for suspicious commands.
 *
 * One finding is produced per hook whose command is a non-empty string
 * matching at least one entry of MALICIOUS_SCRIPT_PATTERNS. The evidence
 * shows at most the first 120 characters of the command with URLs replaced
 * by "[URL]".
 *
 * @param {object} pkgJson - Parsed package.json object
 * @returns {object[]} - findings, in INSTALL_HOOKS order
 */
function checkInstallScripts(pkgJson) {
  const scripts = pkgJson.scripts || {};
  return INSTALL_HOOKS.flatMap((hook) => {
    const command = scripts[hook];
    if (typeof command !== 'string' || command === '') return [];

    const labels = MALICIOUS_SCRIPT_PATTERNS
      .filter((entry) => entry.pattern.test(command))
      .map((entry) => entry.label);
    if (labels.length === 0) return [];

    // Redact any URLs in the evidence to avoid leaking sensitive paths in reports
    const redacted = command.replace(/https?:\/\/[^\s"']+/g, '[URL]').slice(0, 120);
    // The ellipsis depends on the length of the original command, not the redacted one.
    const ellipsis = command.length > 120 ? '...' : '';

    return [
      finding({
        scanner: 'DEP',
        severity: SEVERITY.HIGH,
        title: `Suspicious npm install hook: scripts.${hook} contains network/exec patterns`,
        description:
          `The package.json "scripts.${hook}" field runs automatically during \`npm install\` ` +
          `and contains suspicious patterns: ${labels.join(', ')}. ` +
          `Malicious packages use install hooks to exfiltrate data, download payloads, or establish persistence.`,
        file: 'package.json',
        evidence: `scripts.${hook}: "${redacted}${ellipsis}"`,
        owasp: 'LLM03',
        recommendation:
          `Review the scripts.${hook} command carefully. If this package is a dependency ` +
          `(not your own), consider whether this behaviour is expected. Use \`npm install --ignore-scripts\` ` +
          `if install hooks are not needed. File a report at https://www.npmjs.com/support if malicious.`,
      }),
    ];
  });
}
// ---------------------------------------------------------------------------
// Category 4: Unpinned Versions
// ---------------------------------------------------------------------------
/** Matches version specifiers that start with `*`, `latest`, `x`, `>=<digit>` or `><digit>`. */
const UNPINNED_NPM_RE = /^(\*|latest|x|>=\d|>\d)/;
/**
 * Report dependencies whose version specifier is unpinned.
 *
 * Looks at `dependencies` first, then `devDependencies`. Only string
 * specifiers are considered, and each is trimmed before being tested
 * against UNPINNED_NPM_RE.
 *
 * @param {object} pkgJson - Parsed package.json object
 * @returns {object[]} One LOW finding per unpinned dependency
 */
function checkUnpinnedNpm(pkgJson) {
  const sectionNames = ['dependencies', 'devDependencies'];
  return sectionNames.flatMap((sectionName) => {
    const deps = pkgJson[sectionName];
    if (!deps || typeof deps !== 'object') return [];
    return Object.entries(deps)
      .filter(([, version]) => typeof version === 'string' && UNPINNED_NPM_RE.test(version.trim()))
      .map(([name, version]) =>
        finding({
          scanner: 'DEP',
          severity: SEVERITY.LOW,
          title: `Unpinned npm dependency: ${name}@${version}`,
          description:
            `The package "${name}" in ${sectionName} uses an unpinned version specifier "${version}". ` +
            `Unpinned dependencies can silently pull in a compromised version on the next install.`,
          file: 'package.json',
          evidence: `${sectionName}.${name}: "${version}"`,
          owasp: 'LLM03',
          recommendation:
            `Pin "${name}" to an exact version (e.g., "${name}": "x.y.z") or use a lockfile ` +
            `(\`package-lock.json\` or \`yarn.lock\`) and commit it. Run \`npm ci\` in CI instead of \`npm install\`.`,
        }),
      );
  });
}
/**
 * Report requirements.txt entries that are not pinned with `==`.
 *
 * Blank lines, `#` comments and lines starting with `-` are skipped, as is
 * any line containing `==`. Of the remaining lines, one without any of the
 * characters `> < = ~ !` is reported as having no version specifier; every
 * other one is reported as loosely pinned. Line numbers are 1-based.
 *
 * @param {string[]} lines - Raw lines of requirements.txt
 * @returns {object[]} One LOW finding per offending line
 */
function checkUnpinnedPypi(lines) {
  const LEADING_NAME = /^([A-Za-z0-9][A-Za-z0-9._-]*)/;
  const findings = [];
  for (const [index, rawLine] of lines.entries()) {
    const entry = rawLine.trim();
    if (entry === '' || entry.startsWith('#') || entry.startsWith('-')) continue;
    // A strict == pin anywhere on the line means there is nothing to report.
    if (entry.includes('==')) continue;

    const nameHit = LEADING_NAME.exec(entry);
    const name = nameHit ? nameHit[1] : entry;
    const hasRangeOperator = /[><=~!]/.test(entry);

    const wording = hasRangeOperator
      ? {
          // Has >= or ~= but no == — floating upper bound
          title: `Loosely pinned Python dependency: ${name}`,
          description:
            `"${name}" in requirements.txt uses a range specifier without a strict == pin. ` +
            `Range specifiers allow unexpected version upgrades that may introduce vulnerabilities.`,
          recommendation:
            `Prefer exact version pinning (\`${name}==x.y.z\`) for reproducible installs. ` +
            `If you need flexibility, use a lockfile approach (\`pip-compile\`).`,
        }
      : {
          // No version at all
          title: `Unpinned Python dependency: ${name} (no version specifier)`,
          description:
            `"${name}" in requirements.txt has no version pin. ` +
            `Without pinning, \`pip install\` may resolve to a future compromised version.`,
          recommendation:
            `Pin to an exact version: \`${name}==<version>\`. ` +
            `Use \`pip freeze > requirements.txt\` to capture current versions, ` +
            `or use \`pip-compile\` (pip-tools) for reproducible builds.`,
        };

    findings.push(
      finding({
        scanner: 'DEP',
        severity: SEVERITY.LOW,
        title: wording.title,
        description: wording.description,
        file: 'requirements.txt',
        line: index + 1,
        evidence: entry,
        owasp: 'LLM03',
        recommendation: wording.recommendation,
      }),
    );
  }
  return findings;
}
// ---------------------------------------------------------------------------
// Main scanner export
// ---------------------------------------------------------------------------
/**
 * Scan targetPath for dependency security issues.
 *
 * Ecosystems are detected by file presence: package.json for npm, and any of
 * requirements.txt / setup.py / pyproject.toml for PyPI. When neither is
 * present the result status is 'skipped'.
 *
 * Findings are appended in this order:
 *   npm  — npm audit, typosquatting (cutoff 200), install scripts, unpinned versions
 *   PyPI — pip audit (requirements.txt or pyproject.toml present), then, only
 *          when requirements.txt exists, typosquatting (cutoff 100) and
 *          unpinned versions
 *
 * An exception raised while scanning produces an 'error' result that still
 * carries the findings gathered so far, plus the error message.
 *
 * @param {string} targetPath - Absolute root path being scanned
 * @param {object} discovery - Unused (dep-auditor reads files by convention, not discovery list)
 * @returns {Promise<object>} - scannerResult envelope
 */
export async function scan(targetPath, discovery) {
  const startedAt = Date.now();
  const elapsedMs = () => Date.now() - startedAt;
  // Names are compared lowercased, with underscores folded to hyphens.
  const normalizeName = (name) => name.toLowerCase().replace(/_/g, '-');

  const manifestPath = join(targetPath, 'package.json');
  const requirementsPath = join(targetPath, 'requirements.txt');
  const setupPyPath = join(targetPath, 'setup.py');
  const pyprojectPath = join(targetPath, 'pyproject.toml');

  const npmPresent = existsSync(manifestPath);
  const pythonPresent =
    existsSync(requirementsPath) || existsSync(setupPyPath) || existsSync(pyprojectPath);

  // Nothing to scan
  if (!(npmPresent || pythonPresent)) {
    return scannerResult('dep-auditor', 'skipped', [], 0, elapsedMs());
  }

  const collected = [];
  let scannedCount = 0;
  try {
    // --- npm ecosystem ---
    if (npmPresent) {
      scannedCount += 1;
      const manifest = await readJson(manifestPath);
      if (manifest) {
        // 1a. CVE via npm audit
        collected.push(...runNpmAudit(targetPath));

        // 2a. Typosquatting — npm
        const [topPkgs, allowlist] = await Promise.all([
          loadTopPackages(),
          loadTyposquatAllowlist(),
        ]);
        const popularNpm = topPkgs.npm.map((name) => normalizeName(name));
        const declaredNpm = Object.keys({
          ...manifest.dependencies,
          ...manifest.devDependencies,
        });
        for (const depName of declaredNpm) {
          const hit = checkTyposquatting(
            normalizeName(depName),
            popularNpm,
            200,
            'npm',
            'package.json',
            allowlist.npm,
          );
          if (hit) collected.push(hit);
        }

        // 3. Malicious install scripts
        collected.push(...checkInstallScripts(manifest));
        // 4a. Unpinned versions
        collected.push(...checkUnpinnedNpm(manifest));
      }
    }

    // --- PyPI ecosystem ---
    if (pythonPresent) {
      // 1b. CVE via pip audit (only if requirements.txt or pyproject.toml present)
      if (existsSync(requirementsPath) || existsSync(pyprojectPath)) {
        collected.push(...runPipAudit(targetPath));
      }
      // 2b. Typosquatting — PyPI (only if requirements.txt present)
      if (existsSync(requirementsPath)) {
        scannedCount += 1;
        const requirementLines = await readLines(requirementsPath);
        const topPkgs = await loadTopPackages();
        const allowlist = await loadTyposquatAllowlist();
        const popularPypi = topPkgs.pypi.map((name) => normalizeName(name));
        for (const depName of parseRequirementsTxt(requirementLines)) {
          const hit = checkTyposquatting(
            depName,
            popularPypi,
            100,
            'pypi',
            'requirements.txt',
            allowlist.pypi,
          );
          if (hit) collected.push(hit);
        }
        // 4b. Unpinned versions
        collected.push(...checkUnpinnedPypi(requirementLines));
      }
    }

    return scannerResult('dep-auditor', 'ok', collected, scannedCount, elapsedMs());
  } catch (err) {
    return scannerResult('dep-auditor', 'error', collected, scannedCount, elapsedMs(), err.message);
  }
}