// git-forensics.mjs — Deterministic git history forensics scanner
// Detects supply chain rug pull signals: force pushes, description drift,
// hook modifications, new outbound URLs, author changes, binary additions,
// and suspicious commit patterns.
//
// Zero external dependencies — Node.js builtins only.
// OWASP coverage: LLM03 (Supply Chain)

import { finding, scannerResult } from './lib/output.mjs';
import { SEVERITY } from './lib/severity.mjs';
import { levenshtein } from './lib/string-utils.mjs';
import { execFileSync } from 'node:child_process';
import { existsSync } from 'node:fs';
import { join } from 'node:path';

// ---------------------------------------------------------------------------
// Constants
// ---------------------------------------------------------------------------

/** Maximum number of reflog entries inspected for reset signals. */
const MAX_REFLOG_ENTRIES = 500;
/** Maximum number of entries inspected when walking reflogs for forced updates. */
const MAX_WALK_REFLOG_ENTRIES = 200;
/** Number of most recent commits inspected by the "recent history" detectors. */
const RECENT_COMMIT_WINDOW = 50;
const GIT_TIMEOUT_MS = 15000;
/** Patch output (`git log -p`) routinely exceeds Node's default buffer size. */
const GIT_MAX_BUFFER_BYTES = 64 * 1024 * 1024;
const MAX_DRIFT_FILES = 20;
/** Fraction of the original description length tolerated before drift is reported. */
const DRIFT_THRESHOLD_RATIO = 0.20;

/** Domains strongly associated with exfiltration or ephemeral endpoints */
const SUSPICIOUS_DOMAINS = [
  'webhook.site',
  'requestbin',
  'ngrok',
  'ngrok.io',
  'pipedream.net',
  'pastebin.com',
  'hastebin.com',
  'beeceptor.com',
  'hookbin.com',
  'httpbin.org',
  'canarytokens.com',
];

/** Binary file extensions unusual in a plugin/package repo */
const BINARY_EXTENSIONS = new Set([
  '.exe',
  '.dll',
  '.so',
  '.dylib',
  '.bin',
  '.dat',
  '.wasm',
  '.node',
]);

/** Network-access patterns in source code (hooks/scripts concern) */
const NETWORK_PATTERNS =
  /\b(fetch|http|https|curl|wget|dns\.lookup|net\.connect|XMLHttpRequest|axios|got)\b/i;

/** YAML block scalar header: `|` or `>` with optional chomping/indentation indicators. */
const BLOCK_SCALAR_HEADER = /^[|>][+-]?\d?[+-]?$/;

// ---------------------------------------------------------------------------
// Helpers
// ---------------------------------------------------------------------------

/**
 * Run a git command in the target directory.
 *
 * Arguments are passed as an array and git is spawned without a shell, so
 * values taken from the scanned repository (file names, hashes) can never be
 * interpreted as shell syntax.
 *
 * @param {string[]} args - Git arguments (without the leading 'git')
 * @param {string} cwd - Working directory
 * @returns {string} - stdout string, trimmed
 * @throws - On non-zero exit, timeout, or output larger than GIT_MAX_BUFFER_BYTES
 */
function git(args, cwd) {
  return execFileSync('git', args, {
    cwd,
    timeout: GIT_TIMEOUT_MS,
    maxBuffer: GIT_MAX_BUFFER_BYTES,
    encoding: 'utf-8',
    stdio: ['pipe', 'pipe', 'pipe'],
  }).trim();
}

/**
 * Split command output into its non-empty lines.
 * @param {string} text
 * @returns {string[]}
 */
function splitLines(text) {
  return text.split('\n').filter(Boolean);
}

/**
 * Return only the added lines of a unified diff, joined by newlines.
 * File header lines (`+++ b/path`) are excluded.
 * @param {string} diffText
 * @returns {string}
 */
function addedLinesOf(diffText) {
  return diffText
    .split('\n')
    .filter((line) => line.startsWith('+') && !line.startsWith('+++ '))
    .join('\n');
}

/**
 * Decide whether a hostname matches one of SUSPICIOUS_DOMAINS.
 * Entries containing a dot match the host itself or any subdomain of it;
 * bare entries (e.g. 'ngrok') match when they appear as a whole DNS label.
 * @param {string} host - Lower-cased hostname
 * @returns {boolean}
 */
function isSuspiciousHost(host) {
  const labels = host.split('.');
  return SUSPICIOUS_DOMAINS.some((domain) =>
    domain.includes('.')
      ? host === domain || host.endsWith(`.${domain}`)
      : labels.includes(domain));
}

// ---------------------------------------------------------------------------
// Git repo detection
// ---------------------------------------------------------------------------

/**
 * Determine if targetPath is inside a git repository.
 * First checks for .git directory (top-level), then tries git rev-parse.
 * @param {string} targetPath
 * @returns {boolean}
 */
function isGitRepo(targetPath) {
  if (existsSync(join(targetPath, '.git'))) return true;
  try {
    git(['rev-parse', '--git-dir'], targetPath);
    return true;
  } catch {
    return false;
  }
}

// ---------------------------------------------------------------------------
// Category 1: Force Push Detection
// ---------------------------------------------------------------------------

/**
 * Detect force push signals in reflog.
 * Looks for reflog entries whose message starts with "reset:" and for
 * "forced-update" entries in walk-reflogs.
 * @param {string} targetPath
 * @returns {object[]} findings
 */
function detectForcePushes(targetPath) {
  const findings = [];

  // Check reflog for reset entries (local force push evidence)
  try {
    // Tab-separated fields so the reflog message can be isolated reliably.
    const reflog = git(
      ['reflog', '--format=%H%x09%gD%x09%gs', '-n', String(MAX_REFLOG_ENTRIES)],
      targetPath,
    );
    const resetLines = splitLines(reflog)
      .map((line) => {
        const [hash = '', selector = '', ...subjectParts] = line.split('\t');
        return { hash, selector, subject: subjectParts.join('\t') };
      })
      // Match on the reflog action only; a commit subject that merely
      // mentions "reset" is not a reset operation.
      .filter((entry) => entry.subject.startsWith('reset:'))
      .map((entry) => `${entry.hash} ${entry.selector} ${entry.subject}`);

    if (resetLines.length > 0) {
      const examples = resetLines.slice(0, 3).map((l) => l.slice(0, 80)).join(' | ');
      findings.push(finding({
        scanner: 'GIT',
        severity: SEVERITY.HIGH,
        title: 'Force push signal: reflog contains reset entries',
        description:
          `Reflog contains ${resetLines.length} reset entry/entries. ` +
          'git reset --hard in a shared repo indicates history was rewritten, ' +
          'which is the mechanism used in rug pull attacks to swap legitimate code ' +
          'with malicious content after trust is established.',
        evidence: `${resetLines.length} reset entries. Examples: ${examples}`,
        owasp: 'LLM03',
        recommendation:
          'Review what was changed in the rewritten history. Compare the pre-reset ' +
          'commit (visible in reflog) with the current HEAD to identify removed content.',
      }));
    }
  } catch {
    // reflog unavailable — not fatal
  }

  // Check walk-reflogs for forced-update
  try {
    const walkLog = git(
      ['log', '--walk-reflogs', '--format=%H %gD %gs', '-n', String(MAX_WALK_REFLOG_ENTRIES)],
      targetPath,
    );
    const forcedLines = walkLog.split('\n').filter((l) => l.includes('forced-update'));

    if (forcedLines.length > 0) {
      const shortHash = forcedLines[0].split(' ')[0].slice(0, 8);
      findings.push(finding({
        scanner: 'GIT',
        severity: SEVERITY.HIGH,
        title: 'Force push signal: forced-update entries in walk-reflogs',
        description:
          `Found ${forcedLines.length} forced-update entry/entries in reflog walk. ` +
          'Forced updates overwrite remote history non-fast-forward, a classic rug pull vector.',
        evidence: `${forcedLines.length} forced-update entries; first at commit ${shortHash}`,
        owasp: 'LLM03',
        recommendation:
          'Audit the commits immediately before and after each forced-update. ' +
          'Pin the plugin to a specific commit hash rather than a branch reference.',
      }));
    }
  } catch {
    // walk-reflogs may fail in shallow clones
  }

  return findings;
}

// ---------------------------------------------------------------------------
// Category 2: Description Drift
// ---------------------------------------------------------------------------

/**
 * Extract the description field from YAML frontmatter in a string.
 * Handles both single-line and block scalar (| or >, with optional
 * chomping indicators) styles.
 * @param {string} content
 * @returns {string | null} - The description, or null when absent or empty
 */
function extractDescription(content) {
  const fmMatch = content.match(/^---[\r\n]([\s\S]*?)[\r\n]---/);
  if (!fmMatch) return null;

  const block = fmMatch[1];

  // Only horizontal whitespace may follow the key: an empty `description:`
  // must not swallow the next line as its value.
  const keyMatch = block.match(/^description:[ \t]*(.*)$/m);
  if (!keyMatch) return null;

  const inlineValue = keyMatch[1].trim();

  // Single-line: description: some text
  if (!BLOCK_SCALAR_HEADER.test(inlineValue)) {
    const unquoted = inlineValue.replace(/^['"]|['"]$/g, '');
    return unquoted || null;
  }

  // Block scalar: description: |
  // The value is the run of indented lines directly after the key line.
  const following = block
    .slice(keyMatch.index + keyMatch[0].length)
    .split(/\r?\n/)
    .slice(1); // drop the remainder of the key line itself
  const bodyLines = [];
  for (const line of following) {
    if (!/^[ \t]/.test(line)) break;
    bodyLines.push(line.replace(/^[ \t]{2}/, ''));
  }

  const text = bodyLines.join('\n').trim();
  return text || null;
}

/**
 * Detect significant description changes in commands/ and agents/ files.
 * Compares the description at the commit that first added each file with the
 * description at HEAD using Levenshtein distance.
 * @param {string} targetPath
 * @returns {object[]} findings
 */
function detectDescriptionDrift(targetPath) {
  const results = [];

  // List tracked files matching commands/*.md or agents/*.md.
  // -z yields raw NUL-separated paths (no C-style quoting of special characters).
  let trackedFiles;
  try {
    trackedFiles = git(['ls-files', '-z', '--', 'commands/*.md', 'agents/*.md'], targetPath)
      .split('\0')
      .filter(Boolean)
      .slice(0, MAX_DRIFT_FILES);
  } catch {
    return results;
  }

  for (const relFile of trackedFiles) {
    try {
      // Find the commit that first added this file
      const addHash = splitLines(
        git(['log', '--diff-filter=A', '--format=%H', '--', relFile], targetPath),
      ).pop(); // oldest = last in log output (reverse chrono)

      if (!addHash) continue;

      const shortAddHash = addHash.slice(0, 8);

      // A failing `git show` throws and is handled by the per-file catch below.
      const initialContent = git(['show', `${addHash}:${relFile}`], targetPath);
      const currentContent = git(['show', `HEAD:${relFile}`], targetPath);

      const initialDesc = extractDescription(initialContent);
      const currentDesc = extractDescription(currentContent);

      if (!initialDesc || !currentDesc) continue;
      if (initialDesc === currentDesc) continue;

      const dist = levenshtein(initialDesc, currentDesc);
      const threshold = Math.ceil(initialDesc.length * DRIFT_THRESHOLD_RATIO);

      if (dist > threshold) {
        results.push(finding({
          scanner: 'GIT',
          severity: SEVERITY.MEDIUM,
          title: `Description drift detected: ${relFile}`,
          description:
            `The description in "${relFile}" has changed significantly since its initial commit (${shortAddHash}). ` +
            `Edit distance: ${dist} characters (threshold: ${threshold}, 20% of original length ${initialDesc.length}). ` +
            'Substantial description changes can indicate purpose drift or an attempt to ' +
            'misrepresent what an agent/command does after users have trusted it.',
          file: relFile,
          evidence:
            `Initial (${shortAddHash}): "${initialDesc.slice(0, 80)}${initialDesc.length > 80 ? '…' : ''}" | ` +
            `Current: "${currentDesc.slice(0, 80)}${currentDesc.length > 80 ? '…' : ''}" | ` +
            `Levenshtein distance: ${dist}`,
          owasp: 'LLM03',
          recommendation:
            'Review the description change history: ' +
            `git log -p -- "${relFile}". ` +
            'Verify the new description accurately represents current behavior.',
        }));
      }
    } catch {
      // Per-file errors are non-fatal
    }
  }

  return results;
}

// ---------------------------------------------------------------------------
// Category 3: Hook Modification After Initial Commit
// ---------------------------------------------------------------------------

/**
 * Detect suspicious hook file modification patterns: hook scripts whose most
 * recent change adds network-access code, or that are modified unusually often.
 * @param {string} targetPath
 * @returns {object[]} findings
 */
function detectHookModifications(targetPath) {
  const results = [];

  let hookFiles;
  try {
    hookFiles = git(['ls-files', '-z', '--', 'hooks/scripts/*'], targetPath)
      .split('\0')
      .filter(Boolean);
  } catch {
    return results;
  }

  for (const relFile of hookFiles) {
    try {
      // Abbreviated hashes of every commit touching this file, newest first
      const commitHashes = splitLines(git(['log', '--format=%h', '--', relFile], targetPath));

      const modCount = commitHashes.length;
      if (modCount <= 1) continue; // Only the initial commit — clean

      const shortHash = commitHashes[0];

      // Inspect the latest commit that actually changed this file. Diffing
      // HEAD~1..HEAD would be empty whenever the newest commit in the repo
      // did not touch the hook.
      let latestDiff = '';
      try {
        latestDiff = git(['show', '--format=', shortHash, '--', relFile], targetPath);
      } catch {
        // Diff unavailable — fall through to the frequency check
      }

      const addsNetwork = NETWORK_PATTERNS.test(addedLinesOf(latestDiff));

      if (addsNetwork) {
        results.push(finding({
          scanner: 'GIT',
          severity: SEVERITY.HIGH,
          title: `Hook modified with new network capability: ${relFile}`,
          description:
            `Hook script "${relFile}" was modified ${modCount} time(s) and the latest change ` +
            `adds outbound network calls (fetch/http/curl/wget/etc.). ` +
            'Hook scripts run automatically with full filesystem access — adding network calls ' +
            'post-initial-commit is a strong rug pull indicator (exfiltration vector).',
          file: relFile,
          evidence: `${modCount} modifications; latest commit: ${shortHash}; network pattern detected in diff`,
          owasp: 'LLM03',
          recommendation:
            `Audit: git log -p -- "${relFile}". ` +
            'Pin hook files to trusted commits. Review what data the network calls access.',
        }));
      } else if (modCount > 3) {
        results.push(finding({
          scanner: 'GIT',
          severity: SEVERITY.MEDIUM,
          title: `Hook script modified frequently: ${relFile}`,
          description:
            `Hook script "${relFile}" has been modified ${modCount} times. ` +
            'Frequent modifications to hook scripts are unusual and warrant review — ' +
            'hooks run automatically and are a high-value target for supply chain attacks.',
          file: relFile,
          evidence: `${modCount} commits modify this file; latest: ${shortHash}`,
          owasp: 'LLM03',
          recommendation:
            `Review all hook changes: git log -p -- "${relFile}". ` +
            'Ensure each modification has a clear, legitimate purpose.',
        }));
      }
    } catch {
      // Per-file errors are non-fatal
    }
  }

  return results;
}

// ---------------------------------------------------------------------------
// Category 4: New Outbound URLs Post-Initial Commit
// ---------------------------------------------------------------------------

/**
 * Extract unique hostnames from URLs in a text block.
 * Hostnames are lower-cased; trailing dots (typically sentence punctuation
 * captured after a URL) are removed so the same host always compares equal.
 * @param {string} text
 * @returns {Set<string>}
 */
function extractHostnames(text) {
  const hosts = new Set();
  const urlRe = /https?:\/\/([a-zA-Z0-9.-]+)/g;
  let m;
  while ((m = urlRe.exec(text)) !== null) {
    const host = m[1].toLowerCase().replace(/\.+$/, '');
    if (host) hosts.add(host);
  }
  return hosts;
}

/**
 * Detect new outbound URLs added in recent commits not present at initial commit.
 * @param {string} targetPath
 * @returns {object[]} findings
 */
function detectNewOutboundUrls(targetPath) {
  const results = [];

  // Get initial commit hash
  let initialHash;
  try {
    initialHash = git(['rev-list', '--max-parents=0', 'HEAD'], targetPath).split('\n')[0].trim();
  } catch {
    return results;
  }

  // Get all URLs present in initial commit (full tree)
  let initialUrls = new Set();
  try {
    // git grep exits non-zero when nothing matches, which lands in the fallback.
    const initialGrep = git(['grep', '-E', 'https?://', initialHash], targetPath);
    initialUrls = extractHostnames(initialGrep);
  } catch {
    // Fallback: grep the initial commit diff itself
    try {
      const initDiff = git(['show', initialHash], targetPath);
      initialUrls = extractHostnames(initDiff);
    } catch {
      // Cannot determine initial URLs — skip
      return results;
    }
  }

  // Get diff of the most recent commits
  let recentDiff = '';
  try {
    recentDiff = git(['log', `-${RECENT_COMMIT_WINDOW}`, '--format=', '-p'], targetPath);
  } catch {
    return results;
  }

  // Only hosts introduced on added lines are of interest
  const addedHostnames = extractHostnames(addedLinesOf(recentDiff));
  const newHostnames = [...addedHostnames].filter((h) => !initialUrls.has(h));

  for (const host of newHostnames) {
    const isSuspicious = isSuspiciousHost(host);
    const sev = isSuspicious ? SEVERITY.HIGH : SEVERITY.MEDIUM;

    results.push(finding({
      scanner: 'GIT',
      severity: sev,
      title: isSuspicious
        ? `Suspicious exfiltration endpoint added post-initial-commit: ${host}`
        : `New outbound domain added in recent commits: ${host}`,
      description: isSuspicious
        ? `Domain "${host}" was added in recent commits and matches known exfiltration/ephemeral ` +
          'endpoint patterns (webhook.site, requestbin, ngrok, pipedream, pastebin, etc.). ' +
          'This is a high-confidence rug pull indicator — these services receive arbitrary HTTP requests.'
        : `Domain "${host}" appears in recent commits but was not present at initial commit. ` +
          'New outbound connections introduced after trust establishment warrant review.',
      evidence: `New domain: ${host}; not present in initial commit (${initialHash.slice(0, 8)})`,
      owasp: 'LLM03',
      recommendation: isSuspicious
        ? `Remove all references to "${host}" immediately and audit what data was sent. ` +
          'This domain pattern is used exclusively for receiving exfiltrated data.'
        : `Verify the purpose of "${host}". If legitimate, document it in README. ` +
          'If unexpected, this may indicate a compromised dependency or injected code.',
    }));
  }

  return results;
}

// ---------------------------------------------------------------------------
// Category 5: Author/Email Changes
// ---------------------------------------------------------------------------

/**
 * Detect suspicious author diversity in repository history.
 * @param {string} targetPath
 * @returns {object[]} findings
 */
function detectAuthorChanges(targetPath) {
  const results = [];

  // Author emails, newest commit first
  let emailList;
  try {
    emailList = splitLines(git(['log', '--format=%ae'], targetPath));
  } catch {
    return results;
  }

  const totalCommits = emailList.length;
  if (totalCommits === 0) return results;

  const uniqueEmails = new Set(emailList);
  const uniqueCount = uniqueEmails.size;

  // Flag: many distinct emails in a small repo
  if (uniqueCount > 3 && totalCommits < 50) {
    results.push(finding({
      scanner: 'GIT',
      severity: SEVERITY.MEDIUM,
      title: 'High author diversity in small repository',
      description:
        `Repository has ${uniqueCount} distinct commit author email(s) across only ${totalCommits} ` +
        'commit(s). High author diversity in a small plugin/package repo can indicate ' +
        'that multiple unrelated parties have committed (e.g., compromised maintainer account, ' +
        'supply chain injection via PR merge with altered identity).',
      evidence: `${uniqueCount} unique emails in ${totalCommits} commits: ${[...uniqueEmails].join(', ')}`,
      owasp: 'LLM03',
      recommendation:
        'Verify each commit author is a known, trusted contributor. ' +
        'Check for commits from unfamiliar email domains or auto-generated addresses.',
    }));
  }

  // Flag: mid-history author change (compare first commit author to later commits).
  // The list is newest-first, so the oldest commit's author is the last element.
  const firstAuthor = emailList[totalCommits - 1].trim();
  const laterAuthors = emailList.slice(0, -1); // all except the oldest
  const newAuthorSet = new Set(laterAuthors.filter((e) => e !== firstAuthor));

  if (newAuthorSet.size > 0) {
    results.push(finding({
      scanner: 'GIT',
      severity: SEVERITY.INFO,
      title: 'Author change mid-history',
      description:
        `Repository was initially committed by "${firstAuthor}" but later commits use ` +
        `${newAuthorSet.size} different author email(s). This is normal for collaborative ` +
        'projects but worth noting for single-author plugins.',
      evidence: `Original author: ${firstAuthor}; subsequent authors: ${[...newAuthorSet].slice(0, 5).join(', ')}`,
      owasp: 'LLM03',
      recommendation:
        'Verify all contributing authors are known and trusted. ' +
        'For single-maintainer plugins, unexpected author changes warrant investigation.',
    }));
  }

  return results;
}

// ---------------------------------------------------------------------------
// Category 6: Binary File Additions
// ---------------------------------------------------------------------------

/**
 * Detect unusual binary files added in recent commits.
 * @param {string} targetPath
 * @returns {object[]} findings
 */
function detectBinaryAdditions(targetPath) {
  const results = [];

  let addedFiles;
  try {
    const raw = git(
      ['log', '--diff-filter=A', '--name-only', '--format=', `-${RECENT_COMMIT_WINDOW}`],
      targetPath,
    );
    // A file added, removed and re-added appears once per add; report it once.
    addedFiles = [...new Set(splitLines(raw))];
  } catch {
    return results;
  }

  const binaryExtensions = [...BINARY_EXTENSIONS];
  const binaryFiles = addedFiles.filter((f) => {
    const lower = f.toLowerCase();
    return binaryExtensions.some((ext) => lower.endsWith(ext));
  });

  for (const binFile of binaryFiles) {
    // Find which commit added it (most recent add if there are several)
    let addCommit = 'unknown';
    try {
      addCommit = git(
        ['log', '--diff-filter=A', '--format=%H %ae %ai', '--', binFile],
        targetPath,
      ).split('\n')[0] || 'unknown';
    } catch {
      // non-fatal
    }

    const [commitHash, commitAuthor] = addCommit.split(' ');
    const shortHash = commitHash.slice(0, 8);
    const author = commitAuthor || 'unknown';

    results.push(finding({
      scanner: 'GIT',
      severity: SEVERITY.LOW,
      title: `Binary file added in recent commits: ${binFile}`,
      description:
        `Binary file "${binFile}" was added in the last 50 commits. ` +
        'Binary files in plugin/package repositories are unusual and cannot be easily audited. ' +
        'They may contain compiled malware, encoded payloads, or native modules with backdoors.',
      file: binFile,
      evidence: `Added in commit ${shortHash} by ${author}`,
      owasp: 'LLM03',
      recommendation:
        `Verify the necessity of "${binFile}". If it must exist, document its provenance ` +
        'and provide a reproducible build process. Scan with antivirus and inspect with ' +
        'strings/objdump/hexdump for suspicious embedded content.',
    }));
  }

  return results;
}

// ---------------------------------------------------------------------------
// Category 7: Suspicious Commit Patterns
// ---------------------------------------------------------------------------

/**
 * Detect commits that add new network capabilities while modifying hook files
 * under a cosmetic-looking commit message.
 * @param {string} targetPath
 * @returns {object[]} findings
 */
function detectSuspiciousCommitPatterns(targetPath) {
  const results = [];

  // One git call yields hash, author, date and subject for the recent commits.
  // Fields are NUL-separated because a subject may contain any printable character.
  let commits;
  try {
    const raw = git(
      ['log', `-${RECENT_COMMIT_WINDOW}`, '--format=%H%x00%ae%x00%ai%x00%s'],
      targetPath,
    );
    commits = splitLines(raw).map((line) => {
      const [hash = '', author = '', date = '', ...subjectParts] = line.split('\0');
      return { hash, author, date, subject: subjectParts.join('\0').toLowerCase() };
    });
  } catch {
    return results;
  }

  for (const { hash, author, date, subject } of commits) {
    try {
      const isCosmeticMsg = /^(update|fix|cleanup|refactor|minor|bump|chore)/.test(subject);
      if (!isCosmeticMsg) continue;

      // Check if this "cosmetic" commit actually touches hooks
      const changedFiles = splitLines(
        git(['diff-tree', '--no-commit-id', '-r', '--name-only', hash], targetPath),
      );
      const hookFiles = changedFiles.filter((f) => f.includes('hook'));
      if (hookFiles.length === 0) continue;

      // Check if the diff adds network patterns
      const commitDiff = git(['show', '--format=', hash], targetPath);
      if (!NETWORK_PATTERNS.test(addedLinesOf(commitDiff))) continue;

      const shortHash = hash.slice(0, 8);

      results.push(finding({
        scanner: 'GIT',
        severity: SEVERITY.MEDIUM,
        title: `Suspicious commit: cosmetic message hides hook+network changes (${shortHash})`,
        description:
          `Commit ${shortHash} has a cosmetic message ("${subject}") but modifies hook files ` +
          'and introduces new network-access code. This pattern — disguising functional changes ' +
          'as maintenance — is used to slip malicious hook modifications past reviewers.',
        evidence:
          `Commit: ${shortHash} | Author: ${author} | Date: ${date} | ` +
          `Message: "${subject}" | Hooks modified: ${hookFiles.join(', ')}`,
        owasp: 'LLM03',
        recommendation:
          `Audit this commit in full: git show ${shortHash}. ` +
          'Verify the network calls introduced are intentional and documented. ' +
          'Enforce commit message policies that require meaningful descriptions for hook changes.',
      }));
    } catch {
      // Per-commit errors are non-fatal
    }
  }

  return results;
}

// ---------------------------------------------------------------------------
// Main scanner export
// ---------------------------------------------------------------------------

/**
 * Scan git history of targetPath for supply chain rug pull signals.
 *
 * Every detection category runs independently; a category that throws is
 * recorded and does not prevent the others from running.
 *
 * @param {string} targetPath - Absolute root path being scanned
 * @param {object} discovery - File discovery result (not used directly; git commands enumerate)
 * @returns {Promise} - scannerResult envelope
 */
export async function scan(targetPath, discovery) {
  const startMs = Date.now();

  // Prerequisite: must be a git repo
  if (!isGitRepo(targetPath)) {
    return scannerResult(
      'git-forensics',
      'skipped',
      [],
      0,
      Date.now() - startMs,
      'Not a git repository — git forensics skipped',
    );
  }

  const findings = [];
  const errors = [];

  // Run all detection categories, collecting errors without aborting
  const categories = [
    ['force-push', () => detectForcePushes(targetPath)],
    ['description-drift', () => detectDescriptionDrift(targetPath)],
    ['hook-modifications', () => detectHookModifications(targetPath)],
    ['new-outbound-urls', () => detectNewOutboundUrls(targetPath)],
    ['author-changes', () => detectAuthorChanges(targetPath)],
    ['binary-additions', () => detectBinaryAdditions(targetPath)],
    ['suspicious-patterns', () => detectSuspiciousCommitPatterns(targetPath)],
  ];

  for (const [name, fn] of categories) {
    try {
      const categoryFindings = fn();
      findings.push(...categoryFindings);
    } catch (err) {
      errors.push(`${name}: ${err.message}`);
    }
  }

  const durationMs = Date.now() - startMs;

  if (errors.length > 0 && findings.length === 0) {
    // Errors occurred and nothing was found — report as error
    return scannerResult(
      'git-forensics',
      'error',
      findings,
      0,
      durationMs,
      `All detection categories failed: ${errors.join('; ')}`,
    );
  }

  // Partial errors are logged but status is 'ok' if we have results
  const result = scannerResult('git-forensics', 'ok', findings, 0, durationMs);
  if (errors.length > 0) {
    result.partial_errors = errors;
  }
  return result;
}