743 lines
26 KiB
JavaScript
743 lines
26 KiB
JavaScript
// git-forensics.mjs — Deterministic git history forensics scanner
|
|
// Detects supply chain rug pull signals: force pushes, description drift,
|
|
// hook modifications, new outbound URLs, author changes, binary additions,
|
|
// and suspicious commit patterns.
|
|
//
|
|
// Zero external dependencies — Node.js builtins only.
|
|
// OWASP coverage: LLM03 (Supply Chain)
|
|
|
|
import { execFileSync, execSync } from 'node:child_process';
import { existsSync } from 'node:fs';
import { join } from 'node:path';

import { finding, scannerResult } from './lib/output.mjs';
import { SEVERITY } from './lib/severity.mjs';
import { levenshtein } from './lib/string-utils.mjs';
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Constants
|
|
// ---------------------------------------------------------------------------
|
|
|
|
/** Upper bound on the number of commit hashes requested from `git log`. */
const MAX_COMMITS = 500;

/** Timeout, in milliseconds, applied to every spawned git command. */
const GIT_TIMEOUT_MS = 15 * 1000;

/** Maximum number of commands/agents markdown files checked for description drift. */
const MAX_DRIFT_FILES = 20;
|
|
|
|
/**
 * Domains strongly associated with exfiltration or ephemeral endpoints.
 *
 * Hosts are compared against these entries by exact match or dot-suffix match,
 * so a bare label such as 'ngrok' or 'requestbin' only matches a host that is
 * literally that label. The concrete registrable domains are therefore listed
 * as well; the bare labels are kept for backward compatibility.
 */
const SUSPICIOUS_DOMAINS = [
  'webhook.site',
  'requestbin',
  'requestbin.com',
  'requestbin.net',
  'ngrok',
  'ngrok.io',
  'ngrok.app',
  'ngrok.dev',
  'ngrok-free.app',
  'ngrok-free.dev',
  'pipedream.net',
  'pastebin.com',
  'hastebin.com',
  'beeceptor.com',
  'hookbin.com',
  'httpbin.org',
  'canarytokens.com',
];
|
|
|
|
/**
 * File extensions (lower-case, leading dot included) treated as binary
 * artifacts that are unusual in a plugin/package repository.
 */
const BINARY_EXTENSIONS = new Set([
  '.exe',
  '.dll',
  '.so',
  '.dylib',
  '.bin',
  '.dat',
  '.wasm',
  '.node',
]);
|
|
|
|
/**
 * Case-insensitive, whole-word matcher for network-access identifiers in
 * source text (used when inspecting hook/script diffs). It carries no `g`
 * flag, so repeated `.test()` calls are stateless.
 */
const NETWORK_PATTERNS = new RegExp(
  `\\b(${[
    'fetch',
    'http',
    'https',
    'curl',
    'wget',
    'dns\\.lookup',
    'net\\.connect',
    'XMLHttpRequest',
    'axios',
    'got',
  ].join('|')})\\b`,
  'i',
);
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Helper: run a git command with standard options
|
|
// ---------------------------------------------------------------------------
|
|
|
|
/**
 * Run a git command in the target directory and return its trimmed stdout.
 *
 * The command line is executed through a shell by `execSync`, so `cmd` must
 * never contain unescaped text that originates from the repository being
 * scanned (file names, commit subjects, ...).
 *
 * @param {string} cmd - Git arguments; the leading `git ` is prepended here
 * @param {string} cwd - Working directory
 * @returns {string} - stdout string, trimmed
 * @throws - On non-zero exit, timeout, or output exceeding the buffer limit
 */
function git(cmd, cwd) {
  return execSync(`git ${cmd}`, {
    cwd,
    timeout: GIT_TIMEOUT_MS,
    encoding: 'utf-8',
    // Node's default maxBuffer is 1 MiB; patch/grep output for real
    // repositories routinely exceeds that, which would make the call throw
    // and the calling detector silently report nothing.
    maxBuffer: 64 * 1024 * 1024,
    stdio: ['pipe', 'pipe', 'pipe'],
  }).trim();
}
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Git repo detection
|
|
// ---------------------------------------------------------------------------
|
|
|
|
/**
 * Report whether targetPath belongs to a git repository.
 *
 * A `.git` entry directly under targetPath is accepted immediately; otherwise
 * `git rev-parse --git-dir` is run there and any failure means "not a repo".
 *
 * @param {string} targetPath
 * @returns {boolean}
 */
function isGitRepo(targetPath) {
  const dotGitPath = join(targetPath, '.git');

  if (!existsSync(dotGitPath)) {
    try {
      git('rev-parse --git-dir', targetPath);
    } catch {
      return false;
    }
  }

  return true;
}
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Category 1: Force Push Detection
|
|
// ---------------------------------------------------------------------------
|
|
|
|
/**
 * Detect force push signals in the reflog.
 *
 * Two independent checks are made, each best-effort (a failing git command
 * is ignored):
 *   1. `reset:` actions in the HEAD reflog.
 *   2. `forced-update` actions found while walking reflogs.
 *
 * Only the reflog action text (%gs) is inspected, so a commit whose subject
 * merely mentions "reset" or "forced-update" is not reported.
 *
 * @param {string} targetPath
 * @returns {object[]} findings
 */
function detectForcePushes(targetPath) {
  const findings = [];

  // Fields are requested tab-separated (%x09) so the reflog subject can be
  // isolated reliably even if the selector or subject contains spaces.
  const reflogFormat = "--format='%H%x09%gD%x09%gs'";
  const parseReflogLine = (line) => {
    const [hash = '', selector = '', ...subjectParts] = line.split('\t');
    return { hash, selector, subject: subjectParts.join('\t') };
  };
  const parseReflog = (raw) => raw.split('\n').filter(Boolean).map(parseReflogLine);

  // Check reflog for reset entries (local history-rewrite evidence)
  try {
    const entries = parseReflog(git(`reflog ${reflogFormat} -n 500`, targetPath));
    const resetEntries = entries.filter((e) => e.subject.startsWith('reset:'));

    if (resetEntries.length > 0) {
      const examples = resetEntries
        .slice(0, 3)
        .map((e) => `${e.hash} ${e.selector} ${e.subject}`.slice(0, 80))
        .join(' | ');
      findings.push(finding({
        scanner: 'GIT',
        severity: SEVERITY.HIGH,
        title: 'Force push signal: reflog contains reset entries',
        description:
          `Reflog contains ${resetEntries.length} reset entry/entries. ` +
          'git reset --hard in a shared repo indicates history was rewritten, ' +
          'which is the mechanism used in rug pull attacks to swap legitimate code ' +
          'with malicious content after trust is established.',
        evidence: `${resetEntries.length} reset entries. Examples: ${examples}`,
        owasp: 'LLM03',
        recommendation:
          'Review what was changed in the rewritten history. Compare the pre-reset ' +
          'commit (visible in reflog) with the current HEAD to identify removed content.',
      }));
    }
  } catch {
    // reflog unavailable — not fatal
  }

  // Check walk-reflogs for forced-update.
  // NOTE(review): `--all` is added because forced-update actions are recorded
  // by fetch/pull on the updated refs; HEAD's reflog alone is not expected to
  // contain them. The -n limit applies to the combined walk — confirm it is
  // large enough for busy repositories.
  try {
    const entries = parseReflog(
      git(`log --walk-reflogs --all ${reflogFormat} -n 200`, targetPath),
    );
    const forcedEntries = entries.filter((e) => /: forced-update$/.test(e.subject));

    if (forcedEntries.length > 0) {
      const shortHash = forcedEntries[0].hash.slice(0, 8);
      findings.push(finding({
        scanner: 'GIT',
        severity: SEVERITY.HIGH,
        title: 'Force push signal: forced-update entries in walk-reflogs',
        description:
          `Found ${forcedEntries.length} forced-update entry/entries in reflog walk. ` +
          'Forced updates overwrite remote history non-fast-forward, a classic rug pull vector.',
        evidence: `${forcedEntries.length} forced-update entries; first at commit ${shortHash}`,
        owasp: 'LLM03',
        recommendation:
          'Audit the commits immediately before and after each forced-update. ' +
          'Pin the plugin to a specific commit hash rather than a branch reference.',
      }));
    }
  } catch {
    // walk-reflogs may fail in shallow clones
  }

  return findings;
}
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Category 2: Description Drift
|
|
// ---------------------------------------------------------------------------
|
|
|
|
/**
 * Extract the `description` field from YAML frontmatter in a string.
 *
 * Supported forms:
 *   - single-line values (`description: text`, optionally quoted);
 *   - block scalars (`|` or `>`, with optional chomping/indent indicators
 *     such as `|-` or `>+`); lines are joined with "\n" for both styles;
 *   - an empty inline value whose text continues on following indented lines.
 *
 * Line endings are normalized first, so CRLF files behave like LF files.
 * An empty `description:` followed by another top-level key yields null
 * instead of swallowing that key's line.
 *
 * @param {string} content
 * @returns {string | null} the description, or null when there is no
 *   frontmatter or no description value
 */
function extractDescription(content) {
  // Normalize CRLF / lone CR so the patterns below only deal with "\n".
  const text = content.replace(/\r\n?/g, '\n');

  const fmMatch = text.match(/^---\n([\s\S]*?)\n---/);
  if (!fmMatch) return null;

  const lines = fmMatch[1].split('\n');
  const keyIndex = lines.findIndex((line) => line.startsWith('description:'));
  if (keyIndex === -1) return null;

  const inlineValue = lines[keyIndex].slice('description:'.length).trim();
  const stripQuotes = (value) => value.replace(/^['"]|['"]$/g, '');

  // Lines belonging to the value: indented or blank lines directly after the
  // key, up to the next line that starts in column 0.
  const continuation = [];
  for (const line of lines.slice(keyIndex + 1)) {
    if (line.trim() !== '' && !/^[ \t]/.test(line)) break;
    continuation.push(line);
  }

  // Block scalar header: | or >, optional chomping (+/-) and indent digit in
  // either order, optional trailing comment.
  const isBlockHeader = /^[|>](?:[+-]?[1-9]?|[1-9]?[+-]?)(?:\s+#.*)?$/.test(inlineValue);
  if (isBlockHeader) {
    const indents = continuation
      .filter((line) => line.trim() !== '')
      .map((line) => line.match(/^[ \t]*/)[0].length);
    const indent = indents.length > 0 ? Math.min(...indents) : 0;
    return continuation
      .map((line) => line.slice(indent))
      .join('\n')
      .trim();
  }

  if (inlineValue !== '') {
    return stripQuotes(inlineValue);
  }

  // Empty inline value: a plain scalar may start on the next indented line.
  const folded = continuation
    .map((line) => line.trim())
    .filter(Boolean)
    .join(' ');
  return folded === '' ? null : stripQuotes(folded);
}
|
|
|
|
/**
 * Detect significant description changes in commands/ and agents/ files.
 *
 * For up to MAX_DRIFT_FILES tracked markdown files, the frontmatter
 * description at the commit that first added the file is compared with the
 * description at HEAD. A finding is produced when the Levenshtein distance
 * exceeds 20% of the original description's length.
 *
 * File names come from the repository under inspection and are therefore
 * untrusted: git is invoked without a shell and with literal pathspecs so a
 * crafted name can neither inject shell syntax nor break on spaces/globs.
 *
 * @param {string} targetPath
 * @returns {object[]} findings
 */
function detectDescriptionDrift(targetPath) {
  const results = [];

  // Shell-free git runner; arguments are passed to git verbatim.
  const runGit = (args) => execFileSync('git', args, {
    cwd: targetPath,
    timeout: GIT_TIMEOUT_MS,
    encoding: 'utf-8',
    maxBuffer: 64 * 1024 * 1024,
    stdio: ['pipe', 'pipe', 'pipe'],
  });

  // List tracked files matching commands/*.md or agents/*.md.
  // -z gives NUL-terminated, unquoted names.
  let trackedFiles;
  try {
    trackedFiles = runGit(['ls-files', '-z', '--', 'commands/*.md', 'agents/*.md'])
      .split('\0')
      .filter(Boolean)
      .slice(0, MAX_DRIFT_FILES);
  } catch {
    return results;
  }

  for (const relFile of trackedFiles) {
    try {
      // Find the commit that first added this file
      const addHash = runGit([
        '--literal-pathspecs', 'log', '--diff-filter=A', '--format=%H', '--', relFile,
      ])
        .split('\n')
        .filter(Boolean)
        .pop(); // oldest = last in log output (reverse chrono)

      if (!addHash) continue;

      const shortAddHash = addHash.slice(0, 8);

      // Content at the adding commit and at HEAD; a failure for either one
      // is handled by the per-file catch below.
      const initialContent = runGit(['show', `${addHash}:${relFile}`]).trim();
      const currentContent = runGit(['show', `HEAD:${relFile}`]).trim();

      const initialDesc = extractDescription(initialContent);
      const currentDesc = extractDescription(currentContent);

      if (!initialDesc || !currentDesc) continue;
      if (initialDesc === currentDesc) continue;

      const dist = levenshtein(initialDesc, currentDesc);
      const threshold = Math.ceil(initialDesc.length * 0.20);

      if (dist > threshold) {
        results.push(finding({
          scanner: 'GIT',
          severity: SEVERITY.MEDIUM,
          title: `Description drift detected: ${relFile}`,
          description:
            `The description in "${relFile}" has changed significantly since its initial commit (${shortAddHash}). ` +
            `Edit distance: ${dist} characters (threshold: ${threshold}, 20% of original length ${initialDesc.length}). ` +
            'Substantial description changes can indicate purpose drift or an attempt to ' +
            'misrepresent what an agent/command does after users have trusted it.',
          file: relFile,
          evidence:
            `Initial (${shortAddHash}): "${initialDesc.slice(0, 80)}${initialDesc.length > 80 ? '…' : ''}" | ` +
            `Current: "${currentDesc.slice(0, 80)}${currentDesc.length > 80 ? '…' : ''}" | ` +
            `Levenshtein distance: ${dist}`,
          owasp: 'LLM03',
          recommendation:
            'Review the description change history: ' +
            `git log -p -- "${relFile}". ` +
            'Verify the new description accurately represents current behavior.',
        }));
      }
    } catch {
      // Per-file errors are non-fatal
    }
  }

  return results;
}
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Category 3: Hook Modification After Initial Commit
|
|
// ---------------------------------------------------------------------------
|
|
|
|
/**
 * Detect suspicious hook file modification patterns.
 *
 * For every tracked file under hooks/scripts/ that has more than one commit:
 *   - HIGH when the most recent commit touching that file adds lines that
 *     match NETWORK_PATTERNS;
 *   - MEDIUM when the file has been modified more than three times.
 *
 * The diff inspected is that of the last commit that touched the file, not
 * HEAD~1..HEAD, so a network call added a few commits ago is still seen.
 * File names are untrusted, so git is run without a shell and with literal
 * pathspecs.
 *
 * @param {string} targetPath
 * @returns {object[]} findings
 */
function detectHookModifications(targetPath) {
  const results = [];

  // Shell-free git runner; arguments are passed to git verbatim.
  const runGit = (args) => execFileSync('git', args, {
    cwd: targetPath,
    timeout: GIT_TIMEOUT_MS,
    encoding: 'utf-8',
    maxBuffer: 64 * 1024 * 1024,
    stdio: ['pipe', 'pipe', 'pipe'],
  });

  let hookFiles;
  try {
    hookFiles = runGit(['ls-files', '-z', '--', 'hooks/scripts/*'])
      .split('\0')
      .filter(Boolean);
  } catch {
    return results;
  }

  for (const relFile of hookFiles) {
    try {
      // Abbreviated hashes of all commits touching this file, newest first
      const commitHashes = runGit(['--literal-pathspecs', 'log', '--format=%h', '--', relFile])
        .split('\n')
        .filter(Boolean);
      const modCount = commitHashes.length;

      if (modCount <= 1) continue; // Only the initial commit — clean

      const shortHash = commitHashes[0];

      // Patch of the latest commit that touched this file
      let latestDiff = '';
      try {
        latestDiff = runGit([
          '--literal-pathspecs', 'log', '-1', '-p', '--format=', '--', relFile,
        ]);
      } catch {
        // Diff unavailable — fall through to the frequency check
      }

      const addedContent = latestDiff
        .split('\n')
        .filter((l) => l.startsWith('+') && !l.startsWith('+++'))
        .join('\n');
      const addsNetwork = NETWORK_PATTERNS.test(addedContent);

      if (addsNetwork) {
        results.push(finding({
          scanner: 'GIT',
          severity: SEVERITY.HIGH,
          title: `Hook modified with new network capability: ${relFile}`,
          description:
            `Hook script "${relFile}" was modified ${modCount} time(s) and the latest change ` +
            `adds outbound network calls (fetch/http/curl/wget/etc.). ` +
            'Hook scripts run automatically with full filesystem access — adding network calls ' +
            'post-initial-commit is a strong rug pull indicator (exfiltration vector).',
          file: relFile,
          evidence: `${modCount} modifications; latest commit: ${shortHash}; network pattern detected in diff`,
          owasp: 'LLM03',
          recommendation:
            `Audit: git log -p -- "${relFile}". ` +
            'Pin hook files to trusted commits. Review what data the network calls access.',
        }));
      } else if (modCount > 3) {
        results.push(finding({
          scanner: 'GIT',
          severity: SEVERITY.MEDIUM,
          title: `Hook script modified frequently: ${relFile}`,
          description:
            `Hook script "${relFile}" has been modified ${modCount} times. ` +
            'Frequent modifications to hook scripts are unusual and warrant review — ' +
            'hooks run automatically and are a high-value target for supply chain attacks.',
          file: relFile,
          evidence: `${modCount} commits modify this file; latest: ${shortHash}`,
          owasp: 'LLM03',
          recommendation:
            `Review all hook changes: git log -p -- "${relFile}". ` +
            'Ensure each modification has a clear, legitimate purpose.',
        }));
      }
    } catch {
      // Per-file errors are non-fatal
    }
  }

  return results;
}
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Category 4: New Outbound URLs Post-Initial Commit
|
|
// ---------------------------------------------------------------------------
|
|
|
|
/**
 * Extract unique, lower-cased hostnames from http(s) URLs in a text block.
 *
 * - The scheme is matched case-insensitively.
 * - A userinfo prefix (`user:pass@`) is skipped, so
 *   `https://github.com@webhook.site/x` yields `webhook.site`, the host that
 *   would actually be contacted.
 * - Trailing dots (sentence punctuation or FQDN root) are removed.
 * - Ports, paths, queries and fragments are ignored.
 *
 * @param {string} text
 * @returns {Set<string>}
 */
function extractHostnames(text) {
  const hosts = new Set();
  // Optional userinfo may not contain whitespace, URL delimiters or quoting
  // characters, which keeps it from spanning unrelated source text.
  const urlRe = /https?:\/\/(?:[^\s\/?#"'`<>()\\]*@)?([a-z0-9.-]+)/gi;

  for (const match of text.matchAll(urlRe)) {
    const host = match[1].toLowerCase().replace(/\.+$/, '');
    if (host !== '') hosts.add(host);
  }

  return hosts;
}
|
|
|
|
/**
 * Detect new outbound URLs added in recent commits not present at initial commit.
 *
 * Hostnames found in the initial (root) commit form the baseline. Hostnames
 * on added lines in the last 50 commits that are not in the baseline are
 * reported: HIGH when they match SUSPICIOUS_DOMAINS, MEDIUM otherwise.
 *
 * A SUSPICIOUS_DOMAINS entry containing a dot matches the host exactly or as
 * a dot-suffix; a bare label (e.g. 'ngrok') matches when it equals any label
 * of the host, so such entries are not silently dead.
 *
 * @param {string} targetPath
 * @returns {object[]} findings
 */
function detectNewOutboundUrls(targetPath) {
  const results = [];

  // Get initial commit hash
  let initialHash;
  try {
    initialHash = git('rev-list --max-parents=0 HEAD', targetPath).split('\n')[0].trim();
  } catch {
    return results;
  }

  // Hostnames present in the initial commit's tree (git grep exits non-zero
  // when nothing matches, which also lands in the fallback below).
  let initialUrls = new Set();
  try {
    const initialGrep = git(`grep -r "https\\?://" ${initialHash}`, targetPath);
    initialUrls = extractHostnames(initialGrep);
  } catch {
    // Fallback: scan the initial commit's own patch
    try {
      const initDiff = git(`show ${initialHash}`, targetPath);
      initialUrls = extractHostnames(initDiff);
    } catch {
      // Cannot determine initial URLs — skip
      return results;
    }
  }

  // Get diff of last 50 commits
  let recentDiff = '';
  try {
    recentDiff = git(`log -50 --format='' -p`, targetPath);
  } catch {
    return results;
  }

  // Keep added lines only (skip the "+++ b/file" headers)
  const addedContent = recentDiff
    .split('\n')
    .filter((l) => l.startsWith('+') && !l.startsWith('+++'))
    .join('\n');

  const newHostnames = [...extractHostnames(addedContent)].filter((h) => !initialUrls.has(h));

  const matchesSuspicious = (host) => {
    const labels = host.split('.');
    return SUSPICIOUS_DOMAINS.some((entry) => (
      entry.includes('.')
        ? host === entry || host.endsWith(`.${entry}`)
        : labels.includes(entry)
    ));
  };

  for (const host of newHostnames) {
    const isSuspicious = matchesSuspicious(host);
    const sev = isSuspicious ? SEVERITY.HIGH : SEVERITY.MEDIUM;

    results.push(finding({
      scanner: 'GIT',
      severity: sev,
      title: isSuspicious
        ? `Suspicious exfiltration endpoint added post-initial-commit: ${host}`
        : `New outbound domain added in recent commits: ${host}`,
      description: isSuspicious
        ? `Domain "${host}" was added in recent commits and matches known exfiltration/ephemeral ` +
          'endpoint patterns (webhook.site, requestbin, ngrok, pipedream, pastebin, etc.). ' +
          'This is a high-confidence rug pull indicator — these services receive arbitrary HTTP requests.'
        : `Domain "${host}" appears in recent commits but was not present at initial commit. ` +
          'New outbound connections introduced after trust establishment warrant review.',
      evidence: `New domain: ${host}; not present in initial commit (${initialHash.slice(0, 8)})`,
      owasp: 'LLM03',
      recommendation: isSuspicious
        ? `Remove all references to "${host}" immediately and audit what data was sent. ` +
          'This domain pattern is used exclusively for receiving exfiltrated data.'
        : `Verify the purpose of "${host}". If legitimate, document it in README. ` +
          'If unexpected, this may indicate a compromised dependency or injected code.',
    }));
  }

  return results;
}
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Category 5: Author/Email Changes
|
|
// ---------------------------------------------------------------------------
|
|
|
|
/**
 * Detect suspicious author diversity in repository history.
 *
 * Emits:
 *   - MEDIUM when more than 3 distinct author emails appear in fewer than
 *     50 commits;
 *   - INFO when any commit after the oldest one uses an author email that
 *     differs from the oldest commit's author.
 *
 * Emails are compared case-insensitively so capitalization variants of one
 * address are not counted as separate authors. A single `git log` call
 * supplies both checks: its output is newest-first, so the oldest commit's
 * author is the last entry.
 *
 * @param {string} targetPath
 * @returns {object[]} findings
 */
function detectAuthorChanges(targetPath) {
  const results = [];

  let emailList;
  try {
    emailList = git('log --format="%ae"', targetPath)
      .split('\n')
      .map((email) => email.trim().toLowerCase())
      .filter(Boolean);
  } catch {
    return results;
  }

  if (emailList.length === 0) return results;

  const totalCommits = emailList.length;
  const uniqueEmails = new Set(emailList);
  const uniqueCount = uniqueEmails.size;

  // Flag: many distinct emails in a small repo
  if (uniqueCount > 3 && totalCommits < 50) {
    results.push(finding({
      scanner: 'GIT',
      severity: SEVERITY.MEDIUM,
      title: 'High author diversity in small repository',
      description:
        `Repository has ${uniqueCount} distinct commit author email(s) across only ${totalCommits} ` +
        'commit(s). High author diversity in a small plugin/package repo can indicate ' +
        'that multiple unrelated parties have committed (e.g., compromised maintainer account, ' +
        'supply chain injection via PR merge with altered identity).',
      evidence: `${uniqueCount} unique emails in ${totalCommits} commits: ${[...uniqueEmails].join(', ')}`,
      owasp: 'LLM03',
      recommendation:
        'Verify each commit author is a known, trusted contributor. ' +
        'Check for commits from unfamiliar email domains or auto-generated addresses.',
    }));
  }

  // Flag: mid-history author change (oldest commit's author vs. later commits)
  const firstAuthor = emailList[emailList.length - 1];
  const newAuthorSet = new Set(
    emailList.slice(0, -1).filter((email) => email !== firstAuthor),
  );

  if (newAuthorSet.size > 0) {
    results.push(finding({
      scanner: 'GIT',
      severity: SEVERITY.INFO,
      title: 'Author change mid-history',
      description:
        `Repository was initially committed by "${firstAuthor}" but later commits use ` +
        `${newAuthorSet.size} different author email(s). This is normal for collaborative ` +
        'projects but worth noting for single-author plugins.',
      evidence: `Original author: ${firstAuthor}; subsequent authors: ${[...newAuthorSet].slice(0, 5).join(', ')}`,
      owasp: 'LLM03',
      recommendation:
        'Verify all contributing authors are known and trusted. ' +
        'For single-maintainer plugins, unexpected author changes warrant investigation.',
    }));
  }

  return results;
}
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Category 6: Binary File Additions
|
|
// ---------------------------------------------------------------------------
|
|
|
|
/**
 * Detect unusual binary files added in recent commits.
 *
 * Files added in the last 50 commits whose name ends with one of
 * BINARY_EXTENSIONS are reported once each (a file that was added, removed
 * and re-added is not reported twice).
 *
 * File names are untrusted, so git is run without a shell and with literal
 * pathspecs; `core.quotePath=false` keeps non-ASCII names unescaped.
 *
 * @param {string} targetPath
 * @returns {object[]} findings
 */
function detectBinaryAdditions(targetPath) {
  const results = [];

  // Shell-free git runner; arguments are passed to git verbatim.
  const runGit = (args) => execFileSync('git', args, {
    cwd: targetPath,
    timeout: GIT_TIMEOUT_MS,
    encoding: 'utf-8',
    maxBuffer: 64 * 1024 * 1024,
    stdio: ['pipe', 'pipe', 'pipe'],
  }).trim();

  let addedFiles;
  try {
    const raw = runGit([
      '-c', 'core.quotePath=false',
      'log', '--diff-filter=A', '--name-only', '--format=', '-50',
    ]);
    addedFiles = [...new Set(raw.split('\n').filter(Boolean))];
  } catch {
    return results;
  }

  const extensions = [...BINARY_EXTENSIONS];
  const binaryFiles = addedFiles.filter((f) => {
    const lower = f.toLowerCase();
    return extensions.some((ext) => lower.endsWith(ext));
  });

  for (const binFile of binaryFiles) {
    // Find which commit added it (most recent add first)
    let addCommit = 'unknown';
    try {
      addCommit = runGit([
        '--literal-pathspecs', 'log', '--diff-filter=A', '--format=%H %ae %ai', '--', binFile,
      ]).split('\n')[0] || 'unknown';
    } catch {
      // non-fatal
    }

    const [hash, email] = addCommit.split(' ');
    const shortHash = hash.slice(0, 8);
    const author = email || 'unknown';

    results.push(finding({
      scanner: 'GIT',
      severity: SEVERITY.LOW,
      title: `Binary file added in recent commits: ${binFile}`,
      description:
        `Binary file "${binFile}" was added in the last 50 commits. ` +
        'Binary files in plugin/package repositories are unusual and cannot be easily audited. ' +
        'They may contain compiled malware, encoded payloads, or native modules with backdoors.',
      file: binFile,
      evidence: `Added in commit ${shortHash} by ${author}`,
      owasp: 'LLM03',
      recommendation:
        `Verify the necessity of "${binFile}". If it must exist, document its provenance ` +
        'and provide a reproducible build process. Scan with antivirus and inspect with ' +
        'strings/objdump/hexdump for suspicious embedded content.',
    }));
  }

  return results;
}
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Category 7: Suspicious Commit Patterns
|
|
// ---------------------------------------------------------------------------
|
|
|
|
/**
 * Flag commits whose subject looks like routine maintenance but which touch
 * hook-related files and add lines matching NETWORK_PATTERNS.
 *
 * Up to MAX_COMMITS hashes are requested from git, of which the 50 most
 * recent are examined. Failures while inspecting a single commit are ignored.
 *
 * @param {string} targetPath
 * @returns {object[]} findings
 */
function detectSuspiciousCommitPatterns(targetPath) {
  const flagged = [];

  const cosmeticPrefix = /^(update|fix|cleanup|refactor|minor|bump|chore)/;
  const isAddedLine = (line) => line.startsWith('+') && !line.startsWith('+++');
  const nonEmptyLines = (text) => text.split('\n').filter(Boolean);

  let recentHashes;
  try {
    const listing = git(`log --format="%H" -${MAX_COMMITS}`, targetPath);
    recentHashes = nonEmptyLines(listing).slice(0, 50); // check last 50
  } catch {
    return flagged;
  }

  for (const sha of recentHashes) {
    try {
      // Subject is lower-cased before the prefix test
      const subject = git(`log -1 --format="%s" ${sha}`, targetPath).toLowerCase();
      if (!cosmeticPrefix.test(subject)) continue;

      // Does this "cosmetic" commit actually touch hooks?
      const changedFiles = nonEmptyLines(
        git(`diff-tree --no-commit-id -r --name-only ${sha}`, targetPath),
      );
      const touchesHooks = changedFiles.some(
        (path) => path.includes('hooks/') || path.includes('hook'),
      );
      if (!touchesHooks) continue;

      // Does the patch add network patterns?
      let patch;
      try {
        patch = git(`show ${sha} --format=""`, targetPath);
      } catch {
        continue;
      }

      const additions = patch.split('\n').filter(isAddedLine).join('\n');
      if (!NETWORK_PATTERNS.test(additions)) continue;

      const shortHash = sha.slice(0, 8);
      const author = git(`log -1 --format="%ae" ${sha}`, targetPath);
      const date = git(`log -1 --format="%ai" ${sha}`, targetPath);
      const hookPaths = changedFiles.filter((path) => path.includes('hook')).join(', ');

      flagged.push(finding({
        scanner: 'GIT',
        severity: SEVERITY.MEDIUM,
        title: `Suspicious commit: cosmetic message hides hook+network changes (${shortHash})`,
        description:
          `Commit ${shortHash} has a cosmetic message ("${subject}") but modifies hook files ` +
          'and introduces new network-access code. This pattern — disguising functional changes ' +
          'as maintenance — is used to slip malicious hook modifications past reviewers.',
        evidence: `Commit: ${shortHash} | Author: ${author} | Date: ${date} | ` +
          `Message: "${subject}" | Hooks modified: ${hookPaths}`,
        owasp: 'LLM03',
        recommendation:
          `Audit this commit in full: git show ${shortHash}. ` +
          'Verify the network calls introduced are intentional and documented. ' +
          'Enforce commit message policies that require meaningful descriptions for hook changes.',
      }));
    } catch {
      // Per-commit errors are non-fatal
    }
  }

  return flagged;
}
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Main scanner export
|
|
// ---------------------------------------------------------------------------
|
|
|
|
/**
 * Scan git history of targetPath for supply chain rug pull signals.
 *
 * Every detection category runs independently; an exception in one is
 * recorded and does not stop the others. Outcomes:
 *   - not a git repository      → status 'skipped';
 *   - errors and no findings    → status 'error', with a message stating
 *     how many categories failed;
 *   - otherwise                 → status 'ok', with `partial_errors` attached
 *     when some categories failed.
 *
 * @param {string} targetPath - Absolute root path being scanned
 * @param {object} discovery - File discovery result (not used directly; git commands enumerate)
 * @returns {Promise<object>} - scannerResult envelope
 */
export async function scan(targetPath, discovery) {
  const startMs = Date.now();

  // Prerequisite: must be a git repo
  if (!isGitRepo(targetPath)) {
    return scannerResult(
      'git-forensics',
      'skipped',
      [],
      0,
      Date.now() - startMs,
      'Not a git repository — git forensics skipped',
    );
  }

  const findings = [];
  const errors = [];

  // Run all detection categories, collecting errors without aborting
  const categories = [
    ['force-push', () => detectForcePushes(targetPath)],
    ['description-drift', () => detectDescriptionDrift(targetPath)],
    ['hook-modifications', () => detectHookModifications(targetPath)],
    ['new-outbound-urls', () => detectNewOutboundUrls(targetPath)],
    ['author-changes', () => detectAuthorChanges(targetPath)],
    ['binary-additions', () => detectBinaryAdditions(targetPath)],
    ['suspicious-patterns', () => detectSuspiciousCommitPatterns(targetPath)],
  ];

  for (const [name, fn] of categories) {
    try {
      findings.push(...fn());
    } catch (err) {
      // Anything can be thrown, not only Error instances
      const reason = err instanceof Error ? err.message : String(err);
      errors.push(`${name}: ${reason}`);
    }
  }

  const durationMs = Date.now() - startMs;

  if (errors.length > 0 && findings.length === 0) {
    // Nothing usable was produced — report as error, but only claim that
    // *all* categories failed when that is actually the case.
    const summary = errors.length === categories.length
      ? 'All detection categories failed'
      : `${errors.length} of ${categories.length} detection categories failed and the rest produced no findings`;
    return scannerResult(
      'git-forensics',
      'error',
      findings,
      0,
      durationMs,
      `${summary}: ${errors.join('; ')}`,
    );
  }

  // Partial errors are attached but status is 'ok' if we have results
  const result = scannerResult('git-forensics', 'ok', findings, 0, durationMs);
  if (errors.length > 0) {
    result.partial_errors = errors;
  }

  return result;
}
|