feat: initial open marketplace with llm-security, config-audit, ultraplan-local
This commit is contained in:
commit
f93d6abdae
380 changed files with 65935 additions and 0 deletions
743
plugins/llm-security/scanners/git-forensics.mjs
Normal file
743
plugins/llm-security/scanners/git-forensics.mjs
Normal file
|
|
@ -0,0 +1,743 @@
|
|||
// git-forensics.mjs — Deterministic git history forensics scanner
|
||||
// Detects supply chain rug pull signals: force pushes, description drift,
|
||||
// hook modifications, new outbound URLs, author changes, binary additions,
|
||||
// and suspicious commit patterns.
|
||||
//
|
||||
// Zero external dependencies — Node.js builtins only.
|
||||
// OWASP coverage: LLM03 (Supply Chain)
|
||||
|
||||
import { finding, scannerResult } from './lib/output.mjs';
|
||||
import { SEVERITY } from './lib/severity.mjs';
|
||||
import { levenshtein } from './lib/string-utils.mjs';
|
||||
import { execSync } from 'node:child_process';
|
||||
import { existsSync } from 'node:fs';
|
||||
import { join } from 'node:path';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Constants
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// Numeric limits used by the detectors below.
const MAX_COMMITS = 500; // count passed to `git log -<n>` when listing commit hashes
const GIT_TIMEOUT_MS = 15000; // timeout (ms) applied to every git invocation
const MAX_DRIFT_FILES = 20; // cap on commands/agents files checked for description drift

/** Hosts and host keywords tied to request-capture, tunnelling, or paste services. */
const SUSPICIOUS_DOMAINS = [
  'webhook.site', 'requestbin', 'ngrok', 'ngrok.io',
  'pipedream.net', 'pastebin.com', 'hastebin.com', 'beeceptor.com',
  'hookbin.com', 'httpbin.org', 'canarytokens.com',
];

/** File extensions treated as binary artifacts when they show up in added files. */
const BINARY_EXTENSIONS = new Set([
  '.exe', '.dll', '.so', '.dylib',
  '.bin', '.dat', '.wasm', '.node',
]);

/** Case-insensitive, whole-word match for network-related identifiers in added code. */
const NETWORK_PATTERNS = /\b(fetch|http|https|curl|wget|dns\.lookup|net\.connect|XMLHttpRequest|axios|got)\b/i;
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Helper: run a git command with standard options
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Run a git command in the target directory and return its output.
 *
 * The string is executed through a shell by execSync, so callers must not
 * interpolate unescaped repository-controlled text (file names, refs) into it.
 *
 * @param {string} cmd - Git arguments, i.e. everything after the leading `git`
 * @param {string} cwd - Working directory
 * @returns {string} - stdout string, trimmed
 * @throws - On non-zero exit, timeout, or output larger than the buffer limit
 */
function git(cmd, cwd) {
  return execSync(`git ${cmd}`, {
    cwd,
    timeout: GIT_TIMEOUT_MS,
    encoding: 'utf-8',
    // The execSync default buffer is small enough that `git log -p` or
    // `git grep` on a real repository can overflow it and throw, which the
    // callers would silently treat as "nothing found".
    maxBuffer: 64 * 1024 * 1024,
    stdio: ['pipe', 'pipe', 'pipe'],
  }).trim();
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Git repo detection
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Report whether targetPath belongs to a git repository.
 * A `.git` entry directly under targetPath is accepted as-is; otherwise
 * `git rev-parse --git-dir` is asked and any failure means "no".
 * @param {string} targetPath
 * @returns {boolean}
 */
function isGitRepo(targetPath) {
  const dotGitPath = join(targetPath, '.git');
  if (!existsSync(dotGitPath)) {
    try {
      git('rev-parse --git-dir', targetPath);
    } catch {
      return false;
    }
  }
  return true;
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Category 1: Force Push Detection
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Detect force push signals in the reflog.
 * Looks for entries whose reflog subject starts with "reset:" and for
 * "forced-update" entries in a reflog walk.
 * @param {string} targetPath
 * @returns {object[]} findings
 */
function detectForcePushes(targetPath) {
  const findings = [];

  // Lines are "<hash> <selector>@{n} <reflog subject>". Anchoring on the
  // subject prefix avoids flagging ordinary entries whose commit message
  // merely contains the word "reset".
  const RESET_ENTRY = /^[0-9a-f]+ [^ ]+@\{[^}]*\} reset:/;

  // Check reflog for reset entries (local history-rewrite evidence)
  try {
    const reflog = git('reflog --format="%H %gD %gs" -n 500', targetPath);
    const resetLines = reflog
      .split('\n')
      .filter(Boolean)
      .filter(l => RESET_ENTRY.test(l));

    if (resetLines.length > 0) {
      const examples = resetLines.slice(0, 3).map(l => l.slice(0, 80)).join(' | ');
      findings.push(finding({
        scanner: 'GIT',
        severity: SEVERITY.HIGH,
        title: 'Force push signal: reflog contains reset entries',
        description:
          `Reflog contains ${resetLines.length} reset entry/entries. ` +
          'git reset --hard in a shared repo indicates history was rewritten, ' +
          'which is the mechanism used in rug pull attacks to swap legitimate code ' +
          'with malicious content after trust is established.',
        evidence: `${resetLines.length} reset entries. Examples: ${examples}`,
        owasp: 'LLM03',
        recommendation:
          'Review what was changed in the rewritten history. Compare the pre-reset ' +
          'commit (visible in reflog) with the current HEAD to identify removed content.',
      }));
    }
  } catch {
    // reflog unavailable — not fatal
  }

  // Check walk-reflogs for forced-update
  try {
    const walkLog = git('log --walk-reflogs --format="%H %gD %gs" -n 200', targetPath);
    const forcedLines = walkLog.split('\n').filter(l => l.includes('forced-update'));

    if (forcedLines.length > 0) {
      const shortHash = forcedLines[0].split(' ')[0].slice(0, 8);
      findings.push(finding({
        scanner: 'GIT',
        severity: SEVERITY.HIGH,
        title: 'Force push signal: forced-update entries in walk-reflogs',
        description:
          `Found ${forcedLines.length} forced-update entry/entries in reflog walk. ` +
          'Forced updates overwrite remote history non-fast-forward, a classic rug pull vector.',
        evidence: `${forcedLines.length} forced-update entries; first at commit ${shortHash}`,
        owasp: 'LLM03',
        recommendation:
          'Audit the commits immediately before and after each forced-update. ' +
          'Pin the plugin to a specific commit hash rather than a branch reference.',
      }));
    }
  } catch {
    // walk-reflogs may fail in shallow clones
  }

  return findings;
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Category 2: Description Drift
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Extract the `description` field from YAML frontmatter in a string.
 *
 * Handles inline values (optionally quoted), block scalars (`|`, `>`, with
 * optional chomping/indent indicators such as `|-`), and values that continue
 * on following indented lines. Line endings are normalized first so CRLF
 * files parse the same as LF files.
 *
 * @param {string} content - File content that may start with `---` frontmatter
 * @returns {string | null} The description text, or null when there is no
 *   frontmatter, no description key, or no description content
 */
function extractDescription(content) {
  const normalized = content.replace(/\r\n?/g, '\n');
  const fmMatch = normalized.match(/^---\n([\s\S]*?)\n---/);
  if (!fmMatch) return null;

  const lines = fmMatch[1].split('\n');
  const keyIndex = lines.findIndex(l => l.startsWith('description:'));
  if (keyIndex === -1) return null;

  // Only text on the key's own line counts as the inline value; the previous
  // regex let whitespace matching spill onto the next line and could return
  // the following key as the description.
  const inline = lines[keyIndex].slice('description:'.length).trim();
  const isBlockHeader = /^[|>][-+1-9]{0,2}$/.test(inline);

  if (inline !== '' && !isBlockHeader) {
    return inline.replace(/^['"]|['"]$/g, '');
  }

  // Block scalar or continuation: gather the indented lines that follow,
  // keeping interior blank lines, and stop at the next top-level key.
  const body = [];
  for (const line of lines.slice(keyIndex + 1)) {
    if (line.trim() === '') {
      body.push('');
      continue;
    }
    if (!/^[ \t]/.test(line)) break;
    body.push(line);
  }

  const firstContentLine = body.find(l => l !== '');
  if (firstContentLine === undefined) return null;

  // Dedent by the first content line's indentation rather than a fixed width.
  const indent = firstContentLine.match(/^[ \t]*/)[0];
  const text = body
    .map(l => (l.startsWith(indent) ? l.slice(indent.length) : l.trimStart()))
    .join('\n')
    .trim();

  return text === '' ? null : text;
}
|
||||
|
||||
/**
 * Detect significant description changes in commands/ and agents/ files.
 *
 * For each tracked markdown file (up to MAX_DRIFT_FILES) the frontmatter
 * description at the commit that first added the file is compared with the
 * description at HEAD; a Levenshtein distance above 20% of the original
 * length produces a MEDIUM finding.
 *
 * @param {string} targetPath
 * @returns {object[]} findings
 */
function detectDescriptionDrift(targetPath) {
  const results = [];

  // File names come from the scanned (untrusted) repository and are placed
  // inside double-quoted shell words, so escape the characters that remain
  // special there to prevent command injection.
  const shellEscape = (s) => s.replace(/(["\\$`])/g, '\\$1');

  // List tracked files matching commands/*.md or agents/*.md
  let trackedFiles;
  try {
    // -z yields NUL-separated names without git's C-style quoting.
    const raw = git('ls-files -z -- "commands/*.md" "agents/*.md"', targetPath);
    trackedFiles = raw.split('\0').filter(Boolean).slice(0, MAX_DRIFT_FILES);
  } catch {
    return results;
  }

  for (const relFile of trackedFiles) {
    try {
      const quotedFile = shellEscape(relFile);

      // Find the commit that first added this file
      const addHash = git(`log --diff-filter=A --format="%H" -- "${quotedFile}"`, targetPath)
        .split('\n')
        .filter(Boolean)
        .pop(); // oldest = last in log output (reverse chrono)

      if (!addHash) continue;

      const shortAddHash = addHash.slice(0, 8);

      // "<rev>:./<path>" resolves relative to the working directory, matching
      // what ls-files printed even when targetPath is a repo subdirectory.
      let initialContent;
      try {
        initialContent = git(`show "${addHash}:./${quotedFile}"`, targetPath);
      } catch {
        continue;
      }

      // Get current content
      let currentContent;
      try {
        currentContent = git(`show "HEAD:./${quotedFile}"`, targetPath);
      } catch {
        continue;
      }

      const initialDesc = extractDescription(initialContent);
      const currentDesc = extractDescription(currentContent);

      if (!initialDesc || !currentDesc) continue;
      if (initialDesc === currentDesc) continue;

      const dist = levenshtein(initialDesc, currentDesc);
      const threshold = Math.ceil(initialDesc.length * 0.20);

      if (dist > threshold) {
        results.push(finding({
          scanner: 'GIT',
          severity: SEVERITY.MEDIUM,
          title: `Description drift detected: ${relFile}`,
          description:
            `The description in "${relFile}" has changed significantly since its initial commit (${shortAddHash}). ` +
            `Edit distance: ${dist} characters (threshold: ${threshold}, 20% of original length ${initialDesc.length}). ` +
            'Substantial description changes can indicate purpose drift or an attempt to ' +
            'misrepresent what an agent/command does after users have trusted it.',
          file: relFile,
          evidence:
            `Initial (${shortAddHash}): "${initialDesc.slice(0, 80)}${initialDesc.length > 80 ? '…' : ''}" | ` +
            `Current: "${currentDesc.slice(0, 80)}${currentDesc.length > 80 ? '…' : ''}" | ` +
            `Levenshtein distance: ${dist}`,
          owasp: 'LLM03',
          recommendation:
            'Review the description change history: ' +
            `git log -p -- "${relFile}". ` +
            'Verify the new description accurately represents current behavior.',
        }));
      }
    } catch {
      // Per-file errors are non-fatal
    }
  }

  return results;
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Category 3: Hook Modification After Initial Commit
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Detect suspicious hook file modification patterns.
 *
 * For every tracked file under hooks/scripts/ that has more than one commit:
 * a HIGH finding is raised when the most recent commit touching the file adds
 * a line matching NETWORK_PATTERNS; otherwise a MEDIUM finding is raised when
 * the file has been modified more than three times.
 *
 * @param {string} targetPath
 * @returns {object[]} findings
 */
function detectHookModifications(targetPath) {
  const results = [];

  // File names come from the scanned (untrusted) repository and are placed
  // inside double-quoted shell words, so escape the characters that remain
  // special there to prevent command injection.
  const shellEscape = (s) => s.replace(/(["\\$`])/g, '\\$1');

  let hookFiles;
  try {
    // -z yields NUL-separated names without git's C-style quoting.
    const raw = git('ls-files -z -- "hooks/scripts/*"', targetPath);
    hookFiles = raw.split('\0').filter(Boolean);
  } catch {
    return results;
  }

  for (const relFile of hookFiles) {
    try {
      const quotedFile = shellEscape(relFile);

      // Count total commits touching this file
      const logLines = git(`log --oneline -- "${quotedFile}"`, targetPath)
        .split('\n')
        .filter(Boolean);
      const modCount = logLines.length;

      if (modCount <= 1) continue; // Only the initial commit — clean

      // Newest commit that touched this file (log output is newest-first).
      const shortHash = logLines[0].split(' ')[0];

      // Inspect the patch of that commit for this file. Diffing HEAD~1..HEAD
      // would miss the change whenever the hook was not part of the very last
      // repository commit.
      let latestDiff = '';
      try {
        latestDiff = git(`show --format="" ${shortHash} -- "${quotedFile}"`, targetPath);
      } catch {
        // Patch unavailable — fall through with an empty diff
      }

      const addedContent = latestDiff
        .split('\n')
        .filter(l => l.startsWith('+') && !l.startsWith('+++'))
        .join('\n');
      const addsNetwork = NETWORK_PATTERNS.test(addedContent);

      if (addsNetwork) {
        results.push(finding({
          scanner: 'GIT',
          severity: SEVERITY.HIGH,
          title: `Hook modified with new network capability: ${relFile}`,
          description:
            `Hook script "${relFile}" was modified ${modCount} time(s) and the latest change ` +
            `adds outbound network calls (fetch/http/curl/wget/etc.). ` +
            'Hook scripts run automatically with full filesystem access — adding network calls ' +
            'post-initial-commit is a strong rug pull indicator (exfiltration vector).',
          file: relFile,
          evidence: `${modCount} modifications; latest commit: ${shortHash}; network pattern detected in diff`,
          owasp: 'LLM03',
          recommendation:
            `Audit: git log -p -- "${relFile}". ` +
            'Pin hook files to trusted commits. Review what data the network calls access.',
        }));
      } else if (modCount > 3) {
        results.push(finding({
          scanner: 'GIT',
          severity: SEVERITY.MEDIUM,
          title: `Hook script modified frequently: ${relFile}`,
          description:
            `Hook script "${relFile}" has been modified ${modCount} times. ` +
            'Frequent modifications to hook scripts are unusual and warrant review — ' +
            'hooks run automatically and are a high-value target for supply chain attacks.',
          file: relFile,
          evidence: `${modCount} commits modify this file; latest: ${shortHash}`,
          owasp: 'LLM03',
          recommendation:
            `Review all hook changes: git log -p -- "${relFile}". ` +
            'Ensure each modification has a clear, legitimate purpose.',
        }));
      }
    } catch {
      // Per-file errors are non-fatal
    }
  }

  return results;
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Category 4: New Outbound URLs Post-Initial Commit
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Extract unique hostnames from http(s) URLs in a text block.
 *
 * Any `user:pass@` userinfo prefix is skipped so the real host is reported
 * (otherwise `https://github.com@evil.example/` would be read as github.com),
 * and trailing dots — typically sentence punctuation — are removed.
 *
 * @param {string} text
 * @returns {Set<string>} Lower-cased hostnames
 */
function extractHostnames(text) {
  const hosts = new Set();
  const urlRe = /https?:\/\/(?:[^\s\/?#@]*@)?([a-zA-Z0-9.-]+)/g;
  for (const match of text.matchAll(urlRe)) {
    const host = match[1].toLowerCase().replace(/\.+$/, '');
    if (host !== '') hosts.add(host);
  }
  return hosts;
}
|
||||
|
||||
/**
 * Detect outbound hostnames added in the last 50 commits that were not
 * present in the initial commit.
 *
 * Hosts matching SUSPICIOUS_DOMAINS are reported as HIGH, all other new
 * hosts as MEDIUM.
 *
 * @param {string} targetPath
 * @returns {object[]} findings
 */
function detectNewOutboundUrls(targetPath) {
  const results = [];

  // Get initial commit hash
  let initialHash;
  try {
    initialHash = git('rev-list --max-parents=0 HEAD', targetPath).split('\n')[0].trim();
  } catch {
    return results;
  }

  // Collect hostnames present in the initial commit's tree
  let initialUrls = new Set();
  try {
    // git grep exits non-zero when nothing matches, which lands in the fallback.
    const initialGrep = git(`grep -r "https\\?://" ${initialHash}`, targetPath);
    initialUrls = extractHostnames(initialGrep);
  } catch {
    // Fallback: scan the initial commit's own patch
    try {
      const initDiff = git(`show ${initialHash}`, targetPath);
      initialUrls = extractHostnames(initDiff);
    } catch {
      // Cannot determine initial URLs — skip
      return results;
    }
  }

  // Get diff of last 50 commits (added lines only)
  let recentDiff = '';
  try {
    recentDiff = git(`log -50 --format='' -p`, targetPath);
  } catch {
    return results;
  }

  // Parse added lines from the diff
  const addedContent = recentDiff
    .split('\n')
    .filter(l => l.startsWith('+') && !l.startsWith('+++'))
    .join('\n');

  const addedHostnames = extractHostnames(addedContent);
  const newHostnames = [...addedHostnames].filter(h => !initialUrls.has(h));

  // Dotted entries are domains (exact or parent-domain match). Dotless entries
  // such as "ngrok" are keywords: a plain suffix test can never match them
  // against a real host, so compare them against the host's labels instead
  // (e.g. "requestbin.com", "x.ngrok-free.app").
  const matchesSuspicious = (host) => {
    const labels = host.split('.');
    return SUSPICIOUS_DOMAINS.some(d =>
      d.includes('.')
        ? host === d || host.endsWith(`.${d}`)
        : labels.some(label => label === d || label.startsWith(`${d}-`)));
  };

  for (const host of newHostnames) {
    const isSuspicious = matchesSuspicious(host);
    const sev = isSuspicious ? SEVERITY.HIGH : SEVERITY.MEDIUM;

    results.push(finding({
      scanner: 'GIT',
      severity: sev,
      title: isSuspicious
        ? `Suspicious exfiltration endpoint added post-initial-commit: ${host}`
        : `New outbound domain added in recent commits: ${host}`,
      description: isSuspicious
        ? `Domain "${host}" was added in recent commits and matches known exfiltration/ephemeral ` +
          'endpoint patterns (webhook.site, requestbin, ngrok, pipedream, pastebin, etc.). ' +
          'This is a high-confidence rug pull indicator — these services receive arbitrary HTTP requests.'
        : `Domain "${host}" appears in recent commits but was not present at initial commit. ` +
          'New outbound connections introduced after trust establishment warrant review.',
      evidence: `New domain: ${host}; not present in initial commit (${initialHash.slice(0, 8)})`,
      owasp: 'LLM03',
      recommendation: isSuspicious
        ? `Remove all references to "${host}" immediately and audit what data was sent. ` +
          'This domain pattern is used exclusively for receiving exfiltrated data.'
        : `Verify the purpose of "${host}". If legitimate, document it in README. ` +
          'If unexpected, this may indicate a compromised dependency or injected code.',
    }));
  }

  return results;
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Category 5: Author/Email Changes
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Detect suspicious author diversity in repository history.
 *
 * Emits a MEDIUM finding when more than 3 distinct author emails appear in
 * fewer than 50 commits, and an INFO finding when any commit after the oldest
 * one uses an email different from the oldest commit's author.
 *
 * @param {string} targetPath
 * @returns {object[]} findings
 */
function detectAuthorChanges(targetPath) {
  const results = [];

  let emailList;
  try {
    emailList = git('log --format="%ae"', targetPath).split('\n').filter(Boolean);
  } catch {
    return results;
  }

  const totalCommits = emailList.length;
  if (totalCommits === 0) return results;

  const uniqueEmails = new Set(emailList);
  const uniqueCount = uniqueEmails.size;

  // Flag: many distinct emails in a small repo
  if (uniqueCount > 3 && totalCommits < 50) {
    results.push(finding({
      scanner: 'GIT',
      severity: SEVERITY.MEDIUM,
      title: 'High author diversity in small repository',
      description:
        `Repository has ${uniqueCount} distinct commit author email(s) across only ${totalCommits} ` +
        'commit(s). High author diversity in a small plugin/package repo can indicate ' +
        'that multiple unrelated parties have committed (e.g., compromised maintainer account, ' +
        'supply chain injection via PR merge with altered identity).',
      evidence: `${uniqueCount} unique emails in ${totalCommits} commits: ${[...uniqueEmails].join(', ')}`,
      owasp: 'LLM03',
      recommendation:
        'Verify each commit author is a known, trusted contributor. ' +
        'Check for commits from unfamiliar email domains or auto-generated addresses.',
    }));
  }

  // Flag: mid-history author change. The log is newest-first, so the oldest
  // commit's author is the last entry — no second `git log --reverse` needed.
  const firstAuthor = emailList[totalCommits - 1].trim();
  const laterAuthors = emailList.slice(0, -1); // all except the oldest
  const newAuthorSet = new Set(laterAuthors.filter(e => e !== firstAuthor));

  if (newAuthorSet.size > 0) {
    results.push(finding({
      scanner: 'GIT',
      severity: SEVERITY.INFO,
      title: 'Author change mid-history',
      description:
        `Repository was initially committed by "${firstAuthor}" but later commits use ` +
        `${newAuthorSet.size} different author email(s). This is normal for collaborative ` +
        'projects but worth noting for single-author plugins.',
      evidence: `Original author: ${firstAuthor}; subsequent authors: ${[...newAuthorSet].slice(0, 5).join(', ')}`,
      owasp: 'LLM03',
      recommendation:
        'Verify all contributing authors are known and trusted. ' +
        'For single-maintainer plugins, unexpected author changes warrant investigation.',
    }));
  }

  return results;
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Category 6: Binary File Additions
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Detect unusual binary files added in the last 50 commits.
 *
 * A file counts as binary when its extension is in BINARY_EXTENSIONS. Each
 * distinct file produces one LOW finding, annotated with the most recent
 * commit that added it when that can be determined.
 *
 * @param {string} targetPath
 * @returns {object[]} findings
 */
function detectBinaryAdditions(targetPath) {
  const results = [];

  // File names come from the scanned (untrusted) repository and are placed
  // inside double-quoted shell words, so escape the characters that remain
  // special there to prevent command injection.
  const shellEscape = (s) => s.replace(/(["\\$`])/g, '\\$1');

  let addedFiles;
  try {
    // core.quotePath=false keeps non-ASCII names readable instead of
    // octal-escaped and wrapped in quotes, which would hide the extension.
    const raw = git(
      '-c core.quotePath=false log --diff-filter=A --name-only --format="" -50',
      targetPath,
    );
    // A file added more than once (added, removed, re-added) is reported once.
    addedFiles = [...new Set(raw.split('\n').filter(Boolean))];
  } catch {
    return results;
  }

  const hasBinaryExtension = (file) => {
    // Names with control characters, quotes or backslashes are still wrapped
    // in double quotes by git; drop them before looking at the extension.
    const name = file.replace(/^"|"$/g, '').toLowerCase();
    const dot = name.lastIndexOf('.');
    return dot !== -1 && BINARY_EXTENSIONS.has(name.slice(dot));
  };

  for (const binFile of addedFiles.filter(hasBinaryExtension)) {
    // Find which commit added it
    let addCommit = 'unknown';
    try {
      addCommit = git(
        `log --diff-filter=A --format="%H %ae %ai" -- "${shellEscape(binFile)}"`,
        targetPath,
      ).split('\n')[0] || 'unknown';
    } catch {
      // non-fatal
    }

    const [hash, author = 'unknown'] = addCommit.split(' ');
    const shortHash = hash.slice(0, 8);

    results.push(finding({
      scanner: 'GIT',
      severity: SEVERITY.LOW,
      title: `Binary file added in recent commits: ${binFile}`,
      description:
        `Binary file "${binFile}" was added in the last 50 commits. ` +
        'Binary files in plugin/package repositories are unusual and cannot be easily audited. ' +
        'They may contain compiled malware, encoded payloads, or native modules with backdoors.',
      file: binFile,
      evidence: `Added in commit ${shortHash} by ${author || 'unknown'}`,
      owasp: 'LLM03',
      recommendation:
        `Verify the necessity of "${binFile}". If it must exist, document its provenance ` +
        'and provide a reproducible build process. Scan with antivirus and inspect with ' +
        'strings/objdump/hexdump for suspicious embedded content.',
    }));
  }

  return results;
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Category 7: Suspicious Commit Patterns
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Detect commits that add new network capabilities while modifying hook files
 * under a maintenance-style commit message.
 *
 * Inspects the 50 most recent commits. A commit is reported (MEDIUM) when its
 * subject starts with update/fix/cleanup/refactor/minor/bump/chore, it changes
 * at least one path containing "hook", and its added lines match
 * NETWORK_PATTERNS.
 *
 * @param {string} targetPath
 * @returns {object[]} findings
 */
function detectSuspiciousCommitPatterns(targetPath) {
  const results = [];

  // Only this many commits are inspected, so only this many are requested.
  const inspectLimit = Math.min(MAX_COMMITS, 50);

  let commitHashes;
  try {
    const raw = git(`log --format="%H" -${inspectLimit}`, targetPath);
    commitHashes = raw.split('\n').filter(Boolean);
  } catch {
    return results;
  }

  for (const hash of commitHashes) {
    try {
      // One call for date, author and subject. The subject is last so an
      // empty subject cannot shift the other fields after trimming.
      const [date = '', author = '', ...subjectParts] = git(
        `log -1 --format="%ai%n%ae%n%s" ${hash}`,
        targetPath,
      ).split('\n');
      const subject = subjectParts.join('\n');
      const isCosmeticMsg = /^(update|fix|cleanup|refactor|minor|bump|chore)/.test(subject.toLowerCase());

      if (!isCosmeticMsg) continue;

      // Check if this "cosmetic" commit actually touches hooks
      const changedFiles = git(`diff-tree --no-commit-id -r --name-only ${hash}`, targetPath)
        .split('\n')
        .filter(Boolean);
      const hookFiles = changedFiles.filter(f => f.includes('hook'));

      if (hookFiles.length === 0) continue;

      // Check if the diff adds network patterns
      let commitDiff;
      try {
        commitDiff = git(`show ${hash} --format=""`, targetPath);
      } catch {
        continue;
      }

      const addedInCommit = commitDiff
        .split('\n')
        .filter(l => l.startsWith('+') && !l.startsWith('+++'))
        .join('\n');

      if (!NETWORK_PATTERNS.test(addedInCommit)) continue;

      const shortHash = hash.slice(0, 8);

      results.push(finding({
        scanner: 'GIT',
        severity: SEVERITY.MEDIUM,
        title: `Suspicious commit: cosmetic message hides hook+network changes (${shortHash})`,
        description:
          // Report the subject as written, not the lower-cased copy used for matching.
          `Commit ${shortHash} has a cosmetic message ("${subject}") but modifies hook files ` +
          'and introduces new network-access code. This pattern — disguising functional changes ' +
          'as maintenance — is used to slip malicious hook modifications past reviewers.',
        evidence: `Commit: ${shortHash} | Author: ${author} | Date: ${date} | ` +
          `Message: "${subject}" | Hooks modified: ${hookFiles.join(', ')}`,
        owasp: 'LLM03',
        recommendation:
          `Audit this commit in full: git show ${shortHash}. ` +
          'Verify the network calls introduced are intentional and documented. ' +
          'Enforce commit message policies that require meaningful descriptions for hook changes.',
      }));
    } catch {
      // Per-commit errors are non-fatal
    }
  }

  return results;
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Main scanner export
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Scan git history of targetPath for supply chain rug pull signals.
 *
 * Runs every detection category in turn; a category that throws is recorded
 * and does not stop the others. When at least one category failed and no
 * findings were produced the result status is 'error'; otherwise it is 'ok',
 * with `partial_errors` attached if any category failed.
 *
 * @param {string} targetPath - Absolute root path being scanned
 * @param {object} discovery - File discovery result (unused here; git commands enumerate)
 * @returns {Promise<object>} - scannerResult envelope
 */
export async function scan(targetPath, discovery) {
  const startMs = Date.now();

  // Prerequisite: must be a git repo
  if (!isGitRepo(targetPath)) {
    return scannerResult(
      'git-forensics',
      'skipped',
      [],
      0,
      Date.now() - startMs,
      'Not a git repository — git forensics skipped',
    );
  }

  const findings = [];
  const errors = [];

  // Run all detection categories, collecting errors without aborting
  const categories = [
    ['force-push', () => detectForcePushes(targetPath)],
    ['description-drift', () => detectDescriptionDrift(targetPath)],
    ['hook-modifications', () => detectHookModifications(targetPath)],
    ['new-outbound-urls', () => detectNewOutboundUrls(targetPath)],
    ['author-changes', () => detectAuthorChanges(targetPath)],
    ['binary-additions', () => detectBinaryAdditions(targetPath)],
    ['suspicious-patterns', () => detectSuspiciousCommitPatterns(targetPath)],
  ];

  for (const [name, fn] of categories) {
    try {
      findings.push(...fn());
    } catch (err) {
      errors.push(`${name}: ${err.message}`);
    }
  }

  const durationMs = Date.now() - startMs;

  if (errors.length > 0 && findings.length === 0) {
    // Failures with nothing to report cannot be presented as a clean scan.
    // Say "all" only when every category really failed.
    const summary = errors.length === categories.length
      ? 'All detection categories failed'
      : `${errors.length} of ${categories.length} detection categories failed`;
    return scannerResult(
      'git-forensics',
      'error',
      findings,
      0,
      durationMs,
      `${summary}: ${errors.join('; ')}`,
    );
  }

  // Partial errors are attached but status is 'ok' when there are findings
  const result = scannerResult('git-forensics', 'ok', findings, 0, durationMs);
  if (errors.length > 0) {
    result.partial_errors = errors;
  }

  return result;
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue