Adds scanGitAttributes(repoDir) — a read-only helper that parses the
repository's root .gitattributes after a sandboxed clone and returns
every filter=, diff= and merge= driver entry it declares. The clone CLI
prints each entry as a "MEDIUM" stderr advisory, followed by a
recommendation to verify the smudge/clean command before moving the
clone outside the sandbox.
Why: a filter driver that is defined in the consumer's git config runs
an arbitrary shell command (smudge runs when a file is checked out into
the working tree, clean when it is staged). Even with the existing
sandboxed clone, downstream consumers that re-checkout files outside the
sandbox can be exploited. Surfacing the directive list lets the caller
decide whether to proceed.
Out-of-scope: in-line content of the smudge command is not analysed —
the advisory is for human review, not automatic blocking.
Tests:
- tests/lib/git-clone-gitattributes.test.mjs (8 cases): LFS-style,
custom driver, missing/empty/comment-only files, line-number
tracking, inline-comment stripping, unreadable path graceful return.
275 lines
8.7 KiB
JavaScript
275 lines
8.7 KiB
JavaScript
#!/usr/bin/env node
|
|
// git-clone.mjs — Clone GitHub repos to temp dirs for security scanning
// Usage:
//   node git-clone.mjs clone <url> [--branch <name>]  → sandboxed shallow clone, prints tmpdir path
//   node git-clone.mjs cleanup <dir>                   → removes temp directory
//   node git-clone.mjs validate <url>                  → exits 0 if valid GitHub URL, 1 if not
|
|
|
|
import { mkdtempSync, rmSync, existsSync, realpathSync, readFileSync } from 'node:fs';
import { basename, dirname, join, resolve, sep } from 'node:path';
import { tmpdir } from 'node:os';
import { spawnSync } from 'node:child_process';
|
|
|
|
/** HTTPS remote: https://github.com/<owner>/<repo>, optional ".git" suffix and trailing slash. */
const GITHUB_URL_RE = /^https:\/\/github\.com\/[\w.-]+\/[\w.-]+(\.git)?\/?$/;

/** scp-style SSH remote: git@github.com:<owner>/<repo>, optional ".git" suffix. */
const GITHUB_SSH_RE = /^git@github\.com:[\w.-]+\/[\w.-]+(\.git)?$/;

/** Largest clone the CLI keeps, in megabytes as reported by `du -sm`. */
const MAX_CLONE_SIZE_MB = 100;
|
|
|
|
/**
 * Tell whether `url` is an accepted GitHub remote, in either the HTTPS or
 * the scp-style SSH form.
 *
 * @param {string} url Candidate remote URL
 * @returns {boolean} true when one of the two GitHub patterns matches
 */
function isValidUrl(url) {
  const acceptedForms = [GITHUB_URL_RE, GITHUB_SSH_RE];
  return acceptedForms.some((pattern) => pattern.test(url));
}
|
|
|
|
/**
 * Split CLI arguments into the optional branch name and the positionals.
 *
 * Both `--branch <name>` and `--branch=<name>` are recognised. A trailing
 * `--branch` with no value is kept as a positional, so the caller sees it
 * instead of having it vanish.
 *
 * @param {string[]} argv Arguments following the sub-command
 * @returns {{branch: string|null, positional: string[]}}
 */
function parseArgs(argv) {
  const BRANCH_FLAG = '--branch';
  const BRANCH_PREFIX = `${BRANCH_FLAG}=`;
  const args = { branch: null, positional: [] };

  for (let i = 0; i < argv.length; i++) {
    const arg = argv[i];
    if (arg === BRANCH_FLAG && i + 1 < argv.length) {
      // Space-separated form: the next argument is the branch name.
      args.branch = argv[++i];
    } else if (typeof arg === 'string' && arg.startsWith(BRANCH_PREFIX)) {
      // Equals form. Previously this fell through to the positionals, so a
      // requested branch was silently ignored when it followed the URL.
      const value = arg.slice(BRANCH_PREFIX.length);
      args.branch = value === '' ? null : value;
    } else {
      args.positional.push(arg);
    }
  }
  return args;
}
|
|
|
|
/**
 * Git config overrides that neutralize known attack vectors, already
 * expanded into `-c key=value` argument pairs ready to place before the
 * git sub-command.
 */
const GIT_SANDBOX_CONFIG = [
  'core.hooksPath=/dev/null',
  'core.symlinks=false',
  'core.fsmonitor=false',
  // Blank out the LFS filter commands so they are never invoked.
  'filter.lfs.process=',
  'filter.lfs.smudge=',
  'filter.lfs.clean=',
  'protocol.file.allow=never',
  'transfer.fsckObjects=true',
].flatMap((setting) => ['-c', setting]);
|
|
|
|
/**
 * Environment for the git child process: a snapshot of the current
 * process environment (taken at module load) plus overrides that isolate
 * git from system/user config and disable interactive prompts.
 */
const GIT_SANDBOX_ENV = Object.assign({}, process.env, {
  GIT_CONFIG_NOSYSTEM: '1',
  GIT_CONFIG_GLOBAL: '/dev/null',
  GIT_ATTR_NOSYSTEM: '1',
  GIT_TERMINAL_PROMPT: '0',
});
|
|
|
|
/**
 * Build a sandbox-exec profile restricting file writes to a single directory.
 * macOS only — returns null on other platforms or when `sandbox-exec` is
 * not found on PATH.
 *
 * @param {string} allowedWritePath Directory that stays writable; it must
 *   exist, because it is resolved with realpathSync (which throws otherwise)
 * @returns {string|null} Profile text for `sandbox-exec -p`, or null
 */
function buildSandboxProfile(allowedWritePath) {
  if (process.platform !== 'darwin') return null;
  const check = spawnSync('which', ['sandbox-exec'], { encoding: 'utf8' });
  if (check.status !== 0) return null;

  const realPath = realpathSync(allowedWritePath);
  // The path is embedded in a double-quoted profile string: escape
  // backslashes and quotes so an unusual directory name cannot terminate
  // the literal early and alter the rules that follow.
  const quotedPath = realPath.replace(/["\\]/g, '\\$&');

  return [
    '(version 1)',
    '(allow default)',
    '(deny file-write*)',
    `(allow file-write* (subpath "${quotedPath}"))`,
    '(allow file-write* (literal "/dev/null"))',
    '(allow file-write* (literal "/dev/tty"))',
  ].join('');
}
|
|
|
|
/**
 * Build bwrap args restricting writes to a single directory.
 * Linux only — returns null if bwrap is not installed or fails.
 *
 * @param {string} allowedWritePath Directory bound read-write inside the sandbox
 * @param {string[]} innerArgs Command and arguments to run inside bwrap
 * @returns {string[]|null} Full bwrap argument list ending with `innerArgs`,
 *   or null when bwrap cannot be used
 */
function buildBwrapArgs(allowedWritePath, innerArgs) {
  if (process.platform !== 'linux') return null;
  const check = spawnSync('which', ['bwrap'], { encoding: 'utf8' });
  if (check.status !== 0) return null;

  // Test that bwrap actually works (fails on Ubuntu 24.04+ without admin config)
  const probe = spawnSync('bwrap', ['--ro-bind', '/', '/', '--dev', '/dev', '/bin/true'], {
    stdio: 'ignore', timeout: 5000,
  });
  if (probe.status !== 0) return null;

  return [
    '--ro-bind', '/', '/',                         // read-only root
    '--bind', allowedWritePath, allowedWritePath,  // writable clone dir
    '--dev', '/dev',                               // /dev/null etc.
    '--unshare-all',                               // isolate namespaces
    // --unshare-all also unshares the network namespace; the wrapped
    // command has to reach the remote, so keep the host network.
    '--share-net',
    '--new-session',                               // prevent tty hijack
    '--die-with-parent',                           // cleanup on parent exit
    ...innerArgs,
  ];
}
|
|
|
|
/**
 * Assemble the command line for a hardened `git` invocation on the current
 * platform. The hardening config flags are always prepended to `gitArgs`;
 * an OS sandbox wrapper is added when one is available.
 *
 * @param {string} tmpDir Directory the sandbox leaves writable
 * @param {string[]} gitArgs git sub-command and its arguments
 * @returns {{cmd: string, args: string[], sandbox: 'sandbox-exec'|'bwrap'|null}}
 *   `sandbox` names the wrapper in use, or is null for plain git
 */
function buildSandboxedClone(tmpDir, gitArgs) {
  const hardenedGit = ['git', ...GIT_SANDBOX_CONFIG, ...gitArgs];

  // macOS: sandbox-exec
  const sbProfile = buildSandboxProfile(tmpDir);
  if (sbProfile) {
    return {
      cmd: 'sandbox-exec',
      args: ['-p', sbProfile, ...hardenedGit],
      sandbox: 'sandbox-exec',
    };
  }

  // Linux: bwrap
  const wrappedArgs = buildBwrapArgs(tmpDir, hardenedGit);
  if (wrappedArgs) {
    return { cmd: 'bwrap', args: wrappedArgs, sandbox: 'bwrap' };
  }

  // Fallback: git with config flags only (drop the leading 'git' token)
  const [, ...plainArgs] = hardenedGit;
  return { cmd: 'git', args: plainArgs, sandbox: null };
}
|
|
|
|
/**
 * Scan a freshly cloned repo for .gitattributes filter/diff/merge directives.
 * Filter drivers run on checkout via `smudge`/`clean` commands; even after the
 * sandboxed clone completes, downstream consumers that check out files outside
 * the sandbox can trigger arbitrary code via these drivers. Returns a list of
 * advisory entries; caller is responsible for surfacing them to stderr.
 *
 * Only the `.gitattributes` file directly inside `repoDir` is read. A missing
 * or unreadable file yields an empty list.
 *
 * Comment handling follows gitattributes rules: a line is a comment only when
 * its first non-blank character is `#`. A `#` later in the line is part of the
 * pattern or attribute text, so the whole line is scanned — otherwise an entry
 * such as `a#b.txt filter=x` would slip past the advisory.
 *
 * @param {string} repoDir Cloned-repo path
 * @returns {{kind: 'filter'|'diff'|'merge', driver: string, line: number, raw: string}[]}
 */
export function scanGitAttributes(repoDir) {
  const attrPath = join(repoDir, '.gitattributes');

  let text;
  try {
    text = readFileSync(attrPath, 'utf8');
  } catch {
    // Missing, unreadable, or not a regular file: nothing to report.
    return [];
  }

  // matchAll works on a copy of the regex, so sharing one /g instance across
  // lines carries no lastIndex state between them.
  const driverRe = /\b(filter|diff|merge)\s*=\s*(\S+)/g;
  const warnings = [];

  text.split('\n').forEach((rawLine, index) => {
    const entry = rawLine.trim();
    if (entry === '' || entry.startsWith('#')) return;
    for (const [, kind, driver] of entry.matchAll(driverRe)) {
      warnings.push({ kind, driver, line: index + 1, raw: entry });
    }
  });

  return warnings;
}
|
|
|
|
// Export for testing
|
|
export {
|
|
GIT_SANDBOX_CONFIG, GIT_SANDBOX_ENV, buildSandboxProfile, buildBwrapArgs,
|
|
buildSandboxedClone, MAX_CLONE_SIZE_MB,
|
|
};
|
|
|
|
// CLI entry point — only run when invoked directly
import { fileURLToPath } from 'node:url';

const __filename = fileURLToPath(import.meta.url);

/**
 * True when this module is the script Node was started with.
 *
 * The two paths are compared after symlink resolution as well as literally:
 * process.argv[1] keeps the path as typed, so launching the script through a
 * symlink would otherwise never match and the CLI would silently do nothing.
 */
const isDirectRun = (() => {
  const entry = process.argv[1];
  if (!entry) return false;
  if (entry === __filename) return true;
  try {
    return realpathSync(entry) === realpathSync(__filename);
  } catch {
    // Entry path cannot be resolved (e.g. not a real file): not a direct run.
    return false;
  }
})();
|
|
|
|
// Command dispatcher for direct invocation:
//   clone    — sandboxed shallow clone into a fresh temp dir; prints the dir on stdout
//   cleanup  — removes a directory, but only one located inside the system temp dir
//   validate — exit status 0/1 tells whether the URL is an accepted GitHub remote
if (isDirectRun) {
  const [, , command, ...rest] = process.argv;

  switch (command) {
    case 'clone': {
      const { branch, positional } = parseArgs(rest);
      const url = positional[0];

      if (!url) {
        console.error('clone: URL required');
        process.exit(1);
      }

      if (!isValidUrl(url)) {
        console.error(`clone: invalid GitHub URL: ${url}`);
        console.error('Supported: https://github.com/user/repo or git@github.com:user/repo.git');
        process.exit(1);
      }

      const tmpDir = mkdtempSync(join(tmpdir(), 'llm-sec-'));
      const gitArgs = ['clone', '--depth', '1'];
      if (branch) gitArgs.push('--branch', branch);
      gitArgs.push(url, tmpDir);

      // Build sandboxed clone command (macOS: sandbox-exec, Linux: bwrap, fallback: git only)
      const { cmd: cloneCmd, args: cloneArgs, sandbox } = buildSandboxedClone(tmpDir, gitArgs);

      if (!sandbox) {
        console.error('clone: WARN: no OS sandbox available, running with git config hardening only');
      }

      const result = spawnSync(cloneCmd, cloneArgs, {
        stdio: ['ignore', 'pipe', 'pipe'],
        timeout: 60_000,
        env: GIT_SANDBOX_ENV,
      });

      if (result.status !== 0) {
        // Clean up on failure (best effort — the clone error is what matters)
        try { rmSync(tmpDir, { recursive: true, force: true }); } catch {}
        // When the process never ran or timed out there may be no stderr;
        // fall back to the spawn error so the cause is still reported.
        const stderr = result.stderr?.toString().trim() || result.error?.message || 'unknown error';
        console.error(`clone: git clone failed: ${stderr}`);
        process.exit(1);
      }

      // Post-clone size check (skipped when `du` is unavailable or fails)
      const duResult = spawnSync('du', ['-sm', tmpDir], { encoding: 'utf8' });
      if (duResult.status === 0) {
        const sizeMb = parseInt(duResult.stdout.split('\t')[0], 10);
        if (sizeMb > MAX_CLONE_SIZE_MB) {
          try { rmSync(tmpDir, { recursive: true, force: true }); } catch {}
          console.error(`clone: repo too large (${sizeMb}MB, max ${MAX_CLONE_SIZE_MB}MB)`);
          process.exit(1);
        }
      }

      // Post-clone .gitattributes filter-driver advisory.
      // Driver names and raw lines come from the untrusted repository, so
      // control characters (tab excepted) are masked before printing to keep
      // terminal escape sequences out of the advisory output.
      const printable = (value) => String(value).replace(/[\u0000-\u0008\u000a-\u001f\u007f-\u009f]/g, '?');
      const attrWarnings = scanGitAttributes(tmpDir);
      for (const w of attrWarnings) {
        console.error(`clone: MEDIUM: .gitattributes ${w.kind} driver "${printable(w.driver)}" on line ${w.line}: ${printable(w.raw)}`);
      }
      if (attrWarnings.length > 0) {
        console.error('clone: filter drivers run on checkout — verify the smudge/clean commands before moving the clone outside the sandbox.');
      }

      process.stdout.write(tmpDir + '\n');
      break;
    }

    case 'cleanup': {
      const dir = rest[0];
      if (!dir) {
        console.error('cleanup: directory path required');
        process.exit(1);
      }

      // Safety: only remove paths strictly inside the system temp directory.
      // A plain string-prefix test is not enough: "/tmp/../etc" and "/tmpfoo"
      // both start with "/tmp". Normalise first and require a separator after
      // the root. The temp dir is accepted both as reported and as resolved,
      // because the two differ when the temp dir itself is a symlink.
      const tmpRoots = [resolve(tmpdir())];
      try {
        tmpRoots.push(realpathSync(tmpdir()));
      } catch {
        // Temp dir cannot be resolved; the lexical root alone is used.
      }
      const isInsideTmp = (candidate) => tmpRoots.some((root) => candidate.startsWith(root + sep));
      const refuse = () => {
        console.error(`cleanup: refusing to remove path outside tmpdir: ${dir}`);
        process.exit(1);
      };

      const lexical = resolve(dir);
      if (!isInsideTmp(lexical)) refuse();

      if (existsSync(lexical)) {
        // Resolve symlinks in the parent components only: a link in the middle
        // of the path must not redirect the delete outside the temp dir, while
        // a link as the final component is removed itself, not its target.
        const target = join(realpathSync(dirname(lexical)), basename(lexical));
        if (!isInsideTmp(target)) refuse();
        rmSync(target, { recursive: true, force: true });
        process.stdout.write(`Removed ${dir}\n`);
      }
      break;
    }

    case 'validate': {
      const url = rest[0];
      if (!url || !isValidUrl(url)) {
        process.exit(1);
      }
      process.exit(0);
    }

    default:
      console.error('Usage: node git-clone.mjs <clone|cleanup|validate> [args...]');
      process.exit(1);
  }
} // end isDirectRun
|