feat(git-clone): E12 — .gitattributes filter-driver post-clone advisory

Adds scanGitAttributes(repoDir) — pure function that parses
.gitattributes after a sandboxed clone and returns the
{filter,diff,merge} driver entries that would run on checkout. The
clone CLI prints each entry as a "MEDIUM" stderr advisory followed by
a recommendation to verify the smudge/clean command before moving the
clone outside the sandbox.

Why: filter drivers execute arbitrary shell commands (smudge runs when a
file is checked out, clean when it is staged). Even with the existing sandboxed clone,
downstream consumers that re-checkout files outside the sandbox can be
exploited. Surfacing the directive list lets the caller decide whether
to proceed.

Out-of-scope: in-line content of the smudge command is not analysed —
the advisory is for human review, not automatic blocking.

Tests:
- tests/lib/git-clone-gitattributes.test.mjs (8 cases): LFS-style,
  custom driver, missing/empty/comment-only files, line-number
  tracking, inline-comment stripping, unreadable path graceful return.
This commit is contained in:
Kjell Tore Guttormsen 2026-04-30 15:29:13 +02:00
commit 0f4b0c5f2c
2 changed files with 162 additions and 1 deletions

View file

@ -5,7 +5,7 @@
// node git-clone.mjs cleanup <dir> → removes temp directory
// node git-clone.mjs validate <url> → exits 0 if valid GitHub URL, 1 if not
import { mkdtempSync, rmSync, existsSync, realpathSync } from 'node:fs';
import { mkdtempSync, rmSync, existsSync, realpathSync, readFileSync } from 'node:fs';
import { join } from 'node:path';
import { tmpdir } from 'node:os';
import { spawnSync } from 'node:child_process';
@ -120,6 +120,45 @@ function buildSandboxedClone(tmpDir, gitArgs) {
return { cmd: 'git', args: innerGitArgs, sandbox: null };
}
/**
 * Scan a freshly cloned repo's top-level .gitattributes for filter/diff/merge
 * driver assignments (e.g. `*.bin filter=lfs`).
 *
 * Such drivers name external commands configured in git config; a consumer
 * that later works with the clone outside the sandbox can end up running them.
 * This function only reports the assignments — it never inspects or blocks the
 * commands themselves. The caller is responsible for surfacing the entries.
 *
 * Parsing rules:
 * - Both LF and CRLF line endings are accepted.
 * - A line whose first non-blank character is `#` is a comment and is skipped.
 * - A trailing `#...` token is dropped only when it is preceded by whitespace
 *   and the line does not start with a quoted pattern. A `#` inside a path
 *   pattern (`assets/a#b filter=x`) is part of the pattern, not a comment, so
 *   the attributes that follow it are still reported.
 *
 * A missing or unreadable .gitattributes yields an empty list.
 *
 * @param {string} repoDir Cloned-repo path
 * @returns {{kind: 'filter'|'diff'|'merge', driver: string, line: number, raw: string}[]}
 *   One entry per driver assignment, in file order; `line` is 1-based and
 *   `raw` is the trimmed source line.
 */
export function scanGitAttributes(repoDir) {
  const attrPath = join(repoDir, '.gitattributes');
  let text;
  try {
    text = readFileSync(attrPath, 'utf8');
  } catch {
    // Missing file, directory in its place, permission error: best-effort
    // advisory, so report nothing rather than fail the clone.
    return [];
  }

  const warnings = [];
  // Split on LF or CRLF so a trailing '\r' never ends up in the scanned text.
  const lines = text.split(/\r?\n/);
  for (const [index, raw] of lines.entries()) {
    const body = raw.trimStart();
    if (body === '' || body.startsWith('#')) continue;

    let scanned = body;
    // A pattern that starts with a double quote may itself contain " #", so
    // for quoted patterns scan the whole line; over-reporting is the safe side.
    if (!body.startsWith('"')) {
      const commentStart = body.search(/\s#/);
      if (commentStart !== -1) scanned = body.slice(0, commentStart);
    }

    for (const m of scanned.matchAll(/\b(filter|diff|merge)\s*=\s*(\S+)/g)) {
      warnings.push({
        kind: m[1],
        driver: m[2],
        line: index + 1,
        raw: raw.trim(),
      });
    }
  }
  return warnings;
}
// Export for testing
export {
GIT_SANDBOX_CONFIG, GIT_SANDBOX_ENV, buildSandboxProfile, buildBwrapArgs,
@ -188,6 +227,15 @@ switch (command) {
}
}
// Post-clone .gitattributes filter-driver advisory.
// The driver name and the raw line come straight from the cloned (untrusted)
// repo. Escape control characters before printing so embedded ESC/CR bytes
// cannot rewrite or hide the advisory on a terminal. TAB is left alone because
// it is a common, harmless separator in .gitattributes.
const attrWarnings = scanGitAttributes(tmpDir);
const escapeControlChars = (value) =>
  value.replace(
    /[\x00-\x08\x0a-\x1f\x7f-\x9f]/g,
    (ch) => `\\x${ch.charCodeAt(0).toString(16).padStart(2, '0')}`,
  );
for (const { kind, driver, line, raw } of attrWarnings) {
  console.error(`clone: MEDIUM: .gitattributes ${kind} driver "${escapeControlChars(driver)}" on line ${line}: ${escapeControlChars(raw)}`);
}
if (attrWarnings.length > 0) {
  console.error('clone: filter drivers run on checkout — verify the smudge/clean commands before moving the clone outside the sandbox.');
}
process.stdout.write(tmpDir + '\n');
break;
}