From 0f4b0c5f2cdf928dfa2aee09ca4257a9dfe0532d Mon Sep 17 00:00:00 2001 From: Kjell Tore Guttormsen Date: Thu, 30 Apr 2026 15:29:13 +0200 Subject: [PATCH] =?UTF-8?q?feat(git-clone):=20E12=20=E2=80=94=20.gitattrib?= =?UTF-8?q?utes=20filter-driver=20post-clone=20advisory?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds scanGitAttributes(repoDir) — a read-only helper that parses the top-level .gitattributes after a sandboxed clone and returns every filter=, diff= and merge= driver directive it finds. The clone CLI prints each entry as a "MEDIUM" stderr advisory followed by a recommendation to verify the smudge/clean command before moving the clone outside the sandbox. Why: a filter driver that is configured in git config executes an arbitrary shell command (smudge when a file is checked out into the working tree, clean when it is staged); diff and merge drivers run likewise during diff and merge. Even with the existing sandboxed clone, downstream consumers that re-checkout files outside the sandbox can be exploited. Surfacing the directive list lets the caller decide whether to proceed. Out-of-scope: in-line content of the smudge command is not analysed, and .gitattributes files in subdirectories are not read — the advisory is for human review, not automatic blocking. Tests: - tests/lib/git-clone-gitattributes.test.mjs (8 cases): LFS-style, custom driver, missing/empty/comment-only files, line-number tracking, inline-comment stripping, nonexistent-path graceful return. 
--- .../llm-security/scanners/lib/git-clone.mjs | 50 +++++++- .../lib/git-clone-gitattributes.test.mjs | 113 ++++++++++++++++++ 2 files changed, 162 insertions(+), 1 deletion(-) create mode 100644 plugins/llm-security/tests/lib/git-clone-gitattributes.test.mjs diff --git a/plugins/llm-security/scanners/lib/git-clone.mjs b/plugins/llm-security/scanners/lib/git-clone.mjs index 868cb74..7f9ef3a 100644 --- a/plugins/llm-security/scanners/lib/git-clone.mjs +++ b/plugins/llm-security/scanners/lib/git-clone.mjs @@ -5,7 +5,7 @@ // node git-clone.mjs cleanup → removes temp directory // node git-clone.mjs validate → exits 0 if valid GitHub URL, 1 if not -import { mkdtempSync, rmSync, existsSync, realpathSync } from 'node:fs'; +import { mkdtempSync, rmSync, existsSync, realpathSync, readFileSync } from 'node:fs'; import { join } from 'node:path'; import { tmpdir } from 'node:os'; import { spawnSync } from 'node:child_process'; @@ -120,6 +120,45 @@ function buildSandboxedClone(tmpDir, gitArgs) { return { cmd: 'git', args: innerGitArgs, sandbox: null }; } +/** + * Scan a freshly cloned repo for .gitattributes filter/diff/merge directives. + * Filter drivers run on checkout via `smudge`/`clean` commands; even after the + * sandboxed clone completes, downstream consumers that check out files outside + * the sandbox can trigger arbitrary code via these drivers. Returns a list of + * advisory entries; caller is responsible for surfacing them to stderr. 
+ * + * @param {string} repoDir Cloned-repo path + * @returns {{kind: 'filter'|'diff'|'merge', driver: string, line: number, raw: string}[]} + */ +export function scanGitAttributes(repoDir) { + const attrPath = join(repoDir, '.gitattributes'); + if (!existsSync(attrPath)) return []; + let text; + try { + text = readFileSync(attrPath, 'utf8'); + } catch { + return []; + } + const warnings = []; + const lines = text.split('\n'); + for (let i = 0; i < lines.length; i++) { + const raw = lines[i]; + const stripped = raw.replace(/#.*$/, ''); + if (!stripped.trim()) continue; + const re = /\b(filter|diff|merge)\s*=\s*(\S+)/g; + let m; + while ((m = re.exec(stripped)) !== null) { + warnings.push({ + kind: m[1], + driver: m[2], + line: i + 1, + raw: raw.trim(), + }); + } + } + return warnings; +} + // Export for testing export { GIT_SANDBOX_CONFIG, GIT_SANDBOX_ENV, buildSandboxProfile, buildBwrapArgs, @@ -188,6 +227,15 @@ switch (command) { } } + // Post-clone .gitattributes filter-driver advisory + const attrWarnings = scanGitAttributes(tmpDir); + for (const w of attrWarnings) { + console.error(`clone: MEDIUM: .gitattributes ${w.kind} driver "${w.driver}" on line ${w.line}: ${w.raw}`); + } + if (attrWarnings.length > 0) { + console.error('clone: filter drivers run on checkout — verify the smudge/clean commands before moving the clone outside the sandbox.'); + } + process.stdout.write(tmpDir + '\n'); break; } diff --git a/plugins/llm-security/tests/lib/git-clone-gitattributes.test.mjs b/plugins/llm-security/tests/lib/git-clone-gitattributes.test.mjs new file mode 100644 index 0000000..4b6c584 --- /dev/null +++ b/plugins/llm-security/tests/lib/git-clone-gitattributes.test.mjs @@ -0,0 +1,113 @@ +// git-clone-gitattributes.test.mjs — Tests for E12 .gitattributes filter-driver advisory +// Pure-function tests for scanGitAttributes(); the CLI path is exercised +// indirectly via the existing git-clone-sandbox.test.mjs suite. 
+ +import { describe, it } from 'node:test'; +import assert from 'node:assert/strict'; +import { mkdtempSync, writeFileSync, rmSync } from 'node:fs'; +import { join } from 'node:path'; +import { tmpdir } from 'node:os'; + +const { scanGitAttributes } = await import('../../scanners/lib/git-clone.mjs'); + +function makeRepo(contents) { + const dir = mkdtempSync(join(tmpdir(), 'gitattr-test-')); + if (contents !== null) { + writeFileSync(join(dir, '.gitattributes'), contents); + } + return dir; +} + +describe('scanGitAttributes', () => { + it('flags filter driver directive (LFS-style)', () => { + const dir = makeRepo('*.txt filter=lfs diff=lfs merge=lfs -text\n'); + try { + const warnings = scanGitAttributes(dir); + const kinds = warnings.map(w => w.kind).sort(); + assert.deepEqual(kinds, ['diff', 'filter', 'merge']); + assert.equal(warnings[0].driver, 'lfs'); + assert.equal(warnings[0].line, 1); + assert.ok(warnings[0].raw.includes('filter=lfs')); + } finally { + rmSync(dir, { recursive: true, force: true }); + } + }); + + it('flags custom filter driver', () => { + const dir = makeRepo('secrets.* filter=encrypt diff=encrypt\n'); + try { + const warnings = scanGitAttributes(dir); + assert.equal(warnings.length, 2); + assert.ok(warnings.find(w => w.kind === 'filter' && w.driver === 'encrypt')); + assert.ok(warnings.find(w => w.kind === 'diff' && w.driver === 'encrypt')); + } finally { + rmSync(dir, { recursive: true, force: true }); + } + }); + + it('returns empty array when .gitattributes is absent', () => { + const dir = makeRepo(null); + try { + const warnings = scanGitAttributes(dir); + assert.deepEqual(warnings, []); + } finally { + rmSync(dir, { recursive: true, force: true }); + } + }); + + it('returns empty array on empty .gitattributes', () => { + const dir = makeRepo(''); + try { + assert.deepEqual(scanGitAttributes(dir), []); + } finally { + rmSync(dir, { recursive: true, force: true }); + } + }); + + it('returns empty array when only blank lines and 
comments', () => { + const dir = makeRepo('# comment line\n\n# filter=trap inside comment\n \n'); + try { + assert.deepEqual(scanGitAttributes(dir), []); + } finally { + rmSync(dir, { recursive: true, force: true }); + } + }); + + it('ignores trailing inline comments after stripping', () => { + const dir = makeRepo('*.bin -text # filter=trap (this is a comment)\n'); + try { + assert.deepEqual(scanGitAttributes(dir), []); + } finally { + rmSync(dir, { recursive: true, force: true }); + } + }); + + it('reports correct line numbers across multi-line files', () => { + const dir = makeRepo([ + '# hardening', + '* -text', + '', + '*.lfs filter=lfs', + 'docs/* diff=astextplain', + ].join('\n') + '\n'); + try { + const warnings = scanGitAttributes(dir); + const filter = warnings.find(w => w.kind === 'filter'); + const diff = warnings.find(w => w.kind === 'diff'); + assert.equal(filter.line, 4); + assert.equal(filter.driver, 'lfs'); + assert.equal(diff.line, 5); + assert.equal(diff.driver, 'astextplain'); + } finally { + rmSync(dir, { recursive: true, force: true }); + } + }); + + it('handles unreadable .gitattributes by returning empty array', () => { + // The path below does not exist, so existsSync() returns false and the + // scan short-circuits to []. The readFileSync catch branch (file present + // but unreadable) is not exercised by this case. + const result = scanGitAttributes('/does/not/exist/at/all'); + assert.deepEqual(result, []); + }); +});