feat(llm-security-copilot): port llm-security v5.1.0 to GitHub Copilot CLI
Full port of the llm-security plugin for internal use on Windows with GitHub Copilot CLI.

Protocol translation layer (copilot-hook-runner.mjs) normalizes Copilot camelCase I/O to Claude Code snake_case format — all original hook scripts run unmodified.

- 8 hooks with protocol translation (stdin/stdout/exit code)
- 18 SKILL.md skills (Agent Skills Open Standard)
- 6 .agent.md agent definitions
- 20 scanners + 14 scanner lib modules (unchanged)
- 14 knowledge files (unchanged)
- 39 test files including copilot-port-verify.mjs (17 tests)
- Windows-ready: node:path, os.tmpdir(), process.execPath, no bash

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
901bf0ae12
commit
f418a8fe08
169 changed files with 37631 additions and 0 deletions
145
plugins/llm-security-copilot/scanners/lib/file-discovery.mjs
Normal file
145
plugins/llm-security-copilot/scanners/lib/file-discovery.mjs
Normal file
|
|
@ -0,0 +1,145 @@
|
|||
// file-discovery.mjs — Walk directory tree, filter, binary detection
|
||||
// Zero dependencies (Node.js builtins only).
|
||||
|
||||
import { readdir, stat, readFile } from 'node:fs/promises';
|
||||
import { join, relative, extname } from 'node:path';
|
||||
|
||||
// Extensions we scan (text-based). Entries are lowercase and include the leading dot.
//
// NOTE: path.extname() only ever returns the final dot-suffix of a name, so a
// lookup keyed on extname() can never produce the multi-dot entries
// ('.env.local', '.env.example'). extname('.env') is '' as well, so a bare
// `.env` file resolves through the empty-string entry, not through '.env'.
const TEXT_EXTENSIONS = new Set([
  // JavaScript / TypeScript
  '.js', '.mjs', '.cjs', '.ts', '.mts', '.cts', '.jsx', '.tsx',
  // Python
  '.py', '.pyw',
  // Structured data and configuration
  '.json', '.jsonc', '.json5',
  '.yaml', '.yml',
  '.toml',
  // Documentation
  '.md', '.mdx',
  // Shell
  '.sh', '.bash', '.zsh',
  // Environment files
  '.env', '.env.local', '.env.example',
  '.cfg', '.ini', '.conf',
  // Markup and styles
  '.xml', '.html', '.htm', '.svg',
  '.css', '.scss', '.less',
  '.sql',
  // Other programming languages
  '.rs', '.go', '.java', '.kt', '.cs', '.c', '.cpp', '.h', '.hpp',
  '.rb', '.php', '.lua', '.swift', '.m',
  // Plain text
  '.txt', '.csv', '.log',
  '.lock', // package-lock.json, yarn.lock, etc.
  '.dockerfile', '', // Dockerfile, Makefile, etc. (no extension)
]);
|
||||
|
||||
// Directories to always skip, matched against the bare directory name
// (not the full path): VCS metadata, dependency trees, caches, virtual
// environments and build/coverage output.
const SKIP_DIRS = new Set([
  'node_modules', '.git', '.hg', '.svn',
  '__pycache__', '.pytest_cache', '.mypy_cache',
  'dist', 'build', '.next', '.nuxt',
  '.venv', 'venv', 'env',
  'coverage', '.nyc_output',
  '.angular', '.cache',
]);
|
||||
|
||||
// Max file size to read, in bytes (512 KiB = 524288).
const MAX_FILE_SIZE = 512 * 1024;
|
||||
|
||||
/**
 * Decide whether a directory should be pruned from the walk.
 * Dot-directories and SKIP_DIRS members are pruned, except for a short
 * allowlist of dot-directories that are always walked.
 * @param {string} name - Bare directory name
 * @returns {boolean}
 */
function shouldSkipDir(name) {
  if (name === '.claude-plugin' || name === '.github' || name === '.claude') {
    return false;
  }
  return SKIP_DIRS.has(name) || name.startsWith('.');
}

/**
 * Decide whether a file name looks like scannable text.
 * @param {string} name - Bare file name
 * @param {string} ext - Lowercased result of path.extname(name)
 * @returns {boolean}
 */
function isScannableFile(name, ext) {
  if (TEXT_EXTENSIONS.has(ext)) return true;
  // Extensionless, non-dot names (Dockerfile, Makefile, etc.)
  if (ext === '' && !name.startsWith('.')) return true;
  // dotenv variants: extname('.env.local') is '.local', so these can never be
  // matched by extension and have to be recognised by name instead.
  const lowered = name.toLowerCase();
  return lowered === '.env' || lowered.startsWith('.env.');
}

/**
 * Discover all scannable files under a target path.
 *
 * Recursively walks the directory tree, pruning skipped directories, and
 * collects regular files that look like text and are non-empty and no larger
 * than the size limit. Entries that are neither a directory nor a regular
 * file according to their Dirent are ignored and not counted.
 *
 * `skipped` counts rejected files (unknown extension, empty, too large, or
 * stat failure) plus directories that could not be read. Pruned directories
 * are not counted. `truncated` becomes true as soon as `maxFiles` files have
 * been collected, at which point the walk stops.
 *
 * @param {string} targetPath - Absolute path to scan
 * @param {object} [opts]
 * @param {number} [opts.maxFiles=5000] - Stop after this many files
 * @param {number} [opts.maxFileSize=524288] - Skip files larger than this
 * @returns {Promise<{ files: FileInfo[], skipped: number, truncated: boolean }>}
 *
 * @typedef {{ absPath: string, relPath: string, ext: string, size: number }} FileInfo
 */
export async function discoverFiles(targetPath, opts = {}) {
  // `||` means any falsy value (undefined, 0, NaN) falls back to the default.
  const maxFiles = opts.maxFiles || 5000;
  const maxFileSize = opts.maxFileSize || MAX_FILE_SIZE;
  const files = [];
  let skipped = 0;
  let truncated = false;

  async function walk(dir) {
    if (truncated) return;

    let entries;
    try {
      entries = await readdir(dir, { withFileTypes: true });
    } catch {
      // Unreadable (or non-directory) path: count it and carry on.
      skipped++;
      return;
    }

    for (const entry of entries) {
      // A nested walk() may have hit the limit since the last iteration.
      if (truncated) return;

      const fullPath = join(dir, entry.name);

      if (entry.isDirectory()) {
        if (!shouldSkipDir(entry.name)) {
          await walk(fullPath);
        }
        continue;
      }

      if (!entry.isFile()) continue;

      const ext = extname(entry.name).toLowerCase();
      if (!isScannableFile(entry.name, ext)) {
        skipped++;
        continue;
      }

      let size;
      try {
        ({ size } = await stat(fullPath));
      } catch {
        // File vanished or is not accessible.
        skipped++;
        continue;
      }

      // Empty files have nothing to scan; oversized files are not read.
      if (size === 0 || size > maxFileSize) {
        skipped++;
        continue;
      }

      files.push({
        absPath: fullPath,
        relPath: relative(targetPath, fullPath),
        ext,
        size,
      });

      if (files.length >= maxFiles) {
        truncated = true;
        return;
      }
    }
  }

  await walk(targetPath);
  return { files, skipped, truncated };
}
|
||||
|
||||
/**
 * Read file content as UTF-8 string, with binary detection.
 *
 * A file is treated as binary when a NUL byte occurs within its first 8 KiB;
 * bytes beyond that window are not inspected. The content is decoded as
 * UTF-8 regardless of its real encoding.
 *
 * Returns null if the file appears to be binary, and also if it cannot be
 * read at all (missing, permission denied, is a directory, ...) — read
 * errors are deliberately not propagated to the caller.
 *
 * @param {string} absPath
 * @returns {Promise<string|null>}
 */
export async function readTextFile(absPath) {
  try {
    const buf = await readFile(absPath);
    // Quick binary check: look for null bytes in first 8KB.
    // subarray() is a view, not a copy, and clamps to the buffer length.
    if (buf.subarray(0, 8192).includes(0)) return null;
    return buf.toString('utf-8');
  } catch {
    return null;
  }
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue