ktg-plugin-marketplace/plugins/config-audit/scanners/lib/file-discovery.mjs

308 lines
9.5 KiB
JavaScript

/**
* Config file discovery for config-audit.
* Finds CLAUDE.md, settings.json, hooks.json, .mcp.json, rules/, plugin.json, etc.
* Zero external dependencies.
*/
import { readdir, stat, readFile } from 'node:fs/promises';
import { join, resolve, relative, extname, basename, dirname, sep } from 'node:path';
/**
 * Directory names that discovery never descends into.
 * walkForConfig bumps its `skipped` counter for each match instead of walking it.
 */
const SKIP_DIRS = new Set([
  'node_modules',
  '.git',
  'dist',
  'build',
  'coverage',
  '__pycache__',
  '.next',
  '.nuxt',
  '.output',
  '.cache',
  '.turbo',
  '.parcel-cache',
  'vendor',
  'venv',
  '.venv',
  '.tox',
]);
/**
* Config file patterns to discover.
* Each value is a RegExp over a file (or directory) name.
* NOTE(review): within this file only `claudeMd` is referenced (by classifyFile);
* the remaining entries look unused here — confirm against other modules before removing.
*/
const CONFIG_PATTERNS = {
// CLAUDE.md or CLAUDE.local.md, matched case-insensitively
claudeMd: /^CLAUDE\.md$|^CLAUDE\.local\.md$/i,
// settings.json or settings.local.json (case-sensitive)
settingsJson: /^settings\.json$|^settings\.local\.json$/,
// exactly ".mcp.json"
mcpJson: /^\.mcp\.json$/,
// exactly "plugin.json"
pluginJson: /^plugin\.json$/,
// exactly "hooks.json"
hooksJson: /^hooks\.json$/,
// a name that is exactly "rules"
rulesDir: /^rules$/,
// any name ending in ".md" (same pattern as commandsMd)
agentsMd: /\.md$/,
// any name ending in ".md" (same pattern as agentsMd)
commandsMd: /\.md$/,
// SKILL.md, matched case-insensitively
skillsMd: /^SKILL\.md$/i,
// exactly "keybindings.json"
keybindings: /^keybindings\.json$/,
// exactly ".claude.json"
claudeJson: /^\.claude\.json$/,
};
/**
 * Discover all Claude Code config files under a target path.
 *
 * The target tree is walked first. When `opts.includeGlobal` is set, the user's
 * `~/.claude/` directory and `~/.claude.json` are appended with scope `'user'`.
 * Global entries whose resolved absolute path was already found by the target
 * walk are not added again, and the combined result never exceeds `maxFiles`.
 * If neither HOME nor USERPROFILE is set, the global pass is skipped.
 *
 * @param {string} targetPath - root directory to scan
 * @param {object} [opts]
 * @param {number} [opts.maxFiles=2000] - max files to return
 * @param {number} [opts.maxDepth=10] - depth limit handed to the directory walk
 * @param {boolean} [opts.includeGlobal=false] - also scan ~/.claude/ and ~/.claude.json
 * @returns {Promise<{ files: ConfigFile[], skipped: number }>}
 *
 * @typedef {{ absPath: string, relPath: string, type: string, scope: string, size: number }} ConfigFile
 */
export async function discoverConfigFiles(targetPath, opts = {}) {
  // `||` rather than `??` is kept on purpose: a falsy option (0, undefined) means "use the default".
  const maxFiles = opts.maxFiles || 2000;
  const maxDepth = opts.maxDepth || 10;
  const files = [];
  const skippedRef = { count: 0 };

  await walkForConfig(targetPath, targetPath, files, skippedRef, maxFiles, undefined, maxDepth);

  const home = process.env.HOME || process.env.USERPROFILE || '';
  // With no home directory, join('', '.claude') would silently turn into a path
  // relative to the current working directory, so there is nothing global to scan.
  if (!opts.includeGlobal || !home) {
    return { files, skipped: skippedRef.count };
  }

  // Track what the target walk already produced so the global pass cannot
  // report the same file twice (e.g. when targetPath is the home directory).
  const seen = new Set(files.map((file) => resolve(file.absPath)));
  const addGlobal = (file) => {
    const key = resolve(file.absPath);
    if (files.length >= maxFiles || seen.has(key)) return;
    seen.add(key);
    files.push(file);
  };

  const remaining = maxFiles - files.length;
  if (remaining > 0) {
    const claudeDir = join(home, '.claude');
    // Collected separately so duplicates can be filtered before merging.
    const globalFiles = [];
    try {
      await stat(claudeDir);
      await walkForConfig(claudeDir, claudeDir, globalFiles, skippedRef, remaining, 'user', maxDepth);
    } catch { /* .claude dir doesn't exist */ }
    globalFiles.forEach(addGlobal);
  }

  // ~/.claude.json
  const claudeJson = join(home, '.claude.json');
  try {
    const s = await stat(claudeJson);
    addGlobal({
      absPath: claudeJson,
      relPath: '.claude.json',
      type: 'claude-json',
      scope: 'user',
      size: s.size,
    });
  } catch { /* doesn't exist */ }

  return { files, skipped: skippedRef.count };
}
/**
 * Tail of the scan root starting at its first `.claude` / `.claude-plugin`
 * segment, or '' when the root is not inside such a directory.
 * Prepending it to a root-relative path restores the marker that
 * classification looks for.
 */
function claudeContextPrefix(basePath) {
  const segments = resolve(basePath).split(sep);
  const first = segments.findIndex((seg) => seg === '.claude' || seg === '.claude-plugin');
  return first === -1 ? '' : segments.slice(first).join(sep);
}

/**
 * Walk directory tree looking for config files.
 *
 * Matching files are appended to `files` until `maxFiles` is reached.
 * Unreadable directories and files that cannot be stat'ed are skipped silently.
 *
 * @param {string} dir - directory to read on this call
 * @param {string} basePath - scan root; `relPath` values are relative to it
 * @param {ConfigFile[]} files - accumulator, mutated in place
 * @param {{ count: number }} skippedRef - incremented for every SKIP_DIRS hit
 * @param {number} maxFiles - stop once `files` holds this many entries
 * @param {string} [forceScope] - when set, used as the scope of every file found
 * @param {number} maxDepth - depth limit for ordinary subdirectories
 * @returns {Promise<void>}
 */
async function walkForConfig(dir, basePath, files, skippedRef, maxFiles, forceScope, maxDepth) {
  if (files.length >= maxFiles) return;

  let entries;
  try {
    entries = await readdir(dir, { withFileTypes: true });
  } catch {
    return;
  }

  // When the scan root is itself `.claude` (or sits below one), root-relative
  // paths such as "settings.json" or "rules" no longer mention `.claude`, so the
  // substring checks used for classification would miss them.
  const contextPrefix = claudeContextPrefix(basePath);

  for (const entry of entries) {
    if (files.length >= maxFiles) break;

    const fullPath = join(dir, entry.name);
    const rel = relative(basePath, fullPath);
    // Used only for classification; the reported relPath stays root-relative.
    const contextPath = contextPrefix ? join(contextPrefix, rel) : rel;

    if (entry.isDirectory()) {
      if (SKIP_DIRS.has(entry.name)) {
        skippedRef.count++;
        continue;
      }

      // Check for .claude directory (contains settings, rules, etc.)
      if (entry.name === '.claude' || entry.name === '.claude-plugin') {
        await walkForConfig(fullPath, basePath, files, skippedRef, maxFiles, forceScope, maxDepth);
        continue;
      }

      // Check for rules/ inside .claude
      if (entry.name === 'rules' && dirname(contextPath).includes('.claude')) {
        await walkRulesDir(fullPath, basePath, files, maxFiles, forceScope || classifyScope(rel, basePath));
        continue;
      }

      // agents/, commands/, skills/, hooks/ are always entered, regardless of depth
      if (['agents', 'commands', 'skills', 'hooks'].includes(entry.name)) {
        await walkForConfig(fullPath, basePath, files, skippedRef, maxFiles, forceScope, maxDepth);
        continue;
      }

      // Recurse into subdirectories (configurable depth limit)
      const depth = rel.split(sep).length;
      if (depth < maxDepth) {
        await walkForConfig(fullPath, basePath, files, skippedRef, maxFiles, forceScope, maxDepth);
      }
      continue;
    }

    if (!entry.isFile()) continue;

    const fileType = classifyFile(entry.name, contextPath);
    if (!fileType) continue;

    let s;
    try {
      s = await stat(fullPath);
    } catch {
      continue;
    }

    files.push({
      absPath: fullPath,
      relPath: rel,
      type: fileType,
      scope: forceScope || classifyScope(rel, basePath),
      size: s.size,
    });
  }
}
/**
 * Recursively collect every file below a rules directory as a `'rule'` entry.
 * No filtering by extension is applied, so non-.md files are reported too
 * (callers can validate them). Unreadable directories and files that fail
 * stat() are skipped.
 *
 * @param {string} dir - rules directory (or a subdirectory of it) to read
 * @param {string} basePath - root used to compute each entry's relPath
 * @param {ConfigFile[]} files - accumulator, mutated in place
 * @param {number} maxFiles - stop once `files` holds this many entries
 * @param {string} scope - scope recorded on every entry
 * @returns {Promise<void>}
 */
async function walkRulesDir(dir, basePath, files, maxFiles, scope) {
  let listing;
  try {
    listing = await readdir(dir, { withFileTypes: true });
  } catch {
    return;
  }

  for (const child of listing) {
    if (files.length >= maxFiles) break;

    const childPath = join(dir, child.name);

    if (!child.isFile()) {
      // Only real directories are followed; anything else is ignored.
      if (child.isDirectory()) {
        await walkRulesDir(childPath, basePath, files, maxFiles, scope);
      }
      continue;
    }

    let info;
    try {
      info = await stat(childPath);
    } catch {
      continue;
    }

    const record = {
      absPath: childPath,
      relPath: relative(basePath, childPath),
      type: 'rule',
      scope,
      size: info.size,
    };
    files.push(record);
  }
}
/**
 * Map a file to its config type from its name and its path.
 *
 * @param {string} name - bare file name
 * @param {string} relPath - path of the file; only substring checks are made against it
 * @returns {string | null} the type tag, or null when the file is not a recognised config file
 */
function classifyFile(name, relPath) {
  if (CONFIG_PATTERNS.claudeMd.test(name)) return 'claude-md';

  // Exact-name matches. A name whose location check fails falls through to the
  // markdown checks below, where it cannot match either.
  switch (name) {
    case 'settings.json':
    case 'settings.local.json':
      if (relPath.includes('.claude')) return 'settings-json';
      break;
    case '.mcp.json':
      return 'mcp-json';
    case 'plugin.json':
      if (relPath.includes('.claude-plugin')) return 'plugin-json';
      break;
    case 'hooks.json':
      // NOTE(review): this is always true when relPath ends with the file name
      // itself ("hooks.json" contains "hooks") — confirm whether a hooks/
      // directory check was intended.
      if (relPath.includes('hooks')) return 'hooks-json';
      break;
    case 'keybindings.json':
      return 'keybindings-json';
    case '.claude.json':
      return 'claude-json';
    default:
      break;
  }

  // Agent/command/skill markdown files; agents win over commands, both win over SKILL.md.
  const isMarkdown = name.endsWith('.md');
  if (isMarkdown && relPath.includes(`agents${sep}`)) return 'agent-md';
  if (isMarkdown && relPath.includes(`commands${sep}`)) return 'command-md';
  return /^SKILL\.md$/i.test(name) ? 'skill-md' : null;
}
/**
 * Determine the scope of a config file.
 *
 * Checks run in priority order: managed → plugin → local → user → project.
 * "user" means the scan root is the `.claude` directory under HOME/USERPROFILE
 * (or a directory below it). The comparison is done on resolved paths and on a
 * path-segment boundary, so a sibling such as `~/.claude-backup` is not treated
 * as the user directory. When no home directory is configured the user check is
 * skipped.
 *
 * @param {string} relPath - path of the file relative to the scan root
 * @param {string} basePath - scan root the file was found under
 * @returns {'managed' | 'user' | 'project' | 'local' | 'plugin'}
 */
function classifyScope(relPath, basePath) {
  if (relPath.includes('managed-settings')) return 'managed';
  if (basePath.includes(`.claude${sep}plugins`)) return 'plugin';
  if (relPath.includes('.local.')) return 'local';

  const home = process.env.HOME || process.env.USERPROFILE || '';
  if (home) {
    const userDir = resolve(home, '.claude');
    const base = resolve(basePath);
    if (base === userDir || base.startsWith(`${userDir}${sep}`)) return 'user';
  }

  return 'project';
}
/**
 * Directory names, looked up directly under the home directory, that
 * discoverFullMachinePaths offers as scan roots when they exist.
 */
const DEV_DIRS = [
  'repos',
  'projects',
  'src',
  'code',
  'dev',
  'work',
  'Sites',
  'Developer',
];
/**
 * Discover all root paths for a full-machine scan.
 * Only returns paths that actually exist on the filesystem and are directories.
 *
 * Home-based candidates (`~/.claude` and the DEV_DIRS entries) are offered only
 * when HOME or USERPROFILE is set; without one they would collapse to paths
 * relative to the current working directory. Candidate order is preserved in
 * the result.
 *
 * @returns {Promise<Array<{ path: string, maxDepth: number }>>}
 */
export async function discoverFullMachinePaths() {
  const home = process.env.HOME || process.env.USERPROFILE || '';

  const candidates = [
    // ~/.claude — deepest (plugins can be 6+ levels deep)
    ...(home ? [{ path: join(home, '.claude'), maxDepth: 10 }] : []),
    // Managed system paths
    { path: '/Library/Application Support/ClaudeCode', maxDepth: 5 },
    { path: '/etc/claude-code', maxDepth: 5 },
    // Common developer directories
    ...(home ? DEV_DIRS.map((d) => ({ path: join(home, d), maxDepth: 5 })) : []),
  ];

  // The existence checks are independent, so run them together.
  const isDir = await Promise.all(
    candidates.map(async (candidate) => {
      try {
        const s = await stat(candidate.path);
        return s.isDirectory();
      } catch {
        return false; // not present
      }
    }),
  );

  return candidates.filter((_, index) => isDir[index]);
}
/**
 * Discover config files across multiple root paths.
 * Calls discoverConfigFiles() per root with correct basePath (preserves scope/relPath).
 * Deduplicates files by absPath — first occurrence wins.
 *
 * After the roots, `~/.claude.json` is added (scope `'user'`) if it exists, was
 * not already found, and the cap has not been reached. It is skipped entirely
 * when neither HOME nor USERPROFILE is set, because the path would otherwise
 * resolve relative to the current working directory.
 *
 * @param {Array<{ path: string, maxDepth: number }>} roots
 * @param {object} [opts]
 * @param {number} [opts.maxFiles=2000] - global max across all roots
 * @returns {Promise<{ files: ConfigFile[], skipped: number }>}
 */
export async function discoverConfigFilesMulti(roots, opts = {}) {
  const maxFiles = opts.maxFiles || 2000;
  const seen = new Set();
  const allFiles = [];
  let totalSkipped = 0;

  for (const root of roots) {
    if (allFiles.length >= maxFiles) break;

    // Each root only gets the budget that is still left.
    const result = await discoverConfigFiles(root.path, {
      maxFiles: maxFiles - allFiles.length,
      maxDepth: root.maxDepth,
    });
    totalSkipped += result.skipped;

    for (const file of result.files) {
      // Guard the global cap even if a root returned more than its budget.
      if (allFiles.length >= maxFiles) break;
      if (seen.has(file.absPath)) continue;
      seen.add(file.absPath);
      allFiles.push(file);
    }
  }

  // Handle ~/.claude.json separately (single file, not a directory)
  const home = process.env.HOME || process.env.USERPROFILE || '';
  if (home && allFiles.length < maxFiles) {
    const claudeJson = join(home, '.claude.json');
    if (!seen.has(claudeJson)) {
      try {
        const s = await stat(claudeJson);
        allFiles.push({
          absPath: claudeJson,
          relPath: '.claude.json',
          type: 'claude-json',
          scope: 'user',
          size: s.size,
        });
      } catch { /* doesn't exist */ }
    }
  }

  return { files: allFiles, skipped: totalSkipped };
}
/**
 * Load a file as UTF-8 text.
 * Yields null when the file cannot be read, or when a NUL character occurs
 * within the first 8192 characters of the decoded text (treated as binary).
 * @param {string} absPath
 * @returns {Promise<string | null>}
 */
export async function readTextFile(absPath) {
  let text;
  try {
    text = await readFile(absPath, 'utf-8');
  } catch {
    return null;
  }

  // Binary sniff: only a NUL inside the leading 8192-character window counts.
  const firstNul = text.indexOf('\0');
  const looksBinary = firstNul !== -1 && firstNul < 8192;
  return looksBinary ? null : text;
}