ktg-plugin-marketplace/plugins/llm-security/scanners/lib/file-discovery.mjs
Kjell Tore Guttormsen 6f86de937a feat(llm-security)!: v7.0.0 commit 6 — tests, docs, version bump
Final commit in the trustworthy-scoring series. Bundles verdict cutoff
alignment, the last suite of tests, and all documentation touch-points
that quote version numbers or describe v7.0.0 behaviour.

Verdict/band co-monotonicity
- `scanners/lib/severity.mjs` — verdict cutoffs moved from 61/21 to 65/15
  so that `BLOCK >= 65` and `WARNING >= 15` lock onto the v2 riskBand()
  boundaries. This prevents "BLOCK / Medium band" contradictions under the
  v2 formula.

Scanner hardening (bug fixes from v7.0.0 testing)
- `scanners/entropy-scanner.mjs` — `policy_source` now uses
  `existsSync('.llm-security/policy.json')` instead of value-based check.
  Old heuristic always reported 'policy.json' because DEFAULT_POLICY now
  carries an `entropy.thresholds` section.
- `scanners/lib/file-discovery.mjs` — `.sass` and GPU shader extensions
  (`.glsl, .frag, .vert, .shader, .wgsl`) added to TEXT_EXTENSIONS. Without
  this, shader files were invisible to file-discovery, so they were never
  counted as skipped by the entropy-scanner extension filter.

Tests
- `tests/scanners/entropy-context.test.mjs` (new, 24 tests) — A. File-ext
  skip (4), B. Line-level rules 11-17 (8), C. Policy overrides (3).
  Fixtures generate 80-char base64 payloads at runtime via
  `crypto.randomBytes` to dodge the plugin's own pre-edit credential hook
  on the test source.
- `tests/lib/severity.test.mjs` — rewritten with v2 scoring table (70
  tests total, was 52).
- `tests/lib/output.test.mjs:243` — "1 critical = score 80" under v2
  (was 25 under v1).
- Full suite: 1485/1485 green (was 1461).

Docs
- `CHANGELOG.md` — v7.0.0 entry with BREAKING CHANGES section.
- `README.md` (plugin + marketplace root) — version badge, history table,
  plugin-card version string, test count.
- `CLAUDE.md` — header version, "v7.0.0 — Trustworthy scoring" summary
  paragraph at the top.
- `docs/security-hardening-guide.md` — new section 6 "Calibration & false
  positives" documenting v2 formula, context-aware entropy scanner,
  typosquat allowlist, and §6.4 tuning workflow. Existing "Recommended
  baseline" section renumbered to §7.

Version bump
- `6.6.0 -> 7.0.0` across package.json, .claude-plugin/plugin.json,
  scanners/ide-extension-scanner.mjs VERSION const, README badge,
  CLAUDE.md header, marketplace root README card.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-04-19 22:26:35 +02:00

146 lines
4.2 KiB
JavaScript

// file-discovery.mjs — Walk directory tree, filter, binary detection
// Zero dependencies (Node.js builtins only).
import { readdir, stat, readFile } from 'node:fs/promises';
import { join, relative, extname } from 'node:path';
// Extensions we scan (text-based).
// Single-segment entries are compared against the lower-cased result of
// path.extname(), which only ever yields the LAST ".xyz" segment of a name.
// Multi-segment entries such as '.env.local' therefore can never equal an
// extname() result; they are matched against the whole file name instead
// (see isScannableName).
const TEXT_EXTENSIONS = new Set([
  '.js', '.mjs', '.cjs', '.ts', '.mts', '.cts', '.jsx', '.tsx',
  '.py', '.pyw',
  '.json', '.jsonc', '.json5',
  '.yaml', '.yml',
  '.toml',
  '.md', '.mdx',
  '.sh', '.bash', '.zsh',
  '.env', '.env.local', '.env.example',
  '.cfg', '.ini', '.conf',
  '.xml', '.html', '.htm', '.svg',
  '.css', '.scss', '.sass', '.less',
  '.glsl', '.frag', '.vert', '.shader', '.wgsl', // GPU shader source
  '.sql',
  '.rs', '.go', '.java', '.kt', '.cs', '.c', '.cpp', '.h', '.hpp',
  '.rb', '.php', '.lua', '.swift', '.m',
  '.txt', '.csv', '.log',
  '.lock', // package-lock.json, yarn.lock, etc.
  '.dockerfile', '', // Dockerfile, Makefile, etc. (no extension)
]);

// Directories to always skip
const SKIP_DIRS = new Set([
  'node_modules', '.git', '.hg', '.svn',
  '__pycache__', '.pytest_cache', '.mypy_cache',
  'dist', 'build', '.next', '.nuxt',
  '.venv', 'venv', 'env',
  'coverage', '.nyc_output',
  '.angular', '.cache',
]);

// Dot-directories that are walked even though dot-directories are skipped by default
const ALLOWED_DOT_DIRS = new Set(['.claude-plugin', '.github', '.claude']);

// Max file size to read (512KB)
const MAX_FILE_SIZE = 512 * 1024;

/**
 * Decide whether the walker should descend into a directory.
 * A directory is skipped when it is listed in SKIP_DIRS or its name starts
 * with a dot, unless it is explicitly allow-listed in ALLOWED_DOT_DIRS.
 * @param {string} name - Directory base name
 * @returns {boolean}
 */
function shouldWalkDir(name) {
  if (ALLOWED_DOT_DIRS.has(name)) return true;
  return !SKIP_DIRS.has(name) && !name.startsWith('.');
}

/**
 * Decide whether a file is a scan candidate based on its name alone.
 * @param {string} name - File base name
 * @param {string} ext - Lower-cased path.extname() of `name`
 * @returns {boolean}
 */
function isScannableName(name, ext) {
  // Known text extension ('' covers Dockerfile, Makefile and plain dotfiles like `.env`)
  if (TEXT_EXTENSIONS.has(ext)) return true;
  // Extensionless, non-dot files stay accepted even if '' is ever dropped from the set
  if (ext === '' && !name.startsWith('.')) return true;
  // Multi-dot names such as `.env.local`: extname() reports only '.local',
  // so the listed entry has to be compared against the full file name.
  return TEXT_EXTENSIONS.has(name.toLowerCase());
}

/**
 * Discover all scannable files under a target path.
 *
 * Walks the tree depth-first, skipping directories rejected by shouldWalkDir
 * and files rejected by isScannableName. Empty files, files larger than
 * `maxFileSize`, files that cannot be stat'ed and directories that cannot be
 * read are counted in `skipped`. Entries that are neither regular files nor
 * directories are ignored without being counted.
 *
 * @param {string} targetPath - Absolute path to scan
 * @param {object} [opts]
 * @param {number} [opts.maxFiles=5000] - Stop after this many files
 * @param {number} [opts.maxFileSize=524288] - Skip files larger than this
 * @returns {Promise<{ files: FileInfo[], skipped: number, truncated: boolean }>}
 *   `truncated` is true once `files` has reached `maxFiles`.
 *
 * @typedef {{ absPath: string, relPath: string, ext: string, size: number }} FileInfo
 */
export async function discoverFiles(targetPath, opts = {}) {
  // `||` is kept on purpose: 0 (or any other falsy value) falls back to the default.
  const maxFiles = opts.maxFiles || 5000;
  const maxFileSize = opts.maxFileSize || MAX_FILE_SIZE;

  const files = [];
  let skipped = 0;
  let truncated = false;

  async function walk(dir) {
    if (truncated) return;

    let entries;
    try {
      entries = await readdir(dir, { withFileTypes: true });
    } catch {
      // Unreadable directory: count it and keep going with the rest of the tree
      skipped++;
      return;
    }

    for (const entry of entries) {
      if (truncated) return;
      const fullPath = join(dir, entry.name);

      if (entry.isDirectory()) {
        if (shouldWalkDir(entry.name)) {
          await walk(fullPath);
        }
        continue;
      }
      if (!entry.isFile()) continue;

      const ext = extname(entry.name).toLowerCase();
      if (!isScannableName(entry.name, ext)) {
        skipped++;
        continue;
      }

      let fileSize;
      try {
        ({ size: fileSize } = await stat(fullPath));
      } catch {
        skipped++;
        continue;
      }
      // Nothing to scan in empty files; oversized files are not read at all
      if (fileSize === 0 || fileSize > maxFileSize) {
        skipped++;
        continue;
      }

      files.push({
        absPath: fullPath,
        relPath: relative(targetPath, fullPath),
        ext,
        size: fileSize,
      });

      if (files.length >= maxFiles) {
        truncated = true;
        return;
      }
    }
  }

  await walk(targetPath);
  return { files, skipped, truncated };
}
/**
 * Load a file and decode it as UTF-8, unless it looks binary.
 * A file is treated as binary when a NUL byte occurs within its first 8KB.
 * Any failure while reading or decoding also yields null.
 * @param {string} absPath
 * @returns {Promise<string|null>} Decoded text, or null for binary/unreadable files
 */
export async function readTextFile(absPath) {
  try {
    const bytes = await readFile(absPath);
    // subarray() clamps to the buffer length, so short files are checked in full
    const looksBinary = bytes.subarray(0, 8192).includes(0);
    return looksBinary ? null : bytes.toString('utf-8');
  } catch {
    return null;
  }
}