feat(llm-security-copilot): port llm-security v5.1.0 to GitHub Copilot CLI

Full port of llm-security plugin for internal use on Windows with GitHub
Copilot CLI. Protocol translation layer (copilot-hook-runner.mjs)
normalizes Copilot camelCase I/O to Claude Code snake_case format — all
original hook scripts run unmodified.

- 8 hooks with protocol translation (stdin/stdout/exit code)
- 18 SKILL.md skills (Agent Skills Open Standard)
- 6 .agent.md agent definitions
- 20 scanners + 14 scanner lib modules (unchanged)
- 14 knowledge files (unchanged)
- 39 test files including copilot-port-verify.mjs (17 tests)
- Windows-ready: node:path, os.tmpdir(), process.execPath, no bash

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Kjell Tore Guttormsen 2026-04-09 21:56:10 +02:00
commit f418a8fe08
169 changed files with 37631 additions and 0 deletions

View file

@ -0,0 +1,54 @@
// bash-normalize.mjs — Normalize bash parameter expansion evasion techniques.
//
// Attackers can evade command-name matching by inserting shell metacharacters
// that are transparent to bash but break regex patterns.
//
// This module strips these constructs from command names so that downstream
// pattern matching sees the canonical form.
//
// Exported as a shared module — used by pre-bash-destructive.mjs and
// pre-install-supply-chain.mjs.
/**
 * Canonicalize a shell command string so that command-name matching is not
 * defeated by quoting or expansion tricks.
 *
 * Rewrites, applied in this order:
 *  1. `''`            -> removed (w''get -> wget)
 *  2. `""`            -> removed (r""m -> rm)
 *  3. `${x}`          -> `x` when the braces hold exactly one word character
 *  4. `${...}`        -> removed for any other brace content
 *  5. empty / whitespace-only backtick pair -> removed
 *  6. a backslash sitting directly between two word characters -> removed,
 *     repeated until the string stops changing (c\u\r\l -> curl)
 *
 * Left alone: non-empty quotes, `$VAR` without braces, and any backslash that
 * does not have a word character on both sides.
 *
 * @param {string} cmd - Raw command string
 * @returns {string} Normalized command string. Falsy input yields `''`; a
 *   truthy non-string value is handed back unchanged.
 */
export function normalizeBashExpansion(cmd) {
  if (!cmd || typeof cmd !== 'string') return cmd || '';

  // One-shot rewrites; order matters (single-char ${x} must run before the
  // catch-all ${...} removal).
  const rewrites = [
    [/''/g, ''],
    [/""/g, ''],
    [/\$\{(\w)\}/g, '$1'],
    [/\$\{[^}]*\}/g, ''],
    [/`\s*`/g, ''],
  ];
  let current = rewrites.reduce(
    (text, [pattern, replacement]) => text.replace(pattern, replacement),
    cmd,
  );

  // A single pass consumes the word char on both sides of each backslash, so
  // chains like c\u\r\l need repeated passes until a fixed point is reached.
  for (;;) {
    const next = current.replace(/(\w)\\(\w)/g, '$1$2');
    if (next === current) return current;
    current = next;
  }
}

View file

@ -0,0 +1,276 @@
// diff-engine.mjs — Baseline storage, finding fingerprinting, and diff categorization.
// Compares scan results against a stored baseline to classify findings as:
// new — present in current scan, absent from baseline
// resolved — present in baseline, absent from current scan
// unchanged — matched between baseline and current (line drift ≤3)
// moved — same finding, different location (line drift >3 or file renamed)
// Zero external dependencies.
import { createHash } from 'node:crypto';
import { existsSync, mkdirSync, readFileSync, writeFileSync } from 'node:fs';
import { join, resolve } from 'node:path';
// ---------------------------------------------------------------------------
// Configuration
// ---------------------------------------------------------------------------
// Largest absolute line-number difference at which a same-file fingerprint
// match is still classified as "unchanged"; compared with <= in isLineClose().
const LINE_FUZZY_THRESHOLD = 3; // ±3 lines = unchanged, >3 = moved
// ---------------------------------------------------------------------------
// Target hashing — deterministic key for baseline storage
// ---------------------------------------------------------------------------
/**
 * Derive the baseline storage key for a scan target.
 * The path is made absolute first, so different spellings of the same
 * location (relative, with `..`, etc.) produce the same key.
 * @param {string} targetPath
 * @returns {string} First 12 hex characters of the SHA-256 of the absolute path
 */
export function targetHash(targetPath) {
  const absolute = resolve(targetPath);
  const hasher = createHash('sha256');
  hasher.update(absolute);
  const hex = hasher.digest('hex');
  return hex.slice(0, 12);
}
// ---------------------------------------------------------------------------
// Finding fingerprinting — identity that survives line drift
// ---------------------------------------------------------------------------
/**
 * Compute a location-independent identity for a finding.
 * The scanner, file, title and evidence fields (each defaulting to '' when
 * falsy) are joined with NUL separators and hashed; the line number is
 * deliberately not part of the identity.
 * @param {object} finding - A finding object from output.mjs
 * @returns {string} First 16 hex characters of the SHA-256 digest
 */
export function fingerprintFinding(finding) {
  const { scanner, file, title, evidence } = finding;
  // Evidence is included so that two findings with the same title in the
  // same file but different matched content stay distinct.
  const identity = [scanner, file, title, evidence]
    .map((part) => part || '')
    .join('\x00');
  return createHash('sha256').update(identity).digest('hex').slice(0, 16);
}
// ---------------------------------------------------------------------------
// Baseline I/O
// ---------------------------------------------------------------------------
/**
 * Compute where the baseline for a target is stored.
 * @param {string} baselinesDir - Path to reports/baselines/
 * @param {string} targetPath
 * @returns {string} `<baselinesDir>/<targetHash(targetPath)>.json`
 */
export function baselinePath(baselinesDir, targetPath) {
  const fileName = `${targetHash(targetPath)}.json`;
  return join(baselinesDir, fileName);
}
/**
 * Persist the current scan as the baseline for a target.
 * Creates the baselines directory when missing, then writes a compact JSON
 * document (pretty-printed, trailing newline) holding the scan's target and
 * timestamp, its aggregate, and the fingerprinted findings.
 * @param {string} baselinesDir - Path to reports/baselines/
 * @param {string} targetPath - The scanned target
 * @param {object} scanEnvelope - Full scan output envelope from scan-orchestrator
 * @returns {string} Path to saved baseline file
 */
export function saveBaseline(baselinesDir, targetPath, scanEnvelope) {
  if (!existsSync(baselinesDir)) mkdirSync(baselinesDir, { recursive: true });

  const destination = baselinePath(baselinesDir, targetPath);
  const { target, timestamp } = scanEnvelope.meta;
  const baseline = {
    // version identifies the baseline file format, not the scanner version
    meta: { target, timestamp, version: '1' },
    aggregate: scanEnvelope.aggregate,
    findings: extractFindings(scanEnvelope),
  };

  const serialized = `${JSON.stringify(baseline, null, 2)}\n`;
  writeFileSync(destination, serialized);
  return destination;
}
/**
 * Read the stored baseline for a target.
 * @param {string} baselinesDir
 * @param {string} targetPath
 * @returns {object|null} Parsed baseline, or null when the file is missing,
 *   unreadable, or not valid JSON
 */
export function loadBaseline(baselinesDir, targetPath) {
  const source = baselinePath(baselinesDir, targetPath);
  if (existsSync(source)) {
    try {
      const raw = readFileSync(source, 'utf8');
      return JSON.parse(raw);
    } catch {
      // Unreadable or corrupt baseline is treated the same as "no baseline".
    }
  }
  return null;
}
// ---------------------------------------------------------------------------
// Finding extraction — flatten all scanner results into fingerprinted list
// ---------------------------------------------------------------------------
/**
 * Flatten every scanner's findings from a scan envelope into one list and
 * attach a fingerprint to each.
 * Missing optional fields are stored as null. When a finding carries no
 * `scanner` value, the first three characters of the upper-cased scanner key
 * are used instead.
 * @param {object} scanEnvelope
 * @returns {object[]} Array of { fingerprint, scanner, severity, title, file, line, evidence, owasp, recommendation }
 */
export function extractFindings(scanEnvelope) {
  const perScanner = Object.entries(scanEnvelope.scanners || {});
  return perScanner.flatMap(([scannerName, result]) =>
    (result.findings || []).map((f) => ({
      fingerprint: fingerprintFinding(f),
      scanner: f.scanner || scannerName.toUpperCase().slice(0, 3),
      severity: f.severity,
      title: f.title,
      file: f.file || null,
      line: f.line || null,
      evidence: f.evidence || null,
      owasp: f.owasp || null,
      recommendation: f.recommendation || null,
    })),
  );
}
// ---------------------------------------------------------------------------
// Diff algorithm
// ---------------------------------------------------------------------------
/**
 * Compare current scan findings against a baseline.
 *
 * Matching strategy (priority order):
 *   1. Exact: same fingerprint, same file, line within ±lineThreshold -> unchanged
 *   2. Moved: same fingerprint, but file differs or line drifted further -> moved
 *   3. Current findings with no baseline partner                         -> new
 *   4. Baseline findings with no current partner                         -> resolved
 *
 * Exact matches are claimed for ALL current findings before any "moved"
 * match is handed out. Otherwise an early finding without an exact partner
 * could consume the baseline entry that a later finding matches exactly,
 * turning an unchanged finding into a spurious "moved" pair.
 *
 * Each baseline entry is matched at most once. Within every result list the
 * entries keep the order of the input they came from.
 *
 * @param {object[]} baselineFindings - From loadBaseline().findings
 * @param {object[]} currentFindings - From extractFindings()
 * @param {number} [lineThreshold=LINE_FUZZY_THRESHOLD] - Max line drift that
 *   still counts as unchanged
 * @returns {object} { new, resolved, unchanged, moved, summary } where
 *   unchanged entries gain `baseline_line`, moved entries gain
 *   `previous_file` / `previous_line`, and `summary.baseline_timestamp` is
 *   left null for the caller to fill in
 */
export function diffFindings(baselineFindings, currentFindings, lineThreshold = LINE_FUZZY_THRESHOLD) {
  // Index baseline findings by fingerprint. Several findings can share a
  // fingerprint (same pattern, different locations), hence the buckets.
  // Entries are copied so the `matched` flag never leaks into caller data.
  const baselineByFp = new Map();
  for (const f of baselineFindings) {
    const bucket = baselineByFp.get(f.fingerprint) || [];
    bucket.push({ ...f, matched: false });
    baselineByFp.set(f.fingerprint, bucket);
  }

  const results = {
    new: [],
    resolved: [],
    unchanged: [],
    moved: [],
  };

  // Pass 1: reserve exact matches (same file, line within threshold).
  const exactPartners = [];
  for (const current of currentFindings) {
    const candidates = baselineByFp.get(current.fingerprint) || [];
    const partner = candidates.find(
      (b) => !b.matched
        && b.file === current.file
        && isLineClose(b.line, current.line, lineThreshold),
    );
    if (partner) partner.matched = true;
    exactPartners.push(partner || null);
  }

  // Pass 2: classify in input order; leftovers with a free same-fingerprint
  // baseline entry are "moved", everything else is "new".
  let index = 0;
  for (const current of currentFindings) {
    const exact = exactPartners[index];
    index += 1;
    if (exact) {
      results.unchanged.push({ ...current, baseline_line: exact.line });
      continue;
    }
    const candidates = baselineByFp.get(current.fingerprint) || [];
    const leftover = candidates.find((b) => !b.matched);
    if (leftover) {
      leftover.matched = true;
      results.moved.push({
        ...current,
        previous_file: leftover.file,
        previous_line: leftover.line,
      });
      continue;
    }
    results.new.push(current);
  }

  // Pass 3: baseline findings nobody claimed are resolved.
  for (const bucket of baselineByFp.values()) {
    for (const baseline of bucket) {
      if (!baseline.matched) {
        const { matched: _, ...finding } = baseline;
        results.resolved.push(finding);
      }
    }
  }

  results.summary = {
    new: results.new.length,
    resolved: results.resolved.length,
    unchanged: results.unchanged.length,
    moved: results.moved.length,
    total_current: currentFindings.length,
    total_baseline: baselineFindings.length,
    baseline_timestamp: null, // caller fills in
  };
  return results;
}
/**
 * Check if two line numbers are within the fuzzy threshold.
 * A null/undefined line on either side always matches (some findings are
 * file-level, not line-level).
 * @param {number|null} a
 * @param {number|null} b
 * @param {number} [threshold=LINE_FUZZY_THRESHOLD] - Inclusive max distance
 * @returns {boolean}
 */
function isLineClose(a, b, threshold = LINE_FUZZY_THRESHOLD) {
  if (a == null || b == null) return true;
  return Math.abs(a - b) <= threshold;
}
// ---------------------------------------------------------------------------
// High-level API — used by scan-orchestrator
// ---------------------------------------------------------------------------
/**
 * One-call diff: load the stored baseline for a target and compare the
 * current scan against it.
 * @param {string} baselinesDir
 * @param {string} targetPath
 * @param {object} scanEnvelope - Current scan results
 * @returns {object|null} Diff results (summary.baseline_timestamp filled from
 *   the baseline's meta), or null if no baseline could be loaded
 */
export function diffAgainstBaseline(baselinesDir, targetPath, scanEnvelope) {
  const stored = loadBaseline(baselinesDir, targetPath);
  if (stored) {
    const comparison = diffFindings(stored.findings, extractFindings(scanEnvelope));
    comparison.summary.baseline_timestamp = stored.meta.timestamp;
    return comparison;
  }
  return null;
}

View file

@ -0,0 +1,58 @@
// distribution-stats.mjs — Statistical divergence utilities for behavioral drift detection.
// Zero external dependencies. <50 lines.
//
// Jensen-Shannon divergence measures how different two probability distributions are.
// Used by post-session-guard.mjs to detect tool distribution shifts within a session.
//
// OWASP: ASI01 (Excessive Agency — behavioral pattern changes may indicate hijacking)
/**
 * Kullback-Leibler divergence KL(P || Q), in bits (log base 2).
 * Keys where P is exactly 0 contribute nothing. If P has mass on a key where
 * Q is missing or 0, the result is Infinity.
 * @param {Map<string, number>} P
 * @param {Map<string, number>} Q
 * @returns {number}
 */
function klDivergence(P, Q) {
  let total = 0;
  for (const [key, pMass] of P.entries()) {
    if (pMass !== 0) {
      const qMass = Q.get(key) || 0;
      if (qMass === 0) return Number.POSITIVE_INFINITY;
      total += pMass * Math.log2(pMass / qMass);
    }
  }
  return total;
}
/**
 * Jensen-Shannon divergence between two distributions, log2 basis:
 * 0 = identical, 1 = fully disjoint. Symmetric: JSD(P,Q) = JSD(Q,P).
 * Computed as the average of KL(P || M) and KL(Q || M), where M is the
 * equal-weight mixture of P and Q over the union of their keys.
 * @param {Map<string, number>} P - Normalized probability distribution
 * @param {Map<string, number>} Q - Normalized probability distribution
 * @returns {number}
 */
export function jensenShannonDivergence(P, Q) {
  const M = new Map();
  const keyUnion = new Set([...P.keys(), ...Q.keys()]);
  for (const key of keyUnion) {
    const pMass = P.get(key) || 0;
    const qMass = Q.get(key) || 0;
    M.set(key, 0.5 * pMass + 0.5 * qMass);
  }
  const pToMix = klDivergence(P, M);
  const qToMix = klDivergence(Q, M);
  return 0.5 * pToMix + 0.5 * qToMix;
}
/**
 * Turn a list of category labels into relative frequencies.
 * @param {string[]} labels
 * @returns {Map<string, number>} label -> count / labels.length, in order of
 *   first appearance; an empty input yields an empty map
 */
export function buildDistribution(labels) {
  const total = labels.length;
  if (total === 0) return new Map();

  const tally = new Map();
  for (const label of labels) {
    const seen = tally.get(label) || 0;
    tally.set(label, seen + 1);
  }
  return new Map([...tally].map(([key, count]) => [key, count / total]));
}

View file

@ -0,0 +1,145 @@
// file-discovery.mjs — Walk directory tree, filter, binary detection
// Zero dependencies (Node.js builtins only).
import { readdir, stat, readFile } from 'node:fs/promises';
import { join, relative, extname } from 'node:path';
// Extensions we scan (text-based).
// Entries are matched against the lower-cased path.extname() of a file AND
// against its whole lower-cased name; the latter is what makes multi-dot
// entries such as '.env.local' effective (their extname is '.local').
const TEXT_EXTENSIONS = new Set([
  '.js', '.mjs', '.cjs', '.ts', '.mts', '.cts', '.jsx', '.tsx',
  '.py', '.pyw',
  '.json', '.jsonc', '.json5',
  '.yaml', '.yml',
  '.toml',
  '.md', '.mdx',
  '.sh', '.bash', '.zsh',
  '.env', '.env.local', '.env.example',
  '.cfg', '.ini', '.conf',
  '.xml', '.html', '.htm', '.svg',
  '.css', '.scss', '.less',
  '.sql',
  '.rs', '.go', '.java', '.kt', '.cs', '.c', '.cpp', '.h', '.hpp',
  '.rb', '.php', '.lua', '.swift', '.m',
  '.txt', '.csv', '.log',
  '.lock', // package-lock.json, yarn.lock, etc.
  '.dockerfile', '', // Dockerfile, Makefile, etc. (no extension)
]);
// Directories to always skip
const SKIP_DIRS = new Set([
  'node_modules', '.git', '.hg', '.svn',
  '__pycache__', '.pytest_cache', '.mypy_cache',
  'dist', 'build', '.next', '.nuxt',
  '.venv', 'venv', 'env',
  'coverage', '.nyc_output',
  '.angular', '.cache',
]);
// Max file size to read (512KB)
const MAX_FILE_SIZE = 512 * 1024;
/**
 * Discover all scannable files under a target path.
 *
 * Walks the tree depth-first. Directories listed in SKIP_DIRS and all
 * dot-directories are pruned, except `.claude-plugin`, `.github` and
 * `.claude`. A regular file is kept when its extension or its whole name is
 * in TEXT_EXTENSIONS (or it has no extension), and its size is between 1
 * byte and `maxFileSize`. Entries that are neither directories nor regular
 * files are ignored.
 *
 * `skipped` counts rejected files plus directories / files that could not be
 * read or stat'ed; pruned directories are not counted.
 *
 * @param {string} targetPath - Absolute path to scan
 * @param {object} [opts]
 * @param {number} [opts.maxFiles=5000] - Stop after this many files (a falsy value means the default)
 * @param {number} [opts.maxFileSize=524288] - Skip files larger than this (a falsy value means the default)
 * @returns {Promise<{ files: FileInfo[], skipped: number, truncated: boolean }>}
 *   `truncated` is true when the walk stopped because `maxFiles` was reached
 *
 * @typedef {{ absPath: string, relPath: string, ext: string, size: number }} FileInfo
 */
export async function discoverFiles(targetPath, opts = {}) {
  const maxFiles = opts.maxFiles || 5000;
  const maxFileSize = opts.maxFileSize || MAX_FILE_SIZE;
  const files = [];
  let skipped = 0;
  let truncated = false;

  async function walk(dir) {
    if (truncated) return;
    let entries;
    try {
      entries = await readdir(dir, { withFileTypes: true });
    } catch {
      // Unreadable directory: count it and carry on with the rest of the tree.
      skipped++;
      return;
    }
    for (const entry of entries) {
      if (truncated) return;
      const fullPath = join(dir, entry.name);
      if (entry.isDirectory()) {
        if (SKIP_DIRS.has(entry.name) || entry.name.startsWith('.')) {
          // Allow .claude-plugin, .github and .claude but skip other dot dirs
          if (entry.name !== '.claude-plugin' && entry.name !== '.github' && entry.name !== '.claude') {
            continue;
          }
        }
        await walk(fullPath);
      } else if (entry.isFile()) {
        const lowerName = entry.name.toLowerCase();
        const ext = extname(entry.name).toLowerCase();
        // Accept known text extensions or extensionless files (Dockerfile, Makefile, etc.)
        const isKnownText = TEXT_EXTENSIONS.has(ext);
        const isExtensionless = ext === '' && !entry.name.startsWith('.');
        // extname('.env.local') is '.local', so names like '.env.local' and
        // '.env.example' can only be recognised by their full name.
        const isKnownName = TEXT_EXTENSIONS.has(lowerName);
        if (!isKnownText && !isExtensionless && !isKnownName) {
          skipped++;
          continue;
        }
        let fileSize;
        try {
          const st = await stat(fullPath);
          // Oversized and empty files are both counted as skipped.
          if (st.size > maxFileSize || st.size === 0) {
            skipped++;
            continue;
          }
          fileSize = st.size;
        } catch {
          skipped++;
          continue;
        }
        files.push({
          absPath: fullPath,
          relPath: relative(targetPath, fullPath),
          ext,
          size: fileSize,
        });
        if (files.length >= maxFiles) {
          truncated = true;
          return;
        }
      }
    }
  }

  await walk(targetPath);
  return { files, skipped, truncated };
}
/**
 * Load a file as a UTF-8 string unless it looks binary.
 * A file counts as binary when a NUL byte occurs within its first 8192
 * bytes. Any read failure is reported the same way as a binary file.
 * @param {string} absPath
 * @returns {Promise<string|null>} File text, or null for binary / unreadable files
 */
export async function readTextFile(absPath) {
  try {
    const bytes = await readFile(absPath);
    // subarray clamps to the buffer length, so short files are handled too.
    const head = bytes.subarray(0, 8192);
    return head.includes(0) ? null : bytes.toString('utf-8');
  } catch {
    return null;
  }
}

View file

@ -0,0 +1,66 @@
#!/usr/bin/env node
// fs-utils.mjs — Cross-platform file operations for /security clean
// Usage:
// node fs-utils.mjs backup <target> → prints backup path to stdout
// node fs-utils.mjs restore <backup> <target> → restores backup over target
// node fs-utils.mjs cleanup <backup> → removes backup directory
// node fs-utils.mjs tmppath <filename> → prints cross-platform temp file path
import { cpSync, rmSync, renameSync, existsSync } from 'node:fs';
import { join, basename } from 'node:path';
import { tmpdir } from 'node:os';
import { randomUUID } from 'node:crypto';
// CLI dispatch: argv[2] selects the operation, everything after it is passed
// to that operation as positional arguments.
const [,, command, ...args] = process.argv;
switch (command) {
  // backup <target>: copy <target> recursively to a timestamped sibling path
  // and print that path.
  case 'backup': {
    const target = args[0];
    if (!target || !existsSync(target)) {
      console.error(`backup: target does not exist: ${target}`);
      process.exit(1);
    }
    // ISO timestamp with ':' and '.' replaced by '-', cut to second
    // resolution (e.g. 2026-04-09T21-56-10) so it is safe in file names.
    const ts = new Date().toISOString().replace(/[:.]/g, '-').slice(0, 19);
    const backupPath = `${target}.security-backup-${ts}`;
    cpSync(target, backupPath, { recursive: true });
    process.stdout.write(backupPath + '\n');
    break;
  }
  // restore <backup> <target>: replace <target> with <backup> by removing
  // the target (if present) and renaming the backup into its place.
  case 'restore': {
    const [backup, target] = args;
    if (!backup || !existsSync(backup)) {
      console.error(`restore: backup does not exist: ${backup}`);
      process.exit(1);
    }
    // Without a target, renameSync() would throw an uncaught TypeError;
    // fail with a usage-style message instead.
    if (!target) {
      console.error('restore: target path required');
      process.exit(1);
    }
    if (existsSync(target)) {
      rmSync(target, { recursive: true, force: true });
    }
    renameSync(backup, target);
    // NOTE(review): the message prints both paths with no separator between
    // them — confirm whether one was intended before changing the output.
    process.stdout.write(`Restored ${backup}${target}\n`);
    break;
  }
  // cleanup <backup>: delete the given path recursively; silently does
  // nothing when the path is missing or not given.
  case 'cleanup': {
    const path = args[0];
    if (path && existsSync(path)) {
      rmSync(path, { recursive: true, force: true });
      process.stdout.write(`Removed ${path}\n`);
    }
    break;
  }
  // tmppath [filename]: print a path inside os.tmpdir() built from the given
  // file name with an 8-character random suffix inserted before the
  // extension. Does not create the file.
  case 'tmppath': {
    const base = args[0] || 'llm-security-temp.json';
    const dotIdx = base.lastIndexOf('.');
    // dotIdx > 0 (not >= 0): a leading dot is treated as part of the name,
    // and names without an extension get '.json'.
    const name = dotIdx > 0 ? base.slice(0, dotIdx) : base;
    const ext = dotIdx > 0 ? base.slice(dotIdx) : '.json';
    const unique = `${name}-${randomUUID().slice(0, 8)}${ext}`;
    process.stdout.write(join(tmpdir(), unique) + '\n');
    break;
  }
  default:
    console.error('Usage: node fs-utils.mjs <backup|restore|cleanup|tmppath> [args...]');
    process.exit(1);
}

View file

@ -0,0 +1,227 @@
#!/usr/bin/env node
// git-clone.mjs — Clone GitHub repos to temp dirs for security scanning
// Usage:
// node git-clone.mjs clone <url> [--branch <name>] → sandboxed shallow clone, prints tmpdir path
// node git-clone.mjs cleanup <dir> → removes temp directory
// node git-clone.mjs validate <url> → exits 0 if valid GitHub URL, 1 if not
import { mkdtempSync, rmSync, existsSync, realpathSync } from 'node:fs';
import { join, resolve, sep } from 'node:path';
import { tmpdir } from 'node:os';
import { spawnSync } from 'node:child_process';
// Accepted URL shapes: https://github.com/<owner>/<repo>[.git][/] and
// git@github.com:<owner>/<repo>[.git]. Owner and repo may contain word
// characters, dots and dashes; nothing may follow the repo segment.
const GITHUB_URL_RE = /^https:\/\/github\.com\/[\w.-]+\/[\w.-]+(\.git)?\/?$/;
const GITHUB_SSH_RE = /^git@github\.com:[\w.-]+\/[\w.-]+(\.git)?$/;
// Upper bound, in megabytes, applied to a finished clone.
const MAX_CLONE_SIZE_MB = 100;
/**
 * Tell whether a string is a GitHub repository URL in one of the two
 * accepted forms (HTTPS or SSH).
 * @param {string} url
 * @returns {boolean}
 */
function isValidUrl(url) {
  return [GITHUB_URL_RE, GITHUB_SSH_RE].some((shape) => shape.test(url));
}
/**
 * Split CLI arguments into the optional `--branch <name>` value and the
 * remaining positional arguments.
 * A `--branch` that is the very last argument has no value to take and is
 * kept as a positional. When `--branch` is given more than once, the last
 * value wins.
 * @param {string[]} argv
 * @returns {{ branch: string|null, positional: string[] }}
 */
function parseArgs(argv) {
  const parsed = { branch: null, positional: [] };
  let cursor = 0;
  while (cursor < argv.length) {
    const token = argv[cursor];
    const hasFollower = cursor + 1 < argv.length;
    if (token === '--branch' && hasFollower) {
      parsed.branch = argv[cursor + 1];
      cursor += 2;
    } else {
      parsed.positional.push(token);
      cursor += 1;
    }
  }
  return parsed;
}
/**
 * Git config flags that neutralize known attack vectors.
 * Flat list of `-c key=value` pairs; buildSandboxedClone() places them in
 * front of the git subcommand arguments.
 */
const GIT_SANDBOX_CONFIG = [
// hooks path pointed at /dev/null
// NOTE(review): '/dev/null' is a POSIX path — confirm how git treats it on Windows
'-c', 'core.hooksPath=/dev/null',
'-c', 'core.symlinks=false',
'-c', 'core.fsmonitor=false',
// LFS filter commands set to empty strings
'-c', 'filter.lfs.process=',
'-c', 'filter.lfs.smudge=',
'-c', 'filter.lfs.clean=',
'-c', 'protocol.file.allow=never',
'-c', 'transfer.fsckObjects=true',
];
/**
 * Environment that isolates git from system/user config.
 * Starts from a copy of the current process environment (taken when this
 * module is loaded) and overrides the four variables below; it is passed as
 * `env` to the clone subprocess.
 */
const GIT_SANDBOX_ENV = {
...process.env,
GIT_CONFIG_NOSYSTEM: '1',
// NOTE(review): '/dev/null' is a POSIX path — confirm how git treats it on Windows
GIT_CONFIG_GLOBAL: '/dev/null',
GIT_ATTR_NOSYSTEM: '1',
// '0' is meant to stop git from prompting on the terminal — TODO confirm against git docs
GIT_TERMINAL_PROMPT: '0',
};
/**
 * Build a sandbox-exec profile that denies all file writes except below one
 * directory (plus /dev/null and /dev/tty).
 * macOS only: returns null on every other platform, and also when `which`
 * cannot find sandbox-exec.
 * @param {string} allowedWritePath - Existing directory; its real path
 *   (symlinks resolved) is what the profile whitelists
 * @returns {string|null} Profile text, or null when sandbox-exec is unavailable
 */
function buildSandboxProfile(allowedWritePath) {
  const onMac = process.platform === 'darwin';
  if (!onMac) return null;

  const { status } = spawnSync('which', ['sandbox-exec'], { encoding: 'utf8' });
  if (status !== 0) return null;

  const writableRoot = realpathSync(allowedWritePath);
  // The profile's forms are emitted back to back with nothing between them.
  return (
    '(version 1)' +
    '(allow default)' +
    '(deny file-write*)' +
    `(allow file-write* (subpath "${writableRoot}"))` +
    '(allow file-write* (literal "/dev/null"))' +
    '(allow file-write* (literal "/dev/tty"))'
  );
}
/**
 * Build bwrap args restricting writes to a single directory.
 * Linux only: returns null on other platforms, when `which` cannot find
 * bwrap, or when a trial bwrap invocation fails.
 *
 * All namespaces are unshared, but the network namespace is handed back via
 * `--share-net`: the wrapped command is a `git clone` of a remote URL, which
 * cannot work inside an empty network namespace.
 *
 * @param {string} allowedWritePath - The only directory bound read-write
 * @param {string[]} innerArgs - Command (and its arguments) to run inside the sandbox
 * @returns {string[]|null} Argument list for `bwrap`, ending with innerArgs,
 *   or null when bwrap cannot be used
 */
function buildBwrapArgs(allowedWritePath, innerArgs) {
  if (process.platform !== 'linux') return null;
  const check = spawnSync('which', ['bwrap'], { encoding: 'utf8' });
  if (check.status !== 0) return null;
  // Test that bwrap actually works (fails on Ubuntu 24.04+ without admin config)
  const probe = spawnSync('bwrap', ['--ro-bind', '/', '/', '--dev', '/dev', '/bin/true'], {
    stdio: 'ignore', timeout: 5000,
  });
  if (probe.status !== 0) return null;
  return [
    '--ro-bind', '/', '/',                        // read-only root
    '--bind', allowedWritePath, allowedWritePath, // writable clone dir
    '--dev', '/dev',                              // /dev/null etc.
    '--unshare-all',                              // isolate namespaces
    '--share-net',                                // ...but keep network: git must reach the remote
    '--new-session',                              // prevent tty hijack
    '--die-with-parent',                          // cleanup on parent exit
    ...innerArgs,
  ];
}
/**
 * Choose how to launch the clone on this machine.
 * Preference order: sandbox-exec profile, then bwrap, then plain git. In all
 * three cases the git arguments are prefixed with GIT_SANDBOX_CONFIG.
 * @param {string} tmpDir - Directory the clone is allowed to write to
 * @param {string[]} gitArgs - git subcommand and arguments (without `git` itself)
 * @returns {{ cmd: string, args: string[], sandbox: string|null }} `sandbox`
 *   names the wrapper used, or is null for the unwrapped fallback
 */
function buildSandboxedClone(tmpDir, gitArgs) {
  const hardenedGitArgs = [...GIT_SANDBOX_CONFIG, ...gitArgs];

  // macOS: sandbox-exec
  const macProfile = buildSandboxProfile(tmpDir);
  if (macProfile) {
    return {
      cmd: 'sandbox-exec',
      args: ['-p', macProfile, 'git', ...hardenedGitArgs],
      sandbox: 'sandbox-exec',
    };
  }

  // Linux: bwrap; otherwise fall back to git with config flags only
  const linuxArgs = buildBwrapArgs(tmpDir, ['git', ...hardenedGitArgs]);
  return linuxArgs
    ? { cmd: 'bwrap', args: linuxArgs, sandbox: 'bwrap' }
    : { cmd: 'git', args: hardenedGitArgs, sandbox: null };
}
// Export for testing
export {
GIT_SANDBOX_CONFIG, GIT_SANDBOX_ENV, buildSandboxProfile, buildBwrapArgs,
buildSandboxedClone, MAX_CLONE_SIZE_MB,
};
// CLI entry point — only run when invoked directly
import { fileURLToPath } from 'node:url';
// Run the CLI only when this file is the script node was started with;
// importing the module (e.g. from tests) must not trigger any command.
const __filename = fileURLToPath(import.meta.url);
const isDirectRun = process.argv[1] === __filename;
if (isDirectRun) {
  const [,, command, ...rest] = process.argv;
  switch (command) {
    // clone <url> [--branch <name>]: shallow-clone into a fresh temp
    // directory and print that directory on stdout.
    case 'clone': {
      const { branch, positional } = parseArgs(rest);
      const url = positional[0];
      if (!url) {
        console.error('clone: URL required');
        process.exit(1);
      }
      if (!isValidUrl(url)) {
        console.error(`clone: invalid GitHub URL: ${url}`);
        console.error('Supported: https://github.com/user/repo or git@github.com:user/repo.git');
        process.exit(1);
      }
      const tmpDir = mkdtempSync(join(tmpdir(), 'llm-sec-'));
      const gitArgs = ['clone', '--depth', '1'];
      if (branch) gitArgs.push('--branch', branch);
      gitArgs.push(url, tmpDir);
      // Build sandboxed clone command (macOS: sandbox-exec, Linux: bwrap, fallback: git only)
      const { cmd: cloneCmd, args: cloneArgs, sandbox } = buildSandboxedClone(tmpDir, gitArgs);
      if (!sandbox) {
        console.error('clone: WARN: no OS sandbox available, running with git config hardening only');
      }
      const result = spawnSync(cloneCmd, cloneArgs, {
        stdio: ['ignore', 'pipe', 'pipe'],
        timeout: 60_000,
        env: GIT_SANDBOX_ENV,
      });
      // status is null when the child was killed (e.g. by the timeout), so
      // anything other than a clean 0 is treated as failure.
      if (result.status !== 0) {
        // Clean up on failure
        try { rmSync(tmpDir, { recursive: true, force: true }); } catch {}
        const stderr = result.stderr?.toString().trim() || 'unknown error';
        console.error(`clone: git clone failed: ${stderr}`);
        process.exit(1);
      }
      // Post-clone size check. Best effort: when `du` is unavailable or
      // fails, the check is skipped rather than failing the clone.
      const duResult = spawnSync('du', ['-sm', tmpDir], { encoding: 'utf8' });
      if (duResult.status === 0) {
        const sizeMb = parseInt(duResult.stdout.split('\t')[0], 10);
        if (sizeMb > MAX_CLONE_SIZE_MB) {
          try { rmSync(tmpDir, { recursive: true, force: true }); } catch {}
          console.error(`clone: repo too large (${sizeMb}MB, max ${MAX_CLONE_SIZE_MB}MB)`);
          process.exit(1);
        }
      }
      process.stdout.write(tmpDir + '\n');
      break;
    }
    // cleanup <dir>: recursively delete a directory, but only if it lies
    // strictly inside the system temp directory.
    case 'cleanup': {
      const dir = rest[0];
      if (!dir) {
        console.error('cleanup: directory path required');
        process.exit(1);
      }
      // Safety: only remove paths in system temp directory.
      // The path is resolved first so '..' segments cannot escape, and the
      // prefix includes a trailing separator so that sibling names such as
      // '/tmpfoo' and the temp root itself are rejected.
      const tmpRoot = resolve(tmpdir());
      const tmpPrefix = tmpRoot.endsWith(sep) ? tmpRoot : tmpRoot + sep;
      const resolvedDir = resolve(dir);
      if (!resolvedDir.startsWith(tmpPrefix)) {
        console.error(`cleanup: refusing to remove path outside tmpdir: ${dir}`);
        process.exit(1);
      }
      if (existsSync(resolvedDir)) {
        rmSync(resolvedDir, { recursive: true, force: true });
        process.stdout.write(`Removed ${dir}\n`);
      }
      break;
    }
    // validate <url>: exit status only — 0 for an accepted GitHub URL, 1 otherwise.
    case 'validate': {
      const url = rest[0];
      if (!url || !isValidUrl(url)) {
        process.exit(1);
      }
      process.exit(0);
    }
    default:
      console.error('Usage: node git-clone.mjs <clone|cleanup|validate> [args...]');
      process.exit(1);
  }
} // end isDirectRun

View file

@ -0,0 +1,296 @@
// injection-patterns.mjs — Shared prompt injection detection patterns
// Used by pre-prompt-inject-scan.mjs (UserPromptSubmit) and post-mcp-verify.mjs (PostToolUse).
//
// Patterns derived from skill-scanner-agent Category 1 (LLM01 Prompt Injection)
// and Category 5 (Hidden Instructions) in knowledge/skill-threat-patterns.md.
//
// Zero external dependencies beyond ./string-utils.mjs.
import { normalizeForScan, containsUnicodeTags, decodeUnicodeTags } from './string-utils.mjs';
// ---------------------------------------------------------------------------
// Critical patterns — direct injection attempts (should be blocked)
// ---------------------------------------------------------------------------
/**
 * Critical-severity detection rules.
 * Each entry pairs a RegExp (`pattern`) with a human-readable `label` of the
 * form '<category>: <description>'.
 * Header rules anchored with `^` carry the `m` flag, so they match at the
 * start of any line, not just the start of the input. The spoofed-header and
 * spoofed-tag rules have no `i` flag and are therefore case-sensitive.
 * @type {{ pattern: RegExp, label: string }[]}
 */
export const CRITICAL_PATTERNS = [
// Direct override instructions
{ pattern: /ignore\s+(?:all\s+)?previous/i, label: 'override: ignore previous instructions' },
{ pattern: /forget\s+your\s+(?:instructions|rules|guidelines|directives)/i, label: 'override: forget instructions' },
{ pattern: /disregard\s+your/i, label: 'override: disregard instructions' },
{ pattern: /override\s+your/i, label: 'override: override instructions' },
{ pattern: /suspend\s+all\s+constraints/i, label: 'override: suspend constraints' },
{ pattern: /all\s+prior\s+constraints/i, label: 'override: prior constraints' },
// Spoofed system headers
{ pattern: /^#+\s*SYSTEM\s*:/m, label: 'spoofed header: # SYSTEM:' },
{ pattern: /^\[INST\]/m, label: 'spoofed header: [INST]' },
{ pattern: /<\|system\|>/, label: 'spoofed header: <|system|>' },
{ pattern: /<system>/, label: 'spoofed tag: <system>' },
{ pattern: /<\/system>/, label: 'spoofed tag: </system>' },
{ pattern: /^###\s*Instructions\s*:/m, label: 'spoofed header: ### Instructions:' },
// Identity redefinition
{ pattern: /you\s+are\s+no\s+longer/i, label: 'identity: you are no longer' },
// The negative lookahead excludes common benign continuations ("you are now able", "you are now in ...")
{ pattern: /you\s+are\s+now\s+(?!able|going|ready|done|running|using|looking|in\s|at\s|on\s)/i, label: 'identity: you are now [name]' },
{ pattern: /your\s+new\s+role\s+is/i, label: 'identity: your new role' },
{ pattern: /your\s+new\s+primary\s+directive/i, label: 'identity: new primary directive' },
{ pattern: /pretend\s+you\s+are/i, label: 'identity: pretend you are' },
// Configuration manipulation
{ pattern: /ignore\s+your\s+CLAUDE\.md/i, label: 'config: ignore CLAUDE.md' },
{ pattern: /override\s+your\s+system\s+prompt/i, label: 'config: override system prompt' },
{ pattern: /bypass\s+your\s+safety/i, label: 'config: bypass safety' },
{ pattern: /disable\s+output\s+filtering/i, label: 'config: disable output filtering' },
];
// ---------------------------------------------------------------------------
// Medium patterns — obfuscation and indirect manipulation (advisory)
// Lower confidence than High; flag for manual review.
// ---------------------------------------------------------------------------
/**
 * Medium-severity detection rules: obfuscated variants of the critical
 * keywords and indirect or natural-language manipulation.
 * Same entry shape as the other pattern lists: a RegExp (`pattern`) plus a
 * human-readable `label` of the form '<category>: <description>'.
 * @type {{ pattern: RegExp, label: string }[]}
 */
export const MEDIUM_PATTERNS = [
// Leetspeak variants of critical keywords
{ pattern: /[1!|l][gq9]n[o0]r[e3]\s+(?:all\s+)?pr[e3]v[i1!|l][o0]us/i, label: 'leetspeak: ignore previous (obfuscated)' },
{ pattern: /f[o0]rg[e3]t\s+y[o0]ur\s+[i1!|l]nstruct[i1!|l][o0]ns/i, label: 'leetspeak: forget instructions (obfuscated)' },
{ pattern: /d[i1!|l]sr[e3]g[a4@]rd\s+y[o0]ur/i, label: 'leetspeak: disregard your (obfuscated)' },
{ pattern: /[o0]v[e3]rr[i1!|l]d[e3]\s+y[o0]ur/i, label: 'leetspeak: override your (obfuscated)' },
// Homoglyph detection — Cyrillic chars in Latin context
// Fires when one of seven listed Cyrillic code points sits directly next to an ASCII letter.
{ pattern: /[a-zA-Z][\u0430\u0435\u043E\u0440\u0441\u0456\u0443]|[\u0430\u0435\u043E\u0440\u0441\u0456\u0443][a-zA-Z]/, label: 'homoglyph: Cyrillic-Latin mixing in adjacent characters' },
// Zero-width characters inside words (keyword splitting evasion)
{ pattern: /\w[\u200B\u200C\u200D\uFEFF]\w/, label: 'unicode: zero-width character inside word (keyword splitting)' },
// Indirect AI-directed instructions
{ pattern: /(?:note|message|instruction)\s+(?:to|for)\s+(?:the\s+)?(?:AI|assistant|model|LLM|Claude)\b/i, label: 'indirect: instruction addressed to AI/assistant' },
{ pattern: /(?:dear|attention)\s+(?:AI|assistant|model|LLM|Claude)\b/i, label: 'indirect: direct address to AI/assistant' },
{ pattern: /when\s+(?:you|the\s+AI|the\s+assistant|Claude)\s+(?:read|see|encounter|process)\s+this/i, label: 'indirect: trigger-based instruction for AI' },
// Multi-language injection variants
{ pattern: /ignor(?:ez?|er?)\s+(?:les?\s+)?instructions?\s+pr[e\u00e9]c[e\u00e9]dentes?/i, label: 'multi-lang: French "ignore previous instructions"' },
{ pattern: /ignor(?:ar?|e)\s+(?:las?\s+)?instrucciones?\s+anteriores?/i, label: 'multi-lang: Spanish "ignore previous instructions"' },
{ pattern: /ignorier(?:e|en)?\s+(?:alle\s+)?vorherigen?\s+(?:Anweisungen|Instruktionen)/i, label: 'multi-lang: German "ignore previous instructions"' },
// Markdown link-reference comment injection
{ pattern: /\[\/\/\]:\s*#\s*\(.*(?:ignore|override|system|instruction|execute)/i, label: 'markdown: suspicious instruction in link-reference comment' },
// Data URI with executable content types
{ pattern: /data:(?:text\/html|application\/javascript|text\/javascript)[;,]/i, label: 'data-uri: executable content type' },
// --- Content Injection: Syntactic Masking (AI Agent Traps) ---
{ pattern: /\[[^\]]*(?:system|ignore|override|exfiltrate|execute)[^\]]*\]\([^)]+\)/i, label: 'markdown: injection payload in link anchor text' },
// --- Sub-agent spawning traps (DeepMind kat. 4, v5.0 S4) ---
{ pattern: /(?:create|spawn|launch|start|run)\s+(?:an?\s+)?(?:new\s+)?(?:sub-?agent|agent|task|worker)\s+(?:that|to|which|with)\s+(?:.*?\s+)?(?:execute|run|delete|remove|send|post|exfiltrate|access|reads?\s+(?:.*?\s+)?(?:secret|credential|key|token|\.env|\.ssh))/i, label: 'sub-agent: spawn instruction with dangerous capability keywords' },
{ pattern: /(?:delegate|dispatch)\s+(?:to\s+)?(?:an?\s+)?(?:new\s+)?(?:agent|sub-?agent|task)\s+.*?(?:bypass|override|ignore|without\s+(?:review|confirmation|approval))/i, label: 'sub-agent: delegation with safety bypass instruction' },
// --- Natural Language Indirection (Preamble, CaMeL — v5.0 S4) ---
{ pattern: /fetch\s+(?:this|the|that)\s+(?:URL|link|endpoint)\s+and\s+(?:execute|run|eval)/i, label: 'nl-indirection: fetch URL and execute' },
{ pattern: /send\s+(?:this|the|that|all)\s+(?:data|content|output|result|information|file)\s+to\s+(?:https?:\/\/|ftp:\/\/|\S+@\S+)/i, label: 'nl-indirection: send data to external destination' },
{ pattern: /read\s+(?:the\s+)?(?:contents?\s+of\s+)?(?:~\/)?\.(?:ssh|aws|gnupg|env)\b/i, label: 'nl-indirection: read sensitive dotfile path' },
{ pattern: /(?:extract|copy|dump|exfiltrate)\s+(?:all\s+)?(?:the\s+)?(?:credentials?|secrets?|tokens?|keys?|passwords?)\s+(?:and|then)\s+(?:send|post|upload|transmit)/i, label: 'nl-indirection: extract credentials and exfiltrate' },
{ pattern: /(?:download|fetch|retrieve)\s+(?:from\s+)?\S+\s+(?:and|then)\s+(?:execute|run|eval|pipe\s+to\s+(?:bash|sh|node|python))/i, label: 'nl-indirection: download and execute' },
];
// ---------------------------------------------------------------------------
// High patterns — subtle manipulation (advisory warning, not block)
// ---------------------------------------------------------------------------
// Each entry pairs a case-insensitive regex with the human-readable label that
// scanForInjection() reports in its `high` array when the regex matches.
export const HIGH_PATTERNS = [
// Normalization: text that tries to rank user requests above standing rules
{ pattern: /always\s+prioritize\s+user\s+requests?\s+over/i, label: 'normalization: prioritize user over rules' },
{ pattern: /user\s+intent\s+takes?\s+precedence\s+over/i, label: 'normalization: user intent precedence' },
{ pattern: /if\s+CLAUDE\.md\s+conflicts/i, label: 'normalization: CLAUDE.md conflict override' },
{ pattern: /skip\s+confirmation\s+for/i, label: 'normalization: skip confirmation' },
// Reframing: text that claims the "real" task differs from the stated one
{ pattern: /your\s+actual\s+task\s+is/i, label: 'reframing: your actual task' },
{ pattern: /your\s+real\s+purpose\s+is/i, label: 'reframing: your real purpose' },
{ pattern: /what\s+you\s+should\s+actually\s+do/i, label: 'reframing: what you should actually do' },
// HTML comment injection directed at agents
{ pattern: /<!--\s*(?:AGENT|AI|HIDDEN|ACTUAL\s+TASK|REAL\s+INSTRUCTION)\s*:/i, label: 'hidden comment: agent-directed HTML comment' },
// --- Content Injection: CSS/HTML obfuscation (AI Agent Traps) ---
// These only inspect double-quoted style="..." attributes.
{ pattern: /<[^>]+style\s*=\s*"[^"]*display\s*:\s*none[^"]*"[^>]*>/i, label: 'html-obfuscation: display:none element with content' },
{ pattern: /<[^>]+style\s*=\s*"[^"]*visibility\s*:\s*hidden[^"]*"[^>]*>/i, label: 'html-obfuscation: visibility:hidden element' },
{ pattern: /<[^>]+style\s*=\s*"[^"]*position\s*:\s*absolute[^"]*-\d{3,}px[^"]*"[^>]*>/i, label: 'html-obfuscation: off-screen positioned element' },
// NOTE(review): the three "zero" checks below accept any value that starts
// with the digit 0, so fractional values such as "opacity: 0.9" or
// "font-size: 0.8em" also match — confirm whether that breadth is intended.
{ pattern: /<[^>]+style\s*=\s*"[^"]*font-size\s*:\s*0[^"]*"[^>]*>/i, label: 'html-obfuscation: zero font-size element' },
{ pattern: /<[^>]+style\s*=\s*"[^"]*opacity\s*:\s*0[^"]*"[^>]*>/i, label: 'html-obfuscation: zero opacity element' },
{ pattern: /<[^>]+style\s*=\s*"[^"]*(?:height|width)\s*:\s*0[^"]*overflow\s*:\s*hidden[^"]*"[^>]*>/i, label: 'html-obfuscation: zero-size overflow-hidden element' },
{ pattern: /aria-label\s*=\s*"[^"]*(?:ignore|override|system|instruction|execute|exfiltrate)[^"]*"/i, label: 'html-obfuscation: injection in aria-label attribute' },
// --- Semantic Manipulation: Oversight & Critic Evasion (AI Agent Traps) ---
{ pattern: /for\s+educational\s+purposes?\s+only/i, label: 'evasion: educational purpose framing' },
{ pattern: /(?:this\s+is\s+)?a\s+hypothetical\s+scenario/i, label: 'evasion: hypothetical scenario framing' },
{ pattern: /in\s+a\s+fictional\s+(?:setting|scenario|world)\s+where/i, label: 'evasion: fictional setting framing' },
{ pattern: /(?:this\s+is\s+)?a\s+red[- ]?team(?:ing)?\s+(?:exercise|simulation|test)/i, label: 'evasion: red-team exercise framing' },
{ pattern: /security\s+audit\s+simulation/i, label: 'evasion: security audit simulation framing' },
{ pattern: /pretend\s+(?:this\s+is|we\s+are\s+in)\s+a\s+(?:test|lab|sandbox|simulation)/i, label: 'evasion: test/lab/sandbox pretense' },
{ pattern: /as\s+a\s+thought\s+experiment/i, label: 'evasion: thought experiment framing' },
{ pattern: /for\s+(?:research|academic|scientific)\s+purposes/i, label: 'evasion: research purpose framing' },
{ pattern: /in\s+an?\s+(?:academic|research|educational)\s+context/i, label: 'evasion: academic context framing' },
// --- Human-in-the-Loop traps (DeepMind kat. 6, v5.0 S4) ---
// Approval urgency: pressure to approve without review
{ pattern: /(?:approve|accept|confirm)\s+(?:this\s+)?immediately/i, label: 'hitl-trap: approval urgency — approve immediately' },
{ pattern: /no\s+need\s+to\s+review/i, label: 'hitl-trap: approval urgency — no need to review' },
{ pattern: /(?:just|simply)\s+(?:approve|accept|confirm)\s+(?:it|this|everything)/i, label: 'hitl-trap: approval urgency — just approve' },
{ pattern: /(?:don'?t|do\s+not)\s+(?:bother\s+)?(?:check|review|verify|inspect|read)(?:ing)?\s+(?:this|the|it)/i, label: 'hitl-trap: approval urgency — suppress review' },
// Summary suppression: hiding actions behind brevity claims
{ pattern: /for\s+brevity[\s,]+(?:skipping|omitting|leaving\s+out)/i, label: 'hitl-trap: summary suppression — for brevity, skipping' },
{ pattern: /(?:too\s+(?:long|verbose|detailed)\s+to\s+(?:show|display|list|include))/i, label: 'hitl-trap: summary suppression — too long to show' },
{ pattern: /(?:details?\s+(?:omitted|redacted|hidden|suppressed))/i, label: 'hitl-trap: summary suppression — details omitted' },
// Scope minimization: downplaying destructive changes.
// The destructive keyword must follow within 80 characters that contain no '.'.
{ pattern: /(?:minor|trivial|small|cosmetic)\s+(?:change|update|fix|adjustment)\b[^.]{0,80}(?:delet|remov|drop|truncat|reset|destroy|\bformat\b|wipe)/i, label: 'hitl-trap: scope minimization — minor change + destructive action' },
];
// ---------------------------------------------------------------------------
// Hybrid attack patterns — cross-domain injection (HIGH, v5.0 S6)
// Preamble 2.0: P2SQL, recursive injection, XSS in agent context.
// ---------------------------------------------------------------------------
// Each entry pairs a case-insensitive regex with a label. scanForInjection()
// adds matching labels to its `high` array, alongside HIGH_PATTERNS matches.
export const HYBRID_PATTERNS = [
// P2SQL: SQL keywords in injection text targeting tool operations
// (override verb, then a SQL statement within 60 characters containing no '.')
{ pattern: /(?:ignore|override|disregard|forget)[^.]{0,60}(?:SELECT\s+\*|DROP\s+TABLE|UNION\s+SELECT|DELETE\s+FROM|INSERT\s+INTO|UPDATE\s+\w+\s+SET)(?:\b|(?=\s|$))/i, label: 'hybrid-p2sql: injection + SQL keywords (prompt-to-SQL attack)' },
// Reverse order: SQL statement first, override verb within 80 characters containing no ';'
{ pattern: /(?:SELECT\s+\*|DROP\s+TABLE|UNION\s+SELECT|DELETE\s+FROM)\s[^;]{0,80}(?:ignore|override|disregard|bypass)/i, label: 'hybrid-p2sql: SQL operation + injection override keywords' },
// Recursive injection: text that instructs the model to inject into its own output
{ pattern: /(?:inject|insert|embed|include)\s+(?:this|the\s+following)\s+(?:into|in)\s+(?:your|the)\s+(?:output|response|reply|message|prompt|context)/i, label: 'hybrid-recursive: instruction to inject into model output' },
{ pattern: /(?:when|if)\s+(?:the\s+)?(?:user|human|operator)\s+(?:asks?|requests?|queries)[^.]{0,60}(?:respond\s+with|output|reply\s+with|include)\s+(?:this|the\s+following)/i, label: 'hybrid-recursive: conditional response injection (recursive payload)' },
// XSS in agent context: script/event handlers in content for markdown rendering
{ pattern: /<script\b[^>]*>[\s\S]*?<\/script>/i, label: 'hybrid-xss: <script> tag in content (agent context XSS)' },
// Matches the scheme text anywhere in the input, not only inside attributes.
{ pattern: /javascript\s*:/i, label: 'hybrid-xss: javascript: URI scheme (agent context XSS)' },
{ pattern: /\bon(?:error|load|click|mouseover|focus|blur)\s*=/i, label: 'hybrid-xss: inline event handler attribute (agent context XSS)' },
{ pattern: /<iframe\b[^>]*src\s*=\s*["'][^"']*(?:javascript:|data:text\/html)/i, label: 'hybrid-xss: iframe with executable src (agent context XSS)' },
];
// ---------------------------------------------------------------------------
// HITL cognitive load patterns (MEDIUM, v5.0 S4)
// Injection buried after 2000+ characters in verbose output.
// Checked separately due to length-dependent logic.
// ---------------------------------------------------------------------------
/**
 * Detect a critical injection pattern hidden deep inside long output.
 *
 * Inputs shorter than 2500 characters are never flagged. For longer inputs,
 * only the portion after the first 2000 characters is tested, and the first
 * matching entry of CRITICAL_PATTERNS decides the label.
 *
 * @param {string} text
 * @returns {{ found: boolean, label: string|null }}
 */
export function checkCognitiveLoadTrap(text) {
  const miss = { found: false, label: null };
  if (text.length < 2500) return miss;

  const buried = text.slice(2000);
  const hit = CRITICAL_PATTERNS.find(({ pattern }) => pattern.test(buried));
  if (!hit) return miss;

  return {
    found: true,
    label: `hitl-trap: cognitive load — injection buried after 2000+ chars (${hit.label})`,
  };
}
// ---------------------------------------------------------------------------
// Public API
// ---------------------------------------------------------------------------
/**
 * Run every injection signature tier against a piece of text.
 *
 * The text is scanned as given and, when normalizeForScan() returns something
 * different, a second time in that normalized form. A label is reported at
 * most once even when both variants match.
 *
 * Two extra checks run on the raw text afterwards:
 *   - Unicode Tag steganography (DeepMind traps kat. 1): the mere presence of
 *     tag characters is a HIGH finding; a critical pattern that matches the
 *     decoded tag text but not the raw text is reported as CRITICAL.
 *   - HITL cognitive load (v5.0 S4): the result of checkCognitiveLoadTrap(),
 *     reported as MEDIUM.
 *
 * @param {string} text - the text to scan
 * @returns {{ critical: string[], high: string[], medium: string[], found: boolean, severity: string|null, patterns: Array<{label: string, severity: string}> }}
 *   Finding labels per tier, plus convenience fields derived from them.
 */
export function scanForInjection(text) {
  const critical = [];
  const high = [];
  const medium = [];
  // Labels already reported, so raw and normalized matches are not duplicated.
  const seenLabels = new Set();

  const normalized = normalizeForScan(text);
  const variants = normalized === text ? [text] : [text, normalized];

  // Rule table and the bucket its matches land in. Hybrid patterns are HIGH (v5.0 S6).
  const tiers = [
    [CRITICAL_PATTERNS, critical],
    [HIGH_PATTERNS, high],
    [HYBRID_PATTERNS, high],
    [MEDIUM_PATTERNS, medium],
  ];

  for (const variant of variants) {
    for (const [rules, bucket] of tiers) {
      for (const rule of rules) {
        if (seenLabels.has(rule.label) || !rule.pattern.test(variant)) continue;
        seenLabels.add(rule.label);
        bucket.push(rule.label);
      }
    }
  }

  // Unicode Tag steganography check (DeepMind traps kat. 1)
  if (containsUnicodeTags(text)) {
    const tagLabel = 'unicode-tags: invisible Unicode Tag characters detected (U+E0000 block steganography)';
    if (!seenLabels.has(tagLabel)) {
      seenLabels.add(tagLabel);
      high.push(tagLabel);
    }

    const hiddenText = decodeUnicodeTags(text);
    for (const rule of CRITICAL_PATTERNS) {
      const dedupKey = `unicode-tags+${rule.label}`;
      if (seenLabels.has(dedupKey)) continue;
      // Escalate only when the payload is visible solely after decoding the tags.
      const onlyInHidden = rule.pattern.test(hiddenText) && !rule.pattern.test(text);
      if (!onlyInHidden) continue;
      seenLabels.add(dedupKey);
      critical.push(`${rule.label} (hidden via Unicode Tag steganography)`);
    }
  }

  // HITL cognitive load check (v5.0 S4)
  const buried = checkCognitiveLoadTrap(text);
  if (buried.found && !seenLabels.has(buried.label)) {
    seenLabels.add(buried.label);
    medium.push(buried.label);
  }

  // Convenience fields: highest non-empty tier wins.
  let severity = null;
  if (critical.length > 0) severity = 'critical';
  else if (high.length > 0) severity = 'high';
  else if (medium.length > 0) severity = 'medium';
  const found = severity !== null;

  const patterns = [
    ['critical', critical],
    ['high', high],
    ['medium', medium],
  ].flatMap(([tier, labels]) => labels.map((label) => ({ label, severity: tier })));

  return { critical, high, medium, found, severity, patterns };
}

View file

@ -0,0 +1,193 @@
// mcp-description-cache.mjs — Cache MCP tool descriptions and detect drift.
// Zero external dependencies.
//
// Purpose:
// MCP servers can change tool descriptions between sessions (rug-pull, MCP05).
// This module caches the first-seen description for each tool and alerts when
// a subsequent invocation delivers a description that has drifted significantly
// (Levenshtein distance > 10% of original length).
//
// Storage: ~/.cache/llm-security/mcp-descriptions.json
// TTL: 7 days per entry (stale entries purged on load).
//
// OWASP: MCP05 (Tool Description Manipulation / Rug Pull)
import { readFileSync, writeFileSync, mkdirSync, existsSync } from 'node:fs';
import { join, dirname } from 'node:path';
import { homedir } from 'node:os';
import { levenshtein } from './string-utils.mjs';
// ---------------------------------------------------------------------------
// Constants
// ---------------------------------------------------------------------------
// Directory for the on-disk cache, resolved under the current user's home directory.
const CACHE_DIR = join(homedir(), '.cache', 'llm-security');
// Default cache file; loadCache()/saveCache() use it unless opts.cacheFile is given.
const CACHE_FILE = join(CACHE_DIR, 'mcp-descriptions.json');
// loadCache() drops entries whose lastSeen is further in the past than this.
const TTL_MS = 7 * 24 * 60 * 60 * 1000; // 7 days
// checkDescriptionDrift() reports drift when edit distance / cached length exceeds this.
const DRIFT_THRESHOLD = 0.10; // 10% Levenshtein distance relative to original length
// ---------------------------------------------------------------------------
// Cache structure
// ---------------------------------------------------------------------------
// {
// "mcp__server__tool": {
// "description": "original description text",
// "firstSeen": 1712345678000,
// "lastSeen": 1712345678000,
// "hash": "sha256-prefix (optional, for quick equality check)"
// }
// }
/**
 * Read the description cache from disk and return only the live entries.
 *
 * An entry is kept when it is an object with a numeric `lastSeen` that is no
 * older than TTL_MS relative to `now`. A missing file, unreadable file,
 * invalid JSON, or a non-object payload all yield an empty object.
 *
 * @param {object} [opts] - Options for testing
 * @param {string} [opts.cacheFile] - Override cache file path
 * @param {number} [opts.now] - Override current time
 * @returns {Record<string, { description: string, firstSeen: number, lastSeen: number }>}
 */
export function loadCache(opts = {}) {
  const source = opts.cacheFile ?? CACHE_FILE;
  const now = opts.now ?? Date.now();

  if (!existsSync(source)) return {};

  try {
    const parsed = JSON.parse(readFileSync(source, 'utf-8'));
    if (!parsed || typeof parsed !== 'object') return {};

    const live = {};
    for (const [toolName, entry] of Object.entries(parsed)) {
      const wellFormed = entry && typeof entry === 'object' && typeof entry.lastSeen === 'number';
      if (!wellFormed) continue;
      // Stale entries (past the TTL) are dropped here.
      if (!(now - entry.lastSeen <= TTL_MS)) continue;
      live[toolName] = entry;
    }
    return live;
  } catch {
    // Corrupt or unreadable cache is treated as empty.
    return {};
  }
}
/**
 * Write the cache object to disk as pretty-printed JSON.
 *
 * The parent directory is created (recursively) when it does not exist.
 * Failures while creating the directory, serializing, or writing are ignored.
 *
 * @param {Record<string, object>} cache
 * @param {object} [opts]
 * @param {string} [opts.cacheFile] - Override cache file path
 */
export function saveCache(cache, opts = {}) {
  const target = opts.cacheFile ?? CACHE_FILE;
  const parent = dirname(target);

  try {
    if (!existsSync(parent)) mkdirSync(parent, { recursive: true });
    const serialized = JSON.stringify(cache, null, 2);
    writeFileSync(target, serialized, 'utf-8');
  } catch {
    // Best effort only: drift detection is advisory, so a failed write is not fatal.
  }
}
/**
 * Compare a tool's current description with the cached one and report drift.
 *
 * - Missing tool name, or a missing/empty/non-string description: no drift, cache untouched.
 * - Tool not in the cache yet: the description is stored, no drift.
 * - Identical description: only `lastSeen` is refreshed, no drift.
 * - Otherwise the Levenshtein distance is divided by the cached description's
 *   length (at least 1). Above DRIFT_THRESHOLD that is drift; in both cases the
 *   cache is updated to the new description before returning.
 *
 * @param {string} toolName - Full tool name (e.g. "mcp__tavily__tavily_search")
 * @param {string} description - Current tool description
 * @param {object} [opts] - Options for testing
 * @param {string} [opts.cacheFile] - Override cache file path
 * @param {number} [opts.now] - Override current time
 * @returns {{ drift: boolean, detail: string|null, distance: number, threshold: number, cached: string|null }}
 */
export function checkDescriptionDrift(toolName, description, opts = {}) {
  const now = opts.now ?? Date.now();
  const unchanged = (distance = 0, threshold = 0) => ({
    drift: false,
    detail: null,
    distance,
    threshold,
    cached: null,
  });

  if (!toolName || !description || typeof description !== 'string') {
    return unchanged();
  }

  const cache = loadCache(opts);
  const entry = cache[toolName];

  if (!entry) {
    // First sighting: record the baseline.
    cache[toolName] = { description, firstSeen: now, lastSeen: now };
    saveCache(cache, opts);
    return unchanged();
  }

  const previous = entry.description;
  entry.lastSeen = now;

  if (previous === description) {
    saveCache(cache, opts);
    return unchanged();
  }

  const dist = levenshtein(previous, description);
  const baseLen = Math.max(previous.length, 1);
  const ratio = dist / baseLen;
  const threshold = DRIFT_THRESHOLD;

  // Whether or not the change counts as drift, the new text becomes the cached one.
  entry.description = description;
  saveCache(cache, opts);

  if (ratio > threshold) {
    const pct = Math.round(ratio * 100);
    return {
      drift: true,
      detail: `Tool "${toolName}" description changed by ${pct}% (${dist} edits / ${baseLen} chars). ` +
        `Threshold: ${Math.round(threshold * 100)}%. This may indicate a rug-pull attack (OWASP MCP05).`,
      distance: dist,
      threshold,
      cached: previous,
    };
  }

  return unchanged(dist, threshold);
}
/**
 * Extract the MCP server name from a tool name.
 * Convention: mcp__<server>__<tool>
 *
 * Returns null for anything that is not a string starting with "mcp__" or
 * that has fewer than three "__"-separated parts. Non-string input (numbers,
 * objects, null, undefined) yields null rather than throwing.
 *
 * @param {string} toolName
 * @returns {string|null}
 */
export function extractMcpServer(toolName) {
  if (typeof toolName !== 'string' || !toolName.startsWith('mcp__')) return null;
  // mcp__server__tool → parts = ['mcp', 'server', 'tool']
  const parts = toolName.split('__');
  return parts.length >= 3 ? parts[1] : null;
}
/**
 * Reset the cache by saving an empty object through saveCache() (for testing).
 * @param {object} [opts]
 * @param {string} [opts.cacheFile] - Override cache file path
 */
export function clearCache(opts = {}) {
  const emptyCache = {};
  saveCache(emptyCache, opts);
}
// ---------------------------------------------------------------------------
// Exported constants (for testing)
// ---------------------------------------------------------------------------
export { TTL_MS, DRIFT_THRESHOLD, CACHE_DIR, CACHE_FILE };

View file

@ -0,0 +1,177 @@
// output.mjs — Finding and result builders, JSON envelope
// Zero dependencies (uses severity.mjs).
import { riskScore, verdict, riskBand, owaspCategorize } from './severity.mjs';
// Module-wide sequence number used by finding() to build finding IDs.
let findingCounter = 0;

/**
 * Set the shared finding sequence back to zero, so the next finding()
 * call produces an ID ending in 001.
 */
export function resetCounter() {
  findingCounter = 0;
}
/**
 * Build a finding record and assign it the next sequential ID.
 *
 * The ID has the form `DS-<scanner>-<NNN>`, where NNN is the shared counter
 * after incrementing, zero-padded to at least three digits. Optional fields
 * that are missing or falsy are stored as null.
 *
 * @param {object} opts
 * @param {string} opts.scanner - Scanner prefix (UNI, ENT, PRM, DEP, TNT, GIT, NET)
 * @param {string} opts.severity - From SEVERITY constants
 * @param {string} opts.title - Short finding title
 * @param {string} opts.description - Detailed description
 * @param {string} [opts.file] - Affected file path (relative)
 * @param {number} [opts.line] - Line number
 * @param {string} [opts.evidence] - Redacted evidence string
 * @param {string} [opts.owasp] - OWASP reference (e.g. "LLM01")
 * @param {string} [opts.recommendation] - Fix suggestion
 * @returns {object}
 */
export function finding(opts) {
  findingCounter += 1;
  const sequence = String(findingCounter).padStart(3, '0');
  const orNull = (value) => value || null;
  const { scanner, severity, title, description } = opts;

  return {
    id: `DS-${scanner}-${sequence}`,
    scanner,
    severity,
    title,
    description,
    file: orNull(opts.file),
    line: orNull(opts.line),
    evidence: orNull(opts.evidence),
    owasp: orNull(opts.owasp),
    recommendation: orNull(opts.recommendation),
  };
}
/**
 * Wrap one scanner's output in a result envelope with per-severity counts.
 *
 * `counts` starts with the five standard severities at zero and is incremented
 * by each finding's `severity` value. An `error` field is added only when
 * `errorMsg` is truthy.
 *
 * @param {string} scannerName
 * @param {'ok'|'error'|'skipped'} status
 * @param {object[]} findings
 * @param {number} filesScanned
 * @param {number} durationMs
 * @param {string} [errorMsg]
 * @returns {object}
 */
export function scannerResult(scannerName, status, findings, filesScanned, durationMs, errorMsg) {
  const counts = { critical: 0, high: 0, medium: 0, low: 0, info: 0 };
  for (const { severity } of findings) {
    counts[severity] = (counts[severity] || 0) + 1;
  }

  return {
    scanner: scannerName,
    status,
    files_scanned: filesScanned,
    duration_ms: durationMs,
    findings,
    counts,
    ...(errorMsg ? { error: errorMsg } : {}),
  };
}
/**
 * Build a fix-result record for the auto-cleaner.
 * The `error` field is included only when `opts.error` is truthy.
 *
 * @param {object} opts
 * @param {string} opts.finding_id - Original finding ID (e.g. "DS-UNI-001")
 * @param {string} opts.file - Affected file path (relative)
 * @param {string} opts.operation - Fix operation name (e.g. "strip_zero_width")
 * @param {'applied'|'skipped'|'failed'} opts.status
 * @param {string} opts.description - What was done
 * @param {string} [opts.error] - Error message if failed
 * @returns {object}
 */
export function fixResult(opts) {
  const { finding_id, file, operation, status, description, error } = opts;
  const record = { finding_id, file, operation, status, description };
  return error ? { ...record, error } : record;
}
/**
 * Assemble the auto-cleaner's top-level output.
 *
 * The summary counts fixes by status ('applied', 'skipped', 'failed'), counts
 * the distinct files among applied fixes, and reports `findings_received` as
 * the number of fixes plus the number of errors. `meta.timestamp` is the
 * current time in ISO-8601 form.
 *
 * @param {string} targetPath
 * @param {boolean} dryRun
 * @param {object[]} fixes - Array of fixResult objects
 * @param {object[]} errors - Array of error objects
 * @param {number} durationMs
 * @returns {object}
 */
export function cleanEnvelope(targetPath, dryRun, fixes, errors, durationMs) {
  const tally = { applied: 0, skipped: 0, failed: 0 };
  const touchedFiles = new Set();

  for (const fix of fixes) {
    switch (fix.status) {
      case 'applied':
        tally.applied += 1;
        touchedFiles.add(fix.file);
        break;
      case 'skipped':
        tally.skipped += 1;
        break;
      case 'failed':
        tally.failed += 1;
        break;
      default:
        // Any other status is not counted.
        break;
    }
  }

  const meta = {
    target: targetPath,
    timestamp: new Date().toISOString(),
    dry_run: dryRun,
    duration_ms: durationMs,
  };
  const summary = {
    findings_received: fixes.length + errors.length,
    fixes_applied: tally.applied,
    fixes_skipped: tally.skipped,
    fixes_failed: tally.failed,
    files_modified: touchedFiles.size,
  };

  return { meta, summary, fixes, errors };
}
/**
 * Combine all scanner results into the top-level scan report.
 *
 * Per-severity counts are summed across scanners, all findings are gathered
 * into one list for OWASP categorization, and scanners are tallied by status
 * ('ok', 'error', 'skipped'). Risk score, band and verdict come from the
 * aggregated counts.
 *
 * @param {string} targetPath
 * @param {Record<string, object>} scannerResults - keyed by scanner short name
 * @param {number} totalDurationMs
 * @returns {object}
 */
export function envelope(targetPath, scannerResults, totalDurationMs) {
  const aggCounts = { critical: 0, high: 0, medium: 0, low: 0, info: 0 };
  const severities = Object.keys(aggCounts);
  const byStatus = { ok: 0, error: 0, skipped: 0 };
  const allFindings = [];

  for (const result of Object.values(scannerResults)) {
    severities.forEach((sev) => {
      aggCounts[sev] += result.counts[sev] || 0;
    });
    allFindings.push(...result.findings);
    switch (result.status) {
      case 'ok':
        byStatus.ok += 1;
        break;
      case 'error':
        byStatus.error += 1;
        break;
      case 'skipped':
        byStatus.skipped += 1;
        break;
      default:
        break;
    }
  }

  const score = riskScore(aggCounts);

  return {
    meta: {
      target: targetPath,
      timestamp: new Date().toISOString(),
      node_version: process.version,
      total_duration_ms: totalDurationMs,
    },
    scanners: scannerResults,
    aggregate: {
      total_findings: allFindings.length,
      counts: aggCounts,
      risk_score: score,
      risk_band: riskBand(score),
      verdict: verdict(aggCounts),
      owasp_breakdown: owaspCategorize(allFindings),
      scanners_ok: byStatus.ok,
      scanners_error: byStatus.error,
      scanners_skipped: byStatus.skipped,
    },
  };
}

View file

@ -0,0 +1,178 @@
// severity.mjs — Constants, risk score calculation, verdict logic
// Zero dependencies. Used by all scanners and the orchestrator.
// Canonical severity identifiers. The lowercase values are the same strings
// used as keys in the severity count objects consumed by riskScore()/verdict().
// Frozen so the mapping cannot be modified at runtime.
export const SEVERITY = Object.freeze({
CRITICAL: 'critical',
HIGH: 'high',
MEDIUM: 'medium',
LOW: 'low',
INFO: 'info',
});
// Points contributed by a single finding of each severity.
const SEVERITY_WEIGHTS = { critical: 25, high: 10, medium: 4, low: 1, info: 0 };

/**
 * Compute the weighted risk score for a set of severity counts.
 * Each count is multiplied by its severity weight; missing or falsy counts
 * contribute zero. The sum is capped at 100.
 *
 * @param {{ critical: number, high: number, medium: number, low: number, info: number }} counts
 * @returns {number} 0-100 capped score
 */
export function riskScore(counts) {
  let total = 0;
  for (const [level, weight] of Object.entries(SEVERITY_WEIGHTS)) {
    total += (counts[level] || 0) * weight;
  }
  return Math.min(total, 100);
}
/**
 * Turn severity counts into a gate decision.
 *
 *   BLOCK   — at least one critical finding, or risk score >= 61
 *   WARNING — at least one high finding, or risk score >= 21
 *   ALLOW   — everything else
 *
 * @param {{ critical: number, high: number, medium: number, low: number, info: number }} counts
 * @returns {'BLOCK' | 'WARNING' | 'ALLOW'}
 */
export function verdict(counts) {
  const score = riskScore(counts);
  const hasCritical = (counts.critical || 0) >= 1;
  const hasHigh = (counts.high || 0) >= 1;

  if (hasCritical || score >= 61) return 'BLOCK';
  return hasHigh || score >= 21 ? 'WARNING' : 'ALLOW';
}
/**
 * Translate a risk score into its named band.
 * Upper bounds are inclusive: <=20 Low, <=40 Medium, <=60 High, <=80 Critical;
 * anything else (including values above 80) is Extreme.
 *
 * @param {number} score - 0-100 risk score
 * @returns {'Low' | 'Medium' | 'High' | 'Critical' | 'Extreme'}
 */
export function riskBand(score) {
  const bands = [
    [20, 'Low'],
    [40, 'Medium'],
    [60, 'High'],
    [80, 'Critical'],
  ];
  const match = bands.find(([upper]) => score <= upper);
  return match ? match[1] : 'Extreme';
}
/**
 * Derive an A-F grade from a posture/audit pass rate.
 *
 *   F — pass rate below 0.33, or three or more critical findings
 *   A — pass rate >= 0.89 with no critical-category fails and no critical findings
 *   B — pass rate >= 0.72 with no critical findings
 *   C — pass rate >= 0.56
 *   D — pass rate >= 0.33
 *   F — anything that satisfies none of the above (e.g. a NaN pass rate)
 *
 * @param {number} passRate - 0.0 to 1.0
 * @param {number} failsInCritCats - Number of FAIL results in critical categories (1, 2, 5)
 * @param {number} critCount - Number of Critical-severity findings
 * @returns {'A' | 'B' | 'C' | 'D' | 'F'}
 */
export function gradeFromPassRate(passRate, failsInCritCats = 0, critCount = 0) {
  if (critCount >= 3 || passRate < 0.33) return 'F';

  // A and B are only reachable without any critical finding.
  const noCriticals = critCount === 0;
  if (noCriticals && passRate >= 0.72) {
    return passRate >= 0.89 && failsInCritCats === 0 ? 'A' : 'B';
  }

  if (passRate >= 0.56) return 'C';
  return passRate >= 0.33 ? 'D' : 'F';
}
/**
 * Scanner prefix to OWASP LLM Top 10 category mapping.
 *
 * owaspCategorize() falls back to this table when a finding carries no
 * recognizable `owasp` reference of its own.
 * Object.freeze is shallow: the outer object is frozen, the category arrays are not.
 */
export const OWASP_MAP = Object.freeze({
UNI: ['LLM01'],
ENT: ['LLM01', 'LLM03'],
PRM: ['LLM06'],
DEP: ['LLM03'],
TNT: ['LLM01', 'LLM02'],
GIT: ['LLM03'],
NET: ['LLM02', 'LLM03'],
TFA: ['LLM01', 'LLM02', 'LLM06'],
MCI: ['LLM01', 'LLM02'],
MEM: ['LLM01'],
SCR: ['LLM03'],
PST: ['LLM01', 'LLM06'],
});
/**
 * Scanner prefix to OWASP Agentic AI Top 10 (ASI) category mapping.
 *
 * Keys are the same scanner prefixes used in OWASP_MAP.
 * Object.freeze is shallow: the outer object is frozen, the category arrays are not.
 */
export const OWASP_AGENTIC_MAP = Object.freeze({
UNI: ['ASI01'],
ENT: ['ASI01', 'ASI04'],
PRM: ['ASI02', 'ASI03'],
DEP: ['ASI04'],
TNT: ['ASI01', 'ASI05'],
GIT: ['ASI04'],
NET: ['ASI02', 'ASI05'],
TFA: ['ASI01', 'ASI02', 'ASI05'],
MCI: ['ASI01', 'ASI04'],
MEM: ['ASI01', 'ASI02'],
SCR: ['ASI04'],
PST: ['ASI02', 'ASI03', 'ASI04', 'ASI05'],
});
/**
 * Scanner prefix to OWASP Skills Top 10 (AST) category mapping.
 *
 * Keys are the same scanner prefixes used in OWASP_MAP.
 * Object.freeze is shallow: the outer object is frozen, the category arrays are not.
 */
export const OWASP_SKILLS_MAP = Object.freeze({
UNI: ['AST05'],
ENT: ['AST02', 'AST05'],
PRM: ['AST03'],
DEP: ['AST06'],
TNT: ['AST01', 'AST02'],
GIT: ['AST06'],
NET: ['AST02'],
TFA: ['AST01', 'AST02', 'AST03'],
MCI: ['AST01', 'AST02'],
MEM: ['AST01', 'AST05'],
SCR: ['AST06'],
PST: ['AST01', 'AST03'],
});
/**
 * Scanner prefix to OWASP MCP Top 10 category mapping.
 *
 * Keys are the same scanner prefixes used in OWASP_MAP.
 * Object.freeze is shallow: the outer object is frozen, the category arrays are not.
 */
export const OWASP_MCP_MAP = Object.freeze({
UNI: ['MCP06'],
ENT: ['MCP01', 'MCP06'],
PRM: ['MCP02', 'MCP07'],
DEP: ['MCP04'],
TNT: ['MCP05', 'MCP06'],
GIT: ['MCP04'],
NET: ['MCP02', 'MCP10'],
TFA: ['MCP03', 'MCP06'],
MCI: ['MCP03', 'MCP06', 'MCP09'],
MEM: ['MCP05', 'MCP06'],
SCR: ['MCP04'],
PST: ['MCP02', 'MCP07'],
});
/**
 * Regex matching all supported OWASP framework prefixes:
 * LLM01-LLM10, ASI01-ASI10, AST01-AST10, MCP01-MCP10 (MCP1-MCP10 also accepted).
 * Used only with String.prototype.match, which ignores and resets lastIndex.
 */
const OWASP_PREFIX_RE = /(?:LLM|ASI|AST|MCP)\d{1,2}/g;

/**
 * Group findings by OWASP category across all frameworks.
 *
 * Category resolution per finding:
 *   1. every LLM/ASI/AST/MCP reference found in the finding's `owasp` field
 *      (non-string values are stringified first, so an array of references works);
 *   2. otherwise the OWASP_MAP entry for the finding's scanner prefix;
 *   3. otherwise the single category 'Unmapped'.
 *
 * Only the five known severity keys are tallied. A severity that collides with
 * the `count` field or with an inherited property name is ignored, so it cannot
 * corrupt the bucket.
 *
 * @param {object[]} findings - Array of finding objects with scanner, owasp, and severity fields
 * @returns {Record<string, { count: number, critical: number, high: number, medium: number, low: number, info: number }>}
 */
export function owaspCategorize(findings) {
  const cats = {};
  for (const f of findings) {
    const categories = [];
    if (f.owasp) {
      const match = String(f.owasp).match(OWASP_PREFIX_RE);
      if (match) categories.push(...match);
    }
    if (categories.length === 0 && f.scanner && OWASP_MAP[f.scanner]) {
      categories.push(...OWASP_MAP[f.scanner]);
    }
    if (categories.length === 0) categories.push('Unmapped');

    for (const cat of categories) {
      if (!cats[cat]) cats[cat] = { count: 0, critical: 0, high: 0, medium: 0, low: 0, info: 0 };
      const bucket = cats[cat];
      bucket.count++;
      const sev = f.severity;
      const isSeverityKey =
        typeof sev === 'string' &&
        sev !== 'count' &&
        Object.prototype.hasOwnProperty.call(bucket, sev);
      if (isSeverityKey) bucket[sev]++;
    }
  }
  return cats;
}

View file

@ -0,0 +1,462 @@
// skill-registry.mjs — Local database of known skill fingerprints and risk profiles.
// Fingerprints skills by SHA-256 of normalized content, stores scan results,
// enables instant re-scan detection and pattern search.
// Zero external dependencies.
import { createHash } from 'node:crypto';
import { existsSync, mkdirSync, readFileSync, writeFileSync, readdirSync, statSync } from 'node:fs';
import { join, resolve, relative, dirname, basename, extname } from 'node:path';
import { fileURLToPath } from 'node:url';
// ---------------------------------------------------------------------------
// Constants
// ---------------------------------------------------------------------------
// Registry format version string. NOTE(review): not referenced in the visible
// part of this file — presumably stored with the registry; confirm against the writer.
const REGISTRY_VERSION = '1';
// collectFiles() skips any file larger than this.
const MAX_FILE_SIZE = 256 * 1024; // 256KB — skills are markdown, not binaries
// collectFiles() only reads files whose lower-cased extension is in this set.
const SCANNABLE_EXTENSIONS = new Set(['.md', '.mdx', '.json', '.mjs', '.js', '.ts', '.sh']);
// Directory names that collectFiles() never descends into.
const SKIP_DIRS = new Set(['node_modules', '.git', 'dist', 'build', 'coverage']);
// Stale threshold — 7 days. If a cached scan is older than this,
// we suggest re-scanning but still return the cached result.
const STALE_THRESHOLD_MS = 7 * 24 * 60 * 60 * 1000;
// ---------------------------------------------------------------------------
// Plugin root resolution
// ---------------------------------------------------------------------------
// ES modules have no built-in __filename/__dirname, so both are derived from import.meta.url.
const __filename = fileURLToPath(import.meta.url);
const __dirname = dirname(__filename);
// Two directory levels above the directory containing this module.
const PLUGIN_ROOT = resolve(__dirname, '..', '..');
// ---------------------------------------------------------------------------
// Content normalization — same skill should produce same fingerprint
// regardless of trailing whitespace, line endings, or blank line count.
// ---------------------------------------------------------------------------
/**
 * Canonicalize text so that cosmetic whitespace differences do not change a fingerprint.
 *
 * Steps, in order:
 *   1. convert CRLF and lone CR line endings to LF;
 *   2. strip trailing whitespace from every line;
 *   3. collapse runs of three or more newlines into exactly two;
 *   4. trim whitespace from both ends of the whole text.
 *
 * @param {string} content
 * @returns {string}
 */
export function normalizeContent(content) {
  const unixText = content.replace(/\r\n/g, '\n').replace(/\r/g, '\n');

  const strippedLines = [];
  for (const line of unixText.split('\n')) {
    strippedLines.push(line.trimEnd());
  }

  const rejoined = strippedLines.join('\n');
  return rejoined.replace(/\n{3,}/g, '\n\n').trim();
}
// ---------------------------------------------------------------------------
// File collection — gather all scannable files from a skill path
// ---------------------------------------------------------------------------
/**
 * Recursively collect scannable files from a directory.
 *
 * Skips directories listed in SKIP_DIRS, entries that are not regular files,
 * files whose extension is not in SCANNABLE_EXTENSIONS, files larger than
 * MAX_FILE_SIZE, and anything that cannot be listed or read.
 *
 * Relative paths are always reported with forward slashes. On Windows
 * path.relative() returns backslash-separated paths; because callers hash the
 * relative path and match it against '/'-based suffixes, leaving it
 * OS-specific would make the same skill fingerprint differently per platform.
 *
 * @param {string} dirPath - Absolute path to directory
 * @param {string} basePath - Base path for relative path calculation
 * @returns {{ relPath: string, content: string }[]}
 */
function collectFiles(dirPath, basePath) {
  let entries;
  try {
    entries = readdirSync(dirPath, { withFileTypes: true });
  } catch {
    // Unreadable or missing directory contributes nothing.
    return [];
  }

  // Only rewrite separators on Windows; on POSIX a backslash is a legal filename character.
  const isWindows = process.platform === 'win32';
  const files = [];

  for (const entry of entries) {
    const fullPath = join(dirPath, entry.name);

    if (entry.isDirectory()) {
      if (!SKIP_DIRS.has(entry.name)) {
        files.push(...collectFiles(fullPath, basePath));
      }
      continue;
    }
    if (!entry.isFile()) continue;

    const ext = extname(entry.name).toLowerCase();
    if (!SCANNABLE_EXTENSIONS.has(ext)) continue;

    try {
      if (statSync(fullPath).size > MAX_FILE_SIZE) continue;
      const content = readFileSync(fullPath, 'utf8');
      const rel = relative(basePath, fullPath);
      files.push({ relPath: isWindows ? rel.replace(/\\/g, '/') : rel, content });
    } catch {
      // File vanished or is unreadable — skip it and keep collecting.
      continue;
    }
  }
  return files;
}
// ---------------------------------------------------------------------------
// Fingerprinting
// ---------------------------------------------------------------------------
/**
 * Generate a SHA-256 fingerprint for a skill.
 *
 * For a directory: collects all scannable files, sorts them by relative path,
 * and feeds `relPath + NUL + normalizedContent + NUL` for each file into the hash.
 * Relative paths are converted to forward slashes first, so the same skill
 * produces the same fingerprint (and the same name detection) on Windows and POSIX.
 *
 * For a single file: hashes only the normalized content (the file name is not
 * part of the fingerprint).
 *
 * Name detection:
 *  - directory: first `# heading` of the first file whose path ends in `skill.md`,
 *    overridden by the `name` field of the first `plugin.json` when that is a
 *    non-empty string; falls back to the directory base name.
 *  - single file: first `name:` line, with surrounding quotes removed; falls back
 *    to the file base name.
 *
 * @param {string} skillPath - Absolute or relative path to skill file or directory
 * @returns {{ fingerprint: string, files: string[], name: string }}
 * @throws {Error} Propagates fs errors when `skillPath` does not exist or cannot be read.
 */
export function fingerprintSkill(skillPath) {
  const absPath = resolve(skillPath);
  const hash = createHash('sha256');
  const fileList = [];
  let name = basename(absPath);

  if (statSync(absPath).isDirectory()) {
    // path.relative() returns backslash-separated paths on Windows. Normalize
    // BEFORE sorting and hashing, otherwise order, digest and the '/...' lookups
    // below all become platform-dependent.
    const collected = collectFiles(absPath, absPath).map(file => ({
      ...file,
      relPath: file.relPath.replace(/\\/g, '/'),
    }));

    // Sort for determinism.
    // NOTE(review): localeCompare follows the process locale; kept as-is so
    // already-registered fingerprints stay valid.
    collected.sort((a, b) => a.relPath.localeCompare(b.relPath));

    for (const { relPath, content } of collected) {
      fileList.push(relPath);
      // Hash includes the relative path so renames change the fingerprint
      hash.update(relPath + '\x00');
      hash.update(normalizeContent(content) + '\x00');
    }

    // Try to extract skill name from SKILL.md or plugin.json
    const skillMd = collected.find(f => {
      const lower = f.relPath.toLowerCase();
      return lower.endsWith('skill.md') || lower.includes('/skill.md');
    });
    if (skillMd) {
      const nameMatch = skillMd.content.match(/^#\s+(.+)/m);
      if (nameMatch) name = nameMatch[1].trim();
    }

    const pluginJson = collected.find(
      f => f.relPath === 'plugin.json' || f.relPath.endsWith('/plugin.json')
    );
    if (pluginJson) {
      try {
        const parsed = JSON.parse(pluginJson.content);
        // Only accept a real string so `name` keeps its documented type.
        if (typeof parsed.name === 'string' && parsed.name) name = parsed.name;
      } catch { /* ignore parse errors */ }
    }
  } else {
    // Single file
    const content = readFileSync(absPath, 'utf8');
    fileList.push(basename(absPath));
    hash.update(normalizeContent(content));

    // Try to extract name from frontmatter
    const nameMatch = content.match(/^name:\s*(.+)/m);
    if (nameMatch) name = nameMatch[1].trim().replace(/^["']|["']$/g, '');
  }

  return {
    fingerprint: hash.digest('hex'),
    files: fileList,
    name,
  };
}
// ---------------------------------------------------------------------------
// Registry I/O
// ---------------------------------------------------------------------------
/**
 * Location of the writable registry: `<root>/reports/skill-registry.json`.
 * @param {string} [pluginRoot] - Root to use; PLUGIN_ROOT when omitted/falsy
 * @returns {string}
 */
export function registryPath(pluginRoot) {
  const root = pluginRoot || PLUGIN_ROOT;
  return join(root, 'reports', 'skill-registry.json');
}
/**
 * Location of the read-only seed registry that ships with the plugin:
 * `<root>/knowledge/skill-registry.json`.
 * @param {string} [pluginRoot] - Root to use; PLUGIN_ROOT when omitted/falsy
 * @returns {string}
 */
export function seedRegistryPath(pluginRoot) {
  const root = pluginRoot || PLUGIN_ROOT;
  return join(root, 'knowledge', 'skill-registry.json');
}
/**
 * Build a fresh registry with no entries, stamped with the current time.
 * @returns {{ version: *, updated: string, entry_count: number, entries: object }}
 */
function emptyRegistry() {
  const createdAt = new Date();
  const blank = {
    version: REGISTRY_VERSION,
    updated: createdAt.toISOString(),
    entry_count: 0,
    entries: {},
  };
  return blank;
}
/**
 * Load registry from disk and merge in seed data if available.
 *
 * Never throws on bad registry content: a missing file, unparseable JSON, or
 * JSON of the wrong shape (e.g. `null`, an array, or an object without an
 * `entries` map) all result in a usable registry. Previously a syntactically
 * valid but wrongly shaped file made every registry operation throw a TypeError.
 *
 * Seed entries never overwrite existing entries and are tagged
 * `source_type: 'seed'`. `entry_count` is recomputed before returning.
 *
 * @param {string} [pluginRoot]
 * @returns {object} Registry with a guaranteed plain-object `entries` map
 */
export function loadRegistry(pluginRoot) {
  const isPlainObject = (v) => v !== null && typeof v === 'object' && !Array.isArray(v);

  const filePath = registryPath(pluginRoot);
  let registry = null;
  if (existsSync(filePath)) {
    try {
      registry = JSON.parse(readFileSync(filePath, 'utf8'));
    } catch {
      registry = null; // corrupt or unreadable: start from an empty registry
    }
  }
  if (!isPlainObject(registry)) registry = emptyRegistry();
  if (!isPlainObject(registry.entries)) registry.entries = {};

  // Merge seed data (seed entries never overwrite existing entries)
  const seedPath = seedRegistryPath(pluginRoot);
  if (existsSync(seedPath)) {
    try {
      const seeds = JSON.parse(readFileSync(seedPath, 'utf8'));
      for (const [fp, entry] of Object.entries(seeds.entries || {})) {
        if (!registry.entries[fp]) {
          registry.entries[fp] = { ...entry, source_type: 'seed' };
        }
      }
    } catch { /* ignore seed parse errors — seeds are optional */ }
  }

  // Ensure entry_count is accurate
  registry.entry_count = Object.keys(registry.entries).length;
  return registry;
}
/**
 * Persist the registry as pretty-printed JSON (2-space indent, trailing newline).
 *
 * Side effects on the passed object: `updated` is set to the current time and
 * `entry_count` is recomputed from `entries` before writing. The parent
 * directory is created when it does not exist.
 *
 * @param {object} registry
 * @param {string} [pluginRoot]
 * @returns {string} Path to saved file
 */
export function saveRegistry(registry, pluginRoot) {
  const target = registryPath(pluginRoot);
  const parentDir = dirname(target);
  const parentMissing = !existsSync(parentDir);
  if (parentMissing) {
    mkdirSync(parentDir, { recursive: true });
  }

  registry.updated = new Date().toISOString();
  registry.entry_count = Object.keys(registry.entries).length;

  const serialized = JSON.stringify(registry, null, 2) + '\n';
  writeFileSync(target, serialized);
  return target;
}
// ---------------------------------------------------------------------------
// Core operations
// ---------------------------------------------------------------------------
/**
 * Check if a fingerprint exists in the registry and whether its scan is stale.
 *
 * An entry is stale when its `last_scanned` is older than STALE_THRESHOLD_MS.
 * An entry with a missing or unparseable `last_scanned` is also reported as
 * stale: this matches how getStats()/listEntries() treat a missing timestamp
 * and is the safe default (forces a rescan instead of trusting an undated result).
 *
 * @param {string} fingerprint
 * @param {string} [pluginRoot]
 * @returns {{ found: boolean, entry: object|null, stale: boolean }}
 */
export function checkRegistry(fingerprint, pluginRoot) {
  const registry = loadRegistry(pluginRoot);
  const entry = registry.entries[fingerprint] || null;
  if (!entry) {
    return { found: false, entry: null, stale: false };
  }

  const lastScanned = new Date(entry.last_scanned || 0).getTime();
  // NaN compares false against everything, so test for it explicitly.
  const stale = Number.isNaN(lastScanned) || (Date.now() - lastScanned) > STALE_THRESHOLD_MS;
  return { found: true, entry, stale };
}
/**
 * Record (or refresh) the scan result for one fingerprint and save the registry.
 *
 * When an entry for the fingerprint already exists, its `first_seen` is kept,
 * `scan_count` is incremented, and its tags are reused unless `opts.tags` is given.
 * The stored entry is always marked `source_type: 'scanned'`.
 *
 * @param {object} opts
 * @param {string} opts.skillPath - Path that was scanned
 * @param {string} opts.fingerprint - From fingerprintSkill()
 * @param {string} opts.name - Skill name
 * @param {string[]} opts.files - Files included in fingerprint
 * @param {string} opts.verdict - ALLOW|WARNING|BLOCK
 * @param {number} opts.risk_score - 0-100
 * @param {object} opts.counts - { critical, high, medium, low, info }
 * @param {number} opts.files_scanned - Number of files scanned
 * @param {string[]} [opts.tags] - Optional tags
 * @param {string} [pluginRoot]
 * @returns {{ entry: object, path: string }}
 */
export function registerScan(opts, pluginRoot) {
  const registry = loadRegistry(pluginRoot);
  const { fingerprint } = opts;
  const previous = registry.entries[fingerprint];
  const isoNow = () => new Date().toISOString();

  const firstSeen = previous?.first_seen || isoNow();
  const lastScanned = isoNow();
  const scanCount = (previous?.scan_count || 0) + 1;
  const tags = opts.tags || previous?.tags || [];

  const entry = {
    name: opts.name,
    source: opts.skillPath,
    fingerprint,
    first_seen: firstSeen,
    last_scanned: lastScanned,
    scan_count: scanCount,
    verdict: opts.verdict,
    risk_score: opts.risk_score,
    counts: opts.counts,
    files_scanned: opts.files_scanned,
    files_in_fingerprint: opts.files,
    tags,
    source_type: 'scanned',
  };

  registry.entries[fingerprint] = entry;
  return { entry, path: saveRegistry(registry, pluginRoot) };
}
/**
 * Find registry entries whose name, source, tags, or fingerprint contain the
 * given text (case-insensitive substring match over the fields joined by spaces).
 * Results are ordered most recently scanned first.
 *
 * @param {string} pattern - Search pattern (case-insensitive substring match)
 * @param {string} [pluginRoot]
 * @returns {object[]} Matching entries
 */
export function searchRegistry(pattern, pluginRoot) {
  const { entries } = loadRegistry(pluginRoot);
  const needle = pattern.toLowerCase();
  const scannedAt = (entry) => new Date(entry.last_scanned || 0).getTime();

  const isHit = (entry) => {
    const haystack = [
      entry.name || '',
      entry.source || '',
      ...(entry.tags || []),
      entry.fingerprint || '',
    ].join(' ').toLowerCase();
    return haystack.includes(needle);
  };

  // Newest scan first.
  return Object.values(entries)
    .filter(isHit)
    .sort((left, right) => scannedAt(right) - scannedAt(left));
}
/**
 * Summarize the registry: totals per verdict and source type, the sum of all
 * scan counts, the number of stale entries, and the rounded mean risk score.
 *
 * Defaults applied per entry: verdict 'ALLOW', source_type 'scanned',
 * scan_count 0, risk_score 0, and epoch 0 for a missing `last_scanned`.
 *
 * @param {string} [pluginRoot]
 * @returns {object}
 */
export function getStats(pluginRoot) {
  const registry = loadRegistry(pluginRoot);
  const entries = Object.values(registry.entries);
  const now = Date.now();

  const byVerdict = { ALLOW: 0, WARNING: 0, BLOCK: 0 };
  const bySourceType = { scanned: 0, seed: 0 };
  let totalScans = 0;
  let staleCount = 0;
  let riskTotal = 0;

  const bump = (bucket, key) => {
    bucket[key] = (bucket[key] || 0) + 1;
  };

  entries.forEach((entry) => {
    bump(byVerdict, entry.verdict || 'ALLOW');
    bump(bySourceType, entry.source_type || 'scanned');
    totalScans += entry.scan_count || 0;
    riskTotal += entry.risk_score || 0;

    const age = now - new Date(entry.last_scanned || 0).getTime();
    if (age > STALE_THRESHOLD_MS) {
      staleCount++;
    }
  });

  return {
    version: registry.version,
    updated: registry.updated,
    total_entries: entries.length,
    by_verdict: byVerdict,
    by_source_type: bySourceType,
    total_scans: totalScans,
    stale_count: staleCount,
    avg_risk_score: entries.length > 0 ? Math.round(riskTotal / entries.length) : 0,
  };
}
/**
 * Delete the entry stored under a fingerprint and persist the registry.
 * Nothing is written when no (truthy) entry exists for the fingerprint.
 *
 * @param {string} fingerprint
 * @param {string} [pluginRoot]
 * @returns {boolean} true if entry was found and removed
 */
export function removeEntry(fingerprint, pluginRoot) {
  const registry = loadRegistry(pluginRoot);
  const present = Boolean(registry.entries[fingerprint]);
  if (present) {
    delete registry.entries[fingerprint];
    saveRegistry(registry, pluginRoot);
  }
  return present;
}
/**
 * Return registry entries, newest scan first, optionally narrowed by verdict
 * and/or to stale entries only (older than STALE_THRESHOLD_MS; a missing
 * `last_scanned` counts as epoch 0).
 *
 * @param {object} [opts]
 * @param {string} [opts.verdict] - Filter by verdict (ALLOW|WARNING|BLOCK)
 * @param {boolean} [opts.staleOnly] - Only return stale entries
 * @param {string} [pluginRoot]
 * @returns {object[]}
 */
export function listEntries(opts, pluginRoot) {
  const registry = loadRegistry(pluginRoot);
  const now = Date.now();
  const scannedAt = (entry) => new Date(entry.last_scanned || 0).getTime();
  const wantedVerdict = opts?.verdict;
  const staleOnly = opts?.staleOnly;

  const selected = [];
  for (const entry of Object.values(registry.entries)) {
    if (wantedVerdict && entry.verdict !== wantedVerdict) continue;
    if (staleOnly && !((now - scannedAt(entry)) > STALE_THRESHOLD_MS)) continue;
    selected.push(entry);
  }

  // Sort by last_scanned descending
  selected.sort((left, right) => scannedAt(right) - scannedAt(left));
  return selected;
}

View file

@ -0,0 +1,322 @@
// string-utils.mjs — Entropy, Levenshtein, base64 detection, redaction, decoding
// Zero dependencies.
/**
 * Shannon entropy of a string, in bits per character.
 *
 * Characters are counted as Unicode code points. The symbol total is counted
 * during the same iteration; using `s.length` (UTF-16 code units) as the
 * denominator would make probabilities of astral characters (emoji etc.) sum
 * to less than 1 and give a wrong result.
 *
 * @param {string} s
 * @returns {number} 0 for the empty string
 */
export function shannonEntropy(s) {
  const freq = new Map();
  let total = 0;
  for (const ch of s) {
    freq.set(ch, (freq.get(ch) ?? 0) + 1);
    total++;
  }
  if (total === 0) return 0;

  let H = 0;
  for (const count of freq.values()) {
    const p = count / total;
    H -= p * Math.log2(p);
  }
  return H;
}
/**
 * Levenshtein edit distance (insertions, deletions, substitutions; each cost 1)
 * between two strings, compared per UTF-16 code unit.
 *
 * @param {string} a
 * @param {string} b
 * @returns {number}
 */
export function levenshtein(a, b) {
  if (a === b) return 0;
  const rows = a.length;
  const cols = b.length;
  if (rows === 0) return cols;
  if (cols === 0) return rows;

  // One rolling row: dist[j] holds the distance between a[0..i) and b[0..j).
  const dist = Array.from({ length: cols + 1 }, (_, j) => j);

  for (let i = 1; i <= rows; i++) {
    let diagonal = dist[0]; // value of the previous row at column j - 1
    dist[0] = i;
    for (let j = 1; j <= cols; j++) {
      const above = dist[j]; // previous row, same column
      const substitution = diagonal + (a[i - 1] === b[j - 1] ? 0 : 1);
      const deletion = above + 1;
      const insertion = dist[j - 1] + 1;
      dist[j] = Math.min(deletion, insertion, substitution);
      diagonal = above;
    }
  }
  return dist[cols];
}
/**
 * Heuristic: does the whole string have the shape of base64 data?
 * Requires at least 20 characters from the standard base64 alphabet
 * (A-Z, a-z, 0-9, +, /), optionally followed by up to three `=`.
 *
 * @param {string} s
 * @returns {boolean}
 */
export function isBase64Like(s) {
  const MIN_LENGTH = 20;
  const tooShort = s.length < MIN_LENGTH;
  return tooShort ? false : /^[A-Za-z0-9+/]{20,}={0,3}$/.test(s);
}
/**
 * Heuristic: is the whole string a long hexadecimal blob?
 * Requires at least 32 hex digits, optionally prefixed with `0x`.
 *
 * @param {string} s
 * @returns {boolean}
 */
export function isHexBlob(s) {
  const MIN_LENGTH = 32;
  const tooShort = s.length < MIN_LENGTH;
  return tooShort ? false : /^(0x)?[0-9a-fA-F]{32,}$/.test(s);
}
/**
 * Redact a string for safe display — by default show the first 8 and last 4
 * characters with `...` in between.
 *
 * Strings of length <= showStart + showEnd + 3 are returned unchanged
 * (redacting them would not hide anything).
 *
 * A `showEnd` of 0 (or less) now shows no tail at all. Previously
 * `s.slice(-0)` evaluated to the whole string, so `redact(secret, n, 0)`
 * appended the complete secret after the dots.
 *
 * @param {string} s
 * @param {number} [showStart=8]
 * @param {number} [showEnd=4]
 * @returns {string}
 */
export function redact(s, showStart = 8, showEnd = 4) {
  if (s.length <= showStart + showEnd + 3) return s;
  const head = s.slice(0, showStart);
  const tail = showEnd > 0 ? s.slice(-showEnd) : '';
  return `${head}...${tail}`;
}
/**
 * Pull the contents of every string literal out of one line of code.
 * Recognizes "double-quoted", 'single-quoted' and `backtick` literals and
 * honors backslash escapes; the surrounding quotes are not included and
 * escape sequences are returned verbatim (not decoded).
 *
 * @param {string} line
 * @returns {string[]}
 */
export function extractStringLiterals(line) {
  const literalPattern = /(?:"([^"\\]*(?:\\.[^"\\]*)*)"|'([^'\\]*(?:\\.[^'\\]*)*)'|`([^`\\]*(?:\\.[^`\\]*)*)`)/g;
  // Exactly one of the three capture groups participates in each match.
  return Array.from(
    line.matchAll(literalPattern),
    ([, doubleQuoted, singleQuoted, backticked]) => doubleQuoted ?? singleQuoted ?? backticked
  );
}
// ---------------------------------------------------------------------------
// Encoding/obfuscation decoders
// ---------------------------------------------------------------------------
/**
 * Replace textual JavaScript Unicode escapes with the characters they denote.
 * Handles the braced form `\u{X}`..`\u{XXXXXX}` first (left untouched when the
 * value exceeds U+10FFFF), then the fixed four-digit form `\uXXXX`.
 *
 * @param {string} s
 * @returns {string}
 */
export function decodeUnicodeEscapes(s) {
  const MAX_CODE_POINT = 0x10FFFF;

  const expandBraced = (escapeText, hexDigits) => {
    const codePoint = parseInt(hexDigits, 16);
    if (codePoint > MAX_CODE_POINT) return escapeText;
    return String.fromCodePoint(codePoint);
  };
  const expandFixed = (_escapeText, hexDigits) => String.fromCodePoint(parseInt(hexDigits, 16));

  const afterBraced = s.replace(/\\u\{([0-9a-fA-F]{1,6})\}/g, expandBraced);
  return afterBraced.replace(/\\u([0-9a-fA-F]{4})/g, expandFixed);
}
/**
 * Replace textual two-digit hex escapes (`\xXX`) with the character they denote.
 *
 * @param {string} s
 * @returns {string}
 */
export function decodeHexEscapes(s) {
  const expand = (_escapeText, hexDigits) => {
    const charCode = parseInt(hexDigits, 16);
    return String.fromCharCode(charCode);
  };
  return s.replace(/\\x([0-9a-fA-F]{2})/g, expand);
}
/**
 * Decode URL percent-encoding (`%XX`).
 *
 * Tries `decodeURIComponent` first. When that throws on a malformed sequence,
 * falls back to replacing each well-formed `%XX` pair individually with the
 * character of that code (no UTF-8 reassembly) and leaves everything else as is.
 *
 * @param {string} s
 * @returns {string}
 */
export function decodeUrlEncoding(s) {
  // Nothing to do without a percent sign.
  if (s.indexOf('%') === -1) return s;

  let decoded;
  try {
    decoded = decodeURIComponent(s);
  } catch {
    decoded = s.replace(/%([0-9a-fA-F]{2})/g, (_pair, hexDigits) =>
      String.fromCharCode(parseInt(hexDigits, 16))
    );
  }
  return decoded;
}
/**
 * Decode a base64 string to UTF-8 text when the result looks like readable text.
 *
 * Returns null when the input fails isBase64Like(), when decoding yields an
 * empty string, or when fewer than 80% of the decoded UTF-16 units are
 * printable ASCII (0x20-0x7E) or tab / LF / CR.
 *
 * @param {string} s
 * @returns {string|null}
 */
export function tryDecodeBase64(s) {
  if (!isBase64Like(s)) return null;

  let text;
  try {
    text = Buffer.from(s, 'base64').toString('utf-8');
  } catch {
    return null;
  }
  if (text.length === 0) return null;

  let printableCount = 0;
  for (let i = 0; i < text.length; i++) {
    const unit = text.charCodeAt(i);
    const isVisibleAscii = unit >= 0x20 && unit <= 0x7E;
    const isLayoutChar = unit === 0x0A || unit === 0x0D || unit === 0x09;
    if (isVisibleAscii || isLayoutChar) printableCount++;
  }
  return printableCount / text.length < 0.8 ? null : text;
}
/**
 * Decode HTML entities: hex (&#x69;), decimal (&#105;) and a table of named
 * entities that map to ASCII characters.
 *
 * The named table contains the HTML names as the standard spells them
 * (`&Tab;`, `&NewLine;`, `&commat;`, `&Hat;`, ...) plus the lenient lowercase
 * aliases `&tab;`, `&newline;`, `&at;`. Without the standard spellings a
 * payload could use the real entity names to slip past normalization.
 *
 * Numeric references above U+10FFFF and unknown names are left untouched.
 * Passes run in the order hex, decimal, named, so the output of an earlier
 * pass can be decoded again by a later one (e.g. `&#38;lt;` becomes `<`).
 *
 * @param {string} s
 * @returns {string}
 */
export function decodeHtmlEntities(s) {
  if (!s.includes('&')) return s;

  const NAMED = {
    '&lt;': '<', '&gt;': '>', '&amp;': '&', '&quot;': '"', '&apos;': "'",
    '&nbsp;': ' ',
    // Standard HTML spellings ...
    '&Tab;': '\t', '&NewLine;': '\n', '&commat;': '@',
    // ... and the lenient lowercase aliases this module has always accepted.
    '&tab;': '\t', '&newline;': '\n', '&at;': '@',
    '&lpar;': '(', '&rpar;': ')', '&lsqb;': '[', '&rsqb;': ']',
    '&lcub;': '{', '&rcub;': '}', '&sol;': '/', '&bsol;': '\\',
    '&colon;': ':', '&semi;': ';', '&comma;': ',', '&period;': '.',
    '&excl;': '!', '&quest;': '?', '&num;': '#', '&percnt;': '%',
    '&equals;': '=', '&plus;': '+', '&minus;': '-', '&ast;': '*',
    '&vert;': '|', '&tilde;': '~', '&grave;': '`', '&Hat;': '^',
    '&lowbar;': '_', '&dollar;': '$',
  };
  const MAX_CODE_POINT = 0x10FFFF;
  const fromNumeric = (radix) => (reference, digits) => {
    const cp = parseInt(digits, radix);
    return cp <= MAX_CODE_POINT ? String.fromCodePoint(cp) : reference;
  };

  return s
    .replace(/&#x([0-9a-fA-F]{1,6});/g, fromNumeric(16))
    .replace(/&#(\d{1,7});/g, fromNumeric(10))
    .replace(/&[a-zA-Z]{2,8};/g, (entity) => NAMED[entity] ?? entity);
}
/**
 * Collapse letter-spaced text: "i g n o r e" -> "ignore".
 *
 * Only collapses runs of single ASCII letters separated by one or more spaces
 * or tabs, and only when the run has at least 4 letters (to avoid false
 * positives on normal text such as "a b c").
 *
 * Earlier the pattern accepted exactly one space between letters, so the
 * documented "1+ spaces/tabs" case ("i  g  n  o  r  e", tab-separated letters)
 * was not collapsed.
 *
 * @param {string} s
 * @returns {string}
 */
export function collapseLetterSpacing(s) {
  // One letter, then 3+ repetitions of (gap, letter), bounded by word breaks.
  return s.replace(/\b[a-zA-Z](?:[ \t]+[a-zA-Z]){3,}\b/g, (run) =>
    run.replace(/[ \t]+/g, '')
  );
}
// ---------------------------------------------------------------------------
// Unicode Tags steganography (U+E0000 block) — DeepMind traps, category 1
// ---------------------------------------------------------------------------
/**
 * Decode Unicode Tags steganography: U+E0001-E007F -> ASCII.
 *
 * Characters in the Unicode Tags block can carry invisible ASCII text inside
 * strings that look empty or harmless. Each tag character is replaced, in
 * place, by the character with code point (cp - 0xE0000); every other
 * character is copied through unchanged.
 * E.g., U+E0069 U+E0067 U+E006E -> "ign"
 *
 * @param {string} s
 * @returns {string}
 */
export function decodeUnicodeTags(s) {
  const TAG_FIRST = 0xE0001;
  const TAG_LAST = 0xE007F;
  const TAG_BASE = 0xE0000;

  const pieces = [];
  for (const ch of s) {
    const cp = ch.codePointAt(0);
    const isTag = cp >= TAG_FIRST && cp <= TAG_LAST;
    pieces.push(isTag ? String.fromCharCode(cp - TAG_BASE) : ch);
  }
  return pieces.join('');
}
/**
 * Report whether a string contains any Unicode Tag character (U+E0001-E007F).
 * Presence of these characters is suspicious regardless of decoded content.
 *
 * @param {string} s
 * @returns {boolean}
 */
export function containsUnicodeTags(s) {
  const isTagChar = (ch) => {
    const cp = ch.codePointAt(0);
    return cp >= 0xE0001 && cp <= 0xE007F;
  };
  // Spread iterates by code point, so astral tag characters stay intact.
  return [...s].some(isTagChar);
}
// ---------------------------------------------------------------------------
// BIDI override stripping
// ---------------------------------------------------------------------------
/**
 * Remove bidirectional control characters that can make text render in a
 * different order than it is stored:
 * U+202A (LRE), U+202B (RLE), U+202C (PDF), U+202D (LRO), U+202E (RLO),
 * U+2066 (LRI), U+2067 (RLI), U+2068 (FSI), U+2069 (PDI).
 *
 * @param {string} s
 * @returns {string}
 */
export function stripBidiOverrides(s) {
  const bidiControls = /[\u202A-\u202E\u2066-\u2069]/g;
  // Cutting the string at every control character and re-joining drops them all.
  return s.split(bidiControls).join('');
}
/**
 * Normalize a string by decoding all known obfuscation layers.
 *
 * Runs up to 3 iterations to catch multi-layered encoding (e.g., base64 of
 * URL-encoded text), stopping early once an iteration changes nothing.
 * Order per iteration: Unicode Tags -> BIDI strip -> HTML entities ->
 * unicode escapes -> hex escapes -> URL encoding -> base64.
 *
 * Unicode Tags decoding and BIDI stripping run inside every iteration and once
 * more after the loop. When they only ran before the loop, a control character
 * that was itself encoded (a literal backslash-u202E escape, `%E2%80%AE`,
 * `&#x202E;`, ...) was produced by a later decoder and then never removed.
 *
 * After decoding: collapse letter-spaced text.
 *
 * @param {string} s
 * @returns {string}
 */
export function normalizeForScan(s) {
  const MAX_ITERATIONS = 3;
  let result = s;

  for (let i = 0; i < MAX_ITERATIONS; i++) {
    const prev = result;
    result = decodeUnicodeTags(result);
    result = stripBidiOverrides(result);
    result = decodeHtmlEntities(result);
    result = decodeUnicodeEscapes(result);
    result = decodeHexEscapes(result);
    result = decodeUrlEncoding(result);
    const b64decoded = tryDecodeBase64(result);
    if (b64decoded) result = b64decoded;
    // Stable — no further decoding possible
    if (result === prev) break;
  }

  // The decoders of the final iteration may still have produced tag/BIDI chars.
  result = stripBidiOverrides(decodeUnicodeTags(result));

  // Post-decode: collapse letter-spaced evasion
  return collapseLetterSpacing(result);
}

View file

@ -0,0 +1,284 @@
// supply-chain-data.mjs — Shared blocklists, parsers, and OSV.dev API for supply chain checks
// Used by: pre-install-supply-chain.mjs (hook) and supply-chain-recheck.mjs (scanner)
// Zero external dependencies (Node.js builtins only).
import { execSync } from 'node:child_process';
// ---------------------------------------------------------------------------
// Cross-platform HTTP helper (replaces curl subprocess)
// ---------------------------------------------------------------------------
/**
 * Fetch JSON from a URL with timeout. Cross-platform (no curl dependency).
 *
 * Never throws: network errors, aborts, non-2xx responses and invalid JSON
 * all resolve to null.
 *
 * The abort timer is cleared in `finally`, so
 *  - a rejected fetch no longer leaves a pending timer behind (which kept the
 *    Node process alive until the timeout elapsed), and
 *  - the timeout also covers reading/parsing the response body.
 *
 * @param {string} url
 * @param {object} [options] - fetch options (method, headers, body)
 * @param {number} [timeoutMs=8000]
 * @returns {Promise<object|null>} Parsed JSON or null on failure
 */
async function fetchJSON(url, options = {}, timeoutMs = 8000) {
  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(), timeoutMs);
  try {
    const res = await fetch(url, { ...options, signal: controller.signal });
    if (!res.ok) return null;
    return await res.json();
  } catch {
    // Deliberate best effort: callers treat null as "no data available".
    return null;
  } finally {
    clearTimeout(timer);
  }
}
// ===========================================================================
// Age threshold for new package detection (hours)
// ===========================================================================
/**
 * Threshold, in hours, used for "new package" detection.
 * Only the value is defined here; the age comparison itself is done by the
 * consumers of this module (not visible in this section).
 */
export const AGE_THRESHOLD_HOURS = 72;
// ===========================================================================
// KNOWN COMPROMISED — curated blocklists per ecosystem
// '*' = all versions blocked (entirely malicious package)
// ===========================================================================
/**
 * npm blocklist. Shape shared by all *_COMPROMISED tables:
 * package name -> array of blocked version strings, where the single
 * entry '*' blocks every version. Consumed by isCompromised(), which looks
 * the name up as an exact (case-sensitive) key.
 */
export const NPM_COMPROMISED = {
'axios': ['1.14.1', '0.30.4'],
'event-stream': ['3.3.6'],
'ua-parser-js': ['0.7.29', '0.8.0', '1.0.0'],
'coa': ['2.0.3', '2.0.4', '2.1.1', '2.1.3'],
'rc': ['1.2.9', '1.3.9', '2.3.9'],
'colors': ['1.4.1', '1.4.2'],
'faker': ['6.6.6'],
'node-ipc': ['10.1.1', '10.1.2', '10.1.3', '11.0.0', '11.1.0'],
'peacenotwar': ['*'],
'plain-crypto-js': ['*'],
};
/**
 * PyPI blocklist (same shape as NPM_COMPROMISED). Every entry blocks all
 * versions. Keys are matched exactly as written, including letter case
 * (note 'jeIlyfish' is spelled with an upper-case I).
 */
export const PIP_COMPROMISED = {
'colourama': ['*'],
'python3-dateutil': ['*'],
'jeIlyfish': ['*'],
'python-binance': ['*'],
'openai-api': ['*'],
'requesocks': ['*'],
'python-mongo': ['*'],
'nmap-python': ['*'],
'beautifulsoup': ['*'],
'djanga': ['*'],
'httpslib2': ['*'],
'urllib4': ['*'],
'pipsqlite3': ['*'],
'torlogging': ['*'],
'flasck': ['*'],
'matploltlib': ['*'],
'discordi': ['*'],
'numpyi': ['*'],
'pycryptdome': ['*'],
};
/** Cargo (crates.io) blocklist (same shape as NPM_COMPROMISED). */
export const CARGO_COMPROMISED = {
'rustdecimal': ['*'],
'cratesio': ['*'],
};
/** RubyGems blocklist (same shape as NPM_COMPROMISED); specific versions only. */
export const GEM_COMPROMISED = {
'rest-client': ['1.6.13'],
'strong_password': ['0.0.7'],
'bootstrap-sass': ['3.2.0.3'],
};
/**
 * Case-insensitive patterns for suspicious Docker names. This is a list of
 * regular expressions, not a name -> versions table, so it is not usable
 * with isCompromised().
 */
export const DOCKER_SUSPICIOUS = [
/xmrig/i,
/cryptonight/i,
/monero-?miner/i,
/coin-?hive/i,
];
// Popular PyPI packages for typosquat detection (used by hook)
// This is the reference list of legitimate names; the comparison logic
// lives in the hook, not in this module.
export const POPULAR_PIP = [
'requests', 'flask', 'django', 'numpy', 'pandas', 'scipy', 'matplotlib',
'tensorflow', 'torch', 'opencv-python', 'pillow', 'beautifulsoup4',
'sqlalchemy', 'celery', 'redis', 'boto3', 'openai', 'anthropic',
'fastapi', 'uvicorn', 'pydantic', 'httpx', 'aiohttp', 'colorama',
'cryptography', 'pycryptodome', 'paramiko', 'fabric', 'pytest',
'setuptools', 'pip', 'wheel', 'twine', 'black', 'mypy', 'ruff',
'python-dateutil', 'jellyfish', 'pymongo', 'psycopg2', 'python-nmap',
'discord.py', 'selenium', 'scrapy', 'lxml', 'pyyaml',
];
// ===========================================================================
// Helper functions
// ===========================================================================
/**
 * Check if a package name+version is on a compromised blocklist.
 *
 * The name is looked up as an OWN key of the blocklist only. A plain
 * `list[name]` also finds inherited Object.prototype members, so package
 * names such as "constructor" or "__proto__" used to resolve to a non-array
 * value and crash with a TypeError on `.includes`.
 *
 * The match is exact and case-sensitive; a '*' entry blocks every version.
 * Without a version, only '*' entries match.
 *
 * @param {Record<string, string[]>} list - Blocklist object
 * @param {string} name - Package name
 * @param {string|null} version - Package version (null = any)
 * @returns {boolean}
 */
export function isCompromised(list, name, version) {
  if (!Object.hasOwn(list, name)) return false;
  const bad = list[name];
  if (!Array.isArray(bad)) return false;
  if (bad.includes('*')) return true;
  return Boolean(version) && bad.includes(version);
}
/**
 * Split an npm package specifier into name and version
 * (e.g. "@scope/pkg@1.0.0" or "pkg@1.0.0").
 *
 * The version separator is the last "@" in the specifier. The "@" that starts
 * a scoped name is never treated as a separator, and neither is an "@"
 * directly after it. Without a separator the version is null.
 *
 * @param {string} spec
 * @returns {{ name: string, version: string|null }}
 */
export function parseSpec(spec) {
  // Scoped names start with '@', so the separator must come after index 1 there.
  const firstAllowedIdx = spec.startsWith('@') ? 2 : 1;
  const separatorIdx = spec.lastIndexOf('@');

  if (separatorIdx < firstAllowedIdx) {
    return { name: spec, version: null };
  }
  return {
    name: spec.slice(0, separatorIdx),
    version: spec.slice(separatorIdx + 1),
  };
}
/**
 * Parse a pip package specifier (e.g. "requests==2.28.0" or "flask>=2.0").
 *
 * - `name` is the leading distribution name only (letters, digits, `_`, `.`, `-`).
 *   Extras and other specifiers are not part of it, so "pkg[extra]==1.0" and
 *   "pkg>=1,==2" both yield "pkg". Before, the `==` branch returned everything
 *   in front of `==` as the name, which let "badpkg[x]==1.0" miss a blocklist
 *   lookup for "badpkg".
 * - `version` is set only for a pinned `==` (or `===`) requirement and stops at
 *   the first whitespace, `,` or `;`, so trailing specifiers and environment
 *   markers are not included. It is null when nothing is pinned.
 *
 * @param {string} spec
 * @returns {{ name: string, version: string|null }}
 */
export function parsePipSpec(spec) {
  const nameMatch = spec.match(/^([a-zA-Z0-9_.-]+)/);
  const eqIdx = spec.indexOf('==');

  if (eqIdx > 0) {
    // Fall back to the raw prefix only when no plain name can be recognized.
    const name = nameMatch ? nameMatch[1] : spec.slice(0, eqIdx);
    const version = spec
      .slice(eqIdx + 2)
      .replace(/^=/, '') // '===' (arbitrary equality) leaves one extra '='
      .trim()
      .split(/[\s,;]/)[0];
    return { name, version };
  }
  return { name: nameMatch ? nameMatch[1] : spec, version: null };
}
/**
 * Run a command line synchronously with a timeout and return its stdout.
 *
 * "Safe" refers to error handling only: a non-zero exit, timeout or spawn
 * failure does not throw. In that case whatever stdout was captured is
 * returned, or null when there is none.
 * NOTE(review): `cmd` is passed to execSync as a single string, i.e. it is
 * interpreted by a shell — callers must not interpolate untrusted input.
 *
 * @param {string} cmd
 * @param {number} [timeoutMs=10000]
 * @returns {string|null}
 */
export function execSafe(cmd, timeoutMs = 10000) {
  const execOptions = {
    timeout: timeoutMs,
    encoding: 'utf-8',
    stdio: ['pipe', 'pipe', 'pipe'],
  };

  let output;
  try {
    output = execSync(cmd, execOptions);
  } catch (failure) {
    output = failure.stdout || null;
  }
  return output;
}
// ===========================================================================
// OSV.dev API — unified vulnerability database
// ===========================================================================
/**
 * Map this module's ecosystem names to the ecosystem identifiers sent to OSV.
 * queryOSV() returns empty results for an ecosystem that is not listed here;
 * queryOSVBatch() falls back to sending the unmapped name as-is.
 */
export const OSV_ECOSYSTEM_MAP = {
npm: 'npm',
pip: 'PyPI',
cargo: 'crates.io',
gem: 'RubyGems',
go: 'Go',
};
/**
 * Extract severity from an OSV vulnerability record.
 *
 * Resolution order:
 *  1. `database_specific.severity`, upper-cased, when it is a non-empty string;
 *  2. `ecosystem_specific.severity`, upper-cased, when it is a non-empty string;
 *  3. the first numeric `severity[].score`: >= 9.0 CRITICAL, >= 7.0 HIGH, else MEDIUM;
 *  4. 'HIGH' when the id starts with GHSA or CVE;
 *  5. 'MEDIUM'.
 *
 * Steps 1 and 2 pass the source label through, so values other than the three
 * computed ones (e.g. 'MODERATE', 'LOW') can be returned.
 *
 * Malformed records (non-string severity or id, non-array `severity`) are now
 * skipped instead of throwing. queryOSV() swallows exceptions, so a single bad
 * record used to silently drop the remaining findings.
 *
 * NOTE(review): the OSV schema describes `severity[].score` as a CVSS vector
 * string, so the numeric branch may rarely apply to live data — confirm
 * against real responses.
 *
 * @param {object} vuln - OSV vulnerability object
 * @returns {string} - e.g. 'CRITICAL', 'HIGH', or 'MEDIUM'
 */
export function extractOSVSeverity(vuln) {
  const asLabel = (value) => (typeof value === 'string' && value ? value.toUpperCase() : null);

  const dbSev = asLabel(vuln.database_specific?.severity);
  if (dbSev) return dbSev;
  const ecoSev = asLabel(vuln.ecosystem_specific?.severity);
  if (ecoSev) return ecoSev;

  const ratings = Array.isArray(vuln.severity) ? vuln.severity : [];
  for (const sev of ratings) {
    const score = sev?.score;
    if (score && typeof score === 'number') {
      if (score >= 9.0) return 'CRITICAL';
      if (score >= 7.0) return 'HIGH';
      return 'MEDIUM';
    }
  }

  const id = typeof vuln.id === 'string' ? vuln.id : '';
  if (id.startsWith('GHSA') || id.startsWith('CVE')) return 'HIGH';
  return 'MEDIUM';
}
/**
 * Ask OSV.dev which known vulnerabilities affect one package version and
 * return the CRITICAL and HIGH ones. Used by the hook (real-time, single package).
 *
 * Fails open: an unmapped ecosystem, a failed request, or an error while
 * processing the response yields whatever was collected so far (possibly
 * two empty lists). Summaries are cut to 120 characters.
 *
 * @param {string} ecosystem - 'npm', 'pip', 'cargo', 'gem', 'go'
 * @param {string} name
 * @param {string} version
 * @returns {Promise<{ critical: object[], high: object[] }>}
 */
export async function queryOSV(ecosystem, name, version) {
  const findings = { critical: [], high: [] };

  const osvEcosystem = OSV_ECOSYSTEM_MAP[ecosystem];
  if (!osvEcosystem) return findings;

  const request = {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({
      version,
      package: { name, ecosystem: osvEcosystem },
    }),
  };

  try {
    const response = await fetchJSON('https://api.osv.dev/v1/query', request, 8000);
    if (!response) return findings;

    for (const vuln of response.vulns || []) {
      const severity = extractOSVSeverity(vuln);
      const description = vuln.summary || vuln.details || 'No description';
      const finding = { id: vuln.id, summary: description.slice(0, 120), severity };

      switch (severity) {
        case 'CRITICAL':
          findings.critical.push(finding);
          break;
        case 'HIGH':
          findings.high.push(finding);
          break;
        default:
          break; // lower severities are not reported by the hook
      }
    }
  } catch { /* network error — fail open */ }

  return findings;
}
/**
 * Look up many packages through the OSV.dev batch endpoint.
 * Used by the scanner (periodic re-check of all lockfile deps).
 *
 * Queries are sent in chunks of at most 1000, one request after the other, and
 * the per-query results are concatenated in input order. If any chunk fails,
 * everything collected so far is discarded and `{ results: [], offline: true }`
 * is returned. Ecosystems without a mapping are sent under their given name.
 *
 * @param {{ ecosystem: string, name: string, version: string }[]} packages
 * @returns {Promise<{ results: Array<{ vulns: object[] }>, offline: boolean }>}
 */
export async function queryOSVBatch(packages) {
  if (packages.length === 0) return { results: [], offline: false };

  // OSV batch API accepts max 1000 queries per request
  const BATCH_SIZE = 1000;
  const OFFLINE = { results: [], offline: true };

  const queries = [];
  for (const { ecosystem, name, version } of packages) {
    queries.push({
      version,
      package: { name, ecosystem: OSV_ECOSYSTEM_MAP[ecosystem] || ecosystem },
    });
  }

  const collected = [];
  let offset = 0;
  while (offset < queries.length) {
    const chunk = queries.slice(offset, offset + BATCH_SIZE);
    offset += BATCH_SIZE;
    try {
      const response = await fetchJSON('https://api.osv.dev/v1/querybatch', {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({ queries: chunk }),
      }, 15000);
      if (!response) return OFFLINE;
      collected.push(...(response.results || []));
    } catch {
      return OFFLINE;
    }
  }
  return { results: collected, offline: false };
}

View file

@ -0,0 +1,90 @@
// yaml-frontmatter.mjs — Regex-based YAML frontmatter parser
// Handles Claude Code plugin command/agent/skill frontmatter.
// Zero dependencies.
/**
 * Parse YAML frontmatter from a markdown file (regex-based, not a full YAML parser).
 * Returns null if no frontmatter is found or it contains no key/value pairs.
 *
 * Supported per line: `key: value`, optional surrounding quotes, inline arrays
 * (`[Read, Write]`), and `|` / `>` block scalars. Dashes in keys become
 * underscores (`allowed-tools` -> `allowed_tools`). String values of
 * `allowed_tools` and `tools` are split on commas.
 *
 * Block scalars:
 *  - the indented lines that follow belong to the scalar and are consumed with
 *    it; they are no longer re-read as `key: value` lines. Previously a body
 *    line such as "  name: evil" inside a description overwrote the real `name`.
 *  - the body is located by position instead of `indexOf(line)`, which picked
 *    the wrong body when two identical key lines existed.
 *  - the body is dedented by the indentation of its first non-blank line, and
 *    lines are joined with "\n" for both `|` and `>` (no folding).
 *
 * Lines are split on CRLF or LF, so no stray "\r" ends up in values.
 *
 * @param {string} content - File content
 * @returns {{ name?: string, description?: string, model?: string, color?: string,
 *             tools?: string[], allowed_tools?: string[] } | null}
 */
export function parseFrontmatter(content) {
  // Match --- delimited block at start of file
  const match = content.match(/^---\r?\n([\s\S]*?)\r?\n---/);
  if (!match) return null;

  const lines = match[1].split(/\r?\n/);
  const leadingWidth = (text) => text.match(/^[ \t]*/)[0].length;
  const result = {};

  for (let idx = 0; idx < lines.length; idx++) {
    const trimmed = lines[idx].trim();
    if (!trimmed || trimmed.startsWith('#')) continue;

    // Handle key: value
    const kvMatch = trimmed.match(/^(\w[\w-]*)\s*:\s*(.*)$/);
    if (!kvMatch) continue;
    const [, key, rawValue] = kvMatch;
    let value = rawValue.trim();

    // Strip quotes
    if ((value.startsWith('"') && value.endsWith('"')) ||
        (value.startsWith("'") && value.endsWith("'"))) {
      value = value.slice(1, -1);
    }

    // Handle inline arrays: [Read, Write, Bash]
    if (value.startsWith('[') && value.endsWith(']')) {
      value = value.slice(1, -1).split(',').map(s => s.trim().replace(/^["']|["']$/g, ''));
    }

    // Handle multi-line block scalars introduced by | or >
    if (value === '|' || value === '>') {
      const bodyLines = [];
      let next = idx + 1;
      // The body runs until the next line that starts at column 0.
      while (next < lines.length && !/^\S/.test(lines[next])) {
        bodyLines.push(lines[next]);
        next++;
      }
      idx = next - 1; // skip the consumed body in the outer loop

      const firstContent = bodyLines.find(l => l.trim() !== '');
      const indent = firstContent ? leadingWidth(firstContent) : 0;
      value = bodyLines
        .map(l => l.slice(Math.min(indent, leadingWidth(l))))
        .join('\n')
        .trim();
    }

    // Normalize key names
    result[key.replace(/-/g, '_')] = value;
  }

  // Parse tools from allowed-tools (comma-separated string) or tools (array)
  if (typeof result.allowed_tools === 'string') {
    result.allowed_tools = result.allowed_tools.split(',').map(s => s.trim());
  }
  if (typeof result.tools === 'string') {
    result.tools = result.tools.split(',').map(s => s.trim());
  }

  return Object.keys(result).length > 0 ? result : null;
}
/**
 * Classify a plugin file by its path and frontmatter.
 *
 * Path rules are checked first, in this order: commands, agents, skills
 * (or any path ending in "skill.md"), hooks ("hooks.json" or a "/hooks/"
 * segment), knowledge, templates. If none matches, frontmatter decides:
 * `name` + `allowed_tools` -> command, `name` + `tools` -> agent.
 *
 * Matching is case-insensitive and separator-agnostic: backslashes are treated
 * as forward slashes, so Windows-style relative paths ("commands\\scan.md")
 * classify the same as POSIX ones instead of falling through to 'unknown'.
 *
 * @param {string} relPath - Relative path within plugin
 * @param {object|null} frontmatter - Parsed frontmatter
 * @returns {'command' | 'agent' | 'skill' | 'hook-config' | 'knowledge' | 'template' | 'unknown'}
 */
export function classifyPluginFile(relPath, frontmatter) {
  const lower = relPath.toLowerCase().replace(/\\/g, '/');
  const inDir = (dir) => lower.includes(`/${dir}/`) || lower.startsWith(`${dir}/`);

  if (inDir('commands')) return 'command';
  if (inDir('agents')) return 'agent';
  if (inDir('skills') || lower.endsWith('skill.md')) return 'skill';
  if (lower.endsWith('hooks.json') || lower.includes('/hooks/')) return 'hook-config';
  if (inDir('knowledge')) return 'knowledge';
  if (inDir('templates')) return 'template';

  if (frontmatter?.name && frontmatter?.allowed_tools) return 'command';
  if (frontmatter?.name && frontmatter?.tools) return 'agent';
  return 'unknown';
}