feat(llm-security-copilot): port llm-security v5.1.0 to GitHub Copilot CLI
Full port of llm-security plugin for internal use on Windows with GitHub Copilot CLI. Protocol translation layer (copilot-hook-runner.mjs) normalizes Copilot camelCase I/O to Claude Code snake_case format — all original hook scripts run unmodified. - 8 hooks with protocol translation (stdin/stdout/exit code) - 18 SKILL.md skills (Agent Skills Open Standard) - 6 .agent.md agent definitions - 20 scanners + 14 scanner lib modules (unchanged) - 14 knowledge files (unchanged) - 39 test files including copilot-port-verify.mjs (17 tests) - Windows-ready: node:path, os.tmpdir(), process.execPath, no bash Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
901bf0ae12
commit
f418a8fe08
169 changed files with 37631 additions and 0 deletions
54
plugins/llm-security-copilot/scanners/lib/bash-normalize.mjs
Normal file
54
plugins/llm-security-copilot/scanners/lib/bash-normalize.mjs
Normal file
|
|
@ -0,0 +1,54 @@
|
|||
// bash-normalize.mjs — Normalize bash parameter expansion evasion techniques.
|
||||
//
|
||||
// Attackers can evade command-name matching by inserting shell metacharacters
|
||||
// that are transparent to bash but break regex patterns.
|
||||
//
|
||||
// This module strips these constructs from command names so that downstream
|
||||
// pattern matching sees the canonical form.
|
||||
//
|
||||
// Exported as a shared module — used by pre-bash-destructive.mjs and
|
||||
// pre-install-supply-chain.mjs.
|
||||
|
||||
/**
|
||||
* Normalize bash parameter expansion and quoting evasion in a command string.
|
||||
*
|
||||
* Strips:
|
||||
* - Empty single quotes: '' (e.g., w''get -> wget)
|
||||
* - Empty double quotes: "" (e.g., r""m -> rm)
|
||||
* - Single-char parameter expansion: ${x} -> x (evasion: attacker sets x=x)
|
||||
* - Multi-char parameter expansion: ${ANYTHING} -> '' (unknown value)
|
||||
* - Backslash escapes between word chars, iteratively (c\u\r\l -> curl)
|
||||
* - Backtick subshell with empty/whitespace content
|
||||
*
|
||||
* Does NOT strip:
|
||||
* - Quotes around arguments (only targets empty quotes that split command names)
|
||||
* - $VAR without braces (not an evasion pattern)
|
||||
* - Backslashes before non-word chars (\n, \t, etc.)
|
||||
*
|
||||
* @param {string} cmd - Raw command string
|
||||
* @returns {string} Normalized command string
|
||||
*/
|
||||
export function normalizeBashExpansion(cmd) {
|
||||
if (!cmd || typeof cmd !== 'string') return cmd || '';
|
||||
|
||||
let result = cmd
|
||||
// Strip empty single quotes: w''get -> wget
|
||||
.replace(/''/g, '')
|
||||
// Strip empty double quotes: r""m -> rm
|
||||
.replace(/""/g, '')
|
||||
// Single-char ${x} -> x (evasion: c${u}rl -> curl, assumes x=x)
|
||||
.replace(/\$\{(\w)\}/g, '$1')
|
||||
// Multi-char ${ANYTHING} -> '' (unknown value, strip entirely)
|
||||
.replace(/\$\{[^}]*\}/g, '')
|
||||
// Strip backtick subshell with empty/whitespace content
|
||||
.replace(/`\s*`/g, '');
|
||||
|
||||
// Iteratively strip backslash between word chars (c\u\r\l needs 2 passes)
|
||||
let prev;
|
||||
do {
|
||||
prev = result;
|
||||
result = result.replace(/(\w)\\(\w)/g, '$1$2');
|
||||
} while (result !== prev);
|
||||
|
||||
return result;
|
||||
}
|
||||
276
plugins/llm-security-copilot/scanners/lib/diff-engine.mjs
Normal file
276
plugins/llm-security-copilot/scanners/lib/diff-engine.mjs
Normal file
|
|
@ -0,0 +1,276 @@
|
|||
// diff-engine.mjs — Baseline storage, finding fingerprinting, and diff categorization.
|
||||
// Compares scan results against a stored baseline to classify findings as:
|
||||
// new — present in current scan, absent from baseline
|
||||
// resolved — present in baseline, absent from current scan
|
||||
// unchanged — matched between baseline and current (line drift ≤3)
|
||||
// moved — same finding, different location (line drift >3 or file renamed)
|
||||
// Zero external dependencies.
|
||||
|
||||
import { createHash } from 'node:crypto';
|
||||
import { existsSync, mkdirSync, readFileSync, writeFileSync } from 'node:fs';
|
||||
import { join, resolve } from 'node:path';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Configuration
|
||||
// ---------------------------------------------------------------------------
|
||||
// Largest absolute line-number difference at which two same-file findings with
// the same fingerprint are still treated as being at the same location.
const LINE_FUZZY_THRESHOLD = 3; // ±3 lines = unchanged, >3 = moved
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Target hashing — deterministic key for baseline storage
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Derive the storage key used as the baseline filename for a scan target.
 * The path is resolved to an absolute path first, so different spellings of
 * the same directory ('.', 'a/../a', ...) map to the same key.
 * @param {string} targetPath - Path of the scanned target (relative or absolute)
 * @returns {string} First 12 hex characters of the SHA-256 of the resolved path
 */
export function targetHash(targetPath) {
  const absolutePath = resolve(targetPath);
  const hasher = createHash('sha256');
  hasher.update(absolutePath);
  const hex = hasher.digest('hex');
  return hex.slice(0, 12);
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Finding fingerprinting — identity that survives line drift
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Compute the identity of a finding, independent of its line number.
 * The identity is built from scanner, file, title and evidence (a missing or
 * falsy field counts as ''), joined with NUL bytes and hashed. Evidence is
 * included so that two findings in the same file with different evidence
 * stay distinct.
 * @param {object} finding - A finding object from output.mjs
 * @returns {string} First 16 hex characters of the SHA-256 of the identity string
 */
export function fingerprintFinding(finding) {
  const { scanner, file, title, evidence } = finding;
  const identity = [scanner, file, title, evidence]
    .map((part) => part || '')
    .join('\x00');
  return createHash('sha256').update(identity).digest('hex').slice(0, 16);
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Baseline I/O
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Compute where the baseline for a target is stored.
 * @param {string} baselinesDir - Path to reports/baselines/
 * @param {string} targetPath - The scanned target
 * @returns {string} `<baselinesDir>/<targetHash(targetPath)>.json`
 */
export function baselinePath(baselinesDir, targetPath) {
  const fileName = targetHash(targetPath) + '.json';
  return join(baselinesDir, fileName);
}
|
||||
|
||||
/**
 * Persist the current scan as the baseline for a target.
 * Creates the baselines directory if it does not exist, then writes a compact
 * JSON document (2-space indent, trailing newline) containing the envelope's
 * target/timestamp metadata, its aggregate, and the fingerprinted findings.
 * @param {string} baselinesDir - Path to reports/baselines/
 * @param {string} targetPath - The scanned target
 * @param {object} scanEnvelope - Full scan output envelope from scan-orchestrator
 * @returns {string} Path to saved baseline file
 */
export function saveBaseline(baselinesDir, targetPath, scanEnvelope) {
  if (!existsSync(baselinesDir)) mkdirSync(baselinesDir, { recursive: true });

  const { meta, aggregate } = scanEnvelope;
  const snapshot = {
    meta: {
      target: meta.target,
      timestamp: meta.timestamp,
      version: '1', // baseline format version
    },
    aggregate,
    findings: extractFindings(scanEnvelope),
  };

  const destination = baselinePath(baselinesDir, targetPath);
  writeFileSync(destination, `${JSON.stringify(snapshot, null, 2)}\n`);
  return destination;
}
|
||||
|
||||
/**
 * Read the stored baseline for a target.
 * @param {string} baselinesDir - Directory holding baseline JSON files
 * @param {string} targetPath - The scanned target
 * @returns {object|null} Parsed baseline, or null when the file is missing,
 *   unreadable, or not valid JSON
 */
export function loadBaseline(baselinesDir, targetPath) {
  const source = baselinePath(baselinesDir, targetPath);
  let parsed = null;
  if (existsSync(source)) {
    try {
      const raw = readFileSync(source, 'utf8');
      parsed = JSON.parse(raw);
    } catch {
      // Unreadable or corrupt baseline is treated the same as "no baseline".
      parsed = null;
    }
  }
  return parsed;
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Finding extraction — flatten all scanner results into fingerprinted list
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Flatten the findings of every scanner in an envelope into one list and
 * attach a fingerprint to each.
 * Missing optional fields are normalized to null. When a finding carries no
 * `scanner` value, the first three characters of the upper-cased scanner key
 * are used instead.
 * @param {object} scanEnvelope - Envelope with an optional `scanners` map
 * @returns {object[]} Array of { fingerprint, scanner, severity, title, file, line, evidence, owasp, recommendation }
 */
export function extractFindings(scanEnvelope) {
  const scanners = scanEnvelope.scanners || {};
  return Object.entries(scanners).flatMap(([scannerName, result]) => {
    const fallbackPrefix = () => scannerName.toUpperCase().slice(0, 3);
    return (result.findings || []).map((f) => ({
      fingerprint: fingerprintFinding(f),
      scanner: f.scanner || fallbackPrefix(),
      severity: f.severity,
      title: f.title,
      file: f.file || null,
      line: f.line || null,
      evidence: f.evidence || null,
      owasp: f.owasp || null,
      recommendation: f.recommendation || null,
    }));
  });
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Diff algorithm
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Compare current scan findings against a baseline.
 *
 * Matching strategy (priority order):
 *   1. Exact: fingerprint + file + line within ±LINE_FUZZY_THRESHOLD → unchanged
 *   2. Moved: fingerprint matches but file or line drifted beyond threshold → moved
 *   3. Unmatched current findings → new
 *   4. Unmatched baseline findings → resolved
 *
 * The exact pass runs over ALL current findings before any moved match is
 * attempted. Otherwise an earlier current finding without an exact match could
 * take (as "moved") the baseline entry that a later current finding matches
 * exactly, and that later finding would be reported as "new".
 *
 * Each baseline entry is matched at most once. Within every category, results
 * keep the order of `currentFindings` (or, for `resolved`, baseline order per
 * fingerprint).
 *
 * @param {object[]} baselineFindings - From loadBaseline().findings
 * @param {object[]} currentFindings - From extractFindings()
 * @returns {object} { new, resolved, unchanged, moved, summary }; entries in
 *   `unchanged` carry `baseline_line`, entries in `moved` carry
 *   `previous_file` / `previous_line`. `summary.baseline_timestamp` is null
 *   and is filled in by the caller.
 */
export function diffFindings(baselineFindings, currentFindings) {
  // Index baseline findings by fingerprint. Several findings may share a
  // fingerprint (same pattern, different locations), hence the buckets.
  // Entries are copies so the caller's objects are not mutated.
  const baselineByFp = new Map();
  for (const finding of baselineFindings) {
    const entry = { ...finding, matched: false };
    const bucket = baselineByFp.get(finding.fingerprint);
    if (bucket) {
      bucket.push(entry);
    } else {
      baselineByFp.set(finding.fingerprint, [entry]);
    }
  }

  const results = {
    new: [],
    resolved: [],
    unchanged: [],
    moved: [],
  };

  // Pass 1: exact matches (same file, line within threshold) for every
  // current finding. Anything left over is decided in pass 2.
  const pending = [];
  for (const current of currentFindings) {
    const candidates = baselineByFp.get(current.fingerprint) || [];
    const exact = candidates.find(
      (b) => !b.matched && b.file === current.file && isLineClose(b.line, current.line),
    );
    if (exact) {
      exact.matched = true;
      results.unchanged.push({ ...current, baseline_line: exact.line });
    } else {
      pending.push(current);
    }
  }

  // Pass 2: remaining current findings take any still-unmatched baseline entry
  // with the same fingerprint (moved); if none is left they are new.
  for (const current of pending) {
    const candidates = baselineByFp.get(current.fingerprint) || [];
    const relocated = candidates.find((b) => !b.matched);
    if (relocated) {
      relocated.matched = true;
      results.moved.push({
        ...current,
        previous_file: relocated.file,
        previous_line: relocated.line,
      });
    } else {
      results.new.push(current);
    }
  }

  // Pass 3: baseline entries nobody claimed are resolved. The internal
  // `matched` flag is stripped before they are returned.
  for (const candidates of baselineByFp.values()) {
    for (const { matched, ...finding } of candidates) {
      if (!matched) results.resolved.push(finding);
    }
  }

  results.summary = {
    new: results.new.length,
    resolved: results.resolved.length,
    unchanged: results.unchanged.length,
    moved: results.moved.length,
    total_current: currentFindings.length,
    total_baseline: baselineFindings.length,
    baseline_timestamp: null, // caller fills in
  };

  return results;
}
|
||||
|
||||
/**
 * Decide whether two line numbers count as the same location.
 * A missing line (null/undefined) on either side always counts as close,
 * because some findings are file-level rather than line-level.
 * @param {number|null} a
 * @param {number|null} b
 * @returns {boolean} true when either line is missing or |a - b| <= LINE_FUZZY_THRESHOLD
 */
function isLineClose(a, b) {
  const bothKnown = a != null && b != null;
  return !bothKnown || Math.abs(a - b) <= LINE_FUZZY_THRESHOLD;
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// High-level API — used by scan-orchestrator
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Run a full diff cycle: load baseline, compare, return diff results.
 *
 * A baseline file that parses as JSON but lacks the expected shape (for
 * example after a manual edit) no longer causes a TypeError: a missing or
 * non-array `findings` is treated as an empty baseline, and a missing
 * `meta.timestamp` leaves `summary.baseline_timestamp` as null.
 *
 * @param {string} baselinesDir - Directory holding baseline JSON files
 * @param {string} targetPath - The scanned target
 * @param {object} scanEnvelope - Current scan results
 * @returns {object|null} Diff results with summary, or null if no baseline exists
 */
export function diffAgainstBaseline(baselinesDir, targetPath, scanEnvelope) {
  const baseline = loadBaseline(baselinesDir, targetPath);
  if (!baseline) return null;

  const baselineFindings = Array.isArray(baseline.findings) ? baseline.findings : [];
  const currentFindings = extractFindings(scanEnvelope);
  const diff = diffFindings(baselineFindings, currentFindings);
  diff.summary.baseline_timestamp = baseline.meta?.timestamp ?? null;

  return diff;
}
|
||||
|
|
@ -0,0 +1,58 @@
|
|||
// distribution-stats.mjs — Statistical divergence utilities for behavioral drift detection.
|
||||
// Zero external dependencies. <50 lines.
|
||||
//
|
||||
// Jensen-Shannon divergence measures how different two probability distributions are.
|
||||
// Used by post-session-guard.mjs to detect tool distribution shifts within a session.
|
||||
//
|
||||
// OWASP: ASI01 (Excessive Agency — behavioral pattern changes may indicate hijacking)
|
||||
|
||||
/**
 * Kullback-Leibler divergence KL(P || Q) in bits (log base 2).
 * Keys with zero mass in P contribute nothing. If P has mass on a key that is
 * missing from (or zero in) Q, the result is Infinity.
 * @param {Map<string, number>} P
 * @param {Map<string, number>} Q
 * @returns {number}
 */
function klDivergence(P, Q) {
  let bits = 0;
  for (const key of P.keys()) {
    const pMass = P.get(key);
    if (pMass !== 0) {
      const qMass = Q.get(key) || 0;
      if (qMass === 0) return Infinity;
      bits += pMass * Math.log2(pMass / qMass);
    }
  }
  return bits;
}
|
||||
|
||||
/**
 * Jensen-Shannon divergence (log2 basis): 0 for identical distributions,
 * 1 for distributions with no key in common.
 * Computed as the average of KL(P || M) and KL(Q || M), where M is the
 * 50/50 mixture of P and Q over the union of their keys. Because M has mass
 * wherever P or Q does, neither KL term hits the Infinity case.
 * Symmetric: JSD(P,Q) = JSD(Q,P).
 * @param {Map<string, number>} P - Normalized probability distribution
 * @param {Map<string, number>} Q - Normalized probability distribution
 * @returns {number}
 */
export function jensenShannonDivergence(P, Q) {
  const mixture = new Map();
  for (const key of new Set([...P.keys(), ...Q.keys()])) {
    const pMass = P.get(key) || 0;
    const qMass = Q.get(key) || 0;
    mixture.set(key, 0.5 * pMass + 0.5 * qMass);
  }
  const pToMixture = klDivergence(P, mixture);
  const qToMixture = klDivergence(Q, mixture);
  return 0.5 * pToMixture + 0.5 * qToMixture;
}
|
||||
|
||||
/**
 * Turn a list of category labels into relative frequencies.
 * Keys appear in order of first occurrence.
 * @param {string[]} labels
 * @returns {Map<string, number>} label → count / labels.length; values sum to
 *   1.0 (empty input → empty map)
 */
export function buildDistribution(labels) {
  const total = labels.length;
  if (total === 0) return new Map();

  const tally = new Map();
  for (const label of labels) {
    const seen = tally.get(label) || 0;
    tally.set(label, seen + 1);
  }

  return new Map(Array.from(tally, ([label, count]) => [label, count / total]));
}
|
||||
145
plugins/llm-security-copilot/scanners/lib/file-discovery.mjs
Normal file
145
plugins/llm-security-copilot/scanners/lib/file-discovery.mjs
Normal file
|
|
@ -0,0 +1,145 @@
|
|||
// file-discovery.mjs — Walk directory tree, filter, binary detection
|
||||
// Zero dependencies (Node.js builtins only).
|
||||
|
||||
import { readdir, stat, readFile } from 'node:fs/promises';
|
||||
import { join, relative, extname } from 'node:path';
|
||||
|
||||
// Extensions we scan (text-based)
// discoverFiles() looks up the lower-cased result of path.extname() in this set.
// NOTE(review): extname() yields only the final dot-segment ('.local' for
// '.env.local', '.example' for '.env.example') and '' for a bare dotfile such
// as '.env', so the '.env.local' and '.env.example' entries below cannot be
// hit by an extname()-based lookup — confirm the intended handling.
const TEXT_EXTENSIONS = new Set([
  '.js', '.mjs', '.cjs', '.ts', '.mts', '.cts', '.jsx', '.tsx',
  '.py', '.pyw',
  '.json', '.jsonc', '.json5',
  '.yaml', '.yml',
  '.toml',
  '.md', '.mdx',
  '.sh', '.bash', '.zsh',
  '.env', '.env.local', '.env.example',
  '.cfg', '.ini', '.conf',
  '.xml', '.html', '.htm', '.svg',
  '.css', '.scss', '.less',
  '.sql',
  '.rs', '.go', '.java', '.kt', '.cs', '.c', '.cpp', '.h', '.hpp',
  '.rb', '.php', '.lua', '.swift', '.m',
  '.txt', '.csv', '.log',
  '.lock', // package-lock.json, yarn.lock, etc.
  '.dockerfile', '', // Dockerfile, Makefile, etc. (no extension)
]);
|
||||
|
||||
// Directories to always skip
// Matched against the directory's own name (not its path) at every depth of
// the walk in discoverFiles().
const SKIP_DIRS = new Set([
  'node_modules', '.git', '.hg', '.svn',
  '__pycache__', '.pytest_cache', '.mypy_cache',
  'dist', 'build', '.next', '.nuxt',
  '.venv', 'venv', 'env',
  'coverage', '.nyc_output',
  '.angular', '.cache',
]);
|
||||
|
||||
// Max file size to read (512KB)
// Default for discoverFiles' opts.maxFileSize; larger files are counted as skipped.
const MAX_FILE_SIZE = 512 * 1024;
|
||||
|
||||
/**
 * Discover all scannable files under a target path.
 *
 * Walks the tree depth-first. Directories listed in SKIP_DIRS and dot
 * directories are not entered, except `.claude-plugin`, `.github` and
 * `.claude`. A file is accepted when its lower-cased extension is in
 * TEXT_EXTENSIONS, when it has no extension, or when it is a dotenv file
 * (`.env`, `.env.local`, `.env.production.local`, ...). The dotenv rule is
 * needed because path.extname('.env.local') is '.local', which an
 * extension lookup alone never matches.
 *
 * `skipped` counts unreadable directories, files with an unaccepted
 * extension, files that cannot be stat'ed, empty files and files larger than
 * `maxFileSize`. Skipped directories and non-regular entries (e.g. symlinks)
 * are not counted.
 *
 * @param {string} targetPath - Absolute path to scan
 * @param {object} [opts]
 * @param {number} [opts.maxFiles=5000] - Stop after this many files
 * @param {number} [opts.maxFileSize=524288] - Skip files larger than this
 * @returns {Promise<{ files: FileInfo[], skipped: number, truncated: boolean }>}
 *   `truncated` is true when the walk stopped because `maxFiles` was reached.
 *
 * @typedef {{ absPath: string, relPath: string, ext: string, size: number }} FileInfo
 *   `ext` is the lower-cased path.extname() result (so '.local' for '.env.local').
 */
export async function discoverFiles(targetPath, opts = {}) {
  // `||` (not `??`) is kept on purpose: 0 falls back to the default as before.
  const maxFiles = opts.maxFiles || 5000;
  const maxFileSize = opts.maxFileSize || MAX_FILE_SIZE;

  // Dot directories that are scanned despite the general "skip dot dirs" rule.
  const scannedDotDirs = ['.claude-plugin', '.github', '.claude'];
  // `.env` optionally followed by one or more dot-separated suffixes.
  const dotenvNameRe = /^\.env(?:\.[\w.-]+)?$/i;

  const files = [];
  let skipped = 0;
  let truncated = false;

  async function walk(dir) {
    if (truncated) return;

    let entries;
    try {
      entries = await readdir(dir, { withFileTypes: true });
    } catch {
      // Unreadable directory (permissions, race with deletion): count and move on.
      skipped++;
      return;
    }

    for (const entry of entries) {
      if (truncated) return;
      const fullPath = join(dir, entry.name);

      if (entry.isDirectory()) {
        const excluded = SKIP_DIRS.has(entry.name) || entry.name.startsWith('.');
        if (excluded && !scannedDotDirs.includes(entry.name)) continue;
        await walk(fullPath);
        continue;
      }

      if (!entry.isFile()) continue;

      const ext = extname(entry.name).toLowerCase();
      const isKnownText = TEXT_EXTENSIONS.has(ext);
      const isExtensionless = ext === '' && !entry.name.startsWith('.');
      const isDotenv = dotenvNameRe.test(entry.name);

      if (!isKnownText && !isExtensionless && !isDotenv) {
        skipped++;
        continue;
      }

      let fileSize;
      try {
        const st = await stat(fullPath);
        // Oversized and empty files are both skipped.
        if (st.size > maxFileSize || st.size === 0) {
          skipped++;
          continue;
        }
        fileSize = st.size;
      } catch {
        skipped++;
        continue;
      }

      files.push({
        absPath: fullPath,
        relPath: relative(targetPath, fullPath),
        ext,
        size: fileSize,
      });

      if (files.length >= maxFiles) {
        truncated = true;
        return;
      }
    }
  }

  await walk(targetPath);
  return { files, skipped, truncated };
}
|
||||
|
||||
/**
 * Load a file as a UTF-8 string unless it looks binary.
 * The binary heuristic is a NUL byte anywhere in the first 8 KiB; NUL bytes
 * after that offset are not checked.
 * @param {string} absPath
 * @returns {Promise<string|null>} File content, or null when the file looks
 *   binary or cannot be read
 */
export async function readTextFile(absPath) {
  try {
    const buf = await readFile(absPath);
    // Quick binary check on the leading 8KB only.
    const head = buf.subarray(0, 8192);
    return head.includes(0) ? null : buf.toString('utf-8');
  } catch {
    return null;
  }
}
|
||||
66
plugins/llm-security-copilot/scanners/lib/fs-utils.mjs
Normal file
66
plugins/llm-security-copilot/scanners/lib/fs-utils.mjs
Normal file
|
|
@ -0,0 +1,66 @@
|
|||
#!/usr/bin/env node
|
||||
// fs-utils.mjs — Cross-platform file operations for /security clean
|
||||
// Usage:
|
||||
// node fs-utils.mjs backup <target> → prints backup path to stdout
|
||||
// node fs-utils.mjs restore <backup> <target> → restores backup over target
|
||||
// node fs-utils.mjs cleanup <backup> → removes backup directory
|
||||
// node fs-utils.mjs tmppath <filename> → prints cross-platform temp file path
|
||||
|
||||
import { cpSync, rmSync, renameSync, existsSync } from 'node:fs';
|
||||
import { join, basename } from 'node:path';
|
||||
import { tmpdir } from 'node:os';
|
||||
import { randomUUID } from 'node:crypto';
|
||||
|
||||
// CLI dispatch. argv[2] selects the sub-command; the remaining arguments are
// its operands. Every failure path prints to stderr and exits with status 1.
const [,, command, ...args] = process.argv;

switch (command) {
  // backup <target>: copy target to "<target>.security-backup-<timestamp>"
  // and print the backup path.
  case 'backup': {
    const target = args[0];
    if (!target || !existsSync(target)) {
      console.error(`backup: target does not exist: ${target}`);
      process.exit(1);
    }
    // ISO timestamp with ':' and '.' replaced, cut to second precision
    // (YYYY-MM-DDTHH-MM-SS) so it is a valid filename on Windows too.
    const ts = new Date().toISOString().replace(/[:.]/g, '-').slice(0, 19);
    const backupPath = `${target}.security-backup-${ts}`;
    cpSync(target, backupPath, { recursive: true });
    process.stdout.write(backupPath + '\n');
    break;
  }

  // restore <backup> <target>: replace target with the backup (the backup is
  // moved, not copied).
  case 'restore': {
    const [backup, target] = args;
    if (!backup || !existsSync(backup)) {
      console.error(`restore: backup does not exist: ${backup}`);
      process.exit(1);
    }
    // Without this guard renameSync(backup, undefined) dies with a TypeError.
    if (!target) {
      console.error('restore: target path required');
      process.exit(1);
    }
    if (existsSync(target)) {
      rmSync(target, { recursive: true, force: true });
    }
    renameSync(backup, target);
    process.stdout.write(`Restored ${backup} → ${target}\n`);
    break;
  }

  // cleanup <backup>: remove a backup; silently does nothing if it is absent.
  case 'cleanup': {
    const path = args[0];
    if (path && existsSync(path)) {
      rmSync(path, { recursive: true, force: true });
      process.stdout.write(`Removed ${path}\n`);
    }
    break;
  }

  // tmppath <filename>: print "<tmpdir>/<name>-<8 hex chars><ext>".
  case 'tmppath': {
    const fallbackName = 'llm-security-temp.json';
    // Only the final path component is used, so an argument such as
    // "../../x.json" cannot make the result point outside the temp directory.
    const base = basename(args[0] || fallbackName) || fallbackName;
    const dotIdx = base.lastIndexOf('.');
    // dotIdx > 0 (not >= 0): a leading dot is part of the name, not an extension.
    const name = dotIdx > 0 ? base.slice(0, dotIdx) : base;
    const ext = dotIdx > 0 ? base.slice(dotIdx) : '.json';
    const unique = `${name}-${randomUUID().slice(0, 8)}${ext}`;
    process.stdout.write(join(tmpdir(), unique) + '\n');
    break;
  }

  default:
    console.error('Usage: node fs-utils.mjs <backup|restore|cleanup|tmppath> [args...]');
    process.exit(1);
}
|
||||
227
plugins/llm-security-copilot/scanners/lib/git-clone.mjs
Normal file
227
plugins/llm-security-copilot/scanners/lib/git-clone.mjs
Normal file
|
|
@ -0,0 +1,227 @@
|
|||
#!/usr/bin/env node
|
||||
// git-clone.mjs — Clone GitHub repos to temp dirs for security scanning
|
||||
// Usage:
|
||||
// node git-clone.mjs clone <url> [--branch <name>] → sandboxed shallow clone, prints tmpdir path
|
||||
// node git-clone.mjs cleanup <dir> → removes temp directory
|
||||
// node git-clone.mjs validate <url> → exits 0 if valid GitHub URL, 1 if not
|
||||
|
||||
import { mkdtempSync, rmSync, existsSync, realpathSync, readdirSync, lstatSync } from 'node:fs';
import { join, resolve, sep } from 'node:path';
import { tmpdir } from 'node:os';
import { spawnSync } from 'node:child_process';
|
||||
|
||||
// Accepted clone URL shapes. Both are anchored at start and end and allow
// only word characters, '.' and '-' in the owner and repository segments.
// HTTPS form: https://github.com/<owner>/<repo>[.git][/]
const GITHUB_URL_RE = /^https:\/\/github\.com\/[\w.-]+\/[\w.-]+(\.git)?\/?$/;
// SSH form: git@github.com:<owner>/<repo>[.git]
const GITHUB_SSH_RE = /^git@github\.com:[\w.-]+\/[\w.-]+(\.git)?$/;
// Upper bound (in MB) applied by the post-clone size check in the CLI.
const MAX_CLONE_SIZE_MB = 100;
|
||||
|
||||
/**
 * Check whether a string is an accepted GitHub clone URL.
 * @param {string} url
 * @returns {boolean} true when the URL matches the HTTPS or the SSH pattern
 */
function isValidUrl(url) {
  if (GITHUB_URL_RE.test(url)) return true;
  return GITHUB_SSH_RE.test(url);
}
|
||||
|
||||
/**
 * Split CLI arguments into the optional `--branch <name>` value and the
 * remaining positional arguments (order preserved).
 * A trailing `--branch` with no value after it is kept as a positional
 * argument. If `--branch` is given several times, the last value wins.
 * @param {string[]} argv - Arguments after the sub-command
 * @returns {{ branch: string|null, positional: string[] }}
 */
function parseArgs(argv) {
  const parsed = { branch: null, positional: [] };
  let idx = 0;
  while (idx < argv.length) {
    const token = argv[idx];
    const hasValue = idx + 1 < argv.length;
    if (token === '--branch' && hasValue) {
      parsed.branch = argv[idx + 1];
      idx += 2;
    } else {
      parsed.positional.push(token);
      idx += 1;
    }
  }
  return parsed;
}
|
||||
|
||||
/**
 * Git config flags that neutralize known attack vectors.
 * Passed as `-c key=value` pairs in front of the git sub-command, so they
 * apply to that single invocation only.
 */
const GIT_SANDBOX_CONFIG = [
  '-c', 'core.hooksPath=/dev/null',   // hooks directory pointed at /dev/null
  '-c', 'core.symlinks=false',        // symlink support switched off
  '-c', 'core.fsmonitor=false',       // fsmonitor switched off
  '-c', 'filter.lfs.process=',        // LFS filter commands set to empty
  '-c', 'filter.lfs.smudge=',
  '-c', 'filter.lfs.clean=',
  '-c', 'protocol.file.allow=never',  // file:// protocol disallowed
  '-c', 'transfer.fsckObjects=true',  // object checking enabled on transfer
];
|
||||
|
||||
/**
 * Environment that isolates git from system/user config.
 * Starts from a copy of the current process environment (captured when this
 * module is loaded) and overrides the git-specific variables below.
 * NOTE(review): '/dev/null' is a POSIX path — confirm that Git for Windows
 * accepts it for GIT_CONFIG_GLOBAL.
 */
const GIT_SANDBOX_ENV = {
  ...process.env,
  GIT_CONFIG_NOSYSTEM: '1',
  GIT_CONFIG_GLOBAL: '/dev/null',
  GIT_ATTR_NOSYSTEM: '1',
  GIT_TERMINAL_PROMPT: '0',
};
|
||||
|
||||
/**
 * Build a sandbox-exec profile that allows everything except file writes,
 * which are limited to one directory plus /dev/null and /dev/tty.
 * macOS only. The directory is passed through realpathSync before it is
 * embedded in the profile.
 * @param {string} allowedWritePath - Existing directory that must stay writable
 * @returns {string|null} Profile text, or null when not on macOS or when
 *   `which sandbox-exec` does not succeed
 */
function buildSandboxProfile(allowedWritePath) {
  const onMac = process.platform === 'darwin';
  if (!onMac) return null;

  const { status } = spawnSync('which', ['sandbox-exec'], { encoding: 'utf8' });
  if (status !== 0) return null;

  const writableRoot = realpathSync(allowedWritePath);
  const rules = [
    '(version 1)',
    '(allow default)',
    '(deny file-write*)',
    `(allow file-write* (subpath "${writableRoot}"))`,
    '(allow file-write* (literal "/dev/null"))',
    '(allow file-write* (literal "/dev/tty"))',
  ];
  // The rules are concatenated without a separator, exactly as before.
  return rules.join('');
}
|
||||
|
||||
/**
 * Build bwrap args restricting writes to a single directory.
 * Linux only — returns null if bwrap is not installed or fails a probe run.
 *
 * The sandbox mounts the whole filesystem read-only, re-binds the clone
 * directory writable, and unshares all namespaces. The network namespace is
 * explicitly kept (`--share-net`): `--unshare-all` would otherwise drop it
 * and a `git clone` from a remote could not connect.
 *
 * @param {string} allowedWritePath - Directory that stays writable inside the sandbox
 * @param {string[]} innerArgs - Command and arguments to run inside bwrap
 * @returns {string[]|null} Argument list for `bwrap`, ending with `innerArgs`
 */
function buildBwrapArgs(allowedWritePath, innerArgs) {
  if (process.platform !== 'linux') return null;
  const check = spawnSync('which', ['bwrap'], { encoding: 'utf8' });
  if (check.status !== 0) return null;

  // Test that bwrap actually works (fails on Ubuntu 24.04+ without admin config)
  const probe = spawnSync('bwrap', ['--ro-bind', '/', '/', '--dev', '/dev', '/bin/true'], {
    stdio: 'ignore', timeout: 5000,
  });
  if (probe.status !== 0) return null;

  return [
    '--ro-bind', '/', '/',                          // read-only root
    '--bind', allowedWritePath, allowedWritePath,   // writable clone dir
    '--dev', '/dev',                                // /dev/null etc.
    '--unshare-all',                                // isolate namespaces
    '--share-net',                                  // ...but keep the network for the clone
    '--new-session',                                // prevent tty hijack
    '--die-with-parent',                            // cleanup on parent exit
    ...innerArgs,
  ];
}
|
||||
|
||||
/**
 * Assemble the command line for a hardened `git` invocation.
 * The GIT_SANDBOX_CONFIG flags are always placed before `gitArgs`. The result
 * is wrapped in sandbox-exec when a macOS profile is available, else in bwrap
 * when bwrap args are available, else it is plain git.
 * @param {string} tmpDir - Directory the sandbox may write to
 * @param {string[]} gitArgs - git sub-command and its arguments
 * @returns {{ cmd: string, args: string[], sandbox: string|null }}
 *   `sandbox` is 'sandbox-exec', 'bwrap', or null for the unsandboxed fallback
 */
function buildSandboxedClone(tmpDir, gitArgs) {
  const hardenedArgs = GIT_SANDBOX_CONFIG.concat(gitArgs);

  // macOS: sandbox-exec
  const macProfile = buildSandboxProfile(tmpDir);
  if (macProfile) {
    return {
      cmd: 'sandbox-exec',
      args: ['-p', macProfile, 'git'].concat(hardenedArgs),
      sandbox: 'sandbox-exec',
    };
  }

  // Linux: bwrap
  const linuxArgs = buildBwrapArgs(tmpDir, ['git'].concat(hardenedArgs));
  if (linuxArgs) {
    return { cmd: 'bwrap', args: linuxArgs, sandbox: 'bwrap' };
  }

  // Fallback: git with config flags only
  return { cmd: 'git', args: hardenedArgs, sandbox: null };
}
|
||||
|
||||
// Export for testing
|
||||
export {
|
||||
GIT_SANDBOX_CONFIG, GIT_SANDBOX_ENV, buildSandboxProfile, buildBwrapArgs,
|
||||
buildSandboxedClone, MAX_CLONE_SIZE_MB,
|
||||
};
|
||||
|
||||
// CLI entry point — only run when invoked directly
// (importing this module, e.g. from tests, must not execute the CLI).
import { fileURLToPath } from 'node:url';
const __filename = fileURLToPath(import.meta.url);
// NOTE(review): plain string comparison against argv[1]; an invocation through
// a symlink or a differently-cased path may not compare equal — confirm.
const isDirectRun = process.argv[1] === __filename;
|
||||
|
||||
if (isDirectRun) {
  const [,, command, ...rest] = process.argv;

  /** Best-effort recursive delete; a failed cleanup must not mask the real error. */
  const removeQuietly = (dir) => {
    try {
      rmSync(dir, { recursive: true, force: true });
    } catch {
      // Intentionally ignored: this only runs on paths we are already rejecting.
    }
  };

  /**
   * Total size of all non-directory entries under `root`, in MiB rounded up.
   * Implemented with Node builtins (instead of shelling out to `du`) so the
   * size limit is also enforced on platforms without `du`, e.g. Windows.
   * Symlinks are measured with lstat and never followed.
   */
  const directorySizeMb = (root) => {
    let bytes = 0;
    const pending = [root];
    while (pending.length > 0) {
      const dir = pending.pop();
      let entries;
      try {
        entries = readdirSync(dir, { withFileTypes: true });
      } catch {
        continue; // unreadable directory: measure what we can
      }
      for (const entry of entries) {
        const full = join(dir, entry.name);
        if (entry.isDirectory()) {
          pending.push(full);
          continue;
        }
        try {
          bytes += lstatSync(full).size;
        } catch {
          // Entry vanished or is unreadable: skip it.
        }
      }
    }
    return Math.ceil(bytes / (1024 * 1024));
  };

  switch (command) {
    // clone <url> [--branch <name>]: shallow clone into a fresh temp dir and
    // print that directory on stdout.
    case 'clone': {
      const { branch, positional } = parseArgs(rest);
      const url = positional[0];

      if (!url) {
        console.error('clone: URL required');
        process.exit(1);
      }

      if (!isValidUrl(url)) {
        console.error(`clone: invalid GitHub URL: ${url}`);
        console.error('Supported: https://github.com/user/repo or git@github.com:user/repo.git');
        process.exit(1);
      }

      const tmpDir = mkdtempSync(join(tmpdir(), 'llm-sec-'));
      const gitArgs = ['clone', '--depth', '1'];
      if (branch) gitArgs.push('--branch', branch);
      gitArgs.push(url, tmpDir);

      // Build sandboxed clone command (macOS: sandbox-exec, Linux: bwrap, fallback: git only)
      const { cmd: cloneCmd, args: cloneArgs, sandbox } = buildSandboxedClone(tmpDir, gitArgs);

      if (!sandbox) {
        console.error('clone: WARN: no OS sandbox available, running with git config hardening only');
      }

      const result = spawnSync(cloneCmd, cloneArgs, {
        stdio: ['ignore', 'pipe', 'pipe'],
        timeout: 60_000,
        env: GIT_SANDBOX_ENV,
      });

      if (result.status !== 0) {
        removeQuietly(tmpDir);
        // result.error covers spawn failures and timeouts, where stderr is empty.
        const reason = result.stderr?.toString().trim() || result.error?.message || 'unknown error';
        console.error(`clone: git clone failed: ${reason}`);
        process.exit(1);
      }

      // Post-clone size check
      const sizeMb = directorySizeMb(tmpDir);
      if (sizeMb > MAX_CLONE_SIZE_MB) {
        removeQuietly(tmpDir);
        console.error(`clone: repo too large (${sizeMb}MB, max ${MAX_CLONE_SIZE_MB}MB)`);
        process.exit(1);
      }

      process.stdout.write(tmpDir + '\n');
      break;
    }

    // cleanup <dir>: remove a directory previously created by `clone`.
    case 'cleanup': {
      const dir = rest[0];
      if (!dir) {
        console.error('cleanup: directory path required');
        process.exit(1);
      }
      // Safety: only remove paths strictly inside the system temp directory.
      // Both sides are resolved first so that "<tmp>/../etc" and look-alike
      // siblings such as "<tmp>foo" are rejected, as is the temp dir itself.
      const tmpRoot = resolve(tmpdir());
      const resolvedDir = resolve(dir);
      if (!resolvedDir.startsWith(tmpRoot + sep)) {
        console.error(`cleanup: refusing to remove path outside tmpdir: ${dir}`);
        process.exit(1);
      }
      if (existsSync(resolvedDir)) {
        rmSync(resolvedDir, { recursive: true, force: true });
        process.stdout.write(`Removed ${dir}\n`);
      }
      break;
    }

    // validate <url>: exit status 0 for an accepted GitHub URL, 1 otherwise.
    case 'validate': {
      const url = rest[0];
      if (!url || !isValidUrl(url)) {
        process.exit(1);
      }
      process.exit(0);
    }

    default:
      console.error('Usage: node git-clone.mjs <clone|cleanup|validate> [args...]');
      process.exit(1);
  }
} // end isDirectRun
|
||||
296
plugins/llm-security-copilot/scanners/lib/injection-patterns.mjs
Normal file
296
plugins/llm-security-copilot/scanners/lib/injection-patterns.mjs
Normal file
|
|
@ -0,0 +1,296 @@
|
|||
// injection-patterns.mjs — Shared prompt injection detection patterns
|
||||
// Used by pre-prompt-inject-scan.mjs (UserPromptSubmit) and post-mcp-verify.mjs (PostToolUse).
|
||||
//
|
||||
// Patterns derived from skill-scanner-agent Category 1 (LLM01 Prompt Injection)
|
||||
// and Category 5 (Hidden Instructions) in knowledge/skill-threat-patterns.md.
|
||||
//
|
||||
// Zero external dependencies beyond ./string-utils.mjs.
|
||||
|
||||
import { normalizeForScan, containsUnicodeTags, decodeUnicodeTags } from './string-utils.mjs';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Critical patterns — direct injection attempts (should be blocked)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Critical-tier rules. Each entry is `{ pattern: RegExp, label: string }`.
 * The table is written as `[pattern, label]` pairs and expanded into objects
 * below; entry order is significant because findings are reported in it.
 */
export const CRITICAL_PATTERNS = [
  // Instructions that try to cancel earlier/system instructions
  [/ignore\s+(?:all\s+)?previous/i, 'override: ignore previous instructions'],
  [/forget\s+your\s+(?:instructions|rules|guidelines|directives)/i, 'override: forget instructions'],
  [/disregard\s+your/i, 'override: disregard instructions'],
  [/override\s+your/i, 'override: override instructions'],
  [/suspend\s+all\s+constraints/i, 'override: suspend constraints'],
  [/all\s+prior\s+constraints/i, 'override: prior constraints'],

  // Text imitating system-level headers or tags (anchored ones use the `m` flag)
  [/^#+\s*SYSTEM\s*:/m, 'spoofed header: # SYSTEM:'],
  [/^\[INST\]/m, 'spoofed header: [INST]'],
  [/<\|system\|>/, 'spoofed header: <|system|>'],
  [/<system>/, 'spoofed tag: <system>'],
  [/<\/system>/, 'spoofed tag: </system>'],
  [/^###\s*Instructions\s*:/m, 'spoofed header: ### Instructions:'],

  // Attempts to redefine who the assistant is.
  // The negative lookahead skips phrasing such as "you are now ready/able/in ...".
  [/you\s+are\s+no\s+longer/i, 'identity: you are no longer'],
  [/you\s+are\s+now\s+(?!able|going|ready|done|running|using|looking|in\s|at\s|on\s)/i, 'identity: you are now [name]'],
  [/your\s+new\s+role\s+is/i, 'identity: your new role'],
  [/your\s+new\s+primary\s+directive/i, 'identity: new primary directive'],
  [/pretend\s+you\s+are/i, 'identity: pretend you are'],

  // Attempts to switch off configuration or safety controls
  [/ignore\s+your\s+CLAUDE\.md/i, 'config: ignore CLAUDE.md'],
  [/override\s+your\s+system\s+prompt/i, 'config: override system prompt'],
  [/bypass\s+your\s+safety/i, 'config: bypass safety'],
  [/disable\s+output\s+filtering/i, 'config: disable output filtering'],
].map(([pattern, label]) => ({ pattern, label }));
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Medium patterns — obfuscation and indirect manipulation (advisory)
|
||||
// Lower confidence than High; flag for manual review.
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Medium-tier rules (obfuscation and indirect manipulation).
 * Each entry is `{ pattern: RegExp, label: string }`; the `[pattern, label]`
 * pairs below are expanded into that shape, preserving order.
 */
export const MEDIUM_PATTERNS = [
  // Leetspeak spellings of the critical override keywords
  [/[1!|l][gq9]n[o0]r[e3]\s+(?:all\s+)?pr[e3]v[i1!|l][o0]us/i, 'leetspeak: ignore previous (obfuscated)'],
  [/f[o0]rg[e3]t\s+y[o0]ur\s+[i1!|l]nstruct[i1!|l][o0]ns/i, 'leetspeak: forget instructions (obfuscated)'],
  [/d[i1!|l]sr[e3]g[a4@]rd\s+y[o0]ur/i, 'leetspeak: disregard your (obfuscated)'],
  [/[o0]v[e3]rr[i1!|l]d[e3]\s+y[o0]ur/i, 'leetspeak: override your (obfuscated)'],

  // A Latin letter directly next to one of seven look-alike Cyrillic letters
  [/[a-zA-Z][\u0430\u0435\u043E\u0440\u0441\u0456\u0443]|[\u0430\u0435\u043E\u0440\u0441\u0456\u0443][a-zA-Z]/, 'homoglyph: Cyrillic-Latin mixing in adjacent characters'],

  // Zero-width code point sandwiched between two word characters
  [/\w[\u200B\u200C\u200D\uFEFF]\w/, 'unicode: zero-width character inside word (keyword splitting)'],

  // Text that addresses the AI/assistant rather than the human reader
  [/(?:note|message|instruction)\s+(?:to|for)\s+(?:the\s+)?(?:AI|assistant|model|LLM|Claude)\b/i, 'indirect: instruction addressed to AI/assistant'],
  [/(?:dear|attention)\s+(?:AI|assistant|model|LLM|Claude)\b/i, 'indirect: direct address to AI/assistant'],
  [/when\s+(?:you|the\s+AI|the\s+assistant|Claude)\s+(?:read|see|encounter|process)\s+this/i, 'indirect: trigger-based instruction for AI'],

  // "Ignore previous instructions" in French, Spanish and German
  [/ignor(?:ez?|er?)\s+(?:les?\s+)?instructions?\s+pr[e\u00e9]c[e\u00e9]dentes?/i, 'multi-lang: French "ignore previous instructions"'],
  [/ignor(?:ar?|e)\s+(?:las?\s+)?instrucciones?\s+anteriores?/i, 'multi-lang: Spanish "ignore previous instructions"'],
  [/ignorier(?:e|en)?\s+(?:alle\s+)?vorherigen?\s+(?:Anweisungen|Instruktionen)/i, 'multi-lang: German "ignore previous instructions"'],

  // Markdown link-reference "comments" ([//]: # (...)) carrying instruction keywords
  [/\[\/\/\]:\s*#\s*\(.*(?:ignore|override|system|instruction|execute)/i, 'markdown: suspicious instruction in link-reference comment'],

  // data: URIs whose media type is HTML or JavaScript
  [/data:(?:text\/html|application\/javascript|text\/javascript)[;,]/i, 'data-uri: executable content type'],

  // Content injection via syntactic masking: keywords inside markdown link text
  [/\[[^\]]*(?:system|ignore|override|exfiltrate|execute)[^\]]*\]\([^)]+\)/i, 'markdown: injection payload in link anchor text'],

  // Sub-agent spawning / delegation traps
  [/(?:create|spawn|launch|start|run)\s+(?:an?\s+)?(?:new\s+)?(?:sub-?agent|agent|task|worker)\s+(?:that|to|which|with)\s+(?:.*?\s+)?(?:execute|run|delete|remove|send|post|exfiltrate|access|reads?\s+(?:.*?\s+)?(?:secret|credential|key|token|\.env|\.ssh))/i, 'sub-agent: spawn instruction with dangerous capability keywords'],
  [/(?:delegate|dispatch)\s+(?:to\s+)?(?:an?\s+)?(?:new\s+)?(?:agent|sub-?agent|task)\s+.*?(?:bypass|override|ignore|without\s+(?:review|confirmation|approval))/i, 'sub-agent: delegation with safety bypass instruction'],

  // Natural-language indirection: plain-English requests that chain risky actions
  [/fetch\s+(?:this|the|that)\s+(?:URL|link|endpoint)\s+and\s+(?:execute|run|eval)/i, 'nl-indirection: fetch URL and execute'],
  [/send\s+(?:this|the|that|all)\s+(?:data|content|output|result|information|file)\s+to\s+(?:https?:\/\/|ftp:\/\/|\S+@\S+)/i, 'nl-indirection: send data to external destination'],
  [/read\s+(?:the\s+)?(?:contents?\s+of\s+)?(?:~\/)?\.(?:ssh|aws|gnupg|env)\b/i, 'nl-indirection: read sensitive dotfile path'],
  [/(?:extract|copy|dump|exfiltrate)\s+(?:all\s+)?(?:the\s+)?(?:credentials?|secrets?|tokens?|keys?|passwords?)\s+(?:and|then)\s+(?:send|post|upload|transmit)/i, 'nl-indirection: extract credentials and exfiltrate'],
  [/(?:download|fetch|retrieve)\s+(?:from\s+)?\S+\s+(?:and|then)\s+(?:execute|run|eval|pipe\s+to\s+(?:bash|sh|node|python))/i, 'nl-indirection: download and execute'],
].map(([pattern, label]) => ({ pattern, label }));
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// High patterns — subtle manipulation (advisory warning, not block)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * High-tier rules (subtle manipulation).
 * Each entry is `{ pattern: RegExp, label: string }`; the `[pattern, label]`
 * pairs below are expanded into that shape, preserving order.
 */
export const HIGH_PATTERNS = [
  // Rule normalization and task reframing
  [/always\s+prioritize\s+user\s+requests?\s+over/i, 'normalization: prioritize user over rules'],
  [/user\s+intent\s+takes?\s+precedence\s+over/i, 'normalization: user intent precedence'],
  [/if\s+CLAUDE\.md\s+conflicts/i, 'normalization: CLAUDE.md conflict override'],
  [/skip\s+confirmation\s+for/i, 'normalization: skip confirmation'],
  [/your\s+actual\s+task\s+is/i, 'reframing: your actual task'],
  [/your\s+real\s+purpose\s+is/i, 'reframing: your real purpose'],
  [/what\s+you\s+should\s+actually\s+do/i, 'reframing: what you should actually do'],

  // HTML comments that open with an agent-directed marker
  [/<!--\s*(?:AGENT|AI|HIDDEN|ACTUAL\s+TASK|REAL\s+INSTRUCTION)\s*:/i, 'hidden comment: agent-directed HTML comment'],

  // Content injection via CSS/HTML hiding (inline double-quoted style attributes)
  [/<[^>]+style\s*=\s*"[^"]*display\s*:\s*none[^"]*"[^>]*>/i, 'html-obfuscation: display:none element with content'],
  [/<[^>]+style\s*=\s*"[^"]*visibility\s*:\s*hidden[^"]*"[^>]*>/i, 'html-obfuscation: visibility:hidden element'],
  [/<[^>]+style\s*=\s*"[^"]*position\s*:\s*absolute[^"]*-\d{3,}px[^"]*"[^>]*>/i, 'html-obfuscation: off-screen positioned element'],
  [/<[^>]+style\s*=\s*"[^"]*font-size\s*:\s*0[^"]*"[^>]*>/i, 'html-obfuscation: zero font-size element'],
  [/<[^>]+style\s*=\s*"[^"]*opacity\s*:\s*0[^"]*"[^>]*>/i, 'html-obfuscation: zero opacity element'],
  [/<[^>]+style\s*=\s*"[^"]*(?:height|width)\s*:\s*0[^"]*overflow\s*:\s*hidden[^"]*"[^>]*>/i, 'html-obfuscation: zero-size overflow-hidden element'],
  [/aria-label\s*=\s*"[^"]*(?:ignore|override|system|instruction|execute|exfiltrate)[^"]*"/i, 'html-obfuscation: injection in aria-label attribute'],

  // Semantic manipulation: framings used to slip past oversight
  [/for\s+educational\s+purposes?\s+only/i, 'evasion: educational purpose framing'],
  [/(?:this\s+is\s+)?a\s+hypothetical\s+scenario/i, 'evasion: hypothetical scenario framing'],
  [/in\s+a\s+fictional\s+(?:setting|scenario|world)\s+where/i, 'evasion: fictional setting framing'],
  [/(?:this\s+is\s+)?a\s+red[- ]?team(?:ing)?\s+(?:exercise|simulation|test)/i, 'evasion: red-team exercise framing'],
  [/security\s+audit\s+simulation/i, 'evasion: security audit simulation framing'],
  [/pretend\s+(?:this\s+is|we\s+are\s+in)\s+a\s+(?:test|lab|sandbox|simulation)/i, 'evasion: test/lab/sandbox pretense'],
  [/as\s+a\s+thought\s+experiment/i, 'evasion: thought experiment framing'],
  [/for\s+(?:research|academic|scientific)\s+purposes/i, 'evasion: research purpose framing'],
  [/in\s+an?\s+(?:academic|research|educational)\s+context/i, 'evasion: academic context framing'],

  // Human-in-the-loop traps: pressure to approve without review
  [/(?:approve|accept|confirm)\s+(?:this\s+)?immediately/i, 'hitl-trap: approval urgency — approve immediately'],
  [/no\s+need\s+to\s+review/i, 'hitl-trap: approval urgency — no need to review'],
  [/(?:just|simply)\s+(?:approve|accept|confirm)\s+(?:it|this|everything)/i, 'hitl-trap: approval urgency — just approve'],
  [/(?:don'?t|do\s+not)\s+(?:bother\s+)?(?:check|review|verify|inspect|read)(?:ing)?\s+(?:this|the|it)/i, 'hitl-trap: approval urgency — suppress review'],
  // Human-in-the-loop traps: hiding actions behind brevity claims
  [/for\s+brevity[\s,]+(?:skipping|omitting|leaving\s+out)/i, 'hitl-trap: summary suppression — for brevity, skipping'],
  [/(?:too\s+(?:long|verbose|detailed)\s+to\s+(?:show|display|list|include))/i, 'hitl-trap: summary suppression — too long to show'],
  [/(?:details?\s+(?:omitted|redacted|hidden|suppressed))/i, 'hitl-trap: summary suppression — details omitted'],
  // Human-in-the-loop traps: a "minor" change followed within 80 chars by a destructive verb
  [/(?:minor|trivial|small|cosmetic)\s+(?:change|update|fix|adjustment)\b[^.]{0,80}(?:delet|remov|drop|truncat|reset|destroy|\bformat\b|wipe)/i, 'hitl-trap: scope minimization — minor change + destructive action'],
].map(([pattern, label]) => ({ pattern, label }));
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Hybrid attack patterns — cross-domain injection (HIGH, v5.0 S6)
|
||||
// Preamble 2.0: P2SQL, recursive injection, XSS in agent context.
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Hybrid (cross-domain) rules: prompt-to-SQL, recursive injection, and XSS.
 * Each entry is `{ pattern: RegExp, label: string }`; the `[pattern, label]`
 * pairs below are expanded into that shape, preserving order.
 */
export const HYBRID_PATTERNS = [
  // P2SQL: an override keyword near a SQL statement, in either order
  [/(?:ignore|override|disregard|forget)[^.]{0,60}(?:SELECT\s+\*|DROP\s+TABLE|UNION\s+SELECT|DELETE\s+FROM|INSERT\s+INTO|UPDATE\s+\w+\s+SET)(?:\b|(?=\s|$))/i, 'hybrid-p2sql: injection + SQL keywords (prompt-to-SQL attack)'],
  [/(?:SELECT\s+\*|DROP\s+TABLE|UNION\s+SELECT|DELETE\s+FROM)\s[^;]{0,80}(?:ignore|override|disregard|bypass)/i, 'hybrid-p2sql: SQL operation + injection override keywords'],

  // Recursive injection: text telling the model to plant content in its own output
  [/(?:inject|insert|embed|include)\s+(?:this|the\s+following)\s+(?:into|in)\s+(?:your|the)\s+(?:output|response|reply|message|prompt|context)/i, 'hybrid-recursive: instruction to inject into model output'],
  [/(?:when|if)\s+(?:the\s+)?(?:user|human|operator)\s+(?:asks?|requests?|queries)[^.]{0,60}(?:respond\s+with|output|reply\s+with|include)\s+(?:this|the\s+following)/i, 'hybrid-recursive: conditional response injection (recursive payload)'],

  // XSS: script tags, javascript: URIs, inline event handlers, executable iframes
  [/<script\b[^>]*>[\s\S]*?<\/script>/i, 'hybrid-xss: <script> tag in content (agent context XSS)'],
  [/javascript\s*:/i, 'hybrid-xss: javascript: URI scheme (agent context XSS)'],
  [/\bon(?:error|load|click|mouseover|focus|blur)\s*=/i, 'hybrid-xss: inline event handler attribute (agent context XSS)'],
  [/<iframe\b[^>]*src\s*=\s*["'][^"']*(?:javascript:|data:text\/html)/i, 'hybrid-xss: iframe with executable src (agent context XSS)'],
].map(([pattern, label]) => ({ pattern, label }));
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// HITL cognitive load patterns (MEDIUM, v5.0 S4)
|
||||
// Injection buried after 2000+ characters in verbose output.
|
||||
// Checked separately due to length-dependent logic.
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Detect a "cognitive load" HITL trap: a critical injection pattern that only
 * shows up deep inside long output.
 *
 * Inputs shorter than 2500 characters are never flagged. For longer inputs,
 * only the portion from index 2000 onward is tested against CRITICAL_PATTERNS,
 * and the first matching rule (in table order) is reported.
 *
 * @param {string} text
 * @returns {{ found: boolean, label: string|null }}
 */
export function checkCognitiveLoadTrap(text) {
  if (text.length < 2500) {
    return { found: false, label: null };
  }

  const buried = text.slice(2000);
  const firstHit = CRITICAL_PATTERNS.find(({ pattern }) => pattern.test(buried));
  if (firstHit === undefined) {
    return { found: false, label: null };
  }

  return {
    found: true,
    label: `hitl-trap: cognitive load — injection buried after 2000+ chars (${firstHit.label})`,
  };
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Public API
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Scan text for prompt-injection patterns.
 *
 * The raw text is always scanned; when `normalizeForScan(text)` differs from
 * the input, the normalized form is scanned as well. A label is reported at
 * most once even if it matches both forms.
 *
 * Additional checks:
 *  - Unicode Tag characters present in the raw text add a HIGH finding; any
 *    critical rule that matches the decoded tag payload but not the raw text
 *    adds a CRITICAL finding.
 *  - A positive `checkCognitiveLoadTrap(text)` result adds a MEDIUM finding.
 *
 * @param {string} text - the text to scan
 * @returns {{ critical: string[], high: string[], medium: string[], found: boolean, severity: string|null, patterns: Array<{label: string, severity: string}> }}
 *   Finding labels per tier, plus `found`, the highest `severity` present
 *   (or null), and a flat `patterns` list ordered critical, high, medium.
 */
export function scanForInjection(text) {
  const normalized = normalizeForScan(text);
  const candidates = normalized !== text ? [text, normalized] : [text];

  const critical = [];
  const high = [];
  const medium = [];

  // Labels (or dedupe keys) already reported, so nothing is listed twice.
  const seenLabels = new Set();
  const record = (bucket, key, shown = key) => {
    seenLabels.add(key);
    bucket.push(shown);
  };

  // Rule tables in evaluation order, each paired with the tier it feeds.
  // Hybrid rules are reported in the high tier.
  const tiers = [
    [CRITICAL_PATTERNS, critical],
    [HIGH_PATTERNS, high],
    [HYBRID_PATTERNS, high],
    [MEDIUM_PATTERNS, medium],
  ];

  for (const candidate of candidates) {
    for (const [rules, bucket] of tiers) {
      for (const { pattern, label } of rules) {
        if (!seenLabels.has(label) && pattern.test(candidate)) {
          record(bucket, label);
        }
      }
    }
  }

  // Unicode Tag steganography: presence alone is HIGH; a hidden critical
  // payload (matches decoded tags, not the visible text) is CRITICAL.
  if (containsUnicodeTags(text)) {
    const tagLabel = 'unicode-tags: invisible Unicode Tag characters detected (U+E0000 block steganography)';
    if (!seenLabels.has(tagLabel)) {
      record(high, tagLabel);
    }

    const decodedTags = decodeUnicodeTags(text);
    for (const { pattern, label } of CRITICAL_PATTERNS) {
      const escalatedLabel = `unicode-tags+${label}`;
      if (seenLabels.has(escalatedLabel)) continue;
      if (pattern.test(decodedTags) && !pattern.test(text)) {
        record(critical, escalatedLabel, `${label} (hidden via Unicode Tag steganography)`);
      }
    }
  }

  // HITL cognitive-load trap, evaluated on the raw text only.
  const cogLoad = checkCognitiveLoadTrap(text);
  if (cogLoad.found && !seenLabels.has(cogLoad.label)) {
    record(medium, cogLoad.label);
  }

  // Convenience fields derived from the three tiers.
  let severity = null;
  if (critical.length > 0) {
    severity = 'critical';
  } else if (high.length > 0) {
    severity = 'high';
  } else if (medium.length > 0) {
    severity = 'medium';
  }

  const tag = (tier) => (label) => ({ label, severity: tier });
  const patterns = [
    ...critical.map(tag('critical')),
    ...high.map(tag('high')),
    ...medium.map(tag('medium')),
  ];

  return {
    critical,
    high,
    medium,
    found: severity !== null,
    severity,
    patterns,
  };
}
|
||||
|
|
@ -0,0 +1,193 @@
|
|||
// mcp-description-cache.mjs — Cache MCP tool descriptions and detect drift.
|
||||
// Zero external dependencies.
|
||||
//
|
||||
// Purpose:
|
||||
// MCP servers can change tool descriptions between sessions (rug-pull, MCP05).
|
||||
// This module caches the first-seen description for each tool and alerts when
|
||||
// a subsequent invocation delivers a description that has drifted significantly
|
||||
// (Levenshtein distance > 10% of original length).
|
||||
//
|
||||
// Storage: ~/.cache/llm-security/mcp-descriptions.json
|
||||
// TTL: 7 days per entry (stale entries purged on load).
|
||||
//
|
||||
// OWASP: MCP05 (Tool Description Manipulation / Rug Pull)
|
||||
|
||||
import { readFileSync, writeFileSync, mkdirSync, existsSync } from 'node:fs';
|
||||
import { join, dirname } from 'node:path';
|
||||
import { homedir } from 'node:os';
|
||||
import { levenshtein } from './string-utils.mjs';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Constants
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// Cache location: <home>/.cache/llm-security/mcp-descriptions.json
const CACHE_DIR = join(homedir(), '.cache', 'llm-security');
const CACHE_FILE = join(CACHE_DIR, 'mcp-descriptions.json');

// Entries not seen for longer than this are dropped when the cache is loaded.
const TTL_MS = 7 * (24 * 60 * 60 * 1000); // 7 days, in milliseconds

// Edit distance divided by the cached description's length; above this ratio
// a change is reported as drift.
const DRIFT_THRESHOLD = 0.1; // 10%
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Cache structure
|
||||
// ---------------------------------------------------------------------------
|
||||
// {
//   "mcp__server__tool": {
//     "description": "most recently stored description text",
//     "firstSeen": 1712345678000,
//     "lastSeen": 1712345678000,
//     "hash": "sha256-prefix (optional; reserved — the functions in this module neither read nor write it, equality is checked on the description text)"
//   }
// }
|
||||
|
||||
/**
 * Load the cache from disk, dropping stale entries (older than TTL_MS).
 *
 * Returns an empty object when the file does not exist, cannot be read or
 * parsed, or does not contain a plain key → entry map (a JSON array or a
 * primitive is treated as corrupt).
 *
 * Only entries that are objects with a numeric `lastSeen` are kept. A key
 * named `__proto__` is discarded: assigning it onto the result would replace
 * the result's prototype instead of storing an entry.
 *
 * @param {object} [opts] - Options for testing
 * @param {string} [opts.cacheFile] - Override cache file path
 * @param {number} [opts.now] - Override current time
 * @returns {Record<string, { description: string, firstSeen: number, lastSeen: number }>}
 */
export function loadCache(opts = {}) {
  const cacheFile = opts.cacheFile ?? CACHE_FILE;
  const now = opts.now ?? Date.now();

  if (!existsSync(cacheFile)) return {};

  try {
    const data = JSON.parse(readFileSync(cacheFile, 'utf-8'));

    // typeof [] is 'object' too, so arrays need their own rejection.
    if (!data || typeof data !== 'object' || Array.isArray(data)) return {};

    const cleaned = {};
    for (const [key, entry] of Object.entries(data)) {
      // JSON.parse creates "__proto__" as an own property; plain assignment
      // below would invoke the prototype setter instead of storing it.
      if (key === '__proto__') continue;
      if (!entry || typeof entry !== 'object' || typeof entry.lastSeen !== 'number') continue;

      // Keep only entries seen within the TTL window.
      if (now - entry.lastSeen <= TTL_MS) {
        cleaned[key] = entry;
      }
    }
    return cleaned;
  } catch {
    // Unreadable or unparsable file: behave as if there were no cache.
    return {};
  }
}
|
||||
|
||||
/**
 * Write the cache to disk as pretty-printed JSON (2-space indent), creating
 * the parent directory first when it does not exist.
 *
 * Any error raised while creating the directory, serializing, or writing is
 * swallowed: drift detection is advisory, so a failed save must not break
 * the caller.
 *
 * @param {Record<string, object>} cache
 * @param {object} [opts]
 * @param {string} [opts.cacheFile] - Override cache file path
 */
export function saveCache(cache, opts = {}) {
  const target = opts.cacheFile ?? CACHE_FILE;
  const parentDir = dirname(target);

  try {
    const parentMissing = !existsSync(parentDir);
    if (parentMissing) {
      mkdirSync(parentDir, { recursive: true });
    }

    const serialized = JSON.stringify(cache, null, 2);
    writeFileSync(target, serialized, 'utf-8');
  } catch {
    // Deliberately ignored — see the function comment.
  }
}
|
||||
|
||||
/**
 * Check a tool description against the cached version.
 *
 * - No usable cached entry for the tool: the description is stored and no
 *   drift is reported. "Usable" means an own property of the cache whose
 *   `description` is a string, so inherited names such as "constructor" or a
 *   malformed entry are never mistaken for a baseline.
 * - Identical description: `lastSeen` is refreshed, no drift.
 * - Different description: the Levenshtein distance is divided by the cached
 *   description's length. Above DRIFT_THRESHOLD, drift is reported together
 *   with the previously cached text. In both cases the cache is then updated
 *   to the new description.
 *
 * Missing tool name, or a missing/empty/non-string description, returns the
 * no-drift result without touching the cache.
 *
 * @param {string} toolName - Full tool name (e.g. "mcp__tavily__tavily_search")
 * @param {string} description - Current tool description
 * @param {object} [opts] - Options for testing
 * @param {string} [opts.cacheFile] - Override cache file path
 * @param {number} [opts.now] - Override current time
 * @returns {{ drift: boolean, detail: string|null, distance: number, threshold: number, cached: string|null }}
 */
export function checkDescriptionDrift(toolName, description, opts = {}) {
  const now = opts.now ?? Date.now();
  const noDrift = { drift: false, detail: null, distance: 0, threshold: 0, cached: null };

  if (!toolName || !description || typeof description !== 'string') {
    return noDrift;
  }

  const cache = loadCache(opts);

  // Look at own properties only: a plain `cache[toolName]` would resolve
  // names inherited from Object.prototype to built-in functions.
  const existing = Object.prototype.hasOwnProperty.call(cache, toolName)
    ? cache[toolName]
    : undefined;
  const hasBaseline =
    existing !== null &&
    typeof existing === 'object' &&
    typeof existing.description === 'string';

  if (!hasBaseline) {
    // First sighting (or unusable entry) — store the current description.
    cache[toolName] = {
      description,
      firstSeen: now,
      lastSeen: now,
    };
    saveCache(cache, opts);
    return noDrift;
  }

  existing.lastSeen = now;

  // Fast path: nothing changed.
  if (existing.description === description) {
    saveCache(cache, opts);
    return noDrift;
  }

  const dist = levenshtein(existing.description, description);
  // Guard against division by zero for an empty cached description.
  const baseLen = Math.max(existing.description.length, 1);
  const ratio = dist / baseLen;
  const threshold = DRIFT_THRESHOLD;

  const cachedDesc = existing.description;

  // The cache always moves to the newest description.
  // NOTE(review): because sub-threshold edits also replace the baseline, a
  // sequence of small edits can accumulate without ever being reported —
  // confirm this is the intended trade-off.
  existing.description = description;
  saveCache(cache, opts);

  if (ratio > threshold) {
    const pct = Math.round(ratio * 100);
    return {
      drift: true,
      detail: `Tool "${toolName}" description changed by ${pct}% (${dist} edits / ${baseLen} chars). ` +
        `Threshold: ${Math.round(threshold * 100)}%. This may indicate a rug-pull attack (OWASP MCP05).`,
      distance: dist,
      threshold,
      cached: cachedDesc,
    };
  }

  // Minor change below threshold — reported as no drift, with the distance.
  return { drift: false, detail: null, distance: dist, threshold, cached: null };
}
|
||||
|
||||
/**
 * Pull the server segment out of an MCP tool name of the form
 * `mcp__<server>__<tool>`.
 *
 * Returns null when the name is null/undefined, does not start with `mcp__`,
 * or has no `__`-separated tool segment after the server segment.
 *
 * @param {string} toolName
 * @returns {string|null}
 */
export function extractMcpServer(toolName) {
  const looksLikeMcp = toolName?.startsWith('mcp__');
  if (!looksLikeMcp) return null;

  // 'mcp__server__tool' → prefix 'mcp', server 'server', toolSegments ['tool']
  const [, server, ...toolSegments] = toolName.split('__');
  return toolSegments.length > 0 ? server : null;
}
|
||||
|
||||
/**
 * Reset the cache by saving an empty object through saveCache (for testing).
 * @param {object} [opts]
 * @param {string} [opts.cacheFile] - Override cache file path
 */
export function clearCache(opts = {}) {
  const emptyCache = {};
  saveCache(emptyCache, opts);
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Exported constants (for testing)
|
||||
// ---------------------------------------------------------------------------
|
||||
export { TTL_MS, DRIFT_THRESHOLD, CACHE_DIR, CACHE_FILE };
|
||||
177
plugins/llm-security-copilot/scanners/lib/output.mjs
Normal file
177
plugins/llm-security-copilot/scanners/lib/output.mjs
Normal file
|
|
@ -0,0 +1,177 @@
|
|||
// output.mjs — Finding and result builders, JSON envelope
|
||||
// Zero dependencies (uses severity.mjs).
|
||||
|
||||
import { riskScore, verdict, riskBand, owaspCategorize } from './severity.mjs';
|
||||
|
||||
// Module-wide sequence number shared by every finding, whatever its scanner.
let findingCounter = 0;

/**
 * Set the shared finding sequence back to zero, so the next finding is
 * numbered 001. Per the original note, this is called between scanner runs
 * in the orchestrator and before each test.
 */
export function resetCounter() {
  findingCounter = 0;
}

/**
 * Build a finding record and assign it the next sequential ID.
 *
 * The ID has the form `DS-<scanner>-<NNN>`, where NNN is the shared counter
 * zero-padded to at least three digits. Optional fields that are missing or
 * falsy (including `0` and `''`) are stored as null.
 *
 * @param {object} opts
 * @param {string} opts.scanner - Scanner prefix (UNI, ENT, PRM, DEP, TNT, GIT, NET)
 * @param {string} opts.severity - From SEVERITY constants
 * @param {string} opts.title - Short finding title
 * @param {string} opts.description - Detailed description
 * @param {string} [opts.file] - Affected file path (relative)
 * @param {number} [opts.line] - Line number
 * @param {string} [opts.evidence] - Redacted evidence string
 * @param {string} [opts.owasp] - OWASP reference (e.g. "LLM01")
 * @param {string} [opts.recommendation] - Fix suggestion
 * @returns {object}
 */
export function finding(opts) {
  findingCounter += 1;
  const sequence = String(findingCounter).padStart(3, '0');
  const orNull = (value) => value || null;

  return {
    id: `DS-${opts.scanner}-${sequence}`,
    scanner: opts.scanner,
    severity: opts.severity,
    title: opts.title,
    description: opts.description,
    file: orNull(opts.file),
    line: orNull(opts.line),
    evidence: orNull(opts.evidence),
    owasp: orNull(opts.owasp),
    recommendation: orNull(opts.recommendation),
  };
}
|
||||
|
||||
/**
 * Wrap one scanner's output in a result envelope.
 *
 * `counts` always contains the five standard severities (starting at 0); any
 * other severity value found on a finding is counted under its own key. The
 * `error` property is present only when `errorMsg` is truthy.
 *
 * @param {string} scannerName
 * @param {'ok'|'error'|'skipped'} status
 * @param {object[]} findings
 * @param {number} filesScanned
 * @param {number} durationMs
 * @param {string} [errorMsg]
 * @returns {object}
 */
export function scannerResult(scannerName, status, findings, filesScanned, durationMs, errorMsg) {
  const counts = { critical: 0, high: 0, medium: 0, low: 0, info: 0 };
  for (const item of findings) {
    const level = item.severity;
    counts[level] = (counts[level] || 0) + 1;
  }

  return {
    scanner: scannerName,
    status,
    files_scanned: filesScanned,
    duration_ms: durationMs,
    findings,
    counts,
    ...(errorMsg ? { error: errorMsg } : {}),
  };
}
|
||||
|
||||
/**
 * Build a fix-result record for the auto-cleaner.
 *
 * Only the documented fields are copied from `opts`; `error` is included
 * only when it is truthy.
 *
 * @param {object} opts
 * @param {string} opts.finding_id - Original finding ID (e.g. "DS-UNI-001")
 * @param {string} opts.file - Affected file path (relative)
 * @param {string} opts.operation - Fix operation name (e.g. "strip_zero_width")
 * @param {'applied'|'skipped'|'failed'} opts.status
 * @param {string} opts.description - What was done
 * @param {string} [opts.error] - Error message if failed
 * @returns {object}
 */
export function fixResult(opts) {
  const { finding_id, file, operation, status, description } = opts;
  const failure = opts.error ? { error: opts.error } : {};

  return { finding_id, file, operation, status, description, ...failure };
}
|
||||
|
||||
/**
 * Build the top-level output envelope for the auto-cleaner.
 *
 * The summary counts fixes by status ('applied', 'skipped', 'failed'; any
 * other status is not counted), reports `findings_received` as the number of
 * fixes plus the number of errors, and reports `files_modified` as the number
 * of distinct `file` values among applied fixes. `meta.timestamp` is the
 * current time in ISO-8601 form.
 *
 * @param {string} targetPath
 * @param {boolean} dryRun
 * @param {object[]} fixes - Array of fixResult objects
 * @param {object[]} errors - Array of error objects
 * @param {number} durationMs
 * @returns {object}
 */
export function cleanEnvelope(targetPath, dryRun, fixes, errors, durationMs) {
  // One pass over the fixes: per-status totals plus the set of touched files.
  const tally = fixes.reduce(
    (acc, fix) => {
      switch (fix.status) {
        case 'applied':
          acc.applied += 1;
          acc.touchedFiles.add(fix.file);
          break;
        case 'skipped':
          acc.skipped += 1;
          break;
        case 'failed':
          acc.failed += 1;
          break;
        default:
          break;
      }
      return acc;
    },
    { applied: 0, skipped: 0, failed: 0, touchedFiles: new Set() },
  );

  const meta = {
    target: targetPath,
    timestamp: new Date().toISOString(),
    dry_run: dryRun,
    duration_ms: durationMs,
  };

  const summary = {
    findings_received: fixes.length + errors.length,
    fixes_applied: tally.applied,
    fixes_skipped: tally.skipped,
    fixes_failed: tally.failed,
    files_modified: tally.touchedFiles.size,
  };

  return { meta, summary, fixes, errors };
}
|
||||
|
||||
/**
 * Build the top-level output envelope from all scanner results.
 *
 * Sums the five standard severity counts across scanners, gathers every
 * finding into one list for the OWASP breakdown, and tallies scanner statuses.
 *
 * @param {string} targetPath - Scanned path; reported as meta.target
 * @param {Record<string, object>} scannerResults - Scanner results keyed by scanner short name;
 *   each value is read for `counts`, `findings` and `status`
 * @param {number} totalDurationMs - Reported as meta.total_duration_ms
 * @returns {object} `{ meta, scanners, aggregate }`
 */
export function envelope(targetPath, scannerResults, totalDurationMs) {
  const aggCounts = { critical: 0, high: 0, medium: 0, low: 0, info: 0 };
  const allFindings = [];
  let scannersOk = 0;
  let scannersError = 0;
  let scannersSkipped = 0;

  for (const r of Object.values(scannerResults)) {
    for (const sev of Object.keys(aggCounts)) {
      aggCounts[sev] += r.counts[sev] || 0;
    }

    // Append one by one: `push(...r.findings)` passes every finding as a call
    // argument and throws a RangeError once a list is large enough to exceed
    // the engine's argument limit.
    for (const finding of r.findings) {
      allFindings.push(finding);
    }

    if (r.status === 'ok') scannersOk++;
    else if (r.status === 'error') scannersError++;
    else if (r.status === 'skipped') scannersSkipped++;
  }

  // Computed once and reused for both the score and its band.
  const score = riskScore(aggCounts);

  return {
    meta: {
      target: targetPath,
      timestamp: new Date().toISOString(),
      node_version: process.version,
      total_duration_ms: totalDurationMs,
    },
    scanners: scannerResults,
    aggregate: {
      total_findings: allFindings.length,
      counts: aggCounts,
      risk_score: score,
      risk_band: riskBand(score),
      verdict: verdict(aggCounts),
      owasp_breakdown: owaspCategorize(allFindings),
      scanners_ok: scannersOk,
      scanners_error: scannersError,
      scanners_skipped: scannersSkipped,
    },
  };
}
|
||||
178
plugins/llm-security-copilot/scanners/lib/severity.mjs
Normal file
178
plugins/llm-security-copilot/scanners/lib/severity.mjs
Normal file
|
|
@ -0,0 +1,178 @@
|
|||
// severity.mjs — Constants, risk score calculation, verdict logic
|
||||
// Zero dependencies. Used by all scanners and the orchestrator.
|
||||
|
||||
// Severity labels used on findings, exposed as UPPERCASE -> lowercase label
// (CRITICAL: 'critical', ... INFO: 'info'). Frozen so it cannot be modified.
export const SEVERITY = Object.freeze(
  Object.fromEntries(
    ['critical', 'high', 'medium', 'low', 'info'].map((label) => [label.toUpperCase(), label])
  )
);
|
||||
|
||||
// Points contributed to the risk score by each finding of a given severity.
const SEVERITY_WEIGHTS = { critical: 25, high: 10, medium: 4, low: 1, info: 0 };

/**
 * Compute the aggregate risk score: the weighted sum of the severity counts,
 * capped at 100. Missing (or otherwise falsy) counts are treated as 0.
 * @param {{ critical: number, high: number, medium: number, low: number, info: number }} counts
 * @returns {number} Weighted sum, never more than 100
 */
export function riskScore(counts) {
  let total = 0;
  for (const [level, weight] of Object.entries(SEVERITY_WEIGHTS)) {
    total += (counts[level] || 0) * weight;
  }
  return Math.min(total, 100);
}
|
||||
|
||||
/**
 * Turn severity counts into a verdict.
 *
 * - BLOCK:   at least one critical finding, or a risk score of 61 or more
 * - WARNING: at least one high finding, or a risk score of 21 or more
 * - ALLOW:   everything else
 *
 * @param {{ critical: number, high: number, medium: number, low: number, info: number }} counts
 * @returns {'BLOCK' | 'WARNING' | 'ALLOW'}
 */
export function verdict(counts) {
  const score = riskScore(counts);
  const hasCritical = (counts.critical || 0) >= 1;
  const hasHigh = (counts.high || 0) >= 1;

  if (hasCritical || score >= 61) {
    return 'BLOCK';
  }
  return hasHigh || score >= 21 ? 'WARNING' : 'ALLOW';
}
|
||||
|
||||
/**
 * Translate a risk score into its named band.
 * Bands by inclusive upper bound: 20 Low, 40 Medium, 60 High, 80 Critical;
 * anything that fits none of those is Extreme.
 * @param {number} score - 0-100 risk score
 * @returns {'Low' | 'Medium' | 'High' | 'Critical' | 'Extreme'}
 */
export function riskBand(score) {
  const ceilings = [
    [20, 'Low'],
    [40, 'Medium'],
    [60, 'High'],
    [80, 'Critical'],
  ];
  for (const [ceiling, label] of ceilings) {
    if (score <= ceiling) return label;
  }
  return 'Extreme';
}
|
||||
|
||||
/**
 * Derive an A-F grade from a posture/audit pass rate.
 *
 * Rules, checked in order:
 * - F: pass rate below 0.33, or three or more critical findings
 * - A: pass rate >= 0.89 with no critical findings and no fails in critical categories
 * - B: pass rate >= 0.72 with no critical findings
 * - C: pass rate >= 0.56
 * - D: pass rate >= 0.33
 * - F: anything else
 *
 * @param {number} passRate - 0.0 to 1.0
 * @param {number} failsInCritCats - Number of FAIL results in critical categories (1, 2, 5)
 * @param {number} critCount - Number of Critical-severity findings
 * @returns {'A' | 'B' | 'C' | 'D' | 'F'}
 */
export function gradeFromPassRate(passRate, failsInCritCats = 0, critCount = 0) {
  const noCriticals = critCount === 0;

  if (passRate < 0.33 || critCount >= 3) return 'F';
  if (passRate >= 0.89 && noCriticals && failsInCritCats === 0) return 'A';
  if (passRate >= 0.72 && noCriticals) return 'B';
  if (passRate >= 0.56) return 'C';
  return passRate >= 0.33 ? 'D' : 'F';
}
|
||||
|
||||
/**
 * OWASP LLM Top 10 categories associated with each scanner prefix.
 * Keys are scanner prefixes; values list the LLMxx category IDs.
 * Only the outer object is frozen.
 */
export const OWASP_MAP = Object.freeze({
  UNI: ['LLM01'],
  ENT: ['LLM01', 'LLM03'],
  PRM: ['LLM06'],
  DEP: ['LLM03'],
  TNT: ['LLM01', 'LLM02'],
  GIT: ['LLM03'],
  NET: ['LLM02', 'LLM03'],
  TFA: ['LLM01', 'LLM02', 'LLM06'],
  MCI: ['LLM01', 'LLM02'],
  MEM: ['LLM01'],
  SCR: ['LLM03'],
  PST: ['LLM01', 'LLM06'],
});
|
||||
|
||||
/**
 * OWASP Agentic AI Top 10 (ASI) categories associated with each scanner prefix.
 * Keys are scanner prefixes; values list the ASIxx category IDs.
 * Only the outer object is frozen.
 */
export const OWASP_AGENTIC_MAP = Object.freeze({
  UNI: ['ASI01'],
  ENT: ['ASI01', 'ASI04'],
  PRM: ['ASI02', 'ASI03'],
  DEP: ['ASI04'],
  TNT: ['ASI01', 'ASI05'],
  GIT: ['ASI04'],
  NET: ['ASI02', 'ASI05'],
  TFA: ['ASI01', 'ASI02', 'ASI05'],
  MCI: ['ASI01', 'ASI04'],
  MEM: ['ASI01', 'ASI02'],
  SCR: ['ASI04'],
  PST: ['ASI02', 'ASI03', 'ASI04', 'ASI05'],
});
|
||||
|
||||
/**
 * OWASP Skills Top 10 (AST) categories associated with each scanner prefix.
 * Keys are scanner prefixes; values list the ASTxx category IDs.
 * Only the outer object is frozen.
 */
export const OWASP_SKILLS_MAP = Object.freeze({
  UNI: ['AST05'],
  ENT: ['AST02', 'AST05'],
  PRM: ['AST03'],
  DEP: ['AST06'],
  TNT: ['AST01', 'AST02'],
  GIT: ['AST06'],
  NET: ['AST02'],
  TFA: ['AST01', 'AST02', 'AST03'],
  MCI: ['AST01', 'AST02'],
  MEM: ['AST01', 'AST05'],
  SCR: ['AST06'],
  PST: ['AST01', 'AST03'],
});
|
||||
|
||||
/**
 * OWASP MCP Top 10 categories associated with each scanner prefix.
 * Keys are scanner prefixes; values list the MCPxx category IDs.
 * Only the outer object is frozen.
 */
export const OWASP_MCP_MAP = Object.freeze({
  UNI: ['MCP06'],
  ENT: ['MCP01', 'MCP06'],
  PRM: ['MCP02', 'MCP07'],
  DEP: ['MCP04'],
  TNT: ['MCP05', 'MCP06'],
  GIT: ['MCP04'],
  NET: ['MCP02', 'MCP10'],
  TFA: ['MCP03', 'MCP06'],
  MCI: ['MCP03', 'MCP06', 'MCP09'],
  MEM: ['MCP05', 'MCP06'],
  SCR: ['MCP04'],
  PST: ['MCP02', 'MCP07'],
});
|
||||
|
||||
/**
 * Regex matching all supported OWASP framework prefixes:
 * LLM01-LLM10, ASI01-ASI10, AST01-AST10, MCP01-MCP10 (MCP1-MCP10 also accepted).
 * Any one- or two-digit number after the prefix matches; IDs are not normalized.
 */
const OWASP_PREFIX_RE = /(?:LLM|ASI|AST|MCP)\d{1,2}/g;

/**
 * Group findings by OWASP category across all frameworks.
 *
 * Categories come from each finding's `owasp` field (every LLM/ASI/AST/MCP ID
 * found in it). When that yields nothing, the finding's `scanner` prefix is
 * looked up in OWASP_MAP. Findings that still have no category are counted
 * under "Unmapped". A finding with several categories is counted in each.
 *
 * @param {object[]} findings - Array of finding objects with scanner, owasp, and severity fields
 * @returns {Record<string, { count: number, critical: number, high: number, medium: number, low: number, info: number }>}
 */
export function owaspCategorize(findings) {
  // Only these keys may be incremented as severities. Checking against an
  // explicit list (instead of "is the property defined?") keeps values such as
  // 'count' or 'toString' from corrupting a bucket.
  const severityKeys = ['critical', 'high', 'medium', 'low', 'info'];
  const hasOwn = (obj, key) => Object.prototype.hasOwnProperty.call(obj, key);

  const cats = {};
  for (const f of findings) {
    let categories = [];

    if (f.owasp) {
      const match = f.owasp.match(OWASP_PREFIX_RE);
      if (match) categories = match;
    }
    // Own-property check so a scanner value like 'constructor' cannot resolve
    // to something inherited from Object.prototype.
    if (categories.length === 0 && f.scanner && hasOwn(OWASP_MAP, f.scanner)) {
      categories = OWASP_MAP[f.scanner];
    }
    if (categories.length === 0) categories = ['Unmapped'];

    for (const cat of categories) {
      if (!cats[cat]) {
        cats[cat] = { count: 0, critical: 0, high: 0, medium: 0, low: 0, info: 0 };
      }
      cats[cat].count++;
      if (severityKeys.includes(f.severity)) {
        cats[cat][f.severity]++;
      }
    }
  }
  return cats;
}
|
||||
462
plugins/llm-security-copilot/scanners/lib/skill-registry.mjs
Normal file
462
plugins/llm-security-copilot/scanners/lib/skill-registry.mjs
Normal file
|
|
@ -0,0 +1,462 @@
|
|||
// skill-registry.mjs — Local database of known skill fingerprints and risk profiles.
|
||||
// Fingerprints skills by SHA-256 of normalized content, stores scan results,
|
||||
// enables instant re-scan detection and pattern search.
|
||||
// Zero external dependencies.
|
||||
|
||||
import { createHash } from 'node:crypto';
|
||||
import { existsSync, mkdirSync, readFileSync, writeFileSync, readdirSync, statSync } from 'node:fs';
|
||||
import { join, resolve, relative, dirname, basename, extname } from 'node:path';
|
||||
import { fileURLToPath } from 'node:url';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Constants
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// Version tag written into newly created registries.
const REGISTRY_VERSION = '1';

// Per-file size cap in bytes (256 * 1024). Skills are markdown, not binaries,
// so anything larger is left out of the fingerprint.
const MAX_FILE_SIZE = 262_144;

// File extensions (lowercase, with dot) that take part in fingerprinting.
const SCANNABLE_EXTENSIONS = new Set([
  '.md',
  '.mdx',
  '.json',
  '.mjs',
  '.js',
  '.ts',
  '.sh',
]);

// Directory names that are never descended into.
const SKIP_DIRS = new Set([
  'node_modules',
  '.git',
  'dist',
  'build',
  'coverage',
]);

// Age in milliseconds (7 days) after which a cached scan counts as stale.
// Stale results are still returned; staleness only suggests a re-scan.
const STALE_THRESHOLD_MS = 604_800_000;
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Plugin root resolution
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// ES modules have no built-in __filename/__dirname, so derive them from the
// module URL. The plugin root is two directory levels above this file.
const __filename = fileURLToPath(import.meta.url);
const __dirname = dirname(__filename);
const PLUGIN_ROOT = resolve(__dirname, '../..');
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Content normalization — same skill should produce same fingerprint
|
||||
// regardless of trailing whitespace, line endings, or blank line count.
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Bring text into a canonical form before it is fingerprinted.
 *
 * Steps, in order:
 * 1. Convert CRLF and lone CR line endings to LF
 * 2. Strip trailing whitespace from every line
 * 3. Reduce any run of three or more newlines to exactly two
 * 4. Trim whitespace at the very start and end
 *
 * @param {string} content
 * @returns {string}
 */
export function normalizeContent(content) {
  const unixText = content.replace(/\r\n/g, '\n').replace(/\r/g, '\n');

  const trimmedLines = [];
  for (const line of unixText.split('\n')) {
    trimmedLines.push(line.trimEnd());
  }

  const collapsed = trimmedLines.join('\n').replace(/\n{3,}/g, '\n\n');
  return collapsed.trim();
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// File collection — gather all scannable files from a skill path
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Walk a directory tree and read every scannable file in it.
 *
 * Directories listed in SKIP_DIRS are not entered. Only regular files whose
 * extension is in SCANNABLE_EXTENSIONS and whose size does not exceed
 * MAX_FILE_SIZE are read. Unreadable directories and files are silently skipped.
 *
 * @param {string} dirPath - Absolute path to directory
 * @param {string} basePath - Directory that returned relPath values are relative to
 * @returns {{ relPath: string, content: string }[]}
 */
function collectFiles(dirPath, basePath) {
  let entries;
  try {
    entries = readdirSync(dirPath, { withFileTypes: true });
  } catch {
    // Directory cannot be listed: contribute nothing.
    return [];
  }

  const collected = [];
  for (const entry of entries) {
    const fullPath = join(dirPath, entry.name);

    if (entry.isDirectory()) {
      if (!SKIP_DIRS.has(entry.name)) {
        for (const nested of collectFiles(fullPath, basePath)) {
          collected.push(nested);
        }
      }
    } else if (entry.isFile() && SCANNABLE_EXTENSIONS.has(extname(entry.name).toLowerCase())) {
      try {
        if (statSync(fullPath).size <= MAX_FILE_SIZE) {
          const content = readFileSync(fullPath, 'utf8');
          collected.push({ relPath: relative(basePath, fullPath), content });
        }
      } catch {
        // File vanished or is unreadable: skip it.
      }
    }
  }

  return collected;
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Fingerprinting
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Generate a SHA-256 fingerprint for a skill.
 *
 * For a directory: collects all scannable files, sorts them by relative path,
 * and hashes each path and its normalized content, each followed by a NUL byte.
 * Paths are hashed with forward slashes and sorted with a fixed locale, so the
 * same skill yields the same fingerprint on Windows and POSIX and regardless
 * of the machine's language settings.
 *
 * For a single file: normalizes and hashes its content directly.
 *
 * The reported name defaults to the last path segment. For a directory it is
 * replaced by the first `# heading` of a SKILL.md, and then by the `name` of a
 * plugin.json if one is present. For a single file it is replaced by a
 * `name:` line, if any.
 *
 * @param {string} skillPath - Absolute or relative path to skill file or directory
 * @returns {{ fingerprint: string, files: string[], name: string }}
 *   `files` holds the relative paths as returned by the file walk (OS-native separators)
 * @throws {Error} If the path does not exist or cannot be read
 */
export function fingerprintSkill(skillPath) {
  const absPath = resolve(skillPath);
  const hash = createHash('sha256');
  const fileList = [];
  let name = basename(absPath);

  if (statSync(absPath).isDirectory()) {
    // On Windows `relative()` yields backslashes. Hash and compare a
    // forward-slash form instead, otherwise a fingerprint computed on one
    // platform never matches one computed on another.
    const toPortable = process.platform === 'win32'
      ? (p) => p.replace(/\\/g, '/')
      : (p) => p;

    const collected = collectFiles(absPath, absPath)
      .map((file) => ({ ...file, hashPath: toPortable(file.relPath) }))
      // Pin the collation locale: a bare localeCompare() follows the host
      // locale, so the file order (and thus the hash) could differ per machine.
      .sort((a, b) => a.hashPath.localeCompare(b.hashPath, 'en'));

    for (const { relPath, hashPath, content } of collected) {
      fileList.push(relPath);
      // Hash includes the relative path so renames change the fingerprint
      hash.update(hashPath + '\x00');
      hash.update(normalizeContent(content) + '\x00');
    }

    // Skill name from SKILL.md: the file must be named exactly skill.md
    // (any case), not merely end in it (e.g. "myskill.md" does not count).
    const skillMd = collected.find(({ hashPath }) => {
      const lower = hashPath.toLowerCase();
      return lower === 'skill.md' || lower.endsWith('/skill.md');
    });
    if (skillMd) {
      const nameMatch = skillMd.content.match(/^#\s+(.+)/m);
      if (nameMatch) name = nameMatch[1].trim();
    }

    // A plugin.json name, when present, takes precedence over SKILL.md.
    const pluginJson = collected.find(
      ({ hashPath }) => hashPath === 'plugin.json' || hashPath.endsWith('/plugin.json')
    );
    if (pluginJson) {
      try {
        const parsed = JSON.parse(pluginJson.content);
        if (parsed.name) name = parsed.name;
      } catch { /* ignore parse errors */ }
    }
  } else {
    // Single file
    const content = readFileSync(absPath, 'utf8');
    fileList.push(basename(absPath));
    hash.update(normalizeContent(content));

    // Try to extract name from frontmatter; strip one surrounding quote pair
    const nameMatch = content.match(/^name:\s*(.+)/m);
    if (nameMatch) name = nameMatch[1].trim().replace(/^["']|["']$/g, '');
  }

  return {
    fingerprint: hash.digest('hex'),
    files: fileList,
    name,
  };
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Registry I/O
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Location of the working registry file: `<root>/reports/skill-registry.json`.
 * @param {string} [pluginRoot] - Root directory; falls back to PLUGIN_ROOT when falsy
 * @returns {string}
 */
export function registryPath(pluginRoot) {
  const root = pluginRoot || PLUGIN_ROOT;
  return join(root, 'reports', 'skill-registry.json');
}
|
||||
|
||||
/**
 * Location of the seed registry that ships with the plugin:
 * `<root>/knowledge/skill-registry.json`.
 * @param {string} [pluginRoot] - Root directory; falls back to PLUGIN_ROOT when falsy
 * @returns {string}
 */
export function seedRegistryPath(pluginRoot) {
  const root = pluginRoot || PLUGIN_ROOT;
  return join(root, 'knowledge', 'skill-registry.json');
}
|
||||
|
||||
/**
 * Produce a fresh registry with no entries, stamped with the current time.
 * @returns {object} `{ version, updated, entry_count, entries }`
 */
function emptyRegistry() {
  const createdAt = new Date().toISOString();
  return {
    version: REGISTRY_VERSION,
    updated: createdAt,
    entry_count: 0,
    entries: {},
  };
}
|
||||
|
||||
/**
 * Load registry from disk. Merges seed data if available.
 *
 * Falls back to an empty registry when the file is missing, unreadable, not
 * valid JSON, or valid JSON that is not an object. A registry object whose
 * `entries` is missing or not an object gets an empty `entries` map, so
 * callers can always index `registry.entries`.
 *
 * Seed entries are added with `source_type: 'seed'` and never overwrite an
 * existing entry. `entry_count` is recomputed before returning.
 *
 * @param {string} [pluginRoot]
 * @returns {object}
 */
export function loadRegistry(pluginRoot) {
  const isPlainObject = (value) =>
    value !== null && typeof value === 'object' && !Array.isArray(value);

  let registry = emptyRegistry();

  const filePath = registryPath(pluginRoot);
  if (existsSync(filePath)) {
    try {
      const parsed = JSON.parse(readFileSync(filePath, 'utf8'));
      // JSON such as `null`, `[]` or `"text"` parses fine but is not a registry.
      if (isPlainObject(parsed)) {
        registry = parsed;
        if (!isPlainObject(registry.entries)) registry.entries = {};
      }
    } catch {
      // Unreadable or corrupt file: continue with the empty registry.
    }
  }

  // Merge seed data (seed entries never overwrite existing entries)
  const seedPath = seedRegistryPath(pluginRoot);
  if (existsSync(seedPath)) {
    try {
      const seeds = JSON.parse(readFileSync(seedPath, 'utf8'));
      const seedEntries = isPlainObject(seeds) && isPlainObject(seeds.entries) ? seeds.entries : {};
      for (const [fp, entry] of Object.entries(seedEntries)) {
        if (!registry.entries[fp]) {
          registry.entries[fp] = { ...entry, source_type: 'seed' };
        }
      }
    } catch { /* ignore seed parse errors */ }
  }

  // Ensure entry_count is accurate
  registry.entry_count = Object.keys(registry.entries).length;

  return registry;
}
|
||||
|
||||
/**
 * Write the registry to its file as pretty-printed JSON.
 *
 * Creates the parent directory when it is missing. Before writing, the passed
 * registry object itself is updated: `updated` is set to the current time and
 * `entry_count` to the number of entries.
 *
 * @param {object} registry
 * @param {string} [pluginRoot]
 * @returns {string} Path to saved file
 */
export function saveRegistry(registry, pluginRoot) {
  const target = registryPath(pluginRoot);

  const parentDir = dirname(target);
  if (!existsSync(parentDir)) {
    mkdirSync(parentDir, { recursive: true });
  }

  registry.updated = new Date().toISOString();
  registry.entry_count = Object.keys(registry.entries).length;

  const serialized = `${JSON.stringify(registry, null, 2)}\n`;
  writeFileSync(target, serialized);
  return target;
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Core operations
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Check if a fingerprint exists in the registry.
 *
 * An entry is stale when its `last_scanned` time is more than
 * STALE_THRESHOLD_MS in the past. An entry with no `last_scanned` value is
 * treated as scanned at the epoch and therefore stale — the same rule
 * getStats() and listEntries() apply.
 *
 * @param {string} fingerprint
 * @param {string} [pluginRoot]
 * @returns {{ found: boolean, entry: object|null, stale: boolean }}
 */
export function checkRegistry(fingerprint, pluginRoot) {
  const registry = loadRegistry(pluginRoot);
  const entry = registry.entries[fingerprint] || null;

  if (!entry) {
    return { found: false, entry: null, stale: false };
  }

  // `|| 0` matters: new Date(undefined) is an invalid date, whose NaN time
  // makes every comparison false and would report the entry as fresh.
  const lastScanned = new Date(entry.last_scanned || 0).getTime();
  const stale = (Date.now() - lastScanned) > STALE_THRESHOLD_MS;

  return { found: true, entry, stale };
}
|
||||
|
||||
/**
 * Store a scan result in the registry under the skill's fingerprint and save.
 *
 * If the fingerprint is already known, `first_seen` is kept, `scan_count` is
 * incremented, and the previous tags are reused when no new tags are given.
 * The stored entry always has `source_type: 'scanned'`.
 *
 * @param {object} opts
 * @param {string} opts.skillPath - Path that was scanned (stored as `source`)
 * @param {string} opts.fingerprint - From fingerprintSkill()
 * @param {string} opts.name - Skill name
 * @param {string[]} opts.files - Files included in fingerprint
 * @param {string} opts.verdict - ALLOW|WARNING|BLOCK
 * @param {number} opts.risk_score - 0-100
 * @param {object} opts.counts - { critical, high, medium, low, info }
 * @param {number} opts.files_scanned - Number of files scanned
 * @param {string[]} [opts.tags] - Optional tags
 * @param {string} [pluginRoot]
 * @returns {{ entry: object, path: string }} The stored entry and the registry file path
 */
export function registerScan(opts, pluginRoot) {
  const registry = loadRegistry(pluginRoot);
  const previous = registry.entries[opts.fingerprint];

  const firstSeen = previous?.first_seen || new Date().toISOString();
  const lastScanned = new Date().toISOString();
  const scanCount = (previous?.scan_count || 0) + 1;
  const tags = opts.tags || previous?.tags || [];

  const entry = {
    name: opts.name,
    source: opts.skillPath,
    fingerprint: opts.fingerprint,
    first_seen: firstSeen,
    last_scanned: lastScanned,
    scan_count: scanCount,
    verdict: opts.verdict,
    risk_score: opts.risk_score,
    counts: opts.counts,
    files_scanned: opts.files_scanned,
    files_in_fingerprint: opts.files,
    tags,
    source_type: 'scanned',
  };
  registry.entries[opts.fingerprint] = entry;

  return { entry, path: saveRegistry(registry, pluginRoot) };
}
|
||||
|
||||
/**
 * Find registry entries whose name, source, tags or fingerprint contain a pattern.
 * Matching is a case-insensitive substring test over those fields joined by spaces.
 * @param {string} pattern - Search pattern (case-insensitive substring match)
 * @param {string} [pluginRoot]
 * @returns {object[]} Matching entries, most recently scanned first
 */
export function searchRegistry(pattern, pluginRoot) {
  const registry = loadRegistry(pluginRoot);
  const needle = pattern.toLowerCase();

  const haystackOf = (entry) =>
    [
      entry.name || '',
      entry.source || '',
      ...(entry.tags || []),
      entry.fingerprint || '',
    ].join(' ').toLowerCase();

  const scannedAt = (entry) => new Date(entry.last_scanned || 0).getTime();

  const hits = Object.values(registry.entries).filter((entry) =>
    haystackOf(entry).includes(needle)
  );

  // Newest scan first
  hits.sort((a, b) => scannedAt(b) - scannedAt(a));
  return hits;
}
|
||||
|
||||
/**
 * Summarize the registry.
 *
 * Entries without a verdict are counted as ALLOW, entries without a
 * source_type as scanned, and entries without last_scanned as scanned at the
 * epoch. avg_risk_score is the rounded mean risk score (0 for an empty registry).
 *
 * @param {string} [pluginRoot]
 * @returns {object} version, updated, total_entries, by_verdict, by_source_type,
 *   total_scans, stale_count and avg_risk_score
 */
export function getStats(pluginRoot) {
  const registry = loadRegistry(pluginRoot);
  const all = Object.values(registry.entries);
  const now = Date.now();

  const byVerdict = { ALLOW: 0, WARNING: 0, BLOCK: 0 };
  const bySourceType = { scanned: 0, seed: 0 };
  let totalScans = 0;
  let staleCount = 0;
  let riskTotal = 0;

  all.forEach((entry) => {
    const verdictKey = entry.verdict || 'ALLOW';
    byVerdict[verdictKey] = (byVerdict[verdictKey] || 0) + 1;

    const sourceKey = entry.source_type || 'scanned';
    bySourceType[sourceKey] = (bySourceType[sourceKey] || 0) + 1;

    totalScans += entry.scan_count || 0;
    riskTotal += entry.risk_score || 0;

    const age = now - new Date(entry.last_scanned || 0).getTime();
    if (age > STALE_THRESHOLD_MS) {
      staleCount += 1;
    }
  });

  return {
    version: registry.version,
    updated: registry.updated,
    total_entries: all.length,
    by_verdict: byVerdict,
    by_source_type: bySourceType,
    total_scans: totalScans,
    stale_count: staleCount,
    avg_risk_score: all.length > 0 ? Math.round(riskTotal / all.length) : 0,
  };
}
|
||||
|
||||
/**
 * Delete the entry stored under a fingerprint and save the registry.
 * Nothing is written when the fingerprint is not present.
 * @param {string} fingerprint
 * @param {string} [pluginRoot]
 * @returns {boolean} true if entry was found and removed
 */
export function removeEntry(fingerprint, pluginRoot) {
  const registry = loadRegistry(pluginRoot);
  const present = Boolean(registry.entries[fingerprint]);

  if (present) {
    delete registry.entries[fingerprint];
    saveRegistry(registry, pluginRoot);
  }
  return present;
}
|
||||
|
||||
/**
 * Return registry entries, newest scan first, with optional filtering.
 * @param {object} [opts]
 * @param {string} [opts.verdict] - Keep only entries with this verdict (ALLOW|WARNING|BLOCK)
 * @param {boolean} [opts.staleOnly] - Keep only entries older than STALE_THRESHOLD_MS
 * @param {string} [pluginRoot]
 * @returns {object[]}
 */
export function listEntries(opts, pluginRoot) {
  const registry = loadRegistry(pluginRoot);
  const now = Date.now();

  // Entries without last_scanned are treated as scanned at the epoch.
  const scannedAt = (entry) => new Date(entry.last_scanned || 0).getTime();

  const wantedVerdict = opts?.verdict;
  const staleOnly = opts?.staleOnly;

  return Object.values(registry.entries)
    .filter((entry) => !wantedVerdict || entry.verdict === wantedVerdict)
    .filter((entry) => !staleOnly || (now - scannedAt(entry)) > STALE_THRESHOLD_MS)
    .sort((a, b) => scannedAt(b) - scannedAt(a));
}
|
||||
322
plugins/llm-security-copilot/scanners/lib/string-utils.mjs
Normal file
322
plugins/llm-security-copilot/scanners/lib/string-utils.mjs
Normal file
|
|
@ -0,0 +1,322 @@
|
|||
// string-utils.mjs — Entropy, Levenshtein, base64 detection, redaction, decoding
|
||||
// Zero dependencies.
|
||||
|
||||
/**
 * Shannon entropy of a string (bits per character).
 *
 * Characters are Unicode code points, both when counting occurrences and when
 * computing probabilities, so characters outside the Basic Multilingual Plane
 * (stored as two UTF-16 units) are weighted like any other character.
 *
 * @param {string} s
 * @returns {number} 0 for an empty string
 */
export function shannonEntropy(s) {
  if (s.length === 0) return 0;

  const freq = new Map();
  // Count symbols in the same unit the loop iterates in. `s.length` counts
  // UTF-16 units and would make the probabilities sum to less than 1 whenever
  // the string contains surrogate pairs.
  let total = 0;
  for (const ch of s) {
    freq.set(ch, (freq.get(ch) || 0) + 1);
    total++;
  }

  let H = 0;
  for (const count of freq.values()) {
    const p = count / total;
    H -= p * Math.log2(p);
  }
  return H;
}
|
||||
|
||||
/**
 * Levenshtein edit distance: the minimum number of single-character
 * insertions, deletions and substitutions that turn `a` into `b`.
 * Strings are compared by UTF-16 code unit.
 * @param {string} a
 * @param {string} b
 * @returns {number}
 */
export function levenshtein(a, b) {
  if (a === b) return 0;

  const rows = a.length;
  const cols = b.length;
  if (rows === 0) return cols;
  if (cols === 0) return rows;

  // Only two rows of the distance table are kept and swapped after each pass.
  let above = Array.from({ length: cols + 1 }, (_, j) => j);
  let current = new Array(cols + 1);

  for (let i = 1; i <= rows; i++) {
    const ch = a[i - 1];
    current[0] = i;
    for (let j = 1; j <= cols; j++) {
      const deletion = above[j] + 1;
      const insertion = current[j - 1] + 1;
      const substitution = above[j - 1] + (ch === b[j - 1] ? 0 : 1);
      current[j] = Math.min(deletion, insertion, substitution);
    }
    [above, current] = [current, above];
  }

  // After the final swap the last computed row is in `above`.
  return above[cols];
}
|
||||
|
||||
/**
 * Test whether a string has the shape of base64 data: at least 20 characters
 * from the base64 alphabet (A-Z, a-z, 0-9, +, /), optionally followed by up to
 * three `=` padding characters, and nothing else.
 * @param {string} s
 * @returns {boolean}
 */
export function isBase64Like(s) {
  const base64Shape = /^[A-Za-z0-9+/]{20,}={0,3}$/;
  return s.length >= 20 && base64Shape.test(s);
}
|
||||
|
||||
/**
 * Test whether a string has the shape of a hex blob: an optional `0x` prefix
 * followed by at least 32 hexadecimal digits and nothing else.
 * @param {string} s
 * @returns {boolean}
 */
export function isHexBlob(s) {
  const hexShape = /^(0x)?[0-9a-fA-F]{32,}$/;
  return s.length >= 32 && hexShape.test(s);
}
|
||||
|
||||
/**
 * Redact a string for safe display — by default show the first 8 and last 4
 * characters with `...` in between.
 *
 * Strings no longer than `showStart + showEnd + 3` are returned unchanged,
 * since eliding them would not hide anything. Negative or zero counts show
 * nothing on that side.
 *
 * @param {string} s
 * @param {number} [showStart=8] - Number of leading characters to keep
 * @param {number} [showEnd=4] - Number of trailing characters to keep
 * @returns {string}
 */
export function redact(s, showStart = 8, showEnd = 4) {
  // Clamp: a negative count would otherwise be read by slice() as an offset
  // from the other end and reveal most of the string.
  const headLen = Math.max(0, showStart);
  const tailLen = Math.max(0, showEnd);

  if (s.length <= headLen + tailLen + 3) return s;

  const head = s.slice(0, headLen);
  // `s.slice(-0)` is `s.slice(0)`, i.e. the whole string, so a tail of 0 must
  // be handled with an explicit start index.
  const tail = s.slice(s.length - tailLen);
  return `${head}...${tail}`;
}
|
||||
|
||||
/**
 * Pull the contents of every string literal out of one line of code.
 * Recognizes double-quoted, single-quoted and backtick-quoted strings,
 * including backslash-escaped characters inside them. The surrounding quotes
 * are dropped; escape sequences are returned as written.
 * @param {string} line
 * @returns {string[]} Literal contents in order of appearance
 */
export function extractStringLiterals(line) {
  const literalRe = /(?:"([^"\\]*(?:\\.[^"\\]*)*)"|'([^'\\]*(?:\\.[^'\\]*)*)'|`([^`\\]*(?:\\.[^`\\]*)*)`)/g;
  const found = [];

  for (let m = literalRe.exec(line); m !== null; m = literalRe.exec(line)) {
    // Exactly one of the three groups is set, depending on the quote style.
    const [, doubleQuoted, singleQuoted, backticked] = m;
    found.push(doubleQuoted ?? singleQuoted ?? backticked);
  }
  return found;
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Encoding/obfuscation decoders
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Replace JavaScript-style Unicode escapes with the characters they denote.
 * Handles the braced form (`\u{...}` with 1-6 hex digits) first, then the
 * four-digit form (`\uXXXX`). Braced escapes above U+10FFFF are left as written.
 * @param {string} s
 * @returns {string}
 */
export function decodeUnicodeEscapes(s) {
  const bracedDecoded = s.replace(/\\u\{([0-9a-fA-F]{1,6})\}/g, (whole, hex) => {
    const codePoint = parseInt(hex, 16);
    if (codePoint > 0x10FFFF) {
      return whole;
    }
    return String.fromCodePoint(codePoint);
  });

  return bracedDecoded.replace(/\\u([0-9a-fA-F]{4})/g, (_whole, hex) => {
    const codePoint = parseInt(hex, 16);
    return String.fromCodePoint(codePoint);
  });
}
|
||||
|
||||
/**
 * Replace two-digit hex escapes (`\xXX`) with the characters they denote.
 * @param {string} s
 * @returns {string}
 */
export function decodeHexEscapes(s) {
  const toChar = (_whole, hex) => {
    const code = parseInt(hex, 16);
    return String.fromCharCode(code);
  };
  return s.replace(/\\x([0-9a-fA-F]{2})/g, toChar);
}
|
||||
|
||||
/**
 * Decode URL percent-encoding (`%XX`).
 * Tries decodeURIComponent first. If that rejects the input as malformed,
 * each valid `%XX` pair is decoded on its own as a single character code and
 * everything else is left untouched.
 * @param {string} s
 * @returns {string}
 */
export function decodeUrlEncoding(s) {
  // Nothing to do without a percent sign.
  if (!s.includes('%')) {
    return s;
  }

  let decoded;
  try {
    decoded = decodeURIComponent(s);
  } catch {
    // Malformed input: fall back to pair-by-pair decoding.
    decoded = s.replace(/%([0-9a-fA-F]{2})/g, (_whole, hex) => {
      const code = parseInt(hex, 16);
      return String.fromCharCode(code);
    });
  }
  return decoded;
}
|
||||
|
||||
/**
 * Try to read a string as base64-encoded text.
 * Returns the decoded UTF-8 text only when the input passes isBase64Like and
 * at least 80% of the decoded characters are printable ASCII (space through
 * `~`) or newline, carriage return or tab. Otherwise returns null.
 * @param {string} s
 * @returns {string|null}
 */
export function tryDecodeBase64(s) {
  if (!isBase64Like(s)) {
    return null;
  }

  try {
    const text = Buffer.from(s, 'base64').toString('utf-8');
    if (text.length === 0) {
      return null;
    }

    // Strip everything that is not printable and compare what is left.
    const printableCount = text.replace(/[^\x20-\x7E\n\r\t]/g, '').length;
    const looksLikeText = printableCount / text.length >= 0.8;
    return looksLikeText ? text : null;
  } catch {
    return null;
  }
}
|
||||
|
||||
/**
 * Named character references understood by decodeHtmlEntities, keyed by the
 * bare reference name (the text between the ampersand and the semicolon).
 *
 * Keys are bare names rather than full entity text so the table survives
 * being rendered or escaped as HTML. A Map is used so that a name such as
 * `toString` can never resolve to an inherited object property.
 *
 * Several references are mapped to their plain-ASCII look-alike (non-breaking
 * space to space, minus sign to hyphen, small tilde to `~`).
 *
 * NOTE(review): this list was rebuilt from the decoded characters; where more
 * than one standard name yields the same character all of them are included.
 * Confirm against the upstream table.
 */
const HTML_NAMED_ENTITIES = new Map([
  ['lt', '<'], ['gt', '>'], ['amp', '&'], ['quot', '"'], ['apos', "'"],
  ['nbsp', ' '], ['tab', '\t'], ['Tab', '\t'], ['newline', '\n'], ['NewLine', '\n'],
  ['lpar', '('], ['rpar', ')'],
  ['lbrack', '['], ['lsqb', '['], ['rbrack', ']'], ['rsqb', ']'],
  ['lbrace', '{'], ['lcub', '{'], ['rbrace', '}'], ['rcub', '}'],
  ['sol', '/'], ['bsol', '\\'],
  ['colon', ':'], ['semi', ';'], ['comma', ','], ['period', '.'],
  ['excl', '!'], ['quest', '?'], ['num', '#'], ['percnt', '%'],
  ['equals', '='], ['plus', '+'], ['minus', '-'], ['ast', '*'],
  ['vert', '|'], ['verbar', '|'], ['tilde', '~'], ['grave', '`'], ['Hat', '^'],
  ['lowbar', '_'], ['UnderBar', '_'], ['at', '@'], ['commat', '@'], ['dollar', '$'],
]);

/**
 * Decode HTML character references.
 *
 * Handles, in this order:
 * - hexadecimal numeric references (`&#xHHH;`, 1-6 hex digits)
 * - decimal numeric references (`&#NNN;`, 1-7 digits)
 * - named references of 2-8 letters that appear in HTML_NAMED_ENTITIES
 *
 * Numeric references above U+10FFFF and unknown names are left as written.
 *
 * @param {string} s
 * @returns {string}
 */
export function decodeHtmlEntities(s) {
  // Every reference starts with an ampersand; without one there is nothing to do.
  if (!s.includes('&')) return s;

  const fromCodePoint = (raw, cp) => (cp <= 0x10FFFF ? String.fromCodePoint(cp) : raw);

  return s
    .replace(/&#x([0-9a-fA-F]{1,6});/g, (raw, hex) => fromCodePoint(raw, parseInt(hex, 16)))
    .replace(/&#(\d{1,7});/g, (raw, dec) => fromCodePoint(raw, parseInt(dec, 10)))
    .replace(/&([a-zA-Z]{2,8});/g, (raw, name) => HTML_NAMED_ENTITIES.get(name) ?? raw);
}
|
||||
|
||||
/**
 * Collapse letter-spaced text: "i g n o r e" → "ignore".
 * Only collapses runs of single letters separated by one or more spaces/tabs.
 * Minimum 4 letters to avoid false positives on normal text.
 * @param {string} s
 * @returns {string}
 */
export function collapseLetterSpacing(s) {
  // One letter followed by 3+ "(spaces/tabs) letter" groups, on word boundaries.
  // Accepting [ \t]+ (not just a single space) keeps wider or tab-based
  // spacing from slipping past the collapse.
  return s.replace(/\b[a-zA-Z](?:[ \t]+[a-zA-Z]){3,}\b/g, (run) =>
    run.replace(/[ \t]+/g, '')
  );
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Unicode Tags steganography (U+E0000 block) — DeepMind traps category 1
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Reveal text hidden with Unicode Tag characters.
 * Every code point in U+E0001..U+E007F is replaced, in place, by the character
 * whose code is (code point - 0xE0000); all other characters pass through.
 * Example: U+E0069 U+E0067 U+E006E → "ign"
 * @param {string} s
 * @returns {string}
 */
export function decodeUnicodeTags(s) {
  // The `u` flag makes the class match whole astral code points, not surrogate halves
  return s.replace(/[\u{E0001}-\u{E007F}]/gu, (tagChar) =>
    String.fromCharCode(tagChar.codePointAt(0) - 0xE0000)
  );
}
|
||||
|
||||
/**
 * Report whether a string holds at least one Unicode Tag character
 * (code points U+E0001 through U+E007F).
 * Their mere presence is treated as suspicious, whatever they decode to.
 * @param {string} s
 * @returns {boolean}
 */
export function containsUnicodeTags(s) {
  // `u` flag: test by code point so astral characters are seen as one unit
  return /[\u{E0001}-\u{E007F}]/u.test(s);
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// BIDI override stripping
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Remove bidirectional control characters that can make text render in a
 * different order than it is stored:
 *   U+202A LRE, U+202B RLE, U+202C PDF, U+202D LRO, U+202E RLO,
 *   U+2066 LRI, U+2067 RLI, U+2068 FSI, U+2069 PDI.
 * @param {string} s
 * @returns {string}
 */
export function stripBidiOverrides(s) {
  const bidiControls = /[\u202A-\u202E\u2066-\u2069]/g;
  const cleaned = s.replace(bidiControls, '');
  return cleaned;
}
|
||||
|
||||
/**
 * Normalize a string by decoding all known obfuscation layers.
 * Runs up to 3 iterations to catch multi-layered encoding (e.g., base64 of URL-encoded).
 *
 * Unicode Tags are decoded and BIDI overrides stripped once up front, then
 * again at the end of every iteration, so invisible characters that only
 * appear after an outer layer is removed (e.g. "&#xE0069;") are normalized too.
 * Order per iteration: HTML entities -> unicode escapes -> hex escapes ->
 * URL encoding -> base64 -> Unicode Tags -> BIDI strip.
 * After decoding: collapse letter-spaced text.
 * @param {string} s
 * @returns {string}
 */
export function normalizeForScan(s) {
  const MAX_ITERATIONS = 3;

  // Pre-decode: Unicode Tags and BIDI overrides present in the raw input
  let result = stripBidiOverrides(decodeUnicodeTags(s));

  for (let i = 0; i < MAX_ITERATIONS; i++) {
    const prev = result;
    result = decodeHtmlEntities(result);
    result = decodeUnicodeEscapes(result);
    result = decodeHexEscapes(result);
    result = decodeUrlEncoding(result);
    const b64decoded = tryDecodeBase64(result);
    if (b64decoded) result = b64decoded;

    // A decoder above may have just produced tag/BIDI characters; handle them
    // here so the next iteration (and the caller) never sees them.
    result = stripBidiOverrides(decodeUnicodeTags(result));

    // Stable — no further decoding possible
    if (result === prev) break;
  }

  // Post-decode: collapse letter-spaced evasion
  return collapseLetterSpacing(result);
}
|
||||
284
plugins/llm-security-copilot/scanners/lib/supply-chain-data.mjs
Normal file
284
plugins/llm-security-copilot/scanners/lib/supply-chain-data.mjs
Normal file
|
|
@ -0,0 +1,284 @@
|
|||
// supply-chain-data.mjs — Shared blocklists, parsers, and OSV.dev API for supply chain checks
|
||||
// Used by: pre-install-supply-chain.mjs (hook) and supply-chain-recheck.mjs (scanner)
|
||||
// Zero external dependencies (Node.js builtins only).
|
||||
|
||||
import { execSync } from 'node:child_process';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Cross-platform HTTP helper (replaces curl subprocess)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Fetch JSON from a URL with timeout. Cross-platform (no curl dependency).
 *
 * The timeout covers the whole exchange, including reading and parsing the
 * response body. Any failure (network error, abort, non-2xx status, invalid
 * JSON) yields null rather than throwing.
 * @param {string} url
 * @param {object} [options] - fetch options (method, headers, body)
 * @param {number} [timeoutMs=8000]
 * @returns {Promise<object|null>} Parsed JSON or null on failure
 */
async function fetchJSON(url, options = {}, timeoutMs = 8000) {
  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(), timeoutMs);
  try {
    const res = await fetch(url, { ...options, signal: controller.signal });
    if (!res.ok) return null;
    return await res.json();
  } catch {
    return null;
  } finally {
    // Always release the timer: a pending timeout would otherwise keep the
    // process alive for up to timeoutMs after a failed request.
    clearTimeout(timer);
  }
}
|
||||
|
||||
// ===========================================================================
|
||||
// Age threshold for new package detection (hours)
|
||||
// ===========================================================================
|
||||
|
||||
/** Age threshold, in hours, used for new-package detection (3 days). */
export const AGE_THRESHOLD_HOURS = 3 * 24;
|
||||
|
||||
// ===========================================================================
|
||||
// KNOWN COMPROMISED — curated blocklists per ecosystem
|
||||
// '*' = all versions blocked (entirely malicious package)
|
||||
// ===========================================================================
|
||||
|
||||
/**
 * npm blocklist: package name → blocked versions.
 * A '*' entry blocks every version of the package.
 */
export const NPM_COMPROMISED = {
  axios: ['1.14.1', '0.30.4'],
  'event-stream': ['3.3.6'],
  'ua-parser-js': ['0.7.29', '0.8.0', '1.0.0'],
  coa: ['2.0.3', '2.0.4', '2.1.1', '2.1.3'],
  rc: ['1.2.9', '1.3.9', '2.3.9'],
  colors: ['1.4.1', '1.4.2'],
  faker: ['6.6.6'],
  'node-ipc': ['10.1.1', '10.1.2', '10.1.3', '11.0.0', '11.1.0'],
  peacenotwar: ['*'],
  'plain-crypto-js': ['*'],
};
|
||||
|
||||
/**
 * PyPI blocklist: package name → blocked versions.
 * Every entry here is blocked for all versions ('*').
 */
export const PIP_COMPROMISED = (() => {
  const blockedNames = [
    'colourama', 'python3-dateutil', 'jeIlyfish', 'python-binance',
    'openai-api', 'requesocks', 'python-mongo', 'nmap-python',
    'beautifulsoup', 'djanga', 'httpslib2', 'urllib4',
    'pipsqlite3', 'torlogging', 'flasck', 'matploltlib',
    'discordi', 'numpyi', 'pycryptdome',
  ];
  const table = {};
  for (const pkgName of blockedNames) {
    // Each package gets its own array instance
    table[pkgName] = ['*'];
  }
  return table;
})();
|
||||
|
||||
/** crates.io blocklist: crate name → blocked versions ('*' = all versions). */
export const CARGO_COMPROMISED = {
  rustdecimal: ['*'],
  cratesio: ['*'],
};
|
||||
|
||||
/** RubyGems blocklist: gem name → blocked versions. */
export const GEM_COMPROMISED = {
  'rest-client': ['1.6.13'],
  strong_password: ['0.0.7'],
  'bootstrap-sass': ['3.2.0.3'],
};
|
||||
|
||||
/** Case-insensitive patterns flagged as suspicious in Docker contexts. */
export const DOCKER_SUSPICIOUS = [
  'xmrig',
  'cryptonight',
  'monero-?miner',
  'coin-?hive',
].map((pattern) => new RegExp(pattern, 'i'));
|
||||
|
||||
/** Popular PyPI package names, used by the hook as the reference set for typosquat detection. */
export const POPULAR_PIP = [
  'requests', 'flask', 'django', 'numpy', 'pandas', 'scipy',
  'matplotlib', 'tensorflow', 'torch', 'opencv-python', 'pillow', 'beautifulsoup4',
  'sqlalchemy', 'celery', 'redis', 'boto3', 'openai', 'anthropic',
  'fastapi', 'uvicorn', 'pydantic', 'httpx', 'aiohttp', 'colorama',
  'cryptography', 'pycryptodome', 'paramiko', 'fabric', 'pytest', 'setuptools',
  'pip', 'wheel', 'twine', 'black', 'mypy', 'ruff',
  'python-dateutil', 'jellyfish', 'pymongo', 'psycopg2', 'python-nmap', 'discord.py',
  'selenium', 'scrapy', 'lxml', 'pyyaml',
];
|
||||
|
||||
// ===========================================================================
|
||||
// Helper functions
|
||||
// ===========================================================================
|
||||
|
||||
/**
 * Check if a package name+version is on a compromised blocklist.
 *
 * A package matches when its blocklist entry contains '*' (all versions) or
 * the exact version string. Without a version, only '*' entries match.
 * @param {Record<string, string[]>} list - Blocklist object
 * @param {string} name - Package name
 * @param {string|null} version - Package version (null = any)
 * @returns {boolean}
 */
export function isCompromised(list, name, version) {
  // Own-property lookup only: a plain `list[name]` would resolve inherited
  // members for names such as "constructor" or "toString" and then crash on
  // `.includes`.
  if (!Object.hasOwn(list, name)) return false;

  const blocked = list[name];
  if (!Array.isArray(blocked)) return false;
  if (blocked.includes('*')) return true;
  return Boolean(version) && blocked.includes(version);
}
|
||||
|
||||
/**
 * Split an npm install specifier into package name and version.
 * Handles plain ("pkg@1.0.0") and scoped ("@scope/pkg@1.0.0") forms; the
 * version is null when the specifier carries no "@version" suffix.
 * @param {string} spec
 * @returns {{ name: string, version: string|null }}
 */
export function parseSpec(spec) {
  // For scoped packages the leading '@' (index 0) is part of the name, and a
  // separator directly after it (index 1) does not count either.
  const firstValidSeparator = spec.startsWith('@') ? 2 : 1;
  const separatorAt = spec.lastIndexOf('@');

  if (separatorAt < firstValidSeparator) {
    return { name: spec, version: null };
  }
  return {
    name: spec.slice(0, separatorAt),
    version: spec.slice(separatorAt + 1),
  };
}
|
||||
|
||||
/**
 * Parse a pip package specifier (e.g. "requests==2.28.0" or "flask>=2.0").
 *
 * The name is always reduced to its leading run of name characters, so extras
 * and stray whitespace ("pkg[extra]==1.0", "pkg ==1.0") cannot hide a package
 * from name-based lookups. A version is reported only for "==" pins.
 * @param {string} spec
 * @returns {{ name: string, version: string|null }}
 */
export function parsePipSpec(spec) {
  const bareName = spec.match(/^([a-zA-Z0-9_.-]+)/)?.[1];

  const eqIdx = spec.indexOf('==');
  if (eqIdx > 0) {
    // '=' is not a name character, so bareName always ends before the '=='.
    return { name: bareName ?? spec.slice(0, eqIdx), version: spec.slice(eqIdx + 2) };
  }
  return { name: bareName ?? spec, version: null };
}
|
||||
|
||||
/**
 * Run a command through execSync with a timeout and never throw.
 * On success the captured stdout is returned; on failure (non-zero exit,
 * timeout, spawn error) whatever stdout the error carries is returned, or
 * null when there is none.
 * NOTE(review): `cmd` is handed to execSync as a single command string —
 * callers should not build it from untrusted input.
 * @param {string} cmd
 * @param {number} [timeoutMs=10000]
 * @returns {string|null}
 */
export function execSafe(cmd, timeoutMs = 10000) {
  const execOptions = {
    timeout: timeoutMs,
    encoding: 'utf-8',
    stdio: ['pipe', 'pipe', 'pipe'],
  };

  let output;
  try {
    output = execSync(cmd, execOptions);
  } catch (failure) {
    // Empty or missing stdout on the error is reported as null
    output = failure.stdout ? failure.stdout : null;
  }
  return output;
}
|
||||
|
||||
// ===========================================================================
|
||||
// OSV.dev API — unified vulnerability database
|
||||
// ===========================================================================
|
||||
|
||||
/**
 * Translation table from this module's short ecosystem identifiers to the
 * ecosystem names sent to OSV.dev.
 */
export const OSV_ECOSYSTEM_MAP = {
  'npm': 'npm',
  'pip': 'PyPI',
  'cargo': 'crates.io',
  'gem': 'RubyGems',
  'go': 'Go',
};
|
||||
|
||||
/**
 * Extract severity from an OSV vulnerability record.
 *
 * Resolution order:
 *   1. database_specific.severity, then ecosystem_specific.severity — the
 *      label is returned upper-cased as-is, so values other than the three
 *      listed below can pass through;
 *   2. the first non-zero numeric score in `severity[]` (a number or a purely
 *      numeric string): >= 9.0 CRITICAL, >= 7.0 HIGH, otherwise MEDIUM;
 *   3. HIGH for GHSA-/CVE- prefixed ids, MEDIUM for everything else.
 *
 * Non-string severity labels and non-numeric scores are skipped rather than
 * throwing, so one malformed record cannot abort a caller's loop.
 * NOTE(review): scores given as CVSS vector strings are not parsed here and
 * fall through to step 3 — confirm against the OSV schema whether that is
 * the common case.
 * @param {object} vuln - OSV vulnerability object
 * @returns {string} - typically 'CRITICAL', 'HIGH', or 'MEDIUM'
 */
export function extractOSVSeverity(vuln) {
  const label = [vuln.database_specific?.severity, vuln.ecosystem_specific?.severity]
    .find((candidate) => typeof candidate === 'string' && candidate.length > 0);
  if (label !== undefined) return label.toUpperCase();

  const severities = Array.isArray(vuln.severity) ? vuln.severity : [];
  for (const sev of severities) {
    const raw = sev?.score;
    // Number() (unlike parseFloat) rejects strings with trailing garbage.
    const score = typeof raw === 'string' ? Number(raw) : raw;
    if (typeof score !== 'number' || Number.isNaN(score) || score === 0) continue;

    if (score >= 9.0) return 'CRITICAL';
    if (score >= 7.0) return 'HIGH';
    return 'MEDIUM';
  }

  if (vuln.id?.startsWith('GHSA') || vuln.id?.startsWith('CVE')) return 'HIGH';
  return 'MEDIUM';
}
|
||||
|
||||
/**
 * Look up known vulnerabilities for one package version on OSV.dev.
 * Intended for the real-time hook path (single package per call).
 *
 * Only CRITICAL and HIGH findings are reported; each is reduced to
 * { id, summary (max 120 chars), severity }. Unknown ecosystems and any
 * failure while querying or processing yield whatever was collected so far
 * (usually two empty lists) — the check fails open.
 * @param {string} ecosystem - 'npm', 'pip', 'cargo', 'gem', 'go'
 * @param {string} name
 * @param {string} version
 * @returns {Promise<{ critical: object[], high: object[] }>}
 */
export async function queryOSV(ecosystem, name, version) {
  const findings = { critical: [], high: [] };

  const osvEcosystem = OSV_ECOSYSTEM_MAP[ecosystem];
  if (!osvEcosystem) return findings;

  try {
    const payload = JSON.stringify({
      version,
      package: { name, ecosystem: osvEcosystem },
    });
    const response = await fetchJSON(
      'https://api.osv.dev/v1/query',
      { method: 'POST', headers: { 'Content-Type': 'application/json' }, body: payload },
      8000
    );
    if (!response) return findings;

    for (const vuln of response.vulns || []) {
      const severity = extractOSVSeverity(vuln);
      const description = vuln.summary || vuln.details || 'No description';
      const entry = { id: vuln.id, summary: description.slice(0, 120), severity };

      if (severity === 'CRITICAL') {
        findings.critical.push(entry);
      } else if (severity === 'HIGH') {
        findings.high.push(entry);
      }
    }
  } catch {
    /* network error — fail open */
  }

  return findings;
}
|
||||
|
||||
/**
 * Look up many packages at once through the OSV.dev batch endpoint.
 * Intended for the scanner's periodic re-check of all lockfile dependencies.
 *
 * Requests are sent sequentially in chunks of at most 1000 queries. If any
 * chunk fails, the whole call reports { results: [], offline: true }.
 * @param {{ ecosystem: string, name: string, version: string }[]} packages
 * @returns {Promise<{ results: Array<{ vulns: object[] }>, offline: boolean }>}
 */
export async function queryOSVBatch(packages) {
  if (packages.length === 0) return { results: [], offline: false };

  // OSV batch API accepts max 1000 queries per request
  const BATCH_SIZE = 1000;

  const queries = packages.map(({ version, name, ecosystem }) => ({
    version,
    // Unmapped ecosystem identifiers are forwarded unchanged
    package: { name, ecosystem: OSV_ECOSYSTEM_MAP[ecosystem] || ecosystem },
  }));

  const collected = [];
  let offset = 0;
  while (offset < queries.length) {
    const chunk = queries.slice(offset, offset + BATCH_SIZE);
    offset += BATCH_SIZE;

    try {
      const response = await fetchJSON(
        'https://api.osv.dev/v1/querybatch',
        {
          method: 'POST',
          headers: { 'Content-Type': 'application/json' },
          body: JSON.stringify({ queries: chunk }),
        },
        15000
      );
      if (!response) return { results: [], offline: true };

      collected.push(...(response.results || []));
    } catch {
      return { results: [], offline: true };
    }
  }

  return { results: collected, offline: false };
}
|
||||
|
|
@ -0,0 +1,90 @@
|
|||
// yaml-frontmatter.mjs — Regex-based YAML frontmatter parser
|
||||
// Handles Claude Code plugin command/agent/skill frontmatter.
|
||||
// Zero dependencies.
|
||||
|
||||
/** Remove one pair of matching surrounding quotes (single or double), if present. */
function stripMatchingQuotes(text) {
  const quoted =
    (text.startsWith('"') && text.endsWith('"')) ||
    (text.startsWith("'") && text.endsWith("'"));
  return quoted ? text.slice(1, -1) : text;
}

/** Strip the indentation of the first non-blank line from every line. */
function dedentLines(lines) {
  const firstText = lines.find((line) => line.trim() !== '');
  if (firstText === undefined) return lines;
  const indent = firstText.length - firstText.trimStart().length;
  return lines.map((line) => {
    const own = line.length - line.trimStart().length;
    return line.slice(Math.min(indent, own));
  });
}

/**
 * Parse YAML frontmatter from a markdown file.
 * Returns null if no frontmatter found (or if it yields no keys).
 *
 * This is a deliberately small, regex-based subset of YAML:
 *   - `key: value` pairs, with optional surrounding quotes;
 *   - inline arrays: `tools: [Read, Write]`;
 *   - block sequences: `tools:` followed by `- item` lines;
 *   - block scalars introduced by `|` or `>` (optionally with `+`/`-`); the
 *     following indented lines are joined with newlines (no folding), and
 *     they are NOT re-interpreted as keys;
 *   - `allowed-tools` / `tools` given as comma-separated strings are split
 *     into arrays.
 * Hyphens in key names are normalized to underscores. Both LF and CRLF line
 * endings are accepted. A `__proto__` key is ignored so that scanned content
 * cannot replace the result object's prototype.
 *
 * @param {string} content - File content
 * @returns {{ name?: string, description?: string, model?: string, color?: string,
 *             tools?: string[], allowed_tools?: string[] } | null}
 */
export function parseFrontmatter(content) {
  // Match --- delimited block at start of file
  const match = content.match(/^---\r?\n([\s\S]*?)\r?\n---/);
  if (!match) return null;

  const lines = match[1].split(/\r?\n/);
  const result = {};
  const nonEmpty = (item) => item !== '';

  // Index-based loop so multi-line values can consume their own lines.
  for (let i = 0; i < lines.length; i++) {
    const trimmed = lines[i].trim();
    if (!trimmed || trimmed.startsWith('#')) continue;

    const kvMatch = trimmed.match(/^(\w[\w-]*)\s*:\s*(.*)$/);
    if (!kvMatch) continue;

    const [, key, rawValue] = kvMatch;
    const raw = rawValue.trim();
    let value = stripMatchingQuotes(raw);

    if (value.startsWith('[') && value.endsWith(']')) {
      // Inline array: [Read, Write, Bash]
      value = value
        .slice(1, -1)
        .split(',')
        .map((item) => item.trim().replace(/^["']|["']$/g, ''))
        .filter(nonEmpty);
    } else if (/^[|>][+-]?$/.test(value)) {
      // Block scalar: take every following line up to the next line that
      // starts in column 0 (the next top-level key).
      const body = [];
      let j = i + 1;
      for (; j < lines.length; j++) {
        if (/^\S/.test(lines[j])) break;
        body.push(lines[j]);
      }
      i = j - 1;
      value = dedentLines(body).join('\n').trim();
    } else if (raw === '') {
      // Possible block sequence: consecutive "- item" lines.
      const items = [];
      let j = i + 1;
      for (; j < lines.length; j++) {
        const item = lines[j].match(/^\s*-(?:\s+(.*))?$/);
        if (!item) break;
        items.push(stripMatchingQuotes((item[1] ?? '').trim()));
      }
      if (items.length > 0) {
        i = j - 1;
        value = items.filter(nonEmpty);
      }
    }

    // Normalize key names
    const normalizedKey = key.replace(/-/g, '_');
    if (normalizedKey === '__proto__') continue;
    result[normalizedKey] = value;
  }

  // Parse tools from allowed-tools (comma-separated string) or tools (array)
  for (const listKey of ['allowed_tools', 'tools']) {
    if (typeof result[listKey] === 'string') {
      result[listKey] = result[listKey]
        .split(',')
        .map((item) => item.trim())
        .filter(nonEmpty);
    }
  }

  return Object.keys(result).length > 0 ? result : null;
}
|
||||
|
||||
/**
 * Classify a plugin file by its path and frontmatter.
 *
 * Path rules are checked first, in this order: commands, agents, skills
 * (or a name ending in "skill.md"), hook config ("hooks.json" or anything
 * under a nested "/hooks/" directory), knowledge, templates. If no path rule
 * matches, the frontmatter decides: name + allowed_tools → command,
 * name + tools → agent. Matching is case-insensitive and accepts both "/"
 * and "\" as path separators.
 * @param {string} relPath - Relative path within plugin
 * @param {object|null} frontmatter - Parsed frontmatter
 * @returns {'command' | 'agent' | 'skill' | 'hook-config' | 'knowledge' | 'template' | 'unknown'}
 */
export function classifyPluginFile(relPath, frontmatter) {
  // Windows relative paths use backslashes; normalize so the checks below match.
  const lower = relPath.replace(/\\/g, '/').toLowerCase();
  const inDir = (dir) => lower.startsWith(`${dir}/`) || lower.includes(`/${dir}/`);

  if (inDir('commands')) return 'command';
  if (inDir('agents')) return 'agent';
  if (inDir('skills') || lower.endsWith('skill.md')) return 'skill';
  if (lower.endsWith('hooks.json') || lower.includes('/hooks/')) return 'hook-config';
  if (inDir('knowledge')) return 'knowledge';
  if (inDir('templates')) return 'template';

  // Fall back to frontmatter shape when the path is not conclusive
  if (frontmatter?.name && frontmatter?.allowed_tools) return 'command';
  if (frontmatter?.name && frontmatter?.tools) return 'agent';
  return 'unknown';
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue