ktg-plugin-marketplace/plugins/llm-security/scanners/lib/skill-registry.mjs

462 lines
14 KiB
JavaScript

// skill-registry.mjs — Local database of known skill fingerprints and risk profiles.
// Fingerprints skills by SHA-256 of normalized content, stores scan results,
// enables instant re-scan detection and pattern search.
// Zero external dependencies.
import { createHash } from 'node:crypto';
import { existsSync, mkdirSync, readFileSync, writeFileSync, readdirSync, statSync } from 'node:fs';
import { join, resolve, relative, dirname, basename, extname } from 'node:path';
import { fileURLToPath } from 'node:url';
// ---------------------------------------------------------------------------
// Constants
// ---------------------------------------------------------------------------
// Schema version written into the `version` field of newly created registries.
const REGISTRY_VERSION = '1';
// Per-file size cap applied while collecting files for a fingerprint; larger
// files are skipped rather than hashed.
const MAX_FILE_SIZE = 256 * 1024; // 256KB — skills are markdown, not binaries
// File extensions (compared in lower case) that take part in a directory fingerprint.
const SCANNABLE_EXTENSIONS = new Set(['.md', '.mdx', '.json', '.mjs', '.js', '.ts', '.sh']);
// Directory names that are never descended into while collecting files.
const SKIP_DIRS = new Set(['node_modules', '.git', 'dist', 'build', 'coverage']);
// Stale threshold — 7 days, in milliseconds. If a cached scan is older than
// this, we suggest re-scanning but still return the cached result.
const STALE_THRESHOLD_MS = 7 * 24 * 60 * 60 * 1000;
// ---------------------------------------------------------------------------
// Plugin root resolution
// ---------------------------------------------------------------------------
// ES modules have no built-in __filename/__dirname, so both are rebuilt from
// this module's own URL.
const __filename = fileURLToPath(import.meta.url);
const __dirname = dirname(__filename);
// Two directory levels above this file's directory; used as the default root
// for the registry and seed-registry paths when callers pass none.
const PLUGIN_ROOT = resolve(__dirname, '..', '..');
// ---------------------------------------------------------------------------
// Content normalization — same skill should produce same fingerprint
// regardless of trailing whitespace, line endings, or blank line count.
// ---------------------------------------------------------------------------
/**
 * Canonicalize text so that cosmetic whitespace differences do not change
 * the fingerprint derived from it.
 *
 * Steps, in order:
 *  1. CRLF and lone CR line endings become LF.
 *  2. Trailing whitespace is stripped from every line.
 *  3. Runs of three or more newlines shrink to two (at most one blank line).
 *  4. Leading and trailing whitespace of the whole text is removed.
 *
 * @param {string} content - Raw file content.
 * @returns {string} Canonical form of `content`.
 */
export function normalizeContent(content) {
  const unixText = content.replace(/\r\n/g, '\n').replace(/\r/g, '\n');

  const strippedLines = [];
  for (const rawLine of unixText.split('\n')) {
    strippedLines.push(rawLine.trimEnd());
  }

  const collapsed = strippedLines.join('\n').replace(/\n{3,}/g, '\n\n');
  return collapsed.trim();
}
// ---------------------------------------------------------------------------
// File collection — gather all scannable files from a skill path
// ---------------------------------------------------------------------------
/**
 * Walk a directory tree depth-first and read every scannable file in it.
 *
 * Directories named in SKIP_DIRS are not entered. Only regular files whose
 * lower-cased extension is in SCANNABLE_EXTENSIONS and whose size does not
 * exceed MAX_FILE_SIZE are returned. Unreadable directories and files are
 * skipped silently (best-effort collection).
 *
 * @param {string} dirPath - Absolute path of the directory to walk.
 * @param {string} basePath - Root that returned `relPath` values are relative to.
 * @returns {{ relPath: string, content: string }[]} Files in traversal order.
 */
function collectFiles(dirPath, basePath) {
  let dirents;
  try {
    dirents = readdirSync(dirPath, { withFileTypes: true });
  } catch {
    // Directory cannot be listed — contribute nothing.
    return [];
  }

  const found = [];
  for (const dirent of dirents) {
    const absolute = join(dirPath, dirent.name);

    if (dirent.isDirectory()) {
      if (!SKIP_DIRS.has(dirent.name)) {
        for (const nested of collectFiles(absolute, basePath)) {
          found.push(nested);
        }
      }
      continue;
    }

    // Anything that is neither a directory nor a regular file is ignored.
    if (!dirent.isFile()) continue;

    const extension = extname(dirent.name).toLowerCase();
    if (!SCANNABLE_EXTENSIONS.has(extension)) continue;

    try {
      const { size } = statSync(absolute);
      if (size > MAX_FILE_SIZE) continue;
      const text = readFileSync(absolute, 'utf8');
      found.push({ relPath: relative(basePath, absolute), content: text });
    } catch {
      // File vanished or is unreadable — leave it out.
    }
  }
  return found;
}
// ---------------------------------------------------------------------------
// Fingerprinting
// ---------------------------------------------------------------------------
/**
 * Generate a SHA-256 fingerprint for a skill.
 *
 * Directory: every scannable file is collected, sorted by relative path, and
 * fed into the hash as `relPath + NUL + normalizedContent + NUL`, so both
 * renames and content changes alter the fingerprint.
 *
 * Single file: only the normalized content is hashed (the file name is not
 * part of the fingerprint).
 *
 * The display name defaults to the basename of the path and is refined from,
 * in increasing priority for directories: the first Markdown H1 of a file
 * named exactly `skill.md` (case-insensitive), then the string `name` field
 * of a file named `plugin.json`. For a single file it is refined from the
 * first `name:` line. A candidate that turns out empty never replaces the
 * fallback.
 *
 * @param {string} skillPath - Absolute or relative path to skill file or directory
 * @returns {{ fingerprint: string, files: string[], name: string }}
 * @throws {Error} If `skillPath` does not exist or cannot be stat'ed/read.
 */
export function fingerprintSkill(skillPath) {
  const absPath = resolve(skillPath);
  const hash = createHash('sha256');
  const fileList = [];
  let name = basename(absPath);

  if (statSync(absPath).isDirectory()) {
    const collected = collectFiles(absPath, absPath);

    // Sort for determinism.
    // NOTE(review): localeCompare follows the runtime's collation; a plain
    // code-unit comparison would be more portable, but changing the order
    // would invalidate fingerprints that are already stored.
    collected.sort((a, b) => a.relPath.localeCompare(b.relPath));

    for (const { relPath, content } of collected) {
      fileList.push(relPath);
      // Hash includes the relative path so renames change the fingerprint
      hash.update(relPath + '\x00');
      hash.update(normalizeContent(content) + '\x00');
    }

    // Match on the file's own name rather than a suffix of the whole path:
    // a suffix test also hits files such as "my-skill.md", and a hard-coded
    // "/" misses nested files when the platform separator is "\".
    const skillMd = collected.find(
      (f) => basename(f.relPath).toLowerCase() === 'skill.md'
    );
    if (skillMd) {
      const headingMatch = skillMd.content.match(/^#\s+(.+)/m);
      const heading = headingMatch ? headingMatch[1].trim() : '';
      if (heading) name = heading;
    }

    // plugin.json, when present and valid, takes precedence over the heading.
    const pluginJson = collected.find((f) => basename(f.relPath) === 'plugin.json');
    if (pluginJson) {
      try {
        const parsed = JSON.parse(pluginJson.content);
        // Only a non-empty string is a usable name; anything else would
        // break the documented `name: string` contract.
        if (typeof parsed?.name === 'string' && parsed.name !== '') {
          name = parsed.name;
        }
      } catch { /* ignore parse errors — keep the name found so far */ }
    }
  } else {
    // Single file
    const content = readFileSync(absPath, 'utf8');
    fileList.push(basename(absPath));
    hash.update(normalizeContent(content));

    // Try to extract name from frontmatter; strip one surrounding quote pair.
    const nameMatch = content.match(/^name:\s*(.+)/m);
    const declared = nameMatch ? nameMatch[1].trim().replace(/^["']|["']$/g, '') : '';
    if (declared) name = declared;
  }

  return {
    fingerprint: hash.digest('hex'),
    files: fileList,
    name,
  };
}
// ---------------------------------------------------------------------------
// Registry I/O
// ---------------------------------------------------------------------------
/**
 * Location of the working registry file: `<root>/reports/skill-registry.json`.
 *
 * @param {string} [pluginRoot] - Root directory; falls back to PLUGIN_ROOT when falsy.
 * @returns {string} Path to the registry JSON file.
 */
export function registryPath(pluginRoot) {
  const root = pluginRoot ? pluginRoot : PLUGIN_ROOT;
  return join(root, 'reports', 'skill-registry.json');
}
/**
 * Location of the seed registry shipped with the plugin:
 * `<root>/knowledge/skill-registry.json`.
 *
 * @param {string} [pluginRoot] - Root directory; falls back to PLUGIN_ROOT when falsy.
 * @returns {string} Path to the seed registry JSON file.
 */
export function seedRegistryPath(pluginRoot) {
  const root = pluginRoot ? pluginRoot : PLUGIN_ROOT;
  return join(root, 'knowledge', 'skill-registry.json');
}
/**
 * Build a fresh registry with no entries, stamped with the current time
 * and the current REGISTRY_VERSION.
 *
 * @returns {{ version: string, updated: string, entry_count: number, entries: object }}
 */
function emptyRegistry() {
  const createdAt = new Date().toISOString();
  const blank = {
    version: REGISTRY_VERSION,
    updated: createdAt,
    entry_count: 0,
    entries: {},
  };
  return blank;
}
/**
 * Load the registry from disk and merge in seed data if available.
 *
 * Falls back to an empty registry when the file is missing, is not valid
 * JSON, or does not contain a JSON object. A registry object whose `entries`
 * field is missing or not an object gets an empty `entries` map instead of
 * crashing later.
 *
 * Seed entries are added only for fingerprints the registry does not already
 * hold, and are tagged `source_type: 'seed'`. A missing or malformed seed file
 * is ignored. Nothing is written to disk by this function.
 *
 * @param {string} [pluginRoot]
 * @returns {object} Registry with `entries` populated and `entry_count` recomputed.
 */
export function loadRegistry(pluginRoot) {
  // True for non-null, non-array objects — the only shape usable as a map.
  const isRecord = (value) =>
    value !== null && typeof value === 'object' && !Array.isArray(value);

  const filePath = registryPath(pluginRoot);
  let registry = null;
  if (existsSync(filePath)) {
    try {
      registry = JSON.parse(readFileSync(filePath, 'utf8'));
    } catch {
      registry = null; // corrupt file — handled by the shape check below
    }
  }

  // Valid JSON is not necessarily a valid registry ("null", "[]", "{}" all
  // parse); repair the shape so the code below can rely on `entries`.
  if (!isRecord(registry)) {
    registry = emptyRegistry();
  } else if (!isRecord(registry.entries)) {
    registry.entries = {};
  }

  // Merge seed data (seed entries never overwrite existing entries)
  const seedPath = seedRegistryPath(pluginRoot);
  if (existsSync(seedPath)) {
    try {
      const seeds = JSON.parse(readFileSync(seedPath, 'utf8'));
      const seedEntries = isRecord(seeds) && isRecord(seeds.entries) ? seeds.entries : {};
      for (const [fp, entry] of Object.entries(seedEntries)) {
        if (!isRecord(entry)) continue; // skip junk values in the seed file
        if (!registry.entries[fp]) {
          registry.entries[fp] = { ...entry, source_type: 'seed' };
        }
      }
    } catch { /* ignore seed read/parse errors — seeds are optional */ }
  }

  // Ensure entry_count is accurate
  registry.entry_count = Object.keys(registry.entries).length;
  return registry;
}
/**
 * Persist a registry as pretty-printed JSON (2-space indent, trailing newline).
 *
 * Side effects on the passed object: `updated` is set to the current time and
 * `entry_count` is recomputed from `entries` before writing. The parent
 * directory is created if it does not exist.
 *
 * @param {object} registry - Registry to write; mutated as described above.
 * @param {string} [pluginRoot]
 * @returns {string} Path of the file that was written.
 */
export function saveRegistry(registry, pluginRoot) {
  const target = registryPath(pluginRoot);
  const parentDir = dirname(target);
  if (existsSync(parentDir) === false) {
    mkdirSync(parentDir, { recursive: true });
  }

  registry.updated = new Date().toISOString();
  registry.entry_count = Object.keys(registry.entries).length;

  const serialized = `${JSON.stringify(registry, null, 2)}\n`;
  writeFileSync(target, serialized);
  return target;
}
// ---------------------------------------------------------------------------
// Core operations
// ---------------------------------------------------------------------------
/**
 * Look up a fingerprint in the registry and report whether its cached scan
 * is stale (older than STALE_THRESHOLD_MS).
 *
 * An entry without a `last_scanned` timestamp is treated as scanned at the
 * epoch and therefore reported stale, matching how getStats() and
 * listEntries() classify the same entry.
 *
 * @param {string} fingerprint
 * @param {string} [pluginRoot]
 * @returns {{ found: boolean, entry: object|null, stale: boolean }}
 */
export function checkRegistry(fingerprint, pluginRoot) {
  const registry = loadRegistry(pluginRoot);

  // Own-property lookup only: a key such as "constructor" must not resolve
  // to something inherited from Object.prototype.
  const isOwn = Object.prototype.hasOwnProperty.call(registry.entries, fingerprint);
  const entry = (isOwn && registry.entries[fingerprint]) || null;
  if (!entry) {
    return { found: false, entry: null, stale: false };
  }

  // `|| 0` keeps the staleness rule identical to getStats()/listEntries().
  const lastScanned = new Date(entry.last_scanned || 0).getTime();
  const stale = (Date.now() - lastScanned) > STALE_THRESHOLD_MS;
  return { found: true, entry, stale };
}
/**
 * Record the outcome of a scan under the skill's fingerprint and save the
 * registry.
 *
 * When the fingerprint is already known, `first_seen` is carried over,
 * `scan_count` is incremented, and previous tags are kept unless new tags
 * are supplied. The stored entry is always marked `source_type: 'scanned'`.
 *
 * @param {object} opts
 * @param {string} opts.skillPath - Path that was scanned
 * @param {string} opts.fingerprint - From fingerprintSkill()
 * @param {string} opts.name - Skill name
 * @param {string[]} opts.files - Files included in fingerprint
 * @param {string} opts.verdict - ALLOW|WARNING|BLOCK
 * @param {number} opts.risk_score - 0-100
 * @param {object} opts.counts - { critical, high, medium, low, info }
 * @param {number} opts.files_scanned - Number of files scanned
 * @param {string[]} [opts.tags] - Optional tags
 * @param {string} [pluginRoot]
 * @returns {{ entry: object, path: string }} The stored entry and the registry file path.
 */
export function registerScan(opts, pluginRoot) {
  const registry = loadRegistry(pluginRoot);
  const { fingerprint } = opts;
  const previous = registry.entries[fingerprint];

  const firstSeen = previous?.first_seen || new Date().toISOString();
  const lastScanned = new Date().toISOString();
  const priorScans = previous?.scan_count || 0;
  const tags = opts.tags || previous?.tags || [];

  const entry = {
    name: opts.name,
    source: opts.skillPath,
    fingerprint,
    first_seen: firstSeen,
    last_scanned: lastScanned,
    scan_count: priorScans + 1,
    verdict: opts.verdict,
    risk_score: opts.risk_score,
    counts: opts.counts,
    files_scanned: opts.files_scanned,
    files_in_fingerprint: opts.files,
    tags,
    source_type: 'scanned',
  };

  registry.entries[fingerprint] = entry;
  return { entry, path: saveRegistry(registry, pluginRoot) };
}
/**
 * Find registry entries whose name, source, tags, or fingerprint contain
 * the given text.
 *
 * Matching is a case-insensitive substring test against those fields joined
 * with single spaces. Results are ordered newest `last_scanned` first.
 *
 * @param {string} pattern - Text to look for (case-insensitive substring match)
 * @param {string} [pluginRoot]
 * @returns {object[]} Matching entries, most recently scanned first.
 */
export function searchRegistry(pattern, pluginRoot) {
  const registry = loadRegistry(pluginRoot);
  const needle = pattern.toLowerCase();

  const hits = Object.values(registry.entries).filter((entry) => {
    const fields = [
      entry.name || '',
      entry.source || '',
      ...(entry.tags || []),
      entry.fingerprint || '',
    ];
    const haystack = fields.join(' ').toLowerCase();
    return haystack.includes(needle);
  });

  // Newest first: compare the right-hand entry's timestamp against the left's.
  const scannedAt = (entry) => new Date(entry.last_scanned || 0).getTime();
  hits.sort((left, right) => scannedAt(right) - scannedAt(left));
  return hits;
}
/**
 * Summarize the registry: totals per verdict and source type, cumulative
 * scan count, number of stale entries, and the rounded mean risk score.
 *
 * Entries without a verdict count as ALLOW, without a source type as
 * 'scanned', and without a `last_scanned` timestamp as scanned at the epoch.
 *
 * @param {string} [pluginRoot]
 * @returns {object} Statistics object (version, updated, total_entries,
 *   by_verdict, by_source_type, total_scans, stale_count, avg_risk_score).
 */
export function getStats(pluginRoot) {
  const registry = loadRegistry(pluginRoot);
  const allEntries = Object.values(registry.entries);

  const verdictTally = { ALLOW: 0, WARNING: 0, BLOCK: 0 };
  const sourceTally = { scanned: 0, seed: 0 };
  let scanTotal = 0;
  let staleTotal = 0;
  let riskTotal = 0;
  const now = Date.now();

  allEntries.forEach((entry) => {
    const verdictKey = entry.verdict || 'ALLOW';
    verdictTally[verdictKey] = (verdictTally[verdictKey] || 0) + 1;

    const sourceKey = entry.source_type || 'scanned';
    sourceTally[sourceKey] = (sourceTally[sourceKey] || 0) + 1;

    scanTotal += entry.scan_count || 0;
    riskTotal += entry.risk_score || 0;

    const scannedAt = new Date(entry.last_scanned || 0).getTime();
    if ((now - scannedAt) > STALE_THRESHOLD_MS) {
      staleTotal += 1;
    }
  });

  // Guard the division so an empty registry reports 0 rather than NaN.
  const averageRisk = allEntries.length > 0
    ? Math.round(riskTotal / allEntries.length)
    : 0;

  return {
    version: registry.version,
    updated: registry.updated,
    total_entries: allEntries.length,
    by_verdict: verdictTally,
    by_source_type: sourceTally,
    total_scans: scanTotal,
    stale_count: staleTotal,
    avg_risk_score: averageRisk,
  };
}
/**
 * Delete the entry stored under a fingerprint and save the registry.
 * Nothing is written when the fingerprint is unknown.
 *
 * Note: loadRegistry() merges seed entries on every load, so a fingerprint
 * that is still present in the seed file will be present again after the
 * next load.
 *
 * @param {string} fingerprint
 * @param {string} [pluginRoot]
 * @returns {boolean} true if an entry was found and removed, false otherwise.
 */
export function removeEntry(fingerprint, pluginRoot) {
  const registry = loadRegistry(pluginRoot);
  const present = Boolean(registry.entries[fingerprint]);
  if (present) {
    delete registry.entries[fingerprint];
    saveRegistry(registry, pluginRoot);
  }
  return present;
}
/**
 * Return registry entries, newest `last_scanned` first, optionally narrowed
 * to one verdict and/or to entries older than the stale threshold.
 *
 * @param {object} [opts]
 * @param {string} [opts.verdict] - Keep only entries with this verdict (ALLOW|WARNING|BLOCK)
 * @param {boolean} [opts.staleOnly] - Keep only stale entries
 * @param {string} [pluginRoot]
 * @returns {object[]} Matching entries, most recently scanned first.
 */
export function listEntries(opts, pluginRoot) {
  const registry = loadRegistry(pluginRoot);
  const now = Date.now();
  const scannedAt = (entry) => new Date(entry.last_scanned || 0).getTime();

  const wantedVerdict = opts?.verdict;
  const staleOnly = opts?.staleOnly;

  const selected = Object.values(registry.entries).filter((entry) => {
    if (wantedVerdict && entry.verdict !== wantedVerdict) return false;
    if (staleOnly && !((now - scannedAt(entry)) > STALE_THRESHOLD_MS)) return false;
    return true;
  });

  selected.sort((left, right) => scannedAt(right) - scannedAt(left));
  return selected;
}