462 lines
14 KiB
JavaScript
462 lines
14 KiB
JavaScript
// skill-registry.mjs — Local database of known skill fingerprints and risk profiles.
|
|
// Fingerprints skills by SHA-256 of normalized content, stores scan results,
|
|
// enables instant re-scan detection and pattern search.
|
|
// Zero external dependencies.
|
|
|
|
import { createHash } from 'node:crypto';
|
|
import { existsSync, mkdirSync, readFileSync, writeFileSync, readdirSync, statSync } from 'node:fs';
|
|
import { join, resolve, relative, dirname, basename, extname } from 'node:path';
|
|
import { fileURLToPath } from 'node:url';
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Constants
|
|
// ---------------------------------------------------------------------------
|
|
|
|
// Version stamp placed in every newly created registry object.
const REGISTRY_VERSION = '1';

// Per-file size cap in bytes (256 KiB) — skills are markdown, not binaries.
const MAX_FILE_SIZE = 256 * 1024;

// Lower-cased file extensions that are read when collecting a skill's files.
const SCANNABLE_EXTENSIONS = new Set([
  '.md',
  '.mdx',
  '.json',
  '.mjs',
  '.js',
  '.ts',
  '.sh',
]);

// Directory names that are never descended into during collection.
const SKIP_DIRS = new Set([
  'node_modules',
  '.git',
  'dist',
  'build',
  'coverage',
]);

// Age limit for cached scans: 7 days, in milliseconds. Older results are
// flagged as stale (a re-scan is suggested) but are still returned.
const STALE_THRESHOLD_MS = 7 * 24 * 60 * 60 * 1000;
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Plugin root resolution
|
|
// ---------------------------------------------------------------------------
|
|
|
|
// Absolute filesystem path of this module, derived from its file: URL.
const __filename = fileURLToPath(import.meta.url);

// Directory that contains this module.
const __dirname = dirname(__filename);

// Plugin root: two directory levels above this module's directory.
const PLUGIN_ROOT = resolve(__dirname, '../..');
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Content normalization — same skill should produce same fingerprint
|
|
// regardless of trailing whitespace, line endings, or blank line count.
|
|
// ---------------------------------------------------------------------------
|
|
|
|
/**
 * Canonicalize text before hashing so that cosmetic whitespace differences
 * do not change the resulting fingerprint.
 *
 * Steps, applied in this order:
 *  1. Convert CRLF and lone CR line endings to LF.
 *  2. Strip trailing whitespace from every line.
 *  3. Squash runs of three or more newlines into two (a single blank line).
 *  4. Trim whitespace from both ends of the whole text.
 *
 * @param {string} content - Raw text.
 * @returns {string} Normalized text.
 */
export function normalizeContent(content) {
  // One pass: a CR optionally followed by LF becomes a single LF.
  const unixText = content.replace(/\r\n?/g, '\n');

  const trimmedLines = [];
  for (const line of unixText.split('\n')) {
    trimmedLines.push(line.trimEnd());
  }

  const collapsed = trimmedLines.join('\n').replace(/\n{3,}/g, '\n\n');
  return collapsed.trim();
}
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// File collection — gather all scannable files from a skill path
|
|
// ---------------------------------------------------------------------------
|
|
|
|
/**
 * Recursively gather the scannable files below a directory.
 *
 * A file is included when its lower-cased extension is in
 * SCANNABLE_EXTENSIONS and its size does not exceed MAX_FILE_SIZE bytes.
 * Directories whose name is in SKIP_DIRS are not descended into, and entries
 * that are neither regular files nor directories are ignored. Unreadable
 * directories and files are skipped silently (best effort) instead of
 * aborting the walk.
 *
 * Relative paths always use "/" as the separator, so the same skill yields
 * the same paths (and therefore the same fingerprint input) on Windows and
 * on POSIX systems.
 *
 * @param {string} dirPath - Absolute path to directory
 * @param {string} basePath - Base path for relative path calculation
 * @returns {{ relPath: string, content: string }[]}
 */
function collectFiles(dirPath, basePath) {
  const files = [];

  let entries;
  try {
    entries = readdirSync(dirPath, { withFileTypes: true });
  } catch {
    // Missing or unreadable directory: it simply contributes no files.
    return files;
  }

  for (const entry of entries) {
    const fullPath = join(dirPath, entry.name);

    if (entry.isDirectory()) {
      if (SKIP_DIRS.has(entry.name)) continue;
      // Append one by one: spreading a very large array into push() can
      // exceed the engine's argument-count limit.
      for (const nested of collectFiles(fullPath, basePath)) {
        files.push(nested);
      }
      continue;
    }

    if (!entry.isFile()) continue;

    const ext = extname(entry.name).toLowerCase();
    if (!SCANNABLE_EXTENSIONS.has(ext)) continue;

    try {
      if (statSync(fullPath).size > MAX_FILE_SIZE) continue;
      const content = readFileSync(fullPath, 'utf8');

      // path.relative() uses "\" on Windows; normalize so relPath is
      // platform-independent. On POSIX a backslash is a legal file-name
      // character, so it is left untouched there.
      const nativeRel = relative(basePath, fullPath);
      const relPath = process.platform === 'win32'
        ? nativeRel.replace(/\\/g, '/')
        : nativeRel;

      files.push({ relPath, content });
    } catch {
      // File vanished or is unreadable: skip it and keep going.
      continue;
    }
  }

  return files;
}
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Fingerprinting
|
|
// ---------------------------------------------------------------------------
|
|
|
|
/**
 * Generate a SHA-256 fingerprint for a skill.
 *
 * Directory: the files returned by collectFiles() are sorted by relative
 * path; for each one, `relPath + "\0"` and then
 * `normalizeContent(content) + "\0"` are fed to the hash, so renaming a file
 * changes the fingerprint.
 *
 * Single file: only the normalized content is hashed (the file name is not).
 *
 * The returned name defaults to the basename of the resolved path and is
 * overridden as follows:
 *  - directory: by the first line of the form "# Title" in a file whose
 *    basename is exactly "skill.md" (case-insensitive), and after that by a
 *    non-empty string "name" in a file named plugin.json (plugin.json wins);
 *  - single file: by the first `name:` line, with one surrounding quote
 *    character stripped from each end.
 * Empty or non-string candidates never replace the fallback name.
 *
 * @param {string} skillPath - Absolute or relative path to skill file or directory
 * @returns {{ fingerprint: string, files: string[], name: string }}
 * @throws {Error} Filesystem errors from statSync/readFileSync propagate
 *   (for example when the path does not exist).
 */
export function fingerprintSkill(skillPath) {
  const absPath = resolve(skillPath);
  const hash = createHash('sha256');
  const fileList = [];
  let name = basename(absPath);

  if (statSync(absPath).isDirectory()) {
    const collected = collectFiles(absPath, absPath);

    // Sort for determinism.
    // NOTE(review): localeCompare ordering depends on the runtime's locale
    // data. It is kept as-is because changing the order would change every
    // previously stored directory fingerprint.
    collected.sort((a, b) => a.relPath.localeCompare(b.relPath));

    for (const { relPath, content } of collected) {
      fileList.push(relPath);
      // Hash includes the relative path so renames change the fingerprint.
      hash.update(relPath + '\x00');
      hash.update(normalizeContent(content) + '\x00');
    }

    // Match on the exact basename so files such as "not-a-skill.md" or
    // "skill.mdx" are not mistaken for the skill manifest.
    const skillMd = collected.find(
      (f) => basename(f.relPath).toLowerCase() === 'skill.md',
    );
    if (skillMd) {
      const heading = skillMd.content.match(/^#\s+(.+)/m);
      const title = heading ? heading[1].trim() : '';
      if (title) name = title;
    }

    const pluginJson = collected.find(
      (f) => basename(f.relPath) === 'plugin.json',
    );
    if (pluginJson) {
      try {
        const parsed = JSON.parse(pluginJson.content);
        // Only a non-empty string is a usable display name.
        if (typeof parsed.name === 'string' && parsed.name) name = parsed.name;
      } catch { /* ignore parse errors */ }
    }
  } else {
    // Single file
    const content = readFileSync(absPath, 'utf8');
    fileList.push(basename(absPath));
    hash.update(normalizeContent(content));

    // Try to extract name from frontmatter
    const nameMatch = content.match(/^name:\s*(.+)/m);
    if (nameMatch) {
      const declared = nameMatch[1].trim().replace(/^["']|["']$/g, '');
      if (declared) name = declared;
    }
  }

  return {
    fingerprint: hash.digest('hex'),
    files: fileList,
    name,
  };
}
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Registry I/O
|
|
// ---------------------------------------------------------------------------
|
|
|
|
/**
 * Location of the working registry file:
 * `<root>/reports/skill-registry.json`.
 * @param {string} [pluginRoot] - Root directory; PLUGIN_ROOT is used when
 *   this is omitted or otherwise falsy.
 * @returns {string}
 */
export function registryPath(pluginRoot) {
  const root = pluginRoot ? pluginRoot : PLUGIN_ROOT;
  return join(root, 'reports', 'skill-registry.json');
}
|
|
|
|
/**
 * Location of the seed registry file (ships with plugin):
 * `<root>/knowledge/skill-registry.json`.
 * @param {string} [pluginRoot] - Root directory; PLUGIN_ROOT is used when
 *   this is omitted or otherwise falsy.
 * @returns {string}
 */
export function seedRegistryPath(pluginRoot) {
  const root = pluginRoot ? pluginRoot : PLUGIN_ROOT;
  return join(root, 'knowledge', 'skill-registry.json');
}
|
|
|
|
/**
 * Build a fresh registry object with no entries.
 * The `updated` field is set to the current time as an ISO-8601 string.
 * @returns {object} `{ version, updated, entry_count, entries }`
 */
function emptyRegistry() {
  const createdAt = new Date().toISOString();
  const registry = {
    version: REGISTRY_VERSION,
    updated: createdAt,
    entry_count: 0,
    entries: {},
  };
  return registry;
}
|
|
|
|
/**
 * Load registry from disk and merge seed data if available.
 *
 * An empty registry is used when the registry file is missing, cannot be
 * read or parsed, or does not contain a JSON object. A registry object whose
 * `entries` field is missing or is not a plain object gets an empty
 * `entries` map, so callers can always index `registry.entries`.
 *
 * Seed entries are copied in (tagged `source_type: 'seed'`) only for
 * fingerprints with no truthy entry already present — they never overwrite
 * existing entries. A missing, unparseable or malformed seed file is ignored.
 * `entry_count` is recomputed before returning. Nothing is written to disk.
 *
 * @param {string} [pluginRoot]
 * @returns {object}
 */
export function loadRegistry(pluginRoot) {
  const isPlainObject = (value) =>
    value !== null && typeof value === 'object' && !Array.isArray(value);

  let registry = null;
  const filePath = registryPath(pluginRoot);
  if (existsSync(filePath)) {
    try {
      const parsed = JSON.parse(readFileSync(filePath, 'utf8'));
      // Valid JSON such as `null`, `[]` or `"x"` is not a usable registry.
      if (isPlainObject(parsed)) registry = parsed;
    } catch {
      // Unreadable or malformed file: fall back to an empty registry below.
    }
  }
  if (!registry) registry = emptyRegistry();

  // Guard against a registry object without a usable entries map, which
  // would otherwise make every lookup below (and in callers) throw.
  if (!isPlainObject(registry.entries)) registry.entries = {};

  // Merge seed data (seed entries never overwrite existing entries)
  const seedPath = seedRegistryPath(pluginRoot);
  if (existsSync(seedPath)) {
    try {
      const seeds = JSON.parse(readFileSync(seedPath, 'utf8'));
      const seedEntries =
        isPlainObject(seeds) && isPlainObject(seeds.entries) ? seeds.entries : {};
      for (const [fp, entry] of Object.entries(seedEntries)) {
        if (!registry.entries[fp]) {
          registry.entries[fp] = { ...entry, source_type: 'seed' };
        }
      }
    } catch { /* ignore seed parse errors */ }
  }

  // Ensure entry_count is accurate
  registry.entry_count = Object.keys(registry.entries).length;

  return registry;
}
|
|
|
|
/**
 * Persist the registry as pretty-printed JSON (2-space indent, trailing
 * newline) at registryPath(pluginRoot), creating the parent directory when
 * it does not exist yet.
 *
 * Side effect: `registry.updated` and `registry.entry_count` are refreshed
 * on the object passed in before it is serialized.
 *
 * @param {object} registry
 * @param {string} [pluginRoot]
 * @returns {string} Path to saved file
 */
export function saveRegistry(registry, pluginRoot) {
  const target = registryPath(pluginRoot);
  const parentDir = dirname(target);
  if (!existsSync(parentDir)) mkdirSync(parentDir, { recursive: true });

  // Refresh bookkeeping fields on the caller's object before serializing.
  registry.updated = new Date().toISOString();
  registry.entry_count = Object.keys(registry.entries).length;

  const serialized = JSON.stringify(registry, null, 2) + '\n';
  writeFileSync(target, serialized);
  return target;
}
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Core operations
|
|
// ---------------------------------------------------------------------------
|
|
|
|
/**
 * Check if a fingerprint exists in the registry.
 *
 * Only the registry's own keys are considered, so strings that happen to
 * name inherited object properties (e.g. "toString") are reported as not
 * found. An entry is stale when its `last_scanned` time is more than
 * STALE_THRESHOLD_MS in the past; an entry without `last_scanned` is treated
 * as scanned at the Unix epoch (and therefore stale), matching getStats()
 * and listEntries(). Stale entries are still returned.
 *
 * @param {string} fingerprint
 * @param {string} [pluginRoot]
 * @returns {{ found: boolean, entry: object|null, stale: boolean }}
 */
export function checkRegistry(fingerprint, pluginRoot) {
  const { entries } = loadRegistry(pluginRoot);

  // Own-property check: a plain `entries[fingerprint]` lookup would also
  // return inherited members such as Object.prototype.toString.
  const isKnown = Object.prototype.hasOwnProperty.call(entries, fingerprint);
  const entry = isKnown ? (entries[fingerprint] || null) : null;

  if (!entry) {
    return { found: false, entry: null, stale: false };
  }

  const lastScanned = new Date(entry.last_scanned || 0).getTime();
  const stale = (Date.now() - lastScanned) > STALE_THRESHOLD_MS;

  return { found: true, entry, stale };
}
|
|
|
|
/**
 * Register a scan result for a skill and persist the registry.
 *
 * The entry stored under `opts.fingerprint` is replaced. From a previous
 * entry for the same fingerprint, `first_seen` is carried over, `scan_count`
 * is incremented, and `tags` are reused when `opts.tags` is not given.
 * A single timestamp is taken per call, so a first-time registration has
 * identical `first_seen` and `last_scanned` values.
 *
 * @param {object} opts
 * @param {string} opts.skillPath - Path that was scanned
 * @param {string} opts.fingerprint - From fingerprintSkill()
 * @param {string} opts.name - Skill name
 * @param {string[]} opts.files - Files included in fingerprint
 * @param {string} opts.verdict - ALLOW|WARNING|BLOCK
 * @param {number} opts.risk_score - 0-100
 * @param {object} opts.counts - { critical, high, medium, low, info }
 * @param {number} opts.files_scanned - Number of files scanned
 * @param {string[]} [opts.tags] - Optional tags
 * @param {string} [pluginRoot]
 * @returns {{ entry: object, path: string }}
 */
export function registerScan(opts, pluginRoot) {
  const registry = loadRegistry(pluginRoot);
  const existing = registry.entries[opts.fingerprint];

  // One clock reading for both fields keeps them consistent on first scan.
  const scannedAt = new Date().toISOString();

  const entry = {
    name: opts.name,
    source: opts.skillPath,
    fingerprint: opts.fingerprint,
    first_seen: existing?.first_seen || scannedAt,
    last_scanned: scannedAt,
    scan_count: (existing?.scan_count || 0) + 1,
    verdict: opts.verdict,
    risk_score: opts.risk_score,
    counts: opts.counts,
    files_scanned: opts.files_scanned,
    files_in_fingerprint: opts.files,
    tags: opts.tags || existing?.tags || [],
    // A re-scan always marks the entry as locally scanned, even if it
    // originally came from the seed registry.
    source_type: 'scanned',
  };

  registry.entries[opts.fingerprint] = entry;
  const savedPath = saveRegistry(registry, pluginRoot);

  return { entry, path: savedPath };
}
|
|
|
|
/**
 * Search the registry by name, source, tag or fingerprint.
 *
 * For each entry the name, source, tags and fingerprint are joined with
 * single spaces and lower-cased; the entry matches when that text contains
 * the lower-cased pattern. An empty (or missing) pattern matches every
 * entry. A `tags` value that is a single string is treated as one tag.
 *
 * Results are ordered by `last_scanned`, most recent first. Missing or
 * unparseable dates are ordered as the Unix epoch, i.e. last.
 *
 * @param {string} pattern - Search pattern (case-insensitive substring match)
 * @param {string} [pluginRoot]
 * @returns {object[]} Matching entries
 */
export function searchRegistry(pattern, pluginRoot) {
  const registry = loadRegistry(pluginRoot);
  // Coerce so a missing or non-string pattern does not throw.
  const needle = String(pattern ?? '').toLowerCase();

  // NaN from an unparseable date would make the comparator inconsistent.
  const toMillis = (value) => {
    const ms = new Date(value || 0).getTime();
    return Number.isNaN(ms) ? 0 : ms;
  };

  const matches = Object.values(registry.entries).filter((entry) => {
    // Spreading a string would split it into characters, so wrap it instead.
    let tags = [];
    if (Array.isArray(entry.tags)) {
      tags = entry.tags;
    } else if (entry.tags) {
      tags = [entry.tags];
    }

    const haystack = [
      entry.name || '',
      entry.source || '',
      ...tags,
      entry.fingerprint || '',
    ].join(' ').toLowerCase();

    return haystack.includes(needle);
  });

  // Sort by last_scanned descending (most recent first)
  matches.sort((a, b) => toMillis(b.last_scanned) - toMillis(a.last_scanned));

  return matches;
}
|
|
|
|
/**
 * Summarize the registry contents.
 *
 * The result carries the registry's `version` and `updated` fields plus:
 * `total_entries`, per-verdict and per-source-type counts (entries without a
 * verdict count as ALLOW, entries without a source type as scanned),
 * `total_scans` (sum of `scan_count`), `stale_count` (entries whose
 * `last_scanned` is more than STALE_THRESHOLD_MS old; a missing date counts
 * as the Unix epoch) and `avg_risk_score` (rounded mean of `risk_score`,
 * 0 for an empty registry).
 *
 * @param {string} [pluginRoot]
 * @returns {object}
 */
export function getStats(pluginRoot) {
  const registry = loadRegistry(pluginRoot);
  const all = Object.values(registry.entries);
  const nowMs = Date.now();

  const byVerdict = { ALLOW: 0, WARNING: 0, BLOCK: 0 };
  const bySourceType = { scanned: 0, seed: 0 };
  let totalScans = 0;
  let staleCount = 0;
  let riskTotal = 0;

  for (const item of all) {
    const verdictKey = item.verdict || 'ALLOW';
    byVerdict[verdictKey] = (byVerdict[verdictKey] || 0) + 1;

    const originKey = item.source_type || 'scanned';
    bySourceType[originKey] = (bySourceType[originKey] || 0) + 1;

    totalScans += item.scan_count || 0;
    riskTotal += item.risk_score || 0;

    const ageMs = nowMs - new Date(item.last_scanned || 0).getTime();
    if (ageMs > STALE_THRESHOLD_MS) {
      staleCount++;
    }
  }

  let avgRisk = 0;
  if (all.length > 0) {
    avgRisk = Math.round(riskTotal / all.length);
  }

  return {
    version: registry.version,
    updated: registry.updated,
    total_entries: all.length,
    by_verdict: byVerdict,
    by_source_type: bySourceType,
    total_scans: totalScans,
    stale_count: staleCount,
    avg_risk_score: avgRisk,
  };
}
|
|
|
|
/**
 * Remove an entry from the registry by fingerprint and save the registry.
 *
 * Only the registry's own keys count as entries: a string that merely names
 * an inherited object property (e.g. "toString") is reported as not found
 * and the registry file is left untouched.
 *
 * @param {string} fingerprint
 * @param {string} [pluginRoot]
 * @returns {boolean} true if entry was found and removed
 */
export function removeEntry(fingerprint, pluginRoot) {
  const registry = loadRegistry(pluginRoot);

  // A truthiness test on `entries[fingerprint]` would also accept inherited
  // members of Object.prototype and then rewrite the file for nothing.
  const isKnown = Object.prototype.hasOwnProperty.call(registry.entries, fingerprint);
  if (!isKnown) return false;

  delete registry.entries[fingerprint];
  saveRegistry(registry, pluginRoot);
  return true;
}
|
|
|
|
/**
 * List registry entries, newest scan first, with optional filters.
 *
 * When both filters are given an entry must satisfy both. An entry is stale
 * when its `last_scanned` time is more than STALE_THRESHOLD_MS in the past;
 * a missing `last_scanned` is treated as the Unix epoch.
 *
 * @param {object} [opts]
 * @param {string} [opts.verdict] - Filter by verdict (ALLOW|WARNING|BLOCK)
 * @param {boolean} [opts.staleOnly] - Only return stale entries
 * @param {string} [pluginRoot]
 * @returns {object[]}
 */
export function listEntries(opts, pluginRoot) {
  const registry = loadRegistry(pluginRoot);
  const nowMs = Date.now();
  const scannedAt = (e) => new Date(e.last_scanned || 0).getTime();

  const wantedVerdict = opts?.verdict;
  const staleOnly = opts?.staleOnly;

  const selected = Object.values(registry.entries).filter((e) => {
    if (wantedVerdict && e.verdict !== wantedVerdict) return false;
    if (staleOnly && !((nowMs - scannedAt(e)) > STALE_THRESHOLD_MS)) return false;
    return true;
  });

  // Most recently scanned entries come first.
  selected.sort((a, b) => scannedAt(b) - scannedAt(a));

  return selected;
}
|