193 lines
6.4 KiB
JavaScript
193 lines
6.4 KiB
JavaScript
// mcp-description-cache.mjs — Cache MCP tool descriptions and detect drift.
|
|
// Zero external dependencies.
|
|
//
|
|
// Purpose:
|
|
// MCP servers can change tool descriptions between sessions (rug-pull, MCP05).
|
|
// This module caches the most recently seen description for each tool and
// alerts when a subsequent invocation delivers a description that has drifted
// significantly from the cached one (Levenshtein distance > 10% of the cached
// description's length). The cached text is replaced after every change.
|
|
//
|
|
// Storage: ~/.cache/llm-security/mcp-descriptions.json
|
|
// TTL: 7 days per entry (stale entries purged on load).
|
|
//
|
|
// OWASP: MCP05 (Tool Description Manipulation / Rug Pull)
|
|
|
|
import {
  existsSync,
  mkdirSync,
  readFileSync,
  renameSync,
  rmSync,
  writeFileSync,
} from 'node:fs';
import { homedir } from 'node:os';
import { dirname, join } from 'node:path';
import { levenshtein } from './string-utils.mjs';
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Constants
|
|
// ---------------------------------------------------------------------------
|
|
|
|
// Directory that holds the on-disk cache, under the user's home directory.
const CACHE_DIR = join(homedir(), '.cache', 'llm-security');

// JSON file inside CACHE_DIR that stores one entry per tool name.
const CACHE_FILE = join(CACHE_DIR, 'mcp-descriptions.json');

// Entry lifetime in milliseconds: ms → s → min → h → 7 days.
const TTL_MS = 1000 * 60 * 60 * 24 * 7;

// Edit distance, as a fraction of the cached description's length, above
// which a change is reported as drift (10%).
const DRIFT_THRESHOLD = 0.1;
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Cache structure
|
|
// ---------------------------------------------------------------------------
|
|
// {
|
|
// "mcp__server__tool": {
|
|
// "description": "original description text",
|
|
// "firstSeen": 1712345678000,
|
|
// "lastSeen": 1712345678000,
|
|
// "hash": "sha256-prefix (optional, for quick equality check)"
|
|
// }
|
|
// }
|
|
|
|
/**
 * Load the description cache from disk, dropping expired or malformed entries.
 *
 * An entry is kept only when it is an object whose numeric `lastSeen` is no
 * more than the TTL older than `now`. Any failure (missing or unreadable file,
 * invalid JSON, a root value that is not a plain JSON object) yields an empty
 * cache.
 *
 * @param {object} [opts] - Options for testing
 * @param {string} [opts.cacheFile] - Override cache file path
 * @param {number} [opts.now] - Override current time (ms since epoch)
 * @param {number} [opts.ttlMs] - Override the entry time-to-live in ms (defaults to TTL_MS)
 * @returns {Record<string, { description: string, firstSeen: number, lastSeen: number }>}
 */
export function loadCache(opts = {}) {
  const cacheFile = opts.cacheFile ?? CACHE_FILE;
  const now = opts.now ?? Date.now();
  const ttlMs = opts.ttlMs ?? TTL_MS;

  let data;
  try {
    // A missing file makes readFileSync throw, which lands in the catch below;
    // a separate existence check would only add a check-then-read race.
    data = JSON.parse(readFileSync(cacheFile, 'utf-8'));
  } catch {
    return {};
  }

  // The cache root must be a JSON object; arrays and primitives are rejected.
  if (!data || typeof data !== 'object' || Array.isArray(data)) return {};

  const freshEntries = Object.entries(data).filter(([, entry]) =>
    entry !== null &&
    typeof entry === 'object' &&
    typeof entry.lastSeen === 'number' &&
    now - entry.lastSeen <= ttlMs);

  // Object.fromEntries defines own data properties, so a key literally named
  // "__proto__" in the file is stored as an ordinary entry instead of
  // replacing the prototype of the returned object.
  return Object.fromEntries(freshEntries);
}
|
|
|
|
/**
 * Save the cache to disk. Creates the cache directory if needed.
 *
 * The JSON is written to a sibling temporary file and then renamed over the
 * target, so a crash or a concurrent reader never observes a half-written
 * cache file (which loadCache would treat as an empty cache).
 *
 * Failures are swallowed on purpose: drift detection is advisory and must not
 * break the caller.
 *
 * @param {Record<string, object>} cache
 * @param {object} [opts]
 * @param {string} [opts.cacheFile] - Override cache file path
 */
export function saveCache(cache, opts = {}) {
  const cacheFile = opts.cacheFile ?? CACHE_FILE;
  // Per-process temp name so two processes saving at once do not share a temp file
  const tmpFile = `${cacheFile}.${process.pid}.tmp`;

  try {
    // recursive: true makes this a no-op when the directory already exists
    mkdirSync(dirname(cacheFile), { recursive: true });
    writeFileSync(tmpFile, JSON.stringify(cache, null, 2), 'utf-8');
    renameSync(tmpFile, cacheFile);
  } catch {
    // Silently fail — drift detection is advisory, not critical.
    // Still try not to leave the temporary file behind.
    try {
      rmSync(tmpFile, { force: true });
    } catch {
      // Nothing more can be done here
    }
  }
}
|
|
|
|
/**
 * Check a tool description against the cached version.
 *
 * First call for a tool (or a cached entry without a usable description):
 * caches the description and reports no drift.
 * Subsequent calls: compare via Levenshtein distance relative to the length of
 * the cached description; a ratio above DRIFT_THRESHOLD is reported as drift.
 * In every case the cache is rewritten with the current description and an
 * updated `lastSeen`.
 *
 * @param {string} toolName - Full tool name (e.g. "mcp__tavily__tavily_search")
 * @param {string} description - Current tool description
 * @param {object} [opts] - Options for testing (also forwarded to loadCache/saveCache)
 * @param {string} [opts.cacheFile] - Override cache file path
 * @param {number} [opts.now] - Override current time
 * @returns {{ drift: boolean, detail: string|null, distance: number, threshold: number, cached: string|null }}
 */
export function checkDescriptionDrift(toolName, description, opts = {}) {
  const now = opts.now ?? Date.now();
  const noDrift = { drift: false, detail: null, distance: 0, threshold: 0, cached: null };

  if (!toolName || !description || typeof description !== 'string') {
    return noDrift;
  }

  const cache = loadCache(opts);

  // Only own properties count as cached entries. A plain `cache[toolName]`
  // would resolve names such as "constructor" or "toString" to members
  // inherited from Object.prototype and then mutate/compare those.
  const existing = Object.prototype.hasOwnProperty.call(cache, toolName)
    ? cache[toolName]
    : undefined;

  if (!existing || typeof existing.description !== 'string') {
    // First time seeing this tool (or the stored entry has no usable
    // description to compare against) — cache it
    cache[toolName] = {
      description,
      firstSeen: now,
      lastSeen: now,
    };
    saveCache(cache, opts);
    return noDrift;
  }

  const previous = existing.description;
  existing.lastSeen = now;

  // Quick equality check: nothing changed, only lastSeen needs persisting
  if (previous === description) {
    saveCache(cache, opts);
    return noDrift;
  }

  const dist = levenshtein(previous, description);
  // Guard against division by zero for an empty cached description
  const baseLen = Math.max(previous.length, 1);
  const ratio = dist / baseLen;
  const threshold = DRIFT_THRESHOLD;

  // The cached description is replaced by the new one whether or not the
  // change crossed the threshold.
  // NOTE(review): because sub-threshold changes also move the baseline, a
  // sequence of small edits (each below the threshold) is never reported.
  // Consider keeping the first-seen text as the baseline — confirm the
  // intended policy before changing it.
  existing.description = description;
  saveCache(cache, opts);

  if (ratio <= threshold) {
    // Minor change below threshold — no alert
    return { drift: false, detail: null, distance: dist, threshold, cached: null };
  }

  const pct = Math.round(ratio * 100);
  return {
    drift: true,
    detail: `Tool "${toolName}" description changed by ${pct}% (${dist} edits / ${baseLen} chars). ` +
      `Threshold: ${Math.round(threshold * 100)}%. This may indicate a rug-pull attack (OWASP MCP05).`,
    distance: dist,
    threshold,
    cached: previous,
  };
}
|
|
|
|
/**
 * Extract the MCP server name from a tool name.
 * Convention: mcp__<server>__<tool>
 *
 * Returns null when the input is not a string, does not start with "mcp__",
 * has fewer than three "__"-separated segments, or has an empty server
 * segment (e.g. "mcp____tool").
 *
 * @param {string} toolName
 * @returns {string|null}
 */
export function extractMcpServer(toolName) {
  // typeof guard: optional chaining alone would still throw for non-string,
  // non-nullish inputs such as numbers
  if (typeof toolName !== 'string' || !toolName.startsWith('mcp__')) return null;

  // mcp__server__tool → segments = ['mcp', 'server', 'tool']
  const segments = toolName.split('__');
  if (segments.length < 3) return null;

  // An empty server segment is not a valid server name
  return segments[1] || null;
}
|
|
|
|
/**
 * Reset the cache by persisting an empty object (intended for tests).
 * @param {object} [opts]
 * @param {string} [opts.cacheFile] - Override cache file path
 */
export function clearCache(opts = {}) {
  const emptyCache = {};
  saveCache(emptyCache, opts);
}
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Exported constants (for testing)
|
|
// ---------------------------------------------------------------------------
|
|
export { TTL_MS, DRIFT_THRESHOLD, CACHE_DIR, CACHE_FILE };
|