// mcp-description-cache.mjs — Cache MCP tool descriptions and detect drift.
// Zero external dependencies.
//
// Purpose:
//   MCP servers can change tool descriptions between sessions (rug-pull, MCP05).
//   This module caches the first-seen description for each tool and alerts when
//   a subsequent invocation delivers a description that has drifted significantly
//   (Levenshtein distance > 10% of the cached description's length).
//
//   The cached baseline is only replaced after an alert has been raised. Changes
//   below the threshold do NOT move the baseline, so a series of small edits
//   cannot creep past the threshold unnoticed.
//
// Storage: ~/.cache/llm-security/mcp-descriptions.json
// TTL: 7 days per entry, measured from lastSeen (stale entries purged on load).
//
// OWASP: MCP05 (Tool Description Manipulation / Rug Pull)

import { readFileSync, writeFileSync, mkdirSync, existsSync } from 'node:fs';
import { join, dirname } from 'node:path';
import { homedir } from 'node:os';
import { levenshtein } from './string-utils.mjs';

// ---------------------------------------------------------------------------
// Constants
// ---------------------------------------------------------------------------

const CACHE_DIR = join(homedir(), '.cache', 'llm-security');
const CACHE_FILE = join(CACHE_DIR, 'mcp-descriptions.json');
const TTL_MS = 7 * 24 * 60 * 60 * 1000; // 7 days
const DRIFT_THRESHOLD = 0.10; // 10% Levenshtein distance relative to cached length

// ---------------------------------------------------------------------------
// Cache structure
// ---------------------------------------------------------------------------
// {
//   "mcp__server__tool": {
//     "description": "original description text",
//     "firstSeen": 1712345678000,
//     "lastSeen": 1712345678000,
//     "hash": "sha256-prefix (optional; never written by this module)"
//   }
// }

/**
 * Decide whether a parsed cache entry is well-formed and still within the TTL.
 * @param {unknown} entry - Value found under a tool name in the cache file
 * @param {number} now - Current time in epoch milliseconds
 * @returns {boolean}
 */
function isLiveEntry(entry, now) {
  return (
    entry !== null &&
    typeof entry === 'object' &&
    // A non-string description would crash the comparison in checkDescriptionDrift.
    typeof entry.description === 'string' &&
    typeof entry.lastSeen === 'number' &&
    now - entry.lastSeen <= TTL_MS
  );
}

/**
 * Read an entry by tool name, ignoring anything inherited from Object.prototype
 * (tool names such as "constructor" or "toString" must not resolve to built-ins).
 * @param {Record<string, object>} cache
 * @param {string} key
 * @returns {object|undefined}
 */
function getOwnEntry(cache, key) {
  return Object.prototype.hasOwnProperty.call(cache, key) ? cache[key] : undefined;
}

/**
 * Store an entry as a plain own data property. defineProperty is used instead of
 * assignment so that a tool named "__proto__" cannot replace the cache's prototype.
 * @param {Record<string, object>} cache
 * @param {string} key
 * @param {object} entry
 */
function setOwnEntry(cache, key, entry) {
  Object.defineProperty(cache, key, {
    value: entry,
    enumerable: true,
    writable: true,
    configurable: true,
  });
}

/**
 * Load the cache from disk. Purges stale entries (older than TTL) and entries
 * that are not well-formed (missing string `description` or numeric `lastSeen`).
 * Returns an empty object if the file doesn't exist, is corrupt, or does not
 * contain a JSON object.
 * @param {object} [opts] - Options for testing
 * @param {string} [opts.cacheFile] - Override cache file path
 * @param {number} [opts.now] - Override current time
 * @returns {Record<string, object>}
 */
export function loadCache(opts = {}) {
  const cacheFile = opts.cacheFile ?? CACHE_FILE;
  const now = opts.now ?? Date.now();

  if (!existsSync(cacheFile)) return {};

  try {
    const raw = readFileSync(cacheFile, 'utf-8');
    const data = JSON.parse(raw);
    if (!data || typeof data !== 'object' || Array.isArray(data)) return {};

    // Object.fromEntries defines own properties, so a "__proto__" key in the
    // file stays an ordinary key instead of becoming the result's prototype.
    return Object.fromEntries(
      Object.entries(data).filter(([, entry]) => isLiveEntry(entry, now)),
    );
  } catch {
    // Unreadable or unparsable file: behave as if there were no cache.
    return {};
  }
}

/**
 * Save the cache to disk. Creates the cache directory if needed.
 * Failures are swallowed on purpose: drift detection is advisory.
 * @param {Record<string, object>} cache
 * @param {object} [opts]
 * @param {string} [opts.cacheFile] - Override cache file path
 */
export function saveCache(cache, opts = {}) {
  const cacheFile = opts.cacheFile ?? CACHE_FILE;

  try {
    // With `recursive: true` this is a no-op when the directory already exists.
    mkdirSync(dirname(cacheFile), { recursive: true });
    writeFileSync(cacheFile, JSON.stringify(cache, null, 2), 'utf-8');
  } catch {
    // Silently fail — drift detection is advisory, not critical
  }
}

/**
 * Check a tool description against the cached version.
 *
 * First call for a tool: caches the description, returns no drift.
 * Subsequent calls: compares against the cached baseline via Levenshtein
 * distance, relative to the baseline's length.
 *
 *  - Identical, or changed by no more than the threshold: no drift is reported
 *    and the baseline is kept (only `lastSeen` is refreshed), so repeated small
 *    edits accumulate against the same baseline.
 *  - Changed by more than the threshold: drift is reported once and the new
 *    description becomes the baseline.
 *
 * Returns the no-drift result without touching the cache when `toolName` or
 * `description` is not a non-empty string.
 *
 * @param {string} toolName - Full tool name (e.g. "mcp__tavily__tavily_search")
 * @param {string} description - Current tool description
 * @param {object} [opts] - Options for testing
 * @param {string} [opts.cacheFile] - Override cache file path
 * @param {number} [opts.now] - Override current time
 * @returns {{ drift: boolean, detail: string|null, distance: number, threshold: number, cached: string|null }}
 */
export function checkDescriptionDrift(toolName, description, opts = {}) {
  const now = opts.now ?? Date.now();
  const noDrift = { drift: false, detail: null, distance: 0, threshold: 0, cached: null };

  if (
    typeof toolName !== 'string' ||
    toolName === '' ||
    typeof description !== 'string' ||
    description === ''
  ) {
    return noDrift;
  }

  const cache = loadCache(opts);
  const existing = getOwnEntry(cache, toolName);

  if (!existing) {
    // First time seeing this tool — cache it
    setOwnEntry(cache, toolName, {
      description,
      firstSeen: now,
      lastSeen: now,
    });
    saveCache(cache, opts);
    return noDrift;
  }

  // Refresh lastSeen so the entry's TTL restarts on every sighting.
  existing.lastSeen = now;

  // Quick equality check
  if (existing.description === description) {
    saveCache(cache, opts);
    return noDrift;
  }

  // Compute Levenshtein distance relative to the cached baseline
  const dist = levenshtein(existing.description, description);
  const baseLen = Math.max(existing.description.length, 1); // avoid division by zero
  const ratio = dist / baseLen;
  const threshold = DRIFT_THRESHOLD;

  if (ratio > threshold) {
    // Drift detected — report it, then adopt the new description as the
    // baseline so the same change is not reported again on the next call.
    const cachedDesc = existing.description;
    existing.description = description;
    saveCache(cache, opts);

    const pct = Math.round(ratio * 100);
    return {
      drift: true,
      detail:
        `Tool "${toolName}" description changed by ${pct}% (${dist} edits / ${baseLen} chars). ` +
        `Threshold: ${Math.round(threshold * 100)}%. This may indicate a rug-pull attack (OWASP MCP05).`,
      distance: dist,
      threshold,
      cached: cachedDesc,
    };
  }

  // Minor change below threshold — keep the baseline. Overwriting it here would
  // let a server walk the description arbitrarily far in sub-threshold steps.
  saveCache(cache, opts);
  return { drift: false, detail: null, distance: dist, threshold, cached: null };
}

/**
 * Extract MCP server name from a tool name.
 * Convention: mcp__<server>__<tool>
 * @param {string} toolName
 * @returns {string|null} The segment between the first and second "__", or
 *   null when the input is not a string, lacks the "mcp__" prefix, or has
 *   fewer than three "__"-separated parts.
 */
export function extractMcpServer(toolName) {
  if (typeof toolName !== 'string' || !toolName.startsWith('mcp__')) return null;
  const parts = toolName.split('__');
  // mcp__server__tool → parts = ['mcp', 'server', 'tool']
  return parts.length >= 3 ? parts[1] : null;
}

/**
 * Clear the entire cache (for testing) by saving an empty object.
 * @param {object} [opts]
 * @param {string} [opts.cacheFile] - Override cache file path
 */
export function clearCache(opts = {}) {
  saveCache({}, opts);
}

// ---------------------------------------------------------------------------
// Exported constants (for testing)
// ---------------------------------------------------------------------------

export { TTL_MS, DRIFT_THRESHOLD, CACHE_DIR, CACHE_FILE };