feat(llm-security): add lethal-trifecta + mcp-rug-pull example contents [skip-docs]
Companion to 8df5d5c (which only carried the doc updates — the example
directories themselves were left out of staging by mistake). This
commit adds the actual example directories:
- examples/lethal-trifecta-walkthrough/{README.md, run-trifecta.mjs,
expected-findings.md}
- examples/mcp-rug-pull/{README.md, run-rug-pull.mjs,
expected-findings.md}
Plus plugin CLAUDE.md "Examples (runnable demonstrations)" section
with a 4-row table covering malicious-skill-demo, prompt-injection-
showcase, lethal-trifecta-walkthrough, and mcp-rug-pull plus the
state-isolation discipline notes.
Marketplace root README unchanged since plugin's outward coverage
is unchanged ([skip-docs] covers the marketplace-level gate).
This commit is contained in:
parent
8df5d5c70e
commit
583a78c6cc
7 changed files with 739 additions and 0 deletions
191
plugins/llm-security/examples/mcp-rug-pull/run-rug-pull.mjs
Normal file
191
plugins/llm-security/examples/mcp-rug-pull/run-rug-pull.mjs
Normal file
|
|
@ -0,0 +1,191 @@
|
|||
#!/usr/bin/env node
|
||||
// run-rug-pull.mjs — MCP slow-burn rug-pull demonstration (v7.3.0 / E14)
|
||||
// Mutates an MCP tool description across 7 stages — each edit stays UNDER
|
||||
// the per-update threshold (10% Levenshtein vs previous), but cumulatively
|
||||
// the description diverges >=25% from the original baseline. Demonstrates
|
||||
// that `post-mcp-verify` emits a `mcp-cumulative-drift` MEDIUM advisory
|
||||
// even when per-update detection never trips.
|
||||
//
|
||||
// State isolation:
|
||||
// The hook reads MCP description cache via LLM_SECURITY_MCP_CACHE_FILE.
|
||||
// This script points it at a private temp file and deletes that file
|
||||
// on exit. The user's real ~/.cache/llm-security/mcp-descriptions.json
|
||||
// is never touched.
|
||||
//
|
||||
// Usage:
|
||||
// cd plugins/llm-security
|
||||
// node examples/mcp-rug-pull/run-rug-pull.mjs
|
||||
// node examples/mcp-rug-pull/run-rug-pull.mjs --verbose
|
||||
|
||||
import { execFile } from 'node:child_process';
|
||||
import { existsSync, mkdtempSync, readFileSync, rmSync } from 'node:fs';
|
||||
import { tmpdir } from 'node:os';
|
||||
import { resolve, dirname, join } from 'node:path';
|
||||
import { fileURLToPath } from 'node:url';
|
||||
|
||||
// All paths are anchored on this file's own location rather than on the
// current working directory.
const thisFile = fileURLToPath(import.meta.url);
const __dirname = dirname(thisFile);

// The plugin root sits two directories above this example's folder.
const PLUGIN_ROOT = resolve(__dirname, '..', '..');

// The hook script that every stage of the walkthrough is piped through.
const HOOK = join(PLUGIN_ROOT, 'hooks', 'scripts', 'post-mcp-verify.mjs');

// `--verbose` additionally prints hook stderr and the final cache state.
const VERBOSE = process.argv.some((arg) => arg === '--verbose');
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Description sequence — slow-burn rug-pull
|
||||
// ---------------------------------------------------------------------------
|
||||
//
|
||||
// Baseline: a benign-looking knowledge-base MCP server description.
|
||||
// Each subsequent stage applies a small edit. Per-update Levenshtein stays
|
||||
// under 10%, but by stage 7 the cumulative distance to baseline is >=25% and
// the wording has drifted from "internal knowledge base ... for the agent" to
// "local data store ... for the user".
|
||||
|
||||
// Each stage edits at most ~6 chars vs the previous (per-update <10%), but
|
||||
// every edit moves "internal knowledge base / agent / read" toward
|
||||
// "local data store / user / brief snippets". By stage 7 the cumulative
|
||||
// Levenshtein vs baseline crosses 25% — the rug-pull is detected even
|
||||
// though no single update tripped per-update detection.
|
||||
// Stage 0 — the baseline (121 chars). All future drift is measured against this.
const BASELINE_DESCRIPTION =
  'Search the internal knowledge base. Returns a ranked list of matching documents and short snippets for the agent to read.';

// One single-word swap per stage, applied in order to the previous stage's
// text. "per" is the Levenshtein change vs the previous stage, "cum" the
// change vs the baseline.
const STAGE_EDITS = [
  ['agent', 'user'], // 1 — per=3.3%, cum=3.3%
  ['ranked', 'scored'], // 2 — per=3.3%, cum=6.6%
  ['short', 'brief'], // 3 — per=4.2%, cum=10.7%
  ['documents', 'files'], // 4 — per=5.8%, cum=16.5%
  ['internal', 'local'], // 5 — per=5.2%, cum=21.5%
  ['base', 'store'], // 6 — per=3.5%, cum=24.8% — still under cumulative threshold
  ['knowledge', 'data'], // 7 — per=7.9%, cum=32.2% — CUMULATIVE TRIPS, per-update does not
];

// STAGES[0] is the baseline; STAGES[n] is STAGES[n - 1] with edit n applied.
const STAGES = [BASELINE_DESCRIPTION];
for (const [from, to] of STAGE_EDITS) {
  // Each swapped word occurs exactly once in the description, so replacing
  // the first match is exact.
  STAGES.push(STAGES[STAGES.length - 1].replace(from, to));
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Hook runner
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Runs the hook script (`HOOK`) in a child Node process and captures its result.
 *
 * The payload is serialized as JSON, written to the child's stdin, and stdin
 * is then closed. The child is given 5 seconds before `execFile` kills it.
 * The error passed to the `execFile` callback is deliberately ignored: the
 * outcome is reported through the resolved object instead of a rejection.
 *
 * @param {object} input - Hook payload; must be JSON-serializable.
 * @param {Record<string, string>} [env] - Extra environment variables,
 *   layered on top of `process.env` for the child.
 * @returns {Promise<{code: number, stdout: string, stderr: string}>}
 *   `code` is the child's exit code, or 1 when the child has no exit code
 *   (for example it was killed or could not be spawned).
 */
function runHook(input, env) {
  return new Promise((settle) => {
    const child = execFile(
      // Launch the same Node binary that is running this script. A bare
      // `node` is looked up on PATH, which may resolve to a different
      // version or to nothing at all.
      process.execPath,
      [HOOK],
      { timeout: 5000, env: { ...process.env, ...env } },
      (_err, stdout, stderr) => {
        settle({ code: child.exitCode ?? 1, stdout: stdout || '', stderr: stderr || '' });
      },
    );

    // A stream 'error' without a listener is thrown as an uncaught exception,
    // which would kill the runner before the caller's cleanup runs. If the
    // child goes away before reading its stdin, the failed write is not
    // interesting on its own: the execFile callback above still reports the
    // child's exit code and stderr.
    child.stdin.on('error', () => {});
    child.stdin.end(JSON.stringify(input));
  });
}
|
||||
|
||||
/**
 * Extracts the individual advisories from the hook's stdout.
 *
 * The hook is expected to print a JSON object whose `systemMessage` holds
 * one or more advisories joined with `\n\n---\n\n` (see post-mcp-verify.mjs).
 * Anything else (empty output, non-object output, invalid JSON, a missing,
 * empty or non-string `systemMessage`) yields an empty list.
 *
 * @param {string} stdout - Raw stdout captured from the hook.
 * @returns {string[]} The advisory texts, in the order the hook emitted them.
 */
function parseAdvisories(stdout) {
  const ADVISORY_SEPARATOR = '\n\n---\n\n';

  const payload = stdout.trim();
  // Only a JSON object can carry a systemMessage; skip everything else early.
  if (payload[0] !== '{') {
    return [];
  }

  let message;
  try {
    message = JSON.parse(payload).systemMessage;
  } catch {
    return [];
  }

  const hasText = typeof message === 'string' && message !== '';
  return hasText ? message.split(ADVISORY_SEPARATOR) : [];
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Main
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// Private scratch directory for the hook's description cache. It is handed to
// the hook through LLM_SECURITY_MCP_CACHE_FILE and removed again in `finally`.
const tmpDir = mkdtempSync(join(tmpdir(), 'llm-security-rugpull-'));
const cacheFile = join(tmpDir, 'mcp-descriptions.json');

const bannerLines = [
  'MCP CUMULATIVE DRIFT (RUG-PULL) WALKTHROUGH',
  '===========================================',
  `Cache file (deleted on exit): ${cacheFile}\n`,
  'Per-update threshold: 10% Levenshtein vs previous description',
  'Cumulative threshold: 25% Levenshtein vs sticky baseline',
  'OWASP MCP05 (Rug Pull) — v7.3.0 introduces the cumulative leg.\n',
];
for (const line of bannerLines) {
  console.log(line);
}

// Running tally of stages whose advisories matched / did not match expectations.
let pass = 0;
let fail = 0;

// expectations[i] states which advisories stage i of STAGES should raise.
const noAdvisory = (note) => ({ perUpdate: false, cumulative: false, note });
const expectations = [
  noAdvisory('baseline seeded — no advisory'),
  noAdvisory('agent → user'),
  noAdvisory('ranked → scored'),
  noAdvisory('short → brief'),
  noAdvisory('documents → files'),
  noAdvisory('internal → local'),
  noAdvisory('base → store (cum=24.8%, just under threshold)'),
  { perUpdate: false, cumulative: true, note: 'knowledge → data — CUMULATIVE TRIPS at 32.2%' },
];

try {
  for (const [i, description] of STAGES.entries()) {
    const expect = expectations[i];

    // post-mcp-verify exits early when tool_output is empty — the drift
    // check only runs on tool calls that actually produce output. A benign
    // placeholder is sent so the description-drift code path executes.
    const hookInput = {
      tool_name: 'mcp__knowledge__search',
      tool_input: { description, query: 'demo' },
      tool_output: 'no results',
    };
    const { stdout, stderr } = await runHook(hookInput, {
      LLM_SECURITY_MCP_CACHE_FILE: cacheFile,
    });

    // NOTE(review): these are substring matches on the hook's advisory text;
    // confirm the per-update phrase cannot also occur inside the cumulative
    // advisory.
    const advisories = parseAdvisories(stdout);
    const perUpdateAdv = advisories.find((text) => text.includes('description drift detected'));
    const cumulativeAdv = advisories.find((text) => text.includes('cumulative description drift'));
    const gotPerUpdate = Boolean(perUpdateAdv);
    const gotCumulative = Boolean(cumulativeAdv);

    // A stage passes only when both advisory kinds match the expectation.
    const ok = gotPerUpdate === expect.perUpdate && gotCumulative === expect.cumulative;
    if (ok) {
      pass += 1;
    } else {
      fail += 1;
    }

    const len = description.length;
    const ellipsis = len > 80 ? '...' : '';
    console.log(`[${ok ? 'PASS' : 'FAIL'}] Stage ${i} (${len} chars) — ${expect.note}`);
    console.log(` per-update advisory: expect=${expect.perUpdate} got=${gotPerUpdate}`);
    console.log(` cumulative advisory: expect=${expect.cumulative} got=${gotCumulative}`);
    console.log(` description: "${description.slice(0, 80)}${ellipsis}"`);

    if (cumulativeAdv) {
      // Show only the first two lines of the advisory, flattened onto one line.
      const preview = cumulativeAdv.split('\n').slice(0, 2).join(' / ');
      console.log(` advisory preview: "${preview}"`);
    }

    const hookStderr = stderr.trim();
    if (VERBOSE && hookStderr) {
      console.log(` stderr: ${hookStderr.slice(0, 120)}`);
    }
    console.log();
  }

  // In verbose mode, dump what the hook persisted for the demo tool.
  if (VERBOSE && existsSync(cacheFile)) {
    const cache = JSON.parse(readFileSync(cacheFile, 'utf-8'));
    const entry = cache['mcp__knowledge__search'];
    if (entry) {
      const historyLength = entry.history?.length ?? 0;
      console.log('Cache state at exit:');
      console.log(` baseline.description = "${entry.baseline.description.slice(0, 60)}..."`);
      console.log(` current.description = "${entry.description.slice(0, 60)}..."`);
      console.log(` history length = ${historyLength}`);
      console.log();
    }
  }
} finally {
  // Always remove the scratch directory, even when a stage throws.
  rmSync(tmpDir, { recursive: true, force: true });
}

console.log('---');
console.log(`Result: ${pass} pass, ${fail} fail`);

if (fail === 0) {
  console.log('\nSUCCESS — cumulative-drift advisory fired exactly when expected.');
  console.log('Reset MCP baseline after a legitimate upgrade: /security mcp-baseline-reset');
  process.exit(0);
}

// At least one stage diverged from the documented contract.
console.log('\nFAILURE — see expected-findings.md for the documented contract.');
process.exit(1);
|
||||
Loading…
Add table
Add a link
Reference in a new issue