ktg-plugin-marketplace/plugins/llm-security/tests/scanners/memory-poisoning.test.mjs

190 lines
7.4 KiB
JavaScript

// memory-poisoning.test.mjs — Integration tests for the memory-poisoning-scanner
// Tests against fixtures in tests/fixtures/memory-scan/ with:
// - clean-project: normal CLAUDE.md + memory file + rules (0 findings expected)
// - poisoned-project: injection, shell commands, credential paths, suspicious URLs,
// permission expansion, encoded payloads
import { describe, it, beforeEach } from 'node:test';
import assert from 'node:assert/strict';
import { resolve } from 'node:path';
import { fileURLToPath } from 'node:url';
import { resetCounter } from '../../scanners/lib/output.mjs';
import { discoverFiles } from '../../scanners/lib/file-discovery.mjs';
import { scan } from '../../scanners/memory-poisoning-scanner.mjs';
// ES modules have no built-in __dirname, so derive this file's directory
// from the module URL.
const testDirUrl = new URL('.', import.meta.url);
const __dirname = fileURLToPath(testDirUrl);

// Resolves a path relative to this test file's directory.
const fromTestDir = (relativePath) => resolve(__dirname, relativePath);

// Absolute paths of the two fixture projects the scanner is run against.
const CLEAN_FIXTURE = fromTestDir('../fixtures/memory-scan/clean-project');
const POISONED_FIXTURE = fromTestDir('../fixtures/memory-scan/poisoned-project');
// ---------------------------------------------------------------------------
// Clean project — should produce 0 findings
// ---------------------------------------------------------------------------
describe('memory-poisoning-scanner: clean project', () => {
  // File-discovery result for the clean fixture; rebuilt before each test.
  let discovery;

  // Runs the scanner over the clean fixture with the discovery produced
  // by the most recent beforeEach.
  const scanClean = () => scan(CLEAN_FIXTURE, discovery);

  beforeEach(async () => {
    resetCounter();
    discovery = await discoverFiles(CLEAN_FIXTURE);
  });

  it('returns status ok', async () => {
    const { status } = await scanClean();
    assert.equal(status, 'ok');
  });

  it('scans memory/config files', async () => {
    const { files_scanned: filesScanned } = await scanClean();
    assert.ok(filesScanned >= 1, `Expected >= 1 files scanned, got ${filesScanned}`);
  });

  it('produces 0 findings for clean project', async () => {
    const { findings } = await scanClean();
    // Listing the offending titles makes a false positive easy to diagnose.
    const titles = findings.map((finding) => finding.title).join('; ');
    assert.equal(findings.length, 0, `Expected 0 findings, got ${findings.length}: ${titles}`);
  });
});
// ---------------------------------------------------------------------------
// Poisoned project — should produce multiple findings
// ---------------------------------------------------------------------------
describe('memory-poisoning-scanner: poisoned project', () => {
  // File-discovery result for the poisoned fixture; rebuilt before each test.
  let discovery;

  beforeEach(async () => {
    // Presumably resets the finding-ID counter so numbering restarts for each
    // test (the DS-MEM-001 assertion below relies on it) — confirm in
    // scanners/lib/output.mjs.
    resetCounter();
    discovery = await discoverFiles(POISONED_FIXTURE);
  });

  // Runs the scanner over the poisoned fixture with the discovery produced
  // by the most recent beforeEach.
  const scanPoisoned = () => scan(POISONED_FIXTURE, discovery);

  // Returns the findings whose title contains `fragment`.
  const findingsTitled = (findings, fragment) =>
    findings.filter((f) => f.title.includes(fragment));

  it('returns status ok', async () => {
    const result = await scanPoisoned();
    assert.equal(result.status, 'ok');
  });

  it('scans memory/config files', async () => {
    const result = await scanPoisoned();
    assert.ok(
      result.files_scanned >= 3,
      `Expected >= 3 files scanned (CLAUDE.md + memory + rules), got ${result.files_scanned}`
    );
  });

  it('detects at least 5 findings in poisoned project', async () => {
    const { findings } = await scanPoisoned();
    assert.ok(
      findings.length >= 5,
      `Expected >= 5 findings, got ${findings.length}: ${findings.map((f) => `${f.title} [${f.severity}]`).join('; ')}`
    );
  });

  it('all findings have DS-MEM- prefix', async () => {
    const { findings } = await scanPoisoned();
    const wrongPrefix = findings.filter((f) => !f.id.startsWith('DS-MEM-'));
    assert.equal(
      wrongPrefix.length, 0,
      `All findings should have DS-MEM- prefix. Wrong: ${wrongPrefix.map((f) => f.id).join(', ')}`
    );
  });

  it('finding IDs are sequential starting from DS-MEM-001', async () => {
    const { findings } = await scanPoisoned();
    // Fail loudly rather than returning early: with zero findings the test
    // would otherwise pass without verifying anything.
    assert.ok(findings.length > 0, 'Expected at least 1 finding to verify ID numbering');
    // NOTE(review): only the first ID is pinned; the numbering of later
    // findings is not checked here.
    assert.equal(findings[0].id, 'DS-MEM-001');
  });

  it('maps to correct OWASP categories (LLM01 or ASI02)', async () => {
    const { findings } = await scanPoisoned();
    // Guard against a vacuous pass: the loop below checks nothing when empty.
    assert.ok(findings.length > 0, 'Expected at least 1 finding to verify OWASP mapping');
    for (const f of findings) {
      assert.ok(
        f.owasp === 'LLM01' || f.owasp === 'ASI02',
        `Finding ${f.id} owasp should be LLM01 or ASI02, got: ${f.owasp}`
      );
    }
  });

  it('all findings have required fields', async () => {
    const { findings } = await scanPoisoned();
    // Guard against a vacuous pass: the loop below checks nothing when empty.
    assert.ok(findings.length > 0, 'Expected at least 1 finding to verify required fields');
    for (const f of findings) {
      assert.ok(f.id, `Finding missing id`);
      // assert.equal (strict) reports actual vs expected on failure.
      assert.equal(f.scanner, 'MEM', `Finding ${f.id} scanner should be MEM, got ${f.scanner}`);
      assert.ok(f.severity, `Finding ${f.id} missing severity`);
      assert.ok(f.title, `Finding ${f.id} missing title`);
      assert.ok(f.description, `Finding ${f.id} missing description`);
      assert.ok(f.file, `Finding ${f.id} missing file`);
      assert.ok(f.recommendation, `Finding ${f.id} missing recommendation`);
    }
  });

  it('detects injection patterns (CRITICAL or HIGH)', async () => {
    const { findings } = await scanPoisoned();
    const injections = findingsTitled(findings, 'Injection pattern');
    assert.ok(
      injections.length >= 1,
      `Expected >= 1 injection finding, got ${injections.length}`
    );
  });

  it('detects permission expansion (CRITICAL)', async () => {
    const { findings } = await scanPoisoned();
    const perms = findingsTitled(findings, 'Permission expansion');
    assert.ok(
      perms.length >= 1,
      `Expected >= 1 permission expansion finding, got ${perms.length}`
    );
    assert.ok(
      perms.every((f) => f.severity === 'critical'),
      'Permission expansion findings should be CRITICAL'
    );
  });

  it('detects suspicious URLs (HIGH)', async () => {
    const { findings } = await scanPoisoned();
    const urls = findingsTitled(findings, 'Suspicious exfiltration URL');
    assert.ok(
      urls.length >= 1,
      `Expected >= 1 suspicious URL finding, got ${urls.length}`
    );
  });

  it('detects credential path references (HIGH)', async () => {
    const { findings } = await scanPoisoned();
    const creds = findingsTitled(findings, 'Credential path');
    assert.ok(
      creds.length >= 1,
      `Expected >= 1 credential path finding, got ${creds.length}`
    );
  });

  it('detects shell commands in memory files (HIGH)', async () => {
    const { findings } = await scanPoisoned();
    const shells = findingsTitled(findings, 'Shell command in memory');
    assert.ok(
      shells.length >= 1,
      `Expected >= 1 shell command finding in memory file, got ${shells.length}`
    );
  });

  it('detects encoded payloads (MEDIUM)', async () => {
    const { findings } = await scanPoisoned();
    const encoded = findingsTitled(findings, 'encoded');
    assert.ok(
      encoded.length >= 1,
      `Expected >= 1 encoded payload finding, got ${encoded.length}`
    );
  });

  it('severity counts are correct', async () => {
    const { counts, findings } = await scanPoisoned();
    const total = counts.critical + counts.high + counts.medium + counts.low + counts.info;
    assert.equal(total, findings.length, 'Severity counts should sum to total findings');
    assert.ok(counts.critical >= 1, 'Expected at least 1 CRITICAL finding');
    assert.ok(counts.high >= 1, 'Expected at least 1 HIGH finding');
  });

  it('duration_ms is a non-negative number', async () => {
    const result = await scanPoisoned();
    assert.equal(typeof result.duration_ms, 'number', 'duration_ms should be a number');
    // NaN fails this comparison as well, so it is rejected here.
    assert.ok(result.duration_ms >= 0, 'duration_ms should be >= 0');
  });
});