From 5afb9b1f3353eae9b7fd945bfb3a80b8c84f3062 Mon Sep 17 00:00:00 2001 From: Kjell Tore Guttormsen Date: Sat, 18 Apr 2026 10:15:12 +0200 Subject: [PATCH] feat(llm-security): implement parseIntelliJPlugin with nested-jar extraction --- .../scanners/lib/ide-extension-parser.mjs | 220 +++++++++++++++++- .../llm-security/tests/helpers/zip-writer.mjs | 127 ++++++++++ .../tests/scanners/jetbrains-parser.test.mjs | 139 ++++++++++- 3 files changed, 479 insertions(+), 7 deletions(-) create mode 100644 plugins/llm-security/tests/helpers/zip-writer.mjs diff --git a/plugins/llm-security/scanners/lib/ide-extension-parser.mjs b/plugins/llm-security/scanners/lib/ide-extension-parser.mjs index 928c0b6..a6c0873 100644 --- a/plugins/llm-security/scanners/lib/ide-extension-parser.mjs +++ b/plugins/llm-security/scanners/lib/ide-extension-parser.mjs @@ -5,8 +5,11 @@ // - type: 'vscode' → parseVSCodeExtension (package.json + contributes) // - type: 'jetbrains' → parseIntelliJPlugin (plugin.xml + MANIFEST.MF inside JARs) -import { readFile, access } from 'node:fs/promises'; -import { join } from 'node:path'; +import { readFile, readdir, stat, mkdtemp, rm, access } from 'node:fs/promises'; +import { join, basename } from 'node:path'; +import { tmpdir } from 'node:os'; +import { createHash } from 'node:crypto'; +import { extractToDir } from './zip-extract.mjs'; async function pathExists(p) { try { await access(p); return true; } catch { return false; } @@ -400,12 +403,217 @@ export function parseManifestMf(mfString) { return out; } +const NATIVE_BIN_RE = /\.(dll|so|dylib|jnilib|exe)$/i; +const SIGNATURE_FILE_RE = /\.(SF|RSA|DSA|EC)$/; + +async function walkFiles(rootDir) { + const out = []; + async function recurse(dir) { + let entries; + try { entries = await readdir(dir, { withFileTypes: true }); } catch { return; } + for (const entry of entries) { + const full = join(dir, entry.name); + if (entry.isDirectory()) await recurse(full); + else if (entry.isFile()) out.push(full); + } + } + await recurse(rootDir); + return out; +} + /** - * Parse an IntelliJ plugin directory. Implemented in Step 6 (v6.6.0). - * Stub preserved until Step 6 lands. + * Parse an IntelliJ plugin directory layout: + * /lib/*.jar — main jar contains META-INF/plugin.xml + * * @param {string} pluginRoot - * @returns {Promise} + * @returns {Promise<{ manifest: ParsedManifest, warnings: string[] } | null>} */ export async function parseIntelliJPlugin(pluginRoot) { - return null; + if (typeof pluginRoot !== 'string' || !pluginRoot) return null; + const warnings = []; + + const libDir = join(pluginRoot, 'lib'); + try { + const s = await stat(libDir); + if (!s.isDirectory()) { + warnings.push('IDE-JB-NO-LIB-DIR: lib is not a directory'); + return { manifest: null, warnings }; + } + } catch { + warnings.push('IDE-JB-NO-LIB-DIR: lib directory missing'); + return { manifest: null, warnings }; + } + + let jarNames; + try { + jarNames = (await readdir(libDir)).filter(n => n.toLowerCase().endsWith('.jar')); + } catch { + warnings.push('IDE-JB-NO-LIB-DIR: cannot read lib'); + return { manifest: null, warnings }; + } + if (jarNames.length === 0) { + warnings.push('IDE-JB-NO-PLUGIN-XML: no jars in lib/'); + return { manifest: null, warnings }; + } + + const extractionRoot = await mkdtemp(join(tmpdir(), 'llmsec-jb-')); + const extractedJars = []; + + try { + for (const jarName of jarNames) { + const jarPath = join(libDir, jarName); + try { + const jarBuffer = await readFile(jarPath); + const jarDir = await mkdtemp(join(extractionRoot, 'jar-')); + await extractToDir(jarBuffer, jarDir); + extractedJars.push({ jarPath, jarName, jarDir }); + } catch (err) { + warnings.push(`IDE-JB-JAR-EXTRACT: ${jarName}: ${err.message}`); + } + } + + if (extractedJars.length === 0) { + warnings.push('IDE-JB-NO-PLUGIN-XML: no jars could be extracted'); + return { manifest: null, warnings }; + } + + // Locate main jar: first one containing META-INF/plugin.xml + let mainJar = null; + const mainJarCandidates = []; + for (const ej of extractedJars) { + const xmlPath = join(ej.jarDir, 'META-INF', 'plugin.xml'); + if (await pathExists(xmlPath)) { + mainJarCandidates.push(ej); + if (!mainJar) mainJar = ej; + } + } + + if (!mainJar) { + warnings.push('IDE-JB-NO-PLUGIN-XML: no jar contains META-INF/plugin.xml'); + return { manifest: null, warnings }; + } + if (mainJarCandidates.length > 1) { + warnings.push(`IDE-JB-MULTIPLE-PLUGIN-XML: ${mainJarCandidates.length} jars contain plugin.xml; first wins`); + } + + // Parse plugin.xml + let pluginXmlResult; + try { + const xmlRaw = await readFile(join(mainJar.jarDir, 'META-INF', 'plugin.xml'), 'utf8'); + pluginXmlResult = parsePluginXml(xmlRaw); + } catch (err) { + warnings.push(`IDE-JB-PLUGIN-XML-READ: ${err.message}`); + return { manifest: null, warnings }; + } + if (pluginXmlResult.warnings.length) warnings.push(...pluginXmlResult.warnings); + if (!pluginXmlResult.manifest) { + warnings.push('IDE-JB-PLUGIN-XML-PARSE: unparseable plugin.xml'); + return { manifest: null, warnings }; + } + const px = pluginXmlResult.manifest; + + // Parse main jar MANIFEST.MF + let mainMf = { mainClass: null, premainClass: null, implTitle: null, implVersion: null, premainAttrs: {} }; + const mainMfPath = join(mainJar.jarDir, 'META-INF', 'MANIFEST.MF'); + if (await pathExists(mainMfPath)) { + try { + const mfRaw = await readFile(mainMfPath, 'utf8'); + mainMf = parseManifestMf(mfRaw); + } catch (err) { + warnings.push(`IDE-JB-MANIFEST-MF-READ: ${err.message}`); + } + } + + // Walk ALL jar-dirs for native binaries + const nativeBinaries = []; + for (const ej of extractedJars) { + const files = await walkFiles(ej.jarDir); + for (const f of files) { + if (NATIVE_BIN_RE.test(f)) { + try { + const buf = await readFile(f); + const s = await stat(f); + nativeBinaries.push({ + path: `${ej.jarName}:${f.slice(ej.jarDir.length + 1)}`, + size: s.size, + sha256: createHash('sha256').update(buf).digest('hex'), + }); + } catch (err) { + warnings.push(`IDE-JB-NATIVE-READ: ${err.message}`); + } + } + } + } + + // Parse every jar's MANIFEST.MF for bundled-jars list + const bundledJars = []; + for (const ej of extractedJars) { + const mfPath = join(ej.jarDir, 'META-INF', 'MANIFEST.MF'); + let mf = { implTitle: null, implVersion: null }; + if (await pathExists(mfPath)) { + try { + mf = parseManifestMf(await readFile(mfPath, 'utf8')); + } catch { + // fall through with nulls + } + } + bundledJars.push({ + name: ej.jarName, + version: mf.implVersion || null, + shaded: !mf.implTitle || !mf.implVersion, + coords: mf.implTitle || null, + }); + } + + // Signature check on main jar + let hasSignature = false; + try { + const metaInfDir = join(mainJar.jarDir, 'META-INF'); + const metaEntries = await readdir(metaInfDir); + hasSignature = metaEntries.some(f => SIGNATURE_FILE_RE.test(f)); + } catch { /* no META-INF */ } + + const pluginId = px.pluginId || basename(pluginRoot); + + const manifest = { + type: 'jetbrains', + id: pluginId.toLowerCase(), + pluginId, + publisher: (px.vendor || '').toLowerCase(), + name: px.name || '', + version: px.version || '', + engines: {}, + main: null, + browser: null, + activationEvents: [], + contributes: {}, + extensionPack: [], + extensionDependencies: [], + extensionKind: [], + categories: [], + capabilities: {}, + scripts: {}, + repository: px.vendorUrl || null, + dependencies: {}, + hasSignature, + sinceBuild: px.sinceBuild, + untilBuild: px.untilBuild, + depends: px.depends, + extensionDeclarations: px.extensionDeclarations, + applicationComponents: px.applicationComponents, + listeners: px.listeners, + themeProviders: px.themeProviders, + hasPremainClass: Boolean(mainMf.premainClass), + premainClass: mainMf.premainClass || null, + nativeBinaries, + bundledJars, + }; + + return { manifest, warnings }; + } catch (err) { + warnings.push(`IDE-JB-UNCAUGHT: ${err.message}`); + return { manifest: null, warnings }; + } finally { + await rm(extractionRoot, { recursive: true, force: true }); + } } diff --git a/plugins/llm-security/tests/helpers/zip-writer.mjs b/plugins/llm-security/tests/helpers/zip-writer.mjs new file mode 100644 index 0000000..93ddfee --- /dev/null +++ b/plugins/llm-security/tests/helpers/zip-writer.mjs @@ -0,0 +1,127 @@ +// zip-writer.mjs — Minimal stored-method (no compression) ZIP writer. +// Zero dependencies. Deterministic output: fixed DOS timestamp, sorted entry order. +// +// Writes a valid ZIP that zip-extract.mjs can parse. Uses method=0 (STORE), +// CRC-32 computed, no encryption, no ZIP64. Suitable for tiny test fixtures. + +import { createHash } from 'node:crypto'; + +// CRC-32 table (IEEE 802.3 polynomial). +const CRC_TABLE = (() => { + const t = new Uint32Array(256); + for (let n = 0; n < 256; n++) { + let c = n; + for (let k = 0; k < 8; k++) { + c = (c & 1) ? (0xEDB88320 ^ (c >>> 1)) : (c >>> 1); + } + t[n] = c >>> 0; + } + return t; +})(); + +function crc32(buf) { + let c = 0xFFFFFFFF; + for (let i = 0; i < buf.length; i++) { + c = (CRC_TABLE[(c ^ buf[i]) & 0xFF] ^ (c >>> 8)) >>> 0; + } + return (c ^ 0xFFFFFFFF) >>> 0; +} + +const DOS_DATE = ((1980 - 1980) << 9) | (1 << 5) | 1; // 1980-01-01 +const DOS_TIME = 0; // 00:00:00 + +/** + * Build a ZIP buffer from a list of entries. + * + * @param {Array<{name: string, data: Buffer | string}>} entries + * @returns {Buffer} + */ +export function createZip(entries) { + // Normalize + sort for determinism + const normalized = entries + .map(e => ({ + name: e.name, + data: Buffer.isBuffer(e.data) ? e.data : Buffer.from(e.data, 'utf8'), + })) + .sort((a, b) => a.name.localeCompare(b.name)); + + const chunks = []; + const centralHeaders = []; + let offset = 0; + + for (const e of normalized) { + const nameBuf = Buffer.from(e.name, 'utf8'); + const c = crc32(e.data); + const sz = e.data.length; + + // Local File Header (30 + nameLen) + const lfh = Buffer.alloc(30); + lfh.writeUInt32LE(0x04034b50, 0); // signature + lfh.writeUInt16LE(20, 4); // version needed + lfh.writeUInt16LE(0, 6); // flags + lfh.writeUInt16LE(0, 8); // method = STORE + lfh.writeUInt16LE(DOS_TIME, 10); + lfh.writeUInt16LE(DOS_DATE, 12); + lfh.writeUInt32LE(c, 14); // crc32 + lfh.writeUInt32LE(sz, 18); // compressed size + lfh.writeUInt32LE(sz, 22); // uncompressed size + lfh.writeUInt16LE(nameBuf.length, 26); + lfh.writeUInt16LE(0, 28); // extra field length + chunks.push(lfh); + chunks.push(nameBuf); + chunks.push(e.data); + + const localOffset = offset; + offset += 30 + nameBuf.length + sz; + + // Central Directory Header (46 + nameLen) + const cdh = Buffer.alloc(46); + cdh.writeUInt32LE(0x02014b50, 0); // signature + cdh.writeUInt16LE(20, 4); // version made by + cdh.writeUInt16LE(20, 6); // version needed + cdh.writeUInt16LE(0, 8); // flags + cdh.writeUInt16LE(0, 10); // method + cdh.writeUInt16LE(DOS_TIME, 12); + cdh.writeUInt16LE(DOS_DATE, 14); + cdh.writeUInt32LE(c, 16); + cdh.writeUInt32LE(sz, 20); + cdh.writeUInt32LE(sz, 24); + cdh.writeUInt16LE(nameBuf.length, 28); + cdh.writeUInt16LE(0, 30); // extra + cdh.writeUInt16LE(0, 32); // comment + cdh.writeUInt16LE(0, 34); // disk + cdh.writeUInt16LE(0, 36); // internal attrs + cdh.writeUInt32LE(0, 38); // external attrs + cdh.writeUInt32LE(localOffset, 42); + centralHeaders.push({ cdh, nameBuf }); + } + + const centralStart = offset; + for (const { cdh, nameBuf } of centralHeaders) { + chunks.push(cdh); + chunks.push(nameBuf); + offset += cdh.length + nameBuf.length; + } + const centralSize = offset - centralStart; + + // End of Central Directory + const eocd = Buffer.alloc(22); + eocd.writeUInt32LE(0x06054b50, 0); + eocd.writeUInt16LE(0, 4); + eocd.writeUInt16LE(0, 6); + eocd.writeUInt16LE(normalized.length, 8); + eocd.writeUInt16LE(normalized.length, 10); + eocd.writeUInt32LE(centralSize, 12); + eocd.writeUInt32LE(centralStart, 16); + eocd.writeUInt16LE(0, 20); // comment length + chunks.push(eocd); + + return Buffer.concat(chunks); +} + +/** + * Compute SHA-256 of a buffer (hex). + */ +export function sha256Hex(buf) { + return createHash('sha256').update(buf).digest('hex'); +} diff --git a/plugins/llm-security/tests/scanners/jetbrains-parser.test.mjs b/plugins/llm-security/tests/scanners/jetbrains-parser.test.mjs index db2706e..a4785cc 100644 --- a/plugins/llm-security/tests/scanners/jetbrains-parser.test.mjs +++ b/plugins/llm-security/tests/scanners/jetbrains-parser.test.mjs @@ -2,12 +2,18 @@ // // All inputs are inline strings — no filesystem fixtures required. -import { describe, it } from 'node:test'; +import { describe, it, after } from 'node:test'; import assert from 'node:assert/strict'; +import { mkdtemp, mkdir, writeFile, readdir, rm } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { createHash } from 'node:crypto'; import { parsePluginXml, parseManifestMf, + parseIntelliJPlugin, } from '../../scanners/lib/ide-extension-parser.mjs'; +import { createZip } from '../helpers/zip-writer.mjs'; describe('parsePluginXml — happy path', () => { const xml = ` @@ -241,3 +247,134 @@ describe('parseManifestMf', () => { assert.deepEqual(parseManifestMf(lf), parseManifestMf(crlf)); }); }); + +// --------------------------------------------------------------------------- +// parseIntelliJPlugin — synthetic plugin dirs built in-test via zip-writer +// --------------------------------------------------------------------------- + +const TEST_TMP_PREFIX = 'llmsec-jbparse-test-'; +const createdRoots = []; + +async function makePluginDir(jars) { + const root = await mkdtemp(join(tmpdir(), TEST_TMP_PREFIX)); + createdRoots.push(root); + await mkdir(join(root, 'lib'), { recursive: true }); + for (const { name, entries } of jars) { + const buf = createZip(entries); + await writeFile(join(root, 'lib', name), buf); + } + return root; +} + +const BENIGN_PLUGIN_XML = ` + + com.example.benign + Benign + 1.0 + Example +`; + +describe('parseIntelliJPlugin — benign synthetic plugin', () => { + it('extracts pluginId, depends, no native/premain/signature', async () => { + const root = await makePluginDir([ + { + name: 'main.jar', + entries: [ + { name: 'META-INF/plugin.xml', data: BENIGN_PLUGIN_XML }, + { name: 'META-INF/MANIFEST.MF', data: 'Manifest-Version: 1.0\n' }, + ], + }, + ]); + const res = await parseIntelliJPlugin(root); + assert.ok(res, 'expected non-null result'); + assert.ok(res.manifest, 'expected manifest'); + assert.equal(res.manifest.type, 'jetbrains'); + assert.equal(res.manifest.pluginId, 'com.example.benign'); + assert.equal(res.manifest.nativeBinaries.length, 0); + assert.equal(res.manifest.hasPremainClass, false); + assert.equal(res.manifest.hasSignature, false); + assert.ok(Array.isArray(res.manifest.bundledJars)); + assert.equal(res.manifest.bundledJars.length, 1); + }); +}); + +describe('parseIntelliJPlugin — Premain-Class detection', () => { + it('hasPremainClass === true when MANIFEST.MF sets it', async () => { + const root = await makePluginDir([ + { + name: 'main.jar', + entries: [ + { name: 'META-INF/plugin.xml', data: BENIGN_PLUGIN_XML }, + { + name: 'META-INF/MANIFEST.MF', + data: 'Manifest-Version: 1.0\nPremain-Class: com.example.Agent\n', + }, + ], + }, + ]); + const res = await parseIntelliJPlugin(root); + assert.equal(res.manifest.hasPremainClass, true); + assert.equal(res.manifest.premainClass, 'com.example.Agent'); + }); +}); + +describe('parseIntelliJPlugin — native binary detection', () => { + it('collects .so files with SHA-256 and size', async () => { + const nativeBytes = Buffer.alloc(16, 0xAB); + const expectedSha = createHash('sha256').update(nativeBytes).digest('hex'); + const root = await makePluginDir([ + { + name: 'main.jar', + entries: [ + { name: 'META-INF/plugin.xml', data: BENIGN_PLUGIN_XML }, + { name: 'native/dummy.so', data: nativeBytes }, + ], + }, + ]); + const res = await parseIntelliJPlugin(root); + assert.equal(res.manifest.nativeBinaries.length, 1); + assert.equal(res.manifest.nativeBinaries[0].size, 16); + assert.equal(res.manifest.nativeBinaries[0].sha256, expectedSha); + }); +}); + +describe('parseIntelliJPlugin — failure modes', () => { + it('missing lib/ returns null with IDE-JB-NO-LIB-DIR warning', async () => { + const root = await mkdtemp(join(tmpdir(), TEST_TMP_PREFIX)); + createdRoots.push(root); + const res = await parseIntelliJPlugin(root); + assert.equal(res.manifest, null); + assert.ok(res.warnings.some(w => w.startsWith('IDE-JB-NO-LIB-DIR'))); + }); + + it('no plugin.xml in any jar returns null with IDE-JB-NO-PLUGIN-XML', async () => { + const root = await makePluginDir([ + { + name: 'empty.jar', + entries: [ + { name: 'META-INF/MANIFEST.MF', data: 'Manifest-Version: 1.0\n' }, + ], + }, + ]); + const res = await parseIntelliJPlugin(root); + assert.equal(res.manifest, null); + assert.ok(res.warnings.some(w => w.includes('NO-PLUGIN-XML'))); + }); +}); + +describe('parseIntelliJPlugin — temp dir cleanup', () => { + after(async () => { + // Cleanup test plugin roots + for (const r of createdRoots) { + await rm(r, { recursive: true, force: true }).catch(() => {}); + } + // Assert no llmsec-jb-* temp dirs remain + const entries = await readdir(tmpdir()).catch(() => []); + const leaked = entries.filter(n => n.startsWith('llmsec-jb-')); + assert.equal(leaked.length, 0, `leaked temp dirs: ${leaked.join(', ')}`); + }); + + it('cleanup runs (checked via after hook)', () => { + assert.ok(true); + }); +});