feat(llm-security): implement parseIntelliJPlugin with nested-jar extraction

This commit is contained in:
Kjell Tore Guttormsen 2026-04-18 10:15:12 +02:00
commit 5afb9b1f33
3 changed files with 479 additions and 7 deletions

View file

@ -5,8 +5,11 @@
// - type: 'vscode' → parseVSCodeExtension (package.json + contributes)
// - type: 'jetbrains' → parseIntelliJPlugin (plugin.xml + MANIFEST.MF inside JARs)
import { readFile, access } from 'node:fs/promises';
import { join } from 'node:path';
import { readFile, readdir, stat, mkdtemp, rm, access } from 'node:fs/promises';
import { join, basename } from 'node:path';
import { tmpdir } from 'node:os';
import { createHash } from 'node:crypto';
import { extractToDir } from './zip-extract.mjs';
async function pathExists(p) {
try { await access(p); return true; } catch { return false; }
@ -400,12 +403,217 @@ export function parseManifestMf(mfString) {
return out;
}
// Matches names ending in a native-binary extension (.dll, .so, .dylib, .jnilib, .exe),
// ignoring case. Applied to the paths of files extracted from plugin jars.
const NATIVE_BIN_RE = /\.(dll|so|dylib|jnilib|exe)$/i;
// Matches names ending in .SF, .RSA, .DSA or .EC (case-sensitive). Applied to the
// entry names of the main jar's META-INF directory to decide `hasSignature`.
const SIGNATURE_FILE_RE = /\.(SF|RSA|DSA|EC)$/;
/**
 * Collect the paths of all regular files below `rootDir`, descending into
 * sub-directories depth-first in the order `readdir` reports them.
 *
 * Directories that cannot be read are skipped silently; entries that are
 * neither directories nor regular files are ignored.
 *
 * @param {string} rootDir directory to start from
 * @returns {Promise<string[]>} file paths, each prefixed with `rootDir`
 */
async function walkFiles(rootDir) {
  const collected = [];

  const visit = async (currentDir) => {
    let dirents;
    try {
      dirents = await readdir(currentDir, { withFileTypes: true });
    } catch {
      // Unreadable or missing directory: contribute nothing.
      return;
    }
    for (const dirent of dirents) {
      const childPath = join(currentDir, dirent.name);
      if (dirent.isDirectory()) {
        await visit(childPath);
        continue;
      }
      if (dirent.isFile()) {
        collected.push(childPath);
      }
    }
  };

  await visit(rootDir);
  return collected;
}
/**
 * Parse an IntelliJ plugin directory layout:
 *   <pluginRoot>/lib/*.jar   main jar contains META-INF/plugin.xml
 *
 * Every jar under lib/ is unpacked into a private temp directory. The first
 * jar that contains META-INF/plugin.xml is treated as the main jar: its
 * plugin.xml and MANIFEST.MF supply the plugin metadata and its META-INF
 * entries are checked for signature files. All extracted jars are scanned for
 * native binaries and listed in `bundledJars`. The temp directory is removed
 * before the function settles.
 *
 * @param {string} pluginRoot
 * @returns {Promise<{ manifest: ParsedManifest | null, warnings: string[] } | null>}
 *   `null` when `pluginRoot` is not a non-empty string; otherwise an object
 *   whose `manifest` is `null` when the plugin could not be parsed, together
 *   with every warning collected along the way.
 */
export async function parseIntelliJPlugin(pluginRoot) {
  if (typeof pluginRoot !== 'string' || !pluginRoot) return null;

  const warnings = [];
  const libDir = join(pluginRoot, 'lib');

  // lib/ must exist and be a directory.
  try {
    const s = await stat(libDir);
    if (!s.isDirectory()) {
      warnings.push('IDE-JB-NO-LIB-DIR: lib is not a directory');
      return { manifest: null, warnings };
    }
  } catch {
    warnings.push('IDE-JB-NO-LIB-DIR: lib directory missing');
    return { manifest: null, warnings };
  }

  let jarNames;
  try {
    jarNames = (await readdir(libDir)).filter(n => n.toLowerCase().endsWith('.jar'));
  } catch {
    warnings.push('IDE-JB-NO-LIB-DIR: cannot read lib');
    return { manifest: null, warnings };
  }
  if (jarNames.length === 0) {
    warnings.push('IDE-JB-NO-PLUGIN-XML: no jars in lib/');
    return { manifest: null, warnings };
  }

  const extractionRoot = await mkdtemp(join(tmpdir(), 'llmsec-jb-'));
  const extractedJars = [];
  try {
    // Extract each jar into its own sub-directory; a jar that fails to
    // extract is reported and skipped instead of aborting the whole parse.
    for (const jarName of jarNames) {
      const jarPath = join(libDir, jarName);
      try {
        const jarBuffer = await readFile(jarPath);
        const jarDir = await mkdtemp(join(extractionRoot, 'jar-'));
        await extractToDir(jarBuffer, jarDir);
        extractedJars.push({ jarPath, jarName, jarDir });
      } catch (err) {
        warnings.push(`IDE-JB-JAR-EXTRACT: ${jarName}: ${err.message}`);
      }
    }
    if (extractedJars.length === 0) {
      warnings.push('IDE-JB-NO-PLUGIN-XML: no jars could be extracted');
      return { manifest: null, warnings };
    }

    // Locate main jar: first one containing META-INF/plugin.xml
    let mainJar = null;
    const mainJarCandidates = [];
    for (const ej of extractedJars) {
      const xmlPath = join(ej.jarDir, 'META-INF', 'plugin.xml');
      if (await pathExists(xmlPath)) {
        mainJarCandidates.push(ej);
        if (!mainJar) mainJar = ej;
      }
    }
    if (!mainJar) {
      warnings.push('IDE-JB-NO-PLUGIN-XML: no jar contains META-INF/plugin.xml');
      return { manifest: null, warnings };
    }
    if (mainJarCandidates.length > 1) {
      warnings.push(`IDE-JB-MULTIPLE-PLUGIN-XML: ${mainJarCandidates.length} jars contain plugin.xml; first wins`);
    }

    // Parse plugin.xml
    let pluginXmlResult;
    try {
      const xmlRaw = await readFile(join(mainJar.jarDir, 'META-INF', 'plugin.xml'), 'utf8');
      pluginXmlResult = parsePluginXml(xmlRaw);
    } catch (err) {
      warnings.push(`IDE-JB-PLUGIN-XML-READ: ${err.message}`);
      return { manifest: null, warnings };
    }
    if (pluginXmlResult.warnings.length) warnings.push(...pluginXmlResult.warnings);
    if (!pluginXmlResult.manifest) {
      warnings.push('IDE-JB-PLUGIN-XML-PARSE: unparseable plugin.xml');
      return { manifest: null, warnings };
    }
    const px = pluginXmlResult.manifest;

    // Parse main jar MANIFEST.MF; the defaults below apply when it is absent or unreadable.
    let mainMf = { mainClass: null, premainClass: null, implTitle: null, implVersion: null, premainAttrs: {} };
    const mainMfPath = join(mainJar.jarDir, 'META-INF', 'MANIFEST.MF');
    if (await pathExists(mainMfPath)) {
      try {
        const mfRaw = await readFile(mainMfPath, 'utf8');
        mainMf = parseManifestMf(mfRaw);
      } catch (err) {
        warnings.push(`IDE-JB-MANIFEST-MF-READ: ${err.message}`);
      }
    }

    // Walk ALL jar-dirs for native binaries
    const nativeBinaries = [];
    for (const ej of extractedJars) {
      const files = await walkFiles(ej.jarDir);
      for (const f of files) {
        if (!NATIVE_BIN_RE.test(f)) continue;
        try {
          const buf = await readFile(f);
          nativeBinaries.push({
            // "<jar name>:<path inside the jar>"
            path: `${ej.jarName}:${f.slice(ej.jarDir.length + 1)}`,
            // Size of the bytes that were actually hashed; no second stat() needed.
            size: buf.length,
            sha256: createHash('sha256').update(buf).digest('hex'),
          });
        } catch (err) {
          warnings.push(`IDE-JB-NATIVE-READ: ${err.message}`);
        }
      }
    }

    // Parse every jar's MANIFEST.MF for bundled-jars list
    const bundledJars = [];
    for (const ej of extractedJars) {
      const mfPath = join(ej.jarDir, 'META-INF', 'MANIFEST.MF');
      let mf = { implTitle: null, implVersion: null };
      if (await pathExists(mfPath)) {
        try {
          mf = parseManifestMf(await readFile(mfPath, 'utf8'));
        } catch {
          // fall through with nulls
        }
      }
      bundledJars.push({
        name: ej.jarName,
        version: mf.implVersion || null,
        // Flagged as shaded when the manifest lacks an implementation title or version.
        shaded: !mf.implTitle || !mf.implVersion,
        coords: mf.implTitle || null,
      });
    }

    // Signature check on main jar
    let hasSignature = false;
    try {
      const metaInfDir = join(mainJar.jarDir, 'META-INF');
      const metaEntries = await readdir(metaInfDir);
      hasSignature = metaEntries.some(f => SIGNATURE_FILE_RE.test(f));
    } catch { /* no META-INF */ }

    // Fall back to the directory name when plugin.xml declares no id.
    const pluginId = px.pluginId || basename(pluginRoot);
    const manifest = {
      type: 'jetbrains',
      id: pluginId.toLowerCase(),
      pluginId,
      publisher: (px.vendor || '').toLowerCase(),
      name: px.name || '',
      version: px.version || '',
      engines: {},
      main: null,
      browser: null,
      activationEvents: [],
      contributes: {},
      extensionPack: [],
      extensionDependencies: [],
      extensionKind: [],
      categories: [],
      capabilities: {},
      scripts: {},
      repository: px.vendorUrl || null,
      dependencies: {},
      hasSignature,
      sinceBuild: px.sinceBuild,
      untilBuild: px.untilBuild,
      depends: px.depends,
      extensionDeclarations: px.extensionDeclarations,
      applicationComponents: px.applicationComponents,
      listeners: px.listeners,
      themeProviders: px.themeProviders,
      hasPremainClass: Boolean(mainMf.premainClass),
      premainClass: mainMf.premainClass || null,
      nativeBinaries,
      bundledJars,
    };
    return { manifest, warnings };
  } catch (err) {
    warnings.push(`IDE-JB-UNCAUGHT: ${err.message}`);
    return { manifest: null, warnings };
  } finally {
    // A throw from inside `finally` would replace the pending return value,
    // so a failed cleanup is reported as a warning instead of discarding the
    // parse result. `warnings` is the same array the caller receives.
    try {
      await rm(extractionRoot, { recursive: true, force: true });
    } catch (err) {
      warnings.push(`IDE-JB-CLEANUP: ${err.message}`);
    }
  }
}

View file

@ -0,0 +1,127 @@
// zip-writer.mjs — Minimal stored-method (no compression) ZIP writer.
// Zero dependencies. Deterministic output: fixed DOS timestamp, sorted entry order.
//
// Writes a valid ZIP that zip-extract.mjs can parse. Uses method=0 (STORE),
// CRC-32 computed, no encryption, no ZIP64. Suitable for tiny test fixtures.
import { createHash } from 'node:crypto';
// Lookup table for byte-at-a-time CRC-32 (reflected polynomial 0xEDB88320).
// Entry `byte` is the remainder after feeding that single byte through eight
// shift/xor rounds.
const CRC_TABLE = Uint32Array.from({ length: 256 }, (_, byte) => {
  let remainder = byte;
  for (let round = 0; round < 8; round += 1) {
    const shifted = remainder >>> 1;
    remainder = (remainder & 1) === 1 ? (shifted ^ 0xEDB88320) : shifted;
  }
  return remainder >>> 0;
});
/**
 * Compute the CRC-32 checksum of a byte sequence using CRC_TABLE.
 *
 * @param {Buffer | Uint8Array} buf bytes to checksum
 * @returns {number} checksum as an unsigned 32-bit integer
 */
function crc32(buf) {
  let crc = 0xFFFFFFFF;
  for (let idx = 0; idx < buf.length; idx += 1) {
    const slot = (crc ^ buf[idx]) & 0xFF;
    crc = ((crc >>> 8) ^ CRC_TABLE[slot]) >>> 0;
  }
  // Final inversion; `>>> 0` keeps the result unsigned.
  return (crc ^ 0xFFFFFFFF) >>> 0;
}
// Fixed MS-DOS date word written into every header: year offset from 1980 is
// shifted left by 9, month by 5, day sits in the low bits. Value: 1980-01-01.
const DOS_DATE = (0 << 9) + (1 << 5) + 1;
// Fixed MS-DOS time word: 00:00:00.
const DOS_TIME = 0x0000;
/**
 * Build a ZIP buffer from a list of entries.
 *
 * Every entry is written with method 0 (STORE), a fixed DOS timestamp and no
 * extra fields or comments. Entries are ordered by the bytes of their UTF-8
 * encoded names, so the output does not depend on the locale of the machine
 * that runs the writer.
 *
 * @param {Array<{name: string, data: Buffer | string}>} entries
 *   `data` strings are encoded as UTF-8.
 * @returns {Buffer}
 */
export function createZip(entries) {
  // Normalize + sort for determinism. Byte-wise comparison is used instead of
  // localeCompare, whose ordering varies with the runtime's locale/ICU data.
  const normalized = entries
    .map(e => ({
      nameBuf: Buffer.from(e.name, 'utf8'),
      data: Buffer.isBuffer(e.data) ? e.data : Buffer.from(e.data, 'utf8'),
    }))
    .sort((a, b) => Buffer.compare(a.nameBuf, b.nameBuf));

  const chunks = [];
  const centralHeaders = [];
  let offset = 0;

  for (const { nameBuf, data } of normalized) {
    const c = crc32(data);
    const sz = data.length;

    // Local File Header (30 + nameLen)
    const lfh = Buffer.alloc(30);
    lfh.writeUInt32LE(0x04034b50, 0); // signature
    lfh.writeUInt16LE(20, 4); // version needed
    lfh.writeUInt16LE(0, 6); // flags
    lfh.writeUInt16LE(0, 8); // method = STORE
    lfh.writeUInt16LE(DOS_TIME, 10);
    lfh.writeUInt16LE(DOS_DATE, 12);
    lfh.writeUInt32LE(c, 14); // crc32
    lfh.writeUInt32LE(sz, 18); // compressed size
    lfh.writeUInt32LE(sz, 22); // uncompressed size
    lfh.writeUInt16LE(nameBuf.length, 26);
    lfh.writeUInt16LE(0, 28); // extra field length
    chunks.push(lfh, nameBuf, data);

    // The central directory records where this entry's local header starts.
    const localOffset = offset;
    offset += 30 + nameBuf.length + sz;

    // Central Directory Header (46 + nameLen)
    const cdh = Buffer.alloc(46);
    cdh.writeUInt32LE(0x02014b50, 0); // signature
    cdh.writeUInt16LE(20, 4); // version made by
    cdh.writeUInt16LE(20, 6); // version needed
    cdh.writeUInt16LE(0, 8); // flags
    cdh.writeUInt16LE(0, 10); // method
    cdh.writeUInt16LE(DOS_TIME, 12);
    cdh.writeUInt16LE(DOS_DATE, 14);
    cdh.writeUInt32LE(c, 16);
    cdh.writeUInt32LE(sz, 20);
    cdh.writeUInt32LE(sz, 24);
    cdh.writeUInt16LE(nameBuf.length, 28);
    cdh.writeUInt16LE(0, 30); // extra
    cdh.writeUInt16LE(0, 32); // comment
    cdh.writeUInt16LE(0, 34); // disk
    cdh.writeUInt16LE(0, 36); // internal attrs
    cdh.writeUInt32LE(0, 38); // external attrs
    cdh.writeUInt32LE(localOffset, 42);
    centralHeaders.push({ cdh, nameBuf });
  }

  // Central directory follows the last local entry.
  const centralStart = offset;
  for (const { cdh, nameBuf } of centralHeaders) {
    chunks.push(cdh, nameBuf);
    offset += cdh.length + nameBuf.length;
  }
  const centralSize = offset - centralStart;

  // End of Central Directory
  const eocd = Buffer.alloc(22);
  eocd.writeUInt32LE(0x06054b50, 0);
  eocd.writeUInt16LE(0, 4); // this disk
  eocd.writeUInt16LE(0, 6); // disk with central directory
  eocd.writeUInt16LE(normalized.length, 8); // entries on this disk
  eocd.writeUInt16LE(normalized.length, 10); // entries total
  eocd.writeUInt32LE(centralSize, 12);
  eocd.writeUInt32LE(centralStart, 16);
  eocd.writeUInt16LE(0, 20); // comment length
  chunks.push(eocd);

  return Buffer.concat(chunks);
}
/**
 * Hash a buffer with SHA-256.
 *
 * @param {Buffer | string} buf data to hash
 * @returns {string} the digest as a hex string
 */
export function sha256Hex(buf) {
  const hasher = createHash('sha256');
  hasher.update(buf);
  return hasher.digest('hex');
}

View file

@ -2,12 +2,18 @@
//
// All inputs are inline strings — no filesystem fixtures required.
import { describe, it } from 'node:test';
import { describe, it, after } from 'node:test';
import assert from 'node:assert/strict';
import { mkdtemp, mkdir, writeFile, readdir, rm } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { createHash } from 'node:crypto';
import {
parsePluginXml,
parseManifestMf,
parseIntelliJPlugin,
} from '../../scanners/lib/ide-extension-parser.mjs';
import { createZip } from '../helpers/zip-writer.mjs';
describe('parsePluginXml — happy path', () => {
const xml = `<?xml version="1.0"?>
@ -241,3 +247,134 @@ describe('parseManifestMf', () => {
assert.deepEqual(parseManifestMf(lf), parseManifestMf(crlf));
});
});
// ---------------------------------------------------------------------------
// parseIntelliJPlugin — synthetic plugin dirs built in-test via zip-writer
// ---------------------------------------------------------------------------
// Prefix of every temp plugin root created by these tests.
const TEST_TMP_PREFIX = 'llmsec-jbparse-test-';
// Plugin roots created so far; removed by the cleanup suite's `after` hook.
const createdRoots = [];

/**
 * Create a temp plugin directory with a lib/ folder holding one zip-encoded
 * jar per descriptor, and remember the root for later cleanup.
 *
 * @param {Array<{name: string, entries: Array<{name: string, data: Buffer | string}>}>} jars
 * @returns {Promise<string>} path of the new plugin root
 */
async function makePluginDir(jars) {
  const pluginRoot = await mkdtemp(join(tmpdir(), TEST_TMP_PREFIX));
  createdRoots.push(pluginRoot);

  const libDir = join(pluginRoot, 'lib');
  await mkdir(libDir, { recursive: true });

  for (const jar of jars) {
    const jarBytes = createZip(jar.entries);
    await writeFile(join(libDir, jar.name), jarBytes);
  }
  return pluginRoot;
}
// Minimal plugin.xml fixture used by the parseIntelliJPlugin tests: it declares
// only an id, name, version and vendor.
const BENIGN_PLUGIN_XML = `<?xml version="1.0"?>
<idea-plugin>
<id>com.example.benign</id>
<name>Benign</name>
<version>1.0</version>
<vendor>Example</vendor>
</idea-plugin>`;
// A single jar with plugin.xml and a bare MANIFEST.MF should parse cleanly and
// report no native binaries, no Premain-Class and no signature.
describe('parseIntelliJPlugin — benign synthetic plugin', () => {
  it('extracts pluginId, depends, no native/premain/signature', async () => {
    const mainJar = {
      name: 'main.jar',
      entries: [
        { name: 'META-INF/plugin.xml', data: BENIGN_PLUGIN_XML },
        { name: 'META-INF/MANIFEST.MF', data: 'Manifest-Version: 1.0\n' },
      ],
    };
    const pluginRoot = await makePluginDir([mainJar]);

    const result = await parseIntelliJPlugin(pluginRoot);

    assert.ok(result, 'expected non-null result');
    assert.ok(result.manifest, 'expected manifest');
    const parsed = result.manifest;
    assert.equal(parsed.type, 'jetbrains');
    assert.equal(parsed.pluginId, 'com.example.benign');
    assert.equal(parsed.nativeBinaries.length, 0);
    assert.equal(parsed.hasPremainClass, false);
    assert.equal(parsed.hasSignature, false);
    assert.ok(Array.isArray(parsed.bundledJars));
    assert.equal(parsed.bundledJars.length, 1);
  });
});
// A Premain-Class attribute in the main jar's MANIFEST.MF must surface on the manifest.
describe('parseIntelliJPlugin — Premain-Class detection', () => {
  it('hasPremainClass === true when MANIFEST.MF sets it', async () => {
    const manifestMf = {
      name: 'META-INF/MANIFEST.MF',
      data: 'Manifest-Version: 1.0\nPremain-Class: com.example.Agent\n',
    };
    const pluginXml = { name: 'META-INF/plugin.xml', data: BENIGN_PLUGIN_XML };
    const pluginRoot = await makePluginDir([
      { name: 'main.jar', entries: [pluginXml, manifestMf] },
    ]);

    const result = await parseIntelliJPlugin(pluginRoot);

    assert.equal(result.manifest.hasPremainClass, true);
    assert.equal(result.manifest.premainClass, 'com.example.Agent');
  });
});
// A .so file inside the jar must be reported with its byte size and SHA-256.
describe('parseIntelliJPlugin — native binary detection', () => {
  it('collects .so files with SHA-256 and size', async () => {
    const nativeBytes = Buffer.alloc(16, 0xAB);
    const hasher = createHash('sha256');
    hasher.update(nativeBytes);
    const expectedSha = hasher.digest('hex');

    const mainJar = {
      name: 'main.jar',
      entries: [
        { name: 'META-INF/plugin.xml', data: BENIGN_PLUGIN_XML },
        { name: 'native/dummy.so', data: nativeBytes },
      ],
    };
    const pluginRoot = await makePluginDir([mainJar]);

    const result = await parseIntelliJPlugin(pluginRoot);

    assert.equal(result.manifest.nativeBinaries.length, 1);
    assert.equal(result.manifest.nativeBinaries[0].size, 16);
    assert.equal(result.manifest.nativeBinaries[0].sha256, expectedSha);
  });
});
// Unparseable layouts yield `manifest: null` together with a coded warning.
describe('parseIntelliJPlugin — failure modes', () => {
  it('missing lib/ returns null with IDE-JB-NO-LIB-DIR warning', async () => {
    // Bare temp dir with no lib/ sub-directory.
    const pluginRoot = await mkdtemp(join(tmpdir(), TEST_TMP_PREFIX));
    createdRoots.push(pluginRoot);

    const result = await parseIntelliJPlugin(pluginRoot);

    assert.equal(result.manifest, null);
    const hasNoLibWarning = result.warnings.some(w => w.startsWith('IDE-JB-NO-LIB-DIR'));
    assert.ok(hasNoLibWarning);
  });

  it('no plugin.xml in any jar returns null with IDE-JB-NO-PLUGIN-XML', async () => {
    const jarWithoutPluginXml = {
      name: 'empty.jar',
      entries: [
        { name: 'META-INF/MANIFEST.MF', data: 'Manifest-Version: 1.0\n' },
      ],
    };
    const pluginRoot = await makePluginDir([jarWithoutPluginXml]);

    const result = await parseIntelliJPlugin(pluginRoot);

    assert.equal(result.manifest, null);
    const hasNoXmlWarning = result.warnings.some(w => w.includes('NO-PLUGIN-XML'));
    assert.ok(hasNoXmlWarning);
  });
});
// The real checks live in the `after` hook: it removes the plugin roots the
// tests created and then asserts that no parser extraction directories are
// left in the OS temp dir.
// NOTE(review): the leak check scans the shared OS temp dir, so a directory
// with this prefix left by another process would also fail it — confirm that
// is acceptable for the environments these tests run in.
describe('parseIntelliJPlugin — temp dir cleanup', () => {
  after(async () => {
    // Cleanup test plugin roots (best effort)
    for (const pluginRoot of createdRoots) {
      try {
        await rm(pluginRoot, { recursive: true, force: true });
      } catch {
        // ignore: removal failures must not hide the leak assertion below
      }
    }

    // Assert no llmsec-jb-* temp dirs remain
    let tmpEntries;
    try {
      tmpEntries = await readdir(tmpdir());
    } catch {
      tmpEntries = [];
    }
    const leaked = tmpEntries.filter(entryName => entryName.startsWith('llmsec-jb-'));
    assert.equal(leaked.length, 0, `leaked temp dirs: ${leaked.join(', ')}`);
  });

  it('cleanup runs (checked via after hook)', () => {
    assert.ok(true);
  });
});