From b86239448dd894d3cf420aa4fb8299f1edfcf4bb Mon Sep 17 00:00:00 2001 From: Kjell Tore Guttormsen Date: Sat, 18 Apr 2026 10:07:14 +0200 Subject: [PATCH] feat(llm-security): add zero-dep plugin.xml + MANIFEST.MF parsers --- .../scanners/lib/ide-extension-parser.mjs | 307 +++++++++++++++++- .../tests/scanners/jetbrains-parser.test.mjs | 243 ++++++++++++++ 2 files changed, 546 insertions(+), 4 deletions(-) create mode 100644 plugins/llm-security/tests/scanners/jetbrains-parser.test.mjs diff --git a/plugins/llm-security/scanners/lib/ide-extension-parser.mjs b/plugins/llm-security/scanners/lib/ide-extension-parser.mjs index e7030ca..928c0b6 100644 --- a/plugins/llm-security/scanners/lib/ide-extension-parser.mjs +++ b/plugins/llm-security/scanners/lib/ide-extension-parser.mjs @@ -1,5 +1,9 @@ -// ide-extension-parser.mjs — Parse VS Code extension package.json into normalized manifest. +// ide-extension-parser.mjs — Parse IDE extension manifests into a normalized shape. // Zero dependencies (Node.js builtins only). +// +// Two extension types are supported: +// - type: 'vscode' → parseVSCodeExtension (package.json + contributes) +// - type: 'jetbrains' → parseIntelliJPlugin (plugin.xml + MANIFEST.MF inside JARs) import { readFile, access } from 'node:fs/promises'; import { join } from 'node:path'; @@ -10,6 +14,9 @@ async function pathExists(p) { /** * @typedef {object} ParsedManifest + * @property {'vscode'|'jetbrains'} type + * + * // Shared / VS Code fields (kept at top level for backward compat with runIdeChecks) * @property {string} id * @property {string} publisher * @property {string} name @@ -28,6 +35,20 @@ async function pathExists(p) { * @property {object|string|null} repository * @property {object} dependencies * @property {boolean} hasSignature + * + * // JetBrains-only fields (present only when type === 'jetbrains') + * @property {string} [pluginId] + * @property {string|null} [sinceBuild] + * @property {string|null} [untilBuild] + * @property {Array<{id:string, optional:boolean, configFile:string|null}>} [depends] + * @property {Array<{namespace:string, name:string, attrs:object}>} [extensionDeclarations] + * @property {string[]} [applicationComponents] + * @property {Array<{topic:string, class:string}>} [listeners] + * @property {boolean} [hasPremainClass] + * @property {string|null} [premainClass] + * @property {Array<{path:string, size:number, sha256:string}>} [nativeBinaries] + * @property {Array<{name:string, version:string|null, shaded:boolean, coords:string|null}>} [bundledJars] + * @property {Array<{id:string, path:string}>} [themeProviders] */ /** @@ -70,6 +91,7 @@ export async function parseVSCodeExtension(extRoot) { const hasSignature = await pathExists(join(extRoot, '.signature.p7s')); const manifest = { + type: 'vscode', id: `${publisher}.${name}`.toLowerCase(), publisher: publisher.toLowerCase(), name: name.toLowerCase(), @@ -94,16 +116,293 @@ export async function parseVSCodeExtension(extRoot) { } /** - * Parse a .vsix file. Stub for v1 — user must extract first. + * Parse a .vsix file. Stub — caller extracts first via lib/zip-extract.mjs. * @param {string} vsixPath * @throws {Error} */ export async function parseVsixFile(vsixPath) { - throw new Error(`VSIX parsing not implemented in v6.3.0. Extract manually (unzip ${vsixPath}) and pass the extracted directory.`); + throw new Error(`VSIX parsing not implemented in library-direct form. Extract manually (unzip ${vsixPath}) and pass the extracted directory.`); +} + +// --------------------------------------------------------------------------- +// JetBrains helpers — zero-dep plugin.xml + MANIFEST.MF parsers +// --------------------------------------------------------------------------- + +const NAMED_ENTITIES = { + amp: '&', lt: '<', gt: '>', quot: '"', apos: "'", +}; + +/** + * Decode XML entity references in text content (non-CDATA). + * @param {string} s + * @returns {string} + */ +function decodeEntities(s) { + return s.replace(/&(#x?[0-9a-fA-F]+|[a-zA-Z]+);/g, (full, inner) => { + if (inner.startsWith('#x') || inner.startsWith('#X')) { + const cp = parseInt(inner.slice(2), 16); + return Number.isFinite(cp) ? String.fromCodePoint(cp) : full; + } + if (inner.startsWith('#')) { + const cp = parseInt(inner.slice(1), 10); + return Number.isFinite(cp) ? String.fromCodePoint(cp) : full; + } + return Object.prototype.hasOwnProperty.call(NAMED_ENTITIES, inner) + ? NAMED_ENTITIES[inner] + : full; + }); } /** - * Parse an IntelliJ plugin. Stub for v1.1. + * Capture the first match of a named element. Returns its text content + * (with CDATA honoured, otherwise entity-decoded), or null. + * @param {string} xml + * @param {string} tag + * @returns {string|null} + */ +function firstElementText(xml, tag) { + const re = new RegExp(`<${tag}\\b[^>]*>([\\s\\S]*?)`, 'i'); + const m = xml.match(re); + if (!m) return null; + return normalizeTextContent(m[1]); +} + +function normalizeTextContent(raw) { + const trimmed = raw.trim(); + if (trimmed.startsWith('')) { + return trimmed.slice(9, -3); + } + return decodeEntities(trimmed); +} + +/** + * Parse a `key="value"` or `key='value'` attribute list. + * @param {string} attrBlob - e.g. ` id="X" path='Y'` + * @returns {Record} + */ +function parseAttrs(attrBlob) { + const attrs = {}; + if (!attrBlob) return attrs; + const re = /([\w-]+)\s*=\s*(?:"([^"]*)"|'([^']*)')/g; + let m; + while ((m = re.exec(attrBlob)) !== null) { + attrs[m[1]] = decodeEntities(m[2] !== undefined ? m[2] : m[3]); + } + return attrs; +} + +/** + * Parse a JetBrains `plugin.xml` document. + * + * Regex-based, intentionally lenient. Malformed input returns + * `{ manifest: null, warnings: [...] }` rather than throwing. + * + * @param {string} xmlString + * @returns {{ manifest: object|null, warnings: string[] }} + */ +export function parsePluginXml(xmlString) { + const warnings = []; + + if (typeof xmlString !== 'string') { + return { manifest: null, warnings: ['input is not a string'] }; + } + + // --- Pre-processing pipeline --- + let xml = xmlString.replace(/^\uFEFF/, ''); + xml = xml.replace(/\r\n?/g, '\n'); + xml = xml.replace(//g, ''); + + // Bail on obviously malformed (no root and no ) + if (!/ or element found — not a plugin.xml'); + return { manifest: null, warnings }; + } + + // Bail on unbalanced-tag smell: count open vs close for and . + // Both are required non-self-closing elements in plugin.xml, so any mismatch + // signals truncation or malformed input. + const balanceChecks = [ + { open: /]*>/g, close: /<\/id>/g, tag: 'id' }, + { open: /]*>/g, close: /<\/name>/g, tag: 'name' }, + ]; + for (const { open, close, tag } of balanceChecks) { + const o = (xml.match(open) || []).length; + const c = (xml.match(close) || []).length; + if (o > 0 && o !== c) { + warnings.push(`unbalanced <${tag}> tags — truncated input`); + return { manifest: null, warnings }; + } + } + + const safe = (fn, label) => { + try { return fn(); } catch (err) { + warnings.push(`${label}: ${err.message}`); + return null; + } + }; + + const pluginId = safe(() => firstElementText(xml, 'id'), 'pluginId') || ''; + const name = safe(() => firstElementText(xml, 'name'), 'name') || ''; + const version = safe(() => firstElementText(xml, 'version'), 'version') || ''; + + // Name + let vendor = ''; + let vendorUrl = null; + const vendorMatch = xml.match(/]*)>([\s\S]*?)<\/vendor>/i); + if (vendorMatch) { + const attrs = parseAttrs(vendorMatch[1]); + vendorUrl = attrs.url || null; + vendor = normalizeTextContent(vendorMatch[2]); + } + + // + let sinceBuild = null; + let untilBuild = null; + const ideaVersionMatch = xml.match(/]*)\/?\s*>/i); + if (ideaVersionMatch) { + const attrs = parseAttrs(ideaVersionMatch[1]); + sinceBuild = attrs['since-build'] || null; + untilBuild = attrs['until-build'] || null; + } + + // ID + const depends = []; + const dependsRe = /]*)>([\s\S]*?)<\/depends>/gi; + let dm; + while ((dm = dependsRe.exec(xml)) !== null) { + const attrs = parseAttrs(dm[1]); + depends.push({ + id: normalizeTextContent(dm[2]), + optional: attrs.optional === 'true', + configFile: attrs['config-file'] || null, + }); + } + + // ... + const extensionDeclarations = []; + const themeProviders = []; + const extBlockRe = /]*)>([\s\S]*?)<\/extensions>/gi; + let em; + while ((em = extBlockRe.exec(xml)) !== null) { + const attrs = parseAttrs(em[1]); + const ns = attrs.defaultExtensionNs || 'com.intellij'; + const body = em[2]; + // Children: <(name) ... /> or <(name) ...>... + // Use [^>]*? (non-greedy, slash allowed in attr values like path="/x/y") + // so self-closing elements with slashes in attributes still match. + const childRe = /<([\w.-]+)\b([^>]*?)(?:\/\s*>|>([\s\S]*?)<\/\1>)/g; + let cm; + while ((cm = childRe.exec(body)) !== null) { + const childName = cm[1]; + const childAttrs = parseAttrs(cm[2]); + extensionDeclarations.push({ namespace: ns, name: childName, attrs: childAttrs }); + if (childName === 'themeProvider') { + themeProviders.push({ + id: childAttrs.id || '', + path: childAttrs.path || '', + }); + } + } + } + + // X + const applicationComponents = []; + const appCompBlockRe = /]*>([\s\S]*?)<\/application-components>/gi; + let am; + while ((am = appCompBlockRe.exec(xml)) !== null) { + const implRe = /\s*([\s\S]*?)\s*<\/implementation-class>/g; + let im; + while ((im = implRe.exec(am[1])) !== null) { + applicationComponents.push(decodeEntities(im[1]).trim()); + } + } + + // + const listeners = []; + const listenerRe = /]*)\/?\s*>/gi; + let lm; + while ((lm = listenerRe.exec(xml)) !== null) { + const attrs = parseAttrs(lm[1]); + listeners.push({ + topic: attrs.topic || '', + class: attrs.class || '', + }); + } + + return { + manifest: { + pluginId, + name, + version, + vendor, + vendorUrl, + sinceBuild, + untilBuild, + depends, + extensionDeclarations, + applicationComponents, + listeners, + themeProviders, + }, + warnings, + }; +} + +/** + * Parse a `META-INF/MANIFEST.MF` file. Simple `Key: Value` line protocol. + * Handles RFC-822 72-char continuation lines (lines starting with space/tab + * are appended to the previous line's value). + * + * @param {string} mfString + * @returns {{mainClass: string|null, premainClass: string|null, implTitle: string|null, implVersion: string|null, premainAttrs: object}} + */ +export function parseManifestMf(mfString) { + const out = { + mainClass: null, + premainClass: null, + implTitle: null, + implVersion: null, + premainAttrs: {}, + }; + + if (typeof mfString !== 'string' || mfString.length === 0) return out; + + // Pre-processing + let s = mfString.replace(/^\uFEFF/, ''); + s = s.replace(/\r\n?/g, '\n'); + + // Concatenate continuation lines (lines beginning with a single space or tab). + const rawLines = s.split('\n'); + const logical = []; + for (const line of rawLines) { + if (line.length > 0 && (line[0] === ' ' || line[0] === '\t') && logical.length > 0) { + logical[logical.length - 1] += line.slice(1); + } else { + logical.push(line); + } + } + + for (const line of logical) { + if (!line || !line.includes(': ')) continue; + const idx = line.indexOf(': '); + const key = line.slice(0, idx).trim(); + const value = line.slice(idx + 2); + if (key === 'Main-Class') out.mainClass = value; + else if (key === 'Premain-Class') out.premainClass = value; + else if (key === 'Implementation-Title') out.implTitle = value; + else if (key === 'Implementation-Version') out.implVersion = value; + // Forensic collection of all Premain-* + Agent-* attributes + if (/^(Premain-|Agent-|Boot-Class-Path|Can-)/.test(key)) { + out.premainAttrs[key] = value; + } + } + + return out; +} + +/** + * Parse an IntelliJ plugin directory. Implemented in Step 6 (v6.6.0). + * Stub preserved until Step 6 lands. * @param {string} pluginRoot * @returns {Promise} */ diff --git a/plugins/llm-security/tests/scanners/jetbrains-parser.test.mjs b/plugins/llm-security/tests/scanners/jetbrains-parser.test.mjs new file mode 100644 index 0000000..db2706e --- /dev/null +++ b/plugins/llm-security/tests/scanners/jetbrains-parser.test.mjs @@ -0,0 +1,243 @@ +// jetbrains-parser.test.mjs — Zero-dep plugin.xml + MANIFEST.MF parsers. +// +// All inputs are inline strings — no filesystem fixtures required. + +import { describe, it } from 'node:test'; +import assert from 'node:assert/strict'; +import { + parsePluginXml, + parseManifestMf, +} from '../../scanners/lib/ide-extension-parser.mjs'; + +describe('parsePluginXml — happy path', () => { + const xml = ` + + org.example.myplugin + My Plugin + 1.2.3 + Example Inc + + com.intellij.modules.platform + com.intellij.modules.python + + + + + +`; + + it('extracts pluginId, name, version, vendor', () => { + const { manifest, warnings } = parsePluginXml(xml); + assert.ok(manifest, `expected manifest, got null; warnings: ${warnings.join('; ')}`); + assert.equal(manifest.pluginId, 'org.example.myplugin'); + assert.equal(manifest.name, 'My Plugin'); + assert.equal(manifest.version, '1.2.3'); + assert.equal(manifest.vendor, 'Example Inc'); + assert.equal(manifest.vendorUrl, 'https://example.com'); + }); + + it('extracts idea-version build range', () => { + const { manifest } = parsePluginXml(xml); + assert.equal(manifest.sinceBuild, '232.0'); + assert.equal(manifest.untilBuild, '242.*'); + }); + + it('extracts depends[] with optional + config-file', () => { + const { manifest } = parsePluginXml(xml); + assert.equal(manifest.depends.length, 2); + assert.deepEqual(manifest.depends[0], { + id: 'com.intellij.modules.platform', optional: false, configFile: null, + }); + assert.deepEqual(manifest.depends[1], { + id: 'com.intellij.modules.python', optional: true, configFile: 'python.xml', + }); + }); + + it('captures extension children with namespace', () => { + const { manifest } = parsePluginXml(xml); + const names = manifest.extensionDeclarations.map(e => e.name).sort(); + assert.deepEqual(names, ['applicationService', 'postStartupActivity', 'themeProvider']); + assert.ok(manifest.extensionDeclarations.every(e => e.namespace === 'com.intellij')); + }); + + it('collects themeProviders[] with id + path', () => { + const { manifest } = parsePluginXml(xml); + assert.equal(manifest.themeProviders.length, 1); + assert.equal(manifest.themeProviders[0].id, 'my-theme'); + assert.equal(manifest.themeProviders[0].path, '/themes/my.theme.json'); + }); +}); + +describe('parsePluginXml — CDATA + entity handling', () => { + it('preserves CDATA content verbatim', () => { + const xml = ` + x.y + hello & world]]> + `; + const { manifest } = parsePluginXml(xml); + assert.equal(manifest.name, 'hello & world'); + }); + + it('decodes named entity refs in non-CDATA text', () => { + const xml = ` + com.intellij.java&extras + n + `; + const { manifest } = parsePluginXml(xml); + assert.equal(manifest.pluginId, 'com.intellij.java&extras'); + }); + + it('decodes numeric entity refs (decimal + hex)', () => { + const xml = `ABCDn`; + const { manifest } = parsePluginXml(xml); + assert.equal(manifest.pluginId, 'ABCD'); + }); +}); + +describe('parsePluginXml — robustness', () => { + it('parses BOM-prefixed input identically', () => { + const xmlA = `an`; + const xmlB = '\uFEFF' + xmlA; + assert.deepEqual(parsePluginXml(xmlA).manifest, parsePluginXml(xmlB).manifest); + }); + + it('parses CRLF identically to LF', () => { + const xmlLF = `\na\nn\n`; + const xmlCRLF = xmlLF.replace(/\n/g, '\r\n'); + assert.deepEqual(parsePluginXml(xmlLF).manifest, parsePluginXml(xmlCRLF).manifest); + }); + + it('strips XML comments before regex match', () => { + const xml = ` + + real.id + n + `; + const { manifest } = parsePluginXml(xml); + assert.equal(manifest.pluginId, 'real.id'); + }); + + it('non-string input returns null + warning (never throws)', () => { + const { manifest, warnings } = parsePluginXml(null); + assert.equal(manifest, null); + assert.ok(warnings.length > 0); + }); + + it('truncated input returns null + warning (never throws)', () => { + const xml = `an 0); + }); + + it('unknown namespace on is preserved', () => { + const xml = ` + an + + + + `; + const { manifest } = parsePluginXml(xml); + assert.equal(manifest.extensionDeclarations.length, 1); + assert.equal(manifest.extensionDeclarations[0].namespace, 'org.custom'); + assert.equal(manifest.extensionDeclarations[0].name, 'myService'); + }); + + it('captures legacy application-components', () => { + const xml = ` + an + + + com.bad.Comp + + + `; + const { manifest } = parsePluginXml(xml); + assert.deepEqual(manifest.applicationComponents, ['com.bad.Comp']); + }); + + it('captures applicationListener topic + class', () => { + const xml = ` + an + + `; + const { manifest } = parsePluginXml(xml); + assert.equal(manifest.listeners.length, 1); + assert.equal(manifest.listeners[0].topic, 'com.intellij.ide.AppLifecycleListener'); + assert.equal(manifest.listeners[0].class, 'org.x.Listener'); + }); +}); + +describe('parseManifestMf', () => { + it('extracts Main-Class, Premain-Class, Implementation-Title/Version', () => { + const mf = [ + 'Manifest-Version: 1.0', + 'Main-Class: org.example.Main', + 'Premain-Class: org.bad.Agent', + 'Implementation-Title: my-plugin', + 'Implementation-Version: 1.0.0', + '', + ].join('\n'); + const out = parseManifestMf(mf); + assert.equal(out.mainClass, 'org.example.Main'); + assert.equal(out.premainClass, 'org.bad.Agent'); + assert.equal(out.implTitle, 'my-plugin'); + assert.equal(out.implVersion, '1.0.0'); + }); + + it('collects Premain-/Agent-/Can- attrs into premainAttrs', () => { + const mf = [ + 'Premain-Class: org.bad.Agent', + 'Can-Redefine-Classes: true', + 'Can-Retransform-Classes: true', + 'Agent-Class: org.bad.Agent', + 'Boot-Class-Path: boot.jar', + '', + ].join('\n'); + const out = parseManifestMf(mf); + assert.equal(out.premainAttrs['Can-Redefine-Classes'], 'true'); + assert.equal(out.premainAttrs['Can-Retransform-Classes'], 'true'); + assert.equal(out.premainAttrs['Agent-Class'], 'org.bad.Agent'); + assert.equal(out.premainAttrs['Boot-Class-Path'], 'boot.jar'); + }); + + it('handles 72-char continuation lines (space-prefixed)', () => { + const mf = [ + 'Premain-Class: org.example.VeryLongPackage', + ' Name.ContinuedAgent', + '', + ].join('\n'); + const out = parseManifestMf(mf); + assert.equal(out.premainClass, 'org.example.VeryLongPackageName.ContinuedAgent'); + }); + + it('handles tab continuation (rare but legal)', () => { + const mf = 'Main-Class: org.a\n\tTail\n'; + const out = parseManifestMf(mf); + assert.equal(out.mainClass, 'org.aTail'); + }); + + it('empty input returns all-null without throwing', () => { + const out = parseManifestMf(''); + assert.equal(out.mainClass, null); + assert.equal(out.premainClass, null); + assert.deepEqual(out.premainAttrs, {}); + }); + + it('non-string input returns all-null without throwing', () => { + const out = parseManifestMf(null); + assert.equal(out.mainClass, null); + }); + + it('garbage input returns all-null without throwing', () => { + const out = parseManifestMf('lkajsdf qwertyui 12345\n!!!\n'); + assert.equal(out.mainClass, null); + assert.equal(out.premainClass, null); + }); + + it('CRLF input parses identically to LF', () => { + const lf = 'Main-Class: org.a\nPremain-Class: org.b\n'; + const crlf = lf.replace(/\n/g, '\r\n'); + assert.deepEqual(parseManifestMf(lf), parseManifestMf(crlf)); + }); +});