// ide-extension-parser.mjs — Parse IDE extension manifests into a normalized shape.
// Zero dependencies (Node.js builtins only).
//
// Two extension types are supported:
//   - type: 'vscode'    → parseVSCodeExtension (package.json + contributes)
//   - type: 'jetbrains' → parseIntelliJPlugin (plugin.xml + MANIFEST.MF inside JARs)

import { readFile, readdir, stat, mkdtemp, rm, access } from 'node:fs/promises';
import { join, basename } from 'node:path';
import { tmpdir } from 'node:os';
import { createHash } from 'node:crypto';
import { extractToDir } from './zip-extract.mjs';

/**
 * Report whether a filesystem path is accessible to the current process.
 * @param {string} p
 * @returns {Promise<boolean>} true when `access(p)` succeeds, false on any error.
 */
async function pathExists(p) {
  try {
    await access(p);
    return true;
  } catch {
    return false;
  }
}

/**
 * @typedef {object} ParsedManifest
 * @property {'vscode'|'jetbrains'} type
 *
 * // Shared / VS Code fields (kept at top level for backward compat with runIdeChecks)
 * @property {string} id
 * @property {string} publisher
 * @property {string} name
 * @property {string} version
 * @property {object} engines
 * @property {string|null} main
 * @property {string|null} browser
 * @property {string[]} activationEvents
 * @property {object} contributes
 * @property {string[]} extensionPack
 * @property {string[]} extensionDependencies
 * @property {string[]} extensionKind
 * @property {string[]} categories
 * @property {object} capabilities
 * @property {object} scripts
 * @property {object|string|null} repository
 * @property {object} dependencies
 * @property {boolean} hasSignature
 *
 * // JetBrains-only fields (present only when type === 'jetbrains')
 * @property {string} [pluginId]
 * @property {string|null} [sinceBuild]
 * @property {string|null} [untilBuild]
 * @property {Array<{id:string, optional:boolean, configFile:string|null}>} [depends]
 * @property {Array<{namespace:string, name:string, attrs:object}>} [extensionDeclarations]
 * @property {string[]} [applicationComponents]
 * @property {Array<{topic:string, class:string}>} [listeners]
 * @property {boolean} [hasPremainClass]
 * @property {string|null} [premainClass]
 * @property {Array<{path:string, size:number, sha256:string}>} [nativeBinaries]
 * @property {Array<{name:string, version:string|null, shaded:boolean, coords:string|null}>} [bundledJars]
 * @property {Array<{id:string, path:string}>} [themeProviders]
 */

/**
 * Return `value` when it is a non-null, non-array object; otherwise `{}`.
 * Arrays are rejected because `typeof [] === 'object'` would otherwise let
 * them through into fields documented as plain objects.
 * @param {unknown} value
 * @returns {object}
 */
function asPlainObject(value) {
  return value !== null && typeof value === 'object' && !Array.isArray(value) ? value : {};
}

/**
 * Return the string members of `value` when it is an array; otherwise `[]`.
 * @param {unknown} value
 * @returns {string[]}
 */
function asStringArray(value) {
  return Array.isArray(value) ? value.filter((item) => typeof item === 'string') : [];
}

/**
 * Parse a VS Code extension directory.
 *
 * Returns null when package.json cannot be read, is not valid JSON, is not an
 * object, or lacks a string `publisher`/`name`. (Warnings collected before a
 * null return are not surfaced to the caller.)
 *
 * @param {string} extRoot - Absolute path to extracted extension root.
 * @returns {Promise<{ manifest: ParsedManifest, warnings: string[] } | null>}
 */
export async function parseVSCodeExtension(extRoot) {
  const warnings = [];
  const pkgPath = join(extRoot, 'package.json');

  let raw;
  try {
    raw = await readFile(pkgPath, 'utf8');
  } catch {
    return null;
  }

  let pkg;
  try {
    pkg = JSON.parse(raw);
  } catch (err) {
    warnings.push(`malformed package.json at ${pkgPath}: ${err.message}`);
    return null;
  }
  if (!pkg || typeof pkg !== 'object' || Array.isArray(pkg)) {
    warnings.push(`package.json at ${pkgPath} is not an object`);
    return null;
  }

  const publisher = typeof pkg.publisher === 'string' ? pkg.publisher : '';
  const name = typeof pkg.name === 'string' ? pkg.name : '';
  const version = typeof pkg.version === 'string' ? pkg.version : '';
  if (!publisher || !name) {
    warnings.push(`missing publisher/name in ${pkgPath}`);
    return null;
  }

  // `repository` may legitimately be a URL string or a { type, url } object;
  // anything else (numbers, booleans, arrays, empty string) becomes null.
  const repo = pkg.repository;
  const isRepoString = typeof repo === 'string' && repo !== '';
  const isRepoObject = repo !== null && typeof repo === 'object' && !Array.isArray(repo);
  const repository = isRepoString || isRepoObject ? repo : null;

  const hasSignature = await pathExists(join(extRoot, '.signature.p7s'));

  const manifest = {
    type: 'vscode',
    id: `${publisher}.${name}`.toLowerCase(),
    publisher: publisher.toLowerCase(),
    name: name.toLowerCase(),
    version,
    engines: asPlainObject(pkg.engines),
    main: typeof pkg.main === 'string' ? pkg.main : null,
    browser: typeof pkg.browser === 'string' ? pkg.browser : null,
    activationEvents: asStringArray(pkg.activationEvents),
    contributes: asPlainObject(pkg.contributes),
    extensionPack: asStringArray(pkg.extensionPack),
    extensionDependencies: asStringArray(pkg.extensionDependencies),
    extensionKind: asStringArray(pkg.extensionKind),
    categories: asStringArray(pkg.categories),
    capabilities: asPlainObject(pkg.capabilities),
    scripts: asPlainObject(pkg.scripts),
    repository,
    dependencies: asPlainObject(pkg.dependencies),
    hasSignature,
  };

  return { manifest, warnings };
}

/**
 * Parse a .vsix file. Stub — caller extracts first via lib/zip-extract.mjs.
 * Always rejects; the message tells the caller to extract the archive and
 * pass the extracted directory instead.
 * @param {string} vsixPath
 * @throws {Error}
 */
export async function parseVsixFile(vsixPath) {
  throw new Error(`VSIX parsing not implemented in library-direct form. Extract manually (unzip ${vsixPath}) and pass the extracted directory.`);
}

// ---------------------------------------------------------------------------
// JetBrains helpers — zero-dep plugin.xml + MANIFEST.MF parsers
// ---------------------------------------------------------------------------

const NAMED_ENTITIES = {
  amp: '&',
  lt: '<',
  gt: '>',
  quot: '"',
  apos: "'",
};

// Largest valid Unicode code point; String.fromCodePoint throws above this.
const MAX_CODE_POINT = 0x10ffff;

/**
 * Decode XML entity references in text content (non-CDATA).
 *
 * Handles the five predefined named entities plus decimal (`&#65;`) and
 * hexadecimal (`&#x41;`) character references. Unknown names and numeric
 * references outside the Unicode range are left untouched rather than
 * throwing, so callers can rely on this never raising for string input.
 *
 * @param {string} s
 * @returns {string}
 */
function decodeEntities(s) {
  return s.replace(/&(#x[0-9a-fA-F]+|#[0-9]+|[a-zA-Z]+);/g, (full, inner) => {
    if (inner.startsWith('#')) {
      const isHex = inner[1] === 'x';
      const cp = Number.parseInt(inner.slice(isHex ? 2 : 1), isHex ? 16 : 10);
      // Guard the range: fromCodePoint raises RangeError past U+10FFFF.
      return Number.isInteger(cp) && cp <= MAX_CODE_POINT ? String.fromCodePoint(cp) : full;
    }
    return Object.prototype.hasOwnProperty.call(NAMED_ENTITIES, inner)
      ? NAMED_ENTITIES[inner]
      : full;
  });
}

/**
 * Capture the first match of a named element. Returns its text content
 * (with CDATA honoured, otherwise entity-decoded), or null.
 * @param {string} xml
 * @param {string} tag - Element name; interpolated into a RegExp, so it must
 *   not contain regex metacharacters (all in-file callers pass literals).
 * @returns {string|null}
 */
function firstElementText(xml, tag) {
  const re = new RegExp(`<${tag}\\b[^>]*>([\\s\\S]*?)</${tag}>`, 'i');
  const m = xml.match(re);
  if (!m) return null;
  return normalizeTextContent(m[1]);
}

/**
 * Trim element text; unwrap a single enclosing CDATA section verbatim, or
 * entity-decode plain text.
 * @param {string} raw
 * @returns {string}
 */
function normalizeTextContent(raw) {
  const trimmed = raw.trim();
  if (trimmed.startsWith('<![CDATA[') && trimmed.endsWith(']]>')) {
    // 9 === '<![CDATA['.length, 3 === ']]>'.length
    return trimmed.slice(9, -3);
  }
  return decodeEntities(trimmed);
}

/**
 * Parse a `key="value"` or `key='value'` attribute list.
 * @param {string} attrBlob - e.g. ` id="X" path='Y'`
 * @returns {Record<string, string>} Attribute values, entity-decoded.
 */
function parseAttrs(attrBlob) {
  const attrs = {};
  if (!attrBlob) return attrs;
  const re = /([\w-]+)\s*=\s*(?:"([^"]*)"|'([^']*)')/g;
  let m;
  while ((m = re.exec(attrBlob)) !== null) {
    attrs[m[1]] = decodeEntities(m[2] !== undefined ? m[2] : m[3]);
  }
  return attrs;
}

/**
 * Parse a JetBrains `plugin.xml` document.
 *
 * Regex-based, intentionally lenient. Malformed input returns
 * `{ manifest: null, warnings: [...] }` rather than throwing.
 *
 * @param {string} xmlString
 * @returns {{ manifest: object|null, warnings: string[] }}
 */
export function parsePluginXml(xmlString) {
  const warnings = [];
  if (typeof xmlString !== 'string') {
    return { manifest: null, warnings: ['input is not a string'] };
  }

  // --- Pre-processing pipeline ---
  let xml = xmlString.replace(/^\uFEFF/, ''); // strip UTF-8 BOM
  xml = xml.replace(/\r\n?/g, '\n'); // normalize line endings
  xml = xml.replace(/<!--[\s\S]*?-->/g, ''); // drop XML comments

  // Bail on obviously malformed (no <idea-plugin> root and no <id>)
  if (!/<idea-plugin\b/i.test(xml) && !/<id\b/i.test(xml)) {
    warnings.push('no <idea-plugin> or <id> element found — not a plugin.xml');
    return { manifest: null, warnings };
  }

  // Bail on unbalanced-tag smell: count open vs close for <id> and <name>.
  // Both are required non-self-closing elements in plugin.xml, so any mismatch
  // signals truncation or malformed input.
  const balanceChecks = [
    { open: /<id\b[^>]*>/g, close: /<\/id>/g, tag: 'id' },
    { open: /<name\b[^>]*>/g, close: /<\/name>/g, tag: 'name' },
  ];
  for (const { open, close, tag } of balanceChecks) {
    const o = (xml.match(open) || []).length;
    const c = (xml.match(close) || []).length;
    if (o > 0 && o !== c) {
      warnings.push(`unbalanced <${tag}> tags — truncated input`);
      return { manifest: null, warnings };
    }
  }

  // Run an extractor, converting any exception into a labelled warning.
  const safe = (fn, label) => {
    try {
      return fn();
    } catch (err) {
      warnings.push(`${label}: ${err.message}`);
      return null;
    }
  };

  const pluginId = safe(() => firstElementText(xml, 'id'), 'pluginId') || '';
  const name = safe(() => firstElementText(xml, 'name'), 'name') || '';
  const version = safe(() => firstElementText(xml, 'version'), 'version') || '';

  // <vendor url="...">Name</vendor>
  let vendor = '';
  let vendorUrl = null;
  const vendorMatch = xml.match(/<vendor\b([^>]*)>([\s\S]*?)<\/vendor>/i);
  if (vendorMatch) {
    const attrs = parseAttrs(vendorMatch[1]);
    vendorUrl = attrs.url || null;
    vendor = normalizeTextContent(vendorMatch[2]);
  }

  // <idea-version since-build="..." until-build="..."/>
  let sinceBuild = null;
  let untilBuild = null;
  const ideaVersionMatch = xml.match(/<idea-version\b([^>]*)\/?\s*>/i);
  if (ideaVersionMatch) {
    const attrs = parseAttrs(ideaVersionMatch[1]);
    sinceBuild = attrs['since-build'] || null;
    untilBuild = attrs['until-build'] || null;
  }

  // <depends optional="true" config-file="...">ID</depends>
  const depends = [];
  const dependsRe = /<depends\b([^>]*)>([\s\S]*?)<\/depends>/gi;
  let dm;
  while ((dm = dependsRe.exec(xml)) !== null) {
    const attrs = parseAttrs(dm[1]);
    depends.push({
      id: normalizeTextContent(dm[2]),
      optional: attrs.optional === 'true',
      configFile: attrs['config-file'] || null,
    });
  }

  // <extensions defaultExtensionNs="...">...</extensions>
  const extensionDeclarations = [];
  const themeProviders = [];
  const extBlockRe = /<extensions\b([^>]*)>([\s\S]*?)<\/extensions>/gi;
  let em;
  while ((em = extBlockRe.exec(xml)) !== null) {
    const attrs = parseAttrs(em[1]);
    const ns = attrs.defaultExtensionNs || 'com.intellij';
    const body = em[2];
    // Children: <(name) ... /> or <(name) ...>...</(name)>
    // Use [^>]*? (non-greedy, slash allowed in attr values like path="/x/y")
    // so self-closing elements with slashes in attributes still match.
    const childRe = /<([\w.-]+)\b([^>]*?)(?:\/\s*>|>([\s\S]*?)<\/\1>)/g;
    let cm;
    while ((cm = childRe.exec(body)) !== null) {
      const childName = cm[1];
      const childAttrs = parseAttrs(cm[2]);
      extensionDeclarations.push({ namespace: ns, name: childName, attrs: childAttrs });
      if (childName === 'themeProvider') {
        themeProviders.push({
          id: childAttrs.id || '',
          path: childAttrs.path || '',
        });
      }
    }
  }

  // <application-components><component><implementation-class>X</implementation-class>
  const applicationComponents = [];
  const appCompBlockRe = /<application-components\b[^>]*>([\s\S]*?)<\/application-components>/gi;
  let am;
  while ((am = appCompBlockRe.exec(xml)) !== null) {
    const implRe = /<implementation-class>\s*([\s\S]*?)\s*<\/implementation-class>/g;
    let im;
    while ((im = implRe.exec(am[1])) !== null) {
      applicationComponents.push(decodeEntities(im[1]).trim());
    }
  }

  // <listener topic="..." class="..."/>
  const listeners = [];
  const listenerRe = /<listener\b([^>]*)\/?\s*>/gi;
  let lm;
  while ((lm = listenerRe.exec(xml)) !== null) {
    const attrs = parseAttrs(lm[1]);
    listeners.push({
      topic: attrs.topic || '',
      class: attrs.class || '',
    });
  }

  return {
    manifest: {
      pluginId,
      name,
      version,
      vendor,
      vendorUrl,
      sinceBuild,
      untilBuild,
      depends,
      extensionDeclarations,
      applicationComponents,
      listeners,
      themeProviders,
    },
    warnings,
  };
}

/**
 * Parse a `META-INF/MANIFEST.MF` file. Simple `Key: Value` line protocol.
 * Handles RFC-822 72-char continuation lines (lines starting with space/tab
 * are appended to the previous line's value).
 *
 * When a key appears more than once, the last occurrence wins.
 *
 * @param {string} mfString
 * @returns {{mainClass: string|null, premainClass: string|null, implTitle: string|null, implVersion: string|null, premainAttrs: object}}
 */
export function parseManifestMf(mfString) {
  const out = {
    mainClass: null,
    premainClass: null,
    implTitle: null,
    implVersion: null,
    premainAttrs: {},
  };
  if (typeof mfString !== 'string' || mfString.length === 0) return out;

  // Pre-processing: strip BOM, normalize line endings.
  let s = mfString.replace(/^\uFEFF/, '');
  s = s.replace(/\r\n?/g, '\n');

  // Concatenate continuation lines (lines beginning with a single space or tab).
  const rawLines = s.split('\n');
  const logical = [];
  for (const line of rawLines) {
    const isContinuation = line.length > 0 && (line[0] === ' ' || line[0] === '\t');
    if (isContinuation && logical.length > 0) {
      logical[logical.length - 1] += line.slice(1);
    } else {
      logical.push(line);
    }
  }

  for (const line of logical) {
    const idx = line.indexOf(': ');
    if (idx === -1) continue; // blank line or not a `Key: Value` header
    const key = line.slice(0, idx).trim();
    const value = line.slice(idx + 2);

    if (key === 'Main-Class') out.mainClass = value;
    else if (key === 'Premain-Class') out.premainClass = value;
    else if (key === 'Implementation-Title') out.implTitle = value;
    else if (key === 'Implementation-Version') out.implVersion = value;

    // Forensic collection of all Premain-* + Agent-* attributes
    if (/^(Premain-|Agent-|Boot-Class-Path|Can-)/.test(key)) {
      out.premainAttrs[key] = value;
    }
  }

  return out;
}

const NATIVE_BIN_RE = /\.(dll|so|dylib|jnilib|exe)$/i;
const SIGNATURE_FILE_RE = /\.(SF|RSA|DSA|EC)$/;

/**
 * Recursively list regular files beneath `rootDir`.
 * Directories that cannot be read are skipped silently (best-effort walk).
 * @param {string} rootDir
 * @returns {Promise<string[]>} Paths built by joining onto `rootDir`.
 */
async function walkFiles(rootDir) {
  const out = [];
  async function recurse(dir) {
    let entries;
    try {
      entries = await readdir(dir, { withFileTypes: true });
    } catch {
      return;
    }
    for (const entry of entries) {
      const full = join(dir, entry.name);
      if (entry.isDirectory()) await recurse(full);
      else if (entry.isFile()) out.push(full);
    }
  }
  await recurse(rootDir);
  return out;
}

/**
 * Parse an IntelliJ plugin directory layout:
 *   <pluginRoot>/lib/*.jar — main jar contains META-INF/plugin.xml
 *
 * Every jar in lib/ is extracted into a temporary directory (removed before
 * returning). The first jar, in sorted name order, that contains
 * META-INF/plugin.xml is treated as the main jar.
 *
 * @param {string} pluginRoot
 * @returns {Promise<{ manifest: ParsedManifest|null, warnings: string[] } | null>}
 *   null when `pluginRoot` is not a non-empty string; otherwise a result whose
 *   `manifest` is null when the plugin could not be parsed (see `warnings`).
 */
export async function parseIntelliJPlugin(pluginRoot) {
  if (typeof pluginRoot !== 'string' || !pluginRoot) return null;
  const warnings = [];

  const libDir = join(pluginRoot, 'lib');
  try {
    const s = await stat(libDir);
    if (!s.isDirectory()) {
      warnings.push('IDE-JB-NO-LIB-DIR: lib is not a directory');
      return { manifest: null, warnings };
    }
  } catch {
    warnings.push('IDE-JB-NO-LIB-DIR: lib directory missing');
    return { manifest: null, warnings };
  }

  let jarNames;
  try {
    // Sorted so that "first jar wins" does not depend on readdir order,
    // which varies by platform and filesystem.
    jarNames = (await readdir(libDir))
      .filter((n) => n.toLowerCase().endsWith('.jar'))
      .sort();
  } catch {
    warnings.push('IDE-JB-NO-LIB-DIR: cannot read lib');
    return { manifest: null, warnings };
  }
  if (jarNames.length === 0) {
    warnings.push('IDE-JB-NO-PLUGIN-XML: no jars in lib/');
    return { manifest: null, warnings };
  }

  const extractionRoot = await mkdtemp(join(tmpdir(), 'llmsec-jb-'));
  const extractedJars = [];

  try {
    for (const jarName of jarNames) {
      const jarPath = join(libDir, jarName);
      try {
        const jarBuffer = await readFile(jarPath);
        const jarDir = await mkdtemp(join(extractionRoot, 'jar-'));
        await extractToDir(jarBuffer, jarDir);
        extractedJars.push({ jarPath, jarName, jarDir });
      } catch (err) {
        warnings.push(`IDE-JB-JAR-EXTRACT: ${jarName}: ${err.message}`);
      }
    }

    if (extractedJars.length === 0) {
      warnings.push('IDE-JB-NO-PLUGIN-XML: no jars could be extracted');
      return { manifest: null, warnings };
    }

    // Locate main jar: first one containing META-INF/plugin.xml
    let mainJar = null;
    const mainJarCandidates = [];
    for (const ej of extractedJars) {
      const xmlPath = join(ej.jarDir, 'META-INF', 'plugin.xml');
      if (await pathExists(xmlPath)) {
        mainJarCandidates.push(ej);
        if (!mainJar) mainJar = ej;
      }
    }
    if (!mainJar) {
      warnings.push('IDE-JB-NO-PLUGIN-XML: no jar contains META-INF/plugin.xml');
      return { manifest: null, warnings };
    }
    if (mainJarCandidates.length > 1) {
      warnings.push(`IDE-JB-MULTIPLE-PLUGIN-XML: ${mainJarCandidates.length} jars contain plugin.xml; first wins`);
    }

    // Parse plugin.xml
    let pluginXmlResult;
    try {
      const xmlRaw = await readFile(join(mainJar.jarDir, 'META-INF', 'plugin.xml'), 'utf8');
      pluginXmlResult = parsePluginXml(xmlRaw);
    } catch (err) {
      warnings.push(`IDE-JB-PLUGIN-XML-READ: ${err.message}`);
      return { manifest: null, warnings };
    }
    if (pluginXmlResult.warnings.length) warnings.push(...pluginXmlResult.warnings);
    if (!pluginXmlResult.manifest) {
      warnings.push('IDE-JB-PLUGIN-XML-PARSE: unparseable plugin.xml');
      return { manifest: null, warnings };
    }
    const px = pluginXmlResult.manifest;

    // Parse main jar MANIFEST.MF
    let mainMf = { mainClass: null, premainClass: null, implTitle: null, implVersion: null, premainAttrs: {} };
    const mainMfPath = join(mainJar.jarDir, 'META-INF', 'MANIFEST.MF');
    if (await pathExists(mainMfPath)) {
      try {
        const mfRaw = await readFile(mainMfPath, 'utf8');
        mainMf = parseManifestMf(mfRaw);
      } catch (err) {
        warnings.push(`IDE-JB-MANIFEST-MF-READ: ${err.message}`);
      }
    }

    // Walk ALL jar-dirs for native binaries
    const nativeBinaries = [];
    for (const ej of extractedJars) {
      const files = await walkFiles(ej.jarDir);
      for (const f of files) {
        if (!NATIVE_BIN_RE.test(f)) continue;
        try {
          const buf = await readFile(f);
          nativeBinaries.push({
            // "<jar name>:<path inside the extracted jar>"
            path: `${ej.jarName}:${f.slice(ej.jarDir.length + 1)}`,
            // The buffer was just read in full, so its length is the file size.
            size: buf.length,
            sha256: createHash('sha256').update(buf).digest('hex'),
          });
        } catch (err) {
          warnings.push(`IDE-JB-NATIVE-READ: ${err.message}`);
        }
      }
    }

    // Parse every jar's MANIFEST.MF for bundled-jars list
    const bundledJars = [];
    for (const ej of extractedJars) {
      const mfPath = join(ej.jarDir, 'META-INF', 'MANIFEST.MF');
      let mf = { implTitle: null, implVersion: null };
      if (await pathExists(mfPath)) {
        try {
          mf = parseManifestMf(await readFile(mfPath, 'utf8'));
        } catch {
          // fall through with nulls
        }
      }
      bundledJars.push({
        name: ej.jarName,
        version: mf.implVersion || null,
        // Flagged as shaded when either Implementation-* header is missing.
        shaded: !mf.implTitle || !mf.implVersion,
        coords: mf.implTitle || null,
      });
    }

    // Signature check on main jar
    let hasSignature = false;
    try {
      const metaInfDir = join(mainJar.jarDir, 'META-INF');
      const metaEntries = await readdir(metaInfDir);
      hasSignature = metaEntries.some((f) => SIGNATURE_FILE_RE.test(f));
    } catch {
      /* no META-INF */
    }

    const pluginId = px.pluginId || basename(pluginRoot);

    const manifest = {
      type: 'jetbrains',
      id: pluginId.toLowerCase(),
      pluginId,
      publisher: (px.vendor || '').toLowerCase(),
      name: px.name || '',
      version: px.version || '',
      engines: {},
      main: null,
      browser: null,
      activationEvents: [],
      contributes: {},
      extensionPack: [],
      extensionDependencies: [],
      extensionKind: [],
      categories: [],
      capabilities: {},
      scripts: {},
      repository: px.vendorUrl || null,
      dependencies: {},
      hasSignature,
      sinceBuild: px.sinceBuild,
      untilBuild: px.untilBuild,
      depends: px.depends,
      extensionDeclarations: px.extensionDeclarations,
      applicationComponents: px.applicationComponents,
      listeners: px.listeners,
      themeProviders: px.themeProviders,
      hasPremainClass: Boolean(mainMf.premainClass),
      premainClass: mainMf.premainClass || null,
      nativeBinaries,
      bundledJars,
    };

    return { manifest, warnings };
  } catch (err) {
    warnings.push(`IDE-JB-UNCAUGHT: ${err.message}`);
    return { manifest: null, warnings };
  } finally {
    // Best-effort cleanup: a failed temp-dir removal must not reject and
    // discard an otherwise valid result. `warnings` is the same array the
    // returned object references, so this entry is visible to the caller.
    try {
      await rm(extractionRoot, { recursive: true, force: true });
    } catch (err) {
      warnings.push(`IDE-JB-CLEANUP: ${err.message}`);
    }
  }
}