// ktg-plugin-marketplace/plugins/llm-security/scanners/lib/ide-extension-parser.mjs
//
// 619 lines
// 21 KiB
// JavaScript

// ide-extension-parser.mjs — Parse IDE extension manifests into a normalized shape.
// Zero dependencies (Node.js builtins only).
//
// Two extension types are supported:
// - type: 'vscode' → parseVSCodeExtension (package.json + contributes)
// - type: 'jetbrains' → parseIntelliJPlugin (plugin.xml + MANIFEST.MF inside JARs)
import { readFile, readdir, stat, mkdtemp, rm, access } from 'node:fs/promises';
import { join, basename } from 'node:path';
import { tmpdir } from 'node:os';
import { createHash } from 'node:crypto';
import { extractToDir } from './zip-extract.mjs';
/**
 * Report whether a filesystem path is accessible.
 * @param {string} p - Path to probe.
 * @returns {Promise<boolean>} true when `access(p)` resolves, false when it rejects.
 */
async function pathExists(p) {
  let reachable = true;
  try {
    await access(p);
  } catch {
    reachable = false;
  }
  return reachable;
}
/**
* @typedef {object} ParsedManifest
* @property {'vscode'|'jetbrains'} type
*
* // Shared / VS Code fields (kept at top level for backward compat with runIdeChecks)
* @property {string} id
* @property {string} publisher
* @property {string} name
* @property {string} version
* @property {object} engines
* @property {string|null} main
* @property {string|null} browser
* @property {string[]} activationEvents
* @property {object} contributes
* @property {string[]} extensionPack
* @property {string[]} extensionDependencies
* @property {string[]} extensionKind
* @property {string[]} categories
* @property {object} capabilities
* @property {object} scripts
* @property {object|string|null} repository
* @property {object} dependencies
* @property {boolean} hasSignature
*
* // JetBrains-only fields (present only when type === 'jetbrains')
* @property {string} [pluginId]
* @property {string|null} [sinceBuild]
* @property {string|null} [untilBuild]
* @property {Array<{id:string, optional:boolean, configFile:string|null}>} [depends]
* @property {Array<{namespace:string, name:string, attrs:object}>} [extensionDeclarations]
* @property {string[]} [applicationComponents]
* @property {Array<{topic:string, class:string}>} [listeners]
* @property {boolean} [hasPremainClass]
* @property {string|null} [premainClass]
* @property {Array<{path:string, size:number, sha256:string}>} [nativeBinaries]
* @property {Array<{name:string, version:string|null, shaded:boolean, coords:string|null}>} [bundledJars]
* @property {Array<{id:string, path:string}>} [themeProviders]
*/
/**
 * Read `<extRoot>/package.json` and normalize it into a VS Code manifest.
 *
 * Returns null when package.json cannot be read, is not valid JSON, is not
 * an object, or lacks a string `publisher`/`name`. Note that every failure
 * path returns null, so warnings recorded on those paths are not visible to
 * the caller.
 *
 * @param {string} extRoot - Absolute path to extracted extension root.
 * @returns {Promise<{ manifest: ParsedManifest, warnings: string[] } | null>}
 */
export async function parseVSCodeExtension(extRoot) {
  const warnings = [];
  const pkgPath = join(extRoot, 'package.json');

  // Coercion helpers: fall back to a neutral value when the field has the wrong type.
  const textOr = (value, fallback) => (typeof value === 'string' ? value : fallback);
  const objectOrEmpty = (value) => (value && typeof value === 'object' ? value : {});
  const stringsIn = (value) =>
    (Array.isArray(value) ? value.filter((item) => typeof item === 'string') : []);

  let raw;
  try {
    raw = await readFile(pkgPath, 'utf8');
  } catch {
    return null;
  }

  let pkg;
  try {
    pkg = JSON.parse(raw);
  } catch (err) {
    warnings.push(`malformed package.json at ${pkgPath}: ${err.message}`);
    return null;
  }

  if (!pkg || typeof pkg !== 'object') {
    warnings.push(`package.json at ${pkgPath} is not an object`);
    return null;
  }

  const publisher = textOr(pkg.publisher, '');
  const name = textOr(pkg.name, '');
  const version = textOr(pkg.version, '');
  if (!publisher || !name) {
    warnings.push(`missing publisher/name in ${pkgPath}`);
    return null;
  }

  // The extension counts as signed when a `.signature.p7s` file sits in its root.
  const hasSignature = await pathExists(join(extRoot, '.signature.p7s'));

  const manifest = {
    type: 'vscode',
    // id, publisher and name are all lower-cased.
    id: `${publisher}.${name}`.toLowerCase(),
    publisher: publisher.toLowerCase(),
    name: name.toLowerCase(),
    version,
    engines: objectOrEmpty(pkg.engines),
    main: textOr(pkg.main, null),
    browser: textOr(pkg.browser, null),
    activationEvents: stringsIn(pkg.activationEvents),
    contributes: objectOrEmpty(pkg.contributes),
    extensionPack: stringsIn(pkg.extensionPack),
    extensionDependencies: stringsIn(pkg.extensionDependencies),
    extensionKind: stringsIn(pkg.extensionKind),
    categories: stringsIn(pkg.categories),
    capabilities: objectOrEmpty(pkg.capabilities),
    scripts: objectOrEmpty(pkg.scripts),
    repository: pkg.repository || null,
    dependencies: objectOrEmpty(pkg.dependencies),
    hasSignature,
  };
  return { manifest, warnings };
}
/**
 * Placeholder for direct .vsix parsing: always rejects. The error message
 * tells the caller to unzip the archive and pass the extracted directory
 * instead.
 * @param {string} vsixPath - Path of the .vsix file; only echoed in the error message.
 * @throws {Error} Always.
 */
export async function parseVsixFile(vsixPath) {
  const message = `VSIX parsing not implemented in library-direct form. Extract manually (unzip ${vsixPath}) and pass the extracted directory.`;
  throw new Error(message);
}
// ---------------------------------------------------------------------------
// JetBrains helpers — zero-dep plugin.xml + MANIFEST.MF parsers
// ---------------------------------------------------------------------------
// Named entity references recognised by decodeEntities; any other name is left as-is.
const NAMED_ENTITIES = {
  amp: '&', lt: '<', gt: '>', quot: '"', apos: "'",
};
/**
 * Decode XML entity references in text content (non-CDATA).
 *
 * Handles the named entities in NAMED_ENTITIES plus decimal (`&#65;`) and
 * hexadecimal (`&#x41;`) character references, in a single pass (so
 * `&amp;lt;` becomes `&lt;`, not `<`). Anything that cannot be decoded —
 * unknown names, malformed numbers, code points above U+10FFFF — is left in
 * the output verbatim instead of throwing.
 *
 * @param {string} s
 * @returns {string}
 */
function decodeEntities(s) {
  // String.fromCodePoint throws RangeError outside 0..0x10FFFF; since the
  // input is untrusted XML, keep such references unchanged rather than crash.
  const fromCodePoint = (cp, original) =>
    (Number.isInteger(cp) && cp >= 0 && cp <= 0x10ffff ? String.fromCodePoint(cp) : original);

  // Decimal references accept decimal digits only, so `&#1a;` is not
  // half-parsed as U+0001; hex references need the `x` prefix.
  return s.replace(/&(#x[0-9a-fA-F]+|#[0-9]+|[a-zA-Z]+);/g, (full, inner) => {
    if (inner.startsWith('#x')) {
      return fromCodePoint(Number.parseInt(inner.slice(2), 16), full);
    }
    if (inner.startsWith('#')) {
      return fromCodePoint(Number.parseInt(inner.slice(1), 10), full);
    }
    // Own-property check so names like `constructor` are not resolved via the prototype.
    return Object.prototype.hasOwnProperty.call(NAMED_ENTITIES, inner)
      ? NAMED_ENTITIES[inner]
      : full;
  });
}
/**
 * Find the first `<tag ...>...</tag>` element (tag name matched
 * case-insensitively) and return its inner text after
 * normalizeTextContent, or null when no such element exists.
 * @param {string} xml
 * @param {string} tag - Element name; interpolated into a RegExp unescaped.
 * @returns {string|null}
 */
function firstElementText(xml, tag) {
  const pattern = new RegExp(`<${tag}\\b[^>]*>([\\s\\S]*?)</${tag}>`, 'i');
  const found = xml.match(pattern);
  return found ? normalizeTextContent(found[1]) : null;
}
/**
 * Normalize an element's raw inner text: trim it, then either unwrap a
 * CDATA section that spans the whole trimmed text (content returned as-is)
 * or entity-decode it.
 * @param {string} raw
 * @returns {string}
 */
function normalizeTextContent(raw) {
  const CDATA_OPEN = '<![CDATA[';
  const CDATA_CLOSE = ']]>';
  const text = raw.trim();
  const isCdata = text.startsWith(CDATA_OPEN) && text.endsWith(CDATA_CLOSE);
  if (!isCdata) {
    return decodeEntities(text);
  }
  return text.slice(CDATA_OPEN.length, -CDATA_CLOSE.length);
}
/**
 * Collect `key="value"` / `key='value'` pairs from the attribute portion of
 * a tag. Values are entity-decoded; when a key repeats, the last one wins.
 * @param {string} attrBlob - e.g. ` id="X" path='Y'`
 * @returns {Record<string,string>}
 */
function parseAttrs(attrBlob) {
  const attrs = {};
  if (!attrBlob) return attrs;
  const pairPattern = /([\w-]+)\s*=\s*(?:"([^"]*)"|'([^']*)')/g;
  for (const [, key, doubleQuoted, singleQuoted] of attrBlob.matchAll(pairPattern)) {
    // Exactly one of the two quoted groups participates in a match.
    const rawValue = doubleQuoted === undefined ? singleQuoted : doubleQuoted;
    attrs[key] = decodeEntities(rawValue);
  }
  return attrs;
}
/**
 * Parse a JetBrains `plugin.xml` document.
 *
 * Regex-based, intentionally lenient. Malformed input returns
 * `{ manifest: null, warnings: [...] }` rather than throwing; this also
 * holds when a helper throws while fields are being extracted (the error
 * message is recorded as a warning).
 *
 * @param {string} xmlString
 * @returns {{ manifest: object|null, warnings: string[] }}
 */
export function parsePluginXml(xmlString) {
  const warnings = [];
  if (typeof xmlString !== 'string') {
    return { manifest: null, warnings: ['input is not a string'] };
  }

  // --- Pre-processing pipeline: drop BOM, normalize line endings, strip comments ---
  let xml = xmlString.replace(/^\uFEFF/, '');
  xml = xml.replace(/\r\n?/g, '\n');
  xml = xml.replace(/<!--[\s\S]*?-->/g, '');

  // Bail on obviously malformed (no <idea-plugin> root and no <id>)
  if (!/<idea-plugin\b/i.test(xml) && !/<id\b/i.test(xml)) {
    warnings.push('no <idea-plugin> or <id> element found — not a plugin.xml');
    return { manifest: null, warnings };
  }

  // Bail on unbalanced-tag smell: count open vs close for <id> and <name>.
  // Both are required non-self-closing elements in plugin.xml, so any mismatch
  // signals truncation or malformed input.
  const balanceChecks = [
    { open: /<id\b[^>]*>/g, close: /<\/id>/g, tag: 'id' },
    { open: /<name\b[^>]*>/g, close: /<\/name>/g, tag: 'name' },
  ];
  for (const { open, close, tag } of balanceChecks) {
    const openCount = (xml.match(open) || []).length;
    const closeCount = (xml.match(close) || []).length;
    if (openCount > 0 && openCount !== closeCount) {
      warnings.push(`unbalanced <${tag}> tags — truncated input`);
      return { manifest: null, warnings };
    }
  }

  // Per-field guard: a failure in one scalar field becomes a warning and a null value.
  const safe = (fn, label) => {
    try {
      return fn();
    } catch (err) {
      warnings.push(`${label}: ${err.message}`);
      return null;
    }
  };

  try {
    const pluginId = safe(() => firstElementText(xml, 'id'), 'pluginId') || '';
    const name = safe(() => firstElementText(xml, 'name'), 'name') || '';
    const version = safe(() => firstElementText(xml, 'version'), 'version') || '';

    // <vendor url="..." email="...">Name</vendor>
    let vendor = '';
    let vendorUrl = null;
    const vendorMatch = xml.match(/<vendor\b([^>]*)>([\s\S]*?)<\/vendor>/i);
    if (vendorMatch) {
      const attrs = parseAttrs(vendorMatch[1]);
      vendorUrl = attrs.url || null;
      vendor = normalizeTextContent(vendorMatch[2]);
    }

    // <idea-version since-build="A" until-build="B"/>
    let sinceBuild = null;
    let untilBuild = null;
    const ideaVersionMatch = xml.match(/<idea-version\b([^>]*)\/?\s*>/i);
    if (ideaVersionMatch) {
      const attrs = parseAttrs(ideaVersionMatch[1]);
      sinceBuild = attrs['since-build'] || null;
      untilBuild = attrs['until-build'] || null;
    }

    // <depends optional="true" config-file="X">ID</depends>
    const depends = [];
    for (const [, attrBlob, inner] of xml.matchAll(/<depends\b([^>]*)>([\s\S]*?)<\/depends>/gi)) {
      const attrs = parseAttrs(attrBlob);
      depends.push({
        id: normalizeTextContent(inner),
        optional: attrs.optional === 'true',
        configFile: attrs['config-file'] || null,
      });
    }

    // <extensions defaultExtensionNs="com.intellij">...</extensions>
    const extensionDeclarations = [];
    const themeProviders = [];
    for (const [, blockAttrBlob, body] of xml.matchAll(/<extensions\b([^>]*)>([\s\S]*?)<\/extensions>/gi)) {
      const ns = parseAttrs(blockAttrBlob).defaultExtensionNs || 'com.intellij';
      // Children: <(name) ... /> or <(name) ...>...</(name)>
      // Use [^>]*? (non-greedy, slash allowed in attr values like path="/x/y")
      // so self-closing elements with slashes in attributes still match.
      const childRe = /<([\w.-]+)\b([^>]*?)(?:\/\s*>|>([\s\S]*?)<\/\1>)/g;
      for (const [, childName, childAttrBlob] of body.matchAll(childRe)) {
        const childAttrs = parseAttrs(childAttrBlob);
        extensionDeclarations.push({ namespace: ns, name: childName, attrs: childAttrs });
        if (childName === 'themeProvider') {
          themeProviders.push({
            id: childAttrs.id || '',
            path: childAttrs.path || '',
          });
        }
      }
    }

    // <application-components><component><implementation-class>X</implementation-class></component></application-components>
    const applicationComponents = [];
    for (const [, block] of xml.matchAll(/<application-components\b[^>]*>([\s\S]*?)<\/application-components>/gi)) {
      for (const [, className] of block.matchAll(/<implementation-class>\s*([\s\S]*?)\s*<\/implementation-class>/g)) {
        applicationComponents.push(decodeEntities(className).trim());
      }
    }

    // <applicationListener topic="X" class="Y"/>
    // The attribute capture must tolerate "/" inside attribute values;
    // excluding it would silently drop such listeners from the result. A
    // trailing "/" of a self-closing tag ends up in the capture and is
    // ignored by parseAttrs.
    const listeners = [];
    for (const [, attrBlob] of xml.matchAll(/<applicationListener\b([^>]*)>/gi)) {
      const attrs = parseAttrs(attrBlob);
      listeners.push({
        topic: attrs.topic || '',
        class: attrs.class || '',
      });
    }

    return {
      manifest: {
        pluginId,
        name,
        version,
        vendor,
        vendorUrl,
        sinceBuild,
        untilBuild,
        depends,
        extensionDeclarations,
        applicationComponents,
        listeners,
        themeProviders,
      },
      warnings,
    };
  } catch (err) {
    // Honour the no-throw contract for anything the per-field guard does not cover.
    warnings.push(`plugin.xml: ${err.message}`);
    return { manifest: null, warnings };
  }
}
/**
 * Parse a `META-INF/MANIFEST.MF` file. Simple `Key: Value` line protocol.
 * Handles RFC-822 72-char continuation lines (lines starting with space/tab
 * are appended to the previous line's value).
 *
 * Header names are matched case-insensitively, because JAR manifest
 * attribute names are case-insensitive: `premain-class:` is honoured the
 * same as `Premain-Class:` and must not slip past the scanner. Keys in
 * `premainAttrs` keep the spelling used in the file.
 *
 * All sections of the manifest are scanned; when a header repeats, the last
 * occurrence wins.
 *
 * @param {string} mfString
 * @returns {{mainClass: string|null, premainClass: string|null, implTitle: string|null, implVersion: string|null, premainAttrs: object}}
 */
export function parseManifestMf(mfString) {
  const out = {
    mainClass: null,
    premainClass: null,
    implTitle: null,
    implVersion: null,
    premainAttrs: {},
  };
  if (typeof mfString !== 'string' || mfString.length === 0) return out;

  // Pre-processing: drop BOM, normalize line endings.
  const text = mfString.replace(/^\uFEFF/, '').replace(/\r\n?/g, '\n');

  // Concatenate continuation lines (lines beginning with a single space or tab).
  const logical = [];
  for (const line of text.split('\n')) {
    const isContinuation = line.length > 0 && (line[0] === ' ' || line[0] === '\t');
    if (isContinuation && logical.length > 0) {
      logical[logical.length - 1] += line.slice(1);
    } else {
      logical.push(line);
    }
  }

  for (const line of logical) {
    const idx = line.indexOf(': ');
    if (idx === -1) continue; // blank line or not a `Key: Value` header
    const key = line.slice(0, idx).trim();
    const value = line.slice(idx + 2);

    switch (key.toLowerCase()) {
      case 'main-class':
        out.mainClass = value;
        break;
      case 'premain-class':
        out.premainClass = value;
        break;
      case 'implementation-title':
        out.implTitle = value;
        break;
      case 'implementation-version':
        out.implVersion = value;
        break;
      default:
        break;
    }

    // Forensic collection of all Premain-* + Agent-* attributes
    if (/^(Premain-|Agent-|Boot-Class-Path|Can-)/i.test(key)) {
      out.premainAttrs[key] = value;
    }
  }
  return out;
}
// File-name suffixes treated as native binaries when scanning extracted jar
// contents; matched case-insensitively against the end of the path.
const NATIVE_BIN_RE = /\.(dll|so|dylib|jnilib|exe)$/i;
// File-name suffixes that mark a META-INF entry as a signature file;
// matched case-sensitively (upper-case suffixes only).
const SIGNATURE_FILE_RE = /\.(SF|RSA|DSA|EC)$/;
/**
 * Recursively list every regular file below `rootDir`.
 *
 * Traversal is depth-first in `readdir` order. Directories that cannot be
 * read contribute nothing, and entries that are neither directories nor
 * regular files are skipped.
 *
 * @param {string} rootDir
 * @returns {Promise<string[]>} Paths built by joining each entry name onto its parent directory.
 */
async function walkFiles(rootDir) {
  let entries;
  try {
    entries = await readdir(rootDir, { withFileTypes: true });
  } catch {
    return [];
  }

  const found = [];
  for (const entry of entries) {
    const entryPath = join(rootDir, entry.name);
    if (entry.isDirectory()) {
      for (const nested of await walkFiles(entryPath)) {
        found.push(nested);
      }
    } else if (entry.isFile()) {
      found.push(entryPath);
    }
  }
  return found;
}
/**
 * Parse an IntelliJ plugin directory layout:
 * <pluginRoot>/lib/*.jar — main jar contains META-INF/plugin.xml
 *
 * Every jar in lib/ is extracted into a temporary directory that is removed
 * again before the function settles. The first extracted jar containing
 * META-INF/plugin.xml is treated as the main jar. All jars are scanned for
 * native binaries and for MANIFEST.MF implementation metadata.
 *
 * Failures are reported through `warnings` with `manifest: null` instead of
 * rejecting. That includes a failure to create the temporary directory; a
 * failure to remove it only adds a warning and never discards the result.
 *
 * @param {string} pluginRoot
 * @returns {Promise<{ manifest: ParsedManifest|null, warnings: string[] } | null>}
 *   null when `pluginRoot` is not a non-empty string.
 */
export async function parseIntelliJPlugin(pluginRoot) {
  if (typeof pluginRoot !== 'string' || !pluginRoot) return null;
  const warnings = [];
  const libDir = join(pluginRoot, 'lib');

  try {
    const s = await stat(libDir);
    if (!s.isDirectory()) {
      warnings.push('IDE-JB-NO-LIB-DIR: lib is not a directory');
      return { manifest: null, warnings };
    }
  } catch {
    warnings.push('IDE-JB-NO-LIB-DIR: lib directory missing');
    return { manifest: null, warnings };
  }

  let jarNames;
  try {
    jarNames = (await readdir(libDir)).filter(n => n.toLowerCase().endsWith('.jar'));
  } catch {
    warnings.push('IDE-JB-NO-LIB-DIR: cannot read lib');
    return { manifest: null, warnings };
  }
  if (jarNames.length === 0) {
    warnings.push('IDE-JB-NO-PLUGIN-XML: no jars in lib/');
    return { manifest: null, warnings };
  }

  // Created inside the try so a mkdtemp failure becomes a warning like every
  // other failure; stays null until the directory actually exists.
  let extractionRoot = null;
  const extractedJars = [];
  try {
    extractionRoot = await mkdtemp(join(tmpdir(), 'llmsec-jb-'));

    for (const jarName of jarNames) {
      const jarPath = join(libDir, jarName);
      try {
        const jarBuffer = await readFile(jarPath);
        const jarDir = await mkdtemp(join(extractionRoot, 'jar-'));
        await extractToDir(jarBuffer, jarDir);
        extractedJars.push({ jarPath, jarName, jarDir });
      } catch (err) {
        // One bad jar must not abort the scan of the others.
        warnings.push(`IDE-JB-JAR-EXTRACT: ${jarName}: ${err.message}`);
      }
    }
    if (extractedJars.length === 0) {
      warnings.push('IDE-JB-NO-PLUGIN-XML: no jars could be extracted');
      return { manifest: null, warnings };
    }

    // Locate main jar: first one containing META-INF/plugin.xml
    let mainJar = null;
    const mainJarCandidates = [];
    for (const ej of extractedJars) {
      const xmlPath = join(ej.jarDir, 'META-INF', 'plugin.xml');
      if (await pathExists(xmlPath)) {
        mainJarCandidates.push(ej);
        if (!mainJar) mainJar = ej;
      }
    }
    if (!mainJar) {
      warnings.push('IDE-JB-NO-PLUGIN-XML: no jar contains META-INF/plugin.xml');
      return { manifest: null, warnings };
    }
    if (mainJarCandidates.length > 1) {
      warnings.push(`IDE-JB-MULTIPLE-PLUGIN-XML: ${mainJarCandidates.length} jars contain plugin.xml; first wins`);
    }

    // Parse plugin.xml
    let pluginXmlResult;
    try {
      const xmlRaw = await readFile(join(mainJar.jarDir, 'META-INF', 'plugin.xml'), 'utf8');
      pluginXmlResult = parsePluginXml(xmlRaw);
    } catch (err) {
      warnings.push(`IDE-JB-PLUGIN-XML-READ: ${err.message}`);
      return { manifest: null, warnings };
    }
    if (pluginXmlResult.warnings.length) warnings.push(...pluginXmlResult.warnings);
    if (!pluginXmlResult.manifest) {
      warnings.push('IDE-JB-PLUGIN-XML-PARSE: unparseable plugin.xml');
      return { manifest: null, warnings };
    }
    const px = pluginXmlResult.manifest;

    // Parse main jar MANIFEST.MF
    let mainMf = { mainClass: null, premainClass: null, implTitle: null, implVersion: null, premainAttrs: {} };
    const mainMfPath = join(mainJar.jarDir, 'META-INF', 'MANIFEST.MF');
    if (await pathExists(mainMfPath)) {
      try {
        const mfRaw = await readFile(mainMfPath, 'utf8');
        mainMf = parseManifestMf(mfRaw);
      } catch (err) {
        warnings.push(`IDE-JB-MANIFEST-MF-READ: ${err.message}`);
      }
    }

    // Walk ALL jar-dirs for native binaries
    const nativeBinaries = [];
    for (const ej of extractedJars) {
      const files = await walkFiles(ej.jarDir);
      for (const f of files) {
        if (!NATIVE_BIN_RE.test(f)) continue;
        try {
          const buf = await readFile(f);
          nativeBinaries.push({
            // "<jar name>:<path inside the jar>"
            path: `${ej.jarName}:${f.slice(ej.jarDir.length + 1)}`,
            // Size of the bytes that were hashed; avoids a second stat call.
            size: buf.length,
            sha256: createHash('sha256').update(buf).digest('hex'),
          });
        } catch (err) {
          warnings.push(`IDE-JB-NATIVE-READ: ${err.message}`);
        }
      }
    }

    // Parse every jar's MANIFEST.MF for bundled-jars list
    const bundledJars = [];
    for (const ej of extractedJars) {
      const mfPath = join(ej.jarDir, 'META-INF', 'MANIFEST.MF');
      let mf = { implTitle: null, implVersion: null };
      if (await pathExists(mfPath)) {
        try {
          mf = parseManifestMf(await readFile(mfPath, 'utf8'));
        } catch {
          // fall through with nulls
        }
      }
      bundledJars.push({
        name: ej.jarName,
        version: mf.implVersion || null,
        // Flagged as shaded when either implementation header is missing.
        shaded: !mf.implTitle || !mf.implVersion,
        coords: mf.implTitle || null,
      });
    }

    // Signature check on main jar: any META-INF entry with a signature-file suffix.
    let hasSignature = false;
    try {
      const metaInfDir = join(mainJar.jarDir, 'META-INF');
      const metaEntries = await readdir(metaInfDir);
      hasSignature = metaEntries.some(f => SIGNATURE_FILE_RE.test(f));
    } catch { /* no META-INF */ }

    // Fall back to the directory name when plugin.xml carries no <id>.
    const pluginId = px.pluginId || basename(pluginRoot);
    const manifest = {
      type: 'jetbrains',
      id: pluginId.toLowerCase(),
      pluginId,
      publisher: (px.vendor || '').toLowerCase(),
      name: px.name || '',
      version: px.version || '',
      // VS Code-only fields, present with empty values so the shape is shared.
      engines: {},
      main: null,
      browser: null,
      activationEvents: [],
      contributes: {},
      extensionPack: [],
      extensionDependencies: [],
      extensionKind: [],
      categories: [],
      capabilities: {},
      scripts: {},
      repository: px.vendorUrl || null,
      dependencies: {},
      hasSignature,
      sinceBuild: px.sinceBuild,
      untilBuild: px.untilBuild,
      depends: px.depends,
      extensionDeclarations: px.extensionDeclarations,
      applicationComponents: px.applicationComponents,
      listeners: px.listeners,
      themeProviders: px.themeProviders,
      hasPremainClass: Boolean(mainMf.premainClass),
      premainClass: mainMf.premainClass || null,
      nativeBinaries,
      bundledJars,
    };
    return { manifest, warnings };
  } catch (err) {
    warnings.push(`IDE-JB-UNCAUGHT: ${err.message}`);
    return { manifest: null, warnings };
  } finally {
    if (extractionRoot !== null) {
      try {
        await rm(extractionRoot, { recursive: true, force: true });
      } catch (err) {
        // A throw from `finally` would replace the result computed above.
        // `warnings` is the same array the returned object references, so
        // this entry is still visible to the caller.
        warnings.push(`IDE-JB-CLEANUP: ${err.message}`);
      }
    }
  }
}