ktg-plugin-marketplace/plugins/llm-security/scanners/lib/ide-extension-parser.mjs
2026-04-18 10:07:14 +02:00

411 lines
14 KiB
JavaScript

// ide-extension-parser.mjs — Parse IDE extension manifests into a normalized shape.
// Zero dependencies (Node.js builtins only).
//
// Two extension types are supported:
// - type: 'vscode' → parseVSCodeExtension (package.json + contributes)
// - type: 'jetbrains' → parseIntelliJPlugin (plugin.xml + MANIFEST.MF inside JARs)
import { readFile, access } from 'node:fs/promises';
import { join } from 'node:path';
/**
 * Report whether a filesystem path can be accessed.
 * @param {string} p - Path handed to `access()` with its default mode.
 * @returns {Promise<boolean>} `true` when `access()` resolves, `false` when it rejects for any reason.
 */
async function pathExists(p) {
  let reachable = true;
  try {
    await access(p);
  } catch {
    // Every rejection (missing entry, permission problem, ...) is reported as "absent".
    reachable = false;
  }
  return reachable;
}
/**
* @typedef {object} ParsedManifest
* @property {'vscode'|'jetbrains'} type
*
* // Shared / VS Code fields (kept at top level for backward compat with runIdeChecks)
* @property {string} id
* @property {string} publisher
* @property {string} name
* @property {string} version
* @property {object} engines
* @property {string|null} main
* @property {string|null} browser
* @property {string[]} activationEvents
* @property {object} contributes
* @property {string[]} extensionPack
* @property {string[]} extensionDependencies
* @property {string[]} extensionKind
* @property {string[]} categories
* @property {object} capabilities
* @property {object} scripts
* @property {object|string|null} repository
* @property {object} dependencies
* @property {boolean} hasSignature
*
* // JetBrains-only fields (present only when type === 'jetbrains')
* @property {string} [pluginId]
* @property {string|null} [sinceBuild]
* @property {string|null} [untilBuild]
* @property {Array<{id:string, optional:boolean, configFile:string|null}>} [depends]
* @property {Array<{namespace:string, name:string, attrs:object}>} [extensionDeclarations]
* @property {string[]} [applicationComponents]
* @property {Array<{topic:string, class:string}>} [listeners]
* @property {boolean} [hasPremainClass]
* @property {string|null} [premainClass]
* @property {Array<{path:string, size:number, sha256:string}>} [nativeBinaries]
* @property {Array<{name:string, version:string|null, shaded:boolean, coords:string|null}>} [bundledJars]
* @property {Array<{id:string, path:string}>} [themeProviders]
*/
/**
 * Read `package.json` from an unpacked VS Code extension and normalize it.
 *
 * Resolves to `null` when the file cannot be read, is not valid JSON, does not
 * decode to an object, or lacks a non-empty string `publisher` or `name`.
 * Otherwise the manifest carries lower-cased `id` / `publisher` / `name`,
 * type-checked copies of the remaining fields (wrong-typed values collapse to
 * `{}`, `[]` or `null`), and `hasSignature`, which records whether a
 * `.signature.p7s` entry exists directly under the extension root.
 *
 * @param {string} extRoot - Absolute path to extracted extension root.
 * @returns {Promise<{ manifest: ParsedManifest, warnings: string[] } | null>}
 */
export async function parseVSCodeExtension(extRoot) {
  const warnings = [];
  const pkgPath = join(extRoot, 'package.json');

  // Small coercion helpers shared by the field mapping below.
  const isObjectLike = (value) => Boolean(value) && typeof value === 'object';
  const stringOr = (value, fallback) => (typeof value === 'string' ? value : fallback);
  const objectOrEmpty = (value) => (isObjectLike(value) ? value : {});
  const stringsOnly = (value) => (
    Array.isArray(value) ? value.filter((item) => typeof item === 'string') : []
  );

  let raw;
  try {
    raw = await readFile(pkgPath, 'utf8');
  } catch {
    // An unreadable package.json means "no extension here".
    return null;
  }

  let pkg;
  try {
    pkg = JSON.parse(raw);
  } catch (err) {
    warnings.push(`malformed package.json at ${pkgPath}: ${err.message}`);
    return null;
  }

  if (!isObjectLike(pkg)) {
    warnings.push(`package.json at ${pkgPath} is not an object`);
    return null;
  }

  const publisher = stringOr(pkg.publisher, '');
  const name = stringOr(pkg.name, '');
  const version = stringOr(pkg.version, '');
  if (!(publisher && name)) {
    warnings.push(`missing publisher/name in ${pkgPath}`);
    return null;
  }

  const signaturePath = join(extRoot, '.signature.p7s');
  const hasSignature = await pathExists(signaturePath);

  const manifest = {
    type: 'vscode',
    id: `${publisher}.${name}`.toLowerCase(),
    publisher: publisher.toLowerCase(),
    name: name.toLowerCase(),
    version,
    engines: objectOrEmpty(pkg.engines),
    main: stringOr(pkg.main, null),
    browser: stringOr(pkg.browser, null),
    activationEvents: stringsOnly(pkg.activationEvents),
    contributes: objectOrEmpty(pkg.contributes),
    extensionPack: stringsOnly(pkg.extensionPack),
    extensionDependencies: stringsOnly(pkg.extensionDependencies),
    extensionKind: stringsOnly(pkg.extensionKind),
    categories: stringsOnly(pkg.categories),
    capabilities: objectOrEmpty(pkg.capabilities),
    scripts: objectOrEmpty(pkg.scripts),
    repository: pkg.repository || null,
    dependencies: objectOrEmpty(pkg.dependencies),
    hasSignature,
  };

  return { manifest, warnings };
}
/**
 * Placeholder for parsing a `.vsix` archive directly; it always rejects.
 * Callers are expected to extract the archive first (via lib/zip-extract.mjs)
 * and hand the extracted directory to the directory-based parser instead.
 *
 * @param {string} vsixPath - Path of the archive; only echoed in the error message.
 * @returns {Promise<never>}
 * @throws {Error} Always, with a hint describing the manual extraction step.
 */
export async function parseVsixFile(vsixPath) {
  const message = `VSIX parsing not implemented in library-direct form. Extract manually (unzip ${vsixPath}) and pass the extracted directory.`;
  throw new Error(message);
}
// ---------------------------------------------------------------------------
// JetBrains helpers — zero-dep plugin.xml + MANIFEST.MF parsers
// ---------------------------------------------------------------------------
// The five entities predefined by XML; any other named reference is left untouched.
const NAMED_ENTITIES = {
  amp: '&', lt: '<', gt: '>', quot: '"', apos: "'",
};
/**
 * Decode XML entity references in text content (non-CDATA).
 *
 * Handles the five predefined named entities plus decimal (`&#65;`) and
 * hexadecimal (`&#x41;`) character references. Anything that cannot be decoded
 * (unknown name, malformed digits, code point outside the Unicode range) is
 * returned verbatim, so hostile or broken input never makes this function throw.
 *
 * @param {string} s
 * @returns {string}
 */
function decodeEntities(s) {
  return s.replace(/&(#x?[0-9a-fA-F]+|[a-zA-Z]+);/g, (full, inner) => {
    if (inner[0] === '#') {
      const isHex = inner[1] === 'x';
      const digits = inner.slice(isHex ? 2 : 1);
      // The pattern admits hex digits after a bare '#'; a decimal reference
      // such as `&#1F;` is malformed and must not be prefix-parsed as `1`.
      if (!isHex && !/^[0-9]+$/.test(digits)) return full;
      const cp = Number.parseInt(digits, isHex ? 16 : 10);
      // String.fromCodePoint throws RangeError outside 0..0x10FFFF, so
      // out-of-range references are passed through instead of decoded.
      const inRange = Number.isInteger(cp) && cp >= 0 && cp <= 0x10ffff;
      return inRange ? String.fromCodePoint(cp) : full;
    }
    // Own-property check keeps names like `toString` from resolving via the prototype.
    return Object.prototype.hasOwnProperty.call(NAMED_ENTITIES, inner)
      ? NAMED_ENTITIES[inner]
      : full;
  });
}
/**
 * Return the normalized text of the first `<tag>…</tag>` element in `xml`.
 * The tag name is matched case-insensitively and the opening tag may carry
 * attributes. The captured inner text is passed through
 * `normalizeTextContent`; `null` is returned when no such element exists.
 *
 * @param {string} xml
 * @param {string} tag - Element name; interpolated into a RegExp as-is.
 * @returns {string|null}
 */
function firstElementText(xml, tag) {
  const pattern = new RegExp(`<${tag}\\b[^>]*>([\\s\\S]*?)</${tag}>`, 'i');
  const hit = xml.match(pattern);
  return hit ? normalizeTextContent(hit[1]) : null;
}
/**
 * Normalize the raw inner text of an element.
 * Surrounding whitespace is trimmed first. If what remains is wrapped in a
 * single CDATA section, the wrapper is removed and the payload returned
 * verbatim; otherwise entity references are decoded via `decodeEntities`.
 *
 * @param {string} raw
 * @returns {string}
 */
function normalizeTextContent(raw) {
  const cdataOpen = '<![CDATA[';
  const cdataClose = ']]>';
  const body = raw.trim();
  const isCdata = body.startsWith(cdataOpen) && body.endsWith(cdataClose);
  return isCdata
    ? body.slice(cdataOpen.length, -cdataClose.length)
    : decodeEntities(body);
}
/**
 * Turn the attribute portion of a tag into a plain object.
 * Recognizes `key="value"` and `key='value'` pairs (whitespace around `=` is
 * allowed); values are entity-decoded. Unquoted or valueless attributes are
 * ignored, and a repeated key keeps its last value.
 *
 * @param {string} attrBlob - e.g. ` id="X" path='Y'`
 * @returns {Record<string,string>}
 */
function parseAttrs(attrBlob) {
  const attrs = {};
  if (attrBlob) {
    const pairRe = /([\w-]+)\s*=\s*(?:"([^"]*)"|'([^']*)')/g;
    for (let hit = pairRe.exec(attrBlob); hit !== null; hit = pairRe.exec(attrBlob)) {
      const [, key, doubleQuoted, singleQuoted] = hit;
      // Exactly one of the two quote groups participates in a match.
      const rawValue = doubleQuoted !== undefined ? doubleQuoted : singleQuoted;
      attrs[key] = decodeEntities(rawValue);
    }
  }
  return attrs;
}
/**
 * Parse a JetBrains `plugin.xml` document.
 *
 * Regex-based, intentionally lenient. Malformed input returns
 * `{ manifest: null, warnings: [...] }` rather than throwing.
 *
 * On success the manifest contains: `pluginId`, `name`, `version`, `vendor`,
 * `vendorUrl`, `sinceBuild`, `untilBuild`, `depends`, `extensionDeclarations`,
 * `applicationComponents`, `listeners` and `themeProviders`. Scalar fields
 * default to `''` or `null`, list fields to `[]`, when the element is absent.
 *
 * @param {string} xmlString
 * @returns {{ manifest: object|null, warnings: string[] }}
 */
export function parsePluginXml(xmlString) {
  const warnings = [];
  if (typeof xmlString !== 'string') {
    return { manifest: null, warnings: ['input is not a string'] };
  }

  // --- Pre-processing pipeline ---
  // Strip a leading BOM, normalise CRLF / lone CR to LF, then drop comments so
  // commented-out elements are never picked up by the regexes below.
  let xml = xmlString.replace(/^\uFEFF/, '');
  xml = xml.replace(/\r\n?/g, '\n');
  xml = xml.replace(/<!--[\s\S]*?-->/g, '');

  // Bail on obviously malformed (no <idea-plugin> root and no <id>)
  if (!/<idea-plugin\b/i.test(xml) && !/<id\b/i.test(xml)) {
    warnings.push('no <idea-plugin> or <id> element found — not a plugin.xml');
    return { manifest: null, warnings };
  }

  // Bail on unbalanced-tag smell: count open vs close for <id> and <name>.
  // Both are required non-self-closing elements in plugin.xml, so any mismatch
  // signals truncation or malformed input.
  const balanceChecks = [
    { open: /<id\b[^>]*>/g, close: /<\/id>/g, tag: 'id' },
    { open: /<name\b[^>]*>/g, close: /<\/name>/g, tag: 'name' },
  ];
  for (const { open, close, tag } of balanceChecks) {
    const o = (xml.match(open) || []).length;
    const c = (xml.match(close) || []).length;
    if (o > 0 && o !== c) {
      warnings.push(`unbalanced <${tag}> tags — truncated input`);
      return { manifest: null, warnings };
    }
  }

  // Run an extractor; on failure record a labelled warning and yield null.
  const safe = (fn, label) => {
    try { return fn(); } catch (err) {
      warnings.push(`${label}: ${err.message}`);
      return null;
    }
  };

  const pluginId = safe(() => firstElementText(xml, 'id'), 'pluginId') || '';
  const name = safe(() => firstElementText(xml, 'name'), 'name') || '';
  const version = safe(() => firstElementText(xml, 'version'), 'version') || '';

  // <vendor url="..." email="...">Name</vendor>
  let vendor = '';
  let vendorUrl = null;
  const vendorMatch = xml.match(/<vendor\b([^>]*)>([\s\S]*?)<\/vendor>/i);
  if (vendorMatch) {
    const attrs = parseAttrs(vendorMatch[1]);
    vendorUrl = attrs.url || null;
    vendor = normalizeTextContent(vendorMatch[2]);
  }

  // <idea-version since-build="A" until-build="B"/>
  let sinceBuild = null;
  let untilBuild = null;
  const ideaVersionMatch = xml.match(/<idea-version\b([^>]*)\/?\s*>/i);
  if (ideaVersionMatch) {
    const attrs = parseAttrs(ideaVersionMatch[1]);
    sinceBuild = attrs['since-build'] || null;
    untilBuild = attrs['until-build'] || null;
  }

  // <depends optional="true" config-file="X">ID</depends>
  const depends = [];
  const dependsRe = /<depends\b([^>]*)>([\s\S]*?)<\/depends>/gi;
  let dm;
  while ((dm = dependsRe.exec(xml)) !== null) {
    const attrs = parseAttrs(dm[1]);
    depends.push({
      id: normalizeTextContent(dm[2]),
      optional: attrs.optional === 'true',
      configFile: attrs['config-file'] || null,
    });
  }

  // <extensions defaultExtensionNs="com.intellij">...</extensions>
  const extensionDeclarations = [];
  const themeProviders = [];
  const extBlockRe = /<extensions\b([^>]*)>([\s\S]*?)<\/extensions>/gi;
  let em;
  while ((em = extBlockRe.exec(xml)) !== null) {
    const attrs = parseAttrs(em[1]);
    const ns = attrs.defaultExtensionNs || 'com.intellij';
    const body = em[2];
    // Children: <(name) ... /> or <(name) ...>...</(name)>
    // Use [^>]*? (non-greedy, slash allowed in attr values like path="/x/y")
    // so self-closing elements with slashes in attributes still match.
    const childRe = /<([\w.-]+)\b([^>]*?)(?:\/\s*>|>([\s\S]*?)<\/\1>)/g;
    let cm;
    while ((cm = childRe.exec(body)) !== null) {
      const childName = cm[1];
      const childAttrs = parseAttrs(cm[2]);
      extensionDeclarations.push({ namespace: ns, name: childName, attrs: childAttrs });
      if (childName === 'themeProvider') {
        themeProviders.push({
          id: childAttrs.id || '',
          path: childAttrs.path || '',
        });
      }
    }
  }

  // <application-components><component><implementation-class>X</implementation-class></component></application-components>
  const applicationComponents = [];
  const appCompBlockRe = /<application-components\b[^>]*>([\s\S]*?)<\/application-components>/gi;
  let am;
  while ((am = appCompBlockRe.exec(xml)) !== null) {
    const implRe = /<implementation-class>\s*([\s\S]*?)\s*<\/implementation-class>/g;
    let im;
    while ((im = implRe.exec(am[1])) !== null) {
      // Same normalisation as <id>/<vendor>/<depends>: a CDATA-wrapped class
      // name is unwrapped instead of being reported with its wrapper.
      applicationComponents.push(normalizeTextContent(im[1]).trim());
    }
  }

  // <applicationListener topic="X" class="Y"/>
  // The attribute run is matched with [^>]*? (slash allowed) for the same
  // reason as childRe above: excluding '/' dropped any listener whose
  // attribute value contained a slash.
  // NOTE(review): this matches only a literal <applicationListener> tag; it
  // does not look inside <applicationListeners><listener .../> blocks —
  // confirm which form the callers expect.
  const listeners = [];
  const listenerRe = /<applicationListener\b([^>]*?)\/?\s*>/gi;
  let lm;
  while ((lm = listenerRe.exec(xml)) !== null) {
    const attrs = parseAttrs(lm[1]);
    listeners.push({
      topic: attrs.topic || '',
      class: attrs.class || '',
    });
  }

  return {
    manifest: {
      pluginId,
      name,
      version,
      vendor,
      vendorUrl,
      sinceBuild,
      untilBuild,
      depends,
      extensionDeclarations,
      applicationComponents,
      listeners,
      themeProviders,
    },
    warnings,
  };
}
/**
 * Parse a `META-INF/MANIFEST.MF` file. Simple `Key: Value` line protocol.
 * Handles RFC-822 72-char continuation lines (lines starting with space/tab
 * are appended to the previous line's value).
 *
 * Header names are compared case-insensitively, because JAR manifest
 * attribute names are case-insensitive: `premain-class:` is honoured by the
 * JVM exactly like `Premain-Class:` and must not slip past the scanner.
 * When a header occurs more than once, the last occurrence wins.
 *
 * `premainAttrs` collects every agent-related header (`Premain-*`, `Agent-*`,
 * `Launcher-Agent-*`, `Boot-Class-Path`, `Can-*`), keyed by the header name
 * exactly as spelled in the file.
 *
 * @param {string} mfString
 * @returns {{mainClass: string|null, premainClass: string|null, implTitle: string|null, implVersion: string|null, premainAttrs: object}}
 */
export function parseManifestMf(mfString) {
  const out = {
    mainClass: null,
    premainClass: null,
    implTitle: null,
    implVersion: null,
    premainAttrs: {},
  };
  if (typeof mfString !== 'string' || mfString.length === 0) return out;

  // Pre-processing: drop a leading BOM and normalise CRLF / lone CR to LF.
  let s = mfString.replace(/^\uFEFF/, '');
  s = s.replace(/\r\n?/g, '\n');

  // Concatenate continuation lines (lines beginning with a single space or tab).
  const logical = [];
  for (const line of s.split('\n')) {
    const isContinuation = line.length > 0 && (line[0] === ' ' || line[0] === '\t');
    if (isContinuation && logical.length > 0) {
      logical[logical.length - 1] += line.slice(1);
    } else {
      logical.push(line);
    }
  }

  for (const line of logical) {
    const idx = line.indexOf(': ');
    if (idx === -1) continue; // blank line or not a `Key: Value` header
    const key = line.slice(0, idx).trim();
    const value = line.slice(idx + 2);

    // A switch (not an object lookup) so hostile header names such as
    // `constructor` or `__proto__` cannot resolve to inherited properties.
    switch (key.toLowerCase()) {
      case 'main-class':
        out.mainClass = value;
        break;
      case 'premain-class':
        out.premainClass = value;
        break;
      case 'implementation-title':
        out.implTitle = value;
        break;
      case 'implementation-version':
        out.implVersion = value;
        break;
      default:
        break;
    }

    // Forensic collection of all agent-related attributes.
    if (/^(Premain-|Agent-|Launcher-Agent-|Boot-Class-Path|Can-)/i.test(key)) {
      out.premainAttrs[key] = value;
    }
  }
  return out;
}
/**
 * Placeholder entry point for parsing an IntelliJ plugin directory.
 * The real implementation is slated for Step 6 (v6.6.0); until it lands this
 * function performs no work and always resolves to `null`.
 *
 * @param {string} pluginRoot - Plugin directory; currently not read.
 * @returns {Promise<null>}
 */
export async function parseIntelliJPlugin(pluginRoot) {
  void pluginRoot; // accepted for interface stability only
  return null;
}