feat(llm-security): URL-fetch support for JetBrains Marketplace (v6.6.0)

This commit is contained in:
Kjell Tore Guttormsen 2026-04-18 10:46:13 +02:00
commit 378e177000
3 changed files with 569 additions and 26 deletions

View file

@ -33,9 +33,14 @@ import {
loadJetBrainsBlocklist,
normalizeId,
} from './lib/ide-extension-data.mjs';
import { fetchVsixFromUrl, detectUrlType } from './lib/vsix-fetch.mjs';
import { fetchVsixFromUrl, fetchPluginFromUrl, detectUrlType } from './lib/vsix-fetch.mjs';
import { extractToDir, ZipError } from './lib/zip-extract.mjs';
import { runVsixWorker } from './lib/vsix-sandbox.mjs';
import {
runVsixWorker,
runPluginWorker,
DEFAULT_VSIX_WORKER_PATH,
DEFAULT_JETBRAINS_WORKER_PATH,
} from './lib/vsix-sandbox.mjs';
import { scan as scanUnicode } from './unicode-scanner.mjs';
import { scan as scanEntropy } from './entropy-scanner.mjs';
@ -132,6 +137,111 @@ async function fetchAndExtractVsixUrl(url, opts = {}) {
}
}
/**
 * Fetch a plugin archive from `url`, extract it into a fresh temp directory,
 * and report where the plugin root ended up.
 *
 * Targets JetBrains plugins by default and is callable for VSIX archives via
 * `workerKind: 'vsix'`. Both kinds go through the generalized
 * `runPluginWorker` from `vsix-sandbox.mjs`, so they share one sandbox
 * pipeline.
 *
 * Two execution paths:
 *   - Sandboxed (default): the worker script selected by `workerKind`
 *     (`DEFAULT_JETBRAINS_WORKER_PATH` or `DEFAULT_VSIX_WORKER_PATH`) does the
 *     fetch + extract and reports `extRoot` itself.
 *   - In-process (`useSandbox: false`): calls `fetchPluginFromUrl` and
 *     `extractToDir` directly, then locates the root here. For JetBrains that
 *     is the first child of tempDir containing `lib/*.jar` (falling back to
 *     tempDir); for VSIX it is `<tempDir>/extension` when that exists.
 *
 * On failure the temp directory is removed before the error is rethrown.
 * On success the caller owns it and MUST
 * `await rm(result.tempDir, { recursive: true, force: true })` in finally.
 *
 * @param {string} url
 * @param {{ useSandbox?: boolean, workerKind?: 'jetbrains'|'vsix' }} [opts]
 * @returns {Promise<{ extRoot: string, tempDir: string, source: object, sandbox: 'sandbox-exec'|'bwrap'|null|'in-process' }>}
 * @throws {Error} when the worker reports failure, the fetch fails
 *   (`fetch failed: …`), or the archive is malformed
 *   (`malformed plugin archive (…)`). Wrapped errors keep the original as
 *   `cause`.
 */
async function fetchAndExtractPluginUrl(url, opts = {}) {
  const useSandbox = opts.useSandbox !== false;
  const workerKind = opts.workerKind || 'jetbrains';
  const workerPath = workerKind === 'vsix' ? DEFAULT_VSIX_WORKER_PATH : DEFAULT_JETBRAINS_WORKER_PATH;
  const tempDir = await mkdtemp(join(tmpdir(), `llm-sec-${workerKind}-`));

  // Both paths publish the same envelope shape; only the origin of the
  // metadata (worker payload vs. in-process fetch result) differs. The
  // fetcher's own `type` is re-exposed as `kind` because `type` is 'url' here.
  const describeSource = (meta, sandboxLabel) => {
    const { type: kind, ...sourceMeta } = meta.source;
    return {
      type: 'url',
      kind,
      url,
      finalUrl: meta.finalUrl,
      sha256: meta.sha256,
      size: meta.size,
      sandbox: sandboxLabel,
      ...sourceMeta,
    };
  };

  try {
    if (useSandbox) {
      const { ok, sandbox, payload } = await runPluginWorker(
        workerPath,
        ['--url', url, '--tmpdir', tempDir],
        tempDir,
      );
      if (!ok) {
        throw new Error(payload && payload.error ? payload.error : 'worker failed');
      }
      // Without this guard a malformed success payload would surface as an
      // opaque destructuring TypeError.
      if (!payload || !payload.source) {
        throw new Error('worker reported success without source metadata');
      }
      return {
        extRoot: payload.extRoot,
        tempDir,
        source: describeSource(payload, sandbox || 'none'),
        sandbox: sandbox || null,
      };
    }

    // In-process path — used by tests that mock globalThis.fetch.
    let fetched;
    try {
      fetched = await fetchPluginFromUrl(url);
    } catch (err) {
      throw new Error(`fetch failed: ${err.message}`, { cause: err });
    }
    try {
      await extractToDir(fetched.buffer, tempDir);
    } catch (err) {
      if (err instanceof ZipError) {
        throw new Error(`malformed plugin archive (${err.code}): ${err.message}`, { cause: err });
      }
      throw err;
    }

    let extRoot = tempDir;
    if (workerKind === 'jetbrains') {
      extRoot = await locateJetBrainsPluginRoot(tempDir);
    } else {
      const nested = join(tempDir, 'extension');
      if (existsSync(nested)) extRoot = nested;
    }
    return {
      extRoot,
      tempDir,
      source: describeSource(fetched, 'in-process'),
      sandbox: 'in-process',
    };
  } catch (err) {
    // Best-effort cleanup; the original failure is what the caller needs.
    await rm(tempDir, { recursive: true, force: true }).catch(() => {});
    throw err;
  }
}

/**
 * Locate the plugin root inside an extracted JetBrains archive: the first
 * immediate child of `tempDir` that is a directory with a `lib/` subdirectory
 * holding at least one `*.jar` (case-insensitive). Returns `tempDir` itself
 * when no child qualifies or the directory cannot be read.
 *
 * @param {string} tempDir
 * @returns {Promise<string>}
 */
async function locateJetBrainsPluginRoot(tempDir) {
  try {
    const { readdirSync, statSync } = await import('node:fs');
    for (const name of readdirSync(tempDir)) {
      const candidate = join(tempDir, name);
      try {
        if (!statSync(candidate).isDirectory()) continue;
        const libDir = join(candidate, 'lib');
        if (!statSync(libDir).isDirectory()) continue;
        if (readdirSync(libDir).some((n) => n.toLowerCase().endsWith('.jar'))) {
          return candidate;
        }
      } catch { /* missing or unreadable entry — not a plugin root, keep looking */ }
    }
  } catch { /* tempDir unreadable — fall back to tempDir */ }
  return tempDir;
}
// ---------------------------------------------------------------------------
// IDE-specific checks (operate on parsed manifest)
// ---------------------------------------------------------------------------
@ -694,13 +804,28 @@ export async function scan(target, options = {}) {
let urlSource = null;
let urlTempDir = null;
// URL mode: fetch VSIX, extract to temp dir, then treat extracted dir as single target.
// URL mode: fetch plugin archive, extract to temp dir, then treat extracted dir as single target.
if (isUrlTarget(target)) {
const detected = detectUrlType(target);
if (detected.type === 'unknown') {
warnings.push(`unsupported URL: ${target} (expected VS Code Marketplace, OpenVSX, or direct .vsix)`);
warnings.push(`unsupported URL: ${target} (expected VS Code Marketplace, OpenVSX, direct .vsix, or plugins.jetbrains.com)`);
} else if (detected.type === 'github') {
warnings.push('GitHub repo URLs are not supported in v6.4.0 — would require build step. Use the Marketplace, OpenVSX, or a direct .vsix link.');
} else if (detected.type === 'jetbrains') {
try {
const fetched = await fetchAndExtractPluginUrl(target, {
useSandbox: options.useSandbox,
workerKind: 'jetbrains',
});
urlSource = fetched.source;
urlTempDir = fetched.tempDir;
target = fetched.extRoot;
if (fetched.sandbox === null && options.useSandbox !== false) {
warnings.push('OS sandbox unavailable on this platform — JetBrains plugin extracted without sandbox-exec/bwrap. Defense-in-depth reduced to in-process zip-extract validation.');
}
} catch (err) {
warnings.push(`URL fetch/extract failed: ${err.message}`);
}
} else {
try {
const fetched = await fetchAndExtractVsixUrl(target, { useSandbox: options.useSandbox });
@ -726,28 +851,64 @@ export async function scan(target, options = {}) {
if (urlFetchFailed) {
// Don't fall through to discovery when the user asked for a specific URL.
} else if (singleTargetPath) {
// Single-directory mode
const parsed = await parseVSCodeExtension(singleTargetPath);
if (!parsed) {
warnings.push(`cannot parse extension at ${singleTargetPath}`);
// Single-directory mode — detect plugin type from layout.
// - `lib/*.jar` subtree → JetBrains plugin (parsed via parseIntelliJPlugin)
// - `package.json` at root → VS Code extension (parsed via parseVSCodeExtension)
// - neither → warn + skip
const hasLibDir = existsSync(join(singleTargetPath, 'lib'));
const hasPackageJson = existsSync(join(singleTargetPath, 'package.json'));
const isJetBrainsLayout = hasLibDir && !hasPackageJson;
if (isJetBrainsLayout) {
const parsed = await parseIntelliJPlugin(singleTargetPath);
if (!parsed || !parsed.manifest) {
warnings.push(`cannot parse JetBrains plugin at ${singleTargetPath}`);
if (parsed && parsed.warnings) warnings.push(...parsed.warnings);
} else {
const m = parsed.manifest;
extensions.push({
id: m.id,
publisher: m.publisher || null,
name: m.name || null,
version: m.version || null,
location: singleTargetPath,
type: 'jetbrains',
source: null,
isBuiltin: false,
installedTimestamp: null,
targetPlatform: null,
publisherDisplayName: null,
signed: false,
rootDir: singleTargetPath,
});
rootsScanned.push(singleTargetPath);
warnings.push(...parsed.warnings);
}
} else if (hasPackageJson) {
const parsed = await parseVSCodeExtension(singleTargetPath);
if (!parsed) {
warnings.push(`cannot parse extension at ${singleTargetPath}`);
} else {
const m = parsed.manifest;
extensions.push({
id: m.id,
publisher: m.publisher,
name: m.name,
version: m.version,
location: singleTargetPath,
type: 'vscode',
source: null,
isBuiltin: false,
installedTimestamp: null,
targetPlatform: null,
publisherDisplayName: null,
signed: m.hasSignature,
rootDir: singleTargetPath,
});
rootsScanned.push(singleTargetPath);
}
} else {
const m = parsed.manifest;
extensions.push({
id: m.id,
publisher: m.publisher,
name: m.name,
version: m.version,
location: singleTargetPath,
type: 'vscode',
source: null,
isBuiltin: false,
installedTimestamp: null,
targetPlatform: null,
publisherDisplayName: null,
signed: m.hasSignature,
rootDir: singleTargetPath,
});
rootsScanned.push(singleTargetPath);
warnings.push(`cannot determine plugin type at ${singleTargetPath} (no package.json, no lib/ dir)`);
}
} else {
// Discovery mode
@ -762,7 +923,9 @@ export async function scan(target, options = {}) {
rootsScanned.push(...vs.rootsScanned);
}
if (!options.vscodeOnly) {
const jb = await discoverJetBrainsExtensions({});
const jb = await discoverJetBrainsExtensions({
rootsOverride: options.rootsOverride,
});
extensions.push(...jb.extensions);
warnings.push(...jb.warnings);
rootsScanned.push(...jb.rootsScanned);

View file

@ -0,0 +1,114 @@
#!/usr/bin/env node
// jetbrains-fetch-worker.mjs — Sub-process worker that fetches a JetBrains
// plugin URL and extracts the downloaded ZIP into a writable directory.
// Mirrors `vsix-fetch-worker.mjs` IPC exactly.
//
// Contract:
// stdin: none
// argv: --url <url> --tmpdir <writable-dir>
// stdout: single JSON line {ok:true, sha256, size, finalUrl, source, extRoot}
// on success, or {ok:false, error:"<msg>", code?:"<ZIP_CODE>"} on failure
// stderr: never (silent — all errors via JSON on stdout)
// exit: 0 on success, 1 on any failure (caller still parses stdout)
//
// Key JetBrains-specific difference from the VSIX worker: plugin archives are
// NOT nested under `extension/`. The top-level entry is usually the plugin
// directory itself — identified by the presence of a `lib/` subdir containing
// at least one `*.jar`. If no such directory is found, fall back to `<tmpdir>`
// itself and let the parser surface a warning.
import { existsSync, readdirSync, statSync } from 'node:fs';
import { join } from 'node:path';
import { fetchJetBrainsPlugin, detectUrlType } from './vsix-fetch.mjs';
import { extractToDir, ZipError } from './zip-extract.mjs';
// Write `obj` to stdout as a single newline-terminated JSON line. This is the
// worker's only output channel; the parent parses exactly this line.
function emit(obj) {
  const line = `${JSON.stringify(obj)}\n`;
  process.stdout.write(line);
}
// Pull `--url <value>` and `--tmpdir <value>` out of `argv`.
// A flag only counts when another token follows it; that token is consumed as
// the value verbatim (even if it looks like a flag). Unknown tokens are
// ignored, and a repeated flag keeps its last value. Missing flags stay null.
function parseArgs(argv) {
  const parsed = { url: null, tmpdir: null };
  let pos = 0;
  while (pos < argv.length) {
    const token = argv[pos];
    const hasValue = pos + 1 < argv.length;
    if (hasValue && (token === '--url' || token === '--tmpdir')) {
      // '--url' → 'url', '--tmpdir' → 'tmpdir'
      parsed[token.slice(2)] = argv[pos + 1];
      pos += 2;
    } else {
      pos += 1;
    }
  }
  return parsed;
}
// Locate the plugin directory inside an extracted JetBrains archive.
// Plugin zips are laid out as `<plugin-name>/lib/*.jar`, so the immediate
// children of <tmpdir> are probed in directory-listing order and the first
// one that is a directory with a `lib/` subdirectory holding at least one
// `.jar` (case-insensitive) wins. When nothing qualifies — or <tmpdir> itself
// cannot be listed — <tmpdir> is returned unchanged.
function findPluginRoot(tmpdir) {
  // Any stat/readdir failure simply disqualifies the candidate.
  const looksLikePluginRoot = (dir) => {
    try {
      if (!statSync(dir).isDirectory()) return false;
      const libDir = join(dir, 'lib');
      if (!statSync(libDir).isDirectory()) return false;
      return readdirSync(libDir).some((entry) => entry.toLowerCase().endsWith('.jar'));
    } catch {
      return false;
    }
  };

  let children;
  try {
    children = readdirSync(tmpdir);
  } catch {
    return tmpdir;
  }
  const match = children
    .map((name) => join(tmpdir, name))
    .find((candidate) => looksLikePluginRoot(candidate));
  return match === undefined ? tmpdir : match;
}
// Worker entry point: validate argv, fetch the JetBrains plugin archive,
// extract it into the given directory, and report the outcome as one JSON
// line on stdout. Exits 0 on success and 1 on every failure.
async function main() {
  // Report a failure and terminate. `process.exit` does not return, so
  // nothing after a `fail(...)` call runs.
  const fail = (payload) => {
    emit(payload);
    process.exit(1);
  };

  const args = parseArgs(process.argv.slice(2));
  const url = args.url;
  const dir = args.tmpdir;
  if (!url || !dir) {
    fail({ ok: false, error: 'missing --url or --tmpdir' });
  }

  // Refuse anything that is not a JetBrains URL, even if the caller routed
  // it here.
  const detected = detectUrlType(url);
  if (detected.type !== 'jetbrains') {
    fail({ ok: false, error: `worker expected JetBrains URL, got type=${detected.type}` });
  }

  let fetched;
  try {
    const { numericId, xmlId, version } = detected;
    fetched = await fetchJetBrainsPlugin({ numericId, xmlId, version });
  } catch (err) {
    fail({ ok: false, error: `fetch failed: ${err.message}` });
  }

  try {
    await extractToDir(fetched.buffer, dir);
  } catch (err) {
    // Zip-level problems carry a machine-readable code; anything else is
    // reported as a generic extract failure.
    const failure = err instanceof ZipError
      ? { ok: false, error: `malformed JetBrains plugin (${err.code}): ${err.message}`, code: err.code }
      : { ok: false, error: `extract failed: ${err.message}` };
    fail(failure);
  }

  const extRoot = existsSync(dir) ? findPluginRoot(dir) : dir;
  const { sha256, size, finalUrl, source } = fetched;
  emit({
    ok: true,
    sha256,
    size,
    finalUrl,
    source,
    extRoot,
  });
  process.exit(0);
}
// Run the worker. Anything that escapes main() is still reported through the
// JSON-on-stdout channel before exiting with status 1.
main().catch((reason) => {
  const detail = reason.message || String(reason);
  emit({ ok: false, error: `worker crashed: ${detail}` });
  process.exit(1);
});

View file

@ -0,0 +1,266 @@
// jetbrains-fetch.test.mjs — Integration tests for `/security ide-scan <url>`
// with a JetBrains Marketplace URL. Mocks `globalThis.fetch` so we never hit
// real plugins.jetbrains.com endpoints. `useSandbox: false` is required because
// mocks do not cross process boundaries — this mirrors the VSIX test strategy.
//
// Covers:
// 1. Spawned worker emits well-formed JSON when fed a bogus URL (sub-process
// path — no mock, we just assert the IPC contract holds).
// 2. End-to-end `scan()` on a `/plugin/<numericId>-<slug>` URL resolves
// numericId → xmlId via metadata, then downloads + extracts.
// 3. End-to-end `scan()` on a `/plugin/download?pluginId=<xmlId>` URL
// skips the metadata round-trip and downloads directly.
// 4. Network failure / malformed archive bubble up as warnings.
// 5. URL kind discriminator (`meta.source.kind === 'jetbrains'`) distinguishes
// JetBrains plugins from VS Code extensions in the envelope.
//
// See: plan step 12 (`ultraplan-2026-04-17-jetbrains-ide-scan.md`).
import { describe, it, before, after } from 'node:test';
import assert from 'node:assert/strict';
import { spawn } from 'node:child_process';
import { join, dirname } from 'node:path';
import { fileURLToPath } from 'node:url';
import { resetCounter } from '../../scanners/lib/output.mjs';
import { scan } from '../../scanners/ide-extension-scanner.mjs';
import { createZip } from '../helpers/zip-writer.mjs';
// Directory containing this test file, derived from the module URL.
const __dirname = dirname(fileURLToPath(import.meta.url));
// Path to the JetBrains fetch worker script, resolved relative to this test
// file. The IPC-contract tests spawn it as a child process.
const JB_WORKER_PATH = join(
__dirname,
'..',
'..',
'scanners',
'lib',
'jetbrains-fetch-worker.mjs',
);
// The fetch implementation present at load time, captured before any test
// overwrites globalThis.fetch so it can be put back afterwards.
const realFetch = globalThis.fetch;
// Build a `Response` whose body is a stream that delivers `buffer` as one
// chunk and then closes. Content type is always `application/zip`; `status`
// defaults to 200.
function mockBufferResponse(buffer, { status = 200 } = {}) {
  const singleChunkSource = {
    start(controller) {
      controller.enqueue(buffer);
      controller.close();
    },
  };
  const init = {
    status,
    headers: { 'content-type': 'application/zip' },
  };
  return new Response(new ReadableStream(singleChunkSource), init);
}
// Build a `Response` carrying `obj` serialized as JSON, with content type
// `application/json`; `status` defaults to 200.
function jsonResponse(obj, { status = 200 } = {}) {
  const body = JSON.stringify(obj);
  const headers = { 'content-type': 'application/json' };
  return new Response(body, { status, headers });
}
// Replace `globalThis.fetch` with a stub driven by `routes`: the requested
// URL is stringified and handed to `routes`, whose truthy return value becomes
// the fetch result. A falsy return means the URL is unrouted and the fetch
// rejects. The previous fetch is NOT saved here — callers restore it.
function installFetchRouter(routes) {
  globalThis.fetch = async (url) => {
    const response = routes(String(url));
    if (response) {
      return response;
    }
    throw new Error(`unrouted fetch: ${url}`);
  };
}
// Produce a synthetic JetBrains plugin archive shaped like a Marketplace
// download:
//   com.example.benign/lib/main.jar   (outer zip entry)
//     └─ META-INF/plugin.xml          (inside the jar, itself a zip)
//     └─ META-INF/MANIFEST.MF
// The outer archive stands in for what plugins.jetbrains.com ships; the inner
// jar carries the plugin.xml manifest that parseIntelliJPlugin walks.
function buildBenignJetBrainsArchive() {
  const pluginXml = `<?xml version="1.0"?>
<idea-plugin>
<id>com.example.benign</id>
<name>Benign</name>
<version>1.0.0</version>
<vendor>Example</vendor>
</idea-plugin>`;
  const jarEntries = [
    { name: 'META-INF/plugin.xml', data: pluginXml },
    { name: 'META-INF/MANIFEST.MF', data: 'Manifest-Version: 1.0\n' },
  ];
  const mainJar = createZip(jarEntries);
  const archiveEntries = [
    { name: 'com.example.benign/lib/main.jar', data: mainJar },
  ];
  return createZip(archiveEntries);
}
// ---------------------------------------------------------------------------
// 1. Worker IPC contract
// ---------------------------------------------------------------------------
describe('jetbrains-fetch-worker — IPC contract', () => {
  // Spawn the worker with `args` and resolve with its exit code plus
  // everything it wrote to stdout/stderr.
  //   - `process.execPath` runs the worker under the same Node binary as this
  //     test run, rather than whichever `node` is first on PATH.
  //   - stderr is drained so a chatty child can never stall on a full pipe,
  //     and is included in assertion messages to make failures diagnosable.
  //   - a spawn failure rejects instead of raising an unhandled 'error' event.
  function runWorker(args) {
    return new Promise((resolve, reject) => {
      const child = spawn(process.execPath, [JB_WORKER_PATH, ...args], {
        stdio: ['ignore', 'pipe', 'pipe'],
      });
      let stdout = '';
      let stderr = '';
      child.stdout.setEncoding('utf8');
      child.stderr.setEncoding('utf8');
      child.stdout.on('data', (chunk) => { stdout += chunk; });
      child.stderr.on('data', (chunk) => { stderr += chunk; });
      child.on('error', reject);
      child.on('close', (code) => resolve({ code, stdout, stderr }));
    });
  }

  it('emits ok:false JSON on missing args and exits 1', async () => {
    const { code, stdout, stderr } = await runWorker([]);
    assert.equal(code, 1, `stderr: ${stderr}`);
    const parsed = JSON.parse(stdout.trim());
    assert.equal(parsed.ok, false);
    assert.match(parsed.error, /missing --url or --tmpdir/);
  });

  it('emits ok:false JSON when given a non-JetBrains URL', async () => {
    // Reject non-JetBrains URLs at the worker level — defense-in-depth in case
    // orchestrator routes a wrong URL to the JB worker.
    const { code, stdout, stderr } = await runWorker([
      '--url', 'https://example.com/x.vsix',
      '--tmpdir', '/tmp',
    ]);
    assert.equal(code, 1, `stderr: ${stderr}`);
    const parsed = JSON.parse(stdout.trim());
    assert.equal(parsed.ok, false);
    assert.match(parsed.error, /expected JetBrains URL/);
  });
});
// ---------------------------------------------------------------------------
// 2. End-to-end scan() with mocked fetch
// ---------------------------------------------------------------------------
// End-to-end `scan()` tests for JetBrains Marketplace URLs. Every test swaps
// `globalThis.fetch` for a router stub and passes `useSandbox: false`, because
// a fetch mock does not cross into a spawned worker process.
describe('ide-extension-scanner — JetBrains URL mode', () => {
  before(() => resetCounter());
  // Tests overwrite globalThis.fetch; put the real one back once at the end.
  after(() => { globalThis.fetch = realFetch; });

  it('resolves numericId → xmlId via metadata, then downloads + scans', async () => {
    const archive = buildBenignJetBrainsArchive();
    const calls = [];
    installFetchRouter((url) => {
      calls.push(url);
      if (/\/api\/plugins\/7973$/.test(url)) {
        return jsonResponse({ xmlId: 'com.example.benign' });
      }
      if (/\/plugin\/download\?pluginId=com\.example\.benign/.test(url)) {
        return mockBufferResponse(archive);
      }
      return null;
    });

    const env = await scan(
      'https://plugins.jetbrains.com/plugin/7973-benign',
      { useSandbox: false },
    );

    // Two fetches: metadata + download.
    assert.equal(calls.length, 2, `calls: ${calls.join(', ')}`);
    assert.match(calls[0], /\/api\/plugins\/7973/);
    assert.match(calls[1], /\/plugin\/download\?pluginId=com\.example\.benign/);

    // Envelope shape.
    assert.ok(env.meta.source, 'expected meta.source to be set');
    assert.equal(env.meta.source.type, 'url');
    assert.equal(env.meta.source.kind, 'jetbrains');
    assert.equal(env.meta.source.xmlId, 'com.example.benign');
    assert.equal(env.meta.source.numericId, '7973');
    assert.match(env.meta.source.sha256, /^[a-f0-9]{64}$/);
    assert.equal(env.meta.source.sandbox, 'in-process');
    assert.equal(env.meta.target, 'https://plugins.jetbrains.com/plugin/7973-benign');

    // Scanner parsed the inner plugin.xml and produced exactly one JB extension.
    assert.equal(env.extensions.length, 1);
    assert.equal(env.extensions[0].type, 'jetbrains');
    assert.equal(env.extensions[0].id, 'com.example.benign');
    assert.equal(env.extensions[0].version, '1.0.0');
  });

  it('downloads by xmlId directly (no metadata round-trip)', async () => {
    const archive = buildBenignJetBrainsArchive();
    let metaCalled = false;
    let downloadCalled = false;
    installFetchRouter((url) => {
      if (/\/api\/plugins\//.test(url)) {
        metaCalled = true;
        return jsonResponse({ xmlId: 'should.not.be.used' });
      }
      if (/\/plugin\/download\?pluginId=com\.example\.benign/.test(url)) {
        downloadCalled = true;
        return mockBufferResponse(archive);
      }
      return null;
    });

    const env = await scan(
      'https://plugins.jetbrains.com/plugin/download?pluginId=com.example.benign',
      { useSandbox: false },
    );

    assert.equal(metaCalled, false, 'metadata should not be fetched when xmlId is explicit');
    assert.equal(downloadCalled, true);
    assert.equal(env.meta.source.kind, 'jetbrains');
    assert.equal(env.meta.source.xmlId, 'com.example.benign');
    assert.equal(env.extensions.length, 1);
    assert.equal(env.extensions[0].type, 'jetbrains');
  });

  it('passes version query through unchanged', async () => {
    const archive = buildBenignJetBrainsArchive();
    const calls = [];
    installFetchRouter((url) => {
      calls.push(url);
      if (/\/plugin\/download\?pluginId=com\.example\.benign/.test(url)) {
        return mockBufferResponse(archive);
      }
      return null;
    });

    await scan(
      'https://plugins.jetbrains.com/plugin/download?pluginId=com.example.benign&version=2.3.4',
      { useSandbox: false },
    );

    assert.equal(calls.length, 1);
    assert.match(calls[0], /version=2\.3\.4/);
  });

  it('reports fetch network failure as a warning, no extensions scanned', async () => {
    // The router throws for every URL, so the stubbed fetch rejects.
    installFetchRouter(() => { throw new Error('ECONNREFUSED'); });

    const env = await scan(
      'https://plugins.jetbrains.com/plugin/download?pluginId=com.example.benign',
      { useSandbox: false },
    );

    assert.equal(env.extensions.length, 0);
    assert.ok(
      env.meta.warnings.some((w) => /URL fetch\/extract failed/.test(w)),
      `warnings: ${env.meta.warnings.join(' | ')}`,
    );
  });

  it('reports malformed archive as a warning, no extensions scanned', async () => {
    installFetchRouter((url) => {
      if (/\/plugin\/download/.test(url)) {
        return mockBufferResponse(Buffer.from('not a zip at all'));
      }
      return null;
    });

    const env = await scan(
      'https://plugins.jetbrains.com/plugin/download?pluginId=com.example.benign',
      { useSandbox: false },
    );

    assert.equal(env.extensions.length, 0);
    assert.ok(
      env.meta.warnings.some((w) => /malformed plugin archive|URL fetch\/extract failed/.test(w)),
      `warnings: ${env.meta.warnings.join(' | ')}`,
    );
  });

  it('still scans an explicit JetBrains URL when vscodeOnly is set', async () => {
    // The vscodeOnly toggle must neither short-circuit the URL fetch nor
    // filter the result by extension type: an explicitly requested JetBrains
    // URL is fetched and scanned regardless of the toggle.
    const archive = buildBenignJetBrainsArchive();
    installFetchRouter((url) => {
      if (/\/plugin\/download/.test(url)) return mockBufferResponse(archive);
      return null;
    });

    const env = await scan(
      'https://plugins.jetbrains.com/plugin/download?pluginId=com.example.benign',
      { useSandbox: false, vscodeOnly: true },
    );

    // The URL was explicitly JB; we still scan it.
    assert.equal(env.meta.source.kind, 'jetbrains');
    assert.equal(env.extensions.length, 1);
    assert.equal(env.extensions[0].type, 'jetbrains');
  });
});