feat(llm-security): /security ide-scan <url> — Marketplace/OpenVSX/direct VSIX (v6.4.0)
Pre-installation verification of VS Code extensions via URL — fetch a remote VSIX, extract it in a hardened sandbox, and run the existing IDE scanner pipeline against it. No npm dependencies. Sources: - VS Code Marketplace (publisher.gallery.vsassets.io direct download) - OpenVSX (open-vsx.org official API) - Direct .vsix HTTPS URLs Defenses: - HTTPS-only, TLS verified, manual redirect with per-source host whitelist - 30s total timeout via AbortController - 50MB compressed cap, 500MB uncompressed, 100x expansion ratio - Zero-dep ZIP extractor: zip-slip, absolute paths, drive letters, NUL bytes, symlinks (Unix mode 0xA000), depth limits, ZIP64 rejected, encrypted rejected - SHA-256 streamed during fetch, surfaced in meta.source - Temp dir cleanup in all paths (try/finally) Files: - scanners/lib/vsix-fetch.mjs (HTTPS fetcher, host whitelist, streaming SHA-256) - scanners/lib/zip-extract.mjs (zero-dep parser with hardening caps) - knowledge/marketplace-api-notes.md (endpoint reference) - 3 test files (48 tests added: vsix-fetch, zip-extract, ide-extension-url) Tests: 1296 → 1344 (all green). Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
parent
6252e55700
commit
fe0193956d
16 changed files with 1543 additions and 22 deletions
|
|
@ -13,7 +13,9 @@
|
|||
// Library: import { scan, discoverAll } from './ide-extension-scanner.mjs'
|
||||
|
||||
import { resolve, join, relative } from 'node:path';
|
||||
import { writeFileSync } from 'node:fs';
|
||||
import { writeFileSync, existsSync } from 'node:fs';
|
||||
import { mkdtemp, rm, stat } from 'node:fs/promises';
|
||||
import { tmpdir } from 'node:os';
|
||||
import { fileURLToPath } from 'node:url';
|
||||
import { discoverFiles } from './lib/file-discovery.mjs';
|
||||
import { finding, scannerResult } from './lib/output.mjs';
|
||||
|
|
@ -25,6 +27,8 @@ import {
|
|||
} from './lib/ide-extension-discovery.mjs';
|
||||
import { parseVSCodeExtension, parseVsixFile } from './lib/ide-extension-parser.mjs';
|
||||
import { loadTopVSCode, loadVSCodeBlocklist, normalizeId } from './lib/ide-extension-data.mjs';
|
||||
import { fetchVsixFromUrl, detectUrlType } from './lib/vsix-fetch.mjs';
|
||||
import { extractToDir, ZipError } from './lib/zip-extract.mjs';
|
||||
|
||||
import { scan as scanUnicode } from './unicode-scanner.mjs';
|
||||
import { scan as scanEntropy } from './entropy-scanner.mjs';
|
||||
|
|
@ -33,9 +37,66 @@ import { scan as scanTaint } from './taint-tracer.mjs';
|
|||
import { scan as scanMemoryPoisoning } from './memory-poisoning-scanner.mjs';
|
||||
import { scan as scanSupplyChain } from './supply-chain-recheck.mjs';
|
||||
|
||||
const VERSION = '6.3.0';
|
||||
const VERSION = '6.4.0';
|
||||
const SCANNER = 'IDE';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// URL → temp dir orchestration
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * True when the scan target is an HTTP(S) URL rather than a filesystem path.
 * @param {*} target
 * @returns {boolean}
 */
function isUrlTarget(target) {
  if (typeof target !== 'string') return false;
  return /^https?:\/\//i.test(target);
}
|
||||
|
||||
/**
 * Download a VSIX from `url`, unpack it into a fresh temp directory, and hand
 * back the directory that `parseVSCodeExtension` should scan. VSIX archives
 * normally wrap the payload under `extension/`; when that wrapper is missing
 * we fall back to the temp dir root.
 *
 * The caller owns cleanup on success:
 * `await rm(result.tempDir, { recursive: true, force: true })` in a finally.
 * On any failure here the temp dir is removed before the error propagates.
 *
 * @param {string} url
 * @returns {Promise<{ extRoot: string, tempDir: string, source: object }>}
 */
async function fetchAndExtractVsixUrl(url) {
  const tempDir = await mkdtemp(join(tmpdir(), 'llm-sec-vsix-'));
  try {
    let fetched;
    try {
      fetched = await fetchVsixFromUrl(url);
    } catch (err) {
      throw new Error(`fetch failed: ${err.message}`);
    }

    try {
      await extractToDir(fetched.buffer, tempDir);
    } catch (err) {
      if (err instanceof ZipError) {
        throw new Error(`malformed VSIX (${err.code}): ${err.message}`);
      }
      throw err;
    }

    // Standard VSIX layout nests everything under `extension/`; some
    // packagers omit the wrapper, in which case the temp dir itself is root.
    const wrapper = join(tempDir, 'extension');
    const extRoot = existsSync(wrapper) ? wrapper : tempDir;

    const { type: kind, ...sourceMeta } = fetched.source;
    return {
      extRoot,
      tempDir,
      source: {
        type: 'url',
        kind, // 'marketplace' | 'openvsx' | 'vsix'
        url,
        finalUrl: fetched.finalUrl,
        sha256: fetched.sha256,
        size: fetched.size,
        ...sourceMeta,
      },
    };
  } catch (err) {
    // Best-effort cleanup before the error propagates to the caller.
    await rm(tempDir, { recursive: true, force: true }).catch(() => {});
    throw err;
  }
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// IDE-specific checks (operate on parsed manifest)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
|
@ -386,10 +447,38 @@ export async function scan(target, options = {}) {
|
|||
const warnings = [];
|
||||
let extensions = [];
|
||||
let rootsScanned = [];
|
||||
let urlSource = null;
|
||||
let urlTempDir = null;
|
||||
|
||||
const singleTargetPath = target && target !== '.' && target !== 'all' ? resolve(target) : null;
|
||||
// URL mode: fetch VSIX, extract to temp dir, then treat extracted dir as single target.
|
||||
if (isUrlTarget(target)) {
|
||||
const detected = detectUrlType(target);
|
||||
if (detected.type === 'unknown') {
|
||||
warnings.push(`unsupported URL: ${target} (expected VS Code Marketplace, OpenVSX, or direct .vsix)`);
|
||||
} else if (detected.type === 'github') {
|
||||
warnings.push('GitHub repo URLs are not supported in v6.4.0 — would require build step. Use the Marketplace, OpenVSX, or a direct .vsix link.');
|
||||
} else {
|
||||
try {
|
||||
const fetched = await fetchAndExtractVsixUrl(target);
|
||||
urlSource = fetched.source;
|
||||
urlTempDir = fetched.tempDir;
|
||||
target = fetched.extRoot; // forward into single-target path mode
|
||||
} catch (err) {
|
||||
warnings.push(`URL fetch/extract failed: ${err.message}`);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (singleTargetPath) {
|
||||
const urlFetchFailed = isUrlTarget(target) && !urlSource;
|
||||
const singleTargetPath = target && target !== '.' && target !== 'all' && !isUrlTarget(target)
|
||||
? resolve(target)
|
||||
: null;
|
||||
|
||||
try {
|
||||
|
||||
if (urlFetchFailed) {
|
||||
// Don't fall through to discovery when the user asked for a specific URL.
|
||||
} else if (singleTargetPath) {
|
||||
// Single-directory mode
|
||||
const parsed = await parseVSCodeExtension(singleTargetPath);
|
||||
if (!parsed) {
|
||||
|
|
@ -453,7 +542,7 @@ export async function scan(target, options = {}) {
|
|||
meta: {
|
||||
scanner: 'ide-extension-scanner',
|
||||
version: VERSION,
|
||||
target: singleTargetPath || (target || 'discover-all'),
|
||||
target: urlSource ? urlSource.url : (singleTargetPath || (target || 'discover-all')),
|
||||
timestamp: new Date().toISOString(),
|
||||
node_version: process.version,
|
||||
duration_ms: Date.now() - started,
|
||||
|
|
@ -463,6 +552,7 @@ export async function scan(target, options = {}) {
|
|||
},
|
||||
roots_scanned: rootsScanned,
|
||||
online: options.online === true,
|
||||
source: urlSource,
|
||||
warnings,
|
||||
},
|
||||
extensions: perExt,
|
||||
|
|
@ -476,6 +566,11 @@ export async function scan(target, options = {}) {
|
|||
extensions_warning: warningCount,
|
||||
},
|
||||
};
|
||||
} finally {
|
||||
if (urlTempDir) {
|
||||
await rm(urlTempDir, { recursive: true, force: true }).catch(() => {});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -535,7 +630,10 @@ async function main() {
|
|||
console.log(`ide-extension-scanner v${VERSION}
|
||||
Usage: node ide-extension-scanner.mjs [target] [options]
|
||||
|
||||
target: omitted/"."/"all" = discover all installed; path to extracted extension directory = single scan
|
||||
target: omitted/"."/"all" = discover all installed; path to extracted extension directory = single scan;
|
||||
https://marketplace.visualstudio.com/items?itemName=publisher.name = fetch from Marketplace;
|
||||
https://open-vsx.org/extension/publisher/name[/version] = fetch from OpenVSX;
|
||||
https://example.com/path/foo.vsix = direct VSIX download
|
||||
|
||||
Options:
|
||||
--vscode-only Skip JetBrains discovery
|
||||
|
|
|
|||
285
plugins/llm-security/scanners/lib/vsix-fetch.mjs
Normal file
285
plugins/llm-security/scanners/lib/vsix-fetch.mjs
Normal file
|
|
@ -0,0 +1,285 @@
|
|||
// vsix-fetch.mjs — Fetch VSIX packages from VS Code Marketplace, OpenVSX, or direct URL.
|
||||
// Zero dependencies. Streams to memory with strict size cap, computes SHA-256 on the fly.
|
||||
//
|
||||
// Defenses:
|
||||
// - HTTPS only (no plain HTTP, no protocol downgrade on redirects)
|
||||
// - 30s total timeout (network + body)
|
||||
// - 50MB max compressed size (abort streaming when exceeded)
|
||||
// - TLS verification always enabled
|
||||
// - No follow on cross-origin redirects (same registered host only)
|
||||
// - Marketplace endpoint is undocumented but stable; documented in
|
||||
// knowledge/marketplace-api-notes.md.
|
||||
|
||||
import { createHash } from 'node:crypto';
|
||||
|
||||
// Hard ceiling on the compressed VSIX download; enforced while streaming in readBodyCapped.
const MAX_VSIX_BYTES = 50 * 1024 * 1024; // 50MB
// Network budget per request (connect + body) in milliseconds; a fresh timer is armed per hop.
const FETCH_TIMEOUT_MS = 30_000;
|
||||
|
||||
// Exact-match host whitelists for the two supported registries.
const MARKETPLACE_HOSTS = new Set([
  'marketplace.visualstudio.com',
]);
const OPENVSX_HOSTS = new Set([
  'open-vsx.org',
]);

/**
 * Classify a user-supplied URL into one of the supported VSIX sources.
 * Anything that is not HTTPS, or not parseable as a URL, is 'unknown'.
 * @param {string} url
 * @returns {{ type: 'marketplace'|'openvsx'|'vsix'|'github'|'unknown', publisher?: string, name?: string, version?: string }}
 */
export function detectUrlType(url) {
  let parsed;
  try {
    parsed = new URL(url);
  } catch {
    return { type: 'unknown' };
  }
  if (parsed.protocol !== 'https:') return { type: 'unknown' };

  // VS Code Marketplace item page: /items?itemName=publisher.name
  if (MARKETPLACE_HOSTS.has(parsed.hostname)) {
    const itemName = parsed.searchParams.get('itemName');
    if (!itemName) return { type: 'unknown' };
    const dot = itemName.indexOf('.');
    // Require a non-empty publisher before the first dot and a non-empty name after it.
    if (dot <= 0 || dot === itemName.length - 1) return { type: 'unknown' };
    return {
      type: 'marketplace',
      publisher: itemName.slice(0, dot),
      name: itemName.slice(dot + 1),
    };
  }

  // OpenVSX extension page: /extension/{publisher}/{name}[/{version}]
  if (OPENVSX_HOSTS.has(parsed.hostname)) {
    const segments = parsed.pathname.split('/').filter(Boolean);
    if (segments[0] !== 'extension' || segments.length < 3) return { type: 'unknown' };
    const [, publisher, name, version] = segments;
    return { type: 'openvsx', publisher, name, version: version || null };
  }

  // GitHub repos would need a build step — recognized but unsupported.
  if (parsed.hostname === 'github.com') {
    return { type: 'github' };
  }

  // Anything else must point straight at a .vsix file.
  if (parsed.pathname.toLowerCase().endsWith('.vsix')) {
    return { type: 'vsix' };
  }

  return { type: 'unknown' };
}
|
||||
|
||||
/**
 * Per-source host whitelist applied to every redirect hop.
 * Marketplace downloads bounce to the vsassets CDN; OpenVSX serves files
 * from an Azure blob host. For direct-vsix fetches this returns true —
 * the same-host rule is enforced by the caller (httpsFetchSameHost).
 * @param {string} hostname
 * @param {string} originalType - 'marketplace' | 'openvsx' | anything else
 * @returns {boolean}
 */
function isAllowedHost(hostname, originalType) {
  switch (originalType) {
    case 'marketplace':
      // Marketplace API redirects to the vsassets CDN (vstmrblob).
      return MARKETPLACE_HOSTS.has(hostname)
        || hostname.endsWith('.gallerycdn.vsassets.io')
        || hostname.endsWith('.vsassets.io');
    case 'openvsx':
      return OPENVSX_HOSTS.has(hostname)
        || hostname === 'openvsxorg.blob.core.windows.net'
        || hostname.endsWith('.openvsx.org');
    default:
      // Direct vsix: only same host as the original URL (caller enforces).
      return true;
  }
}
|
||||
|
||||
/**
 * Drain a fetch Response body into memory, hashing as it streams.
 * The moment the byte count passes MAX_VSIX_BYTES the request is aborted
 * via the supplied controller and an Error is thrown — the cap applies to
 * bytes on the wire, not a trusted Content-Length header.
 * @param {Response} res
 * @param {AbortController} controller
 * @returns {Promise<{ buffer: Buffer, sha256: string, size: number }>}
 */
async function readBodyCapped(res, controller) {
  if (!res.body) throw new Error('response has no body');
  const digest = createHash('sha256');
  const parts = [];
  let received = 0;
  const reader = res.body.getReader();
  for (;;) {
    const { done, value } = await reader.read();
    if (done) break;
    if (!value) continue;
    received += value.byteLength;
    if (received > MAX_VSIX_BYTES) {
      try { controller.abort(); } catch {}
      throw new Error(`VSIX exceeds maximum size (${MAX_VSIX_BYTES} bytes)`);
    }
    digest.update(value);
    parts.push(Buffer.from(value));
  }
  return { buffer: Buffer.concat(parts), sha256: digest.digest('hex'), size: received };
}
|
||||
|
||||
/**
 * HTTPS fetch with manual, validated redirect handling for registry sources.
 *
 * Every hop is checked: HTTPS-only (no protocol downgrade) and the hostname
 * must pass the per-source whitelist (isAllowedHost). A fresh AbortController
 * enforces FETCH_TIMEOUT_MS per hop; the body streams through readBodyCapped
 * so the size cap can abort mid-transfer.
 *
 * Fixes vs. the original:
 * - 307/308 redirects now preserve the request method and body, per
 *   RFC 9110 §15.4; only 301/302/303 are downgraded to GET.
 * - The redirect response body is cancelled before following, so the
 *   connection isn't left holding an unread stream.
 *
 * @param {string} url
 * @param {object} init - fetch init; `__depth` tracks redirect count internally
 * @param {string} originalType - 'marketplace' | 'openvsx' | 'vsix'
 * @returns {Promise<{ buffer: Buffer, sha256: string, size: number, finalUrl: string }>}
 * @throws {Error} on non-HTTPS URL, disallowed host, >5 redirects, HTTP error, or size cap
 */
async function httpsFetch(url, init, originalType) {
  const u = new URL(url);
  if (u.protocol !== 'https:') {
    throw new Error(`refusing non-HTTPS URL: ${url}`);
  }
  if (!isAllowedHost(u.hostname, originalType)) {
    throw new Error(`refusing redirect to disallowed host: ${u.hostname}`);
  }
  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(), FETCH_TIMEOUT_MS);
  try {
    const res = await fetch(url, {
      ...init,
      signal: controller.signal,
      // Manual redirect handling so we can validate every hop.
      redirect: 'manual',
    });
    if (res.status >= 300 && res.status < 400) {
      const loc = res.headers.get('location');
      if (!loc) throw new Error(`HTTP ${res.status} without Location header`);
      const depth = (init && init.__depth) || 0;
      if (depth >= 5) throw new Error('too many redirects');
      // Discard the redirect body so the underlying connection can be reused.
      if (res.body) await res.body.cancel().catch(() => {});
      const next = new URL(loc, url).toString();
      // RFC 9110: 307/308 keep method+body; 301/302/303 become GET.
      const preserveMethod = res.status === 307 || res.status === 308;
      const nextInit = preserveMethod
        ? { ...init, __depth: depth + 1 }
        : { ...init, __depth: depth + 1, method: 'GET', body: undefined };
      return httpsFetch(next, nextInit, originalType);
    }
    if (!res.ok) {
      throw new Error(`HTTP ${res.status} ${res.statusText} for ${url}`);
    }
    const out = await readBodyCapped(res, controller);
    return { ...out, finalUrl: url };
  } finally {
    clearTimeout(timer);
  }
}
|
||||
|
||||
/**
 * Fetch a VSIX from the VS Code Marketplace by publisher/name.
 * Uses the well-known direct download pattern (the same one `vsce` and
 * `code` itself use) via the publisher's gallery.vsassets.io host; the
 * gallery extensionquery API is documented in knowledge/marketplace-api-notes.md.
 *
 * @param {string} publisher
 * @param {string} name
 * @returns {Promise<{ buffer: Buffer, sha256: string, size: number, finalUrl: string, source: object }>}
 */
export async function fetchMarketplaceVsix(publisher, name) {
  const pub = encodeURIComponent(publisher);
  const ext = encodeURIComponent(name);
  // https://{publisher}.gallery.vsassets.io/_apis/public/gallery/publisher/{publisher}/extension/{name}/latest/assetbyname/Microsoft.VisualStudio.Services.VSIXPackage
  const directUrl = [
    `https://${pub}.gallery.vsassets.io`,
    `/_apis/public/gallery/publisher/${pub}`,
    `/extension/${ext}/latest/assetbyname/Microsoft.VisualStudio.Services.VSIXPackage`,
  ].join('');

  const out = await httpsFetch(directUrl, { method: 'GET' }, 'marketplace');
  return {
    ...out,
    source: { type: 'marketplace', publisher, name, requestedUrl: directUrl },
  };
}
|
||||
|
||||
/**
 * Fetch a VSIX from OpenVSX.
 * With an explicit version the file is downloaded directly:
 *   https://open-vsx.org/api/{pub}/{name}/{version}/file/{pub}.{name}-{version}.vsix
 * Without one, the `/latest` metadata endpoint resolves the version first.
 *
 * @param {string} publisher
 * @param {string} name
 * @param {string|null} version - exact version, or null to resolve latest
 * @returns {Promise<{ buffer: Buffer, sha256: string, size: number, finalUrl: string, source: object }>}
 */
export async function fetchOpenVsxVsix(publisher, name, version) {
  const pub = encodeURIComponent(publisher);
  const ext = encodeURIComponent(name);

  let resolvedVersion = version;
  if (!resolvedVersion) {
    // Resolve the latest published version via the official metadata API.
    const meta = await httpsFetch(
      `https://open-vsx.org/api/${pub}/${ext}/latest`,
      { method: 'GET', headers: { Accept: 'application/json' } },
      'openvsx',
    );
    let info;
    try {
      info = JSON.parse(meta.buffer.toString('utf8'));
    } catch {
      throw new Error('OpenVSX returned non-JSON metadata');
    }
    if (!info || typeof info.version !== 'string') {
      throw new Error('OpenVSX metadata missing version');
    }
    resolvedVersion = info.version;
  }

  const ver = encodeURIComponent(resolvedVersion);
  const url =
    `https://open-vsx.org/api/${pub}/${ext}` +
    `/${ver}/file/` +
    `${pub}.${ext}-${ver}.vsix`;

  const out = await httpsFetch(url, { method: 'GET' }, 'openvsx');
  return {
    ...out,
    source: { type: 'openvsx', publisher, name, version: resolvedVersion, requestedUrl: url },
  };
}
|
||||
|
||||
/**
 * Fetch a VSIX from a direct HTTPS URL.
 * Redirects must stay on the original hostname (enforced by httpsFetchSameHost).
 * @param {string} url
 * @returns {Promise<{ buffer: Buffer, sha256: string, size: number, finalUrl: string, source: object }>}
 */
export async function fetchDirectVsix(url) {
  const { protocol, hostname } = new URL(url);
  if (protocol !== 'https:') {
    throw new Error('direct VSIX URL must be HTTPS');
  }
  // Pin the original host; every redirect hop is checked against it.
  const out = await httpsFetchSameHost(url, hostname);
  return {
    ...out,
    source: { type: 'vsix', requestedUrl: url },
  };
}
|
||||
|
||||
/**
 * HTTPS GET restricted to a single host: every redirect hop must stay on
 * `sourceHost` and stay HTTPS. Used for direct .vsix downloads, where there
 * is no registry whitelist to consult.
 *
 * Fix vs. the original: a redirect-depth cap. Without it, a same-host
 * redirect loop recursed without bound — and because each hop arms a fresh
 * FETCH_TIMEOUT_MS timer, the overall operation would never time out.
 *
 * @param {string} url
 * @param {string} sourceHost - hostname of the original request
 * @param {number} [depth=0] - internal redirect counter (max 5); callers omit it
 * @returns {Promise<{ buffer: Buffer, sha256: string, size: number, finalUrl: string }>}
 * @throws {Error} on non-HTTPS, cross-host redirect, too many redirects, or HTTP error
 */
async function httpsFetchSameHost(url, sourceHost, depth = 0) {
  const u = new URL(url);
  if (u.protocol !== 'https:') {
    throw new Error(`refusing non-HTTPS URL: ${url}`);
  }
  if (u.hostname !== sourceHost) {
    throw new Error(`refusing cross-host redirect: ${u.hostname} != ${sourceHost}`);
  }
  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(), FETCH_TIMEOUT_MS);
  try {
    const res = await fetch(url, { signal: controller.signal, redirect: 'manual' });
    if (res.status >= 300 && res.status < 400) {
      const loc = res.headers.get('location');
      if (!loc) throw new Error(`HTTP ${res.status} without Location header`);
      if (depth >= 5) throw new Error('too many redirects');
      // Discard the redirect body before following the hop.
      if (res.body) await res.body.cancel().catch(() => {});
      const next = new URL(loc, url).toString();
      return httpsFetchSameHost(next, sourceHost, depth + 1);
    }
    if (!res.ok) throw new Error(`HTTP ${res.status} ${res.statusText} for ${url}`);
    const out = await readBodyCapped(res, controller);
    return { ...out, finalUrl: url };
  } finally {
    clearTimeout(timer);
  }
}
|
||||
|
||||
/**
 * High-level dispatch: detect the URL type and fetch the VSIX accordingly.
 * @param {string} url
 * @returns {Promise<{ buffer: Buffer, sha256: string, size: number, finalUrl: string, source: object }>}
 * @throws {Error} for GitHub and unrecognized URLs
 */
export async function fetchVsixFromUrl(url) {
  const detected = detectUrlType(url);
  if (detected.type === 'marketplace') {
    return fetchMarketplaceVsix(detected.publisher, detected.name);
  }
  if (detected.type === 'openvsx') {
    return fetchOpenVsxVsix(detected.publisher, detected.name, detected.version);
  }
  if (detected.type === 'vsix') {
    return fetchDirectVsix(url);
  }
  if (detected.type === 'github') {
    throw new Error('GitHub repo URLs are not supported in v6.4.0 (would require build step). Use Marketplace, OpenVSX, or a direct .vsix URL.');
  }
  throw new Error(`unsupported URL: ${url}`);
}
|
||||
|
||||
// Internal hooks re-exported for unit tests only — not part of the public API.
export const __testing = {
  MAX_VSIX_BYTES,
  FETCH_TIMEOUT_MS,
  isAllowedHost,
  readBodyCapped,
};
|
||||
374
plugins/llm-security/scanners/lib/zip-extract.mjs
Normal file
374
plugins/llm-security/scanners/lib/zip-extract.mjs
Normal file
|
|
@ -0,0 +1,374 @@
|
|||
// zip-extract.mjs — Zero-dependency ZIP parser and safe extractor for VSIX files.
|
||||
//
|
||||
// VSIX is a plain ZIP container. We implement the minimum subset needed to:
|
||||
// - Parse the End of Central Directory (EOCD)
|
||||
// - Walk Central Directory headers to enumerate entries
|
||||
// - Read each Local File Header and inflate its data with node:zlib
|
||||
// - Validate every entry name BEFORE creating any file (zip-slip, absolute, symlinks)
|
||||
// - Enforce caps: total entries, total uncompressed bytes, expansion ratio, depth
|
||||
//
|
||||
// Compression methods supported:
|
||||
// 0 STORE (no compression)
|
||||
// 8 DEFLATE (raw deflate, via zlib.createInflateRaw)
|
||||
// Anything else is rejected.
|
||||
//
|
||||
// Spec: https://pkware.cachefly.net/webdocs/casestudies/APPNOTE.TXT
|
||||
//
|
||||
// SECURITY NOTES
|
||||
// - We resolve every entry path inside `targetDir` and require startsWith(targetDir + sep)
|
||||
// - We reject absolute paths, drive letters, NUL bytes, and `..` segments after normalization
|
||||
// - We reject entries whose external_attr indicates a symbolic link (0xA000 in upper word, Unix)
|
||||
// - We reject ZIP64 entries (we don't claim to support them; >4GB is out of scope for VSIX)
|
||||
// - We reject entries with general purpose bit 0 (encryption) set
|
||||
// - Directories are not extracted from entries — created on-demand from file paths
|
||||
|
||||
import { createInflateRaw } from 'node:zlib';
|
||||
import { mkdir, writeFile } from 'node:fs/promises';
|
||||
import { dirname, join, resolve, sep, normalize } from 'node:path';
|
||||
|
||||
// --- ZIP record signatures (little-endian on disk) -------------------------
const SIG_LFH = 0x04034b50; // Local File Header
const SIG_CD = 0x02014b50; // Central Directory
const SIG_EOCD = 0x06054b50; // End of Central Directory
const SIG_EOCD64_LOC = 0x07064b50; // ZIP64 EOCD locator (presence => reject)
const SIG_EOCD64 = 0x06064b50; // ZIP64 EOCD record

// Extraction limits applied by extractToDir unless overridden via opts.caps.
const DEFAULT_CAPS = Object.freeze({
  maxEntries: 10_000,
  maxUncompressedBytes: 500 * 1024 * 1024, // 500MB
  maxExpansionRatio: 100, // sum uncompressed / sum compressed
  maxDepth: 20,
});

/**
 * Error type for every ZIP parsing/extraction failure.
 * `code` is a stable, machine-readable identifier (e.g. 'ZIP_BOMB').
 */
class ZipError extends Error {
  constructor(message, code = 'ZIP_INVALID') {
    super(message);
    this.name = 'ZipError';
    this.code = code;
  }
}

// Little-endian scalar readers (the ZIP format is little-endian throughout).
function readU16(buf, off) {
  return buf.readUInt16LE(off);
}
function readU32(buf, off) {
  return buf.readUInt32LE(off);
}

/**
 * Locate the End of Central Directory record by scanning backwards from EOF.
 * The fixed record is 22 bytes; an archive comment can push it up to 65557
 * bytes from the end. The comment-length field must account for every byte
 * after the record, which rejects trailing garbage and false signatures.
 * @param {Buffer} buf
 * @returns {number} byte offset of the EOCD signature
 * @throws {ZipError} ZIP_NO_EOCD when no valid record exists
 */
function findEOCD(buf) {
  const lowest = Math.max(0, buf.length - 22 - 0xFFFF);
  for (let off = buf.length - 22; off >= lowest; off--) {
    if (readU32(buf, off) !== SIG_EOCD) continue;
    const commentLen = readU16(buf, off + 20);
    if (off + 22 + commentLen === buf.length) return off;
  }
  throw new ZipError('EOCD signature not found', 'ZIP_NO_EOCD');
}

/**
 * Decode the fixed fields of an EOCD record starting at `off`.
 * @param {Buffer} buf
 * @param {number} off - offset of the EOCD signature
 */
function parseEOCD(buf, off) {
  const u16 = (rel) => readU16(buf, off + rel);
  const u32 = (rel) => readU32(buf, off + rel);
  return {
    diskNumber: u16(4),
    cdDisk: u16(6),
    cdEntriesOnDisk: u16(8),
    cdEntriesTotal: u16(10),
    cdSize: u32(12),
    cdOffset: u32(16),
    commentLength: u16(20),
  };
}
|
||||
|
||||
/**
 * Parse every central directory entry of a ZIP buffer into plain records.
 * Rejects archives we deliberately do not support: ZIP64, multi-disk, and
 * split central directories. Does not read or inflate any file data.
 * @param {Buffer} buf - the complete archive
 * @returns {Array} one record per CD entry (see fields pushed below)
 * @throws {ZipError} ZIP_NO_EOCD | ZIP_ZIP64 | ZIP_MULTIDISK | ZIP_SPLIT_CD |
 *   ZIP_BAD_CD_OFFSET | ZIP_TRUNCATED_CD | ZIP_BAD_CD_SIG | ZIP_BAD_NAME
 */
function parseCentralDirectory(buf) {
  // Reject ZIP64 (we do not implement it).
  // Look for ZIP64 EOCD locator in the 20 bytes immediately before EOCD.
  const eocdOff = findEOCD(buf);
  if (eocdOff >= 20 && readU32(buf, eocdOff - 20) === SIG_EOCD64_LOC) {
    throw new ZipError('ZIP64 archives are not supported', 'ZIP_ZIP64');
  }

  const eocd = parseEOCD(buf, eocdOff);
  // Multi-volume archives would need data from other files — out of scope.
  if (eocd.diskNumber !== 0 || eocd.cdDisk !== 0) {
    throw new ZipError('multi-disk archives are not supported', 'ZIP_MULTIDISK');
  }
  if (eocd.cdEntriesOnDisk !== eocd.cdEntriesTotal) {
    throw new ZipError('split central directory not supported', 'ZIP_SPLIT_CD');
  }
  // 0xFFFFFFFF in these fields is the ZIP64 sentinel ("real value elsewhere").
  if (eocd.cdOffset === 0xFFFFFFFF || eocd.cdSize === 0xFFFFFFFF) {
    throw new ZipError('ZIP64 fields detected', 'ZIP_ZIP64');
  }
  if (eocd.cdOffset + eocd.cdSize > buf.length) {
    throw new ZipError('central directory extends past EOF', 'ZIP_BAD_CD_OFFSET');
  }

  const entries = [];
  let p = eocd.cdOffset;
  for (let i = 0; i < eocd.cdEntriesTotal; i++) {
    // A CD file header is 46 fixed bytes followed by name/extra/comment.
    if (p + 46 > buf.length) throw new ZipError('truncated central directory', 'ZIP_TRUNCATED_CD');
    if (readU32(buf, p) !== SIG_CD) throw new ZipError('bad central directory signature', 'ZIP_BAD_CD_SIG');

    // Fixed-field offsets per APPNOTE 4.3.12 (central directory file header).
    const versionMadeBy = readU16(buf, p + 4);   // high byte = host OS (3 = Unix)
    const generalFlags = readU16(buf, p + 8);    // bit 0 set => encrypted
    const method = readU16(buf, p + 10);         // 0 = STORE, 8 = DEFLATE
    const crc32 = readU32(buf, p + 16);
    const compSize = readU32(buf, p + 20);
    const uncompSize = readU32(buf, p + 24);
    const nameLen = readU16(buf, p + 28);
    const extraLen = readU16(buf, p + 30);
    const commentLen = readU16(buf, p + 32);
    const externalAttr = readU32(buf, p + 38);   // high 16 bits = Unix mode when made on Unix
    const lfhOffset = readU32(buf, p + 42);      // where this entry's Local File Header lives

    // Per-entry ZIP64 sentinels — same rejection as the EOCD-level check.
    if (compSize === 0xFFFFFFFF || uncompSize === 0xFFFFFFFF || lfhOffset === 0xFFFFFFFF) {
      throw new ZipError('ZIP64 fields detected in entry', 'ZIP_ZIP64');
    }

    const nameStart = p + 46;
    if (nameStart + nameLen > buf.length) throw new ZipError('entry name extends past EOF', 'ZIP_BAD_NAME');
    const rawName = buf.slice(nameStart, nameStart + nameLen).toString('utf8');

    entries.push({
      versionMadeBy,
      generalFlags,
      method,
      crc32,
      compSize,
      uncompSize,
      nameLen,
      extraLen,
      commentLen,
      externalAttr,
      lfhOffset,
      name: rawName,
    });

    // Advance past the fixed header plus the three variable-length tails.
    p += 46 + nameLen + extraLen + commentLen;
  }
  return entries;
}
|
||||
|
||||
/**
 * Validate a raw ZIP entry name and return the normalized relative path that
 * is safe to join under the extraction target.
 * Returns null for directory entries (trailing '/'), which callers skip —
 * directories are created on demand for file entries instead.
 * @param {string} rawName - entry name exactly as stored in the archive
 * @param {{ maxDepth: number }} caps
 * @returns {string|null}
 * @throws {ZipError} ZIP_BAD_NAME | ZIP_ABSOLUTE | ZIP_TRAVERSAL | ZIP_DEEP
 */
function validateEntryName(rawName, caps) {
  if (rawName.length === 0) throw new ZipError('empty entry name', 'ZIP_BAD_NAME');
  if (rawName.length > 1024) throw new ZipError('entry name exceeds 1024 chars', 'ZIP_BAD_NAME');
  if (rawName.includes('\u0000')) throw new ZipError('NUL byte in entry name', 'ZIP_BAD_NAME');

  // Directory entries carry a trailing '/': skip them entirely.
  if (rawName.endsWith('/')) return null;

  // Absolute paths: POSIX ('/x') and backslash/UNC ('\x') forms...
  if (rawName.startsWith('/') || rawName.startsWith('\\')) {
    throw new ZipError(`absolute entry path: ${rawName}`, 'ZIP_ABSOLUTE');
  }
  // ...plus Windows drive-letter forms ('C:\x', 'C:/x').
  if (/^[A-Za-z]:[\\/]/.test(rawName)) {
    throw new ZipError(`drive-letter entry path: ${rawName}`, 'ZIP_ABSOLUTE');
  }

  // Some zip tools emit backslash separators; canonicalize to '/'.
  const unixName = rawName.replace(/\\/g, '/');

  // Any literal '..' component is rejected outright, before normalization.
  if (unixName.split('/').some((part) => part === '..')) {
    throw new ZipError(`parent traversal in entry: ${rawName}`, 'ZIP_TRAVERSAL');
  }

  // Collapse '.' segments via path.normalize, then Unicode-normalize (NFC).
  const normalized = normalize(unixName).normalize('NFC');
  if (normalized === '..' || normalized.startsWith('..') || normalized.includes(`${sep}..${sep}`)) {
    throw new ZipError(`parent traversal after normalization: ${rawName}`, 'ZIP_TRAVERSAL');
  }
  if (normalized.split(sep).length > caps.maxDepth) {
    throw new ZipError(`entry path exceeds depth ${caps.maxDepth}: ${rawName}`, 'ZIP_DEEP');
  }

  return normalized;
}
|
||||
|
||||
/**
 * True when a central-directory entry encodes a Unix symbolic link.
 * Only Unix-origin entries (versionMadeBy high byte === 3) store POSIX mode
 * bits in the high 16 bits of external_attr; the S_IFLNK file type is 0xA000.
 * @param {{ versionMadeBy: number, externalAttr: number }} entry
 * @returns {boolean}
 */
function isSymlink(entry) {
  const hostOs = (entry.versionMadeBy >>> 8) & 0xFF;
  if (hostOs !== 3) return false; // only Unix encodes mode bits here
  const unixMode = (entry.externalAttr >>> 16) & 0xFFFF;
  return (unixMode & 0xF000) === 0xA000;
}
|
||||
|
||||
/**
 * Inflate raw-deflate bytes with a hard output cap (zip-bomb defense).
 * Resolves with the inflated Buffer only when the output size exactly matches
 * the size recorded in the central directory; otherwise rejects with ZipError.
 * @param {Buffer} input - raw deflate stream
 * @param {number} expectedSize - uncompressed size from the CD entry
 * @param {number} maxBytes - hard ceiling on inflated output
 * @returns {Promise<Buffer>}
 */
function inflateBounded(input, expectedSize, maxBytes) {
  return new Promise((resolvePromise, reject) => {
    const inflater = createInflateRaw();
    const pieces = [];
    let written = 0;
    let overflowed = false;

    inflater.on('data', (piece) => {
      if (overflowed) return;
      written += piece.length;
      if (written > maxBytes) {
        overflowed = true;
        // destroy() surfaces the ZipError through the 'error' handler below.
        inflater.destroy(new ZipError(`inflate exceeds cap (${maxBytes} bytes)`, 'ZIP_BOMB'));
        return;
      }
      pieces.push(piece);
    });

    inflater.on('end', () => {
      if (overflowed) return;
      if (written === expectedSize) {
        resolvePromise(Buffer.concat(pieces, written));
      } else {
        reject(new ZipError(
          `inflated size ${written} does not match expected ${expectedSize}`,
          'ZIP_SIZE_MISMATCH',
        ));
      }
    });

    inflater.on('error', (err) => {
      reject(err instanceof ZipError
        ? err
        : new ZipError(`inflate failed: ${err.message}`, 'ZIP_INFLATE'));
    });

    inflater.end(input);
  });
}
|
||||
|
||||
/**
 * Read and decompress the data of one entry given its central directory record.
 * Sizes and offsets come from the (already validated) CD record; the Local
 * File Header's own name/extra lengths are re-read because they may differ
 * from the CD copies.
 * @param {Buffer} buf - the complete archive
 * @param {object} entry - record produced by parseCentralDirectory
 * @param {number} remainingBudget - max bytes we may still inflate this archive
 * @returns {Promise<Buffer>} uncompressed entry bytes
 * @throws {ZipError} ZIP_BAD_LFH | ZIP_BAD_DATA | ZIP_BAD_STORED | ZIP_BOMB | ZIP_BAD_METHOD
 */
async function readEntryData(buf, entry, remainingBudget) {
  const lfhOff = entry.lfhOffset;
  // A Local File Header is 30 fixed bytes plus name + extra field.
  if (lfhOff + 30 > buf.length) throw new ZipError('LFH past EOF', 'ZIP_BAD_LFH');
  if (readU32(buf, lfhOff) !== SIG_LFH) throw new ZipError('bad LFH signature', 'ZIP_BAD_LFH');

  const lfhNameLen = readU16(buf, lfhOff + 26);
  const lfhExtraLen = readU16(buf, lfhOff + 28);
  const dataStart = lfhOff + 30 + lfhNameLen + lfhExtraLen;
  const dataEnd = dataStart + entry.compSize;
  if (dataEnd > buf.length) throw new ZipError('entry data past EOF', 'ZIP_BAD_DATA');

  const compressed = buf.slice(dataStart, dataEnd);
  // Never allow an entry to inflate past what the archive-wide budget permits.
  const cap = Math.min(entry.uncompSize, remainingBudget);

  if (entry.method === 0) {
    // STORE: bytes are verbatim, so the two sizes must agree.
    if (entry.compSize !== entry.uncompSize) {
      throw new ZipError('STORED entry compSize != uncompSize', 'ZIP_BAD_STORED');
    }
    if (entry.uncompSize > remainingBudget) {
      throw new ZipError('STORED entry exceeds budget', 'ZIP_BOMB');
    }
    return compressed;
  }
  if (entry.method === 8) {
    // DEFLATE: inflate with the bounded helper (throws ZIP_BOMB past cap).
    return inflateBounded(compressed, entry.uncompSize, cap);
  }
  throw new ZipError(`unsupported compression method ${entry.method}`, 'ZIP_BAD_METHOD');
}
|
||||
|
||||
/**
 * Extract a VSIX/ZIP buffer to targetDir with all caps and validations applied.
 * targetDir must already exist or be creatable; caller owns cleanup.
 *
 * Per-entry defenses (in order): encrypted-entry rejection, symlink rejection,
 * name validation (zip-slip / absolute / traversal / depth), a post-join
 * resolve() containment check, and cumulative size + expansion-ratio caps.
 *
 * @param {Buffer} buf - The full ZIP buffer
 * @param {string} targetDir - Absolute path to extract into
 * @param {object} [opts]
 * @param {object} [opts.caps] - Override default caps (see DEFAULT_CAPS)
 * @returns {Promise<{ entries: number, bytes: number, files: string[] }>}
 * @throws {ZipError} on any structural or security violation
 */
export async function extractToDir(buf, targetDir, opts = {}) {
  const caps = { ...DEFAULT_CAPS, ...(opts.caps || {}) };
  const absTarget = resolve(targetDir);
  await mkdir(absTarget, { recursive: true });

  const entries = parseCentralDirectory(buf);
  if (entries.length > caps.maxEntries) {
    throw new ZipError(`too many entries (${entries.length} > ${caps.maxEntries})`, 'ZIP_TOO_MANY_ENTRIES');
  }

  let totalUncomp = 0;
  let totalComp = 0;
  const files = [];

  for (const entry of entries) {
    // General purpose bit 0 marks an encrypted entry — never acceptable here.
    if (entry.generalFlags & 0x0001) {
      throw new ZipError(`encrypted entry not allowed: ${entry.name}`, 'ZIP_ENCRYPTED');
    }
    if (isSymlink(entry)) {
      throw new ZipError(`symlink entry not allowed: ${entry.name}`, 'ZIP_SYMLINK');
    }

    const safeName = validateEntryName(entry.name, caps);
    if (safeName === null) continue; // directory entry

    // Belt-and-braces zip-slip check: even after validation, the resolved
    // destination must sit strictly inside the target directory.
    const fullPath = join(absTarget, safeName);
    const resolved = resolve(fullPath);
    if (resolved !== absTarget && !resolved.startsWith(absTarget + sep)) {
      throw new ZipError(`zip-slip: ${entry.name} resolves outside target`, 'ZIP_TRAVERSAL');
    }

    if (entry.uncompSize > caps.maxUncompressedBytes) {
      throw new ZipError(`entry ${entry.name} exceeds maxUncompressedBytes`, 'ZIP_BOMB');
    }
    // readEntryData enforces the remaining archive-wide inflate budget.
    const remaining = caps.maxUncompressedBytes - totalUncomp;
    const data = await readEntryData(buf, entry, remaining);

    totalUncomp += data.length;
    totalComp += Math.max(1, entry.compSize); // avoid div-by-zero in ratio
    if (totalUncomp > caps.maxUncompressedBytes) {
      throw new ZipError(`total uncompressed exceeds cap`, 'ZIP_BOMB');
    }
    // Cumulative ratio check: catches many-small-entries bombs that no
    // single-entry check would.
    if (totalUncomp / totalComp > caps.maxExpansionRatio) {
      throw new ZipError(
        `expansion ratio exceeds ${caps.maxExpansionRatio}x (${totalUncomp}/${totalComp})`,
        'ZIP_BOMB',
      );
    }

    // Only write after every check for this entry has passed.
    await mkdir(dirname(resolved), { recursive: true });
    await writeFile(resolved, data);
    files.push(safeName);
  }

  return { entries: files.length, bytes: totalUncomp, files };
}
|
||||
|
||||
/**
 * Enumerate archive entries without extracting anything — for inspection and tests.
 * @param {Buffer} buf - the complete archive
 * @returns {Array<{ name: string, method: number, compSize: number, uncompSize: number, isDir: boolean, isSymlink: boolean }>}
 * @throws {ZipError} on structural problems (same as parseCentralDirectory)
 */
export function listEntries(buf) {
  const summaries = [];
  for (const e of parseCentralDirectory(buf)) {
    summaries.push({
      name: e.name,
      method: e.method,
      compSize: e.compSize,
      uncompSize: e.uncompSize,
      isDir: e.name.endsWith('/'),
      isSymlink: isSymlink(e),
    });
  }
  return summaries;
}
|
||||
|
||||
export { ZipError };

// Internal hooks re-exported for unit tests only — not part of the public API.
export const __testing = {
  DEFAULT_CAPS,
  validateEntryName,
  isSymlink,
  parseCentralDirectory,
  inflateBounded,
};
|
||||
Loading…
Add table
Add a link
Reference in a new issue