Pre-installation verification of VS Code extensions via URL — fetch a remote VSIX, extract it in a hardened sandbox, and run the existing IDE scanner pipeline against it. No npm dependencies. Sources: - VS Code Marketplace (publisher.gallery.vsassets.io direct download) - OpenVSX (open-vsx.org official API) - Direct .vsix HTTPS URLs Defenses: - HTTPS-only, TLS verified, manual redirect with per-source host whitelist - 30s total timeout via AbortController - 50MB compressed cap, 500MB uncompressed, 100x expansion ratio - Zero-dep ZIP extractor: zip-slip, absolute paths, drive letters, NUL bytes, symlinks (Unix mode 0xA000), depth limits, ZIP64 rejected, encrypted rejected - SHA-256 streamed during fetch, surfaced in meta.source - Temp dir cleanup in all paths (try/finally) Files: - scanners/lib/vsix-fetch.mjs (HTTPS fetcher, host whitelist, streaming SHA-256) - scanners/lib/zip-extract.mjs (zero-dep parser with hardening caps) - knowledge/marketplace-api-notes.md (endpoint reference) - 3 test files (48 tests added: vsix-fetch, zip-extract, ide-extension-url) Tests: 1296 → 1344 (all green). Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
374 lines
13 KiB
JavaScript
374 lines
13 KiB
JavaScript
// zip-extract.mjs — Zero-dependency ZIP parser and safe extractor for VSIX files.
|
|
//
|
|
// VSIX is a plain ZIP container. We implement the minimum subset needed to:
|
|
// - Parse the End of Central Directory (EOCD)
|
|
// - Walk Central Directory headers to enumerate entries
|
|
// - Read each Local File Header and inflate its data with node:zlib
|
|
// - Validate every entry name BEFORE creating any file (zip-slip, absolute, symlinks)
|
|
// - Enforce caps: total entries, total uncompressed bytes, expansion ratio, depth
|
|
//
|
|
// Compression methods supported:
|
|
// 0 STORE (no compression)
|
|
// 8 DEFLATE (raw deflate, via zlib.createInflateRaw)
|
|
// Anything else is rejected.
|
|
//
|
|
// Spec: https://pkware.cachefly.net/webdocs/casestudies/APPNOTE.TXT
|
|
//
|
|
// SECURITY NOTES
|
|
// - We resolve every entry path inside `targetDir` and require startsWith(targetDir + sep)
|
|
// - We reject absolute paths, drive letters, NUL bytes, and `..` segments after normalization
|
|
// - We reject entries whose external_attr indicates a symbolic link (0xA000 in upper word, Unix)
|
|
// - We reject ZIP64 entries (we don't claim to support them; >4GB is out of scope for VSIX)
|
|
// - We reject entries with general purpose bit 0 (encryption) set
|
|
// - Directories are not extracted from entries — created on-demand from file paths
|
|
|
|
import { createInflateRaw } from 'node:zlib';
|
|
import { mkdir, writeFile } from 'node:fs/promises';
|
|
import { dirname, join, resolve, sep, normalize } from 'node:path';
|
|
|
|
// ZIP record signatures — little-endian "PK.." magics per APPNOTE.TXT §4.3.
const SIG_LFH = 0x04034b50; // Local File Header ("PK\x03\x04")
const SIG_CD = 0x02014b50; // Central Directory file header ("PK\x01\x02")
const SIG_EOCD = 0x06054b50; // End of Central Directory ("PK\x05\x06")
const SIG_EOCD64_LOC = 0x07064b50; // ZIP64 EOCD locator (presence => reject)
const SIG_EOCD64 = 0x06064b50; // ZIP64 EOCD record (kept for reference; the locator check is what rejects ZIP64)

// Default hardening caps; extractToDir callers may override via opts.caps.
const DEFAULT_CAPS = Object.freeze({
  maxEntries: 10_000, // central-directory entry count ceiling
  maxUncompressedBytes: 500 * 1024 * 1024, // 500MB total inflated output
  maxExpansionRatio: 100, // sum uncompressed / sum compressed (zip-bomb guard)
  maxDepth: 20, // max path components per entry
});
|
|
// Error type for every ZIP parse/extraction failure. `code` carries a
// machine-readable reason (e.g. 'ZIP_BOMB', 'ZIP_TRAVERSAL') so callers can
// branch without string-matching the message.
class ZipError extends Error {
  /**
   * @param {string} message - Human-readable description of the failure.
   * @param {string} [code='ZIP_INVALID'] - Machine-readable failure code.
   */
  constructor(message, code = 'ZIP_INVALID') {
    super(message);
    this.name = 'ZipError';
    this.code = code;
  }
}
|
|
|
|
// Little-endian integer readers — every ZIP structure field is little-endian.
const readU16 = (buf, off) => buf.readUInt16LE(off);
const readU32 = (buf, off) => buf.readUInt32LE(off);
|
|
|
|
/**
 * Locate the End of Central Directory (EOCD) record.
 *
 * Scans backwards from the end of the buffer: the EOCD is 22 bytes minimum
 * and may be followed by a comment of up to 0xFFFF bytes, so the signature
 * can sit anywhere in the last 65557 bytes of the archive.
 *
 * @param {Buffer} buf - The complete archive.
 * @returns {number} Byte offset of the EOCD signature.
 * @throws {ZipError} code 'ZIP_NO_EOCD' when no valid record exists.
 */
function findEOCD(buf) {
  const lowest = Math.max(0, buf.length - 22 - 0xFFFF);
  let off = buf.length - 22;
  while (off >= lowest) {
    if (readU32(buf, off) === SIG_EOCD) {
      // A candidate is only accepted when its comment-length field makes the
      // record end exactly at EOF — this guards against the signature bytes
      // occurring inside a comment or inside file data.
      const commentLen = readU16(buf, off + 20);
      if (off + 22 + commentLen === buf.length) return off;
    }
    off--;
  }
  throw new ZipError('EOCD signature not found', 'ZIP_NO_EOCD');
}
|
|
|
|
/**
 * Decode the fixed portion of the EOCD record at `off` into named fields.
 * Field offsets follow APPNOTE.TXT §4.3.16.
 */
function parseEOCD(buf, off) {
  const u16 = (rel) => readU16(buf, off + rel);
  const u32 = (rel) => readU32(buf, off + rel);
  return {
    diskNumber: u16(4),
    cdDisk: u16(6),
    cdEntriesOnDisk: u16(8),
    cdEntriesTotal: u16(10),
    cdSize: u32(12),
    cdOffset: u32(16),
    commentLength: u16(20),
  };
}
|
|
|
|
/**
 * Parse central directory entries and return a structured list.
 *
 * Rejects ZIP64, multi-disk, and split archives up front, then walks the
 * central directory sequentially, bounds-checking each record before reading
 * it. Field offsets follow APPNOTE.TXT §4.3.12.
 *
 * @param {Buffer} buf - The complete archive.
 * @returns {Array} One record per entry: { versionMadeBy, generalFlags,
 *   method, crc32, compSize, uncompSize, nameLen, extraLen, commentLen,
 *   externalAttr, lfhOffset, name }.
 * @throws {ZipError} On any structural problem or unsupported feature.
 */
function parseCentralDirectory(buf) {
  // Reject ZIP64 (we do not implement it).
  // Look for ZIP64 EOCD locator in the 20 bytes immediately before EOCD.
  const eocdOff = findEOCD(buf);
  if (eocdOff >= 20 && readU32(buf, eocdOff - 20) === SIG_EOCD64_LOC) {
    throw new ZipError('ZIP64 archives are not supported', 'ZIP_ZIP64');
  }

  const eocd = parseEOCD(buf, eocdOff);
  if (eocd.diskNumber !== 0 || eocd.cdDisk !== 0) {
    throw new ZipError('multi-disk archives are not supported', 'ZIP_MULTIDISK');
  }
  if (eocd.cdEntriesOnDisk !== eocd.cdEntriesTotal) {
    throw new ZipError('split central directory not supported', 'ZIP_SPLIT_CD');
  }
  // 0xFFFFFFFF in either field is the ZIP64 "real value stored elsewhere" sentinel.
  if (eocd.cdOffset === 0xFFFFFFFF || eocd.cdSize === 0xFFFFFFFF) {
    throw new ZipError('ZIP64 fields detected', 'ZIP_ZIP64');
  }
  if (eocd.cdOffset + eocd.cdSize > buf.length) {
    throw new ZipError('central directory extends past EOF', 'ZIP_BAD_CD_OFFSET');
  }

  const entries = [];
  let p = eocd.cdOffset;
  for (let i = 0; i < eocd.cdEntriesTotal; i++) {
    // 46 bytes is the fixed portion of a central directory file header.
    if (p + 46 > buf.length) throw new ZipError('truncated central directory', 'ZIP_TRUNCATED_CD');
    if (readU32(buf, p) !== SIG_CD) throw new ZipError('bad central directory signature', 'ZIP_BAD_CD_SIG');

    const versionMadeBy = readU16(buf, p + 4);
    const generalFlags = readU16(buf, p + 8);
    const method = readU16(buf, p + 10);
    const crc32 = readU32(buf, p + 16);
    const compSize = readU32(buf, p + 20);
    const uncompSize = readU32(buf, p + 24);
    const nameLen = readU16(buf, p + 28);
    const extraLen = readU16(buf, p + 30);
    const commentLen = readU16(buf, p + 32);
    const externalAttr = readU32(buf, p + 38);
    const lfhOffset = readU32(buf, p + 42);

    // Per-entry ZIP64 sentinels (real values would live in the extra field).
    if (compSize === 0xFFFFFFFF || uncompSize === 0xFFFFFFFF || lfhOffset === 0xFFFFFFFF) {
      throw new ZipError('ZIP64 fields detected in entry', 'ZIP_ZIP64');
    }

    const nameStart = p + 46;
    if (nameStart + nameLen > buf.length) throw new ZipError('entry name extends past EOF', 'ZIP_BAD_NAME');
    // NOTE(review): names are always decoded as UTF-8, even when general
    // purpose bit 11 (the UTF-8 flag) is unset — legacy CP437 names may
    // mis-decode. Acceptable here since names are validated, not trusted.
    const rawName = buf.slice(nameStart, nameStart + nameLen).toString('utf8');

    entries.push({
      versionMadeBy,
      generalFlags,
      method,
      crc32,
      compSize,
      uncompSize,
      nameLen,
      extraLen,
      commentLen,
      externalAttr,
      lfhOffset,
      name: rawName,
    });

    // Advance past the fixed header plus the three variable-length trailers.
    p += 46 + nameLen + extraLen + commentLen;
  }
  return entries;
}
|
|
|
|
/**
 * Validate an entry name and return a relative path safe to join with targetDir.
 *
 * @param {string} rawName - Entry name exactly as stored in the archive.
 * @param {{maxDepth: number}} caps - Active caps; only maxDepth is used here.
 * @returns {string|null} Normalized relative path, or null for directory
 *   entries (directories are created on demand from file paths, not extracted).
 * @throws {ZipError} On empty/oversized names, NUL bytes, absolute or
 *   drive-letter paths, parent traversal, or excessive depth.
 */
function validateEntryName(rawName, caps) {
  if (rawName.length === 0) throw new ZipError('empty entry name', 'ZIP_BAD_NAME');
  if (rawName.length > 1024) throw new ZipError('entry name exceeds 1024 chars', 'ZIP_BAD_NAME');
  if (rawName.includes('\u0000')) throw new ZipError('NUL byte in entry name', 'ZIP_BAD_NAME');

  // Reject POSIX absolute paths, Windows UNC paths, and leading backslash.
  if (rawName.startsWith('/') || rawName.startsWith('\\')) {
    throw new ZipError(`absolute entry path: ${rawName}`, 'ZIP_ABSOLUTE');
  }
  // Reject Windows drive letters, including drive-RELATIVE forms like "C:file".
  // (Fix: the previous pattern /^[A-Za-z]:[\\/]/ only caught "C:\..." and
  // "C:/..."; "C:file" resolves against the drive's current directory on
  // Windows and must never reach the filesystem layer.)
  if (/^[A-Za-z]:/.test(rawName)) {
    throw new ZipError(`drive-letter entry path: ${rawName}`, 'ZIP_ABSOLUTE');
  }

  // Normalize: convert backslashes to forward slashes (some zip tools emit \).
  const unixName = rawName.replace(/\\/g, '/');

  // Directory entries end with '/' — skip; we'll create dirs as needed for files.
  // (Fix: checked AFTER backslash conversion so a backslash directory entry
  // like "dir\" is also skipped instead of falling through and being written
  // as a bogus file. This also means absolute/drive-letter directory names
  // above are rejected rather than silently skipped — strictly safer.)
  if (unixName.endsWith('/')) return null;

  // Reject any path component equal to '..' — even after normalization it must escape.
  const parts = unixName.split('/');
  for (const part of parts) {
    if (part === '..') {
      throw new ZipError(`parent traversal in entry: ${rawName}`, 'ZIP_TRAVERSAL');
    }
  }

  // Final path normalization (collapses '.' segments, NFC).
  const normalized = normalize(unixName).normalize('NFC');
  if (normalized.startsWith('..') || normalized.includes(`${sep}..${sep}`) || normalized === '..') {
    throw new ZipError(`parent traversal after normalization: ${rawName}`, 'ZIP_TRAVERSAL');
  }
  if (normalized.split(sep).length > caps.maxDepth) {
    throw new ZipError(`entry path exceeds depth ${caps.maxDepth}: ${rawName}`, 'ZIP_DEEP');
  }

  return normalized;
}
|
|
|
|
/**
 * Detect symlink entries from the external_attr field.
 *
 * Only archives produced on Unix hosts (versionMadeBy high byte === 3) store
 * POSIX mode bits in the upper 16 bits of external_attr; the S_IFLNK file
 * type is 0xA000 in the format-mask nibble.
 */
function isSymlink(entry) {
  const hostOs = (entry.versionMadeBy >>> 8) & 0xFF;
  if (hostOs !== 3) return false; // mode bits are only meaningful for Unix
  const fileType = (entry.externalAttr >>> 16) & 0xF000;
  return fileType === 0xA000;
}
|
|
|
|
/**
 * Inflate raw-deflate bytes with a hard upper bound on output (zip-bomb
 * defense). Rejects as soon as the output would exceed `maxBytes`, and also
 * when the final size differs from the size claimed by the archive.
 *
 * @param {Buffer} input - Raw deflate stream (no zlib/gzip wrapper).
 * @param {number} expectedSize - Uncompressed size from the central directory.
 * @param {number} maxBytes - Hard ceiling on inflated output.
 * @returns {Promise<Buffer>} The fully inflated bytes.
 */
function inflateBounded(input, expectedSize, maxBytes) {
  return new Promise((resolvePromise, reject) => {
    const inflater = createInflateRaw();
    const pieces = [];
    let written = 0;
    let failed = false;

    inflater.on('data', (piece) => {
      if (failed) return;
      written += piece.length;
      if (written > maxBytes) {
        failed = true;
        // destroy() surfaces this ZipError via the 'error' handler below.
        inflater.destroy(new ZipError(`inflate exceeds cap (${maxBytes} bytes)`, 'ZIP_BOMB'));
      } else {
        pieces.push(piece);
      }
    });

    inflater.on('end', () => {
      if (failed) return;
      if (written === expectedSize) {
        resolvePromise(Buffer.concat(pieces, written));
      } else {
        // A mismatch means the central directory lied about the size.
        reject(new ZipError(
          `inflated size ${written} does not match expected ${expectedSize}`,
          'ZIP_SIZE_MISMATCH',
        ));
      }
    });

    inflater.on('error', (err) => {
      reject(err instanceof ZipError
        ? err
        : new ZipError(`inflate failed: ${err.message}`, 'ZIP_INFLATE'));
    });

    inflater.end(input);
  });
}
|
|
|
|
/**
 * Read and decompress the data section of one entry, given its central
 * directory record.
 *
 * The Local File Header is re-validated because its name/extra lengths may
 * differ from the central directory copy, shifting where the data starts.
 *
 * @param {Buffer} buf - The complete archive.
 * @param {object} entry - Parsed central directory record.
 * @param {number} remainingBudget - Max bytes we may still inflate this archive.
 * @returns {Promise<Buffer>} Uncompressed entry bytes (a view for STORE).
 * @throws {ZipError} On structural errors, unsupported methods, or cap breaches.
 */
async function readEntryData(buf, entry, remainingBudget) {
  const lfhOff = entry.lfhOffset;
  if (lfhOff + 30 > buf.length) throw new ZipError('LFH past EOF', 'ZIP_BAD_LFH');
  if (readU32(buf, lfhOff) !== SIG_LFH) throw new ZipError('bad LFH signature', 'ZIP_BAD_LFH');

  const lfhNameLen = readU16(buf, lfhOff + 26);
  const lfhExtraLen = readU16(buf, lfhOff + 28);
  const dataStart = lfhOff + 30 + lfhNameLen + lfhExtraLen;
  const dataEnd = dataStart + entry.compSize;
  if (dataEnd > buf.length) throw new ZipError('entry data past EOF', 'ZIP_BAD_DATA');

  // subarray() returns a zero-copy view; replaces the deprecated Buffer#slice.
  const compressed = buf.subarray(dataStart, dataEnd);

  if (entry.method === 0) { // STORE: bytes are verbatim
    if (entry.compSize !== entry.uncompSize) {
      throw new ZipError('STORED entry compSize != uncompSize', 'ZIP_BAD_STORED');
    }
    if (entry.uncompSize > remainingBudget) {
      throw new ZipError('STORED entry exceeds budget', 'ZIP_BOMB');
    }
    return compressed;
  }
  if (entry.method === 8) { // DEFLATE
    // Cap inflation at whichever is smaller: the declared size or the
    // archive-wide remaining budget (zip-bomb defense); only needed here.
    const cap = Math.min(entry.uncompSize, remainingBudget);
    return inflateBounded(compressed, entry.uncompSize, cap);
  }
  throw new ZipError(`unsupported compression method ${entry.method}`, 'ZIP_BAD_METHOD');
}
|
|
|
|
/**
 * Extract a VSIX/ZIP buffer to targetDir with all caps and validations applied.
 * targetDir must already exist or be creatable; caller owns cleanup.
 *
 * Per-entry pipeline (order matters — cheap structural rejections first, then
 * name validation, then decompression under a running byte budget, and all
 * caps are enforced BEFORE anything touches the filesystem):
 *   1. reject encrypted entries (general purpose bit 0)
 *   2. reject symlink entries (Unix mode 0xA000 in external_attr)
 *   3. validate/normalize the entry name (zip-slip, absolute, depth, ...)
 *   4. belt-and-braces resolve() containment check against targetDir
 *   5. decompress under the remaining uncompressed-byte budget
 *   6. enforce total-size and expansion-ratio caps
 *
 * @param {Buffer} buf - The full ZIP buffer
 * @param {string} targetDir - Absolute path to extract into
 * @param {object} [opts]
 * @param {object} [opts.caps] - Override default caps
 * @returns {Promise<{ entries: number, bytes: number, files: string[] }>}
 * @throws {ZipError} On any structural, security, or cap violation.
 */
export async function extractToDir(buf, targetDir, opts = {}) {
  const caps = { ...DEFAULT_CAPS, ...(opts.caps || {}) };
  const absTarget = resolve(targetDir);
  await mkdir(absTarget, { recursive: true });

  const entries = parseCentralDirectory(buf);
  if (entries.length > caps.maxEntries) {
    throw new ZipError(`too many entries (${entries.length} > ${caps.maxEntries})`, 'ZIP_TOO_MANY_ENTRIES');
  }

  let totalUncomp = 0;
  let totalComp = 0;
  const files = [];

  for (const entry of entries) {
    // General purpose bit 0 = traditional PKWARE encryption.
    if (entry.generalFlags & 0x0001) {
      throw new ZipError(`encrypted entry not allowed: ${entry.name}`, 'ZIP_ENCRYPTED');
    }
    if (isSymlink(entry)) {
      throw new ZipError(`symlink entry not allowed: ${entry.name}`, 'ZIP_SYMLINK');
    }

    const safeName = validateEntryName(entry.name, caps);
    if (safeName === null) continue; // directory entry

    // Defense in depth: even after validateEntryName, require the fully
    // resolved path to remain inside absTarget (the `+ sep` suffix prevents
    // sibling-prefix escapes like /tmp/target-evil matching /tmp/target).
    const fullPath = join(absTarget, safeName);
    const resolved = resolve(fullPath);
    if (resolved !== absTarget && !resolved.startsWith(absTarget + sep)) {
      throw new ZipError(`zip-slip: ${entry.name} resolves outside target`, 'ZIP_TRAVERSAL');
    }

    if (entry.uncompSize > caps.maxUncompressedBytes) {
      throw new ZipError(`entry ${entry.name} exceeds maxUncompressedBytes`, 'ZIP_BOMB');
    }
    // The remaining budget is passed down so inflation can abort mid-stream.
    const remaining = caps.maxUncompressedBytes - totalUncomp;
    const data = await readEntryData(buf, entry, remaining);

    // Totals use the ACTUAL inflated length, not the (attacker-controlled)
    // declared sizes.
    totalUncomp += data.length;
    totalComp += Math.max(1, entry.compSize); // avoid div-by-zero in ratio
    if (totalUncomp > caps.maxUncompressedBytes) {
      throw new ZipError(`total uncompressed exceeds cap`, 'ZIP_BOMB');
    }
    if (totalUncomp / totalComp > caps.maxExpansionRatio) {
      throw new ZipError(
        `expansion ratio exceeds ${caps.maxExpansionRatio}x (${totalUncomp}/${totalComp})`,
        'ZIP_BOMB',
      );
    }

    // Parent dirs are created on demand; entries never create dirs directly.
    await mkdir(dirname(resolved), { recursive: true });
    await writeFile(resolved, data);
    files.push(safeName);
  }

  return { entries: files.length, bytes: totalUncomp, files };
}
|
|
|
|
/**
 * Parse central directory only — no extraction. Useful for inspection / tests.
 */
export function listEntries(buf) {
  const summaries = [];
  for (const entry of parseCentralDirectory(buf)) {
    summaries.push({
      name: entry.name,
      method: entry.method,
      compSize: entry.compSize,
      uncompSize: entry.uncompSize,
      isDir: entry.name.endsWith('/'),
      isSymlink: isSymlink(entry),
    });
  }
  return summaries;
}
|
|
|
|
export { ZipError };

// Internal helpers exposed solely for unit tests — not part of the public API.
export const __testing = {
  DEFAULT_CAPS,
  validateEntryName,
  isSymlink,
  parseCentralDirectory,
  inflateBounded,
};
|