feat(llm-security): OS sandbox for /security ide-scan <url> (v6.5.0)

VSIX fetch + extract for URL targets now runs in a sub-process wrapped by sandbox-exec (macOS) or bwrap (Linux), reusing the same primitives proven by the v5.1 git-clone sandbox. Defense-in-depth — even if our own zip-extract.mjs ever has a bypass, the kernel refuses any write outside the per-scan temp directory.

New files:
- scanners/lib/vsix-fetch-worker.mjs — sub-process worker. Argv: --url --tmpdir; emits one JSON line on stdout (ok/sha256/size/finalUrl/source/extRoot or ok:false/error/code). Silent on stderr. Exit 0/1.
- scanners/lib/vsix-sandbox.mjs — wrapper. Exports buildSandboxProfile, buildBwrapArgs, buildSandboxedWorker, runVsixWorker. 35s timeout, 1 MB stdout cap.

Changes:
- scanners/ide-extension-scanner.mjs: fetchAndExtractVsixUrl is now sandbox-aware (useSandbox option, default true). In-process logic preserved as fallback. New meta.source.sandbox field: 'sandbox-exec' | 'bwrap' | 'none' | 'in-process'.
- scan(target, { useSandbox }) defaults to true; tests pass false because globalThis.fetch mocks do not cross process boundaries.
- Windows fallback (or sandbox tools missing): the worker still runs as a sub-process, but without an OS sandbox (meta.source.sandbox = 'none'), and a meta.warnings advisory is added.

Tests:
- 8 new tests in tests/scanners/vsix-sandbox.test.mjs (per-platform profile generation, worker arg construction, live worker exit behavior on invalid URLs — no network).
- Existing URL tests updated to opt out of sandbox (useSandbox: false).
- 1344 → 1352 tests, all green.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-04-17 17:28:57 +02:00 · 2026-04-17 17:28:57 +02:00 · 9f893c3858
commit 9f893c3858
parent fe0193956d
11 changed files with 434 additions and 24 deletions
--- a/plugins/llm-security/scanners/ide-extension-scanner.mjs
+++ b/plugins/llm-security/scanners/ide-extension-scanner.mjs
@ -29,6 +29,7 @@ import { parseVSCodeExtension, parseVsixFile } from './lib/ide-extension-parser.
 import { loadTopVSCode, loadVSCodeBlocklist, normalizeId } from './lib/ide-extension-data.mjs';
 import { fetchVsixFromUrl, detectUrlType } from './lib/vsix-fetch.mjs';
 import { extractToDir, ZipError } from './lib/zip-extract.mjs';
+import { runVsixWorker } from './lib/vsix-sandbox.mjs';

 import { scan as scanUnicode } from './unicode-scanner.mjs';
 import { scan as scanEntropy } from './entropy-scanner.mjs';
@ -37,7 +38,7 @@ import { scan as scanTaint } from './taint-tracer.mjs';
 import { scan as scanMemoryPoisoning } from './memory-poisoning-scanner.mjs';
 import { scan as scanSupplyChain } from './supply-chain-recheck.mjs';

-const VERSION = '6.4.0';
+const VERSION = '6.5.0';
 const SCANNER = 'IDE';

 // ---------------------------------------------------------------------------
@ -53,14 +54,44 @@ function isUrlTarget(target) {
 * `parseVSCodeExtension` should be pointed at. VSIX layout always nests the
 * extension under `extension/`.
 *
+ * Two modes:
+ *   - useSandbox=true (default for CLI):  spawns vsix-fetch-worker.mjs under
+ *     sandbox-exec (macOS) / bwrap (Linux) so any FS write is restricted to
+ *     <tempDir>. Defense-in-depth against zip-extract bugs.
+ *   - useSandbox=false:  runs fetch + extract in-process. Used by tests that
+ *     mock globalThis.fetch (mocking does not cross process boundaries).
+ *
 * Caller MUST `await rm(result.tempDir, { recursive: true, force: true })` in finally.
 *
 * @param {string} url
- * @returns {Promise<{ extRoot: string, tempDir: string, source: object }>}
+ * @param {{ useSandbox?: boolean }} [opts]
+ * @returns {Promise<{ extRoot: string, tempDir: string, source: object, sandbox: 'sandbox-exec'|'bwrap'|null|'in-process' }>}
 */
-async function fetchAndExtractVsixUrl(url) {
+async function fetchAndExtractVsixUrl(url, opts = {}) {
+  const useSandbox = opts.useSandbox !== false;
  const tempDir = await mkdtemp(join(tmpdir(), 'llm-sec-vsix-'));
  try {
+    if (useSandbox) {
+      const { ok, sandbox, payload } = await runVsixWorker(url, tempDir);
+      if (!ok) {
+        const msg = payload && payload.error ? payload.error : 'worker failed';
+        throw new Error(msg);
+      }
+      const { type: kind, ...sourceMeta } = payload.source;
+      const source = {
+        type: 'url',
+        kind,
+        url,
+        finalUrl: payload.finalUrl,
+        sha256: payload.sha256,
+        size: payload.size,
+        sandbox: sandbox || 'none',
+        ...sourceMeta,
+      };
+      return { extRoot: payload.extRoot, tempDir, source, sandbox: sandbox || null };
+    }
+
+    // In-process path (tests, or fallback when caller wants no sub-process).
    let fetched;
    try {
      fetched = await fetchVsixFromUrl(url);
@ -75,23 +106,21 @@ async function fetchAndExtractVsixUrl(url) {
      }
      throw err;
    }
-    // VSIX nests files under `extension/`. If that doesn't exist, fall back to
-    // the temp dir itself (some packagers omit the wrapper).
    const nested = join(tempDir, 'extension');
    const extRoot = existsSync(nested) ? nested : tempDir;
    const { type: kind, ...sourceMeta } = fetched.source;
    const source = {
      type: 'url',
-      kind, // 'marketplace' | 'openvsx' | 'vsix'
+      kind,
      url,
      finalUrl: fetched.finalUrl,
      sha256: fetched.sha256,
      size: fetched.size,
+      sandbox: 'in-process',
      ...sourceMeta,
    };
-    return { extRoot, tempDir, source };
+    return { extRoot, tempDir, source, sandbox: 'in-process' };
  } catch (err) {
-    // Cleanup on error before propagating.
    await rm(tempDir, { recursive: true, force: true }).catch(() => {});
    throw err;
  }
@ -459,10 +488,13 @@ export async function scan(target, options = {}) {
      warnings.push('GitHub repo URLs are not supported in v6.4.0 — would require build step. Use the Marketplace, OpenVSX, or a direct .vsix link.');
    } else {
      try {
-        const fetched = await fetchAndExtractVsixUrl(target);
+        const fetched = await fetchAndExtractVsixUrl(target, { useSandbox: options.useSandbox });
        urlSource = fetched.source;
        urlTempDir = fetched.tempDir;
        target = fetched.extRoot; // forward into single-target path mode
+        if (fetched.sandbox === null && options.useSandbox !== false) {
+          warnings.push('OS sandbox unavailable on this platform — VSIX extracted without sandbox-exec/bwrap. Defense-in-depth reduced to in-process zip-extract validation.');
+        }
      } catch (err) {
        warnings.push(`URL fetch/extract failed: ${err.message}`);
      }
--- a/plugins/llm-security/scanners/lib/vsix-fetch-worker.mjs
+++ b/plugins/llm-security/scanners/lib/vsix-fetch-worker.mjs
@ -0,0 +1,76 @@
+#!/usr/bin/env node
+// vsix-fetch-worker.mjs — Sub-process worker that fetches a VSIX URL and extracts
+// it to a writable directory. Designed to be spawned under sandbox-exec (macOS),
+// bwrap (Linux), or directly (Windows fallback).
+//
+// Contract:
+//   stdin:  none
+//   argv:   --url <url> --tmpdir <writable-dir>
+//   stdout: single JSON line {ok:true, sha256, size, finalUrl, source, extRoot}
+//           on success, or {ok:false, error:"<msg>", code?:"<ZIP_CODE>"} on failure
+//   stderr: never (silent — all errors via JSON on stdout)
+//   exit:   0 on success, 1 on any failure (caller still parses stdout)
+//
+// Why a worker: the parent process can wrap this command in sandbox-exec / bwrap
+// so any filesystem write the ZIP extractor performs is restricted to <tmpdir>.
+// Defense-in-depth — even if our own zip-slip / symlink validation has a bug,
+// the OS sandbox cannot let bytes land outside <tmpdir>.
+
+import { writeFileSync, existsSync } from 'node:fs';
+import { join } from 'node:path';
+import { fetchVsixFromUrl } from './vsix-fetch.mjs';
+import { extractToDir, ZipError } from './zip-extract.mjs';
+
+// Serialize `obj` as JSON and write it to stdout as one newline-terminated line.
+function emit(obj) {
+  const line = `${JSON.stringify(obj)}\n`;
+  process.stdout.write(line);
+}
+
+/**
+ * Extract the `--url` and `--tmpdir` values from an argv array.
+ *
+ * A flag only counts when another element follows it; that element is taken
+ * as the value and is not itself inspected as a flag. Unknown tokens are
+ * ignored, and a repeated flag overwrites the earlier value.
+ *
+ * @param {string[]} argv  arguments after `node <script>`
+ * @returns {{ url: string|null, tmpdir: string|null }}
+ */
+function parseArgs(argv) {
+  const parsed = { url: null, tmpdir: null };
+  let idx = 0;
+  while (idx < argv.length) {
+    const flag = argv[idx];
+    const hasValue = idx + 1 < argv.length;
+    if (hasValue && flag === '--url') {
+      parsed.url = argv[idx + 1];
+      idx += 2;
+    } else if (hasValue && flag === '--tmpdir') {
+      parsed.tmpdir = argv[idx + 1];
+      idx += 2;
+    } else {
+      idx += 1;
+    }
+  }
+  return parsed;
+}
+
+/**
+ * Write the single JSON result line to stdout, then exit with `code`.
+ *
+ * The exit is issued from the write callback rather than right after write():
+ * process.stdout is asynchronous when it is a pipe on Windows, so an immediate
+ * process.exit() can discard the line the parent process is waiting to parse.
+ *
+ * @param {object} result  JSON-serializable result object
+ * @param {number} code    process exit code (0 on success, 1 on failure)
+ */
+function finish(result, code) {
+  // Set the code up front so it is correct whichever way the process ends.
+  process.exitCode = code;
+  process.stdout.write(JSON.stringify(result) + '\n', () => process.exit(code));
+}
+
+/**
+ * Worker entry point: parse argv, fetch the VSIX from --url, extract it into
+ * --tmpdir, and report the outcome as one JSON line on stdout.
+ *
+ * Every failure path reports `{ ok: false, error, code? }` and exits 1; the
+ * success path reports `{ ok: true, sha256, size, finalUrl, source, extRoot }`
+ * and exits 0.
+ */
+async function main() {
+  const { url, tmpdir: dir } = parseArgs(process.argv.slice(2));
+  if (!url || !dir) {
+    return finish({ ok: false, error: 'missing --url or --tmpdir' }, 1);
+  }
+  let fetched;
+  try {
+    fetched = await fetchVsixFromUrl(url);
+  } catch (err) {
+    return finish({ ok: false, error: `fetch failed: ${err.message}` }, 1);
+  }
+  try {
+    await extractToDir(fetched.buffer, dir);
+  } catch (err) {
+    if (err instanceof ZipError) {
+      // Keep the ZIP error code machine-readable for the parent process.
+      return finish(
+        { ok: false, error: `malformed VSIX (${err.code}): ${err.message}`, code: err.code },
+        1,
+      );
+    }
+    return finish({ ok: false, error: `extract failed: ${err.message}` }, 1);
+  }
+  // VSIX archives normally nest the extension under `extension/`; fall back to
+  // the extraction directory itself when that folder is absent.
+  const nested = join(dir, 'extension');
+  const extRoot = existsSync(nested) ? nested : dir;
+  return finish(
+    {
+      ok: true,
+      sha256: fetched.sha256,
+      size: fetched.size,
+      finalUrl: fetched.finalUrl,
+      source: fetched.source,
+      extRoot,
+    },
+    0,
+  );
+}
+
+main().catch((err) => {
+  // `err` may be any thrown value, including null/undefined — never assume `.message`.
+  finish({ ok: false, error: `worker crashed: ${err?.message || String(err)}` }, 1);
+});
--- a/plugins/llm-security/scanners/lib/vsix-sandbox.mjs
+++ b/plugins/llm-security/scanners/lib/vsix-sandbox.mjs
@ -0,0 +1,169 @@
+// vsix-sandbox.mjs — Spawn vsix-fetch-worker.mjs under an OS sandbox so any
+// filesystem writes the ZIP extractor performs are restricted to a single
+// temp directory.
+//
+// Reuses the same sandbox primitives proven by `git-clone.mjs`:
+//   - macOS: sandbox-exec with a deny-file-write profile + subpath allow
+//   - Linux: bwrap with --ro-bind / --bind / --unshare-all
+//   - Windows / fallback: spawn the worker directly (no OS sandbox) and report
+//     `sandbox: null` so the caller can surface a warning
+//
+// Defense-in-depth: even if our own zip-extract.mjs has a zip-slip / symlink
+// bypass we did not foresee, the OS will refuse the write. The in-process
+// validation in zip-extract.mjs remains the first line of defense.
+
+import { spawn } from 'node:child_process';
+import { spawnSync } from 'node:child_process';
+import { realpathSync } from 'node:fs';
+import { fileURLToPath } from 'node:url';
+import { dirname, resolve as resolvePath } from 'node:path';
+
+// Directory containing this module (ES modules have no built-in __dirname).
+const __dirname = dirname(fileURLToPath(import.meta.url));
+// Absolute path of the worker script, which sits next to this module.
+const WORKER_PATH = resolvePath(__dirname, 'vsix-fetch-worker.mjs');
+// Worker runtime limit: the fetch is allowed 30s, plus 5s of slack.
+const WORKER_TIMEOUT_MS = 35 * 1000;
+// Cap on captured worker stdout: 1 MiB (the JSON result is tiny in practice).
+const MAX_OUTPUT_BYTES = 1 << 20;
+
+/**
+ * Produce a sandbox-exec profile that denies every file write except those
+ * under `allowedWritePath` (resolved to its real path), /dev/null and /dev/tty.
+ * Mirrors `buildSandboxProfile` in git-clone.mjs.
+ *
+ * @param {string} allowedWritePath  existing directory the sandboxed process may write to
+ * @returns {string|null}  null if not on macOS or sandbox-exec missing
+ */
+export function buildSandboxProfile(allowedWritePath) {
+  const onMac = process.platform === 'darwin';
+  if (!onMac) return null;
+
+  // `which` exits non-zero when sandbox-exec is not on PATH.
+  const { status } = spawnSync('which', ['sandbox-exec'], { encoding: 'utf8' });
+  if (status !== 0) return null;
+
+  // Use the symlink-resolved location of the directory in the allow rule.
+  const writableRoot = realpathSync(allowedWritePath);
+  const rules = [
+    '(version 1)',
+    '(allow default)',
+    '(deny file-write*)',
+    '(allow file-write* (subpath "' + writableRoot + '"))',
+    '(allow file-write* (literal "/dev/null"))',
+    '(allow file-write* (literal "/dev/tty"))',
+  ];
+  return rules.join('');
+}
+
+/**
+ * Build bwrap arguments that allow writes only to `allowedWritePath`.
+ * Based on `buildBwrapArgs` in git-clone.mjs.
+ *
+ * The root filesystem is bound read-only, `allowedWritePath` is bound
+ * read-write on top of it, and all namespaces are unshared except the network
+ * one: `--unshare-all` implies `--unshare-net`, which would leave the worker
+ * unable to download the VSIX, so `--share-net` re-enables the host network.
+ *
+ * @param {string} allowedWritePath  directory bound read-write inside the sandbox
+ * @param {string[]} innerArgs  command and argv to run inside bwrap
+ * @returns {string[]|null}  null if not on Linux or bwrap unusable
+ */
+export function buildBwrapArgs(allowedWritePath, innerArgs) {
+  if (process.platform !== 'linux') return null;
+  const check = spawnSync('which', ['bwrap'], { encoding: 'utf8' });
+  if (check.status !== 0) return null;
+  // Probe — bwrap is shipped on Ubuntu 24.04+ but may need admin AppArmor config.
+  const probe = spawnSync(
+    'bwrap',
+    ['--ro-bind', '/', '/', '--dev', '/dev', '/bin/true'],
+    { stdio: 'ignore', timeout: 5000 },
+  );
+  if (probe.status !== 0) return null;
+  return [
+    '--ro-bind', '/', '/',
+    '--bind', allowedWritePath, allowedWritePath,
+    '--dev', '/dev',
+    '--unshare-all',
+    // Must follow --unshare-all: the worker needs network access to fetch the URL.
+    '--share-net',
+    '--new-session',
+    '--die-with-parent',
+    ...innerArgs,
+  ];
+}
+
+/**
+ * Work out how to launch the worker: under sandbox-exec when a macOS profile
+ * can be built, otherwise under bwrap when its arguments can be built,
+ * otherwise as plain `node` with `sandbox: null` so the caller can WARN.
+ * Identical pattern to `buildSandboxedClone`.
+ *
+ * @param {string} tmpDir  writable temp dir for the worker
+ * @param {string[]} workerArgs  argv for the worker (after `node <worker>`)
+ * @returns {{cmd:string, args:string[], sandbox: 'sandbox-exec'|'bwrap'|null}}
+ */
+export function buildSandboxedWorker(tmpDir, workerArgs) {
+  const nodeCommand = ['node', WORKER_PATH, ...workerArgs];
+
+  // macOS: pass the generated profile inline via -p.
+  const macProfile = buildSandboxProfile(tmpDir);
+  if (macProfile) {
+    const args = ['-p', macProfile, ...nodeCommand];
+    return { cmd: 'sandbox-exec', args, sandbox: 'sandbox-exec' };
+  }
+
+  // Linux: bwrap receives the node command as its trailing arguments.
+  const linuxArgs = buildBwrapArgs(tmpDir, nodeCommand);
+  if (linuxArgs) {
+    return { cmd: 'bwrap', args: linuxArgs, sandbox: 'bwrap' };
+  }
+
+  // Fallback — Windows or sandbox tools missing. Worker still runs, just not boxed.
+  const [cmd, ...args] = nodeCommand;
+  return { cmd, args, sandbox: null };
+}
+
+// Only the first 200 characters of stderr are ever reported, so keeping more
+// than a few KiB would just let a misbehaving worker bloat parent memory.
+const MAX_STDERR_BYTES = 8 * 1024;
+
+/**
+ * Spawn the VSIX worker and parse its single JSON line of output.
+ *
+ * Resolves with the parsed payload whether the worker reported success or
+ * failure (`ok` is true only when `payload.ok === true`). Rejects when the
+ * worker cannot be spawned, times out, exceeds the stdout cap, prints nothing,
+ * or prints a last line that is not valid JSON.
+ *
+ * @param {string} url       URL to fetch (passed via argv to worker)
+ * @param {string} tmpDir    writable directory for extracted files
+ * @param {object} [opts]
+ * @param {boolean} [opts.allowFallback=true]  if false, reject when no OS sandbox
+ * @returns {Promise<{ok:boolean, sandbox:'sandbox-exec'|'bwrap'|null, payload:object}>}
+ */
+export function runVsixWorker(url, tmpDir, opts = {}) {
+  const { allowFallback = true } = opts;
+  const { cmd, args, sandbox } = buildSandboxedWorker(tmpDir, ['--url', url, '--tmpdir', tmpDir]);
+
+  if (!sandbox && !allowFallback) {
+    return Promise.reject(new Error('no OS sandbox available and fallback disabled'));
+  }
+
+  return new Promise((resolve, reject) => {
+    const child = spawn(cmd, args, {
+      stdio: ['ignore', 'pipe', 'pipe'],
+      timeout: WORKER_TIMEOUT_MS,
+      env: { ...process.env },
+    });
+
+    // Keep raw Buffers and decode once at the end: decoding chunk-by-chunk can
+    // split a multi-byte UTF-8 sequence that straddles two chunks.
+    const stdoutChunks = [];
+    let stdoutBytes = 0;
+    let truncated = false;
+    child.stdout.on('data', (chunk) => {
+      stdoutBytes += chunk.length;
+      if (stdoutBytes > MAX_OUTPUT_BYTES) {
+        truncated = true;
+        // Best effort: the child may already have exited.
+        try { child.kill('SIGKILL'); } catch {}
+        return;
+      }
+      stdoutChunks.push(chunk);
+    });
+
+    // stderr is diagnostic only; retain a bounded prefix and drop the rest.
+    const stderrChunks = [];
+    let stderrBytes = 0;
+    child.stderr.on('data', (chunk) => {
+      if (stderrBytes >= MAX_STDERR_BYTES) return;
+      const kept = chunk.subarray(0, MAX_STDERR_BYTES - stderrBytes);
+      stderrChunks.push(kept);
+      stderrBytes += kept.length;
+    });
+
+    child.on('error', (err) => reject(new Error(`worker spawn failed: ${err.message}`)));
+
+    child.on('close', (code, signal) => {
+      if (truncated) {
+        return reject(new Error('worker output exceeded 1MB cap'));
+      }
+      // spawn's `timeout` option terminates the child with SIGTERM by default.
+      if (signal === 'SIGTERM' && code === null) {
+        return reject(new Error(`worker timed out after ${WORKER_TIMEOUT_MS}ms`));
+      }
+      // Parse the last non-empty line as JSON (worker writes one line on success/failure).
+      const stdout = Buffer.concat(stdoutChunks).toString('utf8');
+      const lines = stdout.split('\n').map((l) => l.trim()).filter(Boolean);
+      const last = lines[lines.length - 1];
+      if (!last) {
+        const tail = Buffer.concat(stderrChunks).toString('utf8').trim().slice(0, 200);
+        return reject(new Error(`worker produced no output${tail ? ` (stderr: ${tail})` : ''}`));
+      }
+      let payload;
+      try {
+        payload = JSON.parse(last);
+      } catch {
+        return reject(new Error(`worker emitted non-JSON: ${last.slice(0, 120)}`));
+      }
+      resolve({ ok: payload.ok === true, sandbox, payload });
+    });
+  });
+}
+
+// Module internals re-exported under `__testing` (by its name, intended for
+// the test suite rather than production callers).
+const __testing = {
+  WORKER_PATH: WORKER_PATH,
+  WORKER_TIMEOUT_MS: WORKER_TIMEOUT_MS,
+  MAX_OUTPUT_BYTES: MAX_OUTPUT_BYTES,
+};
+export { __testing };