// ktg-plugin-marketplace/plugins/llm-security/scanners/lib/vsix-sandbox.mjs

// vsix-sandbox.mjs — Spawn vsix-fetch-worker.mjs under an OS sandbox so any
// filesystem writes the ZIP extractor performs are restricted to a single
// temp directory.
//
// Reuses the same sandbox primitives proven by `git-clone.mjs`:
//   - macOS: sandbox-exec with a deny-file-write profile + subpath allow
//   - Linux: bwrap with --ro-bind / --bind / --unshare-all
//   - Windows / fallback: spawn directly + WARN to stderr (no OS sandbox)
//
// Defense-in-depth: even if our own zip-extract.mjs has a zip-slip / symlink
// bypass we did not foresee, the OS will refuse the write. The in-process
// validation in zip-extract.mjs remains the first line of defense.

import { spawn } from 'node:child_process';
import { spawnSync } from 'node:child_process';
import { realpathSync } from 'node:fs';
import { fileURLToPath } from 'node:url';
import { dirname, resolve as resolvePath } from 'node:path';

// Directory containing this module (ES modules have no built-in __dirname,
// so it is derived from import.meta.url).
const __dirname = dirname(fileURLToPath(import.meta.url));
// Absolute path of the worker script, which sits next to this module.
const WORKER_PATH = resolvePath(__dirname, 'vsix-fetch-worker.mjs');
// Passed as the spawn `timeout` for the worker process in runVsixWorker.
const WORKER_TIMEOUT_MS = 35_000; // fetch is 30s, give worker 5s of slack
// Upper bound on bytes accepted from the worker's stdout before it is killed.
const MAX_OUTPUT_BYTES = 1024 * 1024; // 1MB JSON cap (output is tiny in practice)

/**
 * Build the sandbox-exec profile that allows file writes only beneath
 * `allowedWritePath` (plus /dev/null and /dev/tty); everything else keeps the
 * default-allow policy. Mirrors `buildSandboxProfile` in git-clone.mjs.
 *
 * @param {string} allowedWritePath  existing directory the sandboxed process may write to
 * @returns {string|null}  profile text, or null if not on macOS or sandbox-exec missing
 * @throws {Error} whatever `realpathSync` throws when the path cannot be resolved
 */
export function buildSandboxProfile(allowedWritePath) {
  if (process.platform !== 'darwin') return null;
  const check = spawnSync('which', ['sandbox-exec'], { encoding: 'utf8' });
  if (check.status !== 0) return null;

  // Resolve symlinks before embedding the path — presumably because the
  // sandbox matches on the resolved path (e.g. a symlinked temp dir); confirm
  // against git-clone.mjs if this is ever changed.
  const realPath = realpathSync(allowedWritePath);

  // The path is embedded in a double-quoted profile string literal. Escape
  // backslashes and double quotes so an unusual directory name cannot end the
  // literal early and inject extra profile rules.
  const quotedPath = realPath.replace(/[\\"]/g, '\\$&');

  // One form per line: the profile is handed to sandbox-exec as a single argv
  // element (no shell involved), so newlines are safe and keep it readable
  // when logged or debugged.
  return [
    '(version 1)',
    '(allow default)',
    '(deny file-write*)',
    `(allow file-write* (subpath "${quotedPath}"))`,
    '(allow file-write* (literal "/dev/null"))',
    '(allow file-write* (literal "/dev/tty"))',
  ].join('\n');
}

/**
 * Build bwrap arguments that allow writes only to `allowedWritePath`.
 * Mirrors `buildBwrapArgs` in git-clone.mjs.
 *
 * The returned array is the full argv for `bwrap`: sandbox flags followed by
 * `innerArgs` (the command to run inside the sandbox).
 *
 * @param {string} allowedWritePath  directory bind-mounted read-write inside the sandbox
 * @param {string[]} innerArgs  command and arguments to execute under bwrap
 * @returns {string[]|null}  null if not on Linux or bwrap unusable
 */
export function buildBwrapArgs(allowedWritePath, innerArgs) {
  if (process.platform !== 'linux') return null;
  const check = spawnSync('which', ['bwrap'], { encoding: 'utf8' });
  if (check.status !== 0) return null;
  // Probe — bwrap is shipped on Ubuntu 24.04+ but may need admin AppArmor config.
  const probe = spawnSync(
    'bwrap',
    ['--ro-bind', '/', '/', '--dev', '/dev', '/bin/true'],
    { stdio: 'ignore', timeout: 5000 },
  );
  if (probe.status !== 0) return null;
  return [
    // Whole filesystem read-only, then re-bind the one writable directory.
    '--ro-bind', '/', '/',
    '--bind', allowedWritePath, allowedWritePath,
    '--dev', '/dev',
    '--unshare-all',
    // --unshare-all also unshares the network namespace, which would leave the
    // worker unable to fetch its URL. Keep the host network: this sandbox is
    // about restricting filesystem writes, matching the macOS profile.
    '--share-net',
    '--new-session',
    '--die-with-parent',
    ...innerArgs,
  ];
}

/**
 * Work out how to launch the worker: wrapped in sandbox-exec when a profile
 * can be built, else wrapped in bwrap when its arguments can be built, else
 * as a bare `node` command. In the last case `sandbox` is null so the caller
 * can WARN. Identical pattern to `buildSandboxedClone`.
 *
 * @param {string} tmpDir  writable temp dir for the worker
 * @param {string[]} workerArgs  argv for the worker (after `node <worker>`)
 * @returns {{cmd:string, args:string[], sandbox: 'sandbox-exec'|'bwrap'|null}}
 */
export function buildSandboxedWorker(tmpDir, workerArgs) {
  // The unsandboxed command line; each sandbox variant wraps this.
  const nodeCommand = ['node', WORKER_PATH, ...workerArgs];

  // First choice: sandbox-exec profile.
  const sandboxExecProfile = buildSandboxProfile(tmpDir);
  if (sandboxExecProfile) {
    return {
      cmd: 'sandbox-exec',
      args: ['-p', sandboxExecProfile, ...nodeCommand],
      sandbox: 'sandbox-exec',
    };
  }

  // Second choice: bwrap (only consulted when no profile was produced).
  const bwrapArgv = buildBwrapArgs(tmpDir, nodeCommand);
  if (bwrapArgv) {
    return {
      cmd: 'bwrap',
      args: bwrapArgv,
      sandbox: 'bwrap',
    };
  }

  // Fallback — Windows or sandbox tools missing. Worker still runs, just not boxed.
  const [executable, ...executableArgs] = nodeCommand;
  return { cmd: executable, args: executableArgs, sandbox: null };
}

/**
 * Spawn the VSIX worker and parse its single JSON line of output.
 *
 * The worker's stdout is capped at MAX_OUTPUT_BYTES; exceeding the cap kills
 * the worker and rejects. The last non-empty stdout line must be a JSON
 * object; it is returned as `payload`, with `ok` set only when
 * `payload.ok === true`.
 *
 * @param {string} url       URL to fetch (passed via argv to worker)
 * @param {string} tmpDir    writable directory for extracted files
 * @param {object} [opts]
 * @param {boolean} [opts.allowFallback=true]  if false, reject when no OS sandbox
 * @returns {Promise<{ok:boolean, sandbox:'sandbox-exec'|'bwrap'|null, payload:object}>}
 *   Rejects with an Error when: no sandbox is available and fallback is
 *   disabled; the spawn fails; the worker times out; stdout exceeds the cap;
 *   the worker prints nothing; or its last line is not a JSON object.
 */
export function runVsixWorker(url, tmpDir, opts = {}) {
  const { allowFallback = true } = opts;
  const { cmd, args, sandbox } = buildSandboxedWorker(tmpDir, ['--url', url, '--tmpdir', tmpDir]);

  if (!sandbox && !allowFallback) {
    return Promise.reject(new Error('no OS sandbox available and fallback disabled'));
  }

  // Only a 200-char tail of stderr is ever reported, so there is no reason to
  // let a chatty (or hostile) worker grow this buffer without bound.
  const MAX_STDERR_BYTES = 64 * 1024;

  return new Promise((resolve, reject) => {
    const child = spawn(cmd, args, {
      stdio: ['ignore', 'pipe', 'pipe'],
      // On expiry Node sends the default killSignal (SIGTERM) to the child.
      timeout: WORKER_TIMEOUT_MS,
      env: { ...process.env },
    });

    // Keep raw Buffers and decode once at the end: decoding each chunk on its
    // own corrupts multi-byte UTF-8 sequences that straddle a chunk boundary.
    const stdoutChunks = [];
    let stdoutBytes = 0;
    let truncated = false;
    child.stdout.on('data', (chunk) => {
      stdoutBytes += chunk.length;
      if (stdoutBytes > MAX_OUTPUT_BYTES) {
        if (!truncated) {
          truncated = true;
          try { child.kill('SIGKILL'); } catch { /* child already gone */ }
        }
        return;
      }
      stdoutChunks.push(chunk);
    });

    const stderrChunks = [];
    let stderrBytes = 0;
    child.stderr.on('data', (chunk) => {
      // Keep consuming so the pipe drains, but retain only the first
      // MAX_STDERR_BYTES bytes.
      const room = MAX_STDERR_BYTES - stderrBytes;
      if (room <= 0) return;
      const kept = chunk.length > room ? chunk.subarray(0, room) : chunk;
      stderrChunks.push(kept);
      stderrBytes += kept.length;
    });

    child.on('error', (err) => {
      reject(new Error(`worker spawn failed: ${err.message}`, { cause: err }));
    });

    child.on('close', (code, signal) => {
      if (truncated) {
        return reject(new Error('worker output exceeded 1MB cap'));
      }
      // A SIGTERM death with no exit code is treated as the spawn timeout firing.
      if (signal === 'SIGTERM' && code === null) {
        return reject(new Error(`worker timed out after ${WORKER_TIMEOUT_MS}ms`));
      }
      // Parse the last non-empty line as JSON (worker writes one line on success/failure).
      const stdout = Buffer.concat(stdoutChunks).toString('utf8');
      const lines = stdout.split('\n').map((l) => l.trim()).filter(Boolean);
      const last = lines[lines.length - 1];
      if (!last) {
        const stderr = Buffer.concat(stderrChunks).toString('utf8');
        const tail = stderr.trim().slice(0, 200);
        return reject(new Error(`worker produced no output${tail ? ` (stderr: ${tail})` : ''}`));
      }
      let payload;
      try {
        payload = JSON.parse(last);
      } catch {
        return reject(new Error(`worker emitted non-JSON: ${last.slice(0, 120)}`));
      }
      // `null` (and other scalars) are valid JSON but not a valid payload.
      // Reading `.ok` off null would throw inside this event handler, which
      // would crash the process and leave the promise unsettled.
      if (payload === null || typeof payload !== 'object') {
        return reject(new Error(`worker emitted non-object JSON: ${last.slice(0, 120)}`));
      }
      resolve({ ok: payload.ok === true, sandbox, payload });
    });
  });
}

// Re-exports the module-private constants under a single `__testing` object —
// presumably so unit tests can assert on them; not intended as public API.
export const __testing = {
  WORKER_PATH,
  WORKER_TIMEOUT_MS,
  MAX_OUTPUT_BYTES,
};