feat(humanizer): wire humanizer into scan-orchestrator main with --raw bypass

Adds --json and --raw flags to scan-orchestrator CLI main(). Default mode runs humanizeEnvelope(env) before serialization; --json and --raw bypass the humanizer for v5.0.0 byte-equal output (SC-6 / SC-7 paths). Save-baseline path always writes the raw v5.0.0-shape envelope so future humanizer-data updates do not trigger false-positive drift findings. runAllScanners() unchanged — it remains the v5.0.0-shape source of truth for in-process callers (posture, scoring, drift, etc.). Wave 3 / Step 5 of v5.1.0 humanizer. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-01 17:31:37 +02:00 · 2026-05-01 17:31:37 +02:00 · 5ff6594976
commit 5ff6594976
parent 79a4249e0b
2 changed files with 158 additions and 2 deletions
--- a/plugins/config-audit/scanners/scan-orchestrator.mjs
+++ b/plugins/config-audit/scanners/scan-orchestrator.mjs
@ -13,6 +13,7 @@ import { resetCounter } from './lib/output.mjs';
 import { envelope } from './lib/output.mjs';
 import { discoverConfigFiles, discoverConfigFilesMulti, discoverFullMachinePaths } from './lib/file-discovery.mjs';
 import { loadSuppressions, applySuppressions, formatSuppressionSummary } from './lib/suppression.mjs';
+import { humanizeEnvelope } from './lib/humanizer.mjs';

 // Scanner registry — import order determines execution order
 import { scan as scanClaudeMd } from './claude-md-linter.mjs';
@ -201,6 +202,10 @@ async function main() {
      // handled below
    } else if (args[i] === '--include-fixtures') {
      // handled below
+    } else if (args[i] === '--json') {
+      // handled below — explicit machine-readable mode (bypass humanizer)
+    } else if (args[i] === '--raw') {
+      // handled below — v5.0.0 verbatim mode (bypass humanizer)
    } else if (!args[i].startsWith('-')) {
      targetPath = args[i];
    }
@ -210,6 +215,8 @@ async function main() {
  const fullMachine = args.includes('--full-machine');
  const suppress = !args.includes('--no-suppress');
  const filterFixtures = !args.includes('--include-fixtures');
+  const jsonMode = args.includes('--json');
+  const rawMode = args.includes('--raw');

  process.stderr.write(`Config-Audit Scanner v2.2.0\n`);
  process.stderr.write(`Target: ${resolve(targetPath)}\n`);
@ -218,7 +225,9 @@ async function main() {

  const result = await runAllScanners(targetPath, { includeGlobal, fullMachine, suppress, filterFixtures });

-  const json = JSON.stringify(result, null, 2);
+  // Default mode runs the humanizer; --json and --raw bypass for v5.0.0 byte-equal output.
+  const output = (jsonMode || rawMode) ? result : humanizeEnvelope(result);
+  const json = JSON.stringify(output, null, 2);

  if (outputFile) {
    await writeFile(outputFile, json, 'utf-8');
@ -229,7 +238,9 @@ async function main() {

  if (saveBaseline) {
    const bPath = baselinePath || resolve(targetPath, '.config-audit-baseline.json');
-    await writeFile(bPath, json, 'utf-8');
+    // Always save baselines as raw v5.0.0-shape envelope so future humanizer
+    // changes don't trigger false-positive drift findings.
+    await writeFile(bPath, JSON.stringify(result, null, 2), 'utf-8');
    process.stderr.write(`Baseline saved to ${bPath}\n`);
  }

--- a/plugins/config-audit/tests/scanners/scan-orchestrator-humanizer.test.mjs
+++ b/plugins/config-audit/tests/scanners/scan-orchestrator-humanizer.test.mjs
@ -0,0 +1,145 @@
+import { describe, it } from 'node:test';
+import assert from 'node:assert/strict';
+import { tmpdir } from 'node:os';
+import { resolve, dirname } from 'node:path';
+import { fileURLToPath } from 'node:url';
+import { execFile } from 'node:child_process';
+import { promisify } from 'node:util';
+import { readFile, unlink } from 'node:fs/promises';
+
+const exec = promisify(execFile); // promise-returning execFile, so the CLI run can be awaited
+const __dirname = dirname(fileURLToPath(import.meta.url)); // ESM has no built-in __dirname; derive it from this module's URL
+const REPO = resolve(__dirname, '../..'); // two levels above this test file's directory
+const CLI = resolve(REPO, 'scanners/scan-orchestrator.mjs'); // CLI entry point under test
+const FIXTURE = resolve(REPO, 'tests/fixtures/marketplace-medium'); // scan target passed to the CLI
+const SNAPSHOT_PATH = resolve(REPO, 'tests/snapshots/v5.0.0/scan-orchestrator.json'); // stored v5.0.0 envelope used as the expected value
+
+/**
+ * Produce a snapshot-comparable copy of a scan-orchestrator envelope.
+ *
+ * Fields that differ from run to run are overwritten with fixed values:
+ * `meta.target` and `meta.timestamp` become placeholder strings, and every
+ * scanner entry's `duration_ms` becomes 0. The input is never mutated; the
+ * copy is made with a JSON round-trip.
+ *
+ * @param {object} env - Envelope as parsed from the CLI's JSON output.
+ * @returns {object} A new, normalized envelope.
+ */
+function normalizeEnvelope(env) {
+  const normalized = JSON.parse(JSON.stringify(env));
+
+  const meta = normalized.meta;
+  if (meta) {
+    meta.target = '<TARGET>';
+    meta.timestamp = '<TIMESTAMP>';
+  }
+
+  // Anything other than an array under `scanners` is left untouched.
+  const scannerEntries = Array.isArray(normalized.scanners) ? normalized.scanners : [];
+  scannerEntries.forEach((entry) => {
+    entry.duration_ms = 0;
+  });
+
+  return normalized;
+}
+
+/**
+ * Run the scan-orchestrator CLI against FIXTURE in a child process and
+ * return the envelope it wrote via --output-file, parsed from JSON.
+ *
+ * The output goes to a uniquely named file in the OS temp directory
+ * (os.tmpdir(), rather than a hard-coded /tmp that does not exist on every
+ * platform). The file is removed afterwards whether or not the run succeeded.
+ *
+ * @param {string[]} flags - Extra CLI flags appended after --output-file.
+ * @returns {Promise<object>} Parsed JSON content of the output file.
+ * @throws If the child process fails or exceeds the 60 s timeout, or if the
+ *   output file cannot be read or parsed.
+ */
+async function runOrchestrator(flags) {
+  const out = resolve(
+    tmpdir(),
+    `scan-orch-humanizer-${process.pid}-${Date.now()}-${Math.random()}.json`,
+  );
+  try {
+    // process.execPath spawns the same Node binary that is running the tests,
+    // instead of whatever `node` happens to resolve to on PATH.
+    await exec(process.execPath, [CLI, FIXTURE, '--output-file', out, ...flags], {
+      timeout: 60000,
+      cwd: REPO,
+    });
+    const written = await readFile(out, 'utf-8');
+    return JSON.parse(written);
+  } finally {
+    // Best-effort cleanup: the file may not exist if the CLI failed early.
+    await unlink(out).catch(() => {});
+  }
+}
+
+// End-to-end checks of the CLI: default mode must emit humanized findings,
+// while --json and --raw must bypass the humanizer. Each test obtains its
+// envelope through runOrchestrator().
+describe('scan-orchestrator humanizer wiring (Step 5)', () => {
+  // Fields asserted to be strings in default mode and absent in bypass modes.
+  const HUMANIZER_FIELDS = ['userImpactCategory', 'userActionLanguage', 'relevanceContext'];
+  const RELEVANCE_CONTEXTS = ['test-fixture-no-impact', 'affects-this-machine-only', 'affects-everyone'];
+
+  // Both bypass flags get exactly the same assertions, so they share one
+  // parameterized suite; this keeps the per-flag field checks from drifting.
+  const BYPASS_MODES = [
+    { flag: '--json', criterion: 'SC-6' },
+    { flag: '--raw', criterion: 'SC-7' },
+  ];
+
+  for (const { flag, criterion } of BYPASS_MODES) {
+    describe(`${flag} mode (${criterion}: byte-equal v5.0.0)`, () => {
+      it('produces envelope structurally equal to v5.0.0 snapshot', async () => {
+        const actual = await runOrchestrator([flag]);
+        const expected = JSON.parse(await readFile(SNAPSHOT_PATH, 'utf-8'));
+        assert.deepStrictEqual(normalizeEnvelope(actual), normalizeEnvelope(expected));
+      });
+
+      it('does NOT add humanizer fields to findings', async () => {
+        const actual = await runOrchestrator([flag]);
+        for (const s of actual.scanners) {
+          for (const f of s.findings) {
+            for (const field of HUMANIZER_FIELDS) {
+              assert.equal(f[field], undefined,
+                `${f.id}: ${flag} findings must not have ${field}`);
+            }
+          }
+        }
+      });
+    });
+  }
+
+  describe('default mode (humanized)', () => {
+    it('preserves envelope-level shape', async () => {
+      const actual = await runOrchestrator([]);
+      assert.ok(actual.meta, 'meta present');
+      assert.ok(Array.isArray(actual.scanners), 'scanners array present');
+      assert.ok(actual.aggregate, 'aggregate present');
+      assert.equal(actual.scanners.length, 12, 'all 12 scanners present');
+    });
+
+    it('preserves scanner shape (scanner/status/findings/counts)', async () => {
+      const actual = await runOrchestrator([]);
+      for (const s of actual.scanners) {
+        assert.ok(typeof s.scanner === 'string', 'scanner name string');
+        assert.ok(typeof s.status === 'string', 'status string');
+        assert.ok(Array.isArray(s.findings), 'findings array');
+        assert.ok(s.counts, 'counts object');
+      }
+    });
+
+    it('adds humanizer fields to every finding', async () => {
+      const actual = await runOrchestrator([]);
+      let totalFindings = 0;
+      for (const s of actual.scanners) {
+        for (const f of s.findings) {
+          totalFindings++;
+          for (const field of HUMANIZER_FIELDS) {
+            assert.equal(typeof f[field], 'string',
+              `${f.id}: ${field} must be string`);
+          }
+          assert.ok(RELEVANCE_CONTEXTS.includes(f.relevanceContext),
+            `${f.id}: relevanceContext must be one of allowed values, got ${f.relevanceContext}`);
+        }
+      }
+      // Guard against a vacuous pass when the fixture yields no findings.
+      assert.ok(totalFindings > 0, 'expected at least one finding to assert against');
+    });
+
+    it('preserves stable identifiers (id, scanner, severity)', async () => {
+      const actualHumanized = await runOrchestrator([]);
+      const actualRaw = await runOrchestrator(['--raw']);
+
+      const flatHumanized = actualHumanized.scanners.flatMap((s) => s.findings);
+      const flatRaw = actualRaw.scanners.flatMap((s) => s.findings);
+      assert.equal(flatHumanized.length, flatRaw.length, 'finding count matches');
+
+      // Findings are compared by position, so both runs must list them in the same order.
+      flatHumanized.forEach((h, i) => {
+        const r = flatRaw[i];
+        assert.equal(h.id, r.id, `finding ${i} id matches`);
+        assert.equal(h.scanner, r.scanner, `finding ${i} scanner matches`);
+        assert.equal(h.severity, r.severity, `finding ${i} severity matches`);
+      });
+    });
+  });
+});