From 8b146bf489b0ec92359560939e4d1743e91947c9 Mon Sep 17 00:00:00 2001 From: Kjell Tore Guttormsen Date: Fri, 1 May 2026 18:16:23 +0200 Subject: [PATCH] feat(humanizer): scenario read-test corpus + runner (SC-4) [skip-docs] Step 9 of v5.1.0 humanizer Wave 4. Adds tests/scenario-read-test.mjs runner, tests/scenario-read-test.test.mjs wrapper, and 5 scenario fixtures in tests/scenarios/ that feed deterministic raw findings through humanizeFinding and assert the humanized title/description/recommendation match brief-owner-approved regex patterns encoding the ground-truth what/why/whatNext answers. Corpus selection (per brief criteria): - 01-tok-cascade.json - TOK/CPS category (token efficiency) - 02-cps-volatile.json - TOK/CPS category (cache prefix stability) - 03-cnf-conflict.json - CNF category (conflicts) - 04-gap-no-claude-md.json - GAP category (feature gap) - 05-set-invalid-json.json - SET category, AND its v5.0.0 title + description carry tier1 'invalid' (the brief criterion 'one finding whose v5.0.0 description uses a forbidden word'). Runner mechanics: - Loads scenarios matching ^\d{2}-[a-z0-9-]+\.json$ in sorted order. - Calls humanizeFinding(scannerInput) and matches each humanized field against its declared pattern (case-insensitive regex). - Verifies humanizer-added structural fields (userImpactCategory, userActionLanguage, relevanceContext) are non-empty strings. - Per session decision (1a) acceptance is deterministic regex matching without a runtime human approval gate. Wrapper adds 3 tests: scenario-match (binds runner to node --test), category-coverage (TOK/CPS, CNF, GAP, SET all present), and tier1-presence (at least one v5.0.0 title or description contains a tier1 forbidden word). Tests: 736 to 739 (+3 SC-4 tests). Full suite passes. 
Co-Authored-By: Claude Opus 4.7 --- .../config-audit/tests/scenario-read-test.mjs | 141 ++++++++++++++++++ .../tests/scenario-read-test.test.mjs | 87 +++++++++++ .../tests/scenarios/01-tok-cascade.json | 29 ++++ .../tests/scenarios/02-cps-volatile.json | 29 ++++ .../tests/scenarios/03-cnf-conflict.json | 29 ++++ .../tests/scenarios/04-gap-no-claude-md.json | 29 ++++ .../tests/scenarios/05-set-invalid-json.json | 29 ++++ 7 files changed, 373 insertions(+) create mode 100644 plugins/config-audit/tests/scenario-read-test.mjs create mode 100644 plugins/config-audit/tests/scenario-read-test.test.mjs create mode 100644 plugins/config-audit/tests/scenarios/01-tok-cascade.json create mode 100644 plugins/config-audit/tests/scenarios/02-cps-volatile.json create mode 100644 plugins/config-audit/tests/scenarios/03-cnf-conflict.json create mode 100644 plugins/config-audit/tests/scenarios/04-gap-no-claude-md.json create mode 100644 plugins/config-audit/tests/scenarios/05-set-invalid-json.json diff --git a/plugins/config-audit/tests/scenario-read-test.mjs b/plugins/config-audit/tests/scenario-read-test.mjs new file mode 100644 index 0000000..e0c8a6c --- /dev/null +++ b/plugins/config-audit/tests/scenario-read-test.mjs @@ -0,0 +1,141 @@ +#!/usr/bin/env node +/** + * SC-4 scenario read-test runner. + * + * Loads each scenario in tests/scenarios/NN-*.json (NN = two digits), feeds the + * `scannerInput` into `humanizeFinding`, and asserts that humanized + * `title` / `description` / `recommendation` match the regex patterns + * declared in `expectedHumanized`. The patterns encode the + * brief-owner-approved ground-truth answers ("what / why / what next") + * so that passing the deterministic regex match is equivalent to the + * humanized output answering the three questions a reader would ask. + * + * Per brief-owner decision (1a) the gate is deterministic regex + * matching — no human-in-the-loop step at runtime. + * + * Exit 0 = PASS (all scenarios match), exit 1 = FAIL, exit 2 = runner error. 
+ *
+ * Usage:
+ *   node tests/scenario-read-test.mjs
+ */
+import { readdir, readFile } from 'node:fs/promises';
+import { resolve, dirname, join } from 'node:path';
+import { fileURLToPath } from 'node:url';
+import { humanizeFinding } from '../scanners/lib/humanizer.mjs';
+
+const __dirname = dirname(fileURLToPath(import.meta.url));
+const SCENARIOS_DIR = resolve(__dirname, 'scenarios');
+
+/** Read every two-digit-prefixed .json fixture in SCENARIOS_DIR, sorted by name, as { file, body } records. */
+async function loadScenarios() {
+  const entries = await readdir(SCENARIOS_DIR);
+  const files = entries.filter((f) => /^\d{2}-[a-z0-9-]+\.json$/.test(f)).sort();
+  const scenarios = [];
+  for (const f of files) {
+    const raw = await readFile(join(SCENARIOS_DIR, f), 'utf8');
+    try {
+      scenarios.push({ file: f, body: JSON.parse(raw) });
+    } catch (err) {
+      // Name the offending fixture; a bare SyntaxError does not say which file is malformed.
+      throw new Error(`scenario ${f} is not valid JSON: ${err.message}`, { cause: err });
+    }
+  }
+  return scenarios;
+}
+
+/** Test `value` against `pattern` (case-insensitive); returns { ok: true } or { ok: false, reason }. */
+function checkPattern(field, value, pattern) {
+  if (typeof value !== 'string') {
+    return { ok: false, reason: `${field} is not a string (got ${typeof value})` };
+  }
+  let re;
+  try {
+    re = new RegExp(pattern, 'i');
+  } catch (err) {
+    return { ok: false, reason: `${field} pattern is not a valid regex: ${err.message}` };
+  }
+  if (re.test(value)) return { ok: true };
+  return { ok: false, reason: `${field} did not match /${pattern}/i\n actual: ${JSON.stringify(value)}` };
+}
+
+/**
+ * Run one scenario through humanizeFinding and return per-scenario result.
+ * Returns { file, findingId, humanized, failures }; `failures` holds { field, reason }
+ * entries and is empty when every check passed.
+ */
+export function runOne(scenario) {
+  const { findingId, scannerInput, expectedHumanized } = scenario.body;
+  const humanized = humanizeFinding(scannerInput);
+  const failures = [];
+
+  // Each text field must match the pattern stored under `<field>Pattern`.
+  for (const field of ['title', 'description', 'recommendation']) {
+    const key = `${field}Pattern`;
+    const pattern = expectedHumanized?.[key];
+    if (typeof pattern === 'string' && pattern.length > 0) {
+      const outcome = checkPattern(field, humanized?.[field], pattern);
+      if (!outcome.ok) failures.push({ field, reason: outcome.reason });
+    } else {
+      failures.push({ field, reason: `missing or empty pattern key "${key}"` });
+    }
+  }
+
+  // Sanity: fields the humanizer adds itself only have to be non-empty strings.
+  for (const sysField of ['userImpactCategory', 'userActionLanguage', 'relevanceContext']) {
+    const value = humanized?.[sysField];
+    if (typeof value === 'string' && value.length > 0) continue;
+    failures.push({
+      field: sysField,
+      reason: `expected non-empty string from humanizer; got ${JSON.stringify(value)}`,
+    });
+  }
+
+  return { file: scenario.file, findingId, humanized, failures };
+}
+
+/**
+ * Run every scenario, returning aggregate results.
+ * Resolves to { scenarios, failed, passed, total }: all runOne results, those with failures, and the counts.
+ */
+export async function runAll() {
+  const scenarios = await loadScenarios();
+  const results = scenarios.map(runOne);
+  const failed = results.filter((r) => r.failures.length > 0);
+  return { scenarios: results, failed, passed: results.length - failed.length, total: results.length };
+}
+
+/** CLI entry point: writes a per-scenario report to stderr, then exits 0 (all match) or 1 (mismatch or empty corpus). */
+async function main() {
+  const { scenarios, failed, passed, total } = await runAll();
+
+  if (total === 0) {
+    process.stderr.write('SC-4 FAIL: no scenarios found in tests/scenarios/\n');
+    process.exit(1);
+  }
+
+  if (failed.length === 0) {
+    process.stderr.write(`SC-4 PASS: ${passed}/${total} scenarios match humanizer output\n`);
+    for (const r of scenarios) {
+      process.stderr.write(` ${r.file} (${r.findingId}) - OK\n`);
+    }
+    process.exit(0);
+  }
+
+  process.stderr.write(`SC-4 FAIL: ${failed.length}/${total} scenarios did not match\n`);
+  for (const r of failed) {
+    process.stderr.write(`\n ${r.file} (${r.findingId})\n`);
+    for (const f of r.failures) {
+      process.stderr.write(` [${f.field}] ${f.reason}\n`);
+    }
+  }
+  process.exit(1);
+}
+
+// fileURLToPath yields a decoded, platform-native path; URL.pathname stays percent-encoded (and is "/C:/..." on Windows).
+const isDirectRun = process.argv[1] && resolve(process.argv[1]) === fileURLToPath(import.meta.url);
+if (isDirectRun) {
+  main().catch((err) => {
+    process.stderr.write(`Scenario runner error: ${err.message}\n`);
+    process.exit(2);
+  });
+}
diff --git a/plugins/config-audit/tests/scenario-read-test.test.mjs b/plugins/config-audit/tests/scenario-read-test.test.mjs
new file mode 100644
index 0000000..91f0688
--- /dev/null
+++ b/plugins/config-audit/tests/scenario-read-test.test.mjs
@@ -0,0 +1,87 @@
+import { describe, it } from 'node:test';
+import assert from 'node:assert/strict';
+import { readFile } from 'node:fs/promises';
+import { resolve, dirname } from 'node:path';
+import { fileURLToPath } from 'node:url';
+import { runAll } from './scenario-read-test.mjs';
+
+const __dirname = dirname(fileURLToPath(import.meta.url));
+
+describe('SC-4 scenario read-test (humanizer corpus)', () => {
+  it('matches all scenarios in tests/scenarios/', async () => {
+    const { failed, passed, total } = await runAll();
+
+    assert.ok(total >= 5, `expected at least 5 scenarios in corpus, got ${total}`);
+
+    if (failed.length === 0) {
+      assert.equal(passed, total);
+      return;
+    }
+
+    const summary = failed
+      .map((r) => {
+        const reasons = r.failures
+          .map((f) => ` [${f.field}] ${f.reason}`)
+          .join('\n');
+        return ` ${r.file} (${r.findingId})\n${reasons}`;
+      })
+      .join('\n\n');
+
+    assert.fail(
+      `SC-4: ${failed.length}/${total} scenarios did not match humanizer output\n\n${summary}`,
+    );
+  });
+
+  it('covers required scanner categories (TOK/CPS, CNF, GAP, SET)', async () => {
+    const { scenarios } = await runAll();
+    const seen = new Set();
+    for (const r of scenarios) {
+      const prefix = r.findingId.split('-')[1];
+      seen.add(prefix);
+    }
+    // TOK and CPS together cover the "wasted tokens" category — at least one must appear.
+    const hasTokenCategory = seen.has('TOK') || seen.has('CPS');
+    assert.ok(hasTokenCategory, `corpus must include at least one TOK or CPS finding; saw ${[...seen].join(', ')}`);
+    assert.ok(seen.has('CNF'), `corpus must include at least one CNF finding; saw ${[...seen].join(', ')}`);
+    assert.ok(seen.has('GAP'), `corpus must include at least one GAP finding; saw ${[...seen].join(', ')}`);
+    assert.ok(seen.has('SET'), `corpus must include at least one SET finding; saw ${[...seen].join(', ')}`);
+  });
+
+  it('includes at least one scenario whose v5.0.0 description carries a tier1 forbidden word', async () => {
+    const { scenarios } = await runAll();
+    // Read the forbidden-words file at runtime so this assertion stays in sync
+    // with the source of truth (Wave 1 Step 1 artifact).
+    const forbiddenPath = resolve(__dirname, 'lint-forbidden-words.json');
+    const forbidden = JSON.parse(await readFile(forbiddenPath, 'utf8'));
+    const tier1 = forbidden.tier1.map((e) => e.word);
+
+    // Entries containing a space, hyphen, dot or slash match as substrings; plain words need word boundaries.
+    const matchesTier1 = (text) => {
+      if (typeof text !== 'string') return false;
+      return tier1.some((word) => {
+        const lower = word.toLowerCase();
+        const escaped = lower.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
+        const re = /[ \-./]/.test(lower)
+          ? new RegExp(escaped, 'i')
+          : new RegExp(`\\b${escaped}\\b`, 'i');
+        return re.test(text);
+      });
+    };
+
+    let found = false;
+    for (const scenario of scenarios) {
+      const path = resolve(__dirname, 'scenarios', scenario.file);
+      const body = JSON.parse(await readFile(path, 'utf8'));
+      const desc = body?.scannerInput?.description ?? '';
+      const title = body?.scannerInput?.title ?? '';
+      if (matchesTier1(desc) || matchesTier1(title)) {
+        found = true;
+        break;
+      }
+    }
+    assert.ok(
+      found,
+      'corpus must include at least one scenario whose v5.0.0 title or description contains a tier1 forbidden word',
+    );
+  });
+});
diff --git a/plugins/config-audit/tests/scenarios/01-tok-cascade.json b/plugins/config-audit/tests/scenarios/01-tok-cascade.json
new file mode 100644
index 0000000..1975eca
--- /dev/null
+++ b/plugins/config-audit/tests/scenarios/01-tok-cascade.json
@@ -0,0 +1,29 @@
+{
+  "_meta": {
+    "comment": "Scenario 01: TOK CLAUDE.md cascade exceeds 10k tokens. Covers the TOK/CPS (token-efficiency) category. v5.0.0 title contains tier3 'CLAUDE.md' — humanizer rewrites to non-jargon prose."
+ }, + "findingId": "CA-TOK-001", + "scannerInput": { + "id": "CA-TOK-001", + "scanner": "TOK", + "severity": "high", + "title": "CLAUDE.md cascade exceeds 10k tokens per turn", + "description": "Total CLAUDE.md cascade is 12450 tokens across 4 files.", + "file": ".claude/CLAUDE.md", + "line": null, + "evidence": "tokens=12450; files=4", + "recommendation": "Reduce CLAUDE.md cascade size. Move content into modular skill files or trim verbose sections.", + "category": null, + "autoFixable": false + }, + "expectedHumanized": { + "titlePattern": "instruction files take a lot of space on every turn", + "descriptionPattern": "10,000 tokens|every turn carries that weight", + "recommendationPattern": "Trim or split the largest files" + }, + "groundTruth": { + "what": "The instruction files Claude reads on every turn are large enough that they slow each response.", + "why": "The combined size has gone above 10,000 tokens. That weight loads on every turn and leaves less room for the conversation itself.", + "whatNext": "Trim or split the largest files. The details show which file contributes most." + } +} diff --git a/plugins/config-audit/tests/scenarios/02-cps-volatile.json b/plugins/config-audit/tests/scenarios/02-cps-volatile.json new file mode 100644 index 0000000..e68c99d --- /dev/null +++ b/plugins/config-audit/tests/scenarios/02-cps-volatile.json @@ -0,0 +1,29 @@ +{ + "_meta": { + "comment": "Scenario 02: CPS volatile content inside cached prefix. Covers the CPS half of the TOK/CPS category. Tests that the humanizer explains cache-prefix-stability in user-facing language." + }, + "findingId": "CA-CPS-001", + "scannerInput": { + "id": "CA-CPS-001", + "scanner": "CPS", + "severity": "medium", + "title": "Volatile content inside cached prefix breaks reuse", + "description": "Volatile pattern matched at .claude/CLAUDE.md:42 (timestamp). 
Lines 31-150 form the cache prefix.", + "file": ".claude/CLAUDE.md", + "line": 42, + "evidence": "Pattern: timestamp; window: 31-150", + "recommendation": "Move volatile content (timestamps, session state) below line 150 or to a separate file.", + "category": null, + "autoFixable": false + }, + "expectedHumanized": { + "titlePattern": "Content that changes between turns sits in the part Claude tries to reuse", + "descriptionPattern": "fresh read every time|slows responses", + "recommendationPattern": "Move the changing content|150 lines" + }, + "groundTruth": { + "what": "Content that changes between turns is inside the part of the file Claude tries to reuse.", + "why": "Claude saves space by reusing the start of your instructions across turns. When that area changes, every turn re-reads the whole start, which slows responses.", + "whatNext": "Move the changing content (timestamps, session notes) below the first 150 lines, or out of the file entirely." + } +} diff --git a/plugins/config-audit/tests/scenarios/03-cnf-conflict.json b/plugins/config-audit/tests/scenarios/03-cnf-conflict.json new file mode 100644 index 0000000..18dd348 --- /dev/null +++ b/plugins/config-audit/tests/scenarios/03-cnf-conflict.json @@ -0,0 +1,29 @@ +{ + "_meta": { + "comment": "Scenario 03: CNF allow/deny conflict. Covers the conflicts category. v5.0.0 title contains tier3 'allow/deny' — humanizer rewrites with non-jargon language." 
+ }, + "findingId": "CA-CNF-001", + "scannerInput": { + "id": "CA-CNF-001", + "scanner": "CNF", + "severity": "high", + "title": "Permission allow/deny conflict", + "description": "Tool 'Bash(git:*)' appears in both allow and deny lists at .claude/settings.json.", + "file": ".claude/settings.json", + "line": null, + "evidence": "tool=Bash(git:*); allow=true; deny=true", + "recommendation": "Remove the tool from either the allow or deny list to make the intent unambiguous.", + "category": null, + "autoFixable": false + }, + "expectedHumanized": { + "titlePattern": "let-in and shut-out by your permissions", + "descriptionPattern": "deny.*priority over an .*allow|looks like the tool is approved", + "recommendationPattern": "Remove either the .*allow.* or the .*deny" + }, + "groundTruth": { + "what": "A tool you have configured is both let-in and shut-out by your permission rules.", + "why": "A `deny` entry takes priority over an `allow`, so the `allow` does nothing — but the configuration looks like the tool is approved, which can mislead readers of the file.", + "whatNext": "Remove either the `allow` or the `deny` entry so the intent is unambiguous." + } +} diff --git a/plugins/config-audit/tests/scenarios/04-gap-no-claude-md.json b/plugins/config-audit/tests/scenarios/04-gap-no-claude-md.json new file mode 100644 index 0000000..8fcedf0 --- /dev/null +++ b/plugins/config-audit/tests/scenarios/04-gap-no-claude-md.json @@ -0,0 +1,29 @@ +{ + "_meta": { + "comment": "Scenario 04: GAP no CLAUDE.md file. Covers the feature-gap category. v5.0.0 title and recommendation contain tier3 'CLAUDE.md' — humanizer wraps the term in backticks." 
+ }, + "findingId": "CA-GAP-001", + "scannerInput": { + "id": "CA-GAP-001", + "scanner": "GAP", + "severity": "medium", + "title": "No CLAUDE.md file", + "description": "No project instructions file detected.", + "file": null, + "line": null, + "evidence": null, + "recommendation": "Create a CLAUDE.md file with project-specific guidance.", + "category": "t1", + "autoFixable": false + }, + "expectedHumanized": { + "titlePattern": "haven'?t added project instructions for Claude", + "descriptionPattern": "highest-impact thing you can add|tells Claude how you work", + "recommendationPattern": "Create .*CLAUDE\\.md.*one-paragraph overview" + }, + "groundTruth": { + "what": "Your project doesn't have a top-level instructions file for Claude yet.", + "why": "A `CLAUDE.md` at the project root is the single highest-impact addition; it tells Claude how you work in this codebase so every session starts informed.", + "whatNext": "Create `CLAUDE.md` with a one-paragraph overview, common commands, and any conventions Claude should know." + } +} diff --git a/plugins/config-audit/tests/scenarios/05-set-invalid-json.json b/plugins/config-audit/tests/scenarios/05-set-invalid-json.json new file mode 100644 index 0000000..7827691 --- /dev/null +++ b/plugins/config-audit/tests/scenarios/05-set-invalid-json.json @@ -0,0 +1,29 @@ +{ + "_meta": { + "comment": "Scenario 05: SET invalid JSON in settings file. Covers the settings category AND the brief criterion 'one finding whose v5.0.0 description contains a tier1 forbidden word' — both the v5.0.0 title and description carry tier1 'invalid'. Humanizer rewrites to plain language." 
+ }, + "findingId": "CA-SET-001", + "scannerInput": { + "id": "CA-SET-001", + "scanner": "SET", + "severity": "critical", + "title": "Invalid JSON in settings file", + "description": ".claude/settings.json contains invalid JSON and will be ignored by Claude Code.", + "file": ".claude/settings.json", + "line": null, + "evidence": "Unexpected token } in JSON at position 187", + "recommendation": "Fix JSON syntax errors. Use a JSON validator.", + "category": null, + "autoFixable": false + }, + "expectedHumanized": { + "titlePattern": "settings file isn'?t readable as JSON", + "descriptionPattern": "Claude Code can'?t parse the file|settings are skipped", + "recommendationPattern": "Open the file and fix the JSON syntax" + }, + "groundTruth": { + "what": "Your settings file can't be read as JSON, so none of the settings inside take effect.", + "why": "Claude Code parses the settings file once at startup. When that parse fails, all settings inside are skipped silently, and you get the defaults.", + "whatNext": "Open the file and fix the JSON syntax shown in the details (often a missing comma, an extra trailing comma, or an unterminated quote)." + } +}