feat(humanizer): scenario read-test corpus + runner (SC-4) [skip-docs]
Step 9 of v5.1.0 humanizer Wave 4. Adds tests/scenario-read-test.mjs
runner, tests/scenario-read-test.test.mjs wrapper, and 5 scenario
fixtures in tests/scenarios/ that feed deterministic raw findings
through humanizeFinding and assert the humanized
title/description/recommendation match brief-owner-approved regex
patterns encoding the ground-truth what/why/whatNext answers.
Corpus selection (per brief criteria):
- 01-tok-cascade.json - TOK/CPS category (token efficiency)
- 02-cps-volatile.json - TOK/CPS category (cache prefix stability)
- 03-cnf-conflict.json - CNF category (conflicts)
- 04-gap-no-claude-md.json - GAP category (feature gap)
- 05-set-invalid-json.json - SET category, AND its v5.0.0 title +
description carry tier1 'invalid' (the brief criterion 'one finding
whose v5.0.0 description uses a forbidden word').
Runner mechanics:
- Loads scenarios matching ^\d{2}-[a-z0-9-]+\.json$ in sorted order.
- Calls humanizeFinding(scannerInput) and matches each humanized field
against its declared pattern (case-insensitive regex).
- Verifies humanizer-added structural fields (userImpactCategory,
userActionLanguage, relevanceContext) are non-empty strings.
- Per session decision (1a) acceptance is deterministic regex matching
without a runtime human approval gate.
Wrapper adds 3 tests: scenario-match (binds runner to node --test),
category-coverage (TOK/CPS, CNF, GAP, SET all present), and
tier1-presence (at least one v5.0.0 title or description contains a
tier1 forbidden word).
Tests: 736 to 739 (+3 SC-4 tests). Full suite passes.
Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
parent
c5c937e94e
commit
8b146bf489
7 changed files with 373 additions and 0 deletions
141
plugins/config-audit/tests/scenario-read-test.mjs
Normal file
141
plugins/config-audit/tests/scenario-read-test.mjs
Normal file
|
|
@ -0,0 +1,141 @@
|
|||
#!/usr/bin/env node
|
||||
/**
|
||||
* SC-4 scenario read-test runner.
|
||||
*
|
||||
* Loads each scenario in tests/scenarios/ named NN-slug.json, feeds the
|
||||
* `scannerInput` into `humanizeFinding`, and asserts that humanized
|
||||
* `title` / `description` / `recommendation` match the regex patterns
|
||||
* declared in `expectedHumanized`. The patterns encode the
|
||||
* brief-owner-approved ground-truth answers ("what / why / what next")
|
||||
* so that passing the deterministic regex match is equivalent to the
|
||||
* humanized output answering the three questions a reader would ask.
|
||||
*
|
||||
* Per brief-owner decision (1a) the gate is deterministic regex
|
||||
* matching — no human-in-the-loop step at runtime.
|
||||
*
|
||||
* Exit 0 = PASS (all scenarios match), exit 1 = FAIL.
|
||||
*
|
||||
* Usage:
|
||||
* node tests/scenario-read-test.mjs
|
||||
*/
|
||||
import { readdir, readFile } from 'node:fs/promises';
|
||||
import { resolve, dirname, join } from 'node:path';
|
||||
import { fileURLToPath } from 'node:url';
|
||||
import { humanizeFinding } from '../scanners/lib/humanizer.mjs';
|
||||
|
||||
const __dirname = dirname(fileURLToPath(import.meta.url));
|
||||
const SCENARIOS_DIR = resolve(__dirname, 'scenarios');
|
||||
|
||||
/**
 * Read and parse every scenario fixture found in SCENARIOS_DIR.
 *
 * Only files named `NN-slug.json` (two digits, a dash, then lowercase
 * letters, digits or dashes) are loaded. Results follow sorted file-name order.
 *
 * @returns {Promise<Array<{file: string, body: unknown}>>} one entry per fixture
 * @throws {Error} when a fixture is not valid JSON; the message names the
 *   offending file and the original parse error is attached as `cause`
 */
async function loadScenarios() {
  const entries = await readdir(SCENARIOS_DIR);
  const files = entries
    .filter((name) => /^\d{2}-[a-z0-9-]+\.json$/.test(name))
    .sort();

  const scenarios = [];
  // Read sequentially so the first bad fixture in sorted order is the one reported.
  for (const file of files) {
    const raw = await readFile(join(SCENARIOS_DIR, file), 'utf8');
    let body;
    try {
      body = JSON.parse(raw);
    } catch (err) {
      // A bare JSON.parse message does not say which fixture is broken.
      throw new Error(`scenario ${file} is not valid JSON: ${err.message}`, { cause: err });
    }
    scenarios.push({ file, body });
  }
  return scenarios;
}
|
||||
|
||||
/**
 * Test one humanized field against its expected pattern.
 *
 * @param {string} field - field name, used only in failure messages
 * @param {unknown} value - humanized value to test
 * @param {string} pattern - regex source, compiled case-insensitively
 * @returns {{ok: true} | {ok: false, reason: string}} `ok: false` carries a
 *   human-readable reason: non-string value, invalid regex, or no match
 */
function checkPattern(field, value, pattern) {
  if (typeof value !== 'string') {
    // typeof null is 'object', which would hide the real problem in the report.
    const got = value === null ? 'null' : typeof value;
    return { ok: false, reason: `${field} is not a string (got ${got})` };
  }

  let re;
  try {
    re = new RegExp(pattern, 'i');
  } catch (err) {
    // A broken fixture pattern is reported as a failure, not thrown.
    return { ok: false, reason: `${field} pattern is not a valid regex: ${err.message}` };
  }

  if (re.test(value)) {
    return { ok: true };
  }
  return {
    ok: false,
    reason: `${field} did not match /${pattern}/i\n actual: ${JSON.stringify(value)}`,
  };
}
|
||||
|
||||
/**
 * Run one scenario through humanizeFinding and collect every mismatch.
 *
 * Checks that `title`, `description` and `recommendation` of the humanized
 * finding match the patterns declared in `expectedHumanized`, and that the
 * humanizer-added fields `userImpactCategory`, `userActionLanguage` and
 * `relevanceContext` are non-empty strings.
 *
 * @param {{file: string, body: object}} scenario - a loaded fixture
 * @returns {{file: string, findingId: unknown, humanized: unknown,
 *   failures: Array<{field: string, reason: string}>}} `failures` is empty
 *   when the scenario passes
 */
export function runOne(scenario) {
  // A fixture whose JSON is `null` destructures as empty and is reported below.
  const { findingId, scannerInput, expectedHumanized } = scenario.body ?? {};
  const failures = [];

  let humanized;
  try {
    humanized = humanizeFinding(scannerInput);
  } catch (err) {
    // One throwing scenario must not abort the rest of the corpus.
    failures.push({
      field: 'humanizeFinding',
      reason: `humanizeFinding threw: ${err?.message ?? err}`,
    });
    return { file: scenario.file, findingId, humanized, failures };
  }

  const patternChecks = [
    ['title', 'titlePattern'],
    ['description', 'descriptionPattern'],
    ['recommendation', 'recommendationPattern'],
  ];
  for (const [field, key] of patternChecks) {
    const pattern = expectedHumanized?.[key];
    if (typeof pattern !== 'string' || pattern.length === 0) {
      failures.push({ field, reason: `missing or empty pattern key "${key}"` });
      continue;
    }
    const result = checkPattern(field, humanized?.[field], pattern);
    if (!result.ok) failures.push({ field, reason: result.reason });
  }

  // Sanity: humanizer-added structural fields must be present
  for (const sysField of ['userImpactCategory', 'userActionLanguage', 'relevanceContext']) {
    const actual = humanized?.[sysField];
    if (typeof actual !== 'string' || actual.length === 0) {
      failures.push({
        field: sysField,
        reason: `expected non-empty string from humanizer; got ${JSON.stringify(actual)}`,
      });
    }
  }

  return { file: scenario.file, findingId, humanized, failures };
}
|
||||
|
||||
/**
 * Load the whole corpus and evaluate each scenario.
 *
 * @returns {Promise<{scenarios: object[], failed: object[], passed: number, total: number}>}
 *   `scenarios` holds every per-scenario result, `failed` the subset with at
 *   least one failure, and `passed` / `total` the matching counts
 */
export async function runAll() {
  const results = [];
  for (const scenario of await loadScenarios()) {
    results.push(runOne(scenario));
  }

  const failed = results.filter(({ failures }) => failures.length > 0);
  const total = results.length;
  return { scenarios: results, failed, passed: total - failed.length, total };
}
|
||||
|
||||
/**
 * CLI entry point: run the corpus and print a report to stderr.
 *
 * Sets the exit code to 0 when every scenario matches, and to 1 when any
 * scenario fails or no scenarios are found. The code is set through
 * `process.exitCode` rather than `process.exit()` so pending stderr writes
 * are flushed before the process ends.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const { scenarios, failed, passed, total } = await runAll();

  if (total === 0) {
    process.stderr.write('SC-4 FAIL: no scenarios found in tests/scenarios/\n');
    process.exitCode = 1;
    return;
  }

  if (failed.length === 0) {
    process.stderr.write(
      `SC-4 PASS: ${passed}/${total} scenarios match humanizer output\n`,
    );
    for (const r of scenarios) {
      process.stderr.write(` ${r.file} (${r.findingId}) - OK\n`);
    }
    process.exitCode = 0;
    return;
  }

  process.stderr.write(`SC-4 FAIL: ${failed.length}/${total} scenarios did not match\n`);
  for (const r of failed) {
    process.stderr.write(`\n ${r.file} (${r.findingId})\n`);
    for (const f of r.failures) {
      process.stderr.write(` [${f.field}] ${f.reason}\n`);
    }
  }
  process.exitCode = 1;
}
|
||||
|
||||
// True only when this file is the script handed to `node`, so importing the
// module does not trigger a run. fileURLToPath decodes percent-escapes and
// yields a native path on Windows, which a raw URL `.pathname` does not.
const isDirectRun =
  Boolean(process.argv[1]) && resolve(process.argv[1]) === fileURLToPath(import.meta.url);

if (isDirectRun) {
  main().catch((err) => {
    process.stderr.write(`Scenario runner error: ${err?.message ?? err}\n`);
    // Exit code 2 marks a runner crash, as opposed to 1 for scenario failures.
    process.exitCode = 2;
  });
}
|
||||
87
plugins/config-audit/tests/scenario-read-test.test.mjs
Normal file
87
plugins/config-audit/tests/scenario-read-test.test.mjs
Normal file
|
|
@ -0,0 +1,87 @@
|
|||
import { describe, it } from 'node:test';
|
||||
import assert from 'node:assert/strict';
|
||||
import { runAll } from './scenario-read-test.mjs';
|
||||
|
||||
describe('SC-4 scenario read-test (humanizer corpus)', () => {
|
||||
// Binds the runner to `node --test`: fails with a per-scenario summary when
// any scenario's humanized output misses its expected patterns.
it('matches all scenarios in tests/scenarios/', async () => {
  const { failed, passed, total } = await runAll();

  assert.ok(total >= 5, `expected at least 5 scenarios in corpus, got ${total}`);

  if (failed.length === 0) {
    assert.equal(passed, total);
    return;
  }

  const summary = failed
    .map((r) => {
      const reasons = r.failures
        .map((f) => ` [${f.field}] ${f.reason}`)
        .join('\n');
      return ` ${r.file} (${r.findingId})\n${reasons}`;
    })
    .join('\n\n');

  // assert.fail always throws, so nothing may follow it.
  assert.fail(
    `SC-4: ${failed.length}/${total} scenarios did not match humanizer output\n\n${summary}`,
  );
});
|
||||
|
||||
// Guards corpus breadth: the scanner prefix is the second dash-separated
// segment of findingId (e.g. "CA-TOK-001" -> "TOK").
it('covers required scanner categories (TOK/CPS, CNF, GAP, SET)', async () => {
  const { scenarios } = await runAll();
  const seen = new Set();
  for (const { file, findingId } of scenarios) {
    // Name the fixture instead of failing with a TypeError on `.split`.
    assert.equal(typeof findingId, 'string', `${file} is missing a string findingId`);
    seen.add(findingId.split('-')[1]);
  }

  const sawList = [...seen].join(', ');
  // TOK and CPS together cover the "wasted tokens" category — at least one must appear.
  const hasTokenCategory = seen.has('TOK') || seen.has('CPS');
  assert.ok(hasTokenCategory, `corpus must include at least one TOK or CPS finding; saw ${sawList}`);
  assert.ok(seen.has('CNF'), `corpus must include at least one CNF finding; saw ${sawList}`);
  assert.ok(seen.has('GAP'), `corpus must include at least one GAP finding; saw ${sawList}`);
  assert.ok(seen.has('SET'), `corpus must include at least one SET finding; saw ${sawList}`);
});
|
||||
|
||||
// Guards the corpus criterion that at least one raw (pre-humanizer) title or
// description contains a tier1 forbidden word.
it('includes at least one scenario whose v5.0.0 description carries a tier1 forbidden word', async () => {
  const { scenarios } = await runAll();
  // Read the forbidden-words file at runtime so this assertion stays in sync
  // with the source of truth (Wave 1 Step 1 artifact).
  const { readFile } = await import('node:fs/promises');
  const { resolve, dirname } = await import('node:path');
  const { fileURLToPath } = await import('node:url');
  const __dirname = dirname(fileURLToPath(import.meta.url));
  const forbiddenPath = resolve(__dirname, 'lint-forbidden-words.json');
  const forbidden = JSON.parse(await readFile(forbiddenPath, 'utf8'));

  // Compile each word once instead of once per (text, word) pair.
  const tier1Patterns = forbidden.tier1.map(({ word }) => {
    const lower = word.toLowerCase();
    const escaped = lower.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    // Words containing a space, dash, dot or slash are matched as plain
    // substrings; all other words must match on word boundaries.
    return /[ \-./]/.test(lower)
      ? new RegExp(escaped, 'i')
      : new RegExp(`\\b${escaped}\\b`, 'i');
  });

  const matchesTier1 = (text) =>
    typeof text === 'string' && tier1Patterns.some((re) => re.test(text));

  let found = false;
  for (const { file } of scenarios) {
    const path = resolve(__dirname, 'scenarios', file);
    const body = JSON.parse(await readFile(path, 'utf8'));
    const desc = body?.scannerInput?.description ?? '';
    const title = body?.scannerInput?.title ?? '';
    if (matchesTier1(desc) || matchesTier1(title)) {
      found = true;
      break;
    }
  }

  assert.ok(
    found,
    'corpus must include at least one scenario whose v5.0.0 title or description contains a tier1 forbidden word',
  );
});
|
||||
});
|
||||
29
plugins/config-audit/tests/scenarios/01-tok-cascade.json
Normal file
29
plugins/config-audit/tests/scenarios/01-tok-cascade.json
Normal file
|
|
@ -0,0 +1,29 @@
|
|||
{
|
||||
"_meta": {
|
||||
"comment": "Scenario 01: TOK CLAUDE.md cascade exceeds 10k tokens. Covers the TOK/CPS (token-efficiency) category. v5.0.0 title contains tier3 'CLAUDE.md' — humanizer rewrites to non-jargon prose."
|
||||
},
|
||||
"findingId": "CA-TOK-001",
|
||||
"scannerInput": {
|
||||
"id": "CA-TOK-001",
|
||||
"scanner": "TOK",
|
||||
"severity": "high",
|
||||
"title": "CLAUDE.md cascade exceeds 10k tokens per turn",
|
||||
"description": "Total CLAUDE.md cascade is 12450 tokens across 4 files.",
|
||||
"file": ".claude/CLAUDE.md",
|
||||
"line": null,
|
||||
"evidence": "tokens=12450; files=4",
|
||||
"recommendation": "Reduce CLAUDE.md cascade size. Move content into modular skill files or trim verbose sections.",
|
||||
"category": null,
|
||||
"autoFixable": false
|
||||
},
|
||||
"expectedHumanized": {
|
||||
"titlePattern": "instruction files take a lot of space on every turn",
|
||||
"descriptionPattern": "10,000 tokens|every turn carries that weight",
|
||||
"recommendationPattern": "Trim or split the largest files"
|
||||
},
|
||||
"groundTruth": {
|
||||
"what": "The instruction files Claude reads on every turn are large enough that they slow each response.",
|
||||
"why": "The combined size has gone above 10,000 tokens. That weight loads on every turn and leaves less room for the conversation itself.",
|
||||
"whatNext": "Trim or split the largest files. The details show which file contributes most."
|
||||
}
|
||||
}
|
||||
29
plugins/config-audit/tests/scenarios/02-cps-volatile.json
Normal file
29
plugins/config-audit/tests/scenarios/02-cps-volatile.json
Normal file
|
|
@ -0,0 +1,29 @@
|
|||
{
|
||||
"_meta": {
|
||||
"comment": "Scenario 02: CPS volatile content inside cached prefix. Covers the CPS half of the TOK/CPS category. Tests that the humanizer explains cache-prefix-stability in user-facing language."
|
||||
},
|
||||
"findingId": "CA-CPS-001",
|
||||
"scannerInput": {
|
||||
"id": "CA-CPS-001",
|
||||
"scanner": "CPS",
|
||||
"severity": "medium",
|
||||
"title": "Volatile content inside cached prefix breaks reuse",
|
||||
"description": "Volatile pattern matched at .claude/CLAUDE.md:42 (timestamp). Lines 31-150 form the cache prefix.",
|
||||
"file": ".claude/CLAUDE.md",
|
||||
"line": 42,
|
||||
"evidence": "Pattern: timestamp; window: 31-150",
|
||||
"recommendation": "Move volatile content (timestamps, session state) below line 150 or to a separate file.",
|
||||
"category": null,
|
||||
"autoFixable": false
|
||||
},
|
||||
"expectedHumanized": {
|
||||
"titlePattern": "Content that changes between turns sits in the part Claude tries to reuse",
|
||||
"descriptionPattern": "fresh read every time|slows responses",
|
||||
"recommendationPattern": "Move the changing content|150 lines"
|
||||
},
|
||||
"groundTruth": {
|
||||
"what": "Content that changes between turns is inside the part of the file Claude tries to reuse.",
|
||||
"why": "Claude saves space by reusing the start of your instructions across turns. When that area changes, every turn re-reads the whole start, which slows responses.",
|
||||
"whatNext": "Move the changing content (timestamps, session notes) below the first 150 lines, or out of the file entirely."
|
||||
}
|
||||
}
|
||||
29
plugins/config-audit/tests/scenarios/03-cnf-conflict.json
Normal file
29
plugins/config-audit/tests/scenarios/03-cnf-conflict.json
Normal file
|
|
@ -0,0 +1,29 @@
|
|||
{
|
||||
"_meta": {
|
||||
"comment": "Scenario 03: CNF allow/deny conflict. Covers the conflicts category. v5.0.0 title contains tier3 'allow/deny' — humanizer rewrites with non-jargon language."
|
||||
},
|
||||
"findingId": "CA-CNF-001",
|
||||
"scannerInput": {
|
||||
"id": "CA-CNF-001",
|
||||
"scanner": "CNF",
|
||||
"severity": "high",
|
||||
"title": "Permission allow/deny conflict",
|
||||
"description": "Tool 'Bash(git:*)' appears in both allow and deny lists at .claude/settings.json.",
|
||||
"file": ".claude/settings.json",
|
||||
"line": null,
|
||||
"evidence": "tool=Bash(git:*); allow=true; deny=true",
|
||||
"recommendation": "Remove the tool from either the allow or deny list to make the intent unambiguous.",
|
||||
"category": null,
|
||||
"autoFixable": false
|
||||
},
|
||||
"expectedHumanized": {
|
||||
"titlePattern": "let-in and shut-out by your permissions",
|
||||
"descriptionPattern": "deny.*priority over an .*allow|looks like the tool is approved",
|
||||
"recommendationPattern": "Remove either the .*allow.* or the .*deny"
|
||||
},
|
||||
"groundTruth": {
|
||||
"what": "A tool you have configured is both let-in and shut-out by your permission rules.",
|
||||
"why": "A `deny` entry takes priority over an `allow`, so the `allow` does nothing — but the configuration looks like the tool is approved, which can mislead readers of the file.",
|
||||
"whatNext": "Remove either the `allow` or the `deny` entry so the intent is unambiguous."
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,29 @@
|
|||
{
|
||||
"_meta": {
|
||||
"comment": "Scenario 04: GAP no CLAUDE.md file. Covers the feature-gap category. v5.0.0 title and recommendation contain tier3 'CLAUDE.md' — humanizer wraps the term in backticks."
|
||||
},
|
||||
"findingId": "CA-GAP-001",
|
||||
"scannerInput": {
|
||||
"id": "CA-GAP-001",
|
||||
"scanner": "GAP",
|
||||
"severity": "medium",
|
||||
"title": "No CLAUDE.md file",
|
||||
"description": "No project instructions file detected.",
|
||||
"file": null,
|
||||
"line": null,
|
||||
"evidence": null,
|
||||
"recommendation": "Create a CLAUDE.md file with project-specific guidance.",
|
||||
"category": "t1",
|
||||
"autoFixable": false
|
||||
},
|
||||
"expectedHumanized": {
|
||||
"titlePattern": "haven'?t added project instructions for Claude",
|
||||
"descriptionPattern": "highest-impact thing you can add|tells Claude how you work",
|
||||
"recommendationPattern": "Create .*CLAUDE\\.md.*one-paragraph overview"
|
||||
},
|
||||
"groundTruth": {
|
||||
"what": "Your project doesn't have a top-level instructions file for Claude yet.",
|
||||
"why": "A `CLAUDE.md` at the project root is the single highest-impact addition; it tells Claude how you work in this codebase so every session starts informed.",
|
||||
"whatNext": "Create `CLAUDE.md` with a one-paragraph overview, common commands, and any conventions Claude should know."
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,29 @@
|
|||
{
|
||||
"_meta": {
|
||||
"comment": "Scenario 05: SET invalid JSON in settings file. Covers the settings category AND the brief criterion 'one finding whose v5.0.0 description contains a tier1 forbidden word' — both the v5.0.0 title and description carry tier1 'invalid'. Humanizer rewrites to plain language."
|
||||
},
|
||||
"findingId": "CA-SET-001",
|
||||
"scannerInput": {
|
||||
"id": "CA-SET-001",
|
||||
"scanner": "SET",
|
||||
"severity": "critical",
|
||||
"title": "Invalid JSON in settings file",
|
||||
"description": ".claude/settings.json contains invalid JSON and will be ignored by Claude Code.",
|
||||
"file": ".claude/settings.json",
|
||||
"line": null,
|
||||
"evidence": "Unexpected token } in JSON at position 187",
|
||||
"recommendation": "Fix JSON syntax errors. Use a JSON validator.",
|
||||
"category": null,
|
||||
"autoFixable": false
|
||||
},
|
||||
"expectedHumanized": {
|
||||
"titlePattern": "settings file isn'?t readable as JSON",
|
||||
"descriptionPattern": "Claude Code can'?t parse the file|settings are skipped",
|
||||
"recommendationPattern": "Open the file and fix the JSON syntax"
|
||||
},
|
||||
"groundTruth": {
|
||||
"what": "Your settings file can't be read as JSON, so none of the settings inside take effect.",
|
||||
"why": "Claude Code parses the settings file once at startup. When that parse fails, all settings inside are skipped silently, and you get the defaults.",
|
||||
"whatNext": "Open the file and fix the JSON syntax shown in the details (often a missing comma, an extra trailing comma, or an unterminated quote)."
|
||||
}
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue