feat(humanizer): forbidden-words data file (tier1/2/3)

Wave 1 / Step 1 of the v5.1.0 plain-language UX humanizer. tests/lint-forbidden-words.json defines the SC-3 forbidden-words vocabulary used by the lint runner (Wave 4 / Step 8) and the humanizer-data translation guard (Wave 1 / Step 2).
- Tier 1: 19 absolute prohibitions (failure if matched in default output) — sourced from Microsoft Writing Style Guide, Federal Plain Language, GOV.UK, Google Developer Style, Apple HIG.
- Tier 2: 24 strong-avoidance terms (warning if matched) — same sources plus Mailchimp.
- Tier 3: 12 domain-specific jargon terms (failure if matched in default output, allowed in --raw and --json paths) — sourced from research/03 jargon table.
Counts diverge from plan.md, which lists 18/21/11 for tiers 1/2/3; this file has 19/24/12. The JSON tracks the brief's verbatim lists at research/03 lines 200-202, plus the tier 3 "hook" entry from the brief's table. Plan revision noted in audit-doc.
Test: 10 cases verifying parse, per-tier counts, schema completeness, spot checks per tier, and no cross-tier duplicates. All pass.
Regression: 645/645 tests (635 existing + 10 new).
Project: .claude/projects/2026-05-01-config-audit-ux-redesign/
2026-05-01 16:53:37 +02:00 · 2026-05-01 16:53:37 +02:00 · 8c07fe3493
commit 8c07fe3493
parent b9150d4927
2 changed files with 161 additions and 0 deletions
--- a/plugins/config-audit/tests/lib/forbidden-words-data.test.mjs
+++ b/plugins/config-audit/tests/lib/forbidden-words-data.test.mjs
@ -0,0 +1,97 @@
+import { test } from 'node:test';
+import assert from 'node:assert/strict';
+import { readFile } from 'node:fs/promises';
+import { fileURLToPath } from 'node:url';
+import { dirname, resolve } from 'node:path';
+
+const __dirname = dirname(fileURLToPath(import.meta.url));
+const DATA_PATH = resolve(__dirname, '..', 'lint-forbidden-words.json');
+
+async function loadData() {
+  const raw = await readFile(DATA_PATH, 'utf8');
+  return JSON.parse(raw);
+}
+
+test('forbidden-words JSON parses successfully', async () => {
+  const data = await loadData();
+  assert.equal(typeof data, 'object');
+  assert.ok(data !== null);
+});
+
+test('top-level keys present (tier1, tier2, tier3)', async () => {
+  const data = await loadData();
+  assert.ok(Array.isArray(data.tier1), 'tier1 must be an array');
+  assert.ok(Array.isArray(data.tier2), 'tier2 must be an array');
+  assert.ok(Array.isArray(data.tier3), 'tier3 must be an array');
+});
+
+test('tier1 has 19 entries (verbatim from research/03 SC-3 list line 200)', async () => {
+  const data = await loadData();
+  assert.equal(data.tier1.length, 19, `expected 19 tier1 entries, got ${data.tier1.length}`);
+});
+
+test('tier2 has 24 entries (verbatim from research/03 SC-3 list line 201)', async () => {
+  const data = await loadData();
+  assert.equal(data.tier2.length, 24, `expected 24 tier2 entries, got ${data.tier2.length}`);
+});
+
+test('tier3 has 12 entries (verbatim from research/03 SC-3 list line 202 + hook)', async () => {
+  const data = await loadData();
+  assert.equal(data.tier3.length, 12, `expected 12 tier3 entries, got ${data.tier3.length}`);
+});
+
+test('every entry has required fields (word, replacement, source, tier)', async () => {
+  const data = await loadData();
+  for (const tierName of ['tier1', 'tier2', 'tier3']) {
+    for (const entry of data[tierName]) {
+      assert.ok(typeof entry.word === 'string' && entry.word.length > 0,
+        `${tierName} entry missing 'word': ${JSON.stringify(entry)}`);
+      assert.ok(typeof entry.replacement === 'string' && entry.replacement.length > 0,
+        `${tierName} entry "${entry.word}" missing 'replacement'`);
+      assert.ok(typeof entry.source === 'string' && entry.source.length > 0,
+        `${tierName} entry "${entry.word}" missing 'source'`);
+      assert.ok(entry.tier === Number(tierName.replace('tier', '')),
+        `${tierName} entry "${entry.word}" has wrong tier: ${entry.tier}`);
+    }
+  }
+});
+
+test('tier1 spot-check — required absolute prohibitions present', async () => {
+  const data = await loadData();
+  const words = data.tier1.map((e) => e.word);
+  for (const required of ['utilize', 'leverage', 'facilitate', 'terminate', 'abort', 'invalid', 'illegal', 'failed to', 'fatal', 'in order to']) {
+    assert.ok(words.includes(required), `tier1 missing required word: ${required}`);
+  }
+});
+
+test('tier2 spot-check — condescending words present', async () => {
+  const data = await loadData();
+  const words = data.tier2.map((e) => e.word);
+  for (const required of ['simply', 'just', 'obviously', 'clearly']) {
+    assert.ok(words.includes(required), `tier2 missing required word: ${required}`);
+  }
+});
+
+test('tier3 spot-check — domain-specific jargon present', async () => {
+  const data = await loadData();
+  const words = data.tier3.map((e) => e.word);
+  for (const required of ['CLAUDE.md', '@import', 'MCP', 'hook', 'frontmatter']) {
+    assert.ok(words.includes(required), `tier3 missing required word: ${required}`);
+  }
+});
+
+test('no duplicate words across tiers', async () => {
+  const data = await loadData();
+  const allWords = [
+    ...data.tier1.map((e) => e.word),
+    ...data.tier2.map((e) => e.word),
+    ...data.tier3.map((e) => e.word),
+  ];
+  const seen = new Set();
+  const dupes = [];
+  for (const w of allWords) {
+    if (seen.has(w)) dupes.push(w);
+    seen.add(w);
+  }
+  assert.equal(dupes.length, 0, `duplicate forbidden words across tiers: ${dupes.join(', ')}`);
+});
--- a/plugins/config-audit/tests/lint-forbidden-words.json
+++ b/plugins/config-audit/tests/lint-forbidden-words.json
@ -0,0 +1,64 @@
+{
+  "$schema_note": "SC-3 forbidden-words list. Tier 1 = failure if matched in default output; Tier 2 = warning; Tier 3 = failure (allowed in --raw and --json paths). Tier 1 and Tier 2 sources cite at least one official style guide per term; Tier 3 sources cite the config-audit research/03 jargon table. Generated for config-audit v5.1.0 humanizer.",
+  "tier1": [
+    { "word": "utilize", "replacement": "use", "source": "Microsoft Writing Style Guide; Federal Plain Language; GOV.UK; 18F", "tier": 1 },
+    { "word": "utilization", "replacement": "use", "source": "Microsoft Writing Style Guide; Federal Plain Language; GOV.UK; 18F", "tier": 1 },
+    { "word": "leverage", "replacement": "use, build on", "source": "Microsoft; GOV.UK; Google Developer Style; 18F", "tier": 1 },
+    { "word": "facilitate", "replacement": "help", "source": "Microsoft; Federal Plain Language; GOV.UK", "tier": 1 },
+    { "word": "terminate", "replacement": "stop, end", "source": "Microsoft UX error guide; Federal Plain Language", "tier": 1 },
+    { "word": "abort", "replacement": "stop, cancel, exit", "source": "Google Developer Style; Microsoft", "tier": 1 },
+    { "word": "invalid", "replacement": "incorrect, or describe the problem", "source": "Microsoft UX error guide (explicit); Apple HIG", "tier": 1 },
+    { "word": "illegal", "replacement": "incorrect", "source": "Microsoft UX error guide (explicit)", "tier": 1 },
+    { "word": "failed to", "replacement": "couldn't, unable to", "source": "Microsoft UX error guide (explicit); Federal Plain Language", "tier": 1 },
+    { "word": "catastrophic", "replacement": "serious", "source": "Microsoft UX error guide (explicit)", "tier": 1 },
+    { "word": "fatal", "replacement": "serious", "source": "Microsoft UX error guide (explicit)", "tier": 1 },
+    { "word": "in order to", "replacement": "to", "source": "Federal Plain Language; GOV.UK; Microsoft", "tier": 1 },
+    { "word": "prior to", "replacement": "before", "source": "Federal Plain Language; GOV.UK", "tier": 1 },
+    { "word": "commence", "replacement": "start, begin", "source": "Federal Plain Language; 18F", "tier": 1 },
+    { "word": "endeavor", "replacement": "try", "source": "Federal Plain Language; Microsoft", "tier": 1 },
+    { "word": "attempt", "replacement": "try", "source": "Federal Plain Language; Microsoft", "tier": 1 },
+    { "word": "oops", "replacement": "(omit)", "source": "Microsoft UX; Apple HIG; Dynamics 365", "tier": 1 },
+    { "word": "whoops", "replacement": "(omit)", "source": "Microsoft UX; Apple HIG; Dynamics 365", "tier": 1 },
+    { "word": "hmm", "replacement": "(omit)", "source": "Microsoft UX; Dynamics 365", "tier": 1 }
+  ],
+  "tier2": [
+    { "word": "simply", "replacement": "(omit), or 'straightforward'", "source": "Google Developer Style; Microsoft", "tier": 2 },
+    { "word": "just", "replacement": "(omit)", "source": "Google Developer Style; Microsoft", "tier": 2 },
+    { "word": "obviously", "replacement": "(omit)", "source": "Google Developer Style; Microsoft", "tier": 2 },
+    { "word": "clearly", "replacement": "(omit)", "source": "Google Developer Style; Microsoft", "tier": 2 },
+    { "word": "please", "replacement": "(omit in routine output; reserve for genuine inconvenience)", "source": "Microsoft UX; Mailchimp", "tier": 2 },
+    { "word": "sorry", "replacement": "(omit in routine output; reserve for serious failure)", "source": "Microsoft UX; Mailchimp", "tier": 2 },
+    { "word": "actionable", "replacement": "state the action directly", "source": "Microsoft; Federal Plain Language", "tier": 2 },
+    { "word": "functionality", "replacement": "features, capabilities", "source": "Federal Plain Language; Microsoft", "tier": 2 },
+    { "word": "currently", "replacement": "(omit when redundant)", "source": "Federal Plain Language; Microsoft", "tier": 2 },
+    { "word": "note that", "replacement": "(omit)", "source": "Federal Plain Language; Mailchimp", "tier": 2 },
+    { "word": "at this time", "replacement": "(omit), or specific time", "source": "Federal Plain Language; Microsoft", "tier": 2 },
+    { "word": "allows you to", "replacement": "lets you", "source": "Microsoft; Mailchimp", "tier": 2 },
+    { "word": "ensure", "replacement": "make sure", "source": "Federal Plain Language; Mailchimp", "tier": 2 },
+    { "word": "impact", "replacement": "affect (when used as a verb)", "source": "Federal Plain Language; Microsoft", "tier": 2 },
+    { "word": "methodology", "replacement": "method", "source": "Federal Plain Language; Microsoft", "tier": 2 },
+    { "word": "parameters", "replacement": "limits, or specific name (in prose)", "source": "Federal Plain Language; GOV.UK", "tier": 2 },
+    { "word": "subsequent", "replacement": "next, later", "source": "Federal Plain Language; GOV.UK", "tier": 2 },
+    { "word": "sufficient", "replacement": "enough", "source": "Federal Plain Language; GOV.UK", "tier": 2 },
+    { "word": "numerous", "replacement": "many", "source": "Federal Plain Language; GOV.UK", "tier": 2 },
+    { "word": "assist", "replacement": "help", "source": "Federal Plain Language; GOV.UK", "tier": 2 },
+    { "word": "perform", "replacement": "do, or specific verb (when generic)", "source": "Federal Plain Language; Microsoft", "tier": 2 },
+    { "word": "quite", "replacement": "(omit)", "source": "GOV.UK; Mailchimp", "tier": 2 },
+    { "word": "very", "replacement": "(omit, or use a stronger word)", "source": "GOV.UK; Mailchimp", "tier": 2 },
+    { "word": "really", "replacement": "(omit)", "source": "GOV.UK; Mailchimp", "tier": 2 }
+  ],
+  "tier3": [
+    { "word": "CLAUDE.md", "replacement": "your project's instructions to Claude, or 'the configuration file'", "source": "config-audit research/03 Tier 3 jargon table", "tier": 3 },
+    { "word": "@import", "replacement": "links to another file, or 'this file pulls in'", "source": "config-audit research/03 Tier 3 jargon table", "tier": 3 },
+    { "word": "prompt cache", "replacement": "Claude's memory of your setup between turns", "source": "config-audit research/03 Tier 3 jargon table", "tier": 3 },
+    { "word": "prompt-cache", "replacement": "Claude's memory of your setup between turns", "source": "config-audit research/03 Tier 3 jargon table", "tier": 3 },
+    { "word": "allow/deny", "replacement": "can / cannot use", "source": "config-audit research/03 Tier 3 jargon table", "tier": 3 },
+    { "word": "severity", "replacement": "how urgent, or 'impact'", "source": "config-audit research/03 Tier 3 jargon table", "tier": 3 },
+    { "word": "finding ID", "replacement": "lead with prose; ID at end-of-line for searchability", "source": "config-audit research/03 Tier 3 jargon table", "tier": 3 },
+    { "word": "MCP", "replacement": "external tool, or 'Claude's connection to [service]'", "source": "config-audit research/03 Tier 3 jargon table", "tier": 3 },
+    { "word": "hook", "replacement": "automation that runs when [event]", "source": "config-audit research/03 Tier 3 jargon table", "tier": 3 },
+    { "word": "frontmatter", "replacement": "the settings at the top of the file", "source": "config-audit research/03 Tier 3 jargon table", "tier": 3 },
+    { "word": "schema", "replacement": "expected format, or 'structure'", "source": "config-audit research/03 Tier 3 jargon table", "tier": 3 },
+    { "word": "scanner", "replacement": "check, or 'the part that looks for X' (in user-facing prose)", "source": "config-audit research/03 Tier 3 jargon table", "tier": 3 }
+  ]
+}