From ec4ac3e6d1f3cf6ff2e5bbb5bd9239724b027369 Mon Sep 17 00:00:00 2001 From: Kjell Tore Guttormsen Date: Fri, 1 May 2026 19:53:59 +0200 Subject: [PATCH] feat(humanizer): update agent system prompts [skip-docs] MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Wave 5 Step 16 — final wave step. Threads humanizer-aware rendering rules through the three agent prompts that produce user-facing output, and adds a shape test that locks the structure. - agents/analyzer-agent.md: documents the humanizer envelope shape (userImpactCategory, userActionLanguage, relevanceContext) in the Input section; new "Humanizer-aware rendering rules" subsection instructs the agent to: render humanized title/description/ recommendation verbatim, group findings by userImpactCategory, lead each line with userActionLanguage, surface relevanceContext when not affects-everyone, and skip jargon-translation subroutines. --raw fallback documented (v5.0.0 verbatim severity prefiks). - agents/planner-agent.md: documents the same vocabulary; instructs the planner to consume humanized fields from the analysis report, preserve titles verbatim, and order actions by both dependencies AND userActionLanguage urgency. Translation duties explicitly removed from the plan. - agents/feature-gap-agent.md: replaces the inline t1/t2/t3/t4 tier-to-prose section ladder with userActionLanguage-driven groupings ("Fix soon" → High Impact, "Fix when convenient" → Worth Considering, "Optional cleanup"/"FYI" → Explore When Ready); instructs skipping findings whose relevanceContext is test-fixture-no-impact; --raw fallback documented. 
tests/agents/agent-prompt-shape.test.mjs (new, +6 tests, 786 → 792): - structural: humanized field reference + frontmatter preserved - per-agent anchors: analyzer groups by userImpactCategory; planner orders by userActionLanguage; feature-gap references test-fixture-no-impact - global: no "explain what {jargon} means" / "translate jargon" / "jargon-translation duty" prose anywhere Self-audit: Grade A unchanged (config 97/100, plugin 100/100). --- plugins/config-audit/agents/analyzer-agent.md | 19 ++++- .../config-audit/agents/feature-gap-agent.md | 43 +++++----- plugins/config-audit/agents/planner-agent.md | 19 ++++- .../tests/agents/agent-prompt-shape.test.mjs | 82 +++++++++++++++++++ 4 files changed, 136 insertions(+), 27 deletions(-) create mode 100644 plugins/config-audit/tests/agents/agent-prompt-shape.test.mjs diff --git a/plugins/config-audit/agents/analyzer-agent.md b/plugins/config-audit/agents/analyzer-agent.md index f2478cc..7018314 100644 --- a/plugins/config-audit/agents/analyzer-agent.md +++ b/plugins/config-audit/agents/analyzer-agent.md @@ -27,12 +27,23 @@ Analyze all discovered configuration files to: You will receive: 1. Session ID with findings in `~/.claude/config-audit/sessions/{session-id}/findings/` 2. Scope configuration from `~/.claude/config-audit/sessions/{session-id}/scope.yaml` -3. Scanner JSON envelope (if available) from scan-orchestrator.mjs -4. Knowledge base at `{CLAUDE_PLUGIN_ROOT}/knowledge/` for best practices and anti-patterns +3. Scanner JSON envelope (if available) from scan-orchestrator.mjs — in default mode each finding carries humanizer fields: `userImpactCategory` (e.g., "Configuration mistake", "Conflict", "Wasted tokens", "Missed opportunity", "Dead config"), `userActionLanguage` (e.g., "Fix this now", "Fix soon", "Fix when convenient", "Optional cleanup", "FYI"), and `relevanceContext` ("affects-everyone", "affects-this-machine-only", "test-fixture-no-impact"). 
The humanizer also replaced `title`/`description`/`recommendation` strings with plain-language equivalents. +4. Mode flag — when `$RAW_FLAG` is `--raw`, the envelope is v5.0.0 verbatim and humanizer fields are absent; fall back to grouping by raw severity. +5. Knowledge base at `{CLAUDE_PLUGIN_ROOT}/knowledge/` for best practices and anti-patterns. + +## Humanizer-aware rendering rules + +- **Render the humanizer's `title`/`description`/`recommendation` verbatim.** Do not paraphrase. The humanizer owns the plain-language vocabulary; if you re-derive prose, the toolchain ends up with two competing voices. +- **Group findings by `userImpactCategory`.** This replaces severity-bucket grouping in the report. The categories are pre-translated — do not invent your own bucket names. +- **Lead each finding line with `userActionLanguage`.** This replaces raw severity prefixes ("critical", "high", "medium") in the report. Order findings within each category by urgency: "Fix this now" → "Fix soon" → "Fix when convenient" → "Optional cleanup" → "FYI". +- **Surface `relevanceContext` when it isn't `affects-everyone`.** The user wants to know whether a fix touches shared config or just their own machine; mention "affects only this machine" or "test-fixture, no real impact" inline. +- **Do not include "explain what X means" subroutines.** Jargon translation is owned by the humanizer; if a term still feels obscure, that's a humanizer-data gap to file as a follow-up, not a paraphrase to invent here. + +In `--raw` mode, fall back to v5.0.0 severity prefixes and verbatim scanner titles — but flag in the report header that the output is unhumanized. ## Task -1. **Load all findings**: Read all `*.yaml` files from findings directory +1. **Load all findings**: Use the Read tool on all `*.yaml` files from findings directory 1.5. **Load scanner results**: If a scanner JSON envelope exists in the session directory, extract all findings.
Cross-reference against `knowledge/anti-patterns.md` to add remediation context. Note any CA-{prefix}-NNN finding IDs in the report. 2. **Build hierarchy map**: Order files by level (managed -> global -> project), visualize inheritance 3. **Detect conflicts**: Compare settings across hierarchy levels, note which level wins @@ -40,7 +51,7 @@ You will receive: 5. **Identify optimizations**: Rules to globalize, missing configs, orphaned files 6. **Security scan**: Aggregate secret warnings, check for insecure patterns 7. **CLAUDE.md quality assessment**: Score each file against rubric, assign letter grades -8. **Generate report**: Write comprehensive markdown report +8. **Generate report**: Write comprehensive markdown report — group findings by `userImpactCategory`, lead with `userActionLanguage` ## Output diff --git a/plugins/config-audit/agents/feature-gap-agent.md b/plugins/config-audit/agents/feature-gap-agent.md index cedcee4..31f690b 100644 --- a/plugins/config-audit/agents/feature-gap-agent.md +++ b/plugins/config-audit/agents/feature-gap-agent.md @@ -19,10 +19,17 @@ You receive posture assessment data (JSON) containing: - `areas` — per-scanner grades (10 quality areas incl. Token Efficiency, Plugin Hygiene, + Feature Coverage) - `overallGrade` — health grade (quality areas only) - `opportunityCount` — number of unused features detected -- `scannerEnvelope` — full scanner results including GAP findings +- `scannerEnvelope` — full scanner results. In default mode each GAP finding carries humanizer fields: `userImpactCategory` ("Missed opportunity"), `userActionLanguage` ("Fix soon", "Fix when convenient", "Optional cleanup", "FYI"), and `relevanceContext`. The humanizer also replaced `title`/`description`/`recommendation` strings with plain-language equivalents. You also receive project context: language, file count, existing configuration. 
+## Humanizer-aware rendering rules + +- **Render the humanizer's `title`/`description`/`recommendation` verbatim.** Do not paraphrase. The humanizer owns the plain-language vocabulary. +- **Drive prioritization with `userActionLanguage`, not raw category tiers.** "Fix soon" → "Fix when convenient" → "Optional cleanup" → "FYI" replaces the t1/t2/t3/t4 tier ladder for output ordering. +- **Skip findings with `relevanceContext === "test-fixture-no-impact"`** unless the user explicitly asked to include fixtures. +- **Do not include "explain what X means" subroutines.** The category labels ("Missed opportunity") are pre-translated. + ## Knowledge Files Read **at most 3** of these files from the plugin's `knowledge/` directory: @@ -36,6 +43,8 @@ Write `feature-gap-report.md` to the session directory. Max 200 lines. ### Report Structure +Group findings by `userActionLanguage` rather than by raw category tier. Render the humanizer's `title` and `recommendation` verbatim — the humanizer has already produced plain-language equivalents. + ```markdown # Feature Opportunities @@ -47,38 +56,34 @@ Write `feature-gap-report.md` to the session directory. Max 200 lines. ## High Impact -These address correctness or security — consider them seriously. +[Findings where userActionLanguage is "Fix soon" — these address correctness or security; consider them seriously.] -→ **[feature name]** - Why: [evidence-backed reason, cite Anthropic docs or proven issues] - How: [2-3 concrete steps] - -[Repeat for each T1 finding] +→ **[humanized title verbatim]** + Why: [humanized description verbatim, plus "relevant because your project has X" context] + How: [humanized recommendation verbatim, broken into 2-3 concrete steps from gap-closure-templates.md] ## Worth Considering -These improve workflow efficiency for projects like yours. +[Findings where userActionLanguage is "Fix when convenient" — these improve workflow efficiency for projects like yours.] 
-→ **[feature name]** - Why: [reason, with "relevant because your project has X"] - How: [2-3 concrete steps] - -[Repeat for each T2 finding] +→ **[humanized title verbatim]** + Why: [humanized description verbatim, plus relevance context] + How: [humanized recommendation verbatim, broken into 2-3 concrete steps] ## Explore When Ready -Nice-to-have features. Skip these if your current setup works well. +[Findings where userActionLanguage is "Optional cleanup" or "FYI" — nice-to-have, skip if current setup works well.] -→ **[feature name]** - Why: [brief reason] - -[Repeat for T3/T4 findings, keep brief] +→ **[humanized title verbatim]** + Why: [humanized description verbatim, brief] ## When You Might Skip These -[Honest qualification: which recommendations are genuinely optional and why. A minimal setup can be the right choice.] +[Honest qualification: which recommendations are genuinely optional and why. A minimal setup can be the right choice. Mention any findings whose `relevanceContext` is `affects-this-machine-only` so the user knows the change won't propagate to teammates.] ``` +In `--raw` mode (humanizer fields absent), fall back to grouping by raw category tier (t1/t2/t3/t4) and render scanner-emitted titles verbatim — flag in the report header that output is unhumanized. + ## Guidelines - Frame everything as opportunities, never as failures or gaps diff --git a/plugins/config-audit/agents/planner-agent.md b/plugins/config-audit/agents/planner-agent.md index 9b7774d..41fa4a2 100644 --- a/plugins/config-audit/agents/planner-agent.md +++ b/plugins/config-audit/agents/planner-agent.md @@ -25,15 +25,26 @@ You will receive: 1. Session ID 2. Analysis report: `~/.claude/config-audit/sessions/{session-id}/analysis-report.md` 3. Interview results: `~/.claude/config-audit/sessions/{session-id}/interview.md` (optional) +4. Mode flag — `$RAW_FLAG`. 
When empty (default), the analysis report uses humanized vocabulary: each finding has been grouped by `userImpactCategory` and led with `userActionLanguage`. When `--raw`, the report is v5.0.0 verbatim severity prefixes. + +## Humanizer-aware planning rules + +- **Consume humanized fields from the analysis report.** The analyzer-agent has already grouped findings by `userImpactCategory` ("Configuration mistake", "Conflict", "Wasted tokens", "Missed opportunity", "Dead config") and led each line with `userActionLanguage` ("Fix this now", "Fix soon", "Fix when convenient", "Optional cleanup", "FYI"). Carry that vocabulary forward into the action plan — do not re-derive severity-to-prose mappings. +- **Render finding titles and recommendations verbatim** as they appear in the analysis report. The humanizer owns the plain-language vocabulary; rephrasing introduces drift between report and plan. +- **Order actions by `userActionLanguage` urgency**, not by raw severity. "Fix this now" + "Fix soon" precede "Fix when convenient" precede "Optional cleanup" precede "FYI". +- **Surface `relevanceContext`** when an action only affects the user's machine or only touches test fixtures — these warrant different escalation paths. +- **Do not perform translation duties in the action plan.** No "what this means in plain English" sections. The humanizer handles that upstream; if a finding's prose still reads like jargon, that's a data gap to flag, not a translation to invent. + +In `--raw` mode, the analysis report is v5.0.0 verbatim — fall back to severity-based prioritization and surface raw scanner titles. Flag in the plan header that the plan was generated from unhumanized analysis. ## Task -1. **Load inputs**: Read analysis and interview (if exists) -2. **Generate actions**: Create action items for each finding +1. **Load inputs**: Use the Read tool on the analysis report and interview (if exists) +2.
**Generate actions**: Create action items for each finding, preserving humanized titles 3. **Assess risk**: Evaluate risk level per action -4. **Order by dependencies**: Ensure correct execution order +4. **Order by dependencies AND `userActionLanguage`**: dependency-correct AND urgency-correct 5. **Create rollback plans**: Define how to undo each action -6. **Write action plan**: Output comprehensive plan +6. **Write action plan**: Output comprehensive plan grouped by `userImpactCategory` ## Action Categories diff --git a/plugins/config-audit/tests/agents/agent-prompt-shape.test.mjs b/plugins/config-audit/tests/agents/agent-prompt-shape.test.mjs new file mode 100644 index 0000000..374f46f --- /dev/null +++ b/plugins/config-audit/tests/agents/agent-prompt-shape.test.mjs @@ -0,0 +1,82 @@ +/** + * Wave 5 Step 16 — Agent system-prompt shape tests. + * + * Verifies that the 3 agent prompt files have the correct structural shape + * after the humanizer integration: + * + * - Each file references at least one of the humanized field names by + * name: `userImpactCategory`, `userActionLanguage`, `relevanceContext`. + * + * - Each file does NOT contain a "explain what X means" subroutine — + * those translation duties are owned by the humanizer now. + * + * - Each file preserves its required frontmatter (name, description, + * model, color, tools). 
+ */ + +import { test } from 'node:test'; +import { strict as assert } from 'node:assert'; +import { readFile } from 'node:fs/promises'; +import { resolve, dirname } from 'node:path'; +import { fileURLToPath } from 'node:url'; + +const __dirname = dirname(fileURLToPath(import.meta.url)); +const AGENTS_DIR = resolve(__dirname, '..', '..', 'agents'); + +const AGENT_FILES = [ + 'analyzer-agent.md', + 'planner-agent.md', + 'feature-gap-agent.md', +]; + +const HUMANIZED_FIELD_REGEX = /userImpactCategory|userActionLanguage|relevanceContext/; +const JARGON_TRANSLATION_INSTRUCTION_REGEX = /explain\s+what\s+\{[^}]+\}\s+means|translate\s+jargon|jargon[- ]translation\s+duty/i; +const FRONTMATTER_REGEX = /^---\s*\nname:\s+\S+/m; + +async function readAgent(name) { + return await readFile(resolve(AGENTS_DIR, name), 'utf-8'); +} + +test('Agent prompts: every file references at least one humanized field', async () => { + for (const name of AGENT_FILES) { + const content = await readAgent(name); + assert.match( + content, + HUMANIZED_FIELD_REGEX, + `${name} must reference userImpactCategory, userActionLanguage, or relevanceContext`, + ); + } +}); + +test('Agent prompts: no jargon-translation subroutines', async () => { + for (const name of AGENT_FILES) { + const content = await readAgent(name); + assert.doesNotMatch( + content, + JARGON_TRANSLATION_INSTRUCTION_REGEX, + `${name} must not contain "explain what {jargon} means" / "translate jargon" instructions — humanizer owns translation`, + ); + } +}); + +test('Agent prompts: frontmatter preserved (name field present)', async () => { + for (const name of AGENT_FILES) { + const content = await readAgent(name); + assert.match(content, FRONTMATTER_REGEX, `${name} missing required frontmatter`); + } +}); + +test('analyzer-agent.md: instructs grouping by userImpactCategory', async () => { + const content = await readAgent('analyzer-agent.md'); + assert.match(content, /group.*by\s+`?userImpactCategory`?/i, 'analyzer-agent must group 
findings by userImpactCategory'); +}); + +test('planner-agent.md: instructs ordering by userActionLanguage', async () => { + const content = await readAgent('planner-agent.md'); + assert.match(content, /order.*by\s+(dependencies\s+and\s+)?`?userActionLanguage`?|userActionLanguage\s+urgency/i, 'planner-agent must order actions by userActionLanguage'); +}); + +test('feature-gap-agent.md: skips test-fixture-no-impact findings', async () => { + const content = await readAgent('feature-gap-agent.md'); + assert.match(content, /test-fixture-no-impact/, 'feature-gap-agent must reference the test-fixture-no-impact relevanceContext'); +});