From ec4ac3e6d1f3cf6ff2e5bbb5bd9239724b027369 Mon Sep 17 00:00:00 2001 From: Kjell Tore Guttormsen Date: Fri, 1 May 2026 19:53:59 +0200 Subject: [PATCH] feat(humanizer): update agent system prompts [skip-docs] MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Wave 5 Step 16 — final wave step. Threads humanizer-aware rendering rules through the three agent prompts that produce user-facing output, and adds a shape test that locks the structure. - agents/analyzer-agent.md: documents the humanizer envelope shape (userImpactCategory, userActionLanguage, relevanceContext) in the Input section; new "Humanizer-aware rendering rules" subsection instructs the agent to: render humanized title/description/ recommendation verbatim, group findings by userImpactCategory, lead each line with userActionLanguage, surface relevanceContext when not affects-everyone, and skip jargon-translation subroutines. --raw fallback documented (v5.0.0 verbatim severity prefiks). - agents/planner-agent.md: documents the same vocabulary; instructs the planner to consume humanized fields from the analysis report, preserve titles verbatim, and order actions by both dependencies AND userActionLanguage urgency. Translation duties explicitly removed from the plan. - agents/feature-gap-agent.md: replaces the inline t1/t2/t3/t4 tier-to-prose section ladder with userActionLanguage-driven groupings ("Fix soon" → High Impact, "Fix when convenient" → Worth Considering, "Optional cleanup"/"FYI" → Explore When Ready); instructs skipping findings whose relevanceContext is test-fixture-no-impact; --raw fallback documented. 
tests/agents/agent-prompt-shape.test.mjs (new, +6 tests, 786 → 792): - structural: humanized field reference + frontmatter preserved - per-agent anchors: analyzer groups by userImpactCategory; planner orders by userActionLanguage; feature-gap references test-fixture-no-impact - global: no "explain what {jargon} means" / "translate jargon" / "jargon-translation duty" prose anywhere Self-audit: Grade A unchanged (config 97/100, plugin 100/100). --- plugins/config-audit/agents/analyzer-agent.md | 19 ++++- .../config-audit/agents/feature-gap-agent.md | 43 +++++----- plugins/config-audit/agents/planner-agent.md | 19 ++++- .../tests/agents/agent-prompt-shape.test.mjs | 82 +++++++++++++++++++ 4 files changed, 136 insertions(+), 27 deletions(-) create mode 100644 plugins/config-audit/tests/agents/agent-prompt-shape.test.mjs diff --git a/plugins/config-audit/agents/analyzer-agent.md b/plugins/config-audit/agents/analyzer-agent.md index f2478cc..7018314 100644 --- a/plugins/config-audit/agents/analyzer-agent.md +++ b/plugins/config-audit/agents/analyzer-agent.md @@ -27,12 +27,23 @@ Analyze all discovered configuration files to: You will receive: 1. Session ID with findings in `~/.claude/config-audit/sessions/{session-id}/findings/` 2. Scope configuration from `~/.claude/config-audit/sessions/{session-id}/scope.yaml` -3. Scanner JSON envelope (if available) from scan-orchestrator.mjs -4. Knowledge base at `{CLAUDE_PLUGIN_ROOT}/knowledge/` for best practices and anti-patterns +3. Scanner JSON envelope (if available) from scan-orchestrator.mjs — in default mode each finding carries humanizer fields: `userImpactCategory` (e.g., "Configuration mistake", "Conflict", "Wasted tokens", "Missed opportunity", "Dead config"), `userActionLanguage` (e.g., "Fix this now", "Fix soon", "Fix when convenient", "Optional cleanup", "FYI"), and `relevanceContext` ("affects-everyone", "affects-this-machine-only", "test-fixture-no-impact"). 
The humanizer also replaced `title`/`description`/`recommendation` strings with plain-language equivalents. +4. Mode flag — when `$RAW_FLAG` is `--raw`, the envelope is v5.0.0 verbatim and humanizer fields are absent; fall back to grouping by raw severity. +5. Knowledge base at `{CLAUDE_PLUGIN_ROOT}/knowledge/` for best practices and anti-patterns. + +## Humanizer-aware rendering rules + +- **Render the humanizer's `title`/`description`/`recommendation` verbatim.** Do not paraphrase. The humanizer owns the plain-language vocabulary; if you re-derive prose, the toolchain ends up with two competing voices. +- **Group findings by `userImpactCategory`.** This replaces severity-bucket grouping in the report. The categories are pre-translated — do not invent your own bucket names. +- **Lead each finding line with `userActionLanguage`.** This replaces raw severity prefixes ("critical", "high", "medium") in the report. Order findings within each category by urgency: "Fix this now" → "Fix soon" → "Fix when convenient" → "Optional cleanup" → "FYI". +- **Surface `relevanceContext` when it isn't `affects-everyone`.** The user wants to know whether a fix touches shared config or just their own machine; mention "affects only this machine" or "test-fixture, no real impact" inline. +- **Do not include "explain what X means" subroutines.** Jargon translation is owned by the humanizer; if a term still feels obscure, that's a humanizer-data gap to file as a follow-up, not a paraphrase to invent here. + +In `--raw` mode, fall back to v5.0.0 severity prefixes and verbatim scanner titles — but flag in the report header that the output is unhumanized. ## Task -1. **Load all findings**: Read all `*.yaml` files from findings directory +1. **Load all findings**: Use the Read tool on all `*.yaml` files from findings directory 1.5. **Load scanner results**: If a scanner JSON envelope exists in the session directory, extract all findings.
Cross-reference against `knowledge/anti-patterns.md` to add remediation context. Note any CA-{prefix}-NNN finding IDs in the report. 2. **Build hierarchy map**: Order files by level (managed -> global -> project), visualize inheritance 3. **Detect conflicts**: Compare settings across hierarchy levels, note which level wins @@ -40,7 +51,7 @@ You will receive: 5. **Identify optimizations**: Rules to globalize, missing configs, orphaned files 6. **Security scan**: Aggregate secret warnings, check for insecure patterns 7. **CLAUDE.md quality assessment**: Score each file against rubric, assign letter grades -8. **Generate report**: Write comprehensive markdown report +8. **Generate report**: Write comprehensive markdown report — group findings by `userImpactCategory`, lead with `userActionLanguage` ## Output diff --git a/plugins/config-audit/agents/feature-gap-agent.md b/plugins/config-audit/agents/feature-gap-agent.md index cedcee4..31f690b 100644 --- a/plugins/config-audit/agents/feature-gap-agent.md +++ b/plugins/config-audit/agents/feature-gap-agent.md @@ -19,10 +19,17 @@ You receive posture assessment data (JSON) containing: - `areas` — per-scanner grades (10 quality areas incl. Token Efficiency, Plugin Hygiene, + Feature Coverage) - `overallGrade` — health grade (quality areas only) - `opportunityCount` — number of unused features detected -- `scannerEnvelope` — full scanner results including GAP findings +- `scannerEnvelope` — full scanner results. In default mode each GAP finding carries humanizer fields: `userImpactCategory` ("Missed opportunity"), `userActionLanguage` ("Fix soon", "Fix when convenient", "Optional cleanup", "FYI"), and `relevanceContext`. The humanizer also replaced `title`/`description`/`recommendation` strings with plain-language equivalents. You also receive project context: language, file count, existing configuration. 
+## Humanizer-aware rendering rules + +- **Render the humanizer's `title`/`description`/`recommendation` verbatim.** Do not paraphrase. The humanizer owns the plain-language vocabulary. +- **Drive prioritization with `userActionLanguage`, not raw category tiers.** "Fix soon" → "Fix when convenient" → "Optional cleanup" → "FYI" replaces the t1/t2/t3/t4 tier ladder for output ordering. +- **Skip findings with `relevanceContext === "test-fixture-no-impact"`** unless the user explicitly asked to include fixtures. +- **Do not include "explain what X means" subroutines.** The category labels ("Missed opportunity") are pre-translated. + ## Knowledge Files Read **at most 3** of these files from the plugin's `knowledge/` directory: @@ -36,6 +43,8 @@ Write `feature-gap-report.md` to the session directory. Max 200 lines. ### Report Structure +Group findings by `userActionLanguage` rather than by raw category tier. Render the humanizer's `title` and `recommendation` verbatim — the humanizer has already produced plain-language equivalents. + ```markdown # Feature Opportunities @@ -47,38 +56,34 @@ Write `feature-gap-report.md` to the session directory. Max 200 lines. ## High Impact -These address correctness or security — consider them seriously. +[Findings where userActionLanguage is "Fix soon" — these address correctness or security; consider them seriously.] -→ **[feature name]** - Why: [evidence-backed reason, cite Anthropic docs or proven issues] - How: [2-3 concrete steps] - -[Repeat for each T1 finding] +→ **[humanized title verbatim]** + Why: [humanized description verbatim, plus "relevant because your project has X" context] + How: [humanized recommendation verbatim, broken into 2-3 concrete steps from gap-closure-templates.md] ## Worth Considering -These improve workflow efficiency for projects like yours. +[Findings where userActionLanguage is "Fix when convenient" — these improve workflow efficiency for projects like yours.] 
-→ **[feature name]** - Why: [reason, with "relevant because your project has X"] - How: [2-3 concrete steps] - -[Repeat for each T2 finding] +→ **[humanized title verbatim]** + Why: [humanized description verbatim, plus relevance context] + How: [humanized recommendation verbatim, broken into 2-3 concrete steps] ## Explore When Ready -Nice-to-have features. Skip these if your current setup works well. +[Findings where userActionLanguage is "Optional cleanup" or "FYI" — nice-to-have, skip if current setup works well.] -→ **[feature name]** - Why: [brief reason] - -[Repeat for T3/T4 findings, keep brief] +→ **[humanized title verbatim]** + Why: [humanized description verbatim, brief] ## When You Might Skip These -[Honest qualification: which recommendations are genuinely optional and why. A minimal setup can be the right choice.] +[Honest qualification: which recommendations are genuinely optional and why. A minimal setup can be the right choice. Mention any findings whose `relevanceContext` is `affects-this-machine-only` so the user knows the change won't propagate to teammates.] ``` +In `--raw` mode (humanizer fields absent), fall back to grouping by raw category tier (t1/t2/t3/t4) and render scanner-emitted titles verbatim — flag in the report header that output is unhumanized. + ## Guidelines - Frame everything as opportunities, never as failures or gaps diff --git a/plugins/config-audit/agents/planner-agent.md b/plugins/config-audit/agents/planner-agent.md index 9b7774d..41fa4a2 100644 --- a/plugins/config-audit/agents/planner-agent.md +++ b/plugins/config-audit/agents/planner-agent.md @@ -25,15 +25,26 @@ You will receive: 1. Session ID 2. Analysis report: `~/.claude/config-audit/sessions/{session-id}/analysis-report.md` 3. Interview results: `~/.claude/config-audit/sessions/{session-id}/interview.md` (optional) +4. Mode flag — `$RAW_FLAG`. 
When empty (default), the analysis report uses humanized vocabulary: each finding has been grouped by `userImpactCategory` and led with `userActionLanguage`. When `--raw`, the report is v5.0.0 verbatim severity prefixes. + +## Humanizer-aware planning rules + +- **Consume humanized fields from the analysis report.** The analyzer-agent has already grouped findings by `userImpactCategory` ("Configuration mistake", "Conflict", "Wasted tokens", "Missed opportunity", "Dead config") and led each line with `userActionLanguage` ("Fix this now", "Fix soon", "Fix when convenient", "Optional cleanup", "FYI"). Carry that vocabulary forward into the action plan — do not re-derive severity-to-prose mappings. +- **Render finding titles and recommendations verbatim** as they appear in the analysis report. The humanizer owns the plain-language vocabulary; rephrasing introduces drift between report and plan. +- **Order actions by `userActionLanguage` urgency**, not by raw severity. "Fix this now" + "Fix soon" precede "Fix when convenient" precede "Optional cleanup" precede "FYI". +- **Surface `relevanceContext`** when an action only affects the user's machine or only touches test fixtures — these warrant different escalation paths. +- **Do not perform translation duties in the action plan.** No "what this means in plain English" sections. The humanizer handles that upstream; if a finding's prose still reads like jargon, that's a data gap to flag, not a translation to invent. + +In `--raw` mode, the analysis report is v5.0.0 verbatim — fall back to severity-based prioritization and surface raw scanner titles. Flag in the plan header that the plan was generated from unhumanized analysis. ## Task -1. **Load inputs**: Read analysis and interview (if exists) -2. **Generate actions**: Create action items for each finding +1. **Load inputs**: Use the Read tool on the analysis report and interview (if exists) +2.
**Generate actions**: Create action items for each finding, preserving humanized titles 3. **Assess risk**: Evaluate risk level per action -4. **Order by dependencies**: Ensure correct execution order +4. **Order by dependencies AND `userActionLanguage`**: dependency-correct AND urgency-correct 5. **Create rollback plans**: Define how to undo each action -6. **Write action plan**: Output comprehensive plan +6. **Write action plan**: Output comprehensive plan grouped by `userImpactCategory` ## Action Categories diff --git a/plugins/config-audit/tests/agents/agent-prompt-shape.test.mjs b/plugins/config-audit/tests/agents/agent-prompt-shape.test.mjs new file mode 100644 index 0000000..374f46f --- /dev/null +++ b/plugins/config-audit/tests/agents/agent-prompt-shape.test.mjs @@ -0,0 +1,82 @@ +/** + * Wave 5 Step 16 — Agent system-prompt shape tests. + * + * Verifies that the 3 agent prompt files have the correct structural shape + * after the humanizer integration: + * + * - Each file references at least one of the humanized field names by + * name: `userImpactCategory`, `userActionLanguage`, `relevanceContext`. + * + * - Each file does NOT contain a "explain what X means" subroutine — + * those translation duties are owned by the humanizer now. + * + * - Each file preserves its required frontmatter (name, description, + * model, color, tools). 
+ */ + +import { test } from 'node:test'; +import { strict as assert } from 'node:assert'; +import { readFile } from 'node:fs/promises'; +import { resolve, dirname } from 'node:path'; +import { fileURLToPath } from 'node:url'; + +const __dirname = dirname(fileURLToPath(import.meta.url)); +const AGENTS_DIR = resolve(__dirname, '..', '..', 'agents'); + +const AGENT_FILES = [ + 'analyzer-agent.md', + 'planner-agent.md', + 'feature-gap-agent.md', +]; + +const HUMANIZED_FIELD_REGEX = /userImpactCategory|userActionLanguage|relevanceContext/; +const JARGON_TRANSLATION_INSTRUCTION_REGEX = /explain\s+what\s+\{[^}]+\}\s+means|translate\s+jargon|jargon[- ]translation\s+duty/i; +const FRONTMATTER_REGEX = /^---\s*\nname:\s+\S+/m; + +async function readAgent(name) { + return await readFile(resolve(AGENTS_DIR, name), 'utf-8'); +} + +test('Agent prompts: every file references at least one humanized field', async () => { + for (const name of AGENT_FILES) { + const content = await readAgent(name); + assert.match( + content, + HUMANIZED_FIELD_REGEX, + `${name} must reference userImpactCategory, userActionLanguage, or relevanceContext`, + ); + } +}); + +test('Agent prompts: no jargon-translation subroutines', async () => { + for (const name of AGENT_FILES) { + const content = await readAgent(name); + assert.doesNotMatch( + content, + JARGON_TRANSLATION_INSTRUCTION_REGEX, + `${name} must not contain "explain what {jargon} means" / "translate jargon" instructions — humanizer owns translation`, + ); + } +}); + +test('Agent prompts: frontmatter preserved (name field present)', async () => { + for (const name of AGENT_FILES) { + const content = await readAgent(name); + assert.match(content, FRONTMATTER_REGEX, `${name} missing required frontmatter`); + } +}); + +test('analyzer-agent.md: instructs grouping by userImpactCategory', async () => { + const content = await readAgent('analyzer-agent.md'); + assert.match(content, /group.*by\s+`?userImpactCategory`?/i, 'analyzer-agent must group 
findings by userImpactCategory'); +}); + +test('planner-agent.md: instructs ordering by userActionLanguage', async () => { + const content = await readAgent('planner-agent.md'); + assert.match(content, /order.*by\s+(dependencies\s+and\s+)?`?userActionLanguage`?|userActionLanguage\s+urgency/i, 'planner-agent must order actions by userActionLanguage'); +}); + +test('feature-gap-agent.md: skips test-fixture-no-impact findings', async () => { + const content = await readAgent('feature-gap-agent.md'); + assert.match(content, /test-fixture-no-impact/, 'feature-gap-agent must reference the test-fixture-no-impact relevanceContext'); +});