From 79b6e29073cef861cc2dbae3e8a8cbd058bc8441 Mon Sep 17 00:00:00 2001 From: Kjell Tore Guttormsen Date: Fri, 1 May 2026 19:41:08 +0200 Subject: [PATCH] feat(humanizer): update audit/analysis command templates group A [skip-docs] MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Wave 5 Step 13. Threads the humanizer vocabulary through five audit/ analysis command templates and adds a shape test that locks the structure in place. - commands/posture.md, tokens.md, feature-gap.md (findings-renderers): reference userImpactCategory/userActionLanguage/relevanceContext; remove hardcoded A/B/C/D/F-to-prose tables (humanizer owns the grade-context vocabulary now via the stderr scorecard headline). - commands/manifest.md, whats-active.md (inventory CLIs): add --raw pass-through for CLI-surface consistency. --raw is a no-op in these CLIs, but the flag is threaded through so users get uniform behaviour. - All five files: --raw flag parsed from $ARGUMENTS and passed verbatim to the underlying scanner CLI when present. tests/commands/group-a-shape.test.mjs (new, +5 tests, 767 → 772): - structural: every file has a bash invocation block, Read tool reference, and --raw/$ARGUMENTS plumbing - findings-renderers only: at least one humanized field referenced; no hardcoded "[grade] grade is..." 
prose tables --- plugins/config-audit/commands/feature-gap.md | 64 ++++++------ plugins/config-audit/commands/manifest.md | 5 +- plugins/config-audit/commands/posture.md | 45 ++++----- plugins/config-audit/commands/tokens.md | 20 ++-- plugins/config-audit/commands/whats-active.md | 5 +- .../tests/commands/group-a-shape.test.mjs | 97 +++++++++++++++++++ 6 files changed, 174 insertions(+), 62 deletions(-) create mode 100644 plugins/config-audit/tests/commands/group-a-shape.test.mjs diff --git a/plugins/config-audit/commands/feature-gap.md b/plugins/config-audit/commands/feature-gap.md index 5c5427e..22dcc7f 100644 --- a/plugins/config-audit/commands/feature-gap.md +++ b/plugins/config-audit/commands/feature-gap.md @@ -20,9 +20,11 @@ Context-aware analysis of Claude Code features that could benefit your specific ## Implementation -### Step 1: Determine target and greet +### Step 1: Determine target and flags -Parse `$ARGUMENTS` for a path (default: current working directory). +Split `$ARGUMENTS` into a path and flags. Path is the first non-flag argument (default: current working directory). Recognized flags: + +- `--raw` — pass-through to the scanner; produces v5.0.0 verbatim envelope (bypasses the humanizer). When `--raw` is set, render with v5.0.0 finding-field shape only — humanizer fields are absent in raw output. Tell the user: @@ -38,7 +40,9 @@ Generate session ID (`YYYYMMDD_HHmmss`) if no active session exists. ```bash mkdir -p ~/.claude/config-audit/sessions/{session-id}/findings 2>/dev/null -node ${CLAUDE_PLUGIN_ROOT}/scanners/posture.mjs --json --output-file ~/.claude/config-audit/sessions/{session-id}/posture.json 2>/dev/null; echo $? +RAW_FLAG="" +if echo "$ARGUMENTS" | grep -q -- "--raw"; then RAW_FLAG="--raw"; fi +node ${CLAUDE_PLUGIN_ROOT}/scanners/posture.mjs --output-file ~/.claude/config-audit/sessions/{session-id}/posture.json $RAW_FLAG 2>/dev/null; echo $? ``` If exit code is non-zero: "Assessment couldn't run. 
Check that the path exists and contains configuration files." @@ -59,49 +63,51 @@ ls /*.py /requirements.txt /pyproject.toml Read `${CLAUDE_PLUGIN_ROOT}/knowledge/gap-closure-templates.md` for implementation templates. -Group GAP findings into three sections. Number them sequentially across sections: +Group GAP findings by their humanized fields rather than re-deriving tier-to-prose mappings. In default mode (no `--raw`) each finding carries: + +- `userImpactCategory` (e.g., "Missed opportunity") — the impact bucket +- `userActionLanguage` (e.g., "Fix this now", "Fix soon", "Fix when convenient", "Optional cleanup", "FYI") — the urgency phrasing the rest of the toolchain uses +- `relevanceContext` ("affects-everyone" / "affects-this-machine-only" / "test-fixture-no-impact") — the scope so the user knows whether the change touches shared config or just their own machine + +Group findings into three sections by `userActionLanguage`: "Fix this now" + "Fix soon" → **High Impact**, "Fix when convenient" → **Worth Considering**, "Optional cleanup" + "FYI" → **Explore When Ready**. Number sequentially across sections. Skip findings whose `relevanceContext === "test-fixture-no-impact"` unless the user explicitly asked to include fixtures. + +The humanizer has already replaced jargon-heavy strings with plain-language equivalents in `title`, `description`, and `recommendation` — render those verbatim. Do not paraphrase. Do not introduce inline tier-to-prose tables ("Tier 1 means…"); the categories are pre-translated. + +If `--raw` was passed, the v5.0.0 envelope is in effect — humanizer fields are absent. Fall back to grouping by `category` ("t1"/"t2"/"t3"/"t4") and render `title` + `recommendation` directly. + +Render shape (default mode): ```markdown ### High Impact -These address correctness or safety — consider them seriously. 
+{For each finding where userActionLanguage is "Fix this now" or "Fix soon":} -**1.** Add permissions.deny for sensitive paths - → Settings enforcement is stronger than CLAUDE.md instructions. - → Effort: Low (5 min) - -**2.** Configure at least one hook for safety automation - → Hooks guarantee the action happens. CLAUDE.md instructions are advisory. - → Effort: Medium (15 min) +**{N}.** {title} + → {description} + → {recommendation} + → Effort: {from gap-closure-templates.md} ### Worth Considering -These improve workflow efficiency for projects like yours. +{For each finding where userActionLanguage is "Fix when convenient":} -**3.** Split CLAUDE.md into focused modules with @imports - → Files over 200 lines degrade Claude's adherence to instructions. - → Effort: Low (10 min) - -**4.** Add path-scoped rules for different file types - → Unscoped rules load every session regardless of relevance. - → Effort: Low (10 min) +**{N}.** {title} + → {description} + → {recommendation} ### Explore When Ready -Nice-to-have. Skip if your current setup works well. 
+{For each finding where userActionLanguage is "Optional cleanup" or "FYI":} -**5.** Custom keybindings (Shift+Enter for newline) - → Effort: Low (2 min) - -**6.** Status line configuration - → Effort: Low (2 min) +**{N}.** {title} + → {recommendation} ``` Each recommendation MUST have: - A number -- A one-line description -- A "Why" with evidence -- An effort estimate from the templates +- The humanizer-provided `title` +- The humanizer-provided `description` (where shown) +- An effort estimate looked up from the templates ### Step 5: Ask what to implement diff --git a/plugins/config-audit/commands/manifest.md b/plugins/config-audit/commands/manifest.md index 072438a..4b77cda 100644 --- a/plugins/config-audit/commands/manifest.md +++ b/plugins/config-audit/commands/manifest.md @@ -24,6 +24,7 @@ Produce a ranked, single-table view of every token source loaded for a given rep First non-flag argument is the path (default `.`). Recognized flags: - `--json` — emit raw JSON instead of the rendered table. +- `--raw` — pass-through to the scanner; accepted for CLI surface consistency with the other config-audit commands. The manifest CLI is data-table only (no findings prose), so `--raw` is a no-op here, but the flag is still threaded through so users get uniform behaviour across the toolchain. ### Step 2: Run the CLI silently @@ -31,7 +32,9 @@ Tell the user: **"Building token-source manifest for ``..."** ```bash TMPFILE="/tmp/ca-manifest-$$.json" -node ${CLAUDE_PLUGIN_ROOT}/scanners/manifest.mjs --output-file "$TMPFILE" 2>/dev/null; echo $? +RAW_FLAG="" +if echo "$ARGUMENTS" | grep -q -- "--raw"; then RAW_FLAG="--raw"; fi +node ${CLAUDE_PLUGIN_ROOT}/scanners/manifest.mjs --output-file "$TMPFILE" $RAW_FLAG 2>/dev/null; echo $? 
``` **Exit code handling:** diff --git a/plugins/config-audit/commands/posture.md b/plugins/config-audit/commands/posture.md index f058617..364869b 100644 --- a/plugins/config-audit/commands/posture.md +++ b/plugins/config-audit/commands/posture.md @@ -19,9 +19,13 @@ Quick, deterministic configuration health scorecard. No agents needed — runs a ## Implementation -### Step 1: Determine target +### Step 1: Determine target and flags -Parse `$ARGUMENTS` for a path (default: current working directory). Resolve relative paths. +Split `$ARGUMENTS` into a path and flags. Path is the first non-flag argument (default: current working directory). Resolve relative paths. Recognized flags: + +- `--raw` — pass-through to the scanner; produces v5.0.0 verbatim output (bypasses the humanizer). Power-user mode for byte-stable diffs and machine consumption. +- `--drift` — append a "Configuration Drift" section (see Step 5). +- `--plugin-health` — append a "Plugin Health" section (see Step 5). Tell the user: @@ -33,32 +37,34 @@ Running quick assessment{if path != cwd: " on `{path}`"}... ### Step 2: Run posture scanner -Run silently — all output goes to a file: +Run silently — JSON goes to a file, the humanized scorecard prints to stderr (default mode). The humanized stderr scorecard already includes the grade headline and area-score lines in plain language, so render those directly rather than re-deriving prose tables. ```bash -node ${CLAUDE_PLUGIN_ROOT}/scanners/posture.mjs --json --output-file /tmp/config-audit-posture-$$.json 2>/dev/null; echo $? +RAW_FLAG="" +if echo "$ARGUMENTS" | grep -q -- "--raw"; then RAW_FLAG="--raw"; fi +node ${CLAUDE_PLUGIN_ROOT}/scanners/posture.mjs --output-file /tmp/config-audit-posture-$$.json $RAW_FLAG 2>/tmp/config-audit-posture-stderr-$$.txt; echo $? ``` If exit code is non-zero, tell the user: "Assessment couldn't complete. Check that the path exists and contains Claude Code configuration files." 
+If `--raw` was passed, treat the captured stderr as v5.0.0-shape verbatim text and present it as-is in a code block; skip the humanized rendering steps below. + ### Step 3: Read and interpret results Read the JSON output file using the Read tool. Extract: - `overallGrade`, `opportunityCount` - `areas[]` — each with `name`, `grade`, `score`, `findingCount` +- `scannerEnvelope.scanners[].findings[]` — when surfacing individual findings, prefer the humanizer-provided fields: `userImpactCategory` (e.g., "Configuration mistake", "Wasted tokens"), `userActionLanguage` (e.g., "Fix this now", "Fix soon", "Optional cleanup"), and `relevanceContext` ("affects-everyone", "affects-this-machine-only", "test-fixture-no-impact"). These let you group and prioritize without hardcoded severity-to-prose mappings. + +Also Read the captured stderr file — its body is the humanized scorecard (grade headline, area-score block, opportunity hint). You can present it verbatim or interleave its lines with the JSON-driven table. ### Step 4: Present the scorecard ```markdown **Health: {overallGrade}** | {qualityAreaCount} areas scanned -{grade-based context — pick ONE:} -- A: "Your configuration is correct and well-maintained." -- B: "Solid configuration with minor improvements available." -- C: "Working configuration with some issues worth addressing." -- D: "Configuration needs attention in several areas." -- F: "Significant issues found — addressing these will improve your experience." +{Use the headline line from the humanized stderr scorecard — it carries grade-context prose already (e.g., " Health: A (97/100) — Healthy setup, only minor polish needed"). Do not re-derive an A/B/C/D prose table here; the humanizer owns that vocabulary.} ### Area Scores @@ -73,22 +79,13 @@ Read the JSON output file using the Read tool. Extract: ### What's next ``` -**Grade A or B:** -``` -Your configuration health is strong. Re-run after major changes to catch regressions. 
-For feature recommendations: `/config-audit feature-gap` -``` +Group "what's next" suggestions by `userActionLanguage` from the humanized findings: -**Grade C:** -``` -Run `/config-audit fix` to auto-fix what's possible, then `/config-audit plan` for a prioritized improvement path. -``` +- Findings tagged "Fix this now" / "Fix soon" → suggest `/config-audit fix` first, then `/config-audit plan`. +- Findings tagged "Fix when convenient" / "Optional cleanup" → suggest `/config-audit feature-gap` and routine maintenance. +- No high-urgency findings → suggest `/config-audit feature-gap` for opportunities and re-running posture after major config changes. -**Grade D or F:** -``` -Start with `/config-audit fix` — it handles the most impactful issues automatically with backup and rollback. -Then run `/config-audit plan` for a step-by-step path to a better configuration. -``` +Avoid hardcoded grade-to-prose ladders here — the humanized scorecard headline already supplies grade context, and `userActionLanguage` supplies finding-level urgency. ### Step 5: Optional sections diff --git a/plugins/config-audit/commands/tokens.md b/plugins/config-audit/commands/tokens.md index 54df519..e7c10a1 100644 --- a/plugins/config-audit/commands/tokens.md +++ b/plugins/config-audit/commands/tokens.md @@ -28,16 +28,21 @@ Complementary to `/config-audit whats-active`: Split `$ARGUMENTS` into a path and flags. Path is the first non-flag argument. Default to `.` (current working directory). Recognized flags: - `--global` — also include the user-level `~/.claude/` cascade -- `--json` — emit raw JSON instead of rendered tables (power-user mode) +- `--json` — emit raw JSON instead of rendered tables (power-user mode; bypasses the humanizer for byte-stable v5.0.0 output) +- `--raw` — pass-through to the scanner; produces v5.0.0 verbatim JSON (bypasses the humanizer). Use when piping into v5.0.0-baseline diff tooling. 
- `--with-telemetry-recipe` — include `telemetry_recipe_path` in the JSON output, pointing to `knowledge/cache-telemetry-recipe.md`. Use this when you want to verify a structural fix actually improved cache hit rate (manual jq recipe, opt-in) ### Step 2: Run the CLI silently Tell the user: **"Analysing token hotspots for ``..."** +Default mode (no `--json`, no `--raw`) emits a humanized JSON envelope: each finding carries `userImpactCategory`, `userActionLanguage`, and `relevanceContext` in addition to the v5.0.0 fields. Pass `--raw` through verbatim if the user requested it. + ```bash TMPFILE="/tmp/config-audit-tokens-$$.json" -node ${CLAUDE_PLUGIN_ROOT}/scanners/token-hotspots-cli.mjs --output-file "$TMPFILE" [--global] 2>/dev/null; echo $? +RAW_FLAG="" +if echo "$ARGUMENTS" | grep -q -- "--raw"; then RAW_FLAG="--raw"; fi +node ${CLAUDE_PLUGIN_ROOT}/scanners/token-hotspots-cli.mjs --output-file "$TMPFILE" [--global] $RAW_FLAG 2>/dev/null; echo $? ``` **Exit code handling:** @@ -58,10 +63,10 @@ Use the Read tool on `$TMPFILE`. Extract: - `total_estimated_tokens` — top-line number - `hotspots[]` — top 10 ranked sources -- `findings[]` — Opus 4.7 pattern findings (CA-TOK-001..003) +- `findings[]` — Opus 4.7 pattern findings (CA-TOK-001..003); each finding in default mode carries humanizer fields (`userImpactCategory`, `userActionLanguage`, `relevanceContext`) alongside the v5.0.0 fields - `counts` — severity breakdown -Render as markdown: +Render as markdown. Group findings by `userImpactCategory` (e.g., "Wasted tokens" vs "Configuration mistake") rather than re-deriving severity prose; lead each line with `userActionLanguage` ("Fix this now", "Fix soon", "Optional cleanup", etc.) so the urgency phrasing stays consistent with the rest of the toolchain. The humanizer already replaced jargon-heavy `title`/`description`/`recommendation` strings with plain-language equivalents — render them verbatim. 
```markdown **Token hotspots for ``** — ~{total_estimated_tokens} estimated tokens loaded per turn @@ -72,13 +77,14 @@ Render as markdown: |------|--------|--------|-----------------| | {rank} | `{source}` | ~{estimated_tokens} | {recommendations joined as `· ` bullets} | -### Opus 4.7 pattern findings +### Findings, grouped by impact -{For each finding, render:} +{Group findings[] by their userImpactCategory. Within each group, sort by userActionLanguage urgency (Fix this now → Fix soon → Fix when convenient → Optional cleanup → FYI), then render:} -- **{id}** ({severity}) — {title} +- **{userActionLanguage}** — {title} ({id}) - {description} - **Fix:** {recommendation} + - _{relevanceContext}_ when not "affects-everyone" (mention the scope so the user knows whether a fix touches shared config or just their machine) ### Severity summary diff --git a/plugins/config-audit/commands/whats-active.md b/plugins/config-audit/commands/whats-active.md index fefcb19..8af6c6c 100644 --- a/plugins/config-audit/commands/whats-active.md +++ b/plugins/config-audit/commands/whats-active.md @@ -24,6 +24,7 @@ Show a complete, read-only inventory of everything Claude Code loads for a given Split `$ARGUMENTS` into a path and flags. Path is the first non-flag argument. Default to `.` (current working directory). Recognized flags: - `--json` — emit raw JSON instead of rendered tables (power-user mode) +- `--raw` — pass-through to the scanner; accepted for CLI surface consistency. `whats-active` is an inventory-only output (no findings prose), so `--raw` is a no-op here, but the flag is still threaded through for uniform behaviour across the toolchain. 
- `--verbose` — include per-file byte/line detail - `--suggest-disables` — append deterministic disable-candidates + LLM-judgment pass @@ -33,7 +34,9 @@ Tell the user: **"Reading active configuration for ``..."** ```bash TMPFILE="/tmp/ca-whats-active-$$.json" -node ${CLAUDE_PLUGIN_ROOT}/scanners/whats-active.mjs --output-file "$TMPFILE" [--verbose] [--suggest-disables] 2>/dev/null; echo $? +RAW_FLAG="" +if echo "$ARGUMENTS" | grep -q -- "--raw"; then RAW_FLAG="--raw"; fi +node ${CLAUDE_PLUGIN_ROOT}/scanners/whats-active.mjs --output-file "$TMPFILE" [--verbose] [--suggest-disables] $RAW_FLAG 2>/dev/null; echo $? ``` **Exit code handling:** diff --git a/plugins/config-audit/tests/commands/group-a-shape.test.mjs b/plugins/config-audit/tests/commands/group-a-shape.test.mjs new file mode 100644 index 0000000..a7f456f --- /dev/null +++ b/plugins/config-audit/tests/commands/group-a-shape.test.mjs @@ -0,0 +1,97 @@ +/** + * Wave 5 Step 13 — Group A command-template shape tests. + * + * Verifies that the 5 audit/analysis command templates have the correct + * structural shape after the humanizer integration: + * + * - All 5 files: contain a Bash invocation block, reference the Read tool, + * and contain the `--raw` flag (or the literal `"$ARGUMENTS"` string). + * + * - Findings-rendering files (posture.md, tokens.md, feature-gap.md): + * reference at least one of `userImpactCategory|userActionLanguage| + * relevanceContext`, and do NOT contain hardcoded grade-prose tables + * of the form `[ABCDF]\s+grade\s+is...`. + * + * - Inventory/data-only files (manifest.md, whats-active.md): structural + * checks only (Bash + Read + --raw pass-through). No humanized-field + * reference required because these CLIs emit data tables, not findings. 
+ */ + +import { test } from 'node:test'; +import { strict as assert } from 'node:assert'; +import { readFile } from 'node:fs/promises'; +import { resolve, dirname } from 'node:path'; +import { fileURLToPath } from 'node:url'; + +const __dirname = dirname(fileURLToPath(import.meta.url)); +const COMMANDS_DIR = resolve(__dirname, '..', '..', 'commands'); + +const GROUP_A_FILES = [ + 'posture.md', + 'tokens.md', + 'manifest.md', + 'whats-active.md', + 'feature-gap.md', +]; + +const FINDINGS_RENDERING_FILES = [ + 'posture.md', + 'tokens.md', + 'feature-gap.md', +]; + +const HUMANIZED_FIELD_REGEX = /userImpactCategory|userActionLanguage|relevanceContext/; +const RAW_OR_ARGUMENTS_REGEX = /--raw|"\$ARGUMENTS"/; +const HARDCODED_GRADE_PROSE_REGEX = /[ABCDF]\s+grade\s+is/; +// A Bash invocation block in markdown is a fenced ``` block tagged with bash. +const BASH_BLOCK_REGEX = /```bash\b/; +// Read tool reference: either explicit "Read tool" prose or the frontmatter +// "allowed-tools" list mentioning Read. 
+const READ_TOOL_REGEX = /\bRead\s+tool\b|allowed-tools:.*\bRead\b/; + +async function readCommand(name) { + return await readFile(resolve(COMMANDS_DIR, name), 'utf-8'); +} + +test('Group A: every file contains a Bash invocation block', async () => { + for (const name of GROUP_A_FILES) { + const content = await readCommand(name); + assert.match(content, BASH_BLOCK_REGEX, `${name} missing bash block`); + } +}); + +test('Group A: every file references the Read tool', async () => { + for (const name of GROUP_A_FILES) { + const content = await readCommand(name); + assert.match(content, READ_TOOL_REGEX, `${name} missing Read tool reference`); + } +}); + +test('Group A: every file contains --raw or "$ARGUMENTS" (pass-through plumbing)', async () => { + for (const name of GROUP_A_FILES) { + const content = await readCommand(name); + assert.match(content, RAW_OR_ARGUMENTS_REGEX, `${name} missing --raw / $ARGUMENTS plumbing`); + } +}); + +test('Group A findings-renderers: reference at least one humanized field', async () => { + for (const name of FINDINGS_RENDERING_FILES) { + const content = await readCommand(name); + assert.match( + content, + HUMANIZED_FIELD_REGEX, + `${name} must reference userImpactCategory, userActionLanguage, or relevanceContext`, + ); + } +}); + +test('Group A findings-renderers: no hardcoded grade-prose tables', async () => { + for (const name of FINDINGS_RENDERING_FILES) { + const content = await readCommand(name); + assert.doesNotMatch( + content, + HARDCODED_GRADE_PROSE_REGEX, + `${name} contains a hardcoded "[grade] grade is..." prose table — humanizer owns grade vocabulary now`, + ); + } +});