From 7e2d9e151e163d6ad0b9fde410c2d11b99d732bb Mon Sep 17 00:00:00 2001 From: Kjell Tore Guttormsen Date: Thu, 30 Apr 2026 14:21:54 +0200 Subject: [PATCH] =?UTF-8?q?feat(ultraplan-local):=20M1=20=E2=80=94=20profi?= =?UTF-8?q?le=20recommendation=20flow=20in=20ultrabrief?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds the profile recommendation step to /ultrabrief-local Phase 4. The brief stays universal (same questions, same template); the new step is purely a processing-decision layer that records which profile downstream commands should apply. What lands: - agents/profile-recommender.md — new sonnet agent that scores available profiles against the finalized brief (keyword + NFR-signal matching, axis bumps, hallucination gate that forbids inventing profile names). Emits a fenced JSON block with ranked entries. - templates/ultrabrief-template.md — frontmatter gains recommended_profile, profile_match, profile_rationale (default values applied when only `default` is available — true at M1). - commands/ultrabrief-local.md — Phase 4 gains Step 4h with explicit branches: short-circuit when only `default` exists; AskUserQuestion confirmation when top score ≥ 0.7; explicit fallback message when below threshold; manual selection sub-question on user override. Persists the three frontmatter fields to brief.md after user confirmation. JSON parser failure falls back to `default` with `profile_match: fallback` rather than blocking — silent fallback is the worst outcome, but a *visible* fallback is acceptable. - scripts/profile-loader.mjs — adds selectRecommendation(ranked, opts) + RECOMMENDATION_THRESHOLD=0.7 export. Single source of truth for the threshold logic so the command spec and the helper agree. - scripts/profile-loader.test.mjs — 10 new tests for selectRecommendation (default-only, empty/malformed input, above/below threshold, custom threshold, max-by-score, missing fields). Total now 36/36. 
- README.md / CLAUDE.md / marketplace landing — docs reflect M0 + M1 shipped, M2 + M3 still pending. In practice nothing changes for users at M1 because only `default` is available — Step 4h takes the short-circuit path and writes `profile_match: default-only`. M2 ships the additional profiles that make the recommender meaningful. Co-Authored-By: Claude Opus 4.7 --- README.md | 4 +- plugins/ultraplan-local/CLAUDE.md | 25 ++- plugins/ultraplan-local/README.md | 29 ++- .../agents/profile-recommender.md | 188 ++++++++++++++++++ .../commands/ultrabrief-local.md | 185 +++++++++++++++++ .../scripts/profile-loader.mjs | 92 +++++++++ .../scripts/profile-loader.test.mjs | 98 +++++++++ .../templates/ultrabrief-template.md | 3 + 8 files changed, 609 insertions(+), 15 deletions(-) create mode 100644 plugins/ultraplan-local/agents/profile-recommender.md diff --git a/README.md b/README.md index 55c2b87..ab6b91c 100644 --- a/README.md +++ b/README.md @@ -85,7 +85,9 @@ M0 of the runtime-profile work (additive, no behaviour change) introduces `profi Phase 1 of `/ultraplan-local` now resolves a profile in the order `--profile flag → brief.recommended_profile → default fallback` and reports `Profile: {name} (source: ...)` in the mode banner. M0 ships only `default.yaml`, which mirrors the current hardcoded Phase 5/9 agent set verbatim — so existing flows are unaffected. -The remaining milestones layer on top: M1 wires profile recommendation into `/ultrabrief-local` Phase 4 (brief recommends a profile, user confirms or overrides). M2 ships additional built-in profiles (`quick`, `bugfix`, `feature`, `refactor`, `security-deep`, `research-heavy`) and replaces the hardcoded Phase 5 agent table with profile-driven selection. M3 adds user-extensible profiles in `.claude/ultraplan-profiles/` and `~/.claude/ultraplan-profiles/`. 
+M1 (additive) adds the recommendation flow: a new `profile-recommender` sonnet agent ranks the available profiles against the finalized brief and writes `recommended_profile`/`profile_match`/`profile_rationale` to the brief frontmatter. `/ultrabrief-local` Phase 4 gains Step 4h: it short-circuits when only `default` exists, surfaces an `AskUserQuestion` confirmation when the top score reaches `RECOMMENDATION_THRESHOLD = 0.7`, and falls back to `default` with an explicit message otherwise. The brief itself stays universal — Step 4h does not branch on domain, only the downstream profile does. + +The remaining milestones layer on top: M2 ships additional built-in profiles (`quick`, `bugfix`, `feature`, `refactor`, `security-deep`, `research-heavy`) and replaces the hardcoded Phase 5 agent table with profile-driven selection. M3 adds user-extensible profiles in `.claude/ultraplan-profiles/` and `~/.claude/ultraplan-profiles/`. The whole design preserves a universal brief: `/ultrabrief-local` asks the same questions regardless of domain, and the profile is a *processing decision* layered on top of that universal data capture. No silent variant routing, no hidden magic — the active profile is always reported and overridable. diff --git a/plugins/ultraplan-local/CLAUDE.md b/plugins/ultraplan-local/CLAUDE.md index 7f948dd..971c705 100644 --- a/plugins/ultraplan-local/CLAUDE.md +++ b/plugins/ultraplan-local/CLAUDE.md @@ -90,6 +90,7 @@ Architect sits between `/ultraresearch-local` and `/ultraplan-local`. 
It matches | research-scout | sonnet | External docs for unfamiliar tech (conditional, planning only) | | convention-scanner | sonnet | Coding conventions: naming, style, error handling, test patterns | | brief-reviewer | sonnet | Task brief quality (5 dimensions: completeness, consistency, testability, scope clarity, research plan validity) | +| profile-recommender | sonnet | Match finalized brief against available profiles; ranked JSON output with brief-anchored rationale (M1+) | | plan-critic | sonnet | Adversarial plan review (9 dimensions) | | scope-guardian | sonnet | Scope alignment (creep + gaps) | | session-decomposer | sonnet | Splits plans into headless sessions with dependency graph | @@ -119,18 +120,30 @@ Architect sits between `/ultraresearch-local` and `/ultraplan-local`. It matches **Security:** 4-layer defense-in-depth: plugin hooks (pre-bash-executor, pre-write-executor), prompt-level denylist (works in headless sessions), pre-execution plan scan (Phase 2.4), scoped `--allowedTools` replacing `--dangerously-skip-permissions`. Hard Rules 14-16 enforce verify command security, repo-boundary writes, and sensitive path protection. -**Profiles (M0 — foundation, no behaviour change):** Profiles describe +**Profiles (M0/M1 — foundation + recommendation):** Profiles describe which exploration/review agents, catalog filter, and adversarial regime ultraplan-local should use. They live as `profiles/*.yaml` and are loaded by `scripts/profile-loader.mjs` (null-deps Node, limited-subset YAML parser, agent cross-validation). M0 ships only `default.yaml`, which captures today's hardcoded Phase 5/9 agent set verbatim — so existing -flows are unchanged. Phase 1 of `/ultraplan-local` now resolves a profile -in the order `--profile flag → brief frontmatter recommended_profile → +flows are unchanged. 
Phase 1 of `/ultraplan-local` resolves a profile in +the order `--profile flag → brief frontmatter recommended_profile → default fallback` and reports `Profile: {name} (source: ...)` in the mode -banner. Future milestones: M1 wires recommendation into ultrabrief Phase -4; M2 ships more built-in profiles + replaces the hardcoded agent table; -M3 adds user-extensible profiles under `.claude/ultraplan-profiles/`. +banner. + +M1 adds the recommendation flow: `/ultrabrief-local` Phase 4 gains +Step 4h, which spawns the new `profile-recommender` agent (sonnet, +single-shot, brief-anchored scoring) and writes +`recommended_profile`/`profile_match`/`profile_rationale` to the brief +frontmatter. The threshold is `RECOMMENDATION_THRESHOLD = 0.7` (exported +from the loader); below that, the orchestrator surfaces an explicit +fallback message and offers manual selection via AskUserQuestion. When +only `default` is available, Step 4h short-circuits without asking. +The brief itself stays universal — Step 4h does not change which +questions are asked, only which downstream profile applies. Future +milestones: M2 ships more built-in profiles + replaces the hardcoded +agent table; M3 adds user-extensible profiles under +`.claude/ultraplan-profiles/`. **Pipeline:** `/ultrabrief-local` produces the task brief. `/ultraresearch-local --project ` fills in `{dir}/research/`. `/ultra-cc-architect-local --project ` *(optional, v2.2)* matches available Claude Code features against brief+research and writes `{dir}/architecture/`. `/ultraplan-local --project ` reads brief + research (+ architecture note if present) to produce `{dir}/plan.md`. `/ultraexecute-local --project ` executes and writes `{dir}/progress.json`. All artifacts live in one project directory. 
diff --git a/plugins/ultraplan-local/README.md b/plugins/ultraplan-local/README.md index 90ebd80..7e973a6 100644 --- a/plugins/ultraplan-local/README.md +++ b/plugins/ultraplan-local/README.md @@ -272,7 +272,7 @@ Output: `--brief` or `--project` is **required**. `/ultraplan-local` with no brief exits with an error and a pointer to `/ultrabrief-local`. -#### Profiles (M0 — foundation) +#### Profiles (M0/M1 — foundation + recommendation flow) A *profile* describes which exploration/review agents to spawn, which catalog filter to apply, and which adversarial regime to use. Profiles @@ -280,13 +280,26 @@ live in `profiles/*.yaml` and are loaded via `scripts/profile-loader.mjs` (null-deps Node, limited-subset YAML parser, validates that every referenced agent exists). -M0 ships only the `default` profile, which mirrors the current -hardcoded Phase 5/9 agent set — so existing flows are unaffected. M1 -(coming next) lets `/ultrabrief-local` recommend a profile based on -brief content; M2 ships additional built-in profiles (`quick`, -`bugfix`, `feature`, `refactor`, `security-deep`, `research-heavy`) -and replaces the hardcoded Phase 5 agent table. M3 adds -user-extensible profiles in `.claude/ultraplan-profiles/`. +M0 (shipped) introduced the loader and the `default` profile. M1 +(shipped) adds the recommendation flow: `/ultrabrief-local` Phase 4 +gains a Step 4h that loads available profiles, spawns the +`profile-recommender` agent (sonnet, single-shot), and writes +`recommended_profile`/`profile_match`/`profile_rationale` to the +brief frontmatter. When the top score is `≥ 0.7`, the user is asked +to confirm; otherwise the fallback to `default` is surfaced +explicitly with the option to choose manually. When only `default` +is available (still the case at M1), Step 4h short-circuits and +records `profile_match: default-only` — no AskUserQuestion. + +The brief stays universal — the same questions, the same template, +no domain-specific branching. 
The profile is a *processing +decision* layered on top of universal data capture. + +M2 (next) ships additional built-in profiles (`quick`, `bugfix`, +`feature`, `refactor`, `security-deep`, `research-heavy`) and +replaces the hardcoded Phase 5 agent table with profile-driven +selection. M3 adds user-extensible profiles in +`.claude/ultraplan-profiles/`. ```bash # Inspect available profiles diff --git a/plugins/ultraplan-local/agents/profile-recommender.md b/plugins/ultraplan-local/agents/profile-recommender.md new file mode 100644 index 0000000..8904bb1 --- /dev/null +++ b/plugins/ultraplan-local/agents/profile-recommender.md @@ -0,0 +1,188 @@ +--- +name: profile-recommender +description: | + Use this agent to match a finalized task brief against the available + ultraplan-local profiles and produce a ranked recommendation with + brief-anchored rationale. Called by `/ultrabrief-local` Phase 4 (Step 4h) + after the brief-reviewer gate has passed. + + + Context: ultrabrief Step 4h profile recommendation + user: "/ultrabrief-local" + assistant: "Brief finalized. Launching profile-recommender to match the brief against available profiles." + + Step 4h spawns this agent once and uses its ranked output to recommend a + profile via AskUserQuestion. If only `default` exists, the orchestrator + skips this agent entirely. + + + + + Context: User asks which profile fits a brief + user: "Which profile should we use for this brief?" + assistant: "I'll use the profile-recommender to score available profiles." + + Direct request triggers the agent. + + +model: sonnet +color: cyan +tools: ["Read"] +--- + +You are a profile matcher for ultraplan-local. Your sole job is to read a +finalized task brief and rank the available profiles by how well each fits. +You produce a single fenced JSON block that the orchestrator parses to drive +the profile-confirmation flow. + +You are not an opinionator. You match brief content against profile triggers. 
+If no profile matches well, you must say so clearly — silent fallback is the +worst outcome. + +## Input + +The orchestrator's prompt provides: + +1. **The brief path** — read with the Read tool. The brief follows the + ultrabrief v2.0 format and contains `## Intent`, `## Goal`, `## Non-Goals`, + `## Constraints`, `## Preferences`, `## Non-Functional Requirements`, + `## Success Criteria`, `## Research Plan`, `## Open Questions / Assumptions`, + `## Prior Attempts`. + +2. **The available profiles** as a JSON array embedded in the prompt: + ```json + [ + { + "name": "security-deep", + "description": "Security-focused deep planning", + "axes": {"depth": "deep", "domain": "security", "goal": "implementation"}, + "triggers": { + "keywords": ["security", "auth", "OWASP", ...], + "nfr_signals": ["zero-trust", "threat model"] + } + }, + ... + ] + ``` + +You must score every profile in the input array. Do not invent profile names. +If a name is not in the input array, you must not output it. + +## Scoring rubric + +For each profile, assign a `score` in `[0.0, 1.0]` and a `match_quality` from +`{exact, partial, fallback}`. Use these heuristics: + +### Keyword and NFR-signal matching (primary signal) + +- Count how many `triggers.keywords` appear in the brief's Intent, Goal, + Constraints, NFRs, or Success Criteria sections (case-insensitive, + whole-word match preferred). +- Count how many `triggers.nfr_signals` appear in the same sections. +- Strong matches in Intent + Goal weigh more than matches in Constraints + (because Intent/Goal are load-bearing for downstream planning). +- A profile with 3+ keyword hits in Intent + Goal scores `≥ 0.8` + (`exact` match_quality). +- A profile with 1–2 hits in any section scores `0.5–0.7` (`partial`). +- A profile with zero direct hits scores `< 0.4` (`fallback`). 
+ +### Axis matching (secondary signal) + +- If the brief's task description or Intent explicitly mentions a domain + (e.g., "security", "refactor", "research", "bugfix"), profiles with a + matching `axes.domain` get a +0.15 bump. +- If the brief signals high-stakes (NFRs about availability, security, + performance targets), profiles with `axes.depth: deep` get a +0.10 bump. +- If the brief is small and contained (few constraints, narrow goal), profiles + with `axes.depth: quick` get a +0.10 bump for that signal alone. + +### Cap and clamp + +- Final score is `min(1.0, primary + axis_bumps)`. +- Profiles with empty `triggers.keywords` AND empty `triggers.nfr_signals` + (e.g., the `default` profile) score by axis match only, capped at `0.6`. + This guarantees the orchestrator falls back to `default` only when no + trigger-bearing profile scores higher. + +### Match quality bands + +- `exact` — score `≥ 0.7` AND at least 2 keyword/NFR hits. +- `partial` — score in `[0.4, 0.7)` OR exactly 1 keyword/NFR hit. +- `fallback` — score `< 0.4` and no direct trigger hits. + +The orchestrator uses `score ≥ 0.7` as the recommendation threshold. Below +that it falls back to `default` and surfaces an explicit message to the user. + +## Hallucination gate + +You may only output profiles whose `name` appears in the input JSON array. +If you find yourself wanting to suggest a profile that "would fit but isn't +listed", do not. The orchestrator will treat that as a parser failure and +fall back to `default`. + +## Output format + +Produce a brief prose summary (2–4 sentences) followed by a single fenced +JSON block. The JSON block MUST be the last fenced block in your output — +parsers extract it by reading the last `json` code fence. + +``` +## Profile match for {brief task} + +{2-4 sentences explaining which profile fits best and why, or that no profile +matches strongly. 
Cite specific brief sections (e.g., "Intent mentions +'OWASP top 10' and 'JWT auth' — security-deep triggers fire strongly").} + +### Ranked + +| Rank | Profile | Score | Match | Rationale | +|------|---------|-------|-------|-----------| +| 1 | {name} | {0.00–1.00} | {exact/partial/fallback} | {one-sentence why} | +| 2 | ... | ... | ... | ... | + +```json +{ + "ranked": [ + { + "name": "", + "score": 0.0, + "match_quality": "exact|partial|fallback", + "rationale": "" + }, + ... + ] +} +``` +``` + +### JSON rules + +- `ranked` must be a non-empty array. Every input profile must appear + exactly once. Order by descending `score`. +- `score` is a number in `[0.0, 1.0]` with up to 2 decimal places. +- `match_quality` is one of `exact | partial | fallback` exactly. +- `rationale` is a single short sentence (≤ 25 words). Quote brief + content where useful, but do not paraphrase your own scoring rubric. +- Do not include trailing commas, comments, or non-JSON text inside + the fence. The block must parse with a strict JSON parser. + +## Failure modes + +If you cannot read the brief (file missing, malformed) or the input profile +array is empty, output a single ranked entry for `default` with score `0.0`, +match_quality `fallback`, and a rationale describing the failure. The +orchestrator treats this as the explicit fallback signal. + +If the brief is empty or has no usable sections, score every profile at +`0.0` with match_quality `fallback`. Let the orchestrator decide. + +## Rules + +- **Read the brief once.** Do not over-analyze. Quick scoring beats slow + perfectionism for this step. +- **Cite brief content in rationale.** "Intent: '...'" is more useful than + "fits the security domain". +- **Never invent profile names.** Hallucination gate is hard. +- **Never propose a `default` recommendation when a non-default profile + scores ≥ 0.7.** The orchestrator decides fallback; you only score. +- **One JSON block, last in the output.** Parsers depend on this. 
diff --git a/plugins/ultraplan-local/commands/ultrabrief-local.md b/plugins/ultraplan-local/commands/ultrabrief-local.md index 89c29ac..6527c18 100644 --- a/plugins/ultraplan-local/commands/ultrabrief-local.md +++ b/plugins/ultraplan-local/commands/ultrabrief-local.md @@ -468,6 +468,191 @@ Final quality: {complete | partial} Research topics identified: {N} ``` +### Step 4h — Profile recommendation (M1+) + +After the brief is finalized on disk, recommend an ultraplan-local profile +that fits the brief. The profile drives which exploration/review agents +`/ultraplan-local` will spawn, which catalog filter the architect will use, +and which adversarial regime applies. Profiles live in +`${CLAUDE_PLUGIN_ROOT}/profiles/`; the loader is +`${CLAUDE_PLUGIN_ROOT}/scripts/profile-loader.mjs`. + +The brief stays universal — Step 4h does not change what the brief contains, +only which processing profile downstream commands should apply to it. + +**Step 4h.1 — Discover available profiles** + +Run: +``` +node ${CLAUDE_PLUGIN_ROOT}/scripts/profile-loader.mjs list +``` + +Capture the newline-separated profile names into a variable +`AVAILABLE_PROFILES`. If the loader exits non-zero or returns zero names, +skip Step 4h entirely and write `recommended_profile: default`, +`profile_match: default-only` to the brief frontmatter. + +**Step 4h.2 — Short-circuit when only `default` exists** + +If `AVAILABLE_PROFILES == ["default"]` (M1 ships only `default`), do not +spawn the recommender or ask the user. Write to the brief frontmatter: + +```yaml +recommended_profile: default +profile_match: default-only +profile_rationale: "Only the default profile is available; recommendation skipped." +``` + +Report: +``` +Profile: default (only profile available; recommendation skipped) +``` + +Proceed to Phase 5. The rest of Step 4h applies once M2 ships additional +profiles. 
+ +**Step 4h.3 — Build profile manifest for the recommender** + +For each profile in `AVAILABLE_PROFILES`, load it: +``` +node ${CLAUDE_PLUGIN_ROOT}/scripts/profile-loader.mjs load +``` + +Extract `name`, `description`, `axes`, and `triggers` from each. Build a +JSON array of profile manifests in memory. + +**Step 4h.4 — Spawn `profile-recommender`** + +Launch the `profile-recommender` agent (foreground, blocking). Prompt: + +> "Read the finalized brief at `{PROJECT_DIR}/brief.md` and rank these +> profiles by fit. Profiles JSON: `{paste manifest JSON array}`. Output the +> ranked list in your standard JSON block. Do not invent profile names — only +> rank what is in the JSON array." + +Capture the agent's output. Locate the **last** fenced ```json``` block and +parse it. Expected shape: + +```json +{ + "ranked": [ + {"name": "", "score": 0.0, "match_quality": "exact|partial|fallback", "rationale": "..."}, + ... + ] +} +``` + +**JSON fallback:** if the JSON block is missing, malformed, or empty, treat +this as a recommender failure. Write to the brief: + +```yaml +recommended_profile: default +profile_match: fallback +profile_rationale: "profile-recommender output could not be parsed; using default." +``` + +Report the failure to the user as plain text and proceed to Phase 5. Do not +call AskUserQuestion in this branch — the plain-text report is what makes the +fallback explicit rather than silent. + +**Step 4h.5 — Apply recommendation threshold** + +Use the `selectRecommendation()` helper logic (see +`scripts/profile-loader.mjs`): +- If the top-ranked profile has `score >= 0.7`, that is the recommendation. +- If the top score is `< 0.7`, the recommendation is `default` with + `match: fallback`. + +This logic is also exported as a helper for tests; use the same threshold. + +**Step 4h.6 — Confirm with the user via AskUserQuestion** + +If the recommended profile is **non-default** (i.e., a profile scored ≥ 0.7), +present: + +``` +Question: "Based on the brief, the {recommended} profile fits best. 
+ Use it for /ultraplan-local?" + +Options: + 1. "Use {recommended}" — apply the recommendation. (Recommended) + 2. "Use default" — fall back to the baseline profile. + 3. "Choose another" — pick from the full list of available profiles. +``` + +If the user picks option 1: write `recommended_profile: {recommended}`, +`profile_match: {match_quality}` (from the agent's output), +`profile_rationale: {rationale}` (from the agent's output). + +If the user picks option 2: write `recommended_profile: default`, +`profile_match: user-override`, `profile_rationale: "User chose default +over the {recommended} recommendation."` + +If the user picks option 3: present a sub-question listing every profile in +`AVAILABLE_PROFILES` with its description. The user picks one. Write +`recommended_profile: {chosen}`, `profile_match: user-override`, +`profile_rationale: "User selected {chosen} manually over the {recommended} +recommendation."` + +**Step 4h.7 — Fallback: top score below threshold** + +If the top score is `< 0.7`, surface the fallback explicitly: + +``` +No profile matched this brief strongly enough for an automatic recommendation. + +Available profiles: + - default: {description} + - {profile-2}: {description} + - {profile-N}: {description} + +Using fallback: default. You can override with: + /ultraplan-local --project {PROJECT_DIR} --profile +``` + +Then ask via `AskUserQuestion` whether the user wants to pick a profile now +or accept the default fallback: + +``` +Question: "No profile matched strongly. Pick one now, or use default?" + +Options: + 1. "Use default (fallback)" — write profile_match: fallback. (Recommended) + 2. "Choose a profile manually" — sub-question with full list. +``` + +If the user picks option 1: write `recommended_profile: default`, +`profile_match: fallback`, `profile_rationale: "{top-score and reason from +agent's output}"`. If the user picks option 2: same flow as Step 4h.6 option 3, +but `profile_match: user-override`. 
/**
 * Recommendation threshold used by ultrabrief-local Step 4h. The
 * profile-recommender agent's top-ranked profile must reach this score to
 * be presented as a recommendation; below it, ultrabrief falls back to
 * `default` with an explicit message.
 */
export const RECOMMENDATION_THRESHOLD = 0.7;

// The only `match_quality` values a ranked entry may carry. Anything else
// is normalised to `partial` before it is returned to the caller.
const RANKED_MATCH_QUALITIES = new Set(['exact', 'partial', 'fallback']);

/**
 * Decide what to do with a `profile-recommender` agent's ranked output.
 *
 * Rules, applied in order:
 *   1. If `opts.availableProfiles` is exactly `['default']`, short-circuit
 *      with `default-only` (the ranked input is not inspected).
 *   2. If `ranked` is not a non-empty array, fall back to `default`.
 *   3. Otherwise pick the highest-scoring usable entry. An entry is usable
 *      when it is an object with a non-blank string `name` and — if a
 *      non-empty `opts.availableProfiles` was supplied — that name is listed
 *      there. Unlisted names are ignored rather than recommended.
 *      Missing, non-numeric, NaN or infinite scores count as 0, so one bad
 *      score cannot mask a valid entry that follows it.
 *   4. Recommend the top entry only when `score >= threshold`; below that,
 *      return `default` with `match: 'fallback'` and a rationale that
 *      carries the top score (and the top entry's rationale when present).
 *
 * @param {Array<{name?: string, score?: number, match_quality?: string, rationale?: string}>} ranked
 *   Parsed `ranked` array from the agent's JSON block. May be malformed.
 * @param {{threshold?: number, availableProfiles?: string[]}} [opts]
 *   `threshold` overrides RECOMMENDATION_THRESHOLD (ignored unless it is a
 *   non-NaN number). `availableProfiles` enables the default-only
 *   short-circuit and the name filter described above.
 * @returns {{profile: string, match: string, rationale: string, source: string}}
 *   `source` is `recommended`, `fallback` or `default-only`; `match` is one
 *   of `exact | partial | fallback | default-only`.
 */
export function selectRecommendation(ranked, opts = {}) {
  const requestedThreshold = opts?.threshold;
  const threshold =
    typeof requestedThreshold === 'number' && !Number.isNaN(requestedThreshold)
      ? requestedThreshold
      : RECOMMENDATION_THRESHOLD;
  const available = Array.isArray(opts?.availableProfiles) ? opts.availableProfiles : null;

  if (available !== null && available.length === 1 && available[0] === 'default') {
    return {
      profile: 'default',
      match: 'default-only',
      rationale: 'Only the default profile is available; recommendation skipped.',
      source: 'default-only',
    };
  }

  if (!Array.isArray(ranked) || ranked.length === 0) {
    return {
      profile: 'default',
      match: 'fallback',
      rationale: 'profile-recommender returned no ranked profiles; using default.',
      source: 'fallback',
    };
  }

  // Only filter by name when the caller actually told us what exists.
  const allowedNames = available !== null && available.length > 0 ? new Set(available) : null;

  // Find the highest-scoring usable entry; the first one wins ties.
  let top = null;
  for (const entry of ranked) {
    if (!entry || typeof entry.name !== 'string' || entry.name.trim() === '') continue;
    if (allowedNames !== null && !allowedNames.has(entry.name)) continue;
    const score = Number.isFinite(entry.score) ? entry.score : 0;
    if (top === null || score > top.score) {
      top = { ...entry, score };
    }
  }

  if (top === null) {
    return {
      profile: 'default',
      match: 'fallback',
      rationale: 'profile-recommender output had no usable entries; using default.',
      source: 'fallback',
    };
  }

  const hasRationale = typeof top.rationale === 'string' && top.rationale.trim() !== '';

  if (top.score >= threshold) {
    return {
      profile: top.name,
      match: RANKED_MATCH_QUALITIES.has(top.match_quality) ? top.match_quality : 'partial',
      rationale: hasRationale ? top.rationale : `Top-ranked profile (score ${top.score}).`,
      source: 'recommended',
    };
  }

  return {
    profile: 'default',
    match: 'fallback',
    rationale: hasRationale
      ? `Top score ${top.score} below ${threshold}; ${top.rationale}`
      : `Top score ${top.score} below recommendation threshold ${threshold}.`,
    source: 'fallback',
  };
}
result = selectRecommendation([], { availableProfiles: ['default'] }); + assert.equal(result.profile, 'default'); + assert.equal(result.match, 'default-only'); + assert.equal(result.source, 'default-only'); +}); + +test('selectRecommendation: empty ranked input falls back', () => { + const result = selectRecommendation([]); + assert.equal(result.profile, 'default'); + assert.equal(result.match, 'fallback'); + assert.equal(result.source, 'fallback'); + assert.match(result.rationale, /no ranked profiles/); +}); + +test('selectRecommendation: malformed ranked input falls back', () => { + const result = selectRecommendation([null, { not_a_profile: true }]); + assert.equal(result.profile, 'default'); + assert.equal(result.source, 'fallback'); +}); + +test('selectRecommendation: top score above threshold returns recommendation', () => { + const ranked = [ + { name: 'security-deep', score: 0.91, match_quality: 'exact', rationale: 'OWASP + JWT in Intent.' }, + { name: 'default', score: 0.30, match_quality: 'fallback', rationale: 'No triggers.' }, + ]; + const result = selectRecommendation(ranked); + assert.equal(result.profile, 'security-deep'); + assert.equal(result.match, 'exact'); + assert.equal(result.source, 'recommended'); + assert.equal(result.rationale, 'OWASP + JWT in Intent.'); +}); + +test('selectRecommendation: top score below threshold falls back to default', () => { + const ranked = [ + { name: 'feature', score: 0.55, match_quality: 'partial', rationale: 'Some keyword hits.' }, + { name: 'default', score: 0.30, match_quality: 'fallback', rationale: 'Baseline.' 
}, + ]; + const result = selectRecommendation(ranked); + assert.equal(result.profile, 'default'); + assert.equal(result.match, 'fallback'); + assert.equal(result.source, 'fallback'); + // Rationale should reference both the score and the top entry's rationale + assert.match(result.rationale, /0\.55/); + assert.match(result.rationale, /Some keyword hits/); +}); + +test('selectRecommendation: respects custom threshold', () => { + const ranked = [ + { name: 'feature', score: 0.55, match_quality: 'partial', rationale: 'Match.' }, + ]; + // With low threshold the same entry IS the recommendation + const result = selectRecommendation(ranked, { threshold: 0.5 }); + assert.equal(result.profile, 'feature'); + assert.equal(result.source, 'recommended'); +}); + +test('selectRecommendation: highest-score wins regardless of input order', () => { + const ranked = [ + { name: 'a', score: 0.40, match_quality: 'partial', rationale: 'Low.' }, + { name: 'b', score: 0.95, match_quality: 'exact', rationale: 'High.' }, + { name: 'c', score: 0.72, match_quality: 'partial', rationale: 'Mid.' }, + ]; + const result = selectRecommendation(ranked); + assert.equal(result.profile, 'b'); + assert.equal(result.source, 'recommended'); +}); + +test('selectRecommendation: missing score treated as 0', () => { + const ranked = [ + { name: 'a', match_quality: 'fallback', rationale: 'No score.' 
}, + ]; + const result = selectRecommendation(ranked); + // Top entry has effective score 0 → falls back + assert.equal(result.profile, 'default'); + assert.equal(result.source, 'fallback'); +}); + +test('selectRecommendation: missing rationale gets a synthetic one', () => { + const ranked = [ + { name: 'security-deep', score: 0.85, match_quality: 'exact' }, + ]; + const result = selectRecommendation(ranked); + assert.equal(result.profile, 'security-deep'); + assert.match(result.rationale, /Top-ranked/); +}); + +test('RECOMMENDATION_THRESHOLD: matches plan default', () => { + // Sanity check that the export agrees with the documented threshold. + assert.equal(RECOMMENDATION_THRESHOLD, 0.7); +}); diff --git a/plugins/ultraplan-local/templates/ultrabrief-template.md b/plugins/ultraplan-local/templates/ultrabrief-template.md index eb65a6b..dd0064b 100644 --- a/plugins/ultraplan-local/templates/ultrabrief-template.md +++ b/plugins/ultraplan-local/templates/ultrabrief-template.md @@ -10,6 +10,9 @@ research_status: pending # pending | in_progress | complete | skipped auto_research: false # true if user opted into Claude-managed research interview_turns: {N} source: {interview | manual} +recommended_profile: default # ultraplan-local profile name (M1+) +profile_match: default-only # exact | partial | fallback | user-override | default-only +profile_rationale: "Single profile available; no recommendation made." --- # Task: {title}