diff --git a/scripts/templates/optimization/README.md b/scripts/templates/optimization/README.md
new file mode 100644
index 0000000..84f815d
--- /dev/null
+++ b/scripts/templates/optimization/README.md
@@ -0,0 +1,88 @@
+# Pipeline Optimization and Self-Healing
+
+Two tools for making agent pipelines more efficient and resilient over time.
+
+## pipeline-optimizer.sh
+
+Analyzes FEEDBACK.md and cost-events.jsonl to identify:
+
+| Issue | Detection | Recommendation |
+|-------|-----------|----------------|
+| Bottleneck agent | Top-2 by cost event count, 1.5x+ avg | Batch tool calls or narrow task scope |
+| Unnecessary revision loops | 3+ `loop-excess` pattern rows | Tighten acceptance criteria, add max-iterations guard |
+| Underutilized agent | Appears in < 10% of pipeline runs | Remove from pipeline or combine with another agent |
+| Cost outlier | Single run >= 3x average | Add per-run budget cap via budget-hook.sh |
+
+Output is written to `RECOMMENDATIONS.md` with a VFM pre-score for each
+recommendation. Higher VFM pre-scores mean more value per implementation effort.
+
+**This script does not auto-implement anything.** All changes require
+manual review and explicit approval. This is intentional — pipeline
+restructuring is a high-stakes operation.
+
+## self-healing.sh
+
+Categorizes errors and applies targeted recovery strategies:
+
+| Error Type | Recovery | Max Retries |
+|------------|----------|-------------|
+| `timeout` | Retry with shorter scope | 5 (hard cap) |
+| `permission-denied` | Log and skip | 0 (no retry) |
+| `tool-not-found` | Alert operator | 0 (no retry) |
+| `api-error` | Exponential backoff (2^(attempt-1) seconds: 1s, 2s, 4s) | 3 |
+| `content-quality` | Retry with stricter prompt | 2 |
+
+**Hard cap: 5 total attempts regardless of category.** This follows the
+OpenClaw pattern — unbounded retry loops are the most common cause of
+runaway agent costs. The cap is non-negotiable.
+
+Once `--attempt` exceeds the hard cap, the script exits with code 2 (escalate).
+The caller is responsible for deciding whether to pause, alert a human,
+or abort the pipeline run.
+
+## Connection to feedback and VFM
+
+```
+feedback-collector.sh -> FEEDBACK.md -> performance-scorer.sh -> flagged agents
+                              |
+                 pipeline-optimizer.sh -> RECOMMENDATIONS.md
+                              |
+                  (manual review + approval)
+                              |
+                    prompt/pipeline update
+                              |
+                   new runs -> new feedback
+```
+
+VFM pre-scores in RECOMMENDATIONS.md use the same 0–100 scale as
+`scripts/templates/proactive/VFM-SCORING.md` (Step 11). They are
+pre-scores, not final scores — the VFM evaluation still needs to run
+when the task is scheduled. The pre-scores help prioritize which
+recommendations to tackle first.
+
+## Safety limits
+
+- `pipeline-optimizer.sh`: read-only analysis — never modifies pipeline files
+- `self-healing.sh`: max 5 attempts hard cap, permission errors never retried
+- All recovery events are logged to `healing-log.jsonl` as an audit trail
+- No auto-escalation to external systems — exit codes only
+
+## Usage
+
+```bash
+# Run optimizer for all pipelines
+./optimization/pipeline-optimizer.sh
+
+# Run optimizer for a specific pipeline
+./optimization/pipeline-optimizer.sh --pipeline doc-pipeline
+
+# Handle an error in a pipeline step
+./optimization/self-healing.sh \
+  --error-type api-error \
+  --agent agent-writer \
+  --attempt 1 \
+  --context "OpenAI timeout on summarize call"
+
+# Pretty-print the healing log (one JSON object per line; needs Python 3.8+)
+python3 -m json.tool --json-lines healing-log.jsonl
+```
diff --git a/scripts/templates/optimization/pipeline-optimizer.sh b/scripts/templates/optimization/pipeline-optimizer.sh
new file mode 100644
index 0000000..b5c09b8
--- /dev/null
+++ b/scripts/templates/optimization/pipeline-optimizer.sh
@@ -0,0 +1,221 @@
+#!/bin/bash
+# Pipeline optimizer: identify bottlenecks, excess loops, cost outliers.
+# Bash 3.2 compatible. Uses python3 for all analysis.
+# Does NOT auto-implement any changes -- produces RECOMMENDATIONS.md only.
+#
+# Analysis covers:
+#   - Bottleneck agents (top 2 by cost-event count, above 1.5x the per-agent average)
+#   - Unnecessary revision loops (3+ feedback rows tagged 'loop-excess')
+#   - Underutilized agents (present in < 10% of pipeline runs; needs 10+ runs)
+#   - Cost outliers (a single run with >= 3x the average run's event count)
+#
+# Cost is approximated by the number of events in cost-events.jsonl. A "run"
+# is the set of events recorded for one agent on one calendar date.
+#
+# Output: RECOMMENDATIONS.md with VFM pre-scores for each recommendation.
+#
+# Usage:
+#   ./pipeline-optimizer.sh
+#   ./pipeline-optimizer.sh --pipeline {{PIPELINE_NAME}}
+#
+# Placeholders:
+#   {{WORKING_DIR}} - absolute path to project directory
+
+WORKING_DIR="{{WORKING_DIR}}"
+FEEDBACK_FILE="$WORKING_DIR/FEEDBACK.md"
+COST_LOG="$WORKING_DIR/budget/cost-events.jsonl"
+RECOMMENDATIONS_FILE="$WORKING_DIR/RECOMMENDATIONS.md"
+PIPELINE_FILTER=""
+
+# Parse arguments (bash 3.2 compatible).
+# A flag without its value must be rejected explicitly: `shift 2` with only one
+# argument left fails without shifting, which would spin this loop forever.
+while [ "$#" -gt 0 ]; do
+    case "$1" in
+        --pipeline)
+            if [ "$#" -lt 2 ]; then
+                echo "Error: --pipeline requires a value" >&2
+                exit 1
+            fi
+            PIPELINE_FILTER="$2"
+            shift 2
+            ;;
+        *) shift ;;
+    esac
+done
+
+# Values are handed to Python through the environment and the heredoc is
+# quoted, so paths or pipeline names containing quotes, backslashes or `$`
+# can neither break nor inject into the Python source.
+OPT_FEEDBACK_FILE="$FEEDBACK_FILE" \
+OPT_COST_LOG="$COST_LOG" \
+OPT_RECOMMENDATIONS_FILE="$RECOMMENDATIONS_FILE" \
+OPT_PIPELINE_FILTER="$PIPELINE_FILTER" \
+python3 << 'PYEOF'
+import json
+import os
+import re
+from collections import Counter, defaultdict
+from datetime import datetime, timezone
+
+feedback_file = os.environ["OPT_FEEDBACK_FILE"]
+cost_log = os.environ["OPT_COST_LOG"]
+recommendations_file = os.environ["OPT_RECOMMENDATIONS_FILE"]
+pipeline_filter = os.environ.get("OPT_PIPELINE_FILTER", "")
+
+# Parse FEEDBACK.md: rows of the markdown table whose header contains '| Date |'.
+# Columns used: 0=date, 1=pipeline, 2=agent, 3=score, 4=issue, 6=pattern.
+feedback_rows = []
+if os.path.exists(feedback_file):
+    with open(feedback_file, encoding="utf-8", errors="replace") as f:
+        in_table = False
+        for line in f:
+            line = line.strip()
+            if '| Date |' in line:
+                in_table = True
+                continue
+            if not in_table or not line.startswith('|'):
+                continue
+            # Skip the separator row and unfilled template rows.
+            if line.startswith('|---') or '{{' in line:
+                continue
+            cols = [c.strip() for c in line.strip('|').split('|')]
+            if len(cols) < 7:
+                continue
+            score_m = re.match(r'(\d+)', cols[3])
+            feedback_rows.append({
+                'date': cols[0],
+                'pipeline': cols[1],
+                'agent': cols[2],
+                'score': int(score_m.group(1)) if score_m else 0,
+                'issue': cols[4],
+                'pattern': cols[6],
+            })
+
+# Filter by pipeline
+if pipeline_filter:
+    feedback_rows = [r for r in feedback_rows if r['pipeline'] == pipeline_filter]
+
+# Parse cost events (one JSON object per line). Malformed or non-object lines
+# are skipped on purpose: the analysis is best-effort over an append-only log.
+# NOTE(review): cost events are not filtered by --pipeline; the fields read
+# here are only 'agent' and 'timestamp' -- confirm whether events carry a
+# pipeline name before scoping them.
+cost_events = []
+if os.path.exists(cost_log):
+    with open(cost_log, encoding="utf-8", errors="replace") as f:
+        for line in f:
+            line = line.strip()
+            if not line:
+                continue
+            try:
+                event = json.loads(line)
+            except ValueError:
+                continue
+            if isinstance(event, dict):
+                cost_events.append(event)
+
+# Event counts are the cost proxy.
+events_by_agent = Counter()            # agent -> total events
+events_by_run = defaultdict(Counter)   # agent -> {date: events in that run}
+for e in cost_events:
+    agent = str(e.get('agent') or 'unknown')
+    date = str(e.get('timestamp', ''))[:10]
+    events_by_agent[agent] += 1
+    events_by_run[agent][date] += 1
+
+# Build recommendations
+recommendations = []
+
+# 1. Bottleneck agents: top 2 by event count, if above 1.5x the per-agent average
+if events_by_agent:
+    avg_cost = sum(events_by_agent.values()) / len(events_by_agent)
+    for agent, total in events_by_agent.most_common(2):
+        if total > avg_cost * 1.5:
+            recommendations.append({
+                'type': 'bottleneck',
+                'agent': agent,
+                'description': f"Agent '{agent}' accounts for {total} events vs avg {avg_cost:.0f}. "
+                               f"Consider batching its tool calls or reducing its task scope.",
+                'vfm_prescore': 70
+            })
+
+# 2. Unnecessary revision loops: agents with loop-excess pattern >= 3 times
+pattern_by_agent = defaultdict(Counter)
+for r in feedback_rows:
+    if r['pattern']:
+        pattern_by_agent[r['agent']][r['pattern']] += 1
+
+for agent, patterns in pattern_by_agent.items():
+    count = patterns.get('loop-excess', 0)
+    if count >= 3:
+        recommendations.append({
+            'type': 'loop-excess',
+            'agent': agent,
+            'description': f"Agent '{agent}' has {count} feedback rows tagged 'loop-excess'. "
+                           f"Review pipeline revision criteria -- tighten acceptance conditions "
+                           f"or add a max-iterations guard (see self-healing.sh).",
+            'vfm_prescore': 80
+        })
+
+# 3. Underutilized agents: invoked in < 10% of pipeline runs.
+# A pipeline run is identified by date + pipeline; fewer than 10 runs is too
+# little data to call an agent underutilized.
+if feedback_rows:
+    all_runs = set(r['date'] + ':' + r['pipeline'] for r in feedback_rows)
+    total_runs = len(all_runs)
+    agent_runs = defaultdict(set)
+    for r in feedback_rows:
+        agent_runs[r['agent']].add(r['date'] + ':' + r['pipeline'])
+    for agent, runs in agent_runs.items():
+        utilization = len(runs) / total_runs
+        if utilization < 0.1 and total_runs >= 10:
+            recommendations.append({
+                'type': 'underutilized',
+                'agent': agent,
+                'description': f"Agent '{agent}' appears in only {utilization*100:.0f}% of pipeline runs. "
+                               f"Consider removing from the pipeline or combining with another agent.",
+                'vfm_prescore': 60
+            })
+
+# 4. Cost outliers: single-run cost >= 3x the average run cost.
+# Only the most expensive run is reported, so each agent gets at most one
+# recommendation.
+all_run_totals = [n for runs in events_by_run.values() for n in runs.values()]
+if all_run_totals:
+    avg_run = sum(all_run_totals) / len(all_run_totals)
+    for agent, runs in events_by_run.items():
+        run_cost = max(runs.values())
+        if run_cost >= avg_run * 3:
+            recommendations.append({
+                'type': 'cost-outlier',
+                'agent': agent,
+                'description': f"Agent '{agent}' had a run costing {run_cost} events "
+                               f"vs avg {avg_run:.1f} (3x+ threshold). "
+                               f"Add per-run budget cap with budget-hook.sh.",
+                'vfm_prescore': 75
+            })
+
+# Write RECOMMENDATIONS.md
+timestamp = datetime.now(timezone.utc).strftime('%Y-%m-%dT%H:%M:%SZ')
+pipeline_label = pipeline_filter if pipeline_filter else "all pipelines"
+
+lines = [
+    "# Pipeline Optimization Recommendations",
+    "",
+    f"Generated: {timestamp}",
+    f"Scope: {pipeline_label}",
+    "",
+    "> These are recommendations only. No changes have been made.",
+    "> Review each item and implement manually or with team approval.",
+    "",
+]
+
+if recommendations:
+    lines.append(f"## Recommendations ({len(recommendations)} found)")
+    lines.append("")
+    for i, rec in enumerate(recommendations, 1):
+        lines.append(f"### R{i}: {rec['type'].upper()} -- {rec['agent']}")
+        lines.append("")
+        lines.append(rec['description'])
+        lines.append("")
+        lines.append(f"**VFM pre-score:** {rec['vfm_prescore']}/100")
+        lines.append("")
+else:
+    lines.append("## No recommendations")
+    lines.append("")
+    lines.append("No bottlenecks, excess loops, underutilized agents, or cost outliers detected.")
+    lines.append("")
+
+lines.append("## Next steps")
+lines.append("")
+lines.append("1. Review each recommendation with the team")
+lines.append("2. Prioritize by VFM pre-score (higher = more value per effort)")
+lines.append("3. Implement approved changes one at a time")
+lines.append("4. Run feedback-collector.sh for 10+ runs after each change")
+lines.append("5. Re-run pipeline-optimizer.sh to confirm improvement")
+
+with open(recommendations_file, 'w', encoding="utf-8") as f:
+    f.write('\n'.join(lines) + '\n')
+
+print(f"Recommendations written to {recommendations_file}")
+print(f"  Found: {len(recommendations)} recommendations")
+for rec in recommendations:
+    print(f"  - [{rec['type']}] {rec['agent']}: VFM pre-score {rec['vfm_prescore']}")
+PYEOF
diff --git a/scripts/templates/optimization/self-healing.sh b/scripts/templates/optimization/self-healing.sh
new file mode 100644
index 0000000..2f31b8a
--- /dev/null
+++ b/scripts/templates/optimization/self-healing.sh
@@ -0,0 +1,147 @@
+#!/bin/bash
+# Self-healing: categorize errors and apply recovery strategies.
+# Bash 3.2 compatible. Uses python3 for JSON/log parsing.
+#
+# Error categories and recovery strategies:
+#   timeout           -> retry with shorter task scope
+#   permission-denied -> log and skip (do not retry)
+#   tool-not-found    -> log and alert, do not retry
+#   api-error         -> exponential backoff, max 3 retries
+#   content-quality   -> re-run with stricter prompt, max 2 retries
+#
+# Max total attempts: 5 (OpenClaw pattern -- hard cap regardless of category).
+# All recovery events logged to healing-log.jsonl.
+#
+# Usage:
+#   ./self-healing.sh --error-type <type> --agent <name> --attempt <n> --context <message>
+#
+# Exit codes:
+#   0 -- recovery action taken (caller should retry)
+#   1 -- no recovery possible (caller should abort); also used for bad arguments
+#   2 -- max attempts reached (caller should escalate)
+#
+# Placeholders:
+#   {{WORKING_DIR}} - absolute path to project directory
+
+WORKING_DIR="{{WORKING_DIR}}"
+HEALING_LOG="$WORKING_DIR/healing-log.jsonl"
+MAX_ATTEMPTS=5
+
+ERROR_TYPE=""
+AGENT_NAME=""
+ATTEMPT=1
+CONTEXT_MSG=""
+
+# Parse arguments (bash 3.2 compatible).
+# Every recognized flag takes a value. A trailing flag must be rejected
+# explicitly: `shift 2` with one argument left fails without shifting, which
+# would spin this loop forever.
+while [ "$#" -gt 0 ]; do
+    case "$1" in
+        --error-type|--agent|--attempt|--context)
+            if [ "$#" -lt 2 ]; then
+                echo "Error: $1 requires a value" >&2
+                exit 1
+            fi
+            case "$1" in
+                --error-type) ERROR_TYPE="$2" ;;
+                --agent)      AGENT_NAME="$2" ;;
+                --attempt)    ATTEMPT="$2" ;;
+                --context)    CONTEXT_MSG="$2" ;;
+            esac
+            shift 2
+            ;;
+        *) shift ;;
+    esac
+done
+
+if [ -z "$ERROR_TYPE" ]; then
+    echo "Usage: $0 --error-type <type> --agent <name> --attempt <n> --context <message>"
+    exit 1
+fi
+
+# --attempt feeds numeric tests below; reject anything that is not a plain
+# non-negative integer instead of letting `[ ... -gt ... ]` error out.
+case "$ATTEMPT" in
+    ''|*[!0-9]*)
+        echo "Error: --attempt must be a non-negative integer (got '$ATTEMPT')" >&2
+        exit 1
+        ;;
+esac
+
+# Append one JSON event to the healing log and echo it to stdout.
+#   $1  action name
+#   $2  name of the free-text field ("reason" or "detail")
+#   $3  value of that field
+#   $4  "pretty" to echo the event with indent=2, anything else for one line
+# All values reach Python through the environment, never through the source
+# text, so quotes in an agent name, context message or detail string cannot
+# break the Python syntax or inject code.
+log_event() {
+    HL_LOG="$HEALING_LOG" \
+    HL_AGENT="$AGENT_NAME" \
+    HL_ERROR_TYPE="$ERROR_TYPE" \
+    HL_ATTEMPT="$ATTEMPT" \
+    HL_CONTEXT="$CONTEXT_MSG" \
+    HL_ACTION="$1" \
+    HL_FIELD="$2" \
+    HL_VALUE="$3" \
+    HL_STYLE="$4" \
+    python3 -c '
+import json, os, time
+env = os.environ
+event = {
+    "timestamp": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()),
+    "agent": env["HL_AGENT"],
+    "error_type": env["HL_ERROR_TYPE"],
+    "attempt": int(env["HL_ATTEMPT"]),
+    "action": env["HL_ACTION"],
+    env["HL_FIELD"]: env["HL_VALUE"],
+    "context": env["HL_CONTEXT"],
+}
+with open(env["HL_LOG"], "a") as f:
+    f.write(json.dumps(event) + "\n")
+print(json.dumps(event, indent=2 if env["HL_STYLE"] == "pretty" else None))
+' || echo "Warning: could not record event in $HEALING_LOG" >&2
+}
+
+# Hard cap: escalate once the attempt number exceeds MAX_ATTEMPTS,
+# whatever the error category.
+if [ "$ATTEMPT" -gt "$MAX_ATTEMPTS" ]; then
+    echo "MAX ATTEMPTS REACHED ($MAX_ATTEMPTS) for $AGENT_NAME. Escalating."
+    log_event "escalate" "reason" "max_attempts_reached" "compact"
+    exit 2
+fi
+
+# Determine recovery action per category
+RECOVERY_ACTION=""
+RECOVERY_DETAIL=""
+EXIT_CODE=0
+
+case "$ERROR_TYPE" in
+    timeout)
+        RECOVERY_ACTION="retry_shorter"
+        RECOVERY_DETAIL="Re-run with reduced task scope. Split task if attempt >= 3."
+        if [ "$ATTEMPT" -ge 3 ]; then
+            RECOVERY_DETAIL="Attempt $ATTEMPT: recommend splitting task before retry."
+        fi
+        EXIT_CODE=0
+        ;;
+    permission-denied)
+        RECOVERY_ACTION="skip"
+        RECOVERY_DETAIL="Permission errors cannot be auto-resolved. Log and skip. Notify operator."
+        EXIT_CODE=1
+        ;;
+    tool-not-found)
+        RECOVERY_ACTION="alert"
+        RECOVERY_DETAIL="Tool not found -- check agent config and hook registrations. Do not retry."
+        EXIT_CODE=1
+        ;;
+    api-error)
+        # Exponential backoff: 2^(attempt-1) seconds, max 3 retries.
+        # The wait happens here, before the event is logged and the caller retries.
+        if [ "$ATTEMPT" -le 3 ]; then
+            BACKOFF_SECS=$(python3 -c 'import sys; print(min(2 ** (int(sys.argv[1]) - 1), 16))' "$ATTEMPT")
+            RECOVERY_ACTION="retry_backoff"
+            RECOVERY_DETAIL="API error -- wait ${BACKOFF_SECS}s then retry (attempt $ATTEMPT/3)."
+            sleep "$BACKOFF_SECS"
+            EXIT_CODE=0
+        else
+            RECOVERY_ACTION="abort"
+            RECOVERY_DETAIL="API error persists after 3 retries. Aborting."
+            EXIT_CODE=1
+        fi
+        ;;
+    content-quality)
+        # Max 2 retries for quality issues
+        if [ "$ATTEMPT" -le 2 ]; then
+            RECOVERY_ACTION="retry_strict"
+            RECOVERY_DETAIL="Re-run with stricter prompt. Add explicit quality criteria (attempt $ATTEMPT/2)."
+            EXIT_CODE=0
+        else
+            RECOVERY_ACTION="escalate_quality"
+            RECOVERY_DETAIL="Content quality below threshold after 2 retries. Escalate to human review."
+            EXIT_CODE=2
+        fi
+        ;;
+    *)
+        RECOVERY_ACTION="unknown"
+        RECOVERY_DETAIL="Unknown error type '$ERROR_TYPE'. Logging and aborting."
+        EXIT_CODE=1
+        ;;
+esac
+
+# Log recovery event
+log_event "$RECOVERY_ACTION" "detail" "$RECOVERY_DETAIL" "pretty"
+
+echo "Recovery: $RECOVERY_ACTION -- $RECOVERY_DETAIL"
+exit $EXIT_CODE