#!/bin/bash # generate-skills.sh — Generate knowledge reference files from manifest # # Reads manifest.json and generates each skill file using claude --print # with the prompt template. Supports resuming from where it left off. # # Usage: # ./generate-skills.sh # Generate all pending skills # ./generate-skills.sh --category rag-architecture # Generate single category # ./generate-skills.sh --skill azure-ai-vision-overview # Generate single skill # ./generate-skills.sh --wave 1 # Generate wave 1 (HIGH) only # ./generate-skills.sh --dry-run # Show what would be generated # ./generate-skills.sh --pilot 5 # Generate first N skills only # # Prerequisites: # - claude CLI installed and authenticated # - jq installed # - manifest.json (run expand-categories.sh first) set -euo pipefail SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" PLUGIN_DIR="$(cd "$SCRIPT_DIR/../.." && pwd)" REFS_DIR="$PLUGIN_DIR/skills/ms-ai-engineering/references" MANIFEST_FILE="$SCRIPT_DIR/manifest.json" STATE_FILE="$SCRIPT_DIR/state.json" PROMPT_TEMPLATE="$SCRIPT_DIR/prompt-template.md" CATEGORIES_FILE="$SCRIPT_DIR/categories.json" LOG_DIR="$SCRIPT_DIR/logs" # Model for generation (sonnet for quality, haiku for speed) MODEL="${MODEL:-sonnet}" # Limits PARALLEL="${PARALLEL:-1}" # Sequential by default for reliability DELAY="${DELAY:-3}" # Seconds between API calls MIN_SIZE="${MIN_SIZE:-5000}" # Minimum file size in bytes (quality gate) MAX_RETRIES="${MAX_RETRIES:-2}" # Retries for failed/small files # Flags DRY_RUN=false PILOT=0 # Colors RED='\033[0;31m' GREEN='\033[0;32m' YELLOW='\033[1;33m' BLUE='\033[0;34m' CYAN='\033[0;36m' NC='\033[0m' log() { echo -e "${BLUE}[gen]${NC} $1" >&2; } success() { echo -e "${GREEN}[gen]${NC} $1" >&2; } warn() { echo -e "${YELLOW}[gen]${NC} $1" >&2; } error() { echo -e "${RED}[gen]${NC} $1" >&2; } detail() { echo -e "${CYAN}[gen]${NC} $1" >&2; } # Check prerequisites check_prereqs() { if ! command -v claude &>/dev/null; then error "claude CLI not found" exit 1 fi if ! command -v jq &>/dev/null; then error "jq not found" exit 1 fi if [[ ! -f "$MANIFEST_FILE" ]]; then error "manifest.json not found. Run expand-categories.sh first." exit 1 fi } # Initialize or load state init_state() { if [[ ! -f "$STATE_FILE" ]]; then jq -n '{ "started": "'$(date -Iseconds)'", "completed": [], "failed": [], "skipped": [], "stats": { "total_generated": 0, "total_failed": 0, "total_skipped": 0, "total_bytes": 0 } }' > "$STATE_FILE" fi } # Check if skill is already completed is_completed() { local skill_id="$1" jq -e --arg id "$skill_id" '.completed | index($id) != null' "$STATE_FILE" &>/dev/null } # Mark skill as completed mark_completed() { local skill_id="$1" local file_size="$2" jq --arg id "$skill_id" --arg size "$file_size" ' .completed += [$id] | .stats.total_generated += 1 | .stats.total_bytes += ($size | tonumber) ' "$STATE_FILE" > "$STATE_FILE.tmp" && mv "$STATE_FILE.tmp" "$STATE_FILE" } # Mark skill as failed mark_failed() { local skill_id="$1" local reason="$2" jq --arg id "$skill_id" --arg reason "$reason" ' .failed += [{"id": $id, "reason": $reason, "time": (now | todate)}] | .stats.total_failed += 1 ' "$STATE_FILE" > "$STATE_FILE.tmp" && mv "$STATE_FILE.tmp" "$STATE_FILE" } # Get existing context for a category (overlap files content summary) get_existing_context() { local category_key="$1" local overlaps overlaps=$(jq -r ".categories[\"$category_key\"].existing_overlap // [] | .[]" "$CATEGORIES_FILE" 2>/dev/null) if [[ -z "$overlaps" ]]; then echo "Ingen direkte overlapp med eksisterende filer." return fi local context="" for overlap in $overlaps; do local filepath="$REFS_DIR/$overlap" if [[ -f "$filepath" ]]; then # Extract just the header and section titles local summary summary=$(head -50 "$filepath" | grep -E '^#{1,3} ' | head -10) context+="**$overlap:** $summary"$'\n' fi done echo "${context:-Ingen direkte overlapp med eksisterende filer.}" } # Get related skills in same category get_related_skills() { local category_key="$1" local current_skill="$2" jq -r --arg key "$category_key" --arg current "$current_skill" ' .categories[$key].skills[] | select(.id != $current) | "- \(.title): \(.description)" ' "$MANIFEST_FILE" | head -20 } # Build the prompt for a specific skill build_prompt() { local category_key="$1" local skill_id="$2" local title description subtopics title=$(jq -r --arg key "$category_key" --arg id "$skill_id" \ '.categories[$key].skills[] | select(.id == $id) | .title' "$MANIFEST_FILE") description=$(jq -r --arg key "$category_key" --arg id "$skill_id" \ '.categories[$key].skills[] | select(.id == $id) | .description' "$MANIFEST_FILE") subtopics=$(jq -r --arg key "$category_key" --arg id "$skill_id" \ '.categories[$key].skills[] | select(.id == $id) | .subtopics | join(", ")' "$MANIFEST_FILE") local category_name category_description category_name=$(jq -r --arg key "$category_key" '.categories[$key].name' "$MANIFEST_FILE") category_description=$(jq -r --arg key "$category_key" '.categories[$key].name' "$CATEGORIES_FILE") local existing_context existing_context=$(get_existing_context "$category_key") local related_skills related_skills=$(get_related_skills "$category_key" "$skill_id") # Build the full prompt from template cat <"$LOG_DIR/gen-${skill_id}.err"); then error " Claude CLI failed for $skill_id" if [[ $attempt -le $MAX_RETRIES ]]; then warn " Retrying in ${DELAY}s..." sleep "$DELAY" generate_skill "$category_key" "$skill_id" $((attempt + 1)) return $? fi mark_failed "$skill_id" "claude CLI error" return 1 fi # Write output echo "$output" > "$output_file" # Quality gate: check file size local file_size file_size=$(wc -c < "$output_file" | tr -d ' ') if [[ $file_size -lt $MIN_SIZE ]]; then warn " File too small: ${file_size}B (min: ${MIN_SIZE}B)" if [[ $attempt -le $MAX_RETRIES ]]; then warn " Retrying with stronger prompt..." sleep "$DELAY" generate_skill "$category_key" "$skill_id" $((attempt + 1)) return $? fi error " Giving up on $skill_id (still too small after retries)" mark_failed "$skill_id" "file too small: ${file_size}B" return 1 fi # Quality gate: check that file starts with # (markdown header) if ! head -1 "$output_file" | grep -q '^# '; then warn " File doesn't start with markdown header" # Try to fix by removing leading content before first header local temp_file="$output_file.tmp" sed -n '/^# /,$p' "$output_file" > "$temp_file" if [[ -s "$temp_file" ]]; then mv "$temp_file" "$output_file" file_size=$(wc -c < "$output_file" | tr -d ' ') else rm -f "$temp_file" fi fi mark_completed "$skill_id" "$file_size" success " Generated: $skill_id (${file_size}B)" # Rate limiting sleep "$DELAY" } # Generate all skills in a category generate_category() { local category_key="$1" local category_name category_name=$(jq -r --arg key "$category_key" '.categories[$key].name' "$MANIFEST_FILE") local skill_count skill_count=$(jq --arg key "$category_key" '.categories[$key].skills | length' "$MANIFEST_FILE") log "" log "═══════════════════════════════════════" log "Category: $category_name ($skill_count skills)" log "═══════════════════════════════════════" local skill_ids=() while IFS= read -r line; do skill_ids+=("$line") done < <(jq -r --arg key "$category_key" \ '.categories[$key].skills[].id' "$MANIFEST_FILE") local generated=0 for skill_id in "${skill_ids[@]}"; do if generate_skill "$category_key" "$skill_id"; then generated=$((generated + 1)) fi # Pilot mode: stop after N skills total if [[ $PILOT -gt 0 ]]; then local total_completed total_completed=$(jq '.stats.total_generated' "$STATE_FILE") if [[ $total_completed -ge $PILOT ]]; then warn "Pilot limit reached ($PILOT skills)" return 0 fi fi done success "Category complete: $generated/$skill_count generated" } # Print summary print_summary() { local total_generated total_failed total_bytes total_generated=$(jq '.stats.total_generated' "$STATE_FILE") total_failed=$(jq '.stats.total_failed' "$STATE_FILE") total_bytes=$(jq '.stats.total_bytes' "$STATE_FILE") local total_kb=$((total_bytes / 1024)) echo "" log "═══════════════════════════════════════" log " GENERATION SUMMARY " log "═══════════════════════════════════════" success "Generated: $total_generated files ($total_kb KB)" [[ $total_failed -gt 0 ]] && error "Failed: $total_failed files" log "State: $STATE_FILE" log "Output: $REFS_DIR/" log "═══════════════════════════════════════" # List failed skills if any if [[ $total_failed -gt 0 ]]; then echo "" warn "Failed skills:" jq -r '.failed[] | " - \(.id): \(.reason)"' "$STATE_FILE" fi } # Parse arguments parse_args() { local category="" local skill="" local wave="" while [[ $# -gt 0 ]]; do case "$1" in --category|-c) category="$2" shift 2 ;; --skill|-s) skill="$2" shift 2 ;; --wave|-w) wave="$2" shift 2 ;; --model|-m) MODEL="$2" shift 2 ;; --dry-run) DRY_RUN=true shift ;; --pilot) PILOT="$2" shift 2 ;; --delay) DELAY="$2" shift 2 ;; --min-size) MIN_SIZE="$2" shift 2 ;; --max-retries) MAX_RETRIES="$2" shift 2 ;; --reset) rm -f "$STATE_FILE" log "State reset" shift ;; --help|-h) cat <