ktg-plugin-marketplace/plugins/ms-ai-architect/scripts/skill-gen/generate-skills.sh
Kjell Tore Guttormsen 6a7632146e feat(ms-ai-architect): add plugin to open marketplace (v1.5.0 baseline)
Initial addition of ms-ai-architect plugin to the open-source marketplace.
Private content excluded: orchestrator/ (Linear tooling), docs/utredning/
(client investigation), generated test reports and PDF export script.
skill-gen tooling moved from orchestrator/ to scripts/skill-gen/.

Security scan: WARNING (risk 20/100) — no secrets, no injection found.
False positive fixed: added gitleaks:allow to Python variable reference
in output-validation-grounding-verification.md line 109.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-04-07 17:17:17 +02:00

610 lines
19 KiB
Bash
Executable file

#!/bin/bash
# generate-skills.sh — Generate knowledge reference files from manifest
#
# Reads manifest.json and generates each skill file using claude --print
# with the prompt template. Supports resuming from where it left off.
#
# Usage:
# ./generate-skills.sh # Generate all pending skills
# ./generate-skills.sh --category rag-architecture # Generate single category
# ./generate-skills.sh --skill azure-ai-vision-overview # Generate single skill
# ./generate-skills.sh --wave 1 # Generate wave 1 (HIGH) only
# ./generate-skills.sh --dry-run # Show what would be generated
# ./generate-skills.sh --pilot 5 # Generate first N skills only
#
# Prerequisites:
# - claude CLI installed and authenticated
# - jq installed
# - manifest.json (run expand-categories.sh first)
set -euo pipefail

# --- Paths (everything is resolved relative to this script) ----------------
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PLUGIN_DIR="$(cd "${SCRIPT_DIR}/../.." && pwd)"
REFS_DIR="${PLUGIN_DIR}/skills/ms-ai-engineering/references"
MANIFEST_FILE="${SCRIPT_DIR}/manifest.json"
STATE_FILE="${SCRIPT_DIR}/state.json"
PROMPT_TEMPLATE="${SCRIPT_DIR}/prompt-template.md"
CATEGORIES_FILE="${SCRIPT_DIR}/categories.json"
LOG_DIR="${SCRIPT_DIR}/logs"

# --- Tunables (an environment value wins; empty or unset gets the default) --
: "${MODEL:=sonnet}"      # Model for generation (sonnet for quality, haiku for speed)
: "${PARALLEL:=1}"        # Sequential by default for reliability
: "${DELAY:=3}"           # Seconds between API calls
: "${MIN_SIZE:=5000}"     # Minimum file size in bytes (quality gate)
: "${MAX_RETRIES:=2}"     # Retries for failed/small files

# --- Flags (changed by the command-line parser) ----------------------------
DRY_RUN=false
PILOT=0

# --- Colors (kept as escape *text*; the logging helpers interpret them) ----
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
CYAN='\033[0;36m'
NC='\033[0m'
# Logging helpers. Each writes one line to stderr, prefixed with a coloured
# "[gen]" tag, so stdout stays free for data (e.g. the mode string printed by
# the argument parser).
#
# Only the colour variables go through %b (escape interpretation); the message
# itself is printed with %s so backslashes in titles or paths stay literal.
#   $1 - message text (optional; an empty line is logged when omitted)
log()     { printf '%b[gen]%b %s\n' "$BLUE"   "$NC" "${1-}" >&2; }
success() { printf '%b[gen]%b %s\n' "$GREEN"  "$NC" "${1-}" >&2; }
warn()    { printf '%b[gen]%b %s\n' "$YELLOW" "$NC" "${1-}" >&2; }
error()   { printf '%b[gen]%b %s\n' "$RED"    "$NC" "${1-}" >&2; }
detail()  { printf '%b[gen]%b %s\n' "$CYAN"   "$NC" "${1-}" >&2; }
# Verify everything the pipeline needs before doing any work.
# Exits the script with status 1 (after logging the reason) when the claude
# CLI, jq, or manifest.json is missing; returns 0 otherwise.
check_prereqs() {
  command -v claude >/dev/null 2>&1 || {
    error "claude CLI not found"
    exit 1
  }

  command -v jq >/dev/null 2>&1 || {
    error "jq not found"
    exit 1
  }

  [[ -f "$MANIFEST_FILE" ]] || {
    error "manifest.json not found. Run expand-categories.sh first."
    exit 1
  }
}
# Initialize or load state
#
# Creates $STATE_FILE with empty completed/failed/skipped lists and zeroed
# stats when it does not exist yet. An existing, non-empty state file is left
# untouched so an interrupted run can resume.
#
# Returns non-zero if the state file could not be written.
init_state() {
  # -s rather than -f: a zero-byte file (left behind by an interrupted write)
  # is not valid JSON and must be re-created, not resumed from.
  if [[ -s "$STATE_FILE" ]]; then
    return 0
  fi

  local started
  # `date -I` is not available everywhere; fall back to a portable UTC stamp.
  started=$(date -Iseconds 2>/dev/null) || started=$(date -u +%Y-%m-%dT%H:%M:%SZ)

  # Pass the timestamp as data (--arg) instead of splicing it into the jq
  # program, and write via a temp file so a failure never leaves a
  # half-written state.json behind.
  jq -n --arg started "$started" '{
    started: $started,
    completed: [],
    failed: [],
    skipped: [],
    stats: {
      total_generated: 0,
      total_failed: 0,
      total_skipped: 0,
      total_bytes: 0
    }
  }' > "$STATE_FILE.tmp" && mv "$STATE_FILE.tmp" "$STATE_FILE"
}
# Tell whether a skill id is recorded in the state file's "completed" list.
#   $1 - skill id
# Returns 0 when the id is listed, non-zero otherwise (including when the
# state file cannot be read). Prints nothing.
is_completed() {
  local wanted="$1"
  jq -e --arg id "$wanted" \
    '(.completed // []) | any(.[]; . == $id)' \
    "$STATE_FILE" >/dev/null 2>&1
}
# Mark skill as completed
#
# Arguments:
#   $1 - skill id
#   $2 - size of the generated file in bytes
#
# Appends the id to .completed and bumps .stats.total_generated and
# .stats.total_bytes. Any earlier failure entries for the same id are dropped
# (and .stats.total_failed recomputed), so a skill that failed in a previous
# run and succeeded on resume is no longer reported as failed in the summary.
#
# Returns non-zero, leaving the state file untouched, if jq fails.
mark_completed() {
  local skill_id="$1"
  local file_size="$2"
  local tmp="$STATE_FILE.tmp"

  if jq --arg id "$skill_id" --arg size "$file_size" '
      .completed += [$id]
      | .stats.total_generated += 1
      | .stats.total_bytes += ($size | tonumber)
      | .failed = [(.failed // [])[] | select(.id != $id)]
      | .stats.total_failed = (.failed | length)
    ' "$STATE_FILE" > "$tmp"; then
    mv "$tmp" "$STATE_FILE"
  else
    # Do not leave a truncated temp file next to the real state.
    rm -f "$tmp"
    return 1
  fi
}
# Record a failed generation attempt in the state file.
#   $1 - skill id
#   $2 - human-readable reason
# Appends {id, reason, time} to .failed (time is jq's current UTC timestamp)
# and increments .stats.total_failed. The state file is replaced only when jq
# succeeded; otherwise jq's exit status is returned.
mark_failed() {
  local failed_id="$1" why="$2"
  local scratch="${STATE_FILE}.tmp"

  jq --arg id "$failed_id" --arg reason "$why" \
    '.failed += [{id: $id, reason: $reason, time: (now | todate)}]
     | .stats.total_failed += 1' \
    "$STATE_FILE" > "$scratch" || return

  mv "$scratch" "$STATE_FILE"
}
# Get existing context for a category (overlap files content summary)
#
# Arguments:
#   $1 - category key in categories.json
#
# Prints, for every file listed in the category's "existing_overlap" array
# that exists under $REFS_DIR, one entry "**<file>:** <headings>" where
# <headings> are up to ten level 1-3 markdown headings found in the file's
# first 50 lines. Prints a fixed Norwegian "no overlap" sentence when there is
# nothing to report (no overlap configured, files missing, or categories.json
# unreadable).
get_existing_context() {
  local category_key="$1"
  local no_overlap="Ingen direkte overlapp med eksisterende filer."
  local context="" overlap filepath summary

  # The key is passed with --arg (never spliced into the jq program) and the
  # names are read line by line, so keys and file names containing quotes,
  # spaces or glob characters are handled literally.
  while IFS= read -r overlap; do
    [[ -n "$overlap" ]] || continue
    filepath="$REFS_DIR/$overlap"
    [[ -f "$filepath" ]] || continue

    # Extract just the header and section titles. grep finding nothing is
    # not an error here; the entry is then listed with an empty summary.
    summary=$(head -50 "$filepath" | grep -E '^#{1,3} ' | head -10) || true
    context+="**$overlap:** $summary"$'\n'
  done < <(jq -r --arg key "$category_key" \
    '.categories[$key].existing_overlap // [] | .[]' "$CATEGORIES_FILE" 2>/dev/null)

  printf '%s\n' "${context:-$no_overlap}"
}
# List the sibling skills of a skill, for cross-references in the prompt.
#   $1 - category key in manifest.json
#   $2 - id of the skill to leave out of the list
# Prints "- <title>: <description>" per sibling, capped at 20 output lines.
get_related_skills() {
  local cat_key="$1" self_id="$2"
  local filter='.categories[$key].skills[] | select(.id != $current) | "- \(.title): \(.description)"'

  jq -r --arg key "$cat_key" --arg current "$self_id" "$filter" "$MANIFEST_FILE" \
    | head -n 20
}
# Build the prompt for a specific skill
#
# Arguments:
#   $1 - category key in manifest.json
#   $2 - skill id within that category
#
# Prints the complete (Norwegian) generation prompt on stdout. Title,
# description, subtopics and category name come from manifest.json; the
# "do not duplicate" context and the related-skills list come from
# get_existing_context and get_related_skills.
#
# The skill entry is selected from the manifest once and its fields are then
# read from that small object, rather than re-scanning the manifest per field.
build_prompt() {
  local category_key="$1"
  local skill_id="$2"

  local skill_json
  skill_json=$(jq -c --arg key "$category_key" --arg id "$skill_id" \
    '.categories[$key].skills[] | select(.id == $id)' "$MANIFEST_FILE")

  local title description subtopics
  title=$(jq -r '.title' <<<"$skill_json")
  description=$(jq -r '.description' <<<"$skill_json")
  subtopics=$(jq -r '.subtopics | join(", ")' <<<"$skill_json")

  local category_name
  category_name=$(jq -r --arg key "$category_key" '.categories[$key].name' "$MANIFEST_FILE")

  local existing_context
  existing_context=$(get_existing_context "$category_key")

  local related_skills
  related_skills=$(get_related_skills "$category_key" "$skill_id")

  # Build the full prompt from template
  cat <<PROMPT
Du er Cosmo Skyberg, en senior Microsoft AI Solution Architect som skriver kunnskapsreferanser for et Claude Code-plugin. Referansene brukes av en AI-arkitekt persona som hjelper norske organisasjoner (spesielt offentlig sektor) med Microsoft AI-løsninger.
## Oppgave
Skriv en komplett kunnskapsreferanse om: **${title}**
Skill-beskrivelse: ${description}
Viktige undertemaer å dekke: ${subtopics}
Denne filen tilhører kategorien **${category_name}**.
## Format-krav (STRENGT)
### Header
\`\`\`markdown
# ${title}
**Last updated:** 2026-02
**Status:** [GA | Preview | Announced]
**Category:** ${category_name}
---
\`\`\`
### Innhold (7-15 KB, alle seksjoner påkrevd)
1. **Introduksjon** (2-3 avsnitt)
- Hva er dette? Hvorfor er det viktig for enterprise AI?
- Plassering i Microsoft-økosystemet
- Norsk prosa, engelske tekniske termer
2. **Kjernekomponenter / Nøkkelegenskaper**
- Bruk tabeller for sammenligninger
- Bullet points for egenskaper
- Kodeeksempler der relevant (korte, illustrative)
3. **Arkitekturmønstre**
- 2-3 typiske bruksmønstre
- Når bruke hvert mønster
- Fordeler og ulemper
4. **Beslutningsveiledning**
- "Velg X når..." beslutningstabell
- Vanlige feil og misforståelser
- Røde flagg arkitekten bør se etter
5. **Integrasjon med Microsoft-stakken**
- Hvordan dette kobles til andre Azure/M365-tjenester
- Typiske integrasjonsmønstre
6. **Offentlig sektor (Norge)**
- GDPR, Schrems II, AI Act, Forvaltningsloven
- Datasuverenitet og residency
7. **Kostnad og lisensiering**
- Prismodell (oversikt)
- Kostnadsoptimaliseringstips
8. **For arkitekten (Cosmo)**
- 5-8 nøkkelspørsmål å stille kunden
- Vanlige fallgruver
- Anbefalinger per modenhetsnivå
9. **Kilder og verifisering**
- Microsoft Learn-referanser
- Konfidensnivå (Verified / Baseline / Assumed)
## Regler
1. Norsk prosa, engelske tekniske termer
2. Tabeller over tekst for sammenligninger
3. Konkret over vagt — spesifikke tall og tjenester
4. Balansert — vis fordeler OG ulemper
5. Oppdatert — 2025-2026 informasjon
6. Størrelse: 7-15 KB (200-400 linjer)
7. Ikke dupliser: ${existing_context}
## Relaterte skills (for kryssreferanser)
${related_skills}
Skriv KUN markdown-innholdet. Ingen innledende forklaring eller avsluttende kommentar.
PROMPT
}
# Generate a single skill file
#
# Arguments:
#   $1 - category key in manifest.json
#   $2 - skill id (also the base name of the output file)
#   $3 - attempt number, 1-based (optional; used by the retry recursion)
#
# Skips skills already recorded as completed, and adopts an existing output
# file of at least $MIN_SIZE bytes as completed. Otherwise builds the prompt,
# calls `claude --print`, writes the result under $REFS_DIR/<category dir>/
# and applies two quality gates (minimum size, leading markdown header).
# A failed CLI call or a too-small file is retried up to $MAX_RETRIES times.
#
# In dry-run mode nothing is generated and state.json is not modified.
#
# Returns 0 on success or skip, 1 when the skill could not be generated.
generate_skill() {
  local category_key="$1"
  local skill_id="$2"
  local attempt="${3:-1}"

  local category_dir
  category_dir=$(jq -r --arg key "$category_key" '.categories[$key].dir // empty' "$MANIFEST_FILE")
  if [[ -z "$category_dir" ]]; then
    # Without this guard the file would land in "$REFS_DIR/null/".
    error " No output directory configured for category: $category_key"
    return 1
  fi

  local output_dir="$REFS_DIR/$category_dir"
  local output_file="$output_dir/$skill_id.md"

  local title
  title=$(jq -r --arg key "$category_key" --arg id "$skill_id" \
    '.categories[$key].skills[] | select(.id == $id) | .title' "$MANIFEST_FILE")

  # Skip if already completed
  if is_completed "$skill_id"; then
    detail " Skipping (already completed): $skill_id"
    return 0
  fi

  # Skip if file already exists and is large enough
  if [[ -f "$output_file" ]]; then
    local existing_size
    existing_size=$(wc -c < "$output_file" | tr -d ' ')
    if [[ $existing_size -ge $MIN_SIZE ]]; then
      detail " Skipping (file exists, ${existing_size}B): $skill_id"
      # A dry run only reports; it must not record anything in state.json.
      if [[ "$DRY_RUN" != true ]]; then
        mark_completed "$skill_id" "$existing_size"
      fi
      return 0
    fi
    warn " File exists but too small (${existing_size}B < ${MIN_SIZE}B), regenerating: $skill_id"
  fi

  if [[ "$DRY_RUN" == true ]]; then
    log " [DRY RUN] Would generate: $output_file"
    log " Title: $title"
    return 0
  fi

  log " Generating ($attempt/$((MAX_RETRIES+1))): $title"

  # Create output directory (and the log directory the stderr capture needs)
  mkdir -p "$output_dir" "$LOG_DIR"

  # Build prompt
  local prompt
  prompt=$(build_prompt "$category_key" "$skill_id")

  # Generate with claude --print; its stderr is kept per skill for diagnosis
  local output
  if ! output=$(claude --print --model "$MODEL" "$prompt" 2>"$LOG_DIR/gen-${skill_id}.err"); then
    error " Claude CLI failed for $skill_id"
    if [[ $attempt -le $MAX_RETRIES ]]; then
      warn " Retrying in ${DELAY}s..."
      sleep "$DELAY"
      generate_skill "$category_key" "$skill_id" $((attempt + 1))
      return $?
    fi
    mark_failed "$skill_id" "claude CLI error"
    return 1
  fi

  # Write output (printf: the text is arbitrary model output, so echo's
  # option and escape handling must not get a say in it)
  printf '%s\n' "$output" > "$output_file"

  # Quality gate: check file size
  local file_size
  file_size=$(wc -c < "$output_file" | tr -d ' ')
  if [[ $file_size -lt $MIN_SIZE ]]; then
    warn " File too small: ${file_size}B (min: ${MIN_SIZE}B)"
    if [[ $attempt -le $MAX_RETRIES ]]; then
      # The retry rebuilds the same prompt; it relies on model variance.
      warn " Retrying in ${DELAY}s..."
      sleep "$DELAY"
      generate_skill "$category_key" "$skill_id" $((attempt + 1))
      return $?
    fi
    error " Giving up on $skill_id (still too small after retries)"
    mark_failed "$skill_id" "file too small: ${file_size}B"
    return 1
  fi

  # Quality gate: check that file starts with # (markdown header)
  if ! head -1 "$output_file" | grep -q '^# '; then
    warn " File doesn't start with markdown header"
    # Try to fix by removing leading content before first header. When no
    # header exists at all the file is kept as generated.
    local temp_file="$output_file.tmp"
    sed -n '/^# /,$p' "$output_file" > "$temp_file"
    if [[ -s "$temp_file" ]]; then
      mv "$temp_file" "$output_file"
      file_size=$(wc -c < "$output_file" | tr -d ' ')
    else
      rm -f "$temp_file"
    fi
  fi

  mark_completed "$skill_id" "$file_size"
  success " Generated: $skill_id (${file_size}B)"

  # Rate limiting
  sleep "$DELAY"
}
# Tell whether pilot mode is active and its quota is used up.
# Returns 0 when $PILOT > 0 and state.json already records at least $PILOT
# generated skills; non-zero otherwise.
_pilot_limit_reached() {
  [[ $PILOT -gt 0 ]] || return 1
  local total_completed
  total_completed=$(jq '.stats.total_generated' "$STATE_FILE")
  [[ $total_completed -ge $PILOT ]]
}

# Generate all skills in a category
#
# Arguments:
#   $1 - category key in manifest.json
#
# Runs generate_skill for every skill of the category, in manifest order, and
# logs how many calls succeeded. In pilot mode it stops as soon as the pilot
# quota is reached; the quota is also checked *before* the first skill so a
# resumed run that already met it does not generate one extra file.
#
# A category missing from the manifest is reported and skipped.
# Always returns 0; individual skill failures are recorded in state.json.
generate_category() {
  local category_key="$1"

  if ! jq -e --arg key "$category_key" '.categories | has($key)' "$MANIFEST_FILE" >/dev/null 2>&1; then
    warn "Category not found in manifest, skipping: $category_key"
    return 0
  fi

  if _pilot_limit_reached; then
    warn "Pilot limit reached ($PILOT skills)"
    return 0
  fi

  local category_name
  category_name=$(jq -r --arg key "$category_key" '.categories[$key].name' "$MANIFEST_FILE")

  local skill_count
  skill_count=$(jq --arg key "$category_key" '.categories[$key].skills | length' "$MANIFEST_FILE")

  log ""
  log "═══════════════════════════════════════"
  log "Category: $category_name ($skill_count skills)"
  log "═══════════════════════════════════════"

  local skill_ids=() line
  while IFS= read -r line; do
    skill_ids+=("$line")
  done < <(jq -r --arg key "$category_key" \
    '.categories[$key].skills // [] | .[].id' "$MANIFEST_FILE")

  local generated=0 skill_id
  # Guarded because expanding an empty array trips `set -u` on older bash.
  if [[ ${#skill_ids[@]} -gt 0 ]]; then
    for skill_id in "${skill_ids[@]}"; do
      if generate_skill "$category_key" "$skill_id"; then
        generated=$((generated + 1))
      fi

      # Pilot mode: stop after N skills total
      if _pilot_limit_reached; then
        warn "Pilot limit reached ($PILOT skills)"
        return 0
      fi
    done
  fi

  success "Category complete: $generated/$skill_count generated"
}
# Report the totals recorded in the state file.
# Logs the number of generated files (with their combined size in KB), the
# failure count when non-zero, and the state/output locations; then lists
# every failed skill with its reason on stdout.
print_summary() {
  local n_ok n_failed n_bytes n_kb

  # One jq pass yields all three counters, space separated.
  read -r n_ok n_failed n_bytes < <(
    jq -r '.stats | "\(.total_generated) \(.total_failed) \(.total_bytes)"' "$STATE_FILE"
  )
  n_kb=$(( n_bytes / 1024 ))

  echo ""
  log "═══════════════════════════════════════"
  log " GENERATION SUMMARY "
  log "═══════════════════════════════════════"
  success "Generated: $n_ok files ($n_kb KB)"
  if [[ $n_failed -gt 0 ]]; then
    error "Failed: $n_failed files"
  fi
  log "State: $STATE_FILE"
  log "Output: $REFS_DIR/"
  log "═══════════════════════════════════════"

  # Nothing failed: the summary ends here.
  [[ $n_failed -gt 0 ]] || return 0

  echo ""
  warn "Failed skills:"
  jq -r '.failed[] | " - \(.id): \(.reason)"' "$STATE_FILE"
}
# Abort with a clear message when an option that takes a value is the last
# argument (a bare "$2" would otherwise die with "unbound variable").
#   $1 - option name, $2 - number of arguments left (including the option)
_require_option_value() {
  if [[ "$2" -lt 2 ]]; then
    error "Option $1 requires a value"
    exit 1
  fi
}

# Abort unless the value is a non-negative integer without leading zeros.
# These values are later used in [[ ... -gt ... ]] arithmetic and as
# `jq --argjson` input, where arbitrary text is an error.
#   $1 - option name, $2 - value
_require_uint_option() {
  local uint_re='^(0|[1-9][0-9]*)$'
  if [[ ! "$2" =~ $uint_re ]]; then
    error "Option $1 expects a non-negative integer, got: $2"
    exit 1
  fi
}

# Parse arguments
#
# Sets the globals MODEL, DRY_RUN, PILOT, DELAY, MIN_SIZE and MAX_RETRIES from
# the command line, deletes the state file on --reset, and prints the selected
# run mode on stdout as one of:
#   skill:<id> | category:<key> | wave:<n> | all
# (--skill wins over --category, which wins over --wave).
#
# stdout carries only the mode string; usage text and diagnostics go to
# stderr. The globals are only visible to the caller when this function runs
# in the caller's shell, i.e. not inside $( ... ).
#
# Exits 0 after --help, exits 1 on an unknown option, a missing option value
# or a malformed number.
parse_args() {
  local category=""
  local skill=""
  local wave=""
  local seconds_re='^[0-9]+([.][0-9]+)?$'

  while [[ $# -gt 0 ]]; do
    case "$1" in
      --category|-c)
        _require_option_value "$1" "$#"
        category="$2"
        shift 2
        ;;
      --skill|-s)
        _require_option_value "$1" "$#"
        skill="$2"
        shift 2
        ;;
      --wave|-w)
        _require_option_value "$1" "$#"
        _require_uint_option "$1" "$2"
        wave="$2"
        shift 2
        ;;
      --model|-m)
        _require_option_value "$1" "$#"
        MODEL="$2"
        shift 2
        ;;
      --dry-run)
        DRY_RUN=true
        shift
        ;;
      --pilot)
        _require_option_value "$1" "$#"
        _require_uint_option "$1" "$2"
        PILOT="$2"
        shift 2
        ;;
      --delay)
        _require_option_value "$1" "$#"
        # Fractions are allowed: the value is handed to sleep(1).
        if [[ ! "$2" =~ $seconds_re ]]; then
          error "Option $1 expects a number of seconds, got: $2"
          exit 1
        fi
        DELAY="$2"
        shift 2
        ;;
      --min-size)
        _require_option_value "$1" "$#"
        _require_uint_option "$1" "$2"
        MIN_SIZE="$2"
        shift 2
        ;;
      --max-retries)
        _require_option_value "$1" "$#"
        _require_uint_option "$1" "$2"
        MAX_RETRIES="$2"
        shift 2
        ;;
      --reset)
        rm -f "$STATE_FILE"
        log "State reset"
        shift
        ;;
      --help|-h)
        # stderr, because stdout is this function's return channel
        cat >&2 <<EOF
Usage: $0 [OPTIONS]
Options:
--category, -c KEY Generate single category
--skill, -s ID Generate single skill
--wave, -w N Generate wave N (1=HIGH, 2=MEDIUM)
--model, -m MODEL Claude model (default: sonnet)
--dry-run Show what would be generated
--pilot N Generate only first N skills (for testing)
--delay N Seconds between API calls (default: 3)
--min-size N Minimum file size in bytes (default: 5000)
--max-retries N Max retries per skill (default: 2)
--reset Clear state and start fresh
Environment:
MODEL=sonnet Override default model
MAX_BUDGET_USD=5 Max dollar amount per run
PARALLEL=1 Parallel generation (experimental)
DELAY=3 Delay between calls
Examples:
$0 --pilot 3 # Test with 3 skills
$0 --category rag-architecture # Generate one category
$0 --wave 1 --model sonnet # Generate all HIGH priority
$0 --dry-run # Preview without generating
MODEL=haiku $0 --wave 2 # MEDIUM priority with haiku
EOF
        exit 0
        ;;
      *)
        error "Unknown option: $1"
        exit 1
        ;;
    esac
  done

  # Return mode and target
  if [[ -n "$skill" ]]; then
    echo "skill:$skill"
  elif [[ -n "$category" ]]; then
    echo "category:$category"
  elif [[ -n "$wave" ]]; then
    echo "wave:$wave"
  else
    echo "all"
  fi
}
# Look up the manifest category (or categories) that contain a skill.
#   $1 - skill id
# Prints the key of every category whose skills list has an entry with that
# id, one per line; prints nothing when the id is unknown.
find_skill_category() {
  local wanted_id="$1"
  local filter='.categories | to_entries[] | select(any(.value.skills[]; .id == $id)) | .key'

  jq -r --arg id "$wanted_id" "$filter" "$MANIFEST_FILE"
}
# Run generate_category for each category key given as an argument, stopping
# early once the pilot quota (if any) has been reached.
_generate_category_list() {
  local cat total
  for cat; do
    generate_category "$cat"
    if [[ $PILOT -gt 0 ]]; then
      total=$(jq '.stats.total_generated' "$STATE_FILE")
      if [[ $total -ge $PILOT ]]; then
        break
      fi
    fi
  done
}

# Main
#
# Arguments: the script's command-line arguments.
#
# Flow: show help if requested, check prerequisites, parse the arguments,
# initialise the state, dispatch on the selected mode (single skill, single
# category, one wave, or everything), then print the summary.
#
# Returns non-zero when a single requested skill (--skill) failed; exits 1 for
# an unknown skill or a wave without categories.
main() {
  # Help is handled first so it works without prerequisites and is printed
  # rather than captured as a mode string.
  local arg
  for arg; do
    case "$arg" in
      --help|-h)
        parse_args --help
        exit 0
        ;;
    esac
  done

  check_prereqs
  mkdir -p "$LOG_DIR"

  # parse_args reports the mode on stdout AND sets globals (MODEL, DRY_RUN,
  # PILOT, ...). Capturing it with $( ... ) would run it in a subshell and
  # throw those globals away, so its stdout goes through a temp file instead.
  local mode mode_file
  mode_file=$(mktemp)
  # shellcheck disable=SC2064  # expand the path now; the local is gone at EXIT
  trap "rm -f -- '$mode_file'" EXIT
  parse_args "$@" > "$mode_file"
  mode=$(<"$mode_file")

  # After parsing, so that --reset (which deletes the state file) is followed
  # by a fresh state instead of leaving the run without one.
  init_state

  log "Skill Generation Pipeline"
  log "Model: $MODEL | Min size: ${MIN_SIZE}B | Delay: ${DELAY}s"
  if [[ "$DRY_RUN" == true ]]; then
    warn "DRY RUN MODE — no files will be generated"
  fi
  if [[ $PILOT -gt 0 ]]; then
    warn "PILOT MODE — only $PILOT skills"
  fi
  echo ""

  local rc=0 line
  case "$mode" in
    skill:*)
      local skill_id="${mode#skill:}"
      local category_key
      category_key=$(find_skill_category "$skill_id")
      if [[ -z "$category_key" ]]; then
        error "Skill not found in manifest: $skill_id"
        exit 1
      fi
      # Keep going on failure so the summary (with the reason) is printed.
      generate_skill "$category_key" "$skill_id" || rc=1
      ;;
    category:*)
      generate_category "${mode#category:}"
      ;;
    wave:*)
      local wave_num="${mode#wave:}"
      local categories=()
      while IFS= read -r line; do
        categories+=("$line")
      done < <(jq -r --argjson w "$wave_num" \
        '.waves[] | select(.wave == $w) | .categories[]' "$CATEGORIES_FILE")
      if [[ ${#categories[@]} -eq 0 ]]; then
        error "No categories found for wave: $wave_num"
        exit 1
      fi
      _generate_category_list "${categories[@]}"
      ;;
    all)
      local all_categories=()
      while IFS= read -r line; do
        all_categories+=("$line")
      done < <(jq -r '.categories | keys[]' "$MANIFEST_FILE")
      if [[ ${#all_categories[@]} -eq 0 ]]; then
        warn "No categories in manifest"
      else
        _generate_category_list "${all_categories[@]}"
      fi
      ;;
    *)
      error "Unknown mode: $mode"
      exit 1
      ;;
  esac

  print_summary
  return "$rc"
}
# Entry point: run main with the script's command-line arguments.
main "$@"