#!/bin/bash
# kb-staleness-check.sh — Scan knowledge base files for staleness
# Usage: bash scripts/kb-staleness-check.sh [--days N] [--priority-only] [--verbose] [--json] [--output FILE]
#
# Options:
#   --days N         Use N days as the staleness threshold for every file,
#                    overriding the per-priority defaults listed below.
#   --priority-only  Suppress [FRESH] lines even when --verbose is given.
#   --verbose        Also list files that are still fresh.
#   --json           Emit a JSON summary on stdout; per-file lines go to stderr.
#   --output FILE    With --json, write the JSON to FILE instead of stdout.
#                    (Ignored in text mode.)
#
# Default thresholds by priority (priority is derived from the file path):
#   Critical 30 days, High 60 days, Medium 90 days, Low 180 days
# Priority order: prices > compliance > features > architecture

set -euo pipefail

PLUGIN_ROOT="$(cd "$(dirname "$0")/.." && pwd)"

# Scan all skill reference directories
KB_ROOTS=(
  "$PLUGIN_ROOT/skills/ms-ai-advisor/references"
  "$PLUGIN_ROOT/skills/ms-ai-governance/references"
  "$PLUGIN_ROOT/skills/ms-ai-security/references"
  "$PLUGIN_ROOT/skills/ms-ai-engineering/references"
  "$PLUGIN_ROOT/skills/ms-ai-infrastructure/references"
)

USAGE="Usage: bash scripts/kb-staleness-check.sh [--days N] [--priority-only] [--verbose] [--json] [--output FILE]"

# Defaults. An empty THRESHOLD_DAYS means "use the per-priority thresholds".
THRESHOLD_DAYS=""
PRIORITY_ONLY=false
VERBOSE=false
JSON_OUTPUT=false
OUTPUT_FILE=""

# Print an error message plus the usage line on stderr and exit 1.
# Arguments: $1 - error message
usage_error() {
  echo "$1" >&2
  echo "$USAGE" >&2
  exit 1
}

# Parse arguments
while [[ $# -gt 0 ]]; do
  case "$1" in
    --days)
      # Without this check a missing value dies with "unbound variable".
      [[ $# -ge 2 ]] || usage_error "Option --days requires a value"
      case "$2" in
        '' | *[!0-9]*) usage_error "Invalid value for --days: $2" ;;
      esac
      # 10# forces base 10 so a value such as "08" is not read as octal.
      THRESHOLD_DAYS=$((10#$2))
      shift 2
      ;;
    --priority-only)
      PRIORITY_ONLY=true
      shift
      ;;
    --verbose)
      VERBOSE=true
      shift
      ;;
    --json)
      JSON_OUTPUT=true
      shift
      ;;
    --output)
      [[ $# -ge 2 ]] || usage_error "Option --output requires a value"
      OUTPUT_FILE="$2"
      shift 2
      ;;
    *)
      usage_error "Unknown option: $1"
      ;;
  esac
done

for kb_dir in "${KB_ROOTS[@]}"; do
  if [ ! -d "$kb_dir" ]; then
    echo "WARNING: Knowledge base directory not found: $kb_dir" >&2
  fi
done

NOW=$(date +%s)
TOTAL=0
FRESH=0
STALE=0
STALE_CRITICAL=0
STALE_HIGH=0
STALE_MEDIUM=0
STALE_LOW=0

# Collect stale files for sorted summary; each entry is "days:priority:path".
declare -a STALE_ENTRIES=()

# Classify a file by its path.
# Arguments: $1 - file path (matched case-insensitively)
# Outputs:   "<Priority>:<threshold-days>" on stdout
get_priority() {
  local filepath="$1"
  local lower_path
  lower_path=$(printf '%s\n' "$filepath" | tr '[:upper:]' '[:lower:]')

  # Patterns are kept in variables so [[ =~ ]] treats them as regexes on
  # every bash version.
  local critical_re='(cost|pricing|pris)'
  local high_re='(responsible-ai|norwegian-public-sector-governance|ai-security-engineering)'
  local medium_re='(platforms|copilot-extensibility|azure-ai-services|multi-modal|performance-scalability|monitoring-observability|agent-orchestration|data-engineering|api-management|hybrid-edge|bcdr|rag-architecture|mlops-genaiops|prompt-engineering)'

  # Critical (30 days): cost, pricing, pris
  # NOTE(review): "pris" is also a substring of "enterprise", so such paths
  # are classified Critical — confirm that this is intended.
  if [[ "$lower_path" =~ $critical_re ]]; then
    echo "Critical:30"
    return
  fi

  # High (60 days): compliance, security, governance
  if [[ "$lower_path" =~ $high_re ]]; then
    echo "High:60"
    return
  fi

  # Medium (90 days): platforms, features, extensibility
  if [[ "$lower_path" =~ $medium_re ]]; then
    echo "Medium:90"
    return
  fi

  # Low (180 days): architecture, development, patterns
  echo "Low:180"
}

# Print a file's modification time as seconds since the epoch.
# Arguments: $1 - file path
# Returns:   non-zero if neither stat dialect yields a number
file_mtime() {
  local target="$1"
  local mtime
  # The GNU form must be tried first: GNU stat reads -f as --file-system,
  # prints filesystem details on stdout and exits non-zero, which would
  # pollute the captured value if the BSD form ran first. BSD stat rejects
  # -c without writing to stdout, so falling through to -f is safe.
  mtime=$(stat -c '%Y' "$target" 2>/dev/null) \
    || mtime=$(stat -f '%m' "$target" 2>/dev/null) \
    || return 1
  case "$mtime" in
    '' | *[!0-9]*) return 1 ;;
  esac
  printf '%s\n' "$mtime"
}

# Escape backslash, double quote and tab for use inside a JSON string.
# Arguments: $1 - raw string
# Outputs:   escaped string on stdout
json_escape() {
  # Backslashes are escaped first so the ones added later are not doubled.
  printf '%s' "$1" | sed -e 's/\\/\\\\/g' -e 's/"/\\"/g' -e $'s/\t/\\\\t/g'
}

# Print a per-file status line: stderr in JSON mode (stdout is reserved for
# the JSON document), stdout otherwise.
# Arguments: $1 - line to print
report_line() {
  if [ "$JSON_OUTPUT" = true ]; then
    echo "$1" >&2
  else
    echo "$1"
  fi
}

for KB_ROOT in "${KB_ROOTS[@]}"; do
  [ -d "$KB_ROOT" ] || continue

  while IFS= read -r -d '' file; do
    # A file may vanish between find and stat; skip it instead of aborting.
    if ! MOD_EPOCH=$(file_mtime "$file"); then
      echo "WARNING: Could not read modification time: $file" >&2
      continue
    fi

    TOTAL=$((TOTAL + 1))
    DAYS_OLD=$(((NOW - MOD_EPOCH) / 86400))
    REL_PATH="${file#"$KB_ROOT/"}"

    PRIORITY_INFO=$(get_priority "$REL_PATH")
    PRIORITY="${PRIORITY_INFO%%:*}"
    PRIORITY_THRESHOLD="${PRIORITY_INFO##*:}"

    # --days N replaces the per-priority threshold for every file.
    if [ -n "$THRESHOLD_DAYS" ]; then
      PRIORITY_THRESHOLD="$THRESHOLD_DAYS"
    fi

    if [ "$DAYS_OLD" -gt "$PRIORITY_THRESHOLD" ]; then
      STALE=$((STALE + 1))
      case "$PRIORITY" in
        Critical) STALE_CRITICAL=$((STALE_CRITICAL + 1)) ;;
        High) STALE_HIGH=$((STALE_HIGH + 1)) ;;
        Medium) STALE_MEDIUM=$((STALE_MEDIUM + 1)) ;;
        Low) STALE_LOW=$((STALE_LOW + 1)) ;;
      esac

      FULL_REL="${file#"$PLUGIN_ROOT/"}"
      report_line "[STALE] $REL_PATH — ${DAYS_OLD} days old (threshold: ${PRIORITY_THRESHOLD}) — Priority: $PRIORITY"
      STALE_ENTRIES+=("${DAYS_OLD}:${PRIORITY}:${FULL_REL}")
    else
      FRESH=$((FRESH + 1))
      if [ "$VERBOSE" = true ] && [ "$PRIORITY_ONLY" = false ]; then
        report_line "[FRESH] $REL_PATH — ${DAYS_OLD} days old (threshold: ${PRIORITY_THRESHOLD}) — Priority: $PRIORITY"
      fi
    fi
  done < <(find "$KB_ROOT" -name '*.md' -type f -print0)
done

# JSON output mode
if [ "$JSON_OUTPUT" = true ]; then
  # Not every date implementation supports -Iseconds; fall back to an
  # equivalent strftime format instead of letting set -e end the script.
  GENERATED_AT=$(date -Iseconds 2>/dev/null || date '+%Y-%m-%dT%H:%M:%S%z')

  JSON="{"
  JSON+="\"generated_at\":\"$GENERATED_AT\","
  JSON+="\"total\":$TOTAL,"
  JSON+="\"fresh\":$FRESH,"
  JSON+="\"stale\":$STALE,"
  JSON+="\"stale_by_priority\":{\"critical\":$STALE_CRITICAL,\"high\":$STALE_HIGH,\"medium\":$STALE_MEDIUM,\"low\":$STALE_LOW},"
  JSON+="\"files\":["

  # Guarded because expanding an empty array under `set -u` is an error in
  # bash releases before 4.4.
  if [ "$STALE" -gt 0 ]; then
    FIRST=true
    for entry in "${STALE_ENTRIES[@]}"; do
      days="${entry%%:*}"
      rest="${entry#*:}"
      priority="${rest%%:*}"
      filepath="${rest#*:}"

      # Determine skill from path
      skill="unknown"
      case "$filepath" in
        *ms-ai-advisor*) skill="ms-ai-advisor" ;;
        *ms-ai-engineering*) skill="ms-ai-engineering" ;;
        *ms-ai-governance*) skill="ms-ai-governance" ;;
        *ms-ai-security*) skill="ms-ai-security" ;;
        *ms-ai-infrastructure*) skill="ms-ai-infrastructure" ;;
      esac

      # Determine category from path: the directory directly below
      # references/. When the file sits directly in references/ the
      # pattern does not match and the whole path is kept unchanged.
      category=$(printf '%s\n' "$filepath" | sed -E 's|.*/references/([^/]+)/.*|\1|')

      if [ "$FIRST" = true ]; then
        FIRST=false
      else
        JSON+=","
      fi

      JSON+="{\"path\":\"$(json_escape "$filepath")\",\"skill\":\"$skill\",\"category\":\"$(json_escape "$category")\",\"age_days\":$days,\"priority\":\"$priority\"}"
    done
  fi

  JSON+="]}"

  if [ -n "$OUTPUT_FILE" ]; then
    printf '%s\n' "$JSON" > "$OUTPUT_FILE"
    echo "JSON written to: $OUTPUT_FILE" >&2
  else
    printf '%s\n' "$JSON"
  fi
  exit 0
fi

echo ""
echo "=== KB Freshness Report ==="
echo "Total files: $TOTAL"
echo "Fresh: $FRESH"
echo "Stale: $STALE (Critical: $STALE_CRITICAL, High: $STALE_HIGH, Medium: $STALE_MEDIUM, Low: $STALE_LOW)"

if [ "$STALE" -gt 0 ]; then
  echo ""
  echo "Recommended update order:"

  # Sort stale entries: Critical first, then High, Medium, Low; within priority by age descending
  PRIORITY_ORDER="Critical High Medium Low"
  INDEX=1
  for prio in $PRIORITY_ORDER; do
    # Collect entries for this priority
    PRIO_ENTRIES=()
    for entry in "${STALE_ENTRIES[@]}"; do
      entry_prio="${entry#*:}"
      entry_prio="${entry_prio%%:*}"
      if [ "$entry_prio" = "$prio" ]; then
        PRIO_ENTRIES+=("$entry")
      fi
    done

    # Sort by days (first field) descending
    if [ ${#PRIO_ENTRIES[@]} -gt 0 ]; then
      SORTED=$(printf '%s\n' "${PRIO_ENTRIES[@]}" | sort -t: -k1,1 -nr)
      while IFS= read -r sorted_entry; do
        days="${sorted_entry%%:*}"
        rest="${sorted_entry#*:}"
        rest="${rest#*:}"
        echo "  ${INDEX}. [$prio] $rest (${days} days)"
        INDEX=$((INDEX + 1))
      done <<< "$SORTED"
    fi
  done
fi

echo ""
echo "Run with --verbose to see fresh files. Use --days N to override threshold. Use --json for machine-readable output."