feat(ms-ai-architect): sitemap-based KB change detection system

Adds a zero-dependency Node.js pipeline that polls Microsoft Learn sitemaps
weekly to detect when source documentation changes. Replaces the broken
mtime-based staleness check (all files had identical mtime after release).

Components:
- build-registry.mjs: extracts 1342 URLs from 387 reference files
- poll-sitemaps.mjs: streams ~18 child sitemaps, matches against registry
- report-changes.mjs: prioritized change report (critical/high/medium/low)
- discover-new-urls.mjs: finds relevant new MS Learn pages not yet covered
- run-weekly-update.mjs: orchestrator with --force/--discover/--dry-run

Integration:
- session-start hook reads change-report.json instead of broken mtime check
- hook triggers background poll if >7 days since last check
- generate-skills --update reads change report for targeted MCP updates

Current stats: 69% match rate (924/1342 URLs tracked via sitemaps).
~31% unmatched due to Microsoft URL restructuring (ai-foundry/openai paths).

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Kjell Tore Guttormsen 2026-04-09 21:19:51 +02:00
commit f968f37be3
13 changed files with 976 additions and 59 deletions

View file

@ -3,8 +3,9 @@
// Shows active utredning sessions and KB staleness on session start.
// Output: plain text to stdout (advisory, never blocking).
import { readdirSync, statSync, existsSync } from 'node:fs';
import { readdirSync, readFileSync, existsSync } from 'node:fs';
import { join, relative } from 'node:path';
import { spawn } from 'node:child_process';
const pluginRoot = process.env.CLAUDE_PLUGIN_ROOT || join(process.cwd());
const cwd = process.cwd();
@ -40,23 +41,36 @@ if (existsSync(docsDir)) {
}
}
// --- 2. Check KB staleness (stat mtime, no content reading) ---
const staleLevels = { critical: 0, high: 0, medium: 0 };
// --- 2. Check KB staleness (from sitemap-based change report) ---
const now = Date.now();
const DAY_MS = 24 * 60 * 60 * 1000;
const staleLevels = { critical: 0, high: 0, medium: 0 };
let lastPollDaysAgo = Infinity;
const skillsDir = join(pluginRoot, 'skills');
if (existsSync(skillsDir)) {
const changeReportPath = join(pluginRoot, 'scripts', 'kb-update', 'data', 'change-report.json');
if (existsSync(changeReportPath)) {
try {
const skillDirs = readdirSync(skillsDir, { withFileTypes: true });
for (const skill of skillDirs) {
if (!skill.isDirectory()) continue;
const refsDir = join(skillsDir, skill.name, 'references');
if (!existsSync(refsDir)) continue;
countStaleFiles(refsDir, staleLevels, now);
const report = JSON.parse(readFileSync(changeReportPath, 'utf8'));
staleLevels.critical = report.by_priority?.critical || 0;
staleLevels.high = report.by_priority?.high || 0;
staleLevels.medium = report.by_priority?.medium || 0;
if (report.last_poll) {
lastPollDaysAgo = (now - new Date(report.last_poll).getTime()) / DAY_MS;
}
} catch {
// Ignore
// Ignore — fall back to showing no data
}
}
// Trigger a background poll when the last recorded poll is more than 7 days old.
// lastPollDaysAgo is initialised to Infinity, so a missing or unreadable change
// report also lands in this branch.
if (lastPollDaysAgo > 7) {
  const updateScript = join(pluginRoot, 'scripts', 'kb-update', 'run-weekly-update.mjs');
  if (existsSync(updateScript)) {
    try {
      // Launch with the Node binary that is running this hook rather than
      // depending on a `node` executable being resolvable through PATH.
      const child = spawn(process.execPath, [updateScript], { detached: true, stdio: 'ignore' });
      // Spawn failures (e.g. ENOENT) are delivered asynchronously as an 'error'
      // event, which the surrounding try/catch cannot see. Without a listener
      // the event would surface as an uncaught exception and abort the hook.
      child.on('error', () => {
        // Non-critical — silent fail
      });
      // Let this process exit without waiting for the detached child.
      child.unref();
    } catch {
      // Non-critical — silent fail
    }
  }
}
@ -117,7 +131,10 @@ if (staleLevels.high > 0) staleEntries.push(`${staleLevels.high} high`);
if (staleLevels.medium > 0) staleEntries.push(`${staleLevels.medium} medium`);
if (staleEntries.length > 0) {
parts.push(`KB stale: ${staleEntries.join(', ')}`);
const pollAge = lastPollDaysAgo < Infinity ? ` (pollet ${Math.floor(lastPollDaysAgo)}d siden)` : '';
parts.push(`KB: ${staleEntries.join(', ')} needs update${pollAge}`);
} else if (lastPollDaysAgo > 7) {
parts.push('KB: poll overdue');
}
if (nearestDeadline) {
@ -154,26 +171,3 @@ function countFiles(dir, filename) {
return count;
}
/**
 * Walks `dir` recursively and tallies `.md` files by the age of their mtime.
 *
 * Age is `(now - mtimeMs) / DAY_MS`; a file older than 180 days increments
 * `levels.critical`, older than 90 days `levels.high`, older than 60 days
 * `levels.medium`. Younger files and non-`.md` files are not counted.
 * Any error while listing a directory or inspecting a file is swallowed.
 *
 * @param {string} dir - Directory to scan.
 * @param {{critical: number, high: number, medium: number}} levels - Counters, mutated in place.
 * @param {number} now - Reference timestamp in milliseconds.
 * @returns {void}
 */
function countStaleFiles(dir, levels, now) {
  try {
    for (const child of readdirSync(dir, { withFileTypes: true })) {
      const childPath = join(dir, child.name);

      if (child.isDirectory()) {
        // Descend into subdirectories, sharing the same counters.
        countStaleFiles(childPath, levels, now);
        continue;
      }

      if (!child.name.endsWith('.md')) {
        continue;
      }

      try {
        const ageInDays = (now - statSync(childPath).mtimeMs) / DAY_MS;

        // Pick the single bucket this file belongs to (most severe first).
        let bucket = null;
        if (ageInDays > 180) {
          bucket = 'critical';
        } else if (ageInDays > 90) {
          bucket = 'high';
        } else if (ageInDays > 60) {
          bucket = 'medium';
        }

        if (bucket !== null) {
          levels[bucket]++;
        }
      } catch {
        // Skip unreadable files
      }
    }
  } catch {
    // Ignore
  }
}