ktg-plugin-marketplace/plugins/ms-ai-architect/scripts/kb-update/report-changes.mjs
Kjell Tore Guttormsen f968f37be3 feat(ms-ai-architect): sitemap-based KB change detection system
Adds a zero-dependency Node.js pipeline that polls Microsoft Learn sitemaps
weekly to detect when source documentation changes. Replaces the broken
mtime-based staleness check (all files had identical mtime after release).

Components:
- build-registry.mjs: extracts 1342 URLs from 387 reference files
- poll-sitemaps.mjs: streams ~18 child sitemaps, matches against registry
- report-changes.mjs: prioritized change report (critical/high/medium/low)
- discover-new-urls.mjs: finds relevant new MS Learn pages not yet covered
- run-weekly-update.mjs: orchestrator with --force/--discover/--dry-run

Integration:
- session-start hook reads change-report.json instead of broken mtime check
- hook triggers background poll if >7 days since last check
- generate-skills --update reads change report for targeted MCP updates

Current stats: 69% match rate (924/1342 URLs tracked via sitemaps).
~31% unmatched due to Microsoft URL restructuring (ai-foundry/openai paths).

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-04-09 21:19:51 +02:00

144 lines
5.3 KiB
JavaScript

#!/usr/bin/env node
// report-changes.mjs — Compare sitemap lastmod to reference file "Last updated:" headers.
// Generates change-report.json and prints human-readable summary.
// Usage: node report-changes.mjs [--json]
import { readFileSync, existsSync } from 'node:fs';
import { join, dirname } from 'node:path';
import { fileURLToPath } from 'node:url';
import { loadRegistry, saveReport } from './lib/registry-io.mjs';
// ES modules expose no __dirname, so rebuild it from this module's file URL.
const scriptFile = fileURLToPath(import.meta.url);
const __dirname = dirname(scriptFile);
// Two directory levels above this script; reference-file paths are joined onto this root.
const PLUGIN_ROOT = join(__dirname, '..', '..');
// Directory handed to the registry loader and the report writer.
const DATA_DIR = join(__dirname, 'data');
// True when --json appears anywhere on the command line.
const jsonOnly = process.argv.indexOf('--json') !== -1;
/**
 * Classify a reference file into an update-priority tier from keywords in its path.
 *
 * Matching is case-insensitive. Tiers are checked from most to least urgent and
 * the first tier with a matching keyword wins.
 *
 * @param {string} filePath - Path of the reference file.
 * @returns {'critical'|'high'|'medium'|'low'} Priority tier; 'low' when no keyword matches.
 */
function getFilePriority(filePath) {
  const lower = filePath.toLowerCase();
  // "pris" (Norwegian: price) must not fire on the English word "enterprise",
  // which would otherwise push every enterprise-* file into the critical tier.
  if (/cost|pricing|(?<!enter)pris/.test(lower)) return 'critical';
  if (/responsible-ai|governance|ai-security-(?:engineering|scoring)/.test(lower)) return 'high';
  // "rag" only counts at the start of a word (plus the explicit "graphrag"),
  // so unrelated words such as "storage", "average" or "coverage" do not match.
  if (/platforms|copilot|azure-ai-services|agent-orchestration|(?<![a-z])rag|graphrag|mlops|prompt-engineering|monitoring|performance/.test(lower)) return 'medium';
  return 'low';
}
// Header patterns that carry a reference file's revision date.
// Covers the English label and the Norwegian ones ("Sist oppdatert/verifisert", "Dato").
const LAST_UPDATED_PATTERNS = [
  /\*\*Last updated:\*\*\s*([\d-]+)/i,
  /\*\*Sist (?:oppdatert|verifisert):\*\*\s*([\d-]+)/i,
  /\*\*Dato:\*\*\s*([\d-]+)/i,
];

// Memo of parsed header dates keyed by reference-file path. One reference file is
// usually cited by many URLs, so without this the same file would be re-read and
// re-parsed once per citing URL.
const lastUpdatedCache = new Map();

/**
 * Read and parse the date header of one reference file (no caching).
 *
 * @param {string} filePath - Reference file path relative to PLUGIN_ROOT.
 * @returns {string|null} The matched date text, or null when the file is missing
 *   or none of the header patterns match.
 */
function readLastUpdatedHeader(filePath) {
  const fullPath = join(PLUGIN_ROOT, filePath);
  if (!existsSync(fullPath)) return null;
  // The whole file is read, but only the first 500 characters are searched:
  // the header is expected at the top of the file.
  const head = readFileSync(fullPath, 'utf8').slice(0, 500);
  for (const pattern of LAST_UPDATED_PATTERNS) {
    const match = head.match(pattern);
    if (match) {
      const raw = match[1].trim();
      // YYYY-MM → YYYY-MM-01, YYYY-MM-DD → as-is
      return raw.length === 7 ? `${raw}-01` : raw;
    }
  }
  return null; // No date found — caller treats the file as always stale
}

/**
 * Get the "Last updated" date recorded in a reference file's header.
 *
 * Results (including null) are memoized per path for the lifetime of the process.
 *
 * @param {string} filePath - Reference file path relative to PLUGIN_ROOT.
 * @returns {string|null} Date text from the header (a 7-character YYYY-MM value
 *   is padded to YYYY-MM-01), or null when the file is missing or has no date header.
 */
function parseLastUpdated(filePath) {
  if (lastUpdatedCache.has(filePath)) return lastUpdatedCache.get(filePath);
  const parsed = readLastUpdatedHeader(filePath);
  lastUpdatedCache.set(filePath, parsed);
  return parsed;
}
// Sort rank per priority tier; a lower number sorts earlier in the report.
const PRIORITY_ORDER = { critical: 0, high: 1, medium: 2, low: 3 };

// --- Main ---
const registry = loadRegistry(DATA_DIR);

// A registry without a last_poll value has nothing to compare against: abort with exit code 1.
const hasBeenPolled = Boolean(registry.last_poll);
if (hasBeenPolled === false) {
  console.error('Registry has not been polled yet. Run poll-sitemaps.mjs first.');
  process.exit(1);
}
// Bucket every changed source URL under each reference file that cites it.
// Map: reference file path → { changedUrls, newestChange, fileDate }
const fileChanges = new Map();
for (const sourceUrl of Object.keys(registry.urls)) {
  const record = registry.urls[sourceUrl];
  const lastmod = record.sitemap_lastmod;

  // Only tracked URLs that carry a sitemap lastmod can be compared.
  const isComparable = Boolean(lastmod) && record.status === 'tracked';
  if (!isComparable) continue;

  for (const refPath of record.reference_files) {
    const headerDate = parseLastUpdated(refPath);
    // A file without a parseable date is compared against "0000-01-01", i.e. always stale.
    const baseline = headerDate || '0000-01-01';
    // String comparison of the two date values; skip sources not newer than the file.
    if (!(lastmod > baseline)) continue;

    let bucket = fileChanges.get(refPath);
    if (bucket === undefined) {
      bucket = { changedUrls: [], newestChange: lastmod, fileDate: headerDate };
      fileChanges.set(refPath, bucket);
    }
    bucket.changedUrls.push({ url: sourceUrl, sitemap_lastmod: lastmod });
    if (lastmod > bucket.newestChange) {
      bucket.newestChange = lastmod;
    }
  }
}
// Turn each per-file bucket into one report entry.
const files = Array.from(fileChanges, ([path, changes]) => {
  // Segment 1 of the path is reported as the skill, segment 3 as the category.
  const [, skillSegment, , categorySegment] = path.split('/');
  return {
    path,
    priority: getFilePriority(path),
    file_last_updated: changes.fileDate || 'unknown',
    newest_source_change: changes.newestChange,
    changed_url_count: changes.changedUrls.length,
    changed_urls: changes.changedUrls.map(({ url }) => url),
    skill: skillSegment || 'unknown',
    category: categorySegment || 'unknown',
  };
});

// Order by priority tier first; within a tier, the most recent source change comes first.
files.sort((left, right) => {
  const rankGap = PRIORITY_ORDER[left.priority] - PRIORITY_ORDER[right.priority];
  return rankGap !== 0
    ? rankGap
    : right.newest_source_change.localeCompare(left.newest_source_change);
});
// Tally how many stale files fall into each priority tier.
const byPriority = { critical: 0, high: 0, medium: 0, low: 0 };
for (const { priority } of files) {
  byPriority[priority] += 1;
}

// Number of registry URLs currently carrying the given status.
const countUrlsWithStatus = (status) =>
  Object.values(registry.urls).filter((entry) => entry.status === status).length;

// Date part (before the "T") of the current ISO timestamp.
const [generatedOn] = new Date().toISOString().split('T');

// Report object: saved to disk and, with --json, printed to stdout.
const report = {
  generated_at: generatedOn,
  last_poll: registry.last_poll,
  total_tracked: countUrlsWithStatus('tracked'),
  total_not_in_sitemap: countUrlsWithStatus('not_in_sitemap'),
  total_files_needing_update: files.length,
  by_priority: byPriority,
  files,
};
// Print the human-readable summary: header, totals, then at most the first 20 entries.
function printSummary() {
  const totalUrls = Object.keys(registry.urls).length;
  console.log(`\n=== KB Change Report (${report.generated_at}) ===`);
  console.log(`Sources last polled: ${registry.last_poll}`);
  console.log(`URLs tracked: ${report.total_tracked}/${totalUrls} (${report.total_not_in_sitemap} not in sitemap)`);
  console.log(`Files needing update: ${files.length} (Critical: ${byPriority.critical}, High: ${byPriority.high}, Medium: ${byPriority.medium}, Low: ${byPriority.low})`);

  if (files.length > 0) {
    console.log('\nTop 20 by priority:');
    const shown = files.slice(0, 20);
    shown.forEach((entry) => {
      console.log(` [${entry.priority.toUpperCase()}] ${entry.path}`);
      console.log(` ${entry.changed_url_count} source(s) changed. Latest: ${entry.newest_source_change}. File: ${entry.file_last_updated}`);
    });
    // Entries beyond the first 20 are only counted, not listed.
    const hidden = files.length - 20;
    if (hidden > 0) {
      console.log(` ... and ${hidden} more`);
    }
  }

  console.log('\nRun: /architect:generate-skills --update to process updates');
}

// The report file is always written; --json only changes what goes to stdout.
saveReport('change-report.json', report, DATA_DIR);
if (jsonOnly) {
  process.stdout.write(`${JSON.stringify(report, null, 2)}\n`);
} else {
  printSummary();
}