Adds a zero-dependency Node.js pipeline that polls Microsoft Learn sitemaps weekly to detect when source documentation changes. Replaces the broken mtime-based staleness check (all files had identical mtime after release).

Components:
- build-registry.mjs: extracts 1342 URLs from 387 reference files
- poll-sitemaps.mjs: streams ~18 child sitemaps, matches against registry
- report-changes.mjs: prioritized change report (critical/high/medium/low)
- discover-new-urls.mjs: finds relevant new MS Learn pages not yet covered
- run-weekly-update.mjs: orchestrator with --force/--discover/--dry-run

Integration:
- session-start hook reads change-report.json instead of the broken mtime check
- hook triggers a background poll if more than 7 days have passed since the last check
- generate-skills --update reads the change report for targeted MCP updates

Current stats: 69% match rate (924/1342 URLs tracked via sitemaps). The remaining ~31% are unmatched due to Microsoft URL restructuring (ai-foundry/openai paths).

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
144 lines
5.3 KiB
JavaScript
144 lines
5.3 KiB
JavaScript
#!/usr/bin/env node
// report-changes.mjs — Compare sitemap lastmod to reference file "Last updated:" headers.
// Generates change-report.json and prints a human-readable summary.
// Usage: node report-changes.mjs [--json]
//   --json  print the report as JSON on stdout instead of the summary

import { readFileSync, existsSync } from 'node:fs';
|
|
import { join, dirname } from 'node:path';
|
|
import { fileURLToPath } from 'node:url';
|
|
import { loadRegistry, saveReport } from './lib/registry-io.mjs';
|
|
|
|
// ES modules have no built-in __dirname, so derive it from this module's URL.
const __dirname = dirname(fileURLToPath(import.meta.url));

// Two directory levels above this script; reference-file paths from the
// registry are resolved against it.
const PLUGIN_ROOT = join(__dirname, '..', '..');

// Directory handed to loadRegistry() and saveReport().
const DATA_DIR = join(__dirname, 'data');

// --json: emit the report as JSON on stdout instead of the readable summary.
// Only user-supplied arguments are inspected; argv[0] and argv[1] are the node
// binary and the script path.
const jsonOnly = process.argv.slice(2).includes('--json');

// Priority classification by file path patterns.
//
// `pris` (Norwegian for "price") and `rag` are short enough to occur inside
// unrelated words ("enterprise", "storage", "coverage"). They therefore only
// count when they start or end a run of letters, which still accepts compounds
// such as "prismodell", "totalpris" and "graphrag".
const CRITICAL_PATH_PATTERN = /cost|pricing|(?<![a-z])pris|pris(?![a-z])/;
const HIGH_PATH_PATTERN = /responsible-ai|governance|ai-security-(?:engineering|scoring)/;
const MEDIUM_PATH_PATTERN = /platforms|copilot|azure-ai-services|agent-orchestration|(?<![a-z])rag|rag(?![a-z])|mlops|prompt-engineering|monitoring|performance/;

/**
 * Classify a reference file by keywords found anywhere in its path.
 *
 * Matching is case-insensitive and the first matching tier wins, checked in
 * the order critical → high → medium.
 *
 * @param {string} filePath - Path of the reference file.
 * @returns {'critical'|'high'|'medium'|'low'} Priority tier; 'low' when no keyword matches.
 */
function getFilePriority(filePath) {
  const lower = filePath.toLowerCase();
  if (CRITICAL_PATH_PATTERN.test(lower)) return 'critical';
  if (HIGH_PATH_PATTERN.test(lower)) return 'high';
  if (MEDIUM_PATH_PATTERN.test(lower)) return 'medium';
  return 'low';
}

// Parse "Last updated:" header from a reference file.
// Recognised bold-Markdown headers (case-insensitive): "Last updated:",
// the Norwegian "Sist oppdatert:" / "Sist verifisert:", and "Dato:".
const LAST_UPDATED_PATTERNS = [
  /\*\*Last updated:\*\*\s*([\d-]+)/i,
  /\*\*Sist (?:oppdatert|verifisert):\*\*\s*([\d-]+)/i,
  /\*\*Dato:\*\*\s*([\d-]+)/i,
];

// Number of leading characters of a file that are searched for the header.
const HEADER_SCAN_CHARS = 500;

// filePath → parsed date (or null). A file can be referenced by many URLs, so
// each file is read at most once per run.
const lastUpdatedCache = new Map();

/**
 * Normalize a captured header date to zero-padded YYYY-MM-DD so that plain
 * string comparison orders dates chronologically.
 *
 * Missing month/day default to 01 (YYYY → YYYY-01-01, YYYY-MM → YYYY-MM-01).
 *
 * @param {string} raw - Text captured by one of LAST_UPDATED_PATTERNS.
 * @returns {string|null} Normalized date, or null when `raw` is not year-first.
 */
function normalizeHeaderDate(raw) {
  const parts = /^(\d{4})(?:-(\d{1,2}))?(?:-(\d{1,2}))?$/.exec(raw.trim());
  if (!parts) return null;
  const [, year, month = '1', day = '1'] = parts;
  return `${year}-${month.padStart(2, '0')}-${day.padStart(2, '0')}`;
}

/**
 * Read a reference file and extract its header date, without caching.
 *
 * @param {string} filePath - Path relative to PLUGIN_ROOT.
 * @returns {string|null} Normalized date, or null when the file or header is missing.
 */
function readLastUpdated(filePath) {
  const fullPath = join(PLUGIN_ROOT, filePath);
  if (!existsSync(fullPath)) return null;

  // The whole file is read, but only its first characters are searched —
  // the header is always at the top.
  const header = readFileSync(fullPath, 'utf8').slice(0, HEADER_SCAN_CHARS);
  for (const pattern of LAST_UPDATED_PATTERNS) {
    const match = pattern.exec(header);
    const normalized = match ? normalizeHeaderDate(match[1]) : null;
    if (normalized) return normalized;
  }
  return null; // No usable date found — callers treat this as always stale
}

/**
 * Return the "last updated" date declared at the top of a reference file.
 *
 * Results (including null) are cached per path for the lifetime of the process.
 *
 * @param {string} filePath - Path relative to PLUGIN_ROOT.
 * @returns {string|null} Date as YYYY-MM-DD, or null when the file does not
 *   exist or carries no recognisable date header.
 */
function parseLastUpdated(filePath) {
  if (lastUpdatedCache.has(filePath)) return lastUpdatedCache.get(filePath);
  const parsed = readLastUpdated(filePath);
  lastUpdatedCache.set(filePath, parsed);
  return parsed;
}

// Priority sort order: a lower rank sorts earlier (critical first, low last).
// Frozen because it is a shared lookup table that must not change at runtime.
const PRIORITY_ORDER = Object.freeze({ critical: 0, high: 1, medium: 2, low: 3 });

// --- Main ---
const registry = loadRegistry(DATA_DIR);

// Without a completed poll there are no sitemap dates to compare against.
// Optional chaining also routes a missing registry object to this message
// instead of a TypeError (loadRegistry's return contract is not visible here).
if (!registry?.last_poll) {
  console.error('Registry has not been polled yet. Run poll-sitemaps.mjs first.');
  process.exit(1);
}

// Group changed URLs by reference file
const fileChanges = new Map(); // filePath → { changedUrls, newestChange, fileDate }

for (const [url, entry] of Object.entries(registry.urls)) {
  const { sitemap_lastmod: lastmod, status, reference_files: referenceFiles } = entry;

  // Only URLs that were found in a sitemap and carry a lastmod can be compared.
  if (!lastmod || status !== 'tracked') continue;

  // An entry without reference_files contributes nothing rather than crashing the run.
  for (const refFile of referenceFiles ?? []) {
    const fileDate = parseLastUpdated(refFile);
    // If no date found, treat as always stale (date "0000-01-01")
    const effectiveDate = fileDate || '0000-01-01';

    // Plain string comparison; relies on both sides being year-first, zero-padded dates.
    // NOTE(review): if sitemap_lastmod carries a time part, a change on the same
    // day as the file date compares as newer — confirm against poll-sitemaps.mjs.
    if (lastmod <= effectiveDate) continue;

    let change = fileChanges.get(refFile);
    if (!change) {
      change = { changedUrls: [], newestChange: lastmod, fileDate };
      fileChanges.set(refFile, change);
    }

    change.changedUrls.push({ url, sitemap_lastmod: lastmod });
    if (lastmod > change.newestChange) {
      change.newestChange = lastmod;
    }
  }
}

// Build report entries (one per reference file that has at least one newer source URL)
const files = [];
for (const [path, { fileDate, newestChange, changedUrls }] of fileChanges) {
  // Second and fourth path segments are reported as skill and category.
  // NOTE(review): presumably paths look like "<dir>/<skill>/<dir>/<category>/…" — confirm against the registry.
  const [, skill, , category] = path.split('/');
  files.push({
    path,
    priority: getFilePriority(path),
    file_last_updated: fileDate || 'unknown',
    newest_source_change: newestChange,
    changed_url_count: changedUrls.length,
    changed_urls: changedUrls.map(({ url }) => url),
    skill: skill || 'unknown',
    category: category || 'unknown',
  });
}

// Sort: priority first, then newest source change descending
files.sort((a, b) => {
  const rankDiff = PRIORITY_ORDER[a.priority] - PRIORITY_ORDER[b.priority];
  return rankDiff !== 0
    ? rankDiff
    : b.newest_source_change.localeCompare(a.newest_source_change);
});

// Count by priority
const byPriority = { critical: 0, high: 0, medium: 0, low: 0 };
for (const { priority } of files) {
  byPriority[priority] += 1;
}

// Assemble the report object and persist it as change-report.json.
const urlEntries = Object.values(registry.urls);
const countByStatus = (status) => urlEntries.filter((u) => u.status === status).length;

const report = {
  // Date part (YYYY-MM-DD) of the current UTC timestamp.
  generated_at: new Date().toISOString().split('T')[0],
  last_poll: registry.last_poll,
  total_tracked: countByStatus('tracked'),
  total_not_in_sitemap: countByStatus('not_in_sitemap'),
  total_files_needing_update: files.length,
  by_priority: byPriority,
  files,
};

// Written in both output modes, before anything is printed.
saveReport('change-report.json', report, DATA_DIR);

// Output: machine-readable JSON with --json, otherwise a human-readable summary.
if (jsonOnly) {
  process.stdout.write(JSON.stringify(report, null, 2) + '\n');
} else {
  // Maximum number of files listed individually in the summary.
  const MAX_LISTED = 20;
  const totalUrls = Object.keys(registry.urls).length;

  console.log(`\n=== KB Change Report (${report.generated_at}) ===`);
  console.log(`Sources last polled: ${registry.last_poll}`);
  console.log(`URLs tracked: ${report.total_tracked}/${totalUrls} (${report.total_not_in_sitemap} not in sitemap)`);
  console.log(`Files needing update: ${files.length} (Critical: ${byPriority.critical}, High: ${byPriority.high}, Medium: ${byPriority.medium}, Low: ${byPriority.low})`);

  if (files.length > 0) {
    console.log(`\nTop ${MAX_LISTED} by priority:`);
    // `files` is already sorted, so the first entries are the most urgent ones.
    for (const f of files.slice(0, MAX_LISTED)) {
      console.log(` [${f.priority.toUpperCase()}] ${f.path}`);
      console.log(` ${f.changed_url_count} source(s) changed. Latest: ${f.newest_source_change}. File: ${f.file_last_updated}`);
    }
    if (files.length > MAX_LISTED) {
      console.log(` ... and ${files.length - MAX_LISTED} more`);
    }
  }

  console.log('\nRun: /architect:generate-skills --update to process updates');
}