#!/usr/bin/env node
// report-changes.mjs — Compare sitemap lastmod to reference file "Last updated:" headers.
// Generates change-report.json and prints human-readable summary.
// Usage: node report-changes.mjs [--json]

import { readFileSync, existsSync } from 'node:fs';
import { join, dirname } from 'node:path';
import { fileURLToPath } from 'node:url';
import { loadRegistry, saveReport } from './lib/registry-io.mjs';

const __dirname = dirname(fileURLToPath(import.meta.url));
const PLUGIN_ROOT = join(__dirname, '..', '..');
const DATA_DIR = join(__dirname, 'data');

// When set, the report is written to stdout as JSON instead of the text summary.
const jsonOnly = process.argv.includes('--json');

// --- Priority classification by file path patterns ---

// "pris" is matched anywhere in the path so compound words such as
// "lisenspriser" still hit, but not when it is merely the tail of
// "enterprise", "surprise" or "comprise".
const CRITICAL_PATH_PATTERN = /cost|pricing|(?<!enter|com|sur)pris/;
const HIGH_PATH_PATTERN = /responsible-ai|governance|ai-security-(?:engineering|scoring)/;
// "rag" must stand on its own (optionally as "graphrag"); a bare substring
// match would also classify "storage", "coverage", "fragment", ... as medium.
const MEDIUM_PATH_PATTERN =
  /platforms|copilot|azure-ai-services|agent-orchestration|(?<![a-z])(?:graph)?rag(?![a-z])|mlops|prompt-engineering|monitoring|performance/;

/**
 * Classify a reference file by keywords found in its (lower-cased) path.
 *
 * @param {string} filePath - Path of the reference file.
 * @returns {'critical'|'high'|'medium'|'low'} The first matching priority,
 *   checked from most to least urgent; 'low' when nothing matches.
 */
function getFilePriority(filePath) {
  const lower = filePath.toLowerCase();
  if (CRITICAL_PATH_PATTERN.test(lower)) return 'critical';
  if (HIGH_PATH_PATTERN.test(lower)) return 'high';
  if (MEDIUM_PATH_PATTERN.test(lower)) return 'medium';
  return 'low';
}

// --- "Last updated:" header parsing ---

// Accepted header spellings; the first pattern that matches wins.
const LAST_UPDATED_PATTERNS = [
  /\*\*Last updated:\*\*\s*([\d-]+)/i,
  /\*\*Sist (?:oppdatert|verifisert):\*\*\s*([\d-]+)/i,
  /\*\*Dato:\*\*\s*([\d-]+)/i,
];

// Number of leading characters of a reference file that are searched for a date header.
const HEADER_SEARCH_CHARS = 500;

// filePath → parsed date (or null). A reference file is usually listed by
// several URLs; caching avoids re-reading it from disk for each of them.
const lastUpdatedCache = new Map();

/**
 * Read a reference file and extract the date from its "Last updated" header.
 *
 * @param {string} filePath - Path relative to PLUGIN_ROOT.
 * @returns {string|null} The header date, with a 7-character value
 *   (YYYY-MM) padded to YYYY-MM-01; null when the file does not exist or no
 *   header is found within the first HEADER_SEARCH_CHARS characters.
 */
function readLastUpdated(filePath) {
  const fullPath = join(PLUGIN_ROOT, filePath);
  if (!existsSync(fullPath)) return null;

  // The whole file is read, but only its head is searched — the header is
  // expected at the top.
  const head = readFileSync(fullPath, 'utf8').slice(0, HEADER_SEARCH_CHARS);

  for (const pattern of LAST_UPDATED_PATTERNS) {
    const match = head.match(pattern);
    if (match) {
      const raw = match[1].trim();
      // YYYY-MM → YYYY-MM-01, YYYY-MM-DD → as-is
      return raw.length === 7 ? `${raw}-01` : raw;
    }
  }
  return null; // No date found — callers treat the file as always stale
}

/**
 * Parse the "Last updated:" header of a reference file, memoised per path
 * for the lifetime of this run.
 *
 * @param {string} filePath - Path relative to PLUGIN_ROOT.
 * @returns {string|null} See readLastUpdated.
 */
function parseLastUpdated(filePath) {
  if (!lastUpdatedCache.has(filePath)) {
    lastUpdatedCache.set(filePath, readLastUpdated(filePath));
  }
  return lastUpdatedCache.get(filePath);
}

// Priority sort order
const PRIORITY_ORDER = { critical: 0, high: 1, medium: 2, low: 3 };

// --- Report building ---

/**
 * Group changed URLs by the reference files they feed.
 *
 * Only entries with status 'tracked' and a sitemap_lastmod are considered. A
 * reference file is recorded when a URL's sitemap_lastmod sorts (as a string)
 * after the file's own header date.
 *
 * @param {Object<string, object>} urls - Registry entries keyed by URL.
 * @returns {Map<string, {changedUrls: Array<{url: string, sitemap_lastmod: string}>,
 *   newestChange: string, fileDate: (string|null)}>} Changes keyed by file path.
 */
function collectFileChanges(urls) {
  const fileChanges = new Map();

  for (const [url, entry] of Object.entries(urls)) {
    if (!entry.sitemap_lastmod || entry.status !== 'tracked') continue;

    // An entry without a reference_files list simply contributes nothing.
    for (const refFile of entry.reference_files ?? []) {
      const fileDate = parseLastUpdated(refFile);
      // If no date found, treat as always stale (date "0000-01-01")
      const effectiveDate = fileDate || '0000-01-01';

      if (entry.sitemap_lastmod > effectiveDate) {
        let changes = fileChanges.get(refFile);
        if (!changes) {
          changes = { changedUrls: [], newestChange: entry.sitemap_lastmod, fileDate };
          fileChanges.set(refFile, changes);
        }
        changes.changedUrls.push({ url, sitemap_lastmod: entry.sitemap_lastmod });
        if (entry.sitemap_lastmod > changes.newestChange) {
          changes.newestChange = entry.sitemap_lastmod;
        }
      }
    }
  }

  return fileChanges;
}

/**
 * Turn the grouped changes into report entries (unsorted).
 *
 * `skill` and `category` are taken from the second and fourth '/'-separated
 * path segments, falling back to 'unknown' when the path is too short.
 *
 * @param {Map<string, object>} fileChanges - Result of collectFileChanges.
 * @returns {object[]} One entry per reference file needing an update.
 */
function buildReportEntries(fileChanges) {
  const entries = [];
  for (const [path, changes] of fileChanges) {
    const pathParts = path.split('/');
    entries.push({
      path,
      priority: getFilePriority(path),
      file_last_updated: changes.fileDate || 'unknown',
      newest_source_change: changes.newestChange,
      changed_url_count: changes.changedUrls.length,
      changed_urls: changes.changedUrls.map((changed) => changed.url),
      skill: pathParts[1] || 'unknown',
      category: pathParts[3] || 'unknown',
    });
  }
  return entries;
}

/**
 * Sort comparator: priority first, then newest source change descending.
 *
 * @param {object} a - Report entry.
 * @param {object} b - Report entry.
 * @returns {number} Negative when `a` sorts before `b`.
 */
function compareReportEntries(a, b) {
  const priorityDiff = PRIORITY_ORDER[a.priority] - PRIORITY_ORDER[b.priority];
  if (priorityDiff !== 0) return priorityDiff;
  return b.newest_source_change.localeCompare(a.newest_source_change);
}

/**
 * Print the human-readable summary to stdout.
 *
 * @param {object} report - The report object that was saved to disk.
 * @param {number} totalUrlCount - Number of URLs in the registry, any status.
 */
function printSummary(report, totalUrlCount) {
  const { files, by_priority: counts } = report;

  console.log(`\n=== KB Change Report (${report.generated_at}) ===`);
  console.log(`Sources last polled: ${report.last_poll}`);
  console.log(`URLs tracked: ${report.total_tracked}/${totalUrlCount} (${report.total_not_in_sitemap} not in sitemap)`);
  console.log(`Files needing update: ${files.length} (Critical: ${counts.critical}, High: ${counts.high}, Medium: ${counts.medium}, Low: ${counts.low})`);

  if (files.length > 0) {
    console.log('\nTop 20 by priority:');
    for (const f of files.slice(0, 20)) {
      console.log(` [${f.priority.toUpperCase()}] ${f.path}`);
      console.log(` ${f.changed_url_count} source(s) changed. Latest: ${f.newest_source_change}. File: ${f.file_last_updated}`);
    }
    if (files.length > 20) {
      console.log(` ... and ${files.length - 20} more`);
    }
  }

  console.log('\nRun: /architect:generate-skills --update to process updates');
}

// --- Main ---

const registry = loadRegistry(DATA_DIR);

if (!registry.last_poll) {
  console.error('Registry has not been polled yet. Run poll-sitemaps.mjs first.');
  process.exit(1);
}

// Tolerate a registry that has been polled but carries no URL table.
const registryUrls = registry.urls ?? {};
const urlEntries = Object.values(registryUrls);

const files = buildReportEntries(collectFileChanges(registryUrls));
files.sort(compareReportEntries);

// Count by priority
const byPriority = { critical: 0, high: 0, medium: 0, low: 0 };
for (const f of files) byPriority[f.priority]++;

const report = {
  generated_at: new Date().toISOString().split('T')[0],
  last_poll: registry.last_poll,
  total_tracked: urlEntries.filter((u) => u.status === 'tracked').length,
  total_not_in_sitemap: urlEntries.filter((u) => u.status === 'not_in_sitemap').length,
  total_files_needing_update: files.length,
  by_priority: byPriority,
  files,
};

saveReport('change-report.json', report, DATA_DIR);

if (jsonOnly) {
  process.stdout.write(JSON.stringify(report, null, 2) + '\n');
} else {
  printSummary(report, urlEntries.length);
}