feat(ms-ai-architect): sitemap-based KB change detection system
Adds a zero-dependency Node.js pipeline that polls Microsoft Learn sitemaps weekly to detect when source documentation changes. Replaces the broken mtime-based staleness check (all files had identical mtime after release).

Components:
- build-registry.mjs: extracts 1342 URLs from 387 reference files
- poll-sitemaps.mjs: streams ~18 child sitemaps, matches against registry
- report-changes.mjs: prioritized change report (critical/high/medium/low)
- discover-new-urls.mjs: finds relevant new MS Learn pages not yet covered
- run-weekly-update.mjs: orchestrator with --force/--discover/--dry-run

Integration:
- session-start hook reads change-report.json instead of the broken mtime check
- hook triggers a background poll if >7 days have passed since the last check
- generate-skills --update reads the change report for targeted MCP updates

Current stats: 69% match rate (924/1342 URLs tracked via sitemaps). ~31% unmatched due to Microsoft URL restructuring (ai-foundry/openai paths).

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
035255fc5d
commit
f968f37be3
13 changed files with 976 additions and 59 deletions
|
|
@ -0,0 +1,70 @@
|
|||
#!/usr/bin/env node
|
||||
// run-weekly-update.mjs — Orchestrator for weekly KB update pipeline.
|
||||
// Runs: poll → report → (optional) discover, sequentially.
|
||||
// Usage: node run-weekly-update.mjs [--force] [--discover] [--dry-run]
|
||||
|
||||
import { dirname, join } from 'node:path';
|
||||
import { fileURLToPath } from 'node:url';
|
||||
import { execFileSync } from 'node:child_process';
|
||||
import { loadRegistry } from './lib/registry-io.mjs';
|
||||
|
||||
// Absolute directory of this script. ES modules have no built-in __dirname,
// so it is derived from import.meta.url. Sibling pipeline scripts and the
// data/ folder are resolved against it.
const thisFile = fileURLToPath(import.meta.url);
const __dirname = dirname(thisFile);
const DATA_DIR = join(__dirname, 'data');

// Command-line switches (see the usage line in the file header).
const cliArgs = process.argv;
const force = cliArgs.includes('--force');
const discover = cliArgs.includes('--discover');
const dryRun = cliArgs.includes('--dry-run');

// Milliseconds in one day; used to convert timestamp differences to days.
const DAY_MS = 1000 * 60 * 60 * 24;
|
||||
|
||||
/**
 * Run a sibling pipeline script in a child Node.js process and block until
 * it finishes.
 *
 * The child shares this process's stdio, so its output appears inline.
 * If the child cannot be spawned, exits non-zero, or exceeds the timeout,
 * the error is logged and the whole orchestrator exits with status 1, so
 * later steps never run after a failed one.
 *
 * @param {string} script - File name of the script, relative to this file's directory.
 * @param {string[]} [args=[]] - Command-line arguments passed to the script.
 * @returns {void}
 */
function run(script, args = []) {
  const fullPath = join(__dirname, script);
  console.log(`\n--- Running ${script} ${args.join(' ')} ---`);
  try {
    // Use process.execPath rather than the bare name 'node': the latter is
    // looked up on PATH and may resolve to a different Node version, or to
    // nothing at all when launched with a minimal environment.
    execFileSync(process.execPath, [fullPath, ...args], {
      stdio: 'inherit',
      timeout: 10 * 60 * 1000, // 10 min max per step
    });
  } catch (err) {
    console.error(`${script} failed: ${err.message}`);
    process.exit(1);
  }
}
|
||||
|
||||
// --- Main ---

// Minimum number of days between polls unless --force is given.
const POLL_INTERVAL_DAYS = 7;

const registry = loadRegistry(DATA_DIR);

// A missing or unparseable last_poll is treated as "never polled", which
// makes daysSince Infinity and lets the pipeline run.
const lastPoll = registry.last_poll ? new Date(registry.last_poll) : null;
const lastPollMs = lastPoll ? lastPoll.getTime() : Number.NaN;
const daysSince = Number.isNaN(lastPollMs) ? Infinity : (Date.now() - lastPollMs) / DAY_MS;

// Throttle: skip the run when the last poll is recent enough.
if (!force && daysSince < POLL_INTERVAL_DAYS) {
  console.log(`Last poll: ${Math.floor(daysSince)} day(s) ago. Next in ${Math.ceil(POLL_INTERVAL_DAYS - daysSince)} day(s).`);
  console.log('Use --force to run anyway.');
  process.exit(0);
}

// Build the step list once so the dry-run listing and the real run cannot
// drift apart. A registry without a `urls` map is treated the same as an
// empty one.
const registryIsEmpty = Object.keys(registry.urls ?? {}).length === 0;
const steps = [];

// Ensure registry exists
if (registryIsEmpty) {
  steps.push({
    script: 'build-registry.mjs',
    args: [],
    note: 'Registry empty — building from reference files first...',
  });
}

// Step 1: Poll sitemaps
steps.push({ script: 'poll-sitemaps.mjs', args: force ? ['--force'] : [] });

// Step 2: Generate change report
steps.push({ script: 'report-changes.mjs', args: [] });

// Step 3: Optional discovery
if (discover) {
  steps.push({ script: 'discover-new-urls.mjs', args: ['--limit', '500'] });
}

if (dryRun) {
  console.log('DRY RUN — would execute:');
  steps.forEach(({ script, args }, index) => {
    console.log(` ${index + 1}. ${[script, ...args].join(' ')}`);
  });
  process.exit(0);
}

// Steps run strictly in order; run() terminates the process on the first
// failure, so a later step never executes after a failed one.
for (const { script, args, note } of steps) {
  if (note) console.log(note);
  run(script, args);
}

console.log('\n=== Weekly update complete ===');
|
||||
Loading…
Add table
Add a link
Reference in a new issue