feat(ms-ai-architect): sitemap-based KB change detection system
Adds a zero-dependency Node.js pipeline that polls Microsoft Learn sitemaps weekly to detect when source documentation changes. Replaces the broken mtime-based staleness check (all files had identical mtime after release).

Components:
- build-registry.mjs: extracts 1342 URLs from 387 reference files
- poll-sitemaps.mjs: streams ~18 child sitemaps, matches against registry
- report-changes.mjs: prioritized change report (critical/high/medium/low)
- discover-new-urls.mjs: finds relevant new MS Learn pages not yet covered
- run-weekly-update.mjs: orchestrator with --force/--discover/--dry-run

Integration:
- session-start hook reads change-report.json instead of broken mtime check
- hook triggers background poll if >7 days since last check
- generate-skills --update reads change report for targeted MCP updates

Current stats: 69% match rate (924/1342 URLs tracked via sitemaps). ~31% unmatched due to Microsoft URL restructuring (ai-foundry/openai paths).

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
035255fc5d
commit
f968f37be3
13 changed files with 976 additions and 59 deletions
|
|
@ -0,0 +1,75 @@
|
|||
// registry-io.mjs — Atomic read/write for url-registry.json and report files.
|
||||
// Zero dependencies. Uses rename() for atomic writes.
|
||||
|
||||
import { existsSync, mkdirSync, readFileSync, renameSync, rmSync, writeFileSync } from 'node:fs';
import { dirname, join } from 'node:path';
import { fileURLToPath } from 'node:url';
|
||||
|
||||
// ES modules have no built-in __dirname, so derive it from this module's URL.
const __dirname = dirname(fileURLToPath(import.meta.url));
// Default directory for url-registry.json and the report files:
// the `data` directory that sits next to this module's own directory.
const DEFAULT_DATA_DIR = join(__dirname, '..', 'data');
|
||||
|
||||
/**
 * Load the URL registry from disk.
 *
 * The file is read in a single step rather than probed with existsSync()
 * first, so it cannot vanish between the check and the read.
 *
 * @param {string} [dataDir] — directory holding url-registry.json;
 *   defaults to DEFAULT_DATA_DIR
 * @returns {object} parsed registry, or an empty scaffold when the file
 *   (or its directory) does not exist yet
 * @throws {SyntaxError} when the file exists but is not valid JSON; the
 *   message names the offending path and the parser error is kept as `cause`
 * @throws {Error} any other read failure (e.g. EACCES) is rethrown unchanged
 */
export function loadRegistry(dataDir = DEFAULT_DATA_DIR) {
  const path = join(dataDir, 'url-registry.json');

  let raw;
  try {
    raw = readFileSync(path, 'utf8');
  } catch (err) {
    // "Nothing there yet" is an expected state: start from an empty registry.
    if (err.code === 'ENOENT' || err.code === 'ENOTDIR') {
      return {
        version: 1,
        created_at: null,
        last_poll: null,
        sitemap_state: {},
        urls: {},
      };
    }
    throw err;
  }

  try {
    return JSON.parse(raw);
  } catch (err) {
    // Keep the SyntaxError type, but say which file is corrupt.
    throw new SyntaxError(`Invalid JSON in ${path}: ${err.message}`, { cause: err });
  }
}
|
||||
|
||||
/**
 * Save the URL registry atomically (write to a temp file, then rename).
 *
 * The registry is serialized before anything touches the disk, the temp file
 * name includes the process id so two processes saving at the same time do
 * not write into the same temp file, and the temp file is removed again if
 * the write or the rename fails.
 *
 * @param {object} registry
 * @param {string} [dataDir] — defaults to DEFAULT_DATA_DIR
 * @throws {TypeError} when `registry` has no JSON representation
 *   (e.g. `undefined`); nothing is written in that case
 */
export function saveRegistry(registry, dataDir = DEFAULT_DATA_DIR) {
  const json = JSON.stringify(registry, null, 2);
  // JSON.stringify() returns undefined (not a string) for undefined, functions
  // and symbols; concatenating that would write the literal text "undefined".
  if (typeof json !== 'string') {
    throw new TypeError('saveRegistry: registry is not JSON-serializable');
  }

  ensureDir(dataDir);
  const path = join(dataDir, 'url-registry.json');
  const tmp = `${path}.${process.pid}.tmp`;
  try {
    writeFileSync(tmp, `${json}\n`, 'utf8');
    renameSync(tmp, path);
  } catch (err) {
    // Do not leave a partial temp file behind; report the original failure.
    rmSync(tmp, { force: true });
    throw err;
  }
}
|
||||
|
||||
/**
 * Load a JSON report file (change-report.json or discovery-report.json).
 *
 * Best-effort by design: callers only ever get a parsed value or null.
 *
 * @param {string} name — filename without path (e.g. 'change-report.json')
 * @param {string} [dataDir] — defaults to DEFAULT_DATA_DIR
 * @returns {object|null} parsed JSON, or null if the file is missing,
 *   unreadable, or not valid JSON
 */
export function loadReport(name, dataDir = DEFAULT_DATA_DIR) {
  const path = join(dataDir, name);
  try {
    // No existsSync() probe: a missing file simply lands in the catch below.
    return JSON.parse(readFileSync(path, 'utf8'));
  } catch {
    return null;
  }
}
|
||||
|
||||
/**
 * Save a JSON report file atomically (write to a temp file, then rename).
 *
 * The data is serialized before anything touches the disk, the temp file
 * name includes the process id so two processes saving at the same time do
 * not write into the same temp file, and the temp file is removed again if
 * the write or the rename fails.
 *
 * @param {string} name — filename without path (e.g. 'change-report.json')
 * @param {object} data
 * @param {string} [dataDir] — defaults to DEFAULT_DATA_DIR
 * @throws {TypeError} when `data` has no JSON representation
 *   (e.g. `undefined`); nothing is written in that case
 */
export function saveReport(name, data, dataDir = DEFAULT_DATA_DIR) {
  const json = JSON.stringify(data, null, 2);
  // JSON.stringify() returns undefined (not a string) for undefined, functions
  // and symbols; concatenating that would write the literal text "undefined".
  if (typeof json !== 'string') {
    throw new TypeError(`saveReport: data for ${name} is not JSON-serializable`);
  }

  ensureDir(dataDir);
  const path = join(dataDir, name);
  const tmp = `${path}.${process.pid}.tmp`;
  try {
    writeFileSync(tmp, `${json}\n`, 'utf8');
    renameSync(tmp, path);
  } catch (err) {
    // Do not leave a partial temp file behind; report the original failure.
    rmSync(tmp, { force: true });
    throw err;
  }
}
|
||||
|
||||
/**
 * Make sure `dir` exists, creating it and any missing parents.
 *
 * @param {string} dir
 * @throws {Error} when `dir` cannot be created, including when the path
 *   already exists as a regular file
 */
function ensureDir(dir) {
  // With `recursive: true`, mkdirSync() succeeds on an existing directory, so
  // a separate existsSync() probe only adds a check-then-act gap.
  mkdirSync(dir, { recursive: true });
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue