ktg-plugin-marketplace/plugins/ms-ai-architect/scripts/kb-update/lib/registry-io.mjs
Kjell Tore Guttormsen f968f37be3 feat(ms-ai-architect): sitemap-based KB change detection system
Adds a zero-dependency Node.js pipeline that polls Microsoft Learn sitemaps
weekly to detect when source documentation changes. Replaces the broken
mtime-based staleness check (all files had identical mtime after release).

Components:
- build-registry.mjs: extracts 1342 URLs from 387 reference files
- poll-sitemaps.mjs: streams ~18 child sitemaps, matches against registry
- report-changes.mjs: prioritized change report (critical/high/medium/low)
- discover-new-urls.mjs: finds relevant new MS Learn pages not yet covered
- run-weekly-update.mjs: orchestrator with --force/--discover/--dry-run

Integration:
- session-start hook reads change-report.json instead of broken mtime check
- hook triggers background poll if >7 days since last check
- generate-skills --update reads change report for targeted MCP updates

Current stats: 69% match rate (924/1342 URLs tracked via sitemaps).
~31% unmatched due to Microsoft URL restructuring (ai-foundry/openai paths).

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-04-09 21:19:51 +02:00

75 lines
2.2 KiB
JavaScript

// registry-io.mjs — Atomic read/write for url-registry.json and report files.
// Zero dependencies. Uses rename() for atomic writes.
import { readFileSync, writeFileSync, renameSync, unlinkSync, existsSync, mkdirSync } from 'node:fs';
import { join, dirname } from 'node:path';
import { fileURLToPath } from 'node:url';
// ES modules do not define __dirname; derive it from this module's own URL.
const thisModuleFile = fileURLToPath(import.meta.url);
const __dirname = dirname(thisModuleFile);
// Default location for the registry and report files: `../data` relative to
// the directory that contains this module.
const DEFAULT_DATA_DIR = join(__dirname, '../data');
/**
 * Load the URL registry from disk.
 *
 * Reads `url-registry.json` from `dataDir`. When the file does not exist, a
 * fresh empty scaffold is returned so a first run needs no special casing.
 *
 * @param {string} [dataDir] — defaults to ../data/ relative to lib/
 * @returns {object} parsed registry or empty scaffold
 * @throws {SyntaxError} if the file exists but does not contain valid JSON;
 *   the message names the file and the original error is attached as `cause`
 * @throws {Error} any filesystem error other than "not found" raised while
 *   reading the file
 */
export function loadRegistry(dataDir = DEFAULT_DATA_DIR) {
  const path = join(dataDir, 'url-registry.json');

  let raw;
  try {
    // Read directly instead of checking existsSync() first: a separate
    // existence check can go stale before the read happens.
    raw = readFileSync(path, 'utf8');
  } catch (err) {
    if (err?.code === 'ENOENT' || err?.code === 'ENOTDIR') {
      return {
        version: 1,
        created_at: null,
        last_poll: null,
        sitemap_state: {},
        urls: {},
      };
    }
    throw err;
  }

  try {
    return JSON.parse(raw);
  } catch (err) {
    // Keep the SyntaxError type, but say which file is corrupt.
    throw new SyntaxError(`Invalid JSON in ${path}: ${err.message}`, { cause: err });
  }
}
/**
 * Save the URL registry atomically (write to a temp file, then rename).
 *
 * The registry is serialized before anything is written, so a value that
 * cannot be represented as JSON never replaces an existing registry file.
 * The temp file name includes the process id so two processes saving at the
 * same time do not write to the same temp file, and the temp file is removed
 * again if writing or renaming fails.
 *
 * @param {object} registry
 * @param {string} [dataDir]
 * @throws {TypeError} if `registry` is not JSON-serializable (e.g. `undefined`)
 * @throws {Error} any filesystem error from creating the directory, writing
 *   the temp file, or renaming it into place
 */
export function saveRegistry(registry, dataDir = DEFAULT_DATA_DIR) {
  // JSON.stringify returns undefined (not a string) for undefined, functions
  // and symbols; concatenating that would write the text "undefined" to disk.
  const json = JSON.stringify(registry, null, 2);
  if (json === undefined) {
    throw new TypeError('saveRegistry: registry is not JSON-serializable');
  }

  ensureDir(dataDir);
  const path = join(dataDir, 'url-registry.json');
  const tmp = `${path}.${process.pid}.tmp`;
  try {
    writeFileSync(tmp, `${json}\n`, 'utf8');
    renameSync(tmp, path);
  } catch (err) {
    try {
      unlinkSync(tmp);
    } catch {
      // Best-effort cleanup: the temp file may never have been created.
    }
    throw err;
  }
}
/**
 * Read a JSON report (change-report.json or discovery-report.json) from the
 * data directory on a best-effort basis.
 *
 * @param {string} name — filename without path (e.g. 'change-report.json')
 * @param {string} [dataDir]
 * @returns {object|null} the parsed JSON, or null when the file is missing,
 *   cannot be read, or does not contain valid JSON
 */
export function loadReport(name, dataDir = DEFAULT_DATA_DIR) {
  const reportFile = join(dataDir, name);
  let parsed = null;

  if (existsSync(reportFile)) {
    try {
      const text = readFileSync(reportFile, 'utf8');
      parsed = JSON.parse(text);
    } catch {
      // Unreadable or malformed report: treated the same as "no report".
      parsed = null;
    }
  }

  return parsed;
}
/**
 * Save a JSON report file atomically (write to a temp file, then rename).
 *
 * The data is serialized before anything is written, so a value that cannot
 * be represented as JSON never replaces an existing report. The temp file
 * name includes the process id so two processes saving at the same time do
 * not write to the same temp file, and the temp file is removed again if
 * writing or renaming fails.
 *
 * @param {string} name — filename without path (e.g. 'change-report.json')
 * @param {object} data
 * @param {string} [dataDir]
 * @throws {TypeError} if `data` is not JSON-serializable (e.g. `undefined`)
 * @throws {Error} any filesystem error from creating the directory, writing
 *   the temp file, or renaming it into place
 */
export function saveReport(name, data, dataDir = DEFAULT_DATA_DIR) {
  // JSON.stringify returns undefined (not a string) for undefined, functions
  // and symbols; concatenating that would write the text "undefined" to disk.
  const json = JSON.stringify(data, null, 2);
  if (json === undefined) {
    throw new TypeError(`saveReport: data for "${name}" is not JSON-serializable`);
  }

  ensureDir(dataDir);
  const path = join(dataDir, name);
  const tmp = `${path}.${process.pid}.tmp`;
  try {
    writeFileSync(tmp, `${json}\n`, 'utf8');
    renameSync(tmp, path);
  } catch (err) {
    try {
      unlinkSync(tmp);
    } catch {
      // Best-effort cleanup: the temp file may never have been created.
    }
    throw err;
  }
}
/**
 * Make sure `dir` exists, creating it and any missing parent directories.
 * @param {string} dir
 */
function ensureDir(dir) {
  if (existsSync(dir)) {
    return;
  }
  mkdirSync(dir, { recursive: true });
}