Adds a zero-dependency Node.js pipeline that polls Microsoft Learn sitemaps weekly to detect when source documentation changes. Replaces the broken mtime-based staleness check (all files had identical mtime after release).

Components:
- build-registry.mjs: extracts 1342 URLs from 387 reference files
- poll-sitemaps.mjs: streams ~18 child sitemaps, matches against registry
- report-changes.mjs: prioritized change report (critical/high/medium/low)
- discover-new-urls.mjs: finds relevant new MS Learn pages not yet covered
- run-weekly-update.mjs: orchestrator with --force/--discover/--dry-run

Integration:
- session-start hook reads change-report.json instead of broken mtime check
- hook triggers background poll if >7 days since last check
- generate-skills --update reads change report for targeted MCP updates

Current stats: 69% match rate (924/1342 URLs tracked via sitemaps). ~31% unmatched due to Microsoft URL restructuring (ai-foundry/openai paths).

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
75 lines
2.2 KiB
JavaScript
75 lines
2.2 KiB
JavaScript
// registry-io.mjs — Atomic read/write for url-registry.json and report files.
|
|
// Zero dependencies. Uses rename() for atomic writes.
|
|
|
|
import { readFileSync, writeFileSync, renameSync, existsSync, mkdirSync } from 'node:fs';
|
|
import { join, dirname } from 'node:path';
|
|
import { fileURLToPath } from 'node:url';
|
|
|
|
const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
const DEFAULT_DATA_DIR = join(__dirname, '..', 'data');
|
|
|
|
/**
 * Read `url-registry.json` from the data directory.
 *
 * When the file is absent, a fresh, empty registry scaffold is returned
 * instead. A file that exists but cannot be read or parsed is not handled
 * here: the error from `readFileSync` / `JSON.parse` propagates to the caller.
 *
 * @param {string} [dataDir] - directory holding the registry; defaults to DEFAULT_DATA_DIR
 * @returns {object} the parsed registry, or an empty scaffold if no file exists
 */
export function loadRegistry(dataDir = DEFAULT_DATA_DIR) {
  const registryFile = join(dataDir, 'url-registry.json');

  if (existsSync(registryFile)) {
    const raw = readFileSync(registryFile, 'utf8');
    return JSON.parse(raw);
  }

  // No registry on disk yet: hand back a blank version-1 structure.
  const scaffold = {
    version: 1,
    created_at: null,
    last_poll: null,
    sitemap_state: {},
    urls: {},
  };
  return scaffold;
}
|
|
|
|
/**
 * Persist the URL registry as pretty-printed JSON in `url-registry.json`.
 *
 * The JSON is written to a sibling `.tmp` file first and then renamed over
 * the target, so the target is replaced in one rename step instead of being
 * rewritten in place.
 *
 * @param {object} registry - registry data; must have a JSON representation
 * @param {string} [dataDir] - target directory, created if missing; defaults to DEFAULT_DATA_DIR
 * @throws {TypeError} if `registry` cannot be represented as JSON (e.g. undefined)
 */
export function saveRegistry(registry, dataDir = DEFAULT_DATA_DIR) {
  // Serialize before touching the disk. JSON.stringify returns undefined
  // (rather than throwing) for undefined, functions and symbols; appending
  // '\n' to that would overwrite a valid registry with the text "undefined".
  const json = JSON.stringify(registry, null, 2);
  if (typeof json !== 'string') {
    throw new TypeError('saveRegistry: registry is not JSON-serializable');
  }

  ensureDir(dataDir);
  const path = join(dataDir, 'url-registry.json');
  const tmp = `${path}.tmp`;
  writeFileSync(tmp, `${json}\n`, 'utf8');
  renameSync(tmp, path);
}
|
|
|
|
/**
 * Read and parse a JSON report (e.g. change-report.json or discovery-report.json).
 *
 * This is a best-effort read: a missing file, an unreadable file, and a file
 * containing invalid JSON all yield `null` rather than an exception.
 *
 * @param {string} name - filename without path (e.g. 'change-report.json')
 * @param {string} [dataDir] - directory holding the report; defaults to DEFAULT_DATA_DIR
 * @returns {object|null} the parsed report, or null when it is absent or unparseable
 */
export function loadReport(name, dataDir = DEFAULT_DATA_DIR) {
  const reportFile = join(dataDir, name);
  let report = null;

  if (existsSync(reportFile)) {
    try {
      report = JSON.parse(readFileSync(reportFile, 'utf8'));
    } catch {
      // Unreadable or corrupt report: treat it the same as a missing one.
      report = null;
    }
  }

  return report;
}
|
|
|
|
/**
 * Persist a report as pretty-printed JSON under the given filename.
 *
 * The JSON is written to a sibling `.tmp` file first and then renamed over
 * the target, so the target is replaced in one rename step instead of being
 * rewritten in place.
 *
 * @param {string} name - filename without path (e.g. 'change-report.json')
 * @param {object} data - report contents; must have a JSON representation
 * @param {string} [dataDir] - target directory, created if missing; defaults to DEFAULT_DATA_DIR
 * @throws {TypeError} if `data` cannot be represented as JSON (e.g. undefined)
 */
export function saveReport(name, data, dataDir = DEFAULT_DATA_DIR) {
  // Serialize before touching the disk. JSON.stringify returns undefined
  // (rather than throwing) for undefined, functions and symbols; appending
  // '\n' to that would overwrite a valid report with the text "undefined".
  const json = JSON.stringify(data, null, 2);
  if (typeof json !== 'string') {
    throw new TypeError(`saveReport: data for "${name}" is not JSON-serializable`);
  }

  ensureDir(dataDir);
  const path = join(dataDir, name);
  const tmp = `${path}.tmp`;
  writeFileSync(tmp, `${json}\n`, 'utf8');
  renameSync(tmp, path);
}
|
|
|
|
/**
 * Make sure `dir` exists, creating it and any missing parent directories.
 *
 * @param {string} dir - directory path to create if absent
 */
function ensureDir(dir) {
  // With `recursive: true`, mkdirSync does not fail when the directory already
  // exists, so a separate existsSync() probe beforehand is unnecessary.
  mkdirSync(dir, { recursive: true });
}
|