#!/usr/bin/env node
// build-registry.mjs — Build URL registry from existing reference files.
// Extracts all learn.microsoft.com URLs and maps them to their source reference files.
// Usage: node build-registry.mjs [--merge]
//   --merge: preserve existing sitemap_lastmod data, only add new URLs

import { readdirSync, readFileSync, existsSync } from 'node:fs';
import { join, relative, dirname, sep } from 'node:path';
import { fileURLToPath } from 'node:url';
import { normalizeUrl, extractUrls } from './lib/url-normalize.mjs';
import { loadRegistry, saveRegistry } from './lib/registry-io.mjs';

const __dirname = dirname(fileURLToPath(import.meta.url));
const PLUGIN_ROOT = join(__dirname, '..', '..');
const SKILLS_DIR = join(PLUGIN_ROOT, 'skills');

const merge = process.argv.includes('--merge');

/**
 * Recursively collect Markdown files below a directory.
 *
 * Files named exactly `SKILL.md` are skipped; every other `*.md` file is
 * returned. A directory that does not exist yields an empty list instead
 * of throwing, so skills without a `references` folder are simply ignored.
 *
 * @param {string} dir - Directory to walk.
 * @returns {string[]} Paths of the matching files (each built with `join(dir, ...)`).
 */
function walkMd(dir) {
  const results = [];
  if (!existsSync(dir)) return results;

  for (const entry of readdirSync(dir, { withFileTypes: true })) {
    const full = join(dir, entry.name);
    if (entry.isDirectory()) {
      results.push(...walkMd(full));
    } else if (entry.name.endsWith('.md') && entry.name !== 'SKILL.md') {
      results.push(full);
    }
  }
  return results;
}

/**
 * Convert a platform-specific relative path to forward-slash form so the
 * persisted registry is identical regardless of the OS it was built on.
 *
 * @param {string} p - Path using the platform separator.
 * @returns {string} The same path with `/` separators.
 */
function toPosixPath(p) {
  return sep === '/' ? p : p.split(sep).join('/');
}

// --- Main ---

// Only consult the previous registry when merging; a plain build starts fresh.
const existing = merge ? loadRegistry() : null;

// Fail with a readable message rather than a raw ENOENT stack trace.
if (!existsSync(SKILLS_DIR)) {
  console.error(`Skills directory not found: ${SKILLS_DIR}`);
  process.exit(1);
}

// URL (as returned by extractUrls) → Set of reference-file paths relative to PLUGIN_ROOT
const urlToFiles = new Map();
let totalFiles = 0;

const skillDirs = readdirSync(SKILLS_DIR, { withFileTypes: true })
  .filter((d) => d.isDirectory())
  .map((d) => d.name);

for (const skill of skillDirs) {
  const refsDir = join(SKILLS_DIR, skill, 'references');

  for (const file of walkMd(refsDir)) {
    totalFiles++;
    const content = readFileSync(file, 'utf8');
    const relPath = toPosixPath(relative(PLUGIN_ROOT, file));

    for (const url of extractUrls(content)) {
      if (!urlToFiles.has(url)) urlToFiles.set(url, new Set());
      urlToFiles.get(url).add(relPath);
    }
  }
}

// Build registry
const today = new Date().toISOString().split('T')[0];
const registry = {
  version: 1,
  // A merge keeps the original creation date (like last_poll / sitemap_state);
  // it falls back to today when there is no usable previous value.
  created_at: merge ? existing?.created_at || today : today,
  last_poll: merge ? existing?.last_poll || null : null,
  sitemap_state: merge ? existing?.sitemap_state || {} : {},
  urls: {},
};

for (const [url, files] of urlToFiles) {
  // Carry over polling data for URLs that were already known.
  const prev = merge ? existing?.urls?.[url] : null;
  registry.urls[url] = {
    sitemap_lastmod: prev?.sitemap_lastmod || null,
    reference_files: [...files].sort(),
    status: prev?.status || 'unpolled',
  };
}

saveRegistry(registry);

// Stats
const multiRef = [...urlToFiles.values()].filter((s) => s.size > 1).length;
console.log(`Registry built: ${urlToFiles.size} unique URLs from ${totalFiles} files`);
console.log(` URLs referenced by multiple files: ${multiRef}`);
if (merge && existing?.urls) {
  const newUrls = [...urlToFiles.keys()].filter((u) => !existing.urls[u]).length;
  console.log(` New URLs added (merge): ${newUrls}`);
}