// lib/parsers/anchor-parser.mjs // Pure I/O-free parser for v4.2 voyage:anchor markdown comments. // // Anchor format (block-level only, on its own line, blank line above and below): // // // Placement rules (validated by validateAnchorPlacement): // - Not in list-items (Prettier #18066 progressive-whitespace bug) // - Not inside fenced code blocks (`​``yaml`/`​``json`/etc.) // - Not at line-start positions matching: --- frontmatter delimiter, // manifest:, plan_version:, ### Step N:, ## , // 40-char hex SHA1 (review finding-IDs) // - ID must match /^ANN-\d{4}$/ // - No duplicate IDs in same document // // Returns Result shape from lib/util/result.mjs. import { issue, ok, fail } from '../util/result.mjs'; const ANCHOR_LINE_RE = /^(\s*)\s*$/; const ATTR_RE = /(\w+)="([^"]*)"/g; const FENCED_OPEN_RE = /^```([a-zA-Z0-9_-]*)\s*$/; const FENCED_CLOSE_RE = /^```\s*$/; const LIST_ITEM_RE = /^\s*(?:[-*+]|\d+[.)])\s+/; const ID_RE = /^ANN-\d{4}$/; const FORBIDDEN_LINE_START = [ /^---\s*$/, /^manifest:\s*$/, /^plan_version:/, /^brief_version:/, /^review_version:/, /^### Step \d+:/, /^## (?:Intent|Goal|Success Criteria|Executive Summary|Coverage|Remediation Summary)\b/, /^[0-9a-f]{40}$/, ]; /** * Parse anchor attributes string (the contents between voyage:anchor and -->). * @returns {object} attribute map */ function parseAttrs(s) { const attrs = {}; let m; ATTR_RE.lastIndex = 0; while ((m = ATTR_RE.exec(s)) !== null) { attrs[m[1]] = m[2]; } return attrs; } /** * Parse all anchor comments in a markdown document. * @param {string} md * @returns {Result} { valid, errors, warnings, parsed: Anchor[] } */ export function parseAnchors(md) { if (typeof md !== 'string') { return fail(issue('ANCHOR_INPUT', 'Input must be a string')); } const lines = md.split(/\r?\n/); const anchors = []; const errors = []; const warnings = []; const seenIds = new Set(); let inFence = false; for (let i = 0; i < lines.length; i++) { const line = lines[i]; if (FENCED_OPEN_RE.test(line)) { inFence = !inFence; continue; } if (inFence && FENCED_CLOSE_RE.test(line)) { inFence = false; continue; } if (inFence) continue; const m = line.match(ANCHOR_LINE_RE); if (!m) continue; const attrs = parseAttrs(m[2]); if (!attrs.id) { errors.push(issue('ANCHOR_MALFORMED', `Anchor at line ${i + 1} missing required id attribute`)); continue; } if (!ID_RE.test(attrs.id)) { errors.push(issue('ANCHOR_BAD_ID', `Anchor id "${attrs.id}" at line ${i + 1} does not match /^ANN-\\d{4}$/`)); continue; } if (seenIds.has(attrs.id)) { errors.push(issue('ANCHOR_DUPLICATE_ID', `Duplicate anchor id "${attrs.id}" at line ${i + 1}`)); continue; } seenIds.add(attrs.id); if (!attrs.target) { errors.push(issue('ANCHOR_MALFORMED', `Anchor "${attrs.id}" at line ${i + 1} missing required target attribute`)); continue; } if (attrs.snippet && attrs.snippet.length > 80) { warnings.push(issue('ANCHOR_SNIPPET_TRUNCATED', `Anchor "${attrs.id}" snippet > 80 chars (${attrs.snippet.length})`)); } if (attrs.intent && !['fix', 'change', 'question', 'block'].includes(attrs.intent)) { warnings.push(issue('ANCHOR_BAD_INTENT', `Anchor "${attrs.id}" intent "${attrs.intent}" not in {fix|change|question|block}`)); } anchors.push({ id: attrs.id, target: attrs.target, line: attrs.line ? Number.parseInt(attrs.line, 10) : null, snippet: attrs.snippet || null, intent: attrs.intent || null, raw: line, position: { line: i + 1, col: 0 }, }); } if (errors.length > 0) return { valid: false, errors, warnings, parsed: anchors }; return { valid: true, errors: [], warnings, parsed: anchors }; } /** * Insert anchor comments into markdown above target lines. * Each anchor inserted on its own line with blank line separation. * * @param {string} md - source markdown * @param {Array<{id, target, line, snippet?, intent?}>} anchors - anchors to insert (sorted by line ASC) * @returns {string} markdown with anchors injected */ export function addAnchors(md, anchors) { if (typeof md !== 'string') return md; if (!Array.isArray(anchors) || anchors.length === 0) return md; const lines = md.split(/\r?\n/); // Sort by line desc so insertions don't shift later line numbers const sorted = [...anchors].sort((a, b) => (b.line || 0) - (a.line || 0)); for (const a of sorted) { if (!a.line || a.line < 1 || a.line > lines.length + 1) continue; const attrs = [`id="${a.id}"`, `target="${a.target}"`, `line="${a.line}"`]; if (a.snippet) attrs.push(`snippet="${a.snippet.slice(0, 80)}"`); if (a.intent) attrs.push(`intent="${a.intent}"`); const anchorLine = ``; // Insert above target line: anchorLine + blank line, then target stays lines.splice(a.line - 1, 0, anchorLine, ''); } return lines.join('\n'); } /** * Strip all voyage:anchor comments from markdown, restoring the original. * Matches the format produced by addAnchors() — anchor line + following blank. * * @param {string} md * @returns {string} markdown with anchors removed */ export function stripAnchors(md) { if (typeof md !== 'string') return md; const lines = md.split(/\r?\n/); const out = []; for (let i = 0; i < lines.length; i++) { if (ANCHOR_LINE_RE.test(lines[i])) { // Skip anchor line; if next line is blank (separator inserted by addAnchors), skip it too if (i + 1 < lines.length && lines[i + 1].trim() === '') i++; continue; } out.push(lines[i]); } return out.join('\n'); } /** * Validate anchor placement against voyage's structural constraints. * Returns errors for placement violations; does not mutate input. * * @param {string} md * @param {Anchor[]} anchors * @returns {Result} */ export function validateAnchorPlacement(md, anchors) { if (typeof md !== 'string') { return fail(issue('ANCHOR_INPUT', 'Input must be a string')); } const lines = md.split(/\r?\n/); const errors = []; // Build fenced-block ranges const fenced = []; // [{startLine, endLine}] let openLine = null; for (let i = 0; i < lines.length; i++) { if (FENCED_OPEN_RE.test(lines[i])) { if (openLine === null) { openLine = i; } else { fenced.push({ startLine: openLine, endLine: i }); openLine = null; } } } for (let i = 0; i < lines.length; i++) { const line = lines[i]; if (!ANCHOR_LINE_RE.test(line)) continue; // Inside fenced block? for (const f of fenced) { if (i > f.startLine && i < f.endLine) { errors.push(issue('ANCHOR_IN_FENCED_BLOCK', `Anchor at line ${i + 1} is inside fenced code block (lines ${f.startLine + 1}-${f.endLine + 1}); move it above or below the fence`)); break; } } // List item context: either the anchor line itself starts with a list-marker, // OR the anchor line is indented (whitespace-prefixed) AND the previous // non-empty line is a list item. v4.2 disipline: anchors must start at col 0. if (LIST_ITEM_RE.test(line)) { errors.push(issue('ANCHOR_IN_LIST_ITEM', `Anchor at line ${i + 1} is inside a list-item (Prettier #18066 issue — move above the list)`)); } else if (/^\s+= 0 && lines[j].trim() === '') j--; if (j >= 0 && (LIST_ITEM_RE.test(lines[j]) || /^\s+(?:[-*+]|\d+[.)])\s/.test(lines[j]))) { errors.push(issue('ANCHOR_IN_LIST_ITEM', `Anchor at line ${i + 1} is indented after a list-item — move to col 0 above the list`)); } } // Forbidden line-start collision check: the anchor itself starts with `