From fb733ae1496bbd47524d0b2e5496f6d7dae21d35 Mon Sep 17 00:00:00 2001 From: Kjell Tore Guttormsen Date: Sat, 9 May 2026 12:52:46 +0200 Subject: [PATCH] =?UTF-8?q?feat(voyage):=20add=20anchor-parser.mjs=20with?= =?UTF-8?q?=20placement=20validation=20=E2=80=94=20v4.2=20Step=203?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit lib/parsers/anchor-parser.mjs (~190 LoC): - parseAnchors(md) -> Anchor[] (id, target, line, snippet?, intent?) - addAnchors(md, anchors) -> md_with_anchors - stripAnchors(md_with_anchors) -> md (byte-identical) - validateAnchorPlacement(md, anchors) -> errors for list-item / fenced-block / indent Format: Block-level only, on its own line (col 0), blank-line separation. Test fixture annotation-example.md with single ANN-0001 anchor — referenced by SC12 quickstart. 14 tests pass (parseAnchors, addAnchors, stripAnchors, validateAnchorPlacement). --- plugins/voyage/lib/parsers/anchor-parser.mjs | 241 ++++++++++++++++++ .../fixtures/annotation/annotation-example.md | 27 ++ .../tests/parsers/anchor-parser.test.mjs | 130 ++++++++++ 3 files changed, 398 insertions(+) create mode 100644 plugins/voyage/lib/parsers/anchor-parser.mjs create mode 100644 plugins/voyage/tests/fixtures/annotation/annotation-example.md create mode 100644 plugins/voyage/tests/parsers/anchor-parser.test.mjs diff --git a/plugins/voyage/lib/parsers/anchor-parser.mjs b/plugins/voyage/lib/parsers/anchor-parser.mjs new file mode 100644 index 0000000..634f7ce --- /dev/null +++ b/plugins/voyage/lib/parsers/anchor-parser.mjs @@ -0,0 +1,241 @@ +// lib/parsers/anchor-parser.mjs +// Pure I/O-free parser for v4.2 voyage:anchor markdown comments. +// +// Anchor format (block-level only, on its own line, blank line above and below): +// +// +// Placement rules (validated by validateAnchorPlacement): +// - Not in list-items (Prettier #18066 progressive-whitespace bug) +// - Not inside fenced code blocks (`​``yaml`/`​``json`/etc.) 
+// - Not at line-start positions matching: --- frontmatter delimiter, +// manifest:, plan_version:, ### Step N:, ## , +// 40-char hex SHA1 (review finding-IDs) +// - ID must match /^ANN-\d{4}$/ +// - No duplicate IDs in same document +// +// Returns Result shape from lib/util/result.mjs. + +import { issue, ok, fail } from '../util/result.mjs'; + +const ANCHOR_LINE_RE = /^(\s*)\s*$/; +const ATTR_RE = /(\w+)="([^"]*)"/g; +const FENCED_OPEN_RE = /^```([a-zA-Z0-9_-]*)\s*$/; +const FENCED_CLOSE_RE = /^```\s*$/; +const LIST_ITEM_RE = /^\s*(?:[-*+]|\d+[.)])\s+/; +const ID_RE = /^ANN-\d{4}$/; +const FORBIDDEN_LINE_START = [ + /^---\s*$/, + /^manifest:\s*$/, + /^plan_version:/, + /^brief_version:/, + /^review_version:/, + /^### Step \d+:/, + /^## (?:Intent|Goal|Success Criteria|Executive Summary|Coverage|Remediation Summary)\b/, + /^[0-9a-f]{40}$/, +]; + +/** + * Parse anchor attributes string (the contents between voyage:anchor and -->). + * @returns {object} attribute map + */ +function parseAttrs(s) { + const attrs = {}; + let m; + ATTR_RE.lastIndex = 0; + while ((m = ATTR_RE.exec(s)) !== null) { + attrs[m[1]] = m[2]; + } + return attrs; +} + +/** + * Parse all anchor comments in a markdown document. 
+ * @param {string} md + * @returns {Result} { valid, errors, warnings, parsed: Anchor[] } + */ +export function parseAnchors(md) { + if (typeof md !== 'string') { + return fail(issue('ANCHOR_INPUT', 'Input must be a string')); + } + const lines = md.split(/\r?\n/); + const anchors = []; + const errors = []; + const warnings = []; + const seenIds = new Set(); + let inFence = false; + + for (let i = 0; i < lines.length; i++) { + const line = lines[i]; + if (FENCED_OPEN_RE.test(line)) { + inFence = !inFence; + continue; + } + if (inFence && FENCED_CLOSE_RE.test(line)) { + inFence = false; + continue; + } + if (inFence) continue; + + const m = line.match(ANCHOR_LINE_RE); + if (!m) continue; + const attrs = parseAttrs(m[2]); + + if (!attrs.id) { + errors.push(issue('ANCHOR_MALFORMED', `Anchor at line ${i + 1} missing required id attribute`)); + continue; + } + if (!ID_RE.test(attrs.id)) { + errors.push(issue('ANCHOR_BAD_ID', `Anchor id "${attrs.id}" at line ${i + 1} does not match /^ANN-\\d{4}$/`)); + continue; + } + if (seenIds.has(attrs.id)) { + errors.push(issue('ANCHOR_DUPLICATE_ID', `Duplicate anchor id "${attrs.id}" at line ${i + 1}`)); + continue; + } + seenIds.add(attrs.id); + + if (!attrs.target) { + errors.push(issue('ANCHOR_MALFORMED', `Anchor "${attrs.id}" at line ${i + 1} missing required target attribute`)); + continue; + } + + if (attrs.snippet && attrs.snippet.length > 80) { + warnings.push(issue('ANCHOR_SNIPPET_TRUNCATED', `Anchor "${attrs.id}" snippet > 80 chars (${attrs.snippet.length})`)); + } + + if (attrs.intent && !['fix', 'change', 'question', 'block'].includes(attrs.intent)) { + warnings.push(issue('ANCHOR_BAD_INTENT', `Anchor "${attrs.id}" intent "${attrs.intent}" not in {fix|change|question|block}`)); + } + + anchors.push({ + id: attrs.id, + target: attrs.target, + line: attrs.line ? 
Number.parseInt(attrs.line, 10) : null, + snippet: attrs.snippet || null, + intent: attrs.intent || null, + raw: line, + position: { line: i + 1, col: 0 }, + }); + } + + if (errors.length > 0) return { valid: false, errors, warnings, parsed: anchors }; + return { valid: true, errors: [], warnings, parsed: anchors }; +} + +/** + * Insert anchor comments into markdown above target lines. + * Each anchor inserted on its own line with blank line separation. + * + * @param {string} md - source markdown + * @param {Array<{id, target, line, snippet?, intent?}>} anchors - anchors to insert (sorted by line ASC) + * @returns {string} markdown with anchors injected + */ +export function addAnchors(md, anchors) { + if (typeof md !== 'string') return md; + if (!Array.isArray(anchors) || anchors.length === 0) return md; + + const lines = md.split(/\r?\n/); + // Sort by line desc so insertions don't shift later line numbers + const sorted = [...anchors].sort((a, b) => (b.line || 0) - (a.line || 0)); + + for (const a of sorted) { + if (!a.line || a.line < 1 || a.line > lines.length + 1) continue; + const attrs = [`id="${a.id}"`, `target="${a.target}"`, `line="${a.line}"`]; + if (a.snippet) attrs.push(`snippet="${a.snippet.slice(0, 80)}"`); + if (a.intent) attrs.push(`intent="${a.intent}"`); + const anchorLine = ``; + // Insert above target line: anchorLine + blank line, then target stays + lines.splice(a.line - 1, 0, anchorLine, ''); + } + return lines.join('\n'); +} + +/** + * Strip all voyage:anchor comments from markdown, restoring the original. + * Matches the format produced by addAnchors() — anchor line + following blank. 
+ * + * @param {string} md + * @returns {string} markdown with anchors removed + */ +export function stripAnchors(md) { + if (typeof md !== 'string') return md; + const lines = md.split(/\r?\n/); + const out = []; + for (let i = 0; i < lines.length; i++) { + if (ANCHOR_LINE_RE.test(lines[i])) { + // Skip anchor line; if next line is blank (separator inserted by addAnchors), skip it too + if (i + 1 < lines.length && lines[i + 1].trim() === '') i++; + continue; + } + out.push(lines[i]); + } + return out.join('\n'); +} + +/** + * Validate anchor placement against voyage's structural constraints. + * Returns errors for placement violations; does not mutate input. + * + * @param {string} md + * @param {Anchor[]} anchors + * @returns {Result} + */ +export function validateAnchorPlacement(md, anchors) { + if (typeof md !== 'string') { + return fail(issue('ANCHOR_INPUT', 'Input must be a string')); + } + const lines = md.split(/\r?\n/); + const errors = []; + + // Build fenced-block ranges + const fenced = []; // [{startLine, endLine}] + let openLine = null; + for (let i = 0; i < lines.length; i++) { + if (FENCED_OPEN_RE.test(lines[i])) { + if (openLine === null) { + openLine = i; + } else { + fenced.push({ startLine: openLine, endLine: i }); + openLine = null; + } + } + } + + for (let i = 0; i < lines.length; i++) { + const line = lines[i]; + if (!ANCHOR_LINE_RE.test(line)) continue; + + // Inside fenced block? + for (const f of fenced) { + if (i > f.startLine && i < f.endLine) { + errors.push(issue('ANCHOR_IN_FENCED_BLOCK', `Anchor at line ${i + 1} is inside fenced code block (lines ${f.startLine + 1}-${f.endLine + 1}); move it above or below the fence`)); + break; + } + } + + // List item context: either the anchor line itself starts with a list-marker, + // OR the anchor line is indented (whitespace-prefixed) AND the previous + // non-empty line is a list item. v4.2 discipline: anchors must start at col 0. 
+ if (LIST_ITEM_RE.test(line)) { + errors.push(issue('ANCHOR_IN_LIST_ITEM', `Anchor at line ${i + 1} is inside a list-item (Prettier #18066 issue — move above the list)`)); + } else if (/^\s+= 0 && lines[j].trim() === '') j--; + if (j >= 0 && (LIST_ITEM_RE.test(lines[j]) || /^\s+(?:[-*+]|\d+[.)])\s/.test(lines[j]))) { + errors.push(issue('ANCHOR_IN_LIST_ITEM', `Anchor at line ${i + 1} is indented after a list-item — move to col 0 above the list`)); + } + } + + // Forbidden line-start collision check: the anchor itself starts with ` + +## Section B + +A paragraph in section B that the anchor above refers to. The anchor is +placed on its own line with a blank line above and below — the canonical +v4.2 placement discipline. + +## Section C + +Another paragraph. diff --git a/plugins/voyage/tests/parsers/anchor-parser.test.mjs b/plugins/voyage/tests/parsers/anchor-parser.test.mjs new file mode 100644 index 0000000..800834d --- /dev/null +++ b/plugins/voyage/tests/parsers/anchor-parser.test.mjs @@ -0,0 +1,130 @@ +// tests/parsers/anchor-parser.test.mjs +// Unit tests for lib/parsers/anchor-parser.mjs (v4.2) + +import { test } from 'node:test'; +import { strict as assert } from 'node:assert'; +import { readFileSync } from 'node:fs'; +import { dirname, resolve } from 'node:path'; +import { fileURLToPath } from 'node:url'; +import { + parseAnchors, + addAnchors, + stripAnchors, + validateAnchorPlacement, +} from '../../lib/parsers/anchor-parser.mjs'; + +const __dirname = dirname(fileURLToPath(import.meta.url)); +const EXAMPLE_PATH = resolve(__dirname, '..', 'fixtures', 'annotation', 'annotation-example.md'); + +const PLAIN = `# Title + +A normal paragraph. + +## Section + +More text. 
+`; + +test('parseAnchors — empty array on plain markdown without anchors', () => { + const r = parseAnchors(PLAIN); + assert.equal(r.valid, true); + assert.deepEqual(r.parsed, []); +}); + +test('parseAnchors — extracts id/target/line/intent from valid anchor', () => { + const md = readFileSync(EXAMPLE_PATH, 'utf-8'); + const r = parseAnchors(md); + assert.equal(r.valid, true, JSON.stringify(r.errors)); + assert.equal(r.parsed.length, 1); + assert.equal(r.parsed[0].id, 'ANN-0001'); + assert.equal(r.parsed[0].target, 'section-b'); + assert.equal(r.parsed[0].line, 20); + assert.equal(r.parsed[0].intent, 'change'); +}); + +test('parseAnchors — rejects ID not matching ANN-NNNN', () => { + const md = `# X\n\n\n`; + const r = parseAnchors(md); + assert.equal(r.valid, false); + assert.ok(r.errors.find(e => e.code === 'ANCHOR_BAD_ID')); +}); + +test('parseAnchors — rejects malformed (missing id)', () => { + const md = `# X\n\n\n`; + const r = parseAnchors(md); + assert.equal(r.valid, false); + assert.ok(r.errors.find(e => e.code === 'ANCHOR_MALFORMED')); +}); + +test('parseAnchors — rejects duplicate IDs', () => { + const md = `# X\n\n\n\nFoo.\n\n\n`; + const r = parseAnchors(md); + assert.equal(r.valid, false); + assert.ok(r.errors.find(e => e.code === 'ANCHOR_DUPLICATE_ID')); +}); + +test('parseAnchors — ignores anchors inside fenced code blocks', () => { + const md = `# X\n\n\`\`\`yaml\n\n\`\`\`\n`; + const r = parseAnchors(md); + assert.equal(r.valid, true); + assert.deepEqual(r.parsed, []); +}); + +test('addAnchors — empty list returns input byte-identical', () => { + const r = addAnchors(PLAIN, []); + assert.equal(r, PLAIN); +}); + +test('addAnchors — inserts anchor on its own line with blank-line separation', () => { + const md = `# Title\n\nLine 3.\n`; + const result = addAnchors(md, [{ id: 'ANN-0001', target: 'title', line: 3, intent: 'change' }]); + assert.match(result, //); + // Anchor inserted above target line + const lines = result.split('\n'); + const 
anchorIdx = lines.findIndex(l => l.startsWith('\n- next\n`; + const r = validateAnchorPlacement(md, []); + assert.equal(r.valid, false); + assert.ok(r.errors.find(e => e.code === 'ANCHOR_IN_LIST_ITEM')); +}); + +test('validateAnchorPlacement — rejects anchor inside fenced yaml block', () => { + const md = `# X\n\n\`\`\`yaml\nfoo: bar\n\n\`\`\`\n`; + const r = validateAnchorPlacement(md, []); + assert.equal(r.valid, false); + assert.ok(r.errors.find(e => e.code === 'ANCHOR_IN_FENCED_BLOCK')); +}); + +test('validateAnchorPlacement — accepts anchor in body paragraph', () => { + const md = readFileSync(EXAMPLE_PATH, 'utf-8'); + const r = validateAnchorPlacement(md, []); + assert.equal(r.valid, true, JSON.stringify(r.errors)); +}); + +test('parseAnchors — anchor with intent block sets intent field', () => { + const md = `# X\n\n\n`; + const r = parseAnchors(md); + assert.equal(r.valid, true); + assert.equal(r.parsed[0].intent, 'block'); +});