ktg-plugin-marketplace/plugins/voyage/lib/parsers/anchor-parser.mjs
Kjell Tore Guttormsen fb733ae149 feat(voyage): add anchor-parser.mjs with placement validation — v4.2 Step 3
lib/parsers/anchor-parser.mjs (~190 LoC):
- parseAnchors(md) -> Anchor[] (id, target, line, snippet?, intent?)
- addAnchors(md, anchors) -> md_with_anchors
- stripAnchors(md_with_anchors) -> md (byte-identical)
- validateAnchorPlacement(md, anchors) -> errors for list-item / fenced-block / indent

Format: <!-- voyage:anchor id="ANN-NNNN" target="<slug>" line="<N>" -->
Block-level only, on its own line (col 0), blank-line separation.

Test fixture annotation-example.md with single ANN-0001 anchor — referenced by SC12 quickstart.
14 tests pass (parseAnchors, addAnchors, stripAnchors, validateAnchorPlacement).
2026-05-09 12:52:46 +02:00

241 lines
8.5 KiB
JavaScript
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

// lib/parsers/anchor-parser.mjs
// Pure I/O-free parser for v4.2 voyage:anchor markdown comments.
//
// Anchor format (block-level only, on its own line, blank line above and below):
// <!-- voyage:anchor id="ANN-NNNN" target="<heading-slug>" line="<N>" [snippet="<≤80c>"] [intent="fix|change|question|block"] -->
//
// Placement rules:
// - Not in list-items (Prettier #18066 progressive-whitespace bug)
//   — checked by validateAnchorPlacement
// - Not inside fenced code blocks (```yaml, ```json, etc.)
//   — checked by validateAnchorPlacement
// - Not at line-start positions matching: --- frontmatter delimiter,
// manifest:, plan_version:, ### Step N:, ## <required-section>,
// 40-char hex SHA1 (review finding-IDs)
//   — patterns are listed in FORBIDDEN_LINE_START; no function in this file
//   currently enforces them
// - ID must match /^ANN-\d{4}$/ — checked by parseAnchors
// - No duplicate IDs in same document — checked by parseAnchors
//
// Returns Result shape from lib/util/result.mjs.
import { issue, ok, fail } from '../util/result.mjs';
// A line that consists solely of a voyage:anchor comment. Group 1 captures any
// leading whitespace; group 2 captures the raw attribute text between the
// `voyage:anchor` marker and the closing `-->`. The attribute text may not
// contain `>`.
const ANCHOR_LINE_RE = /^(\s*)<!--\s*voyage:anchor\s+([^>]+?)\s*-->\s*$/;
// One name="value" pair. Carries the /g flag so exec() can walk every pair in a
// string, which also makes it stateful via lastIndex. Values cannot contain `"`.
const ATTR_RE = /(\w+)="([^"]*)"/g;
// A three-backtick fence at column 0 with an optional info string (group 1).
// Note that a bare ``` line matches this pattern as well.
const FENCED_OPEN_RE = /^```([a-zA-Z0-9_-]*)\s*$/;
// A bare three-backtick fence at column 0 (no info string).
const FENCED_CLOSE_RE = /^```\s*$/;
// A bullet (-, *, +) or ordered (1. / 1)) list marker, optionally indented,
// followed by at least one whitespace character.
const LIST_ITEM_RE = /^\s*(?:[-*+]|\d+[.)])\s+/;
// Required anchor id shape: "ANN-" followed by exactly four digits.
const ID_RE = /^ANN-\d{4}$/;
// Line-start patterns for structural lines (frontmatter delimiter, manifest and
// version keys, step headings, selected level-2 section headings, 40-char
// lowercase hex ids).
// NOTE(review): not referenced by any function visible in this file — confirm
// whether it is used elsewhere or is still to be wired into validation.
const FORBIDDEN_LINE_START = [
/^---\s*$/,
/^manifest:\s*$/,
/^plan_version:/,
/^brief_version:/,
/^review_version:/,
/^### Step \d+:/,
/^## (?:Intent|Goal|Success Criteria|Executive Summary|Coverage|Remediation Summary)\b/,
/^[0-9a-f]{40}$/,
];
/**
 * Collect the name="value" pairs found in an anchor's attribute text (the part
 * between `voyage:anchor` and `-->`).
 *
 * When a name occurs more than once, the last value wins.
 *
 * @param {string} s - raw attribute text
 * @returns {object} map of attribute name to its (string) value
 */
function parseAttrs(s) {
  // ATTR_RE is a shared /g regex: rewind it so scanning always starts at
  // offset 0 regardless of what a previous user left in lastIndex.
  ATTR_RE.lastIndex = 0;
  const attrs = {};
  for (const [, name, value] of String(s).matchAll(ATTR_RE)) {
    attrs[name] = value;
  }
  return attrs;
}
/**
 * Parse every voyage:anchor comment in a markdown document.
 *
 * Lines inside fenced code blocks are ignored. A fence opens on a line matching
 * FENCED_OPEN_RE and closes only on a bare fence (FENCED_CLOSE_RE); a fence
 * that is never closed extends to the end of the document.
 *
 * Per-anchor checks, in order (the first failing check rejects the anchor):
 *   - `id` present                    -> ANCHOR_MALFORMED
 *   - `id` matches ID_RE              -> ANCHOR_BAD_ID
 *   - `id` not seen earlier           -> ANCHOR_DUPLICATE_ID
 *   - `target` present                -> ANCHOR_MALFORMED
 * Non-fatal findings are reported as warnings and the anchor is still returned:
 *   - snippet longer than 80 chars    -> ANCHOR_SNIPPET_TRUNCATED
 *   - intent outside the allowed set  -> ANCHOR_BAD_INTENT
 *
 * @param {string} md - markdown source
 * @returns {Result} { valid, errors, warnings, parsed: Anchor[] }; `parsed`
 *   holds the anchors that passed all checks even when `valid` is false.
 *   For non-string input the result of fail(issue('ANCHOR_INPUT', ...)) is
 *   returned instead.
 */
export function parseAnchors(md) {
  if (typeof md !== 'string') {
    return fail(issue('ANCHOR_INPUT', 'Input must be a string'));
  }

  const allowedIntents = ['fix', 'change', 'question', 'block'];
  const lines = md.split(/\r?\n/);
  const anchors = [];
  const errors = [];
  const warnings = [];
  const seenIds = new Set();
  let inFence = false;

  for (let i = 0; i < lines.length; i++) {
    const line = lines[i];
    const lineNo = i + 1;

    // While inside a fence only a bare ``` closes it. A line such as ```js is
    // fence *content* there and must not toggle the state.
    if (inFence) {
      if (FENCED_CLOSE_RE.test(line)) inFence = false;
      continue;
    }
    if (FENCED_OPEN_RE.test(line)) {
      inFence = true;
      continue;
    }

    const m = line.match(ANCHOR_LINE_RE);
    if (!m) continue;

    const attrs = parseAttrs(m[2]);
    if (!attrs.id) {
      errors.push(issue('ANCHOR_MALFORMED', `Anchor at line ${lineNo} missing required id attribute`));
      continue;
    }
    if (!ID_RE.test(attrs.id)) {
      errors.push(issue('ANCHOR_BAD_ID', `Anchor id "${attrs.id}" at line ${lineNo} does not match /^ANN-\\d{4}$/`));
      continue;
    }
    if (seenIds.has(attrs.id)) {
      errors.push(issue('ANCHOR_DUPLICATE_ID', `Duplicate anchor id "${attrs.id}" at line ${lineNo}`));
      continue;
    }
    // The id counts as seen even if the anchor is rejected below for a missing
    // target, so a later anchor reusing it is still reported as a duplicate.
    seenIds.add(attrs.id);
    if (!attrs.target) {
      errors.push(issue('ANCHOR_MALFORMED', `Anchor "${attrs.id}" at line ${lineNo} missing required target attribute`));
      continue;
    }

    if (attrs.snippet && attrs.snippet.length > 80) {
      warnings.push(issue('ANCHOR_SNIPPET_TRUNCATED', `Anchor "${attrs.id}" snippet > 80 chars (${attrs.snippet.length})`));
    }
    if (attrs.intent && !allowedIntents.includes(attrs.intent)) {
      warnings.push(issue('ANCHOR_BAD_INTENT', `Anchor "${attrs.id}" intent "${attrs.intent}" not in {fix|change|question|block}`));
    }

    // A non-numeric line attribute is reported as "no line" (null), never NaN.
    const parsedLine = attrs.line ? Number.parseInt(attrs.line, 10) : null;

    anchors.push({
      id: attrs.id,
      target: attrs.target,
      line: Number.isNaN(parsedLine) ? null : parsedLine,
      snippet: attrs.snippet || null,
      intent: attrs.intent || null,
      raw: line,
      position: { line: lineNo, col: 0 },
    });
  }

  return { valid: errors.length === 0, errors, warnings, parsed: anchors };
}
/**
 * Make a snippet safe to embed in an anchor attribute, capped at 80 chars.
 *
 * The anchor grammar cannot represent `"` (ends the attribute value), `>`
 * (rejected by the anchor-line pattern) or line breaks (the anchor must stay on
 * one line). Those are rewritten as `'`, `&gt;` and a space respectively.
 * The cap is applied to the rewritten text, whole pieces at a time, so neither
 * an `&gt;` entity nor a surrogate pair is ever cut in half.
 */
function sanitizeAnchorSnippet(snippet) {
  const MAX_LEN = 80;
  let safe = '';
  for (const ch of String(snippet).replace(/\r\n?|\n/g, ' ')) {
    let piece = ch;
    if (ch === '"') piece = "'";
    else if (ch === '>') piece = '&gt;';
    if (safe.length + piece.length > MAX_LEN) break;
    safe += piece;
  }
  return safe;
}

/**
 * Insert anchor comments into markdown above their target lines.
 *
 * Each anchor is written on its own line, followed by one blank separator
 * line, directly above the 1-based line number in `anchor.line`. A value of
 * `lines + 1` appends the anchor at the end. Anchors whose `line` is missing,
 * zero or out of range are skipped.
 *
 * Line endings: a document that uses CRLF for every line break is written back
 * with CRLF; any other document (LF or mixed) is written with LF.
 *
 * @param {string} md - source markdown
 * @param {Array<{id, target, line, snippet?, intent?}>} anchors - anchors to
 *   insert; order does not matter, the input array is not mutated
 * @returns {string} markdown with anchors injected (`md` itself is returned
 *   unchanged when it is not a string or `anchors` is empty / not an array)
 */
export function addAnchors(md, anchors) {
  if (typeof md !== 'string') return md;
  if (!Array.isArray(anchors) || anchors.length === 0) return md;

  // Keep CRLF only when the document uses it exclusively (no bare "\n").
  const isPureCrlf = md.includes('\r\n') && !/(?:^|[^\r])\n/.test(md);
  const eol = isPureCrlf ? '\r\n' : '\n';

  const lines = md.split(/\r?\n/);
  // Process bottom-up so an insertion never shifts a line number that is
  // still waiting to be handled.
  const bottomUp = [...anchors].sort((a, b) => (b.line || 0) - (a.line || 0));

  for (const a of bottomUp) {
    if (!a.line || a.line < 1 || a.line > lines.length + 1) continue;

    const attrs = [`id="${a.id}"`, `target="${a.target}"`, `line="${a.line}"`];
    if (a.snippet) attrs.push(`snippet="${sanitizeAnchorSnippet(a.snippet)}"`);
    if (a.intent) attrs.push(`intent="${a.intent}"`);
    const anchorLine = `<!-- voyage:anchor ${attrs.join(' ')} -->`;

    // Anchor line plus one blank separator; the target line follows unchanged.
    lines.splice(a.line - 1, 0, anchorLine, '');
  }

  return lines.join(eol);
}
/**
 * Remove every voyage:anchor comment line from markdown.
 *
 * Mirrors the layout written by addAnchors(): for each anchor line, the line
 * itself is dropped and, if the line right after it is blank (the separator
 * addAnchors inserts), that blank line is dropped too.
 *
 * Line endings: a document that uses CRLF for every line break is written back
 * with CRLF, so stripping does not silently convert it to LF; any other
 * document (LF or mixed) is written with LF.
 *
 * @param {string} md - markdown possibly containing anchor comments
 * @returns {string} markdown with anchors removed (non-string input is
 *   returned unchanged)
 */
export function stripAnchors(md) {
  if (typeof md !== 'string') return md;

  // Keep CRLF only when the document uses it exclusively (no bare "\n").
  const isPureCrlf = md.includes('\r\n') && !/(?:^|[^\r])\n/.test(md);
  const eol = isPureCrlf ? '\r\n' : '\n';

  const lines = md.split(/\r?\n/);
  const kept = [];
  for (let i = 0; i < lines.length; i++) {
    if (ANCHOR_LINE_RE.test(lines[i])) {
      // Also consume the blank separator that follows the anchor, if any.
      if (i + 1 < lines.length && lines[i + 1].trim() === '') i++;
      continue;
    }
    kept.push(lines[i]);
  }
  return kept.join(eol);
}
/**
 * Locate fenced code blocks as 0-based line ranges.
 *
 * A fence opens on a FENCED_OPEN_RE line and closes only on a bare fence
 * (FENCED_CLOSE_RE). A fence that is never closed runs to the end of the
 * document and gets `endLine === lines.length` (one past the last line).
 */
function collectFencedRanges(lines) {
  const ranges = [];
  let openLine = null;
  for (let i = 0; i < lines.length; i++) {
    if (openLine === null) {
      if (FENCED_OPEN_RE.test(lines[i])) openLine = i;
    } else if (FENCED_CLOSE_RE.test(lines[i])) {
      ranges.push({ startLine: openLine, endLine: i });
      openLine = null;
    }
  }
  if (openLine !== null) {
    ranges.push({ startLine: openLine, endLine: lines.length });
  }
  return ranges;
}

/** True when `line` is one or more list markers followed by an anchor comment. */
function isListItemAnchor(line) {
  if (!LIST_ITEM_RE.test(line)) return false;
  let rest = line;
  // Peel nested markers ("- 1. <!-- ... -->"); each pass removes >= 2 chars.
  while (LIST_ITEM_RE.test(rest)) rest = rest.replace(LIST_ITEM_RE, '');
  return ANCHOR_LINE_RE.test(rest);
}

/**
 * Validate anchor placement against voyage's structural constraints.
 * Scans `md` itself for anchor comments; does not mutate its input.
 *
 * Reported errors:
 *   - ANCHOR_IN_FENCED_BLOCK: an anchor line lies strictly between the opening
 *     and closing line of a fenced code block (or after an unclosed fence).
 *   - ANCHOR_IN_LIST_ITEM: the anchor shares its line with a list marker
 *     (outside any fence), or the anchor line is indented and the nearest
 *     non-blank line above it is a list item.
 *
 * @param {string} md - markdown source to inspect
 * @param {Anchor[]} anchors - currently unused; kept so existing callers that
 *   pass the parsed anchors keep working
 * @returns {Result} ok(null) when no violation is found, otherwise
 *   { valid: false, errors, warnings: [], parsed: null }
 */
export function validateAnchorPlacement(md, anchors) {
  if (typeof md !== 'string') {
    return fail(issue('ANCHOR_INPUT', 'Input must be a string'));
  }

  const lines = md.split(/\r?\n/);
  const errors = [];
  const fenced = collectFencedRanges(lines);
  const fenceAround = (i) => fenced.find((f) => i > f.startLine && i < f.endLine);

  for (let i = 0; i < lines.length; i++) {
    const line = lines[i];
    const lineNo = i + 1;

    if (!ANCHOR_LINE_RE.test(line)) {
      // ANCHOR_LINE_RE only accepts lines that begin with the comment itself,
      // so "- <!-- voyage:anchor ... -->" has to be recognised separately.
      // Inside a fence such a line is literal code and is left alone.
      if (isListItemAnchor(line) && !fenceAround(i)) {
        errors.push(issue('ANCHOR_IN_LIST_ITEM', `Anchor at line ${lineNo} is inside a list-item (Prettier #18066 issue — move above the list)`));
      }
      continue;
    }

    const fence = fenceAround(i);
    if (fence) {
      // An unclosed fence ends one past the last line; clamp for display.
      const lastFenceLine = Math.min(fence.endLine + 1, lines.length);
      errors.push(issue('ANCHOR_IN_FENCED_BLOCK', `Anchor at line ${lineNo} is inside fenced code block (lines ${fence.startLine + 1}-${lastFenceLine}); move it above or below the fence`));
    }

    // v4.2 discipline: anchors start at col 0. An indented anchor whose nearest
    // non-blank predecessor is a list item is treated as nested in that item.
    if (/^\s+</.test(line)) {
      let j = i - 1;
      while (j >= 0 && lines[j].trim() === '') j--;
      if (j >= 0 && LIST_ITEM_RE.test(lines[j])) {
        errors.push(issue('ANCHOR_IN_LIST_ITEM', `Anchor at line ${lineNo} is indented after a list-item — move to col 0 above the list`));
      }
    }
    // No forbidden-line-start check here: an anchor line always begins with
    // `<!--`, so it cannot itself match any structural line pattern.
  }

  if (errors.length > 0) return { valid: false, errors, warnings: [], parsed: null };
  return ok(null);
}