feat(voyage): add anchor-parser.mjs with placement validation — v4.2 Step 3
lib/parsers/anchor-parser.mjs (~190 LoC): - parseAnchors(md) -> Anchor[] (id, target, line, snippet?, intent?) - addAnchors(md, anchors) -> md_with_anchors - stripAnchors(md_with_anchors) -> md (byte-identical) - validateAnchorPlacement(md, anchors) -> errors for list-item / fenced-block / indent Format: <!-- voyage:anchor id="ANN-NNNN" target="<slug>" line="<N>" --> Block-level only, on its own line (col 0), blank-line separation. Test fixture annotation-example.md with single ANN-0001 anchor — referenced by SC12 quickstart. 14 tests pass (parseAnchors, addAnchors, stripAnchors, validateAnchorPlacement).
This commit is contained in:
parent
ff7a5c63da
commit
fb733ae149
3 changed files with 398 additions and 0 deletions
241
plugins/voyage/lib/parsers/anchor-parser.mjs
Normal file
241
plugins/voyage/lib/parsers/anchor-parser.mjs
Normal file
|
|
@ -0,0 +1,241 @@
|
|||
// lib/parsers/anchor-parser.mjs
|
||||
// Pure I/O-free parser for v4.2 voyage:anchor markdown comments.
|
||||
//
|
||||
// Anchor format (block-level only, on its own line, blank line above and below):
|
||||
// <!-- voyage:anchor id="ANN-NNNN" target="<heading-slug>" line="<N>" [snippet="<≤80c>"] [intent="fix|change|question|block"] -->
|
||||
//
|
||||
// Placement rules (validated by validateAnchorPlacement):
|
||||
// - Not in list-items (Prettier #18066 progressive-whitespace bug)
|
||||
//   - Not inside fenced code blocks (```yaml / ```json / etc.)
|
||||
// - Not at line-start positions matching: --- frontmatter delimiter,
|
||||
// manifest:, plan_version:, ### Step N:, ## <required-section>,
|
||||
// 40-char hex SHA1 (review finding-IDs)
|
||||
// - ID must match /^ANN-\d{4}$/
|
||||
// - No duplicate IDs in same document
|
||||
//
|
||||
// Returns Result shape from lib/util/result.mjs.
|
||||
|
||||
import { issue, ok, fail } from '../util/result.mjs';
|
||||
|
||||
// A complete anchor comment occupying a whole line. Group 1 is the leading
// whitespace (empty when the anchor sits at col 0), group 2 is the raw
// attribute string between `voyage:anchor` and `-->`.
const ANCHOR_LINE_RE = /^(\s*)<!--\s*voyage:anchor\s+([^>]+?)\s*-->\s*$/;

// One key="value" pair inside the attribute string. Global flag: callers must
// reset lastIndex before each scan because exec() is stateful.
const ATTR_RE = /(\w+)="([^"]*)"/g;

// Opening backtick fence: three backticks followed by an optional info string.
// The info string may contain anything except a backtick (CommonMark rule), so
// openers such as "```c++" or "```js title=x" are recognised too. A stricter
// character class would miss those openers and then treat their closing fence
// as an opener, inverting the in-fence state for the rest of the document.
const FENCED_OPEN_RE = /^```([^`]*)$/;

// Closing backtick fence: three backticks and nothing else (no info string).
const FENCED_CLOSE_RE = /^```\s*$/;

// Bullet ("-", "*", "+") or ordered ("1.", "1)") list item, optionally indented.
const LIST_ITEM_RE = /^\s*(?:[-*+]|\d+[.)])\s+/;

// Anchor ids are "ANN-" followed by exactly four digits.
const ID_RE = /^ANN-\d{4}$/;

// Structural line starts that an anchor must never replace or collide with.
// NOTE(review): nothing in the visible part of this module reads this list —
// validateAnchorPlacement relies on anchors always starting with "<!--".
const FORBIDDEN_LINE_START = [
  /^---\s*$/,
  /^manifest:\s*$/,
  /^plan_version:/,
  /^brief_version:/,
  /^review_version:/,
  /^### Step \d+:/,
  /^## (?:Intent|Goal|Success Criteria|Executive Summary|Coverage|Remediation Summary)\b/,
  /^[0-9a-f]{40}$/,
];
|
||||
|
||||
/**
 * Turn the attribute portion of an anchor comment (the text between
 * `voyage:anchor` and `-->`) into a plain key/value map.
 *
 * Only `key="value"` pairs are recognised; when a key appears more than once
 * the last occurrence wins.
 *
 * @param {string} s - raw attribute string
 * @returns {object} attribute map
 */
function parseAttrs(s) {
  const result = {};
  // ATTR_RE is a shared global regex: rewind it before scanning a new string.
  ATTR_RE.lastIndex = 0;
  for (let hit = ATTR_RE.exec(s); hit !== null; hit = ATTR_RE.exec(s)) {
    const key = hit[1];
    const value = hit[2];
    result[key] = value;
  }
  return result;
}
|
||||
|
||||
/**
 * Parse all anchor comments in a markdown document.
 *
 * Lines inside backtick-fenced code blocks are skipped. A fence is opened by a
 * line matching FENCED_OPEN_RE and closed only by a bare fence line
 * (FENCED_CLOSE_RE); a fence line carrying an info string inside an open fence
 * is treated as content, not as a closer. An unclosed fence runs to the end of
 * the document.
 *
 * Errors (make the result invalid; the offending anchor is not returned):
 *   - ANCHOR_MALFORMED     missing `id` or `target`
 *   - ANCHOR_BAD_ID        id does not match /^ANN-\d{4}$/
 *   - ANCHOR_DUPLICATE_ID  id already seen earlier in the document
 * Warnings (anchor is still returned):
 *   - ANCHOR_SNIPPET_TRUNCATED  snippet longer than 80 chars
 *   - ANCHOR_BAD_INTENT         intent not one of fix|change|question|block
 *   - ANCHOR_BAD_LINE           `line` attribute is not a number (line -> null)
 *
 * @param {string} md
 * @returns {Result} { valid, errors, warnings, parsed: Anchor[] }
 */
export function parseAnchors(md) {
  if (typeof md !== 'string') {
    return fail(issue('ANCHOR_INPUT', 'Input must be a string'));
  }
  const lines = md.split(/\r?\n/);
  const anchors = [];
  const errors = [];
  const warnings = [];
  const seenIds = new Set();
  let inFence = false;

  for (let i = 0; i < lines.length; i++) {
    const line = lines[i];

    // Fence tracking. While inside a fence only a bare closing fence ends it;
    // every bare fence also matches FENCED_OPEN_RE, so the open/close decision
    // has to depend on the current state rather than on which regex matches.
    if (inFence) {
      if (FENCED_CLOSE_RE.test(line)) inFence = false;
      continue;
    }
    if (FENCED_OPEN_RE.test(line)) {
      inFence = true;
      continue;
    }

    const m = line.match(ANCHOR_LINE_RE);
    if (!m) continue;
    const attrs = parseAttrs(m[2]);

    if (!attrs.id) {
      errors.push(issue('ANCHOR_MALFORMED', `Anchor at line ${i + 1} missing required id attribute`));
      continue;
    }
    if (!ID_RE.test(attrs.id)) {
      errors.push(issue('ANCHOR_BAD_ID', `Anchor id "${attrs.id}" at line ${i + 1} does not match /^ANN-\\d{4}$/`));
      continue;
    }
    if (seenIds.has(attrs.id)) {
      errors.push(issue('ANCHOR_DUPLICATE_ID', `Duplicate anchor id "${attrs.id}" at line ${i + 1}`));
      continue;
    }
    // Registered before the target check so a later anchor reusing this id is
    // still reported as a duplicate even if this one turns out malformed.
    seenIds.add(attrs.id);

    if (!attrs.target) {
      errors.push(issue('ANCHOR_MALFORMED', `Anchor "${attrs.id}" at line ${i + 1} missing required target attribute`));
      continue;
    }

    if (attrs.snippet && attrs.snippet.length > 80) {
      warnings.push(issue('ANCHOR_SNIPPET_TRUNCATED', `Anchor "${attrs.id}" snippet > 80 chars (${attrs.snippet.length})`));
    }

    if (attrs.intent && !['fix', 'change', 'question', 'block'].includes(attrs.intent)) {
      warnings.push(issue('ANCHOR_BAD_INTENT', `Anchor "${attrs.id}" intent "${attrs.intent}" not in {fix|change|question|block}`));
    }

    // A non-numeric `line` attribute must not leak NaN into the result.
    let targetLine = null;
    if (attrs.line) {
      const parsedLine = Number.parseInt(attrs.line, 10);
      if (Number.isNaN(parsedLine)) {
        warnings.push(issue('ANCHOR_BAD_LINE', `Anchor "${attrs.id}" line "${attrs.line}" is not a number`));
      } else {
        targetLine = parsedLine;
      }
    }

    anchors.push({
      id: attrs.id,
      target: attrs.target,
      line: targetLine,
      snippet: attrs.snippet || null,
      intent: attrs.intent || null,
      raw: line,
      // 1-based line of the anchor comment itself (not of its target).
      position: { line: i + 1, col: 0 },
    });
  }

  if (errors.length > 0) return { valid: false, errors, warnings, parsed: anchors };
  return { valid: true, errors: [], warnings, parsed: anchors };
}
|
||||
|
||||
/**
 * Insert anchor comments into markdown above their target lines.
 *
 * Each anchor is written on its own line followed by one blank line, directly
 * above the 1-based line `anchor.line` of the ORIGINAL document. Anchors are
 * processed from the highest line to the lowest so earlier insertions do not
 * shift later targets; the caller's array is not mutated and need not be
 * sorted. Anchors whose `line` is missing, < 1 or > lineCount + 1 are skipped.
 *
 * Line terminators of the input are preserved as-is (LF and CRLF documents
 * keep their endings); inserted lines use the first terminator found in the
 * document, or "\n" when the document has none.
 *
 * Attribute values are made safe for the one-line comment format: newlines
 * become spaces, `"` becomes `&quot;` and `>` becomes `&gt;`. Without this the
 * emitted line would not match the anchor pattern and could be neither parsed
 * nor stripped again. Snippets are cut to 80 characters before escaping.
 *
 * @param {string} md - source markdown
 * @param {Array<{id, target, line, snippet?, intent?}>} anchors - anchors to insert
 * @returns {string} markdown with anchors injected (input returned unchanged
 *   when `md` is not a string or `anchors` is empty / not an array)
 */
export function addAnchors(md, anchors) {
  if (typeof md !== 'string') return md;
  if (!Array.isArray(anchors) || anchors.length === 0) return md;

  // Splitting with a capture group keeps every terminator: parts alternates
  // line, terminator, line, ..., line (always an odd number of entries).
  const parts = md.split(/(\r?\n)/);
  const lines = [];
  const eols = []; // eols[i] is the terminator that follows lines[i] ('' for the last line)
  for (let i = 0; i < parts.length; i += 2) {
    lines.push(parts[i]);
    eols.push(parts[i + 1] || '');
  }
  const eol = eols.find((e) => e !== '') || '\n';

  const escapeAttr = (value) =>
    String(value).replace(/\r?\n/g, ' ').replace(/"/g, '&quot;').replace(/>/g, '&gt;');

  // Sort by line desc so insertions don't shift later line numbers
  const sorted = [...anchors].sort((a, b) => (b.line || 0) - (a.line || 0));

  for (const a of sorted) {
    if (!a.line || a.line < 1 || a.line > lines.length + 1) continue;
    const attrs = [
      `id="${escapeAttr(a.id)}"`,
      `target="${escapeAttr(a.target)}"`,
      `line="${escapeAttr(a.line)}"`,
    ];
    if (a.snippet) attrs.push(`snippet="${escapeAttr(String(a.snippet).slice(0, 80))}"`);
    if (a.intent) attrs.push(`intent="${escapeAttr(a.intent)}"`);
    const anchorLine = `<!-- voyage:anchor ${attrs.join(' ')} -->`;

    const at = a.line - 1;
    if (at >= lines.length) {
      // Appending past the last line: that line now needs a terminator, and
      // the trailing blank becomes the new unterminated last line.
      eols[eols.length - 1] = eol;
      lines.push(anchorLine, '');
      eols.push(eol, '');
    } else {
      // Insert above target line: anchorLine + blank line, then target stays
      lines.splice(at, 0, anchorLine, '');
      eols.splice(at, 0, eol, eol);
    }
  }

  return lines.map((text, i) => text + eols[i]).join('');
}
|
||||
|
||||
/**
 * Strip all voyage:anchor comments from markdown, restoring the original.
 *
 * Matches the format produced by addAnchors(): every line that is an anchor
 * comment is removed, together with the single blank line that follows it
 * (the separator addAnchors inserts). All other lines are kept verbatim,
 * including their original line terminators, so LF and CRLF documents come
 * back byte-identical instead of being normalised to LF.
 *
 * @param {string} md
 * @returns {string} markdown with anchors removed (non-string input is
 *   returned unchanged)
 */
export function stripAnchors(md) {
  if (typeof md !== 'string') return md;

  // Capture group keeps the terminators: even indices are line texts, odd
  // indices are the "\n" / "\r\n" that followed them.
  const parts = md.split(/(\r?\n)/);
  let out = '';
  let wroteLine = false;

  for (let i = 0; i < parts.length; i += 2) {
    if (ANCHOR_LINE_RE.test(parts[i])) {
      // Skip anchor line; if next line is blank (separator inserted by addAnchors), skip it too
      if (i + 2 < parts.length && parts[i + 2].trim() === '') i += 2;
      continue;
    }
    // Kept lines are joined by the terminator that preceded them in the
    // input; the first kept line gets no leading terminator.
    if (wroteLine) out += parts[i - 1];
    out += parts[i];
    wroteLine = true;
  }
  return out;
}
|
||||
|
||||
/**
 * Validate anchor placement against voyage's structural constraints.
 * Returns errors for placement violations; does not mutate input.
 *
 * Checks performed on every anchor comment found in `md`:
 *   - ANCHOR_IN_FENCED_BLOCK: the anchor lies strictly between an opening
 *     backtick fence and its bare closing fence. An unclosed fence extends to
 *     the end of the document.
 *   - ANCHOR_IN_LIST_ITEM: the anchor shares its line with a list marker
 *     (`- <!-- voyage:anchor ... -->`), or it is indented and the previous
 *     non-empty line is a list item.
 *
 * @param {string} md
 * @param {Anchor[]} anchors - currently unused; anchors are re-detected from `md`
 * @returns {Result} failure with `errors` when any violation is found,
 *   otherwise `ok(null)`
 */
export function validateAnchorPlacement(md, anchors) {
  if (typeof md !== 'string') {
    return fail(issue('ANCHOR_INPUT', 'Input must be a string'));
  }
  const lines = md.split(/\r?\n/);
  const errors = [];

  // An anchor written on the same line as a list marker. ANCHOR_LINE_RE only
  // allows whitespace before "<!--", so these lines need their own pattern or
  // they would slip through every check below.
  const listItemAnchorRe = /^\s*(?:[-*+]|\d+[.)])\s+<!--\s*voyage:anchor\s+[^>]+?\s*-->\s*$/;

  // Build fenced-block ranges as 0-based [startLine, endLine] pairs. Only a
  // bare fence closes an open block; a fence line with an info string inside
  // an open block is content.
  const fenced = [];
  let openLine = null;
  for (let i = 0; i < lines.length; i++) {
    if (openLine === null) {
      if (FENCED_OPEN_RE.test(lines[i])) openLine = i;
    } else if (FENCED_CLOSE_RE.test(lines[i])) {
      fenced.push({ startLine: openLine, endLine: i });
      openLine = null;
    }
  }
  // Unclosed fence: everything to the end of the document is inside it.
  if (openLine !== null) fenced.push({ startLine: openLine, endLine: lines.length });

  for (let i = 0; i < lines.length; i++) {
    const line = lines[i];
    const isListItemAnchor = listItemAnchorRe.test(line);
    if (!isListItemAnchor && !ANCHOR_LINE_RE.test(line)) continue;

    // Inside fenced block?
    const fence = fenced.find((f) => i > f.startLine && i < f.endLine);
    if (fence) {
      // Clamp so an unclosed fence reports the last real line of the document.
      const fenceLastLine = Math.min(fence.endLine + 1, lines.length);
      errors.push(issue('ANCHOR_IN_FENCED_BLOCK', `Anchor at line ${i + 1} is inside fenced code block (lines ${fence.startLine + 1}-${fenceLastLine}); move it above or below the fence`));
    }

    // List item context: either the anchor line itself starts with a list-marker,
    // OR the anchor line is indented (whitespace-prefixed) AND the previous
    // non-empty line is a list item. v4.2 discipline: anchors must start at col 0.
    if (isListItemAnchor) {
      errors.push(issue('ANCHOR_IN_LIST_ITEM', `Anchor at line ${i + 1} is inside a list-item (Prettier #18066 issue — move above the list)`));
    } else if (/^\s+</.test(line)) {
      // Anchor line is indented — likely nested inside a list or block-quote.
      // Walk backwards to find the previous non-empty line.
      let j = i - 1;
      while (j >= 0 && lines[j].trim() === '') j--;
      if (j >= 0 && LIST_ITEM_RE.test(lines[j])) {
        errors.push(issue('ANCHOR_IN_LIST_ITEM', `Anchor at line ${i + 1} is indented after a list-item — move to col 0 above the list`));
      }
    }

    // No forbidden-line-start collision check is needed here: an anchor line
    // always begins with optional whitespace plus "<!--" (or a list marker,
    // already rejected above), so it cannot itself be a frontmatter delimiter,
    // step heading, required section heading or SHA1 line.
  }

  if (errors.length > 0) return { valid: false, errors, warnings: [], parsed: null };
  return ok(null);
}
|
||||
27
plugins/voyage/tests/fixtures/annotation/annotation-example.md
vendored
Normal file
27
plugins/voyage/tests/fixtures/annotation/annotation-example.md
vendored
Normal file
|
|
@ -0,0 +1,27 @@
|
|||
---
|
||||
type: trekplan-fixture
|
||||
plan_version: "1.7"
|
||||
created: 2026-05-09
|
||||
slug: annotation-example
|
||||
---
|
||||
|
||||
# Sample plan with one anchor
|
||||
|
||||
This fixture is referenced by `docs/annotation-quickstart.md` and the SC12
|
||||
machine-proxy verification (`parseAnchors` exits 0).
|
||||
|
||||
## Section A
|
||||
|
||||
A normal paragraph in section A.
|
||||
|
||||
<!-- voyage:anchor id="ANN-0001" target="section-b" line="20" intent="change" -->
|
||||
|
||||
## Section B
|
||||
|
||||
A paragraph in section B that the anchor above refers to. The anchor is
|
||||
placed on its own line with a blank line above and below — the canonical
|
||||
v4.2 placement discipline.
|
||||
|
||||
## Section C
|
||||
|
||||
Another paragraph.
|
||||
130
plugins/voyage/tests/parsers/anchor-parser.test.mjs
Normal file
130
plugins/voyage/tests/parsers/anchor-parser.test.mjs
Normal file
|
|
@ -0,0 +1,130 @@
|
|||
// tests/parsers/anchor-parser.test.mjs
|
||||
// Unit tests for lib/parsers/anchor-parser.mjs (v4.2)
|
||||
|
||||
import { test } from 'node:test';
|
||||
import { strict as assert } from 'node:assert';
|
||||
import { readFileSync } from 'node:fs';
|
||||
import { dirname, resolve } from 'node:path';
|
||||
import { fileURLToPath } from 'node:url';
|
||||
import {
|
||||
parseAnchors,
|
||||
addAnchors,
|
||||
stripAnchors,
|
||||
validateAnchorPlacement,
|
||||
} from '../../lib/parsers/anchor-parser.mjs';
|
||||
|
||||
const __dirname = dirname(fileURLToPath(import.meta.url));
|
||||
const EXAMPLE_PATH = resolve(__dirname, '..', 'fixtures', 'annotation', 'annotation-example.md');
|
||||
|
||||
// Anchor-free markdown shared by the "nothing to do" cases.
const PLAIN = `# Title

A normal paragraph.

## Section

More text.
`;

// A document without anchors parses cleanly and yields no anchors.
test('parseAnchors — empty array on plain markdown without anchors', () => {
  const { valid, parsed } = parseAnchors(PLAIN);
  assert.equal(valid, true);
  assert.deepEqual(parsed, []);
});

// The on-disk fixture carries exactly one well-formed anchor.
test('parseAnchors — extracts id/target/line/intent from valid anchor', () => {
  const result = parseAnchors(readFileSync(EXAMPLE_PATH, 'utf-8'));
  assert.equal(result.valid, true, JSON.stringify(result.errors));
  assert.equal(result.parsed.length, 1);
  const [anchor] = result.parsed;
  assert.equal(anchor.id, 'ANN-0001');
  assert.equal(anchor.target, 'section-b');
  assert.equal(anchor.line, 20);
  assert.equal(anchor.intent, 'change');
});

// Ids must follow the ANN-NNNN scheme.
test('parseAnchors — rejects ID not matching ANN-NNNN', () => {
  const source = `# X\n\n<!-- voyage:anchor id="X-001" target="foo" line="3" -->\n`;
  const { valid, errors } = parseAnchors(source);
  assert.equal(valid, false);
  assert.ok(errors.some((err) => err.code === 'ANCHOR_BAD_ID'));
});

// An anchor without an id attribute is malformed.
test('parseAnchors — rejects malformed (missing id)', () => {
  const source = `# X\n\n<!-- voyage:anchor target="foo" line="3" -->\n`;
  const { valid, errors } = parseAnchors(source);
  assert.equal(valid, false);
  assert.ok(errors.some((err) => err.code === 'ANCHOR_MALFORMED'));
});

// The same id may appear only once per document.
test('parseAnchors — rejects duplicate IDs', () => {
  const source = `# X\n\n<!-- voyage:anchor id="ANN-0001" target="a" line="3" -->\n\nFoo.\n\n<!-- voyage:anchor id="ANN-0001" target="b" line="9" -->\n`;
  const { valid, errors } = parseAnchors(source);
  assert.equal(valid, false);
  assert.ok(errors.some((err) => err.code === 'ANCHOR_DUPLICATE_ID'));
});

// Anchor-looking text inside a code fence is content, not an anchor.
test('parseAnchors — ignores anchors inside fenced code blocks', () => {
  const source = `# X\n\n\`\`\`yaml\n<!-- voyage:anchor id="ANN-0001" target="a" line="4" -->\n\`\`\`\n`;
  const { valid, parsed } = parseAnchors(source);
  assert.equal(valid, true);
  assert.deepEqual(parsed, []);
});

// With nothing to insert the input must come back untouched.
test('addAnchors — empty list returns input byte-identical', () => {
  assert.equal(addAnchors(PLAIN, []), PLAIN);
});
|
||||
|
||||
// Verifies what the title promises: the anchor is emitted verbatim at col 0,
// directly above the target line, with a blank line on either side.
test('addAnchors — inserts anchor on its own line with blank-line separation', () => {
  const md = `# Title\n\nLine 3.\n`;
  const result = addAnchors(md, [{ id: 'ANN-0001', target: 'title', line: 3, intent: 'change' }]);
  assert.match(result, /<!-- voyage:anchor id="ANN-0001" target="title" line="3" intent="change" -->/);

  const lines = result.split('\n');
  const anchorIdx = lines.findIndex(l => l.startsWith('<!-- voyage:anchor'));
  assert.ok(anchorIdx >= 0);
  // Own line: nothing but the comment itself.
  assert.equal(lines[anchorIdx], '<!-- voyage:anchor id="ANN-0001" target="title" line="3" intent="change" -->');
  // Blank line above (already present in the source) and below (inserted).
  assert.equal(lines[anchorIdx - 1], '');
  assert.equal(lines[anchorIdx + 1], '');
  // Anchor inserted above target line
  assert.equal(lines[anchorIdx + 2], 'Line 3.');
});
|
||||
|
||||
// Inserting anchors and stripping them again must give back the exact input.
test('addAnchors -> stripAnchors round-trips byte-identical', () => {
  const original = `# Title\n\nLine 3.\n\nLine 5.\n`;
  const toInsert = [
    { id: 'ANN-0001', target: 'title', line: 3 },
    { id: 'ANN-0002', target: 'title', line: 5 },
  ];
  const restored = stripAnchors(addAnchors(original, toInsert));
  assert.equal(restored, original, 'addAnchors then stripAnchors must round-trip byte-identical');
});

// The three functions compose on an anchor-free document.
test('parseAnchors(stripAnchors(addAnchors(md, []))) returns []', () => {
  const source = `# Title\n\nBody.\n`;
  const untouched = addAnchors(source, []);
  const stripped = stripAnchors(untouched);
  const { valid, parsed } = parseAnchors(stripped);
  assert.equal(valid, true);
  assert.deepEqual(parsed, []);
});

// An indented anchor directly under a list item is a placement error.
test('validateAnchorPlacement — rejects anchor in list-item', () => {
  const source = `# X\n\n- item\n <!-- voyage:anchor id="ANN-0001" target="x" line="4" -->\n- next\n`;
  const { valid, errors } = validateAnchorPlacement(source, []);
  assert.equal(valid, false);
  assert.ok(errors.some((err) => err.code === 'ANCHOR_IN_LIST_ITEM'));
});

// An anchor between fence delimiters is a placement error.
test('validateAnchorPlacement — rejects anchor inside fenced yaml block', () => {
  const source = `# X\n\n\`\`\`yaml\nfoo: bar\n<!-- voyage:anchor id="ANN-0001" target="x" line="5" -->\n\`\`\`\n`;
  const { valid, errors } = validateAnchorPlacement(source, []);
  assert.equal(valid, false);
  assert.ok(errors.some((err) => err.code === 'ANCHOR_IN_FENCED_BLOCK'));
});

// The canonical fixture places its anchor correctly.
test('validateAnchorPlacement — accepts anchor in body paragraph', () => {
  const outcome = validateAnchorPlacement(readFileSync(EXAMPLE_PATH, 'utf-8'), []);
  assert.equal(outcome.valid, true, JSON.stringify(outcome.errors));
});

// "block" is one of the accepted intent values.
test('parseAnchors — anchor with intent block sets intent field', () => {
  const source = `# X\n\n<!-- voyage:anchor id="ANN-0001" target="x" line="3" intent="block" -->\n`;
  const outcome = parseAnchors(source);
  assert.equal(outcome.valid, true);
  const [first] = outcome.parsed;
  assert.equal(first.intent, 'block');
});
|
||||
Loading…
Add table
Add a link
Reference in a new issue