feat(voyage): implement block-boundary-fallback for code-fence/table/list anchors

Step 17 of v4.3 playground plan. Pure function relocateAnchorsToBlockBoundaries
(text, anchors) detects atomic markdown blocks (fenced code, tables, deeply
nested lists) and relocates anchor-comment insertion to the line BEFORE block
opening rather than inside the block. Pure markdown-text -> markdown-text
transform (no DOM, no markdown-it dependency).

Companion test tests/integration/annotation-block-boundary.test.mjs extracts
the function via balanced-brace scan and exercises it through Function() —
7 unit tests covering empty anchors, outside-block stays, fenced-code
relocation, table relocation, deeply-nested list relocation, mixed
inside/outside, and shape contract.

Trace: SC6, research/04 Dim 3 (Notion block-level fallback), plan-critic
major #6 (DOM-vs-no-DOM contradiction resolved via pure-function design).
This commit is contained in:
Kjell Tore Guttormsen 2026-05-10 17:04:27 +02:00
commit 75130fe979
3 changed files with 269 additions and 0 deletions

View file

@ -60,6 +60,101 @@
if (intent !== null && VOYAGE_ANCHOR_INTENTS.indexOf(intent) === -1) return null; if (intent !== null && VOYAGE_ANCHOR_INTENTS.indexOf(intent) === -1) return null;
return { id: attrs.id, target: attrs.target, line: lineNum, snippet: snippet, intent: intent }; return { id: attrs.id, target: attrs.target, line: lineNum, snippet: snippet, intent: intent };
} }
// Block-boundary fallback (Step 17). Pure markdown-text -> markdown-text transform.
//
// Injects one `<!-- voyage:anchor ... -->` comment line (followed by a blank
// line) per anchor. An anchor whose line falls inside an atomic block is
// re-pointed at the line BEFORE the block opening; if that line is itself
// inside another atomic block (adjacent blocks), the search repeats until the
// target is outside every block. Anchors outside atomic blocks keep their line.
//
// Atomic blocks detected here:
//   - fenced code: lines from an opening ``` line through the closing ``` line
//     (an unclosed fence runs to the end of the text);
//   - tables: a pipe-delimited row directly followed by a separator row, up to
//     the last consecutive pipe-delimited row;
//   - deeply-nested lists: a list item indented >= 4 columns plus the
//     following list items indented >= 2 columns.
//
// Parameters:
//   text    - markdown source; a non-string value is returned unchanged.
//   anchors - array of { id, target?, line, snippet?, intent? } with 1-indexed
//             `line`; a non-array or empty array returns `text` unchanged.
// Returns the markdown with anchors injected. Lines are re-joined with '\n',
// so CRLF input comes back with LF line endings. Input anchors are not mutated.
// Anchors whose line is not a number >= 1, or lies more than one past the last
// line, are skipped.
function relocateAnchorsToBlockBoundaries(text, anchors) {
  if (typeof text !== 'string') return text;
  if (!Array.isArray(anchors) || anchors.length === 0) return text;
  var lines = text.split(/\r?\n/);
  var FENCED_RE = /^\s*```/;
  var TABLE_ROW_RE = /^\s*\|.*\|\s*$/;
  var TABLE_SEP_RE = /^\s*\|[\s\-:|]+\|\s*$/;
  var LIST_RE = /^(\s*)(?:[-*+]|\d+[.)])\s+/;
  // Each range is { start, end } in 1-indexed, inclusive line numbers.
  var atomicRanges = [];
  var inFence = false;
  var fenceStart = -1;
  var inTable = false;
  var tableStart = -1;
  // Pass 1: fenced code blocks and tables.
  for (var i = 0; i < lines.length; i++) {
    var ln = lines[i];
    if (FENCED_RE.test(ln)) {
      if (!inFence) {
        // A fence line is not a table row, so an open table ends on the line
        // before it. Without this the table range would swallow the fence and
        // any pipe-delimited lines that follow it.
        if (inTable) {
          atomicRanges.push({ start: tableStart, end: i });
          inTable = false;
          tableStart = -1;
        }
        inFence = true;
        fenceStart = i + 1;
      } else {
        atomicRanges.push({ start: fenceStart, end: i + 1 });
        inFence = false;
        fenceStart = -1;
      }
      continue;
    }
    // Fence content is opaque: pipes inside code are not table rows.
    if (inFence) continue;
    if (!inTable) {
      var nextLine = i + 1 < lines.length ? lines[i + 1] : '';
      if (TABLE_ROW_RE.test(ln) && TABLE_SEP_RE.test(nextLine)) {
        inTable = true;
        tableStart = i + 1;
      }
    } else if (!TABLE_ROW_RE.test(ln)) {
      // `i` is the 0-based index of the first non-row line, which equals the
      // 1-based number of the last table row.
      atomicRanges.push({ start: tableStart, end: i });
      inTable = false;
      tableStart = -1;
    }
  }
  if (inTable) atomicRanges.push({ start: tableStart, end: lines.length });
  if (inFence && fenceStart > 0) atomicRanges.push({ start: fenceStart, end: lines.length });
  // Pass 2: deeply-nested list-items (indent >= 4 spaces = depth >= 2 in CommonMark).
  for (var j = 0; j < lines.length; j++) {
    var lm = lines[j].match(LIST_RE);
    if (lm && lm[1].length >= 4) {
      var nestStart = j + 1;
      var k = j;
      // Extend over following list items that are still nested (indent >= 2).
      while (k + 1 < lines.length) {
        var nm = lines[k + 1].match(LIST_RE);
        if (nm && nm[1].length >= 2) k++;
        else break;
      }
      atomicRanges.push({ start: nestStart, end: k + 1 });
      j = k;
    }
  }
  // Returns the first atomic range containing 1-indexed line `n`, or null.
  function findRange(n) {
    for (var r = 0; r < atomicRanges.length; r++) {
      var range = atomicRanges[r];
      if (n >= range.start && n <= range.end) return range;
    }
    return null;
  }
  // Maps an anchor line to its insertion line. Values that are not integers
  // >= 1 are returned as-is and filtered (or not) by the injection loop below.
  function insertionLine(line) {
    var n = Number(line);
    if (!Number.isInteger(n) || n < 1) return n;
    var range = findRange(n);
    // Keep stepping to the line before the enclosing block; `n` strictly
    // decreases, so this terminates. A block starting on line 1 clamps to 1.
    while (range) {
      if (range.start <= 1) return 1;
      n = range.start - 1;
      range = findRange(n);
    }
    return n;
  }
  var adjusted = anchors.map(function (a) {
    var newLine = insertionLine(a.line);
    return Object.assign({}, a, { line: newLine });
  });
  // Insert bottom-up so earlier splices do not shift later insertion indices.
  var sorted = adjusted.slice().sort(function (a, b) {
    return (Number(b.line) || 0) - (Number(a.line) || 0);
  });
  for (var s = 0; s < sorted.length; s++) {
    var d = sorted[s];
    var dl = Number(d.line);
    if (!dl || dl < 1 || dl > lines.length + 1) continue;
    var attrParts = ['id="' + d.id + '"', 'target="' + (d.target || 'page') + '"', 'line="' + dl + '"'];
    if (d.snippet) attrParts.push('snippet="' + String(d.snippet).slice(0, 80).replace(/"/g, '&quot;') + '"');
    if (d.intent) attrParts.push('intent="' + d.intent + '"');
    var anchorLine = '<!-- voyage:anchor ' + attrParts.join(' ') + ' -->';
    // The anchor comment becomes line `dl`; a blank line separates it from
    // the content that previously occupied that line.
    lines.splice(dl - 1, 0, anchorLine, '');
  }
  return lines.join('\n');
}
</script> </script>
<link rel="stylesheet" href="vendor/playground-design-system/fonts.css"> <link rel="stylesheet" href="vendor/playground-design-system/fonts.css">

View file

@ -0,0 +1,168 @@
// tests/integration/annotation-block-boundary.test.mjs
// Step 17 — verify relocateAnchorsToBlockBoundaries pure-function transforms
// markdown anchors away from atomic-block interiors (fenced code, tables,
// deeply-nested lists) toward the block-boundary line.
//
// Function lives in playground/voyage-playground.html as inline-script (file://
// compat). We extract it via balanced-brace scan and exercise via Function().
import { test } from 'node:test';
import { strict as assert } from 'node:assert';
import { readFileSync } from 'node:fs';
import { dirname, resolve, join } from 'node:path';
import { fileURLToPath } from 'node:url';
const __dirname = dirname(fileURLToPath(import.meta.url));
const ROOT = resolve(__dirname, '..', '..');
const HTML = join(ROOT, 'playground', 'voyage-playground.html');
/**
 * Pulls the source text of a named function declaration out of a larger text.
 *
 * Finds the first occurrence of `function <fnName>`, then counts `{` / `}`
 * from the first opening brace after it until the count returns to zero.
 * The scan is purely character-based: braces inside strings, regexes or
 * comments are counted like any other brace.
 *
 * @param {string} text - Text to search (here: the playground HTML).
 * @param {string} fnName - Name of the function declaration to extract.
 * @returns {string|null} Source from `function` through the matching `}`,
 *   or null when the declaration, its opening brace, or a balancing closing
 *   brace cannot be found.
 */
function extractFunctionSource(text, fnName) {
  const declarationAt = text.indexOf(`function ${fnName}`);
  if (declarationAt === -1) return null;

  const bodyOpensAt = text.indexOf('{', declarationAt);
  if (bodyOpensAt === -1) return null;

  let openBraces = 0;
  let cursor = bodyOpensAt;
  while (cursor < text.length) {
    const ch = text.charAt(cursor);
    if (ch === '{') {
      openBraces += 1;
    } else if (ch === '}') {
      openBraces -= 1;
      // Back at depth zero: this brace closes the function body.
      if (openBraces === 0) return text.slice(declarationAt, cursor + 1);
    }
    cursor += 1;
  }
  // Ran off the end without balancing the braces.
  return null;
}
/**
 * Loads relocateAnchorsToBlockBoundaries out of the playground HTML.
 *
 * Reads the HTML file, extracts the function's source text with
 * extractFunctionSource, and evaluates it through the Function constructor.
 *
 * @returns {Function} The relocateAnchorsToBlockBoundaries function.
 * @throws {Error} When the function source cannot be located in the HTML.
 */
function loadRelocate() {
  const fnName = 'relocateAnchorsToBlockBoundaries';
  const pageSource = readFileSync(HTML, 'utf-8');
  const fnSource = extractFunctionSource(pageSource, fnName);
  if (!fnSource) {
    throw new Error(`${fnName} not found in HTML`);
  }
  // The Function constructor evaluates the extracted source in its own
  // function scope; acceptable here because the target is a pure function.
  // eslint-disable-next-line no-new-func
  const build = new Function(`${fnSource}; return ${fnName};`);
  return build();
}
// Extracted once at module load and shared by every test below.
const relocate = loadRelocate();
test('relocateAnchorsToBlockBoundaries returns input unchanged when anchors empty', () => {
  // An empty anchor list must hand the text back untouched (trailing newline included).
  const source = ['Line 1', 'Line 2', 'Line 3', ''].join('\n');
  const result = relocate(source, []);
  assert.equal(result, source);
});
test('relocateAnchorsToBlockBoundaries leaves anchor outside atomic block at original line', () => {
  // Twenty plain paragraphs: no fences, tables or lists anywhere.
  const md = Array.from({ length: 20 }, (_, idx) => `Line ${idx + 1}`).join('\n');
  const result = relocate(md, [{ id: 'ANN-0001', target: 'sec-a', line: 5 }]);
  // Output lines 5, 6 and 7 (indexes 4-6): anchor comment, blank spacer, shifted "Line 5".
  const [anchorRow, spacerRow, shiftedRow] = result.split('\n').slice(4, 7);
  assert.match(anchorRow, /<!-- voyage:anchor id="ANN-0001"/);
  assert.equal(spacerRow, '');
  assert.equal(shiftedRow, 'Line 5');
});
test('relocateAnchorsToBlockBoundaries moves anchor inside fenced code-block to block-boundary', () => {
  // Lines 1-9: plain paragraphs.
  const prose = Array.from({ length: 9 }, (_, idx) => `Line ${idx + 1}`);
  // Lines 11-19: fence body; line 15 ("const b = 5;") is the anchor target.
  const codeBody = ['x', 'y', 'z', 'a', 'b', 'c', 'd', 'e', 'f'].map(
    (name, idx) => `const ${name} = ${idx + 1};`
  );
  // Line 10 opens the fence, line 20 closes it, line 21 follows.
  const md = [...prose, '```js', ...codeBody, '```', 'Line 21'].join('\n');

  const rows = relocate(md, [{ id: 'ANN-0002', target: 'code-block', line: 15 }]).split('\n');

  // Fence spans lines 10-20, so the anchor is injected at fence.start - 1 = 9 (index 8).
  assert.match(
    rows[8],
    /<!-- voyage:anchor id="ANN-0002"/,
    `expected anchor at output line 9, got: ${JSON.stringify(rows.slice(7, 12))}`
  );
  // The fence opening must survive intact further down.
  const fenceOpening = rows.find((row) => row === '```js');
  assert.equal(fenceOpening, '```js');
});
test('relocateAnchorsToBlockBoundaries moves anchor inside table to block-boundary', () => {
  // Lines 1-4: intro paragraphs; line 5 is blank.
  const intro = [1, 2, 3, 4].map((n) => `Intro paragraph ${n}`);
  // Lines 6-10: header, separator, three body rows (line 8 is the anchor target).
  const table = [
    '| Col A | Col B |',
    '|-------|-------|',
    '| a1 | b1 |',
    '| a2 | b2 |',
    '| a3 | b3 |',
  ];
  // Line 11 is blank, line 12 trails the table.
  const md = [...intro, '', ...table, '', 'After table'].join('\n');

  const rows = relocate(md, [{ id: 'ANN-0003', target: 'table-row', line: 8 }]).split('\n');

  // Table starts at line 6, so the anchor lands on line 5 (index 4).
  assert.match(
    rows[4],
    /<!-- voyage:anchor id="ANN-0003"/,
    `expected anchor at output line 5, got: ${JSON.stringify(rows.slice(3, 8))}`
  );
});
test('relocateAnchorsToBlockBoundaries moves anchor inside deeply-nested list to block-boundary', () => {
  // Indentation is significant in this fixture: the function only treats a
  // list item as deeply nested when it is indented by at least 4 columns, and
  // keeps extending the nested run over items indented by at least 2 columns.
  // Second-level items therefore use 2 spaces and third-level items 4 spaces.
  const md = [
    'Heading paragraph', // 1
    '', // 2
    '- Top-level item A', // 3
    '  - Second-level A.1', // 4
    '    - Third-level A.1.a', // 5 <- nested-list start (4-space indent = depth >= 2)
    '    - Third-level A.1.b', // 6 <- anchor target inside nest
    '    - Third-level A.1.c', // 7
    '  - Second-level A.2', // 8
    '- Top-level item B', // 9
  ].join('\n');
  const out = relocate(md, [{ id: 'ANN-0004', target: 'list-item', line: 6 }]);
  const outLines = out.split('\n');
  // Deeply-nested list starts at line 5; anchor relocated to line 4
  assert.match(outLines[3], /<!-- voyage:anchor id="ANN-0004"/, `expected anchor at output line 4, got: ${JSON.stringify(outLines.slice(2, 7))}`);
});
test('relocateAnchorsToBlockBoundaries handles multiple anchors mixed inside/outside blocks', () => {
  // Lines 1-5: paragraphs (line 2 carries the first anchor and stays put).
  const paragraphs = ['A', 'B', 'C', 'D', 'E'].map((tag) => `Para ${tag}`);
  // Lines 6-10: fenced block; line 8 ("y = 2") carries the second anchor.
  const fenced = ['```py', 'x = 1', 'y = 2', 'z = 3', '```'];
  const md = [...paragraphs, ...fenced, 'Para K'].join('\n');

  const anchors = [
    { id: 'ANN-0010', target: 'p', line: 2 },
    { id: 'ANN-0011', target: 'code', line: 8 },
  ];
  const out = relocate(md, anchors);

  // Both anchors must be present in the output.
  assert.match(out, /<!-- voyage:anchor id="ANN-0010"/);
  assert.match(out, /<!-- voyage:anchor id="ANN-0011"/);

  // The fence-bound anchor must sit above the fence opening.
  const rows = out.split('\n');
  const ann11Idx = rows.findIndex((row) => /ANN-0011/.test(row));
  const fenceIdx = rows.indexOf('```py');
  assert.ok(ann11Idx < fenceIdx, `ANN-0011 (${ann11Idx}) must precede fence-open (${fenceIdx})`);
});
test('relocateAnchorsToBlockBoundaries returns string (basic shape)', () => {
  // Shape contract only: a non-empty string comes back for a valid anchor.
  const anchor = { id: 'ANN-0099', target: 't', line: 2 };
  const result = relocate('a\nb\nc\n', [anchor]);
  assert.equal(typeof result, 'string');
  assert.ok(result.length > 0);
});

View file

@ -220,3 +220,9 @@ test('voyage-playground.html declares parseAnchor validator (v4.3 Step 16)', ()
const text = readFileSync(HTML, 'utf-8'); const text = readFileSync(HTML, 'utf-8');
assert.match(text, /function\s+parseAnchor\s*\(\s*line\s*\)/, 'parseAnchor(line) function required'); assert.match(text, /function\s+parseAnchor\s*\(\s*line\s*\)/, 'parseAnchor(line) function required');
}); });
test('voyage-playground.html declares relocateAnchorsToBlockBoundaries pure function (v4.3 Step 17)', () => {
  // Static check on the page source: the declaration must exist with the
  // exact (text, anchors) parameter list.
  const pageSource = readFileSync(HTML, 'utf-8');
  const declaration = /function\s+relocateAnchorsToBlockBoundaries\s*\(\s*text\s*,\s*anchors\s*\)/;
  assert.match(pageSource, declaration, 'relocateAnchorsToBlockBoundaries(text, anchors) pure function required');
});