feat(voyage): implement block-boundary-fallback for code-fence/table/list anchors
Step 17 of v4.3 playground plan. Pure function relocateAnchorsToBlockBoundaries(text, anchors) detects atomic Markdown blocks (fenced code, tables, deeply nested lists) and relocates anchor-comment insertion to the line BEFORE block opening rather than inside the block. Pure Markdown-text -> Markdown-text transform (no DOM, no markdown-it dependency). Companion test tests/integration/annotation-block-boundary.test.mjs extracts the function via balanced-brace scan and exercises it through Function() — 7 unit tests covering empty anchors, outside-block stays, fenced-code relocation, table relocation, deeply-nested list relocation, mixed inside/outside, and shape contract. Trace: SC6, research/04 Dim 3 (Notion block-level fallback), plan-critic major #6 (DOM-vs-no-DOM contradiction resolved via pure-function design).
This commit is contained in:
parent
3973be2a90
commit
75130fe979
3 changed files with 269 additions and 0 deletions
|
|
@ -60,6 +60,101 @@
|
|||
if (intent !== null && VOYAGE_ANCHOR_INTENTS.indexOf(intent) === -1) return null;
|
||||
return { id: attrs.id, target: attrs.target, line: lineNum, snippet: snippet, intent: intent };
|
||||
}
|
||||
|
||||
// Block-boundary fallback (Step 17). Pure markdown-text -> markdown-text transform.
|
||||
// For each anchor whose line falls inside an atomic block (fenced code-block,
|
||||
// table-row, or deeply-nested list), inject the anchor-comment at the line
|
||||
// BEFORE block-opening rather than inside. Anchors outside atomic blocks
|
||||
// inject at their original line. Mirrors addAnchors semantics from
|
||||
// lib/parsers/anchor-parser.mjs but with block-boundary awareness.
|
||||
/**
 * Insert `<!-- voyage:anchor ... -->` comment lines into markdown text while
 * keeping them out of atomic blocks.
 *
 * Atomic blocks are fenced code blocks (lines starting with three backticks),
 * pipe tables (a row followed by a separator row) and deeply nested list
 * items (a list marker indented by 4+ characters, continued by list lines
 * indented by 2+). An anchor whose 1-based `line` falls inside such a block is
 * moved to `blockStart - 1` (clamped to 1); this repeats while the new line is
 * itself inside another atomic block, so an anchor never ends up in the middle
 * of a neighbouring table or fence. Anchors outside atomic blocks keep their
 * line. Each anchor is emitted as a comment line plus a blank line, inserted
 * before the resolved line; the emitted `line="..."` attribute carries the
 * resolved (possibly relocated) line number.
 *
 * The input is split on LF or CRLF and re-joined with LF. The input `anchors`
 * array and its objects are not mutated.
 *
 * NOTE: the companion test extracts this declaration by name with a
 * balanced-brace scan, so helpers must stay nested inside it and any curly
 * braces in its strings, regexes or comments must stay balanced.
 *
 * @param {string} text - Markdown source. Non-string values are returned as-is.
 * @param {Array<Object>} anchors - Entries with `id`, 1-based `line`, and
 *   optional `target` (defaults to "page"), `snippet` (cut to 80 characters,
 *   double quotes escaped) and `intent`. Entries whose line is not a positive
 *   number within `lines.length + 1` are skipped.
 * @returns {*} The markdown with anchor comments inserted, or `text` unchanged
 *   when it is not a string or `anchors` is not a non-empty array.
 */
function relocateAnchorsToBlockBoundaries(text, anchors) {
  if (typeof text !== 'string') return text;
  if (!Array.isArray(anchors) || anchors.length === 0) return text;

  var lines = text.split(/\r?\n/);
  var FENCED_RE = /^\s*```/;
  var TABLE_ROW_RE = /^\s*\|.*\|\s*$/;
  var TABLE_SEP_RE = /^\s*\|[\s\-:|]+\|\s*$/;
  var LIST_RE = /^(\s*)(?:[-*+]|\d+[.)])\s+/;

  // Inclusive, 1-based line ranges of every atomic block found in the text.
  var atomicRanges = [];
  var inFence = false;
  var fenceStart = -1;
  var inTable = false;
  var tableStart = -1;

  for (var i = 0; i < lines.length; i++) {
    var ln = lines[i];
    if (FENCED_RE.test(ln)) {
      // A fence marker can never be a table row, so an open table ended on the
      // previous line. Closing it here keeps the table range from swallowing
      // the fenced block that follows it.
      if (inTable) {
        atomicRanges.push({ start: tableStart, end: i });
        inTable = false;
        tableStart = -1;
      }
      if (!inFence) {
        inFence = true;
        fenceStart = i + 1;
      } else {
        atomicRanges.push({ start: fenceStart, end: i + 1 });
        inFence = false;
        fenceStart = -1;
      }
      continue;
    }
    // Fence content is opaque: table-looking lines inside code are ignored.
    if (inFence) continue;

    if (!inTable) {
      var nextLine = i + 1 < lines.length ? lines[i + 1] : '';
      // A table only starts where a row is immediately followed by a separator.
      if (TABLE_ROW_RE.test(ln) && TABLE_SEP_RE.test(nextLine)) {
        inTable = true;
        tableStart = i + 1;
      }
    } else if (!TABLE_ROW_RE.test(ln)) {
      // `i` is the 0-based index of the first non-row line, which equals the
      // 1-based number of the last table row.
      atomicRanges.push({ start: tableStart, end: i });
      inTable = false;
      tableStart = -1;
    }
  }
  // Blocks still open at end of input extend to the last line.
  if (inTable) atomicRanges.push({ start: tableStart, end: lines.length });
  if (inFence) atomicRanges.push({ start: fenceStart, end: lines.length });

  // Deeply-nested list-items (indent >= 4 spaces = depth >= 2 in CommonMark).
  for (var j = 0; j < lines.length; j++) {
    var lm = lines[j].match(LIST_RE);
    if (lm && lm[1].length >= 4) {
      var nestStart = j + 1;
      var k = j;
      // Extend over following list lines that are still indented (>= 2).
      while (k + 1 < lines.length) {
        var nm = lines[k + 1].match(LIST_RE);
        if (nm && nm[1].length >= 2) k++;
        else break;
      }
      atomicRanges.push({ start: nestStart, end: k + 1 });
      j = k;
    }
  }

  // Resolve the line an anchor is inserted before. Values that are not
  // positive integers are passed through and filtered out at insertion time.
  function insertionLine(line) {
    var n = Number(line);
    if (!Number.isInteger(n) || n < 1) return n;
    var moved = true;
    // Each pass strictly lowers n, so the loop always terminates.
    while (moved) {
      moved = false;
      for (var r = 0; r < atomicRanges.length; r++) {
        var range = atomicRanges[r];
        if (n >= range.start && n <= range.end) {
          var boundary = Math.max(1, range.start - 1);
          if (boundary !== n) {
            n = boundary;
            moved = true;
          }
          break;
        }
      }
    }
    return n;
  }

  var adjusted = anchors.map(function (a) {
    return Object.assign({}, a, { line: insertionLine(a.line) });
  });

  // Insert bottom-up so earlier insertions never shift later target lines.
  var sorted = adjusted.slice().sort(function (a, b) {
    return (Number(b.line) || 0) - (Number(a.line) || 0);
  });

  for (var s = 0; s < sorted.length; s++) {
    var d = sorted[s];
    var dl = Number(d.line);
    if (!dl || dl < 1 || dl > lines.length + 1) continue;
    var attrParts = [
      'id="' + d.id + '"',
      'target="' + (d.target || 'page') + '"',
      'line="' + dl + '"'
    ];
    if (d.snippet) {
      // Escape double quotes so the snippet cannot terminate its own attribute.
      attrParts.push('snippet="' + String(d.snippet).slice(0, 80).replace(/"/g, '&quot;') + '"');
    }
    if (d.intent) attrParts.push('intent="' + d.intent + '"');
    var anchorLine = '<!-- voyage:anchor ' + attrParts.join(' ') + ' -->';
    lines.splice(dl - 1, 0, anchorLine, '');
  }
  return lines.join('\n');
}
|
||||
</script>
|
||||
|
||||
<link rel="stylesheet" href="vendor/playground-design-system/fonts.css">
|
||||
|
|
|
|||
|
|
@ -0,0 +1,168 @@
|
|||
// tests/integration/annotation-block-boundary.test.mjs
|
||||
// Step 17 — verify relocateAnchorsToBlockBoundaries pure-function transforms
|
||||
// markdown anchors away from atomic-block interiors (fenced code, tables,
|
||||
// deeply-nested lists) toward the block-boundary line.
|
||||
//
|
||||
// Function lives in playground/voyage-playground.html as inline-script (file://
|
||||
// compat). We extract it via balanced-brace scan and exercise via Function().
|
||||
|
||||
import { test } from 'node:test';
|
||||
import { strict as assert } from 'node:assert';
|
||||
import { readFileSync } from 'node:fs';
|
||||
import { dirname, resolve, join } from 'node:path';
|
||||
import { fileURLToPath } from 'node:url';
|
||||
|
||||
const __dirname = dirname(fileURLToPath(import.meta.url));
|
||||
const ROOT = resolve(__dirname, '..', '..');
|
||||
const HTML = join(ROOT, 'playground', 'voyage-playground.html');
|
||||
|
||||
/**
 * Extract the source text of a named function declaration from a larger text.
 *
 * Finds the first `function <fnName>` occurrence where `fnName` is the whole
 * identifier (so `foo` does not match `fooBar`), then scans from the first
 * opening brace after it to the matching closing brace by counting braces.
 *
 * The scan is purely textual: braces inside strings, regex literals or
 * comments are counted too, so the target function must keep them balanced.
 *
 * @param {string} text - Text to search (e.g. an HTML file with inline script).
 * @param {string} fnName - Exact name of the function declaration.
 * @returns {string|null} The declaration from `function` through its closing
 *   brace, or null when the name is not found or the braces never balance.
 */
function extractFunctionSource(text, fnName) {
  const needle = `function ${fnName}`;
  const isIdentifierChar = (ch) => ch !== undefined && /[A-Za-z0-9_$]/.test(ch);

  // Skip hits where fnName is only the prefix of a longer identifier.
  let start = text.indexOf(needle);
  while (start !== -1 && isIdentifierChar(text[start + needle.length])) {
    start = text.indexOf(needle, start + 1);
  }
  if (start === -1) return null;

  const braceStart = text.indexOf('{', start);
  if (braceStart === -1) return null;

  let depth = 0;
  for (let i = braceStart; i < text.length; i++) {
    const ch = text[i];
    if (ch === '{') {
      depth += 1;
    } else if (ch === '}') {
      depth -= 1;
      if (depth === 0) return text.slice(start, i + 1);
    }
  }
  // Ran off the end without closing the outermost brace.
  return null;
}
|
||||
|
||||
/**
 * Load relocateAnchorsToBlockBoundaries out of the playground HTML file.
 *
 * Reads the HTML, pulls the function's source text out of it and evaluates
 * that source through the Function constructor, returning the resulting
 * function object.
 *
 * @returns {Function} The evaluated relocateAnchorsToBlockBoundaries.
 * @throws {Error} When the function source cannot be extracted from the HTML.
 */
function loadRelocate() {
  const pageSource = readFileSync(HTML, 'utf-8');
  const fnSource = extractFunctionSource(pageSource, 'relocateAnchorsToBlockBoundaries');
  if (!fnSource) {
    throw new Error('relocateAnchorsToBlockBoundaries not found in HTML');
  }
  // The Function constructor evaluates the extracted source in its own scope,
  // which is sufficient because the function is pure.
  // eslint-disable-next-line no-new-func
  return new Function(`${fnSource}; return relocateAnchorsToBlockBoundaries;`)();
}
|
||||
|
||||
const relocate = loadRelocate();
|
||||
|
||||
test('relocateAnchorsToBlockBoundaries returns input unchanged when anchors empty', () => {
  // With no anchors to place, the very same string must come back.
  const source = 'Line 1\nLine 2\nLine 3\n';
  const result = relocate(source, []);
  assert.strictEqual(result, source);
});
|
||||
|
||||
test('relocateAnchorsToBlockBoundaries leaves anchor outside atomic block at original line', () => {
  // Twenty plain lines: no fence, table or list anywhere in the document.
  const doc = Array.from({ length: 20 }, (_, idx) => `Line ${idx + 1}`).join('\n');
  const rendered = relocate(doc, [{ id: 'ANN-0001', target: 'sec-a', line: 5 }]).split('\n');

  // Output line 5 (index 4) holds the anchor, index 5 the blank spacer, and
  // the original "Line 5" is pushed down to output line 7 (index 6).
  const [anchorRow, spacerRow, displacedRow] = rendered.slice(4, 7);
  assert.match(anchorRow, /<!-- voyage:anchor id="ANN-0001"/);
  assert.strictEqual(spacerRow, '');
  assert.strictEqual(displacedRow, 'Line 5');
});
|
||||
|
||||
test('relocateAnchorsToBlockBoundaries moves anchor inside fenced code-block to block-boundary', () => {
  // Lines 1-9: prose. Line 10: fence opens. Lines 11-19: code.
  // Line 20: fence closes. Line 21: trailing prose.
  const prose = Array.from({ length: 9 }, (_, idx) => `Line ${idx + 1}`);
  const code = ['x', 'y', 'z', 'a', 'b', 'c', 'd', 'e', 'f'].map(
    (name, idx) => `const ${name} = ${idx + 1};`
  );
  const md = [...prose, '```js', ...code, '```', 'Line 21'].join('\n');

  // Line 15 is "const b = 5;", in the middle of the fence.
  const outLines = relocate(md, [{ id: 'ANN-0002', target: 'code-block', line: 15 }]).split('\n');

  // The fence spans lines 10-20, so the anchor is inserted at fence start - 1 = 9.
  assert.match(
    outLines[8],
    /<!-- voyage:anchor id="ANN-0002"/,
    `expected anchor at output line 9, got: ${JSON.stringify(outLines.slice(7, 12))}`
  );
  // The opening fence line must survive untouched further down the output.
  const openingFence = outLines.find((row) => row === '```js');
  assert.strictEqual(openingFence, '```js');
});
|
||||
|
||||
test('relocateAnchorsToBlockBoundaries moves anchor inside table to block-boundary', () => {
  const intro = [1, 2, 3, 4].map((num) => `Intro paragraph ${num}`); // lines 1-4
  const table = [
    '| Col A | Col B |', // 6 - header
    '|-------|-------|', // 7 - separator
    '| a1 | b1 |', // 8 <- anchor target
    '| a2 | b2 |', // 9
    '| a3 | b3 |', // 10
  ];
  const md = [...intro, '', ...table, '', 'After table'].join('\n');

  const outLines = relocate(md, [{ id: 'ANN-0003', target: 'table-row', line: 8 }]).split('\n');

  // The table begins on line 6, so the anchor moves to line 5 (start - 1).
  assert.match(
    outLines[4],
    /<!-- voyage:anchor id="ANN-0003"/,
    `expected anchor at output line 5, got: ${JSON.stringify(outLines.slice(3, 8))}`
  );
});
|
||||
|
||||
test('relocateAnchorsToBlockBoundaries moves anchor inside deeply-nested list to block-boundary', () => {
  // Indentation inside these literals is significant: the function treats a
  // list marker indented by 4+ spaces as the start of a deeply-nested run and
  // keeps extending it over list lines indented by 2+ spaces.
  const md = [
    'Heading paragraph', // 1
    '', // 2
    '- Top-level item A', // 3
    '  - Second-level A.1', // 4
    '    - Third-level A.1.a', // 5 <- nested-list start (4-space indent = depth >= 2)
    '    - Third-level A.1.b', // 6 <- anchor target inside nest
    '    - Third-level A.1.c', // 7
    '  - Second-level A.2', // 8
    '- Top-level item B', // 9
  ].join('\n');
  const out = relocate(md, [{ id: 'ANN-0004', target: 'list-item', line: 6 }]);
  const outLines = out.split('\n');
  // Deeply-nested list starts at line 5; anchor relocated to line 4
  assert.match(
    outLines[3],
    /<!-- voyage:anchor id="ANN-0004"/,
    `expected anchor at output line 4, got: ${JSON.stringify(outLines.slice(2, 7))}`
  );
});
|
||||
|
||||
test('relocateAnchorsToBlockBoundaries handles multiple anchors mixed inside/outside blocks', () => {
  const md = [
    'Para A', // 1
    'Para B', // 2 <- ANN-0010: outside any block, stays put
    'Para C', // 3
    'Para D', // 4
    'Para E', // 5
    '```py', // 6 - fence opens
    'x = 1', // 7
    'y = 2', // 8 <- ANN-0011: inside the fence, moves to 5
    'z = 3', // 9
    '```', // 10 - fence closes
    'Para K', // 11
  ].join('\n');
  const anchors = [
    { id: 'ANN-0010', target: 'p', line: 2 },
    { id: 'ANN-0011', target: 'code', line: 8 },
  ];

  const out = relocate(md, anchors);

  // Neither anchor may be dropped.
  assert.match(out, /<!-- voyage:anchor id="ANN-0010"/);
  assert.match(out, /<!-- voyage:anchor id="ANN-0011"/);

  // The relocated anchor has to sit above the line that opens the fence.
  const rows = out.split('\n');
  const ann11Idx = rows.findIndex((row) => /ANN-0011/.test(row));
  const fenceIdx = rows.indexOf('```py');
  assert.ok(ann11Idx < fenceIdx, `ANN-0011 (${ann11Idx}) must precede fence-open (${fenceIdx})`);
});
|
||||
|
||||
test('relocateAnchorsToBlockBoundaries returns string (basic shape)', () => {
  // Shape contract only: a non-empty string comes back for a valid anchor.
  const anchors = [{ id: 'ANN-0099', target: 't', line: 2 }];
  const result = relocate('a\nb\nc\n', anchors);
  assert.strictEqual(typeof result, 'string');
  assert.ok(result.length > 0);
});
|
||||
|
|
@ -220,3 +220,9 @@ test('voyage-playground.html declares parseAnchor validator (v4.3 Step 16)', ()
|
|||
const text = readFileSync(HTML, 'utf-8');
|
||||
assert.match(text, /function\s+parseAnchor\s*\(\s*line\s*\)/, 'parseAnchor(line) function required');
|
||||
});
|
||||
|
||||
test('voyage-playground.html declares relocateAnchorsToBlockBoundaries pure function (v4.3 Step 17)', () => {
  // The declaration must keep exactly the (text, anchors) parameter list;
  // whitespace around the tokens is free.
  const signature = /function\s+relocateAnchorsToBlockBoundaries\s*\(\s*text\s*,\s*anchors\s*\)/;
  const page = readFileSync(HTML, 'utf-8');
  assert.match(page, signature, 'relocateAnchorsToBlockBoundaries(text, anchors) pure function required');
});
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue