feat(voyage): implement HTML-comment indirect prompt injection mitigation (Sec T4)

This commit is contained in:
Kjell Tore Guttormsen 2026-05-10 18:03:37 +02:00
commit 6293775f30
3 changed files with 144 additions and 1 deletions

View file

@ -1225,10 +1225,29 @@ playground first-run shows a complete round-trip-able artifact.
console.warn('markdown-it-front-matter plugin not loaded:', e && e.message);
}
// ---- v4.3 Step 25 — Sec T4 HTML-comment indirect prompt-injection
// mitigation. -----------------------------------
// Strip every <!-- ... --> comment from the source text BEFORE
// markdown-it render, except those matching the VOYAGE_ANCHOR_RE
// allowlist (Step 16). Uses parseAnchor as the negative-form filter:
// if parseAnchor returns a non-null value, the comment is a valid
// voyage:anchor and survives; everything else (including
// "<!-- IGNORE PREVIOUS INSTRUCTIONS -->" and similar prompt-injection
// payloads embedded in artifacts) is dropped silently. Pure
// string-in-string-out — no DOM access, no I/O.
/**
 * Remove every HTML comment from `text` except those that parseAnchor
 * accepts (returns a truthy value for).
 *
 * The strip is repeated until the text stops changing. A single pass is
 * not enough: deleting an inner comment can splice the surrounding
 * characters into a brand-new comment, e.g.
 *   "<!-<!-- a -->- payload -->"  ->  "<!-- payload -->"
 * which would otherwise reach the renderer intact.
 *
 * @param {*} text - Source text; non-string values are returned unchanged.
 * @returns {*} The text with all non-allowlisted comments removed.
 */
function stripUnsafeComments(text) {
  if (typeof text !== 'string') return text;
  var previous;
  var current = text;
  do {
    previous = current;
    // Allowlisted comments are returned verbatim, so they are stable across
    // passes; every other match shortens the string, so the loop terminates.
    current = previous.replace(/<!--[\s\S]*?-->/g, function (match) {
      return parseAnchor(match) ? match : '';
    });
  } while (current !== previous);
  return current;
}
// ---- render pipeline ----------------------------------------------
function renderArtifact(text) {
capturedFrontmatter = '';
var bodyHtml = md.render(text || '');
// v4.3 Step 25 — strip unsafe HTML-comments before markdown-it sees them.
var safeText = stripUnsafeComments(text || '');
var bodyHtml = md.render(safeText);
// Pre-render-then-wrap for <details>: prepend a folded frontmatter
// <details> block at the top if the front-matter plugin captured one.
var fmHtml = '';