feat(voyage): implement HTML-comment indirect prompt injection mitigation (Sec T4)

2026-05-10 18:03:37 +02:00 · 2026-05-10 18:03:37 +02:00 · 6293775f30
commit 6293775f30
parent fc8c9eecdd
3 changed files with 144 additions and 1 deletions
--- a/plugins/voyage/playground/voyage-playground.html
+++ b/plugins/voyage/playground/voyage-playground.html
@ -1225,10 +1225,29 @@ playground first-run shows a complete round-trip-able artifact.
        console.warn('markdown-it-front-matter plugin not loaded:', e && e.message);
      }

+      // ---- v4.3 Step 25 — Sec T4 HTML-comment indirect prompt-injection
+      //                     mitigation. -----------------------------------
+      // Strip every <!-- ... --> comment from the source text BEFORE
+      // markdown-it render, except those matching the VOYAGE_ANCHOR_RE
+      // allowlist (Step 16). Uses parseAnchor as the negative-form filter:
+      // if parseAnchor returns a non-null value, the comment is a valid
+      // voyage:anchor and survives; everything else (including
+      // "<!-- IGNORE PREVIOUS INSTRUCTIONS -->" and similar prompt-injection
+      // payloads embedded in artifacts) is dropped silently. Pure
+      // string-in-string-out — no DOM access, no I/O.
+      function stripUnsafeComments(text) {
+        if (typeof text !== 'string') return text;
+        return text.replace(/<!--[\s\S]*?-->/g, function (match) {
+          return parseAnchor(match) ? match : '';
+        });
+      }
+
      // ---- render pipeline ----------------------------------------------
      function renderArtifact(text) {
        capturedFrontmatter = '';
-        var bodyHtml = md.render(text || '');
+        // v4.3 Step 25 — strip unsafe HTML-comments before markdown-it sees them.
+        var safeText = stripUnsafeComments(text || '');
+        var bodyHtml = md.render(safeText);
        // Pre-render-then-wrap for <details>: prepend a folded frontmatter
        // <details> block at the top if the front-matter plugin captured one.
        var fmHtml = '';