feat(post-mcp-verify): E7 — scan HTML comment nodes for injection

The existing CRITICAL pattern in injection-patterns.mjs only fires when a comment body contains AGENT/AI/HIDDEN markers. Adversaries can drop the marker and still hide instructions inside <!-- ... --> for any agent that reads page source. This generalizes the comment scan: every comment body is HTML-entity-decoded and run through the full injection rule set. The existing keyword-restricted pattern still fires (defense-in-depth). Emits at the strongest tier with category html-comment-injection. +3 tests (65 → 68). Refs: Batch B Wave 4 / Step 11 / v7.2.0
2026-04-29 15:01:56 +02:00 · 2026-04-29 15:01:56 +02:00 · d441abba20
commit d441abba20
parent 716c8384d9
2 changed files with 80 additions and 0 deletions
--- a/plugins/llm-security/hooks/scripts/post-mcp-verify.mjs
+++ b/plugins/llm-security/hooks/scripts/post-mcp-verify.mjs
@@ -351,6 +351,38 @@ if (isHtmlSource && outputText.length >= MIN_INJECTION_SCAN_LENGTH) {
      );
    }

+    // -----------------------------------------------------------------------
+    // E7 (v7.2.0): HTML comment node injection.
+    // Generalizes the existing keyword-restricted CRITICAL pattern in
+    // injection-patterns.mjs (which only fires on AGENT/AI/HIDDEN markers).
+    // The existing pattern still fires (defense-in-depth); this scans the
+    // body of any <!-- ... --> comment for the full injection rule set.
+    // -----------------------------------------------------------------------
+    const commentRegex = /<!--([\s\S]*?)-->/g;
+    const commentBodies = [];
+    let commentMatch;
+    while ((commentMatch = commentRegex.exec(htmlSlice)) !== null) {
+      const body = commentMatch[1].trim();
+      if (body.length > 0) {
+        commentBodies.push(decodeHtmlEntities(body));
+      }
+    }
+    if (commentBodies.length > 0) {
+      const commentScan = scanForInjection(commentBodies.join('\n'));
+      if (commentScan.critical.length > 0 || commentScan.high.length > 0 || commentScan.medium.length > 0) {
+        const labels = [...commentScan.critical, ...commentScan.high, ...commentScan.medium];
+        const sev = commentScan.critical.length > 0 ? 'CRITICAL'
+                  : commentScan.high.length > 0 ? 'HIGH'
+                  : 'MEDIUM';
+        advisories.push(
+          `HTML comment-node injection detected — ${sev} (html-comment-injection, OWASP LLM01).\n` +
+          `  Adversarial content inside <!-- ... --> — invisible in render, parsed by agents.\n` +
+          labels.slice(0, 5).map(l => `  - ${l}`).join('\n') + '\n' +
+          `  ${formatToolContext(toolName, toolInput)}`
+        );
+      }
+    }
+
    // -----------------------------------------------------------------------
    // E5 (v7.2.0): SVG element-content injection.
    // Adversarial text inside <desc>, <title>, <metadata>, <foreignObject>