feat(post-mcp-verify): E4 — scan markdown link titles for injection

Adversarial payloads in markdown link title attributes (rendered as
tooltips, parsed by agents) bypassed the existing HTML-content checks
which gated on `<tag>` presence. Pattern: [text](url "title").

Adds linkTitleRegex extraction to the HTML-source block (ahead of the
`<tag>` gate, since Markdown needs no HTML tags), runs each captured
title through scanForInjection, and emits one advisory at the strongest
tier encountered, with category markdown-link-title-injection.

+3 tests (59 → 62 in post-mcp-verify.test.mjs).

Refs: Batch B Wave 4 / Step 9 / v7.2.0
This commit is contained in:
Kjell Tore Guttormsen 2026-04-29 14:52:30 +02:00
commit b95d85bb4c
2 changed files with 78 additions and 0 deletions

View file

@ -21,6 +21,7 @@ import { tmpdir } from 'node:os';
import { scanForInjection } from '../../scanners/lib/injection-patterns.mjs';
import { checkDescriptionDrift } from '../../scanners/lib/mcp-description-cache.mjs';
import { getPolicyValue } from '../../scanners/lib/policy-loader.mjs';
import { decodeHtmlEntities } from '../../scanners/lib/string-utils.mjs';
// ---------------------------------------------------------------------------
// Secret patterns — same set as pre-edit-secrets.mjs so any secret that
@ -293,6 +294,36 @@ if (outputText.length >= MIN_INJECTION_SCAN_LENGTH) {
const isHtmlSource = toolName === 'WebFetch' || toolName === 'Read' || toolName?.startsWith('mcp__');
if (isHtmlSource && outputText.length >= MIN_INJECTION_SCAN_LENGTH) {
const htmlSlice = outputText.slice(0, 100_000);
// -------------------------------------------------------------------------
// E4 (v7.2.0): Markdown link title-attribute injection.
// An inline link may carry a title after its destination. CommonMark accepts
// three delimiters for that title:
//   [text](url "title")   [text](url 'title')   [text](url (title))
// The title is rendered as a tooltip and parsed by agents, but rarely
// inspected by humans during review, so all three forms are extracted and
// scanned (matching only the double-quoted form leaves a trivial bypass).
// Markdown does not require HTML tags, so this runs before the `<tag>` gate
// below.
// -------------------------------------------------------------------------
// Capture groups: 1 = "double-quoted", 2 = 'single-quoted', 3 = (parenthesized).
// A single `\s` before the title is sufficient: the greedy `[^)]*` in front of
// it already absorbs any additional whitespace, so the same links match as
// with `\s+`, but without two adjacent quantifiers competing over long
// whitespace runs in untrusted input.
const linkTitleRegex = /\[[^\]]*\]\([^)]*\s(?:"([^"]+)"|'([^']+)'|\(([^()]+)\))\s*\)/g;
const linkTitles = [];
for (const [, doubleQuoted, singleQuoted, parenthesized] of htmlSlice.matchAll(linkTitleRegex)) {
  // Exactly one of the three groups participates in any given match.
  // Entities are decoded first so that an entity-encoded payload is scanned
  // in its decoded form.
  linkTitles.push(decodeHtmlEntities(doubleQuoted ?? singleQuoted ?? parenthesized));
}
if (linkTitles.length > 0) {
  // All titles are scanned together (one per line) so that at most one
  // advisory is emitted for this check.
  const titlesText = linkTitles.join('\n');
  const titleScan = scanForInjection(titlesText);
  if (titleScan.critical.length > 0 || titleScan.high.length > 0 || titleScan.medium.length > 0) {
    const labels = [...titleScan.critical, ...titleScan.high, ...titleScan.medium];
    // Report at the strongest tier that produced at least one finding.
    const sev = titleScan.critical.length > 0 ? 'CRITICAL'
      : titleScan.high.length > 0 ? 'HIGH'
      : 'MEDIUM';
    advisories.push(
      `Markdown link-title injection detected — ${sev} (markdown-link-title-injection, OWASP LLM01).\n` +
      ` Adversarial content hidden in link title attributes — rendered as tooltips, parsed by agents.\n` +
      // Cap the listed findings at five to keep the advisory readable.
      labels.slice(0, 5).map(l => ` - ${l}`).join('\n') + '\n' +
      ` ${formatToolContext(toolName, toolInput)}`
    );
  }
}
// Only run HTML-specific checks if content looks like HTML
if (/<[a-zA-Z][^>]*>/.test(htmlSlice)) {
const htmlFindings = [];