diff --git a/plugins/llm-security/knowledge/attack-scenarios.json b/plugins/llm-security/knowledge/attack-scenarios.json index a9419cc..baa897c 100644 --- a/plugins/llm-security/knowledge/attack-scenarios.json +++ b/plugins/llm-security/knowledge/attack-scenarios.json @@ -881,7 +881,7 @@ }, { "id": "UNI-003", - "name": "Homoglyph Cyrillic-Latin mixing (MEDIUM)", + "name": "Homoglyph Cyrillic-Latin mixing (now blocked via E16 fold, v7.2.0)", "input": { "session_id": "red-team", "message": { @@ -890,8 +890,8 @@ } }, "expect": { - "exit_code": 0, - "stdout_match": "MEDIUM" + "exit_code": 2, + "stdout_match": "block" } }, { diff --git a/plugins/llm-security/scanners/lib/string-utils.mjs b/plugins/llm-security/scanners/lib/string-utils.mjs index 8add0de..5653e17 100644 --- a/plugins/llm-security/scanners/lib/string-utils.mjs +++ b/plugins/llm-security/scanners/lib/string-utils.mjs @@ -451,11 +451,23 @@ const HOMOGLYPH_MAP = Object.freeze({ * Norwegian/Polish/German/etc. text is NOT affected — characters like * æ, ø, å, é, ñ, ü, ö, ä are not in HOMOGLYPH_MAP. * + * Performance: pure-ASCII inputs short-circuit before NFKC, since NFKC is + * a no-op on ASCII and HOMOGLYPH_MAP only contains non-ASCII keys. + * scanForInjection calls this on every scan; the fast-path keeps the + * common-case overhead near zero. + * * @param {string} s * @returns {string} */ export function foldHomoglyphs(s) { if (!s) return s; + // Fast path: pure ASCII has nothing to fold and NFKC is identity. + // charCodeAt is cheaper than iterating codepoints. + let asciiOnly = true; + for (let i = 0; i < s.length; i++) { + if (s.charCodeAt(i) > 127) { asciiOnly = false; break; } + } + if (asciiOnly) return s; const normalized = s.normalize('NFKC'); let out = ''; for (const ch of normalized) {