// string-utils-hidden-unicode.test.mjs — E1 (v7.2.0) — extended hidden-Unicode detection
//
// Critical-review §4 E1 finding: pre-v7.2.0 the Unicode-stego detector
// covered only U+E0001-E007F (Tag block). PUA-A (U+F0000-FFFFD) and
// PUA-B (U+100000-10FFFD) — also invisible in most terminals — were not
// detected. Attackers can encode payloads in PUA codepoints that survive
// normalization and pass through `scanForInjection` undetected.
//
// E1 fix: extend `containsUnicodeTags` to cover both PUAs. PUA chars are
// detection-only — no ASCII mapping exists, so `decodeUnicodeTags` leaves
// them unchanged.

import { describe, it } from 'node:test';
import assert from 'node:assert/strict';
import {
  containsUnicodeTags,
  decodeUnicodeTags,
} from '../../scanners/lib/string-utils.mjs';

// Tag block (U+E0001-E007F): the range that was already detected before E1.
// These cases guard against regressions while the detector is extended.
describe('containsUnicodeTags — Tag block (regression guard)', () => {
  it('returns true for U+E0001 (start of Tag block)', () => {
    assert.equal(containsUnicodeTags('hello' + String.fromCodePoint(0xE0001)), true);
  });

  it('returns true for U+E0069 ("i" tag)', () => {
    assert.equal(containsUnicodeTags(String.fromCodePoint(0xE0069)), true);
  });

  it('returns true for U+E007F (end of Tag block)', () => {
    assert.equal(containsUnicodeTags(String.fromCodePoint(0xE007F)), true);
  });

  it('returns false for plain ASCII', () => {
    assert.equal(containsUnicodeTags('plain text'), false);
  });

  it('returns false for empty string', () => {
    assert.equal(containsUnicodeTags(''), false);
  });
});

// PUA-A (U+F0000-FFFFD): first and last code points, one just inside the
// start, and one embedded in ordinary text.
describe('containsUnicodeTags — PUA-A range (E1)', () => {
  it('returns true for U+F0000 (start of PUA-A)', () => {
    assert.equal(containsUnicodeTags('hello' + String.fromCodePoint(0xF0000)), true);
  });

  it('returns true for U+F0001 (just inside PUA-A)', () => {
    assert.equal(containsUnicodeTags(String.fromCodePoint(0xF0001)), true);
  });

  it('returns true for U+FFFFD (end of PUA-A)', () => {
    assert.equal(containsUnicodeTags(String.fromCodePoint(0xFFFFD)), true);
  });

  it('returns true for PUA-A char buried in ASCII', () => {
    const payload = 'normal text' + String.fromCodePoint(0xF0042) + 'more text';
    assert.equal(containsUnicodeTags(payload), true);
  });
});

// PUA-B (U+100000-10FFFD): first and last code points plus one just inside.
describe('containsUnicodeTags — PUA-B range (E1)', () => {
  it('returns true for U+100000 (start of PUA-B)', () => {
    assert.equal(containsUnicodeTags('hello' + String.fromCodePoint(0x100000)), true);
  });

  it('returns true for U+100001 (just inside PUA-B)', () => {
    assert.equal(containsUnicodeTags(String.fromCodePoint(0x100001)), true);
  });

  it('returns true for U+10FFFD (end of PUA-B, just below noncharacter)', () => {
    assert.equal(containsUnicodeTags(String.fromCodePoint(0x10FFFD)), true);
  });
});

// Code points immediately outside the detected ranges, plus ordinary
// non-ASCII text, must NOT be flagged.
describe('containsUnicodeTags — boundary cases (E1)', () => {
  it('returns false for U+EFFFF (just below PUA-A start)', () => {
    // U+E0080 to U+EFFFF — a gap between Tag block and PUA-A
    assert.equal(containsUnicodeTags(String.fromCodePoint(0xEFFFF)), false);
  });

  it('returns false for U+E0080 (just past Tag block end)', () => {
    assert.equal(containsUnicodeTags(String.fromCodePoint(0xE0080)), false);
  });

  it('returns false for U+E0000 (just below Tag block start)', () => {
    assert.equal(containsUnicodeTags(String.fromCodePoint(0xE0000)), false);
  });

  it('returns false for U+10FFFE (noncharacter — outside PUA-B)', () => {
    // U+10FFFE and U+10FFFF are noncharacters, not PUA. Out of range.
    assert.equal(containsUnicodeTags(String.fromCodePoint(0x10FFFE)), false);
  });

  it('returns false for emoji and other plane chars', () => {
    assert.equal(containsUnicodeTags('🚀'), false); // U+1F680
    assert.equal(containsUnicodeTags('日本'), false); // CJK
    assert.equal(containsUnicodeTags('café'), false); // é is U+00E9 (Latin-1 Supplement)
  });
});

// decodeUnicodeTags: Tag-block characters decode to ASCII, PUA characters
// are expected to come back untouched.
describe('decodeUnicodeTags — PUA passthrough (E1)', () => {
  it('leaves PUA-A characters unchanged (no ASCII mapping)', () => {
    const puaChar = String.fromCodePoint(0xF0001);
    const input = `before${puaChar}after`;
    const result = decodeUnicodeTags(input);
    assert.equal(result, input);
  });

  it('leaves PUA-B characters unchanged', () => {
    const puaBChar = String.fromCodePoint(0x100042);
    const input = `before${puaBChar}after`;
    const result = decodeUnicodeTags(input);
    assert.equal(result, input);
  });

  it('still decodes Tag block (regression guard)', () => {
    // U+E0069 U+E0067 U+E006E → "ign"
    const tags = String.fromCodePoint(0xE0069, 0xE0067, 0xE006E);
    const result = decodeUnicodeTags(tags);
    assert.equal(result, 'ign');
  });

  it('handles mixed Tag + PUA — decodes Tag, passes PUA through', () => {
    // "i" tag + PUA-A char + "g" tag + "n" tag
    const tagI = String.fromCodePoint(0xE0069);
    const puaA = String.fromCodePoint(0xF0042);
    const tagG = String.fromCodePoint(0xE0067);
    const tagN = String.fromCodePoint(0xE006E);
    const input = tagI + puaA + tagG + tagN;
    const result = decodeUnicodeTags(input);

    // Tag chars are flushed when a non-tag (PUA) character is encountered.
    // Implementation detail: result is `i${puaA}gn`. The assertions below are
    // deliberately looser than that exact string so they do not pin it.
    const resultCodePoints = [...result]
      .map((c) => 'U+' + c.codePointAt(0).toString(16))
      .join(' ');
    assert.ok(result.includes('i'), `expected 'i' in result, got: ${resultCodePoints}`);
    assert.ok(result.includes(puaA), 'PUA char must remain undecoded');

    // Grouping made explicit: `&&` binds tighter than `||`. The contiguous
    // 'gn' check implies the second operand, so this effectively requires
    // that both 'g' and 'n' appear somewhere in the result.
    const hasDecodedGn =
      result.includes('gn') || (result.includes('g') && result.includes('n'));
    assert.ok(hasDecodedGn, 'gn tags must decode');
  });
});