feat: initial open marketplace with llm-security, config-audit, ultraplan-local

This commit is contained in:
Kjell Tore Guttormsen 2026-04-06 18:47:49 +02:00
commit f93d6abdae
380 changed files with 65935 additions and 0 deletions

View file

@ -0,0 +1,660 @@
// string-utils.test.mjs — Tests for scanners/lib/string-utils.mjs
// Zero external dependencies: node:test + node:assert only.
import { describe, it } from 'node:test';
import assert from 'node:assert/strict';
import {
shannonEntropy,
levenshtein,
isBase64Like,
isHexBlob,
redact,
extractStringLiterals,
decodeUnicodeEscapes,
decodeHexEscapes,
decodeUrlEncoding,
tryDecodeBase64,
normalizeForScan,
decodeHtmlEntities,
collapseLetterSpacing,
decodeUnicodeTags,
containsUnicodeTags,
stripBidiOverrides,
} from '../../scanners/lib/string-utils.mjs';
// ---------------------------------------------------------------------------
// shannonEntropy
// ---------------------------------------------------------------------------
describe('shannonEntropy', () => {
it('returns 0 for empty string', () => {
assert.equal(shannonEntropy(''), 0);
});
it('returns 0 for uniform distribution (all same character)', () => {
assert.equal(shannonEntropy('aaaaaaaaaa'), 0);
});
it('returns ~2.0 for "abcd" (4 equally likely chars)', () => {
// H = -4*(0.25 * log2(0.25)) = -4*(0.25*-2) = 2.0
const h = shannonEntropy('abcd');
assert.ok(
Math.abs(h - 2.0) < 0.0001,
`expected ~2.0, got ${h}`
);
});
it('returns > 4.0 for a high-entropy random-looking string', () => {
// Mix of upper, lower, digits, symbols — typical API key pattern
const highEntropy = 'xK9#mP2@qL5$nR8!vT3^wY6&';
assert.ok(
shannonEntropy(highEntropy) > 4.0,
`expected > 4.0 for high-entropy string`
);
});
it('returns > 0 for a two-character alternating string', () => {
const h = shannonEntropy('ababababab');
assert.ok(h > 0, `expected > 0 for two-char alternation, got ${h}`);
});
});
// ---------------------------------------------------------------------------
// levenshtein
// ---------------------------------------------------------------------------
describe('levenshtein', () => {
it('returns 0 for identical strings', () => {
assert.equal(levenshtein('hello', 'hello'), 0);
});
it('returns 0 for two empty strings', () => {
assert.equal(levenshtein('', ''), 0);
});
it('returns length of other string when one is empty', () => {
assert.equal(levenshtein('', 'hello'), 5);
assert.equal(levenshtein('hello', ''), 5);
});
it('returns 1 for a single character difference (substitution)', () => {
assert.equal(levenshtein('cat', 'bat'), 1);
});
it('returns 1 for a single insertion', () => {
assert.equal(levenshtein('express', 'expresss'), 1);
assert.equal(levenshtein('expresss', 'express'), 1);
});
it('returns 3 for "kitten" vs "sitting"', () => {
// Classic Levenshtein example
assert.equal(levenshtein('kitten', 'sitting'), 3);
});
it('is symmetric', () => {
assert.equal(levenshtein('abc', 'xyz'), levenshtein('xyz', 'abc'));
});
});
// ---------------------------------------------------------------------------
// isBase64Like
// ---------------------------------------------------------------------------
describe('isBase64Like', () => {
it('returns true for a valid base64 string longer than 20 chars', () => {
// "Hello, World!" base64-encoded, padded to well over 20 chars
const b64 = 'SGVsbG8sIFdvcmxkISBUaGlzIGlzIGEgdGVzdCBzdHJpbmcu';
assert.ok(b64.length > 20);
assert.equal(isBase64Like(b64), true);
});
it('returns true for base64 with padding characters', () => {
const padded = 'dGhpcyBpcyBhIHRlc3Qgc3RyaW5nIGZvciBiYXNlNjQ=';
assert.equal(isBase64Like(padded), true);
});
it('returns false for a short base64-looking string (< 20 chars)', () => {
assert.equal(isBase64Like('SGVsbG8='), false);
});
it('returns false for a string with non-base64 characters', () => {
// Spaces and hyphens are not valid base64
assert.equal(isBase64Like('this is not base64 at all and has spaces in it'), false);
});
it('returns false for an empty string', () => {
assert.equal(isBase64Like(''), false);
});
});
// ---------------------------------------------------------------------------
// isHexBlob
// ---------------------------------------------------------------------------
describe('isHexBlob', () => {
it('returns true for a valid hex string longer than 32 chars', () => {
// 64-char hex string (like a SHA-256 hash)
const hex = 'a3f5c8e1b2d4067f9e0a1c3b5d7e9f0a1b2c3d4e5f6a7b8c9d0e1f2a3b4c5d6';
assert.ok(hex.length >= 32);
assert.equal(isHexBlob(hex), true);
});
it('returns true for hex string with 0x prefix', () => {
const hex = '0x' + 'deadbeef'.repeat(8); // 64 hex chars after prefix
assert.equal(isHexBlob(hex), true);
});
it('returns false for a short hex string (< 32 chars)', () => {
assert.equal(isHexBlob('deadbeef'), false);
});
it('returns false for a string containing non-hex characters', () => {
assert.equal(isHexBlob('this is not hex and is long enough but has spaces'), false);
});
it('returns false for an empty string', () => {
assert.equal(isHexBlob(''), false);
});
});
// ---------------------------------------------------------------------------
// redact
// ---------------------------------------------------------------------------
describe('redact', () => {
it('redacts a long string to first 8 + "..." + last 4 chars', () => {
// Length must be > showStart(8) + showEnd(4) + 3 = 15 chars
const input = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'; // 26 chars
const result = redact(input);
assert.equal(result, 'ABCDEFGH...WXYZ');
});
it('returns short string as-is (not long enough to redact)', () => {
// 8 + 4 + 3 = 15; string of 15 or fewer should pass through
const short = 'ABCDEFGHIJKLMNO'; // exactly 15 chars
assert.equal(redact(short), short);
});
it('returns shorter string as-is', () => {
assert.equal(redact('secret'), 'secret');
});
it('respects custom showStart and showEnd parameters', () => {
const input = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'; // 26 chars
// showStart=4, showEnd=2: threshold = 4+2+3=9, input > 9, so redact
const result = redact(input, 4, 2);
assert.equal(result, 'ABCD...YZ');
});
it('handles string exactly at the boundary as-is', () => {
// Default: showStart=8, showEnd=4, threshold=15 (s.length <= 15 -> return as-is)
const boundary = 'A'.repeat(15);
assert.equal(redact(boundary), boundary);
});
it('redacts a string one character above boundary', () => {
const justOver = 'A'.repeat(16);
const result = redact(justOver);
assert.equal(result, 'AAAAAAAA...AAAA');
});
});
// ---------------------------------------------------------------------------
// extractStringLiterals
// ---------------------------------------------------------------------------
describe('extractStringLiterals', () => {
it('extracts a double-quoted string literal', () => {
const result = extractStringLiterals('const x = "hello world";');
assert.deepEqual(result, ['hello world']);
});
it('extracts a single-quoted string literal', () => {
const result = extractStringLiterals("const x = 'hello world';");
assert.deepEqual(result, ['hello world']);
});
it('extracts a backtick-quoted string literal', () => {
const result = extractStringLiterals('const x = `hello world`;');
assert.deepEqual(result, ['hello world']);
});
it('extracts multiple literals from the same line', () => {
const result = extractStringLiterals('const a = "foo"; const b = \'bar\';');
assert.deepEqual(result, ['foo', 'bar']);
});
it('extracts mixed quote types from the same line', () => {
const result = extractStringLiterals('fn("double", \'single\', `backtick`)');
assert.deepEqual(result, ['double', 'single', 'backtick']);
});
it('returns empty array for a line with no string literals', () => {
const result = extractStringLiterals('const x = 42;');
assert.deepEqual(result, []);
});
it('returns empty array for an empty line', () => {
const result = extractStringLiterals('');
assert.deepEqual(result, []);
});
it('handles escaped characters inside string literals', () => {
const result = extractStringLiterals('const x = "hello \\"world\\"";');
assert.deepEqual(result, ['hello \\"world\\"']);
});
});
// ---------------------------------------------------------------------------
// decodeUnicodeEscapes
// ---------------------------------------------------------------------------
describe('decodeUnicodeEscapes', () => {
it('decodes \\uXXXX sequences', () => {
assert.equal(decodeUnicodeEscapes('\\u0041\\u0042\\u0043'), 'ABC');
});
it('decodes \\u{XXXXX} sequences', () => {
assert.equal(decodeUnicodeEscapes('\\u{41}'), 'A');
assert.equal(decodeUnicodeEscapes('\\u{1F600}'), '\u{1F600}');
});
it('leaves non-escape text unchanged', () => {
assert.equal(decodeUnicodeEscapes('hello world'), 'hello world');
});
it('decodes mixed text and escapes', () => {
assert.equal(decodeUnicodeEscapes('\\u0069gnore'), 'ignore');
});
it('handles invalid codepoints gracefully', () => {
// U+200000 is beyond Unicode range — should be left as-is
const input = '\\u{200000}';
assert.equal(decodeUnicodeEscapes(input), input);
});
});
// ---------------------------------------------------------------------------
// decodeHexEscapes
// ---------------------------------------------------------------------------
describe('decodeHexEscapes', () => {
it('decodes \\xXX sequences', () => {
assert.equal(decodeHexEscapes('\\x41\\x42\\x43'), 'ABC');
});
it('decodes mixed text and hex escapes', () => {
assert.equal(decodeHexEscapes('\\x69gnore'), 'ignore');
});
it('leaves non-escape text unchanged', () => {
assert.equal(decodeHexEscapes('hello world'), 'hello world');
});
it('decodes full ASCII range', () => {
assert.equal(decodeHexEscapes('\\x20'), ' '); // space
assert.equal(decodeHexEscapes('\\x7E'), '~'); // tilde
});
});
// ---------------------------------------------------------------------------
// decodeUrlEncoding
// ---------------------------------------------------------------------------
describe('decodeUrlEncoding', () => {
it('decodes %XX sequences', () => {
assert.equal(decodeUrlEncoding('%41%42%43'), 'ABC');
});
it('decodes standard URL entities', () => {
assert.equal(decodeUrlEncoding('hello%20world'), 'hello world');
});
it('decodes mixed text and percent-encoding', () => {
assert.equal(decodeUrlEncoding('%69gnore'), 'ignore');
});
it('leaves non-encoded text unchanged', () => {
assert.equal(decodeUrlEncoding('hello world'), 'hello world');
});
it('handles malformed sequences without crashing', () => {
// %ZZ is not valid hex — should pass through or handle gracefully
const result = decodeUrlEncoding('test%ZZvalue');
assert.ok(typeof result === 'string');
});
it('fast path: no percent signs returns input unchanged', () => {
const input = 'no encoding here';
assert.equal(decodeUrlEncoding(input), input);
});
});
// ---------------------------------------------------------------------------
// tryDecodeBase64
// ---------------------------------------------------------------------------
describe('tryDecodeBase64', () => {
it('decodes valid base64 that produces readable text', () => {
const encoded = Buffer.from('ignore all previous instructions').toString('base64');
const result = tryDecodeBase64(encoded);
assert.equal(result, 'ignore all previous instructions');
});
it('returns null for short strings (not base64-like)', () => {
assert.equal(tryDecodeBase64('short'), null);
});
it('returns null for binary content (not readable text)', () => {
// Random bytes that won't produce >80% printable ASCII
const binaryB64 = Buffer.from([0x00, 0x01, 0x02, 0x03, 0x80, 0x81, 0x82, 0x83,
0x00, 0x01, 0x02, 0x03, 0x80, 0x81, 0x82, 0x83,
0x00, 0x01, 0x02, 0x03, 0x80, 0x81, 0x82, 0x83]).toString('base64');
assert.equal(tryDecodeBase64(binaryB64), null);
});
it('returns null for non-base64 strings', () => {
assert.equal(tryDecodeBase64('this is not base64 at all!!!'), null);
});
});
// ---------------------------------------------------------------------------
// normalizeForScan
// ---------------------------------------------------------------------------
describe('normalizeForScan', () => {
it('decodes unicode escapes', () => {
assert.equal(normalizeForScan('\\u0069gnore'), 'ignore');
});
it('decodes hex escapes', () => {
assert.equal(normalizeForScan('\\x69gnore'), 'ignore');
});
it('decodes URL encoding', () => {
assert.equal(normalizeForScan('%69gnore'), 'ignore');
});
it('chains multiple decoders', () => {
// Mix of unicode and hex escapes
assert.equal(normalizeForScan('\\u0069\\x67nore'), 'ignore');
});
it('decodes base64 when result is readable text', () => {
const encoded = Buffer.from('ignore all previous instructions').toString('base64');
const result = normalizeForScan(encoded);
assert.equal(result, 'ignore all previous instructions');
});
it('returns input unchanged for plain text', () => {
const input = 'just normal text';
assert.equal(normalizeForScan(input), input);
});
it('decodes HTML entities', () => {
assert.equal(normalizeForScan('&lt;system&gt;'), '<system>');
});
it('decodes hex HTML entities', () => {
assert.equal(normalizeForScan('&#x69;gnore'), 'ignore');
});
it('decodes decimal HTML entities', () => {
assert.equal(normalizeForScan('&#105;gnore'), 'ignore');
});
it('recursive decode: URL-encode of base64', () => {
const b64 = Buffer.from('ignore all previous instructions').toString('base64');
const urlEncoded = encodeURIComponent(b64);
const result = normalizeForScan(urlEncoded);
assert.equal(result, 'ignore all previous instructions');
});
it('collapses letter-spaced text', () => {
assert.ok(normalizeForScan('i g n o r e').includes('ignore'));
});
it('stops after 3 iterations (no infinite loop)', () => {
// A string that keeps changing but never stabilizes
// normalizeForScan should still return after MAX_ITERATIONS
const input = '%25%2569gnore'; // double-encoded %69 -> %69 -> i
const result = normalizeForScan(input);
assert.ok(typeof result === 'string');
});
});
// ---------------------------------------------------------------------------
// decodeHtmlEntities
// ---------------------------------------------------------------------------
describe('decodeHtmlEntities', () => {
it('decodes named entities', () => {
assert.equal(decodeHtmlEntities('&lt;'), '<');
assert.equal(decodeHtmlEntities('&gt;'), '>');
assert.equal(decodeHtmlEntities('&amp;'), '&');
assert.equal(decodeHtmlEntities('&quot;'), '"');
assert.equal(decodeHtmlEntities('&apos;'), "'");
});
it('decodes hex entities', () => {
assert.equal(decodeHtmlEntities('&#x41;'), 'A');
assert.equal(decodeHtmlEntities('&#x69;'), 'i');
assert.equal(decodeHtmlEntities('&#x3C;'), '<');
});
it('decodes decimal entities', () => {
assert.equal(decodeHtmlEntities('&#65;'), 'A');
assert.equal(decodeHtmlEntities('&#105;'), 'i');
assert.equal(decodeHtmlEntities('&#60;'), '<');
});
it('decodes mixed content', () => {
assert.equal(decodeHtmlEntities('&lt;system&gt;'), '<system>');
assert.equal(decodeHtmlEntities('&#x69;gnore &#x70;revious'), 'ignore previous');
});
it('fast path: no ampersand returns input unchanged', () => {
const input = 'no entities here';
assert.equal(decodeHtmlEntities(input), input);
});
it('leaves unknown named entities unchanged', () => {
assert.equal(decodeHtmlEntities('&unknown;'), '&unknown;');
});
it('handles punctuation named entities', () => {
assert.equal(decodeHtmlEntities('&lpar;&rpar;'), '()');
assert.equal(decodeHtmlEntities('&lsqb;&rsqb;'), '[]');
assert.equal(decodeHtmlEntities('&lcub;&rcub;'), '{}');
});
});
// ---------------------------------------------------------------------------
// collapseLetterSpacing
// ---------------------------------------------------------------------------
describe('collapseLetterSpacing', () => {
it('collapses letter-spaced "i g n o r e"', () => {
assert.ok(collapseLetterSpacing('i g n o r e').includes('ignore'));
});
it('collapses "s y s t e m" to "system"', () => {
assert.ok(collapseLetterSpacing('s y s t e m').includes('system'));
});
it('does not collapse short sequences (< 4 letters)', () => {
// "a b c" is only 3 letters — should not be collapsed
assert.equal(collapseLetterSpacing('a b c'), 'a b c');
});
it('does not collapse normal words separated by spaces', () => {
const input = 'hello world this is normal';
assert.equal(collapseLetterSpacing(input), input);
});
it('does not affect strings without letter spacing', () => {
const input = 'just normal text without spacing';
assert.equal(collapseLetterSpacing(input), input);
});
});
// ---------------------------------------------------------------------------
// decodeUnicodeTags (v5.0.0 — DeepMind traps kat. 1)
// ---------------------------------------------------------------------------
describe('decodeUnicodeTags', () => {
it('decodes Unicode Tag characters to ASCII', () => {
// U+E0069 U+E0067 U+E006E U+E006F U+E0072 U+E0065 = "ignore"
const tags = String.fromCodePoint(0xE0069, 0xE0067, 0xE006E, 0xE006F, 0xE0072, 0xE0065);
assert.equal(decodeUnicodeTags(tags), 'ignore');
});
it('preserves normal text around tag sequences', () => {
const tags = String.fromCodePoint(0xE0048, 0xE0049); // "HI"
const input = `hello ${tags} world`;
assert.equal(decodeUnicodeTags(input), 'hello HI world');
});
it('decodes full injection phrase hidden in tags', () => {
// "ignore all previous" encoded as Unicode Tags
const phrase = 'ignore all previous';
const tags = [...phrase].map(ch => String.fromCodePoint(ch.charCodeAt(0) + 0xE0000)).join('');
assert.equal(decodeUnicodeTags(tags), phrase);
});
it('returns input unchanged when no tag characters present', () => {
const input = 'normal text without any tags';
assert.equal(decodeUnicodeTags(input), input);
});
it('returns empty string for empty input', () => {
assert.equal(decodeUnicodeTags(''), '');
});
it('handles tag at start of string', () => {
const tag = String.fromCodePoint(0xE0041); // 'A'
assert.equal(decodeUnicodeTags(tag + 'bc'), 'Abc');
});
it('handles tag at end of string', () => {
const tag = String.fromCodePoint(0xE005A); // 'Z'
assert.equal(decodeUnicodeTags('ab' + tag), 'abZ');
});
it('handles multiple separate tag sequences', () => {
const hi = String.fromCodePoint(0xE0048, 0xE0049);
const lo = String.fromCodePoint(0xE004C, 0xE004F);
assert.equal(decodeUnicodeTags(`${hi} and ${lo}`), 'HI and LO');
});
});
// ---------------------------------------------------------------------------
// containsUnicodeTags (v5.0.0)
// ---------------------------------------------------------------------------
describe('containsUnicodeTags', () => {
it('returns true when Unicode Tags are present', () => {
const tag = String.fromCodePoint(0xE0041);
assert.equal(containsUnicodeTags(`text${tag}more`), true);
});
it('returns false for normal text', () => {
assert.equal(containsUnicodeTags('normal text'), false);
});
it('returns false for empty string', () => {
assert.equal(containsUnicodeTags(''), false);
});
it('returns false for other Unicode (emoji, CJK)', () => {
assert.equal(containsUnicodeTags('Hello \u{1F600} \u4E16\u754C'), false);
});
it('returns true for U+E0001 (language tag)', () => {
assert.equal(containsUnicodeTags(String.fromCodePoint(0xE0001)), true);
});
it('returns true for U+E007F (cancel tag)', () => {
assert.equal(containsUnicodeTags(String.fromCodePoint(0xE007F)), true);
});
});
// ---------------------------------------------------------------------------
// stripBidiOverrides (v5.0.0)
// ---------------------------------------------------------------------------
describe('stripBidiOverrides', () => {
it('strips LRE (U+202A)', () => {
assert.equal(stripBidiOverrides('hello\u202Aworld'), 'helloworld');
});
it('strips RLE (U+202B)', () => {
assert.equal(stripBidiOverrides('hello\u202Bworld'), 'helloworld');
});
it('strips PDF (U+202C)', () => {
assert.equal(stripBidiOverrides('hello\u202Cworld'), 'helloworld');
});
it('strips LRO (U+202D)', () => {
assert.equal(stripBidiOverrides('hello\u202Dworld'), 'helloworld');
});
it('strips RLO (U+202E)', () => {
assert.equal(stripBidiOverrides('hello\u202Eworld'), 'helloworld');
});
it('strips LRI (U+2066)', () => {
assert.equal(stripBidiOverrides('hello\u2066world'), 'helloworld');
});
it('strips RLI (U+2067)', () => {
assert.equal(stripBidiOverrides('hello\u2067world'), 'helloworld');
});
it('strips FSI (U+2068)', () => {
assert.equal(stripBidiOverrides('hello\u2068world'), 'helloworld');
});
it('strips PDI (U+2069)', () => {
assert.equal(stripBidiOverrides('hello\u2069world'), 'helloworld');
});
it('strips multiple BIDI chars', () => {
assert.equal(stripBidiOverrides('\u202Ehello\u202Dworld\u202C'), 'helloworld');
});
it('returns input unchanged when no BIDI chars', () => {
assert.equal(stripBidiOverrides('normal text'), 'normal text');
});
it('returns empty string for empty input', () => {
assert.equal(stripBidiOverrides(''), '');
});
});
// ---------------------------------------------------------------------------
// normalizeForScan — Unicode Tags and BIDI integration (v5.0.0)
// ---------------------------------------------------------------------------
describe('normalizeForScan — Unicode Tags and BIDI (v5.0.0)', () => {
it('decodes Unicode Tags before other normalizations', () => {
const phrase = 'ignore all previous';
const tags = [...phrase].map(ch => String.fromCodePoint(ch.charCodeAt(0) + 0xE0000)).join('');
const result = normalizeForScan(tags);
assert.equal(result, phrase);
});
it('strips BIDI overrides before other normalizations', () => {
const input = 'ignore\u202E all previous';
const result = normalizeForScan(input);
assert.ok(result.includes('ignore all previous'));
});
it('handles combined Unicode Tags + BIDI', () => {
const tagI = String.fromCodePoint(0xE0069); // 'i'
const input = `${tagI}gnore\u202E all previous`;
const result = normalizeForScan(input);
assert.ok(result.includes('ignore all previous'));
});
});