// string-utils-tokens.test.mjs — B7 (v7.2.0) — tokenize + tokenOverlap helpers
//
// These helpers are used by dep-auditor and supply-chain-recheck to detect
// typosquats with edit distance >= 3 that contain all tokens of a popular
// package plus a suspicious suffix (e.g. `lodash-utils` vs `lodash`).
//
// Critical-review §2 B7 finding: pure Levenshtein <=2 misses the most common
// modern typosquat pattern — popular-name + token-injection suffix.

import { describe, it } from 'node:test';
import assert from 'node:assert/strict';
import {
  tokenize,
  tokenOverlap,
  TYPOSQUAT_SUSPICIOUS_TOKENS,
} from '../../scanners/lib/string-utils.mjs';

// tokenize: pins how a package name is broken into comparable tokens
// (separator handling, case folding, and empty/nullish input).
describe('tokenize (B7)', () => {
  it('splits on hyphens', () => {
    assert.deepEqual(tokenize('lodash-utils'), ['lodash', 'utils']);
  });

  it('splits on underscores', () => {
    assert.deepEqual(tokenize('react_helper'), ['react', 'helper']);
  });

  it('splits on mixed separators', () => {
    assert.deepEqual(tokenize('foo-bar_baz'), ['foo', 'bar', 'baz']);
  });

  it('lowercases tokens', () => {
    assert.deepEqual(tokenize('LODASH-Utils'), ['lodash', 'utils']);
  });

  it('drops empty tokens from consecutive separators', () => {
    assert.deepEqual(tokenize('foo--bar'), ['foo', 'bar']);
  });

  it('returns empty array for empty input', () => {
    // Nullish input must be tolerated, not throw.
    assert.deepEqual(tokenize(''), []);
    assert.deepEqual(tokenize(null), []);
    assert.deepEqual(tokenize(undefined), []);
  });

  it('returns single-element array for token with no separators', () => {
    assert.deepEqual(tokenize('lodash'), ['lodash']);
  });
});

// tokenOverlap: pins the overlap ratio between the token sets of two names.
// The expected values below follow |intersection| / min(set sizes), as spelled
// out in the half-overlap case.
describe('tokenOverlap (B7)', () => {
  it('returns 1.0 when popular tokens are subset of declared', () => {
    assert.equal(tokenOverlap('lodash-utils', 'lodash'), 1.0);
    assert.equal(tokenOverlap('react-router-dom', 'react'), 1.0);
  });

  it('returns 1.0 for identical token sets', () => {
    assert.equal(tokenOverlap('lodash', 'lodash'), 1.0);
  });

  it('returns 0.5 for half-overlap', () => {
    // 'react-helper' tokens = {react, helper}; 'react-router' tokens = {react, router}
    // intersection = {react}, min size = 2, overlap = 1/2 = 0.5
    assert.equal(tokenOverlap('react-helper', 'react-router'), 0.5);
  });

  it('returns 0 for disjoint tokens', () => {
    assert.equal(tokenOverlap('foo', 'bar'), 0);
  });

  it('returns 0 for empty inputs', () => {
    // An empty name on either side yields 0 rather than a division error/NaN.
    assert.equal(tokenOverlap('', 'lodash'), 0);
    assert.equal(tokenOverlap('lodash', ''), 0);
  });

  it('is symmetric for sets of same size', () => {
    const a = tokenOverlap('foo-bar', 'foo-baz');
    const b = tokenOverlap('foo-baz', 'foo-bar');
    assert.equal(a, b);
  });
});

// TYPOSQUAT_SUSPICIOUS_TOKENS: pins the contents and immutability of the
// shared suffix list.
describe('TYPOSQUAT_SUSPICIOUS_TOKENS (B7)', () => {
  it('contains common typosquat suffixes', () => {
    for (const t of ['utils', 'helper', 'core', 'plus', 'cli', 'wrapper']) {
      assert.ok(
        TYPOSQUAT_SUSPICIOUS_TOKENS.includes(t),
        `expected '${t}' in TYPOSQUAT_SUSPICIOUS_TOKENS`,
      );
    }
  });

  it('is frozen (cannot be mutated)', () => {
    // Mutating the exported list must throw instead of silently succeeding.
    assert.throws(() => {
      TYPOSQUAT_SUSPICIOUS_TOKENS.push('newtoken');
    });
  });

  it('does NOT include legitimate short-name tools (allowlist intersection guard)', () => {
    // These are legitimate package names from the v7.0.0 typosquat allowlist.
    // The token-overlap heuristic would NEVER flag them as suspicious because
    // they don't decompose into multiple tokens that include a popular package.
    // But guard the suspicious-tokens list against accidentally including them.
    for (const t of ['knip', 'tsx', 'nx', 'uv', 'ruff', 'oxlint', 'rimraf']) {
      assert.ok(
        !TYPOSQUAT_SUSPICIOUS_TOKENS.includes(t),
        `'${t}' must not be in TYPOSQUAT_SUSPICIOUS_TOKENS — it is a legitimate tool`,
      );
    }
  });
});