Critical-review §2 B7 finding: pure Levenshtein <=2 misses the most common
modern typosquat pattern — popular-name + token-injection suffix. Examples:
lodash → lodash-utils (edit distance 6, not flagged pre-B7)
react → react-helper (edit distance 7, not flagged pre-B7)
express → express-wrapper (edit distance 8, not flagged pre-B7)
Three coordinated edits:
scanners/lib/string-utils.mjs
- Adds tokenize(name): string[] — splits on -/_ and lowercases
- Adds tokenOverlap(a, b): number — intersection.size / min(|a|,|b|)
- Adds TYPOSQUAT_SUSPICIOUS_TOKENS frozen list of common typosquat
suffixes. Excludes language-extension tokens (js, jsx, ts, tsx) — the
v7.0.0 allowlist contains `tsx` as a legit package and including the
same token in the suspicious set creates a contradiction. Caught by
the new allowlist-intersection-guard test. Also excludes 'pro'
(legitimate edition marker).
scanners/dep-auditor.mjs + scanners/supply-chain-recheck.mjs
- New checkTyposquatTokenOverlap() helper — fires AFTER Levenshtein 1/2
branches, only when:
1. popular package's tokens ⊊ declared name's tokens (declared name is a strict superset)
2. declared name has at least one suspicious suffix
3. popular package is in topCutoff window
All three conditions required — conservative by design. Allowlist
precedence preserved (existing 22 npm + 13 PyPI entries always pass).
MEDIUM severity, NOT block. New finding title prefix:
"Possible typosquatting via token-overlap".
Tests: +21 cases across two new files
- tests/lib/string-utils-tokens.test.mjs (15) — tokenize, tokenOverlap,
TYPOSQUAT_SUSPICIOUS_TOKENS frozen contract, allowlist-intersection
guard (caught the tsx conflict on first run)
- tests/scanners/dep-token-overlap.test.mjs (7) — integration via
in-memory tmpdir fixtures: lodash-utils flagged, react-helper flagged,
express-wrapper flagged, lodash exact NOT flagged, allowlist tools
(knip/tsx/nx/rimraf) NOT flagged, react-router-dom (no suspicious
suffix) NOT flagged, react itself (equal token set, not superset)
NOT flagged.
Existing dep.test.mjs and supply-chain-recheck.test.mjs unchanged —
all green (149 → 149 regression guard).
Suite: 1570 → 1591 (+21). All green.
150 lines
5.8 KiB
JavaScript
150 lines
5.8 KiB
JavaScript
// dep-token-overlap.test.mjs — B7 (v7.2.0) — typosquat token-overlap integration
//
// Verifies that dep-auditor's checkTyposquatting now flags packages with
// edit distance >= 3 that contain all tokens of a popular package plus a
// suspicious suffix (e.g. `lodash-utils` vs `lodash`).
//
// Builds an in-memory fixture in tmpdir per test instead of mutating the
// shared `tests/fixtures/dep-test/` fixture (which other tests assert
// exact finding counts against).
|
|
|
|
import { describe, it, before, after, beforeEach } from 'node:test';
|
|
import assert from 'node:assert/strict';
|
|
import { mkdtemp, writeFile, rm } from 'node:fs/promises';
|
|
import { tmpdir } from 'node:os';
|
|
import { join } from 'node:path';
|
|
import { resetCounter } from '../../scanners/lib/output.mjs';
|
|
import { scan } from '../../scanners/dep-auditor.mjs';
|
|
|
|
/**
 * Creates a throwaway project directory under the OS temp dir containing a
 * single `package.json` that declares `deps` as its `dependencies`.
 *
 * The caller owns the returned directory and is responsible for removing it.
 * If serialising or writing the manifest fails, the freshly created directory
 * is removed before the error is rethrown, so a failed call leaves nothing
 * behind in tmpdir.
 *
 * @param {Record<string, string>} deps - dependency name → version range.
 * @returns {Promise<string>} absolute path of the created fixture directory.
 * @throws rethrows any error raised while serialising or writing the manifest.
 */
async function makeFixture(deps) {
  const dir = await mkdtemp(join(tmpdir(), 'llm-security-dep-token-'));
  try {
    const manifest = { name: 'token-overlap-fixture', version: '1.0.0', dependencies: deps };
    await writeFile(join(dir, 'package.json'), JSON.stringify(manifest, null, 2), 'utf8');
  } catch (err) {
    // Do not leak the temp directory when the manifest cannot be produced.
    await rm(dir, { recursive: true, force: true });
    throw err;
  }
  return dir;
}
|
|
|
|
// Integration suite for the B7 token-overlap typosquat heuristic in dep-auditor.
// Each case writes a one-off package.json fixture, runs the scanner against it,
// and inspects the titles/severity of the returned findings.
describe('dep-auditor B7 — token-overlap typosquat heuristic', () => {
  /**
   * Scans a throwaway project that declares `deps` and returns its findings.
   * The fixture directory is removed even when the scan itself throws.
   *
   * @param {Record<string, string>} deps - dependency name → version range.
   * @returns {Promise<Array<{title: string, severity: string}>>} scanner findings.
   */
  async function scanDeps(deps) {
    const dir = await makeFixture(deps);
    try {
      const result = await scan(dir, { files: [] });
      return result.findings;
    } finally {
      await rm(dir, { recursive: true, force: true });
    }
  }

  // Title-based classifiers: the scanner exposes the heuristic only via the title.
  const isTokenOverlap = (f) => f.title.toLowerCase().includes('token-overlap');
  const isTyposquat = (f) => f.title.toLowerCase().includes('typosquat');

  // Joins finding titles for readable assertion-failure messages.
  const titlesOf = (findings) => findings.map((f) => f.title).join('; ');

  beforeEach(() => {
    resetCounter();
  });

  it('flags lodash-utils as token-overlap typosquat of lodash', async () => {
    const findings = await scanDeps({ 'lodash-utils': '^1.0.0' });
    const finding = findings.find((f) => isTokenOverlap(f) && f.title.includes('lodash-utils'));
    assert.ok(
      finding,
      `expected token-overlap finding for lodash-utils. Got: ${titlesOf(findings)}`,
    );
    assert.equal(finding.severity, 'medium');
  });

  it('flags react-helper as token-overlap typosquat of react', async () => {
    const findings = await scanDeps({ 'react-helper': '^1.0.0' });
    const finding = findings.find((f) => isTokenOverlap(f) && f.title.includes('react-helper'));
    assert.ok(finding, `expected react-helper to be flagged. Got: ${titlesOf(findings)}`);
    // Same heuristic as the lodash-utils case, so the same severity is expected.
    assert.equal(finding.severity, 'medium');
  });

  it('flags express-wrapper as token-overlap typosquat of express', async () => {
    const findings = await scanDeps({ 'express-wrapper': '^1.0.0' });
    const finding = findings.find((f) => isTokenOverlap(f) && f.title.includes('express-wrapper'));
    assert.ok(finding, `expected express-wrapper to be flagged. Got: ${titlesOf(findings)}`);
    // Same heuristic as the lodash-utils case, so the same severity is expected.
    assert.equal(finding.severity, 'medium');
  });

  it('does NOT flag legitimate package lodash (exact match)', async () => {
    const findings = await scanDeps({ 'lodash': '^4.17.0' });
    const typosquatFindings = findings.filter(isTyposquat);
    assert.equal(
      typosquatFindings.length, 0,
      `lodash must not be flagged. Got: ${titlesOf(typosquatFindings)}`,
    );
  });

  it('does NOT flag legitimate short-name tools from allowlist', async () => {
    // These are in knowledge/typosquat-allowlist.json (22 npm + 5 PyPI from v7.0.0).
    // The allowlist check fires BEFORE Levenshtein and BEFORE token-overlap.
    const findings = await scanDeps({
      'knip': '^5.0.0',
      'tsx': '^4.0.0',
      'nx': '^17.0.0',
      'rimraf': '^5.0.0',
    });
    const typosquatFindings = findings.filter(isTyposquat);
    assert.equal(
      typosquatFindings.length, 0,
      `allowlisted short-name tools must not be flagged. Got: ${titlesOf(typosquatFindings)}`,
    );
  });

  it('does NOT flag packages with no suspicious suffix even with token overlap', async () => {
    // `react-router-dom` contains `react` token + `router` + `dom` — but neither
    // `router` nor `dom` is in TYPOSQUAT_SUSPICIOUS_TOKENS. This is a legitimate
    // ecosystem name and must pass.
    const findings = await scanDeps({ 'react-router-dom': '^6.0.0' });
    const tokenOverlapFindings = findings.filter(isTokenOverlap);
    assert.equal(
      tokenOverlapFindings.length, 0,
      `react-router-dom must not be flagged as token-overlap typosquat`,
    );
  });

  it('does NOT flag packages whose all tokens match a popular package (equal token set, not strict superset)', async () => {
    // `react` itself has tokens {react} which equal the popular `react` token set.
    // The heuristic requires declared ⊃ popular (strict superset), not equal.
    const findings = await scanDeps({ 'react': '^18.0.0' });
    const tokenOverlapFindings = findings.filter(isTokenOverlap);
    assert.equal(tokenOverlapFindings.length, 0);
  });
});
|