test(ultraplan-local): add ngram-overlap node:test suite
This commit is contained in:
parent
4d541418ba
commit
491711119a
1 changed files with 281 additions and 0 deletions
281
plugins/ultraplan-local/scripts/ngram-overlap.test.mjs
Normal file
281
plugins/ultraplan-local/scripts/ngram-overlap.test.mjs
Normal file
|
|
@ -0,0 +1,281 @@
|
|||
// node:test suite for scripts/ngram-overlap.mjs
|
||||
//
|
||||
// Run: node --test scripts/ngram-overlap.test.mjs
|
||||
//
|
||||
// Covers: identical text, disjoint text, partial overlap bands,
|
||||
// longest-run override, fenced-code stripping, short-source fallback,
|
||||
// markdown-emphasis stripping, fixture integration.
|
||||
|
||||
import { test } from 'node:test';
|
||||
import assert from 'node:assert/strict';
|
||||
import { execFileSync } from 'node:child_process';
|
||||
import { existsSync } from 'node:fs';
|
||||
import { analyze, tokenize, shingles, overlap, verdict, stripMarkdown } from './ngram-overlap.mjs';
|
||||
|
||||
// === Fixtures (inline prose to control word counts and overlap) ===
|
||||
|
||||
// 600+ word source on a generic technical topic (Claude Code hooks).
|
||||
// Reused across multiple tests with different drafts.
|
||||
// ~46-sentence fixture paragraph (600+ words). Built as a plain array
// expression rather than an IIFE — the intermediate binding was never
// referenced elsewhere, so the wrapper added nothing.
const SOURCE_LONG = [
  'Hooks in Claude Code allow you to intercept events emitted by the agent runtime',
  'These events fire at specific lifecycle points such as before a tool call runs',
  'or after the agent completes a turn or when a session starts up for the first time',
  'A hook is configured by adding an entry to the settings file under the hooks key',
  'Each hook entry binds a matcher pattern to a shell command that the runtime executes',
  'The matcher uses simple glob syntax to select which tool calls trigger the hook',
  'When a tool call matches the pattern the hook runs synchronously before the call proceeds',
  'A non-zero exit code from a hook script blocks the underlying tool call entirely',
  'This blocking behavior makes hooks useful for security policy enforcement and audit logging',
  'For example a pre-bash-executor hook can scan command strings against a denylist',
  'Hooks receive structured JSON input on standard input describing the event payload',
  'The schema includes the tool name the parameters and the working directory among other fields',
  'Hooks can emit JSON output on standard output to add additional context for the model',
  'Output is appended to the conversation as a system message before the next turn begins',
  'Plugin hooks live inside the plugin directory and apply only when the plugin is enabled',
  'User hooks live in the home directory under dot claude and apply across every project',
  'Project hooks live in the project root and apply only when working in that project',
  'Conflicts between hook layers resolve in a documented precedence order favoring user settings',
  'Hooks are written as plain executable scripts in any language that the system can run',
  'Common languages include shell python and node although any executable will work fine',
  'Best practice is to keep hooks fast and deterministic so they do not slow down the agent',
  'Slow hooks add latency to every tool call which compounds across long agent turns',
  'Hook scripts should also avoid making destructive changes during their execution',
  'Read-only checks fail safely while write operations from hooks are very hard to debug',
  'Testing hooks is straightforward by invoking them directly with the same input json',
  'Capture the output and exit code and verify they match the expected values',
  'Document hook behavior in the project readme so other contributors understand the constraints',
  'Hook misconfigurations often manifest as mysterious blocked tool calls during normal use',
  'Always include a clear error message in stderr when a hook intentionally blocks a call',
  'This makes debugging easier when the user wonders why their command did not run',
  'When designing a hook you should think first about what event you actually need to intercept',
  'Pre-tool-use events fire before any tool runs and can block dangerous operations early',
  'Post-tool-use events fire after a tool returns and can log results or trigger follow-up actions',
  'Session-start events fire when the agent begins a new conversation in a fresh context window',
  'Session-end events fire when the user closes the session and are useful for cleanup tasks',
  'Stop events fire whenever the agent finishes generating a response and yields back to the user',
  'Compaction events fire when the conversation history grows too large and must be summarized',
  'Each event type passes a different payload shape so you must read the schema documentation carefully',
  'A common pattern is to write a small dispatcher hook that routes events to language-specific handlers',
  'The dispatcher pattern keeps individual handlers simple and lets you add new ones without rewriting glue code',
  'Avoid putting business logic directly in the dispatcher because it becomes a bottleneck for testing',
  'Instead keep the dispatcher pure and delegate all real work to small focused single-purpose handler scripts',
  'Hook timeouts matter because slow handlers block the agent indefinitely until they return or error out',
  'Set a strict timeout in your handler implementation rather than relying on the runtime to kill it',
  'Use exit code two for hard errors and exit code zero for normal pass-through with no policy violation',
  'Reserve exit code one for soft warnings that should appear in the conversation but not block execution',
].join('. ') + '.';
|
||||
|
||||
// Count runs of Unicode letters/digits ("words") in a string.
// NOTE(review): this helper appears unused within this file — confirm
// before removing.
const wordCount = (s) => {
  const matches = s.match(/[\p{L}\p{N}]+/gu);
  return matches === null ? 0 : matches.length;
};
|
||||
|
||||
// === Unit tests on pure functions ===
|
||||
|
||||
// Punctuation and hyphens act as separators; case is folded.
test('tokenize: lowercases and splits on word boundaries', () => {
  assert.deepEqual(
    tokenize('Hello, World! Foo-bar.'),
    ['hello', 'world', 'foo', 'bar'],
  );
});
|
||||
|
||||
// Fixed: the original input 'café 123' contained only plain ASCII digits,
// so the "full-width digits normalize to ASCII" claim was never exercised
// and the test passed even if tokenize() skipped normalization entirely.
// Use real full-width digits (U+FF11..U+FF13) and a decomposed e + U+0301
// so the assertion fails unless NFKC actually runs.
test('tokenize: NFKC normalizes', () => {
  // '\uFF11\uFF12\uFF13' are full-width digits; 'cafe\u0301' is a
  // decomposed 'é'. NFKC folds both to their canonical/compat forms.
  const tokens = tokenize('cafe\u0301 \uFF11\uFF12\uFF13');
  assert.deepEqual(tokens, ['café', '123']);
});
|
||||
|
||||
// Fewer tokens than the window size yields no shingles at all.
test('shingles: returns empty when input shorter than n', () => {
  assert.deepEqual(shingles(['a', 'b', 'c'], 5), []);
});

// A full window slides one token at a time, joining with single spaces.
test('shingles: returns sliding window of size n', () => {
  const windows = shingles(['a', 'b', 'c', 'd', 'e'], 3);
  assert.deepEqual(windows, ['a b c', 'b c d', 'c d e']);
});
||||
|
||||
// --- stripMarkdown behavior ---

test('stripMarkdown: removes fenced code blocks', () => {
  const result = stripMarkdown('Before\n```js\nconst x = 1;\n```\nAfter');
  assert.ok(!result.includes('const x'));
  assert.ok(result.includes('Before'));
  assert.ok(result.includes('After'));
});

test('stripMarkdown: removes inline code', () => {
  const result = stripMarkdown('Use `npm install` to set up.');
  assert.ok(!result.includes('npm install'));
});

test('stripMarkdown: removes heading markers but keeps text', () => {
  const result = stripMarkdown('# Title\nBody');
  assert.ok(!result.includes('#'));
  assert.ok(result.includes('Title'));
});

test('stripMarkdown: removes emphasis markers', () => {
  const result = stripMarkdown('This **is bold** and *italic* and ~~strike~~');
  // Marker characters are gone; the emphasized text itself survives.
  for (const marker of ['**', '~~']) {
    assert.ok(!result.includes(marker));
  }
  for (const kept of ['is bold', 'italic']) {
    assert.ok(result.includes(kept));
  }
});

test('stripMarkdown: links keep text only', () => {
  const result = stripMarkdown('See [docs](https://example.com) for info.');
  assert.ok(!result.includes('https'));
  assert.ok(result.includes('docs'));
});

test('stripMarkdown: removes YAML frontmatter at start', () => {
  const result = stripMarkdown('---\nname: foo\n---\nBody text here');
  assert.ok(!result.includes('name: foo'));
  assert.ok(result.includes('Body text here'));
});
|
||||
|
||||
// === Overlap behavior ===
|
||||
|
||||
// --- overlap metrics ---

test('overlap: identical token streams give containment 1.0', () => {
  const stream = tokenize(SOURCE_LONG);
  const metrics = overlap(stream, stream, 5);
  assert.equal(metrics.containment, 1);
  assert.ok(metrics.longestRun > 15);
});

test('overlap: completely disjoint streams give containment 0', () => {
  const left = ['alpha', 'beta', 'gamma', 'delta', 'epsilon', 'zeta', 'eta', 'theta', 'iota', 'kappa'];
  const right = ['xray', 'yankee', 'zulu', 'whiskey', 'victor', 'uniform', 'tango', 'sierra', 'romeo', 'quebec'];
  const metrics = overlap(left, right, 5);
  assert.equal(metrics.containment, 0);
  assert.equal(metrics.longestRun, 0);
});
|
||||
|
||||
// === Verdict bands ===
|
||||
|
||||
test('verdict 1: identical text → rejected (containment 1.0)', () => {
  const report = analyze(SOURCE_LONG, SOURCE_LONG);
  assert.equal(report.verdict, 'rejected');
  assert.equal(report.containment, 1);
});

test('verdict 2: completely disjoint text → accepted (low containment, low run)', () => {
  // Build a draft of unrelated words ≥300 to skip too-short fallback
  const draft = Array.from({ length: 350 }, (_, i) => `uniqueword${i}`).join(' ');
  const report = analyze(draft, SOURCE_LONG);
  assert.equal(report.verdict, 'accepted');
  assert.equal(report.containment, 0);
  assert.equal(report.longestRun, 0);
});
|
||||
|
||||
test('verdict 3: partial overlap (mid-band) → needs-review', () => {
  // Construct draft where ~25% of 5-grams match source but no run is long.
  // Strategy: alternate 6-token source chunks with 2-token padding. Each
  // chunk yields exactly 2 source 5-grams (longestRun = 2). Need both
  // draft and source ≥500 tokens to keep shingleSize=5 (no fallback).
  // 65 chunks × 8 = 520 draft tokens; SOURCE_LONG is ~600 tokens.
  const sourceTokens = tokenize(SOURCE_LONG);
  const pieces = [];
  let padCounter = 0;
  for (let chunk = 0; chunk < 65; chunk++) {
    pieces.push(...sourceTokens.slice(chunk * 6, chunk * 6 + 6));
    pieces.push(`padword${padCounter++}`, `padword${padCounter++}`);
  }
  const result = analyze(pieces.join(' '), SOURCE_LONG);
  assert.equal(result.shingleSize, 5,
    `precondition: expected shingleSize=5 (no fallback), got ${result.shingleSize}`);
  assert.equal(result.verdict, 'needs-review',
    `expected needs-review, got ${result.verdict} (containment=${result.containment.toFixed(3)}, longestRun=${result.longestRun})`);
});
|
||||
|
||||
test('verdict 4: high overlap → rejected (containment ≥0.35)', () => {
  // Draft is 60% source + 40% padding
  const head = tokenize(SOURCE_LONG).slice(0, 200);
  const tail = Array.from({ length: 130 }, (_, i) => `pad${i}`);
  const result = analyze([...head, ...tail].join(' '), SOURCE_LONG);
  assert.equal(result.verdict, 'rejected',
    `expected rejected, got ${result.verdict} (containment=${result.containment.toFixed(3)}, longestRun=${result.longestRun})`);
});
|
||||
|
||||
test('verdict 5: long verbatim run triggers rejection even with low containment', () => {
  // Mostly unique words (low containment) but one 25-word verbatim sentence
  // from source — longestRun ≥15 should reject.
  const verbatim = tokenize(SOURCE_LONG).slice(50, 75).join(' ');
  const filler = Array.from({ length: 500 }, (_, i) => `unique${i}`);
  const draft = [
    filler.slice(0, 250).join(' '),
    verbatim,
    filler.slice(250).join(' '),
  ].join(' ');
  const result = analyze(draft, SOURCE_LONG);
  assert.equal(result.verdict, 'rejected',
    `expected rejected, got ${result.verdict} (containment=${result.containment.toFixed(3)}, longestRun=${result.longestRun})`);
  assert.ok(result.longestRun >= 15, `longestRun ${result.longestRun} should be ≥15`);
});
|
||||
|
||||
test('verdict 6: fenced code block in source → stripped → not counted as match', () => {
  const draftWords = Array.from({ length: 350 }, (_, i) => `uniq${i}`);
  const draft = draftWords.join(' ');
  // Source with a fenced code block containing some of the draft's words
  const sourceWithCode = SOURCE_LONG + '\n```\n' + draftWords.slice(0, 100).join(' ') + '\n```\n';
  const result = analyze(draft, sourceWithCode);
  // The code-block words should be stripped from source, so the draft remains disjoint
  assert.equal(result.containment, 0,
    `code-block words should be stripped (got containment ${result.containment})`);
});
|
||||
|
||||
test('verdict 7: short draft (<300 words) → needs-review with too-short reason', () => {
  const shortDraft = 'This is a short note. It has fewer than three hundred words. Just a quick sketch.';
  const result = analyze(shortDraft, SOURCE_LONG);
  assert.equal(result.verdict, 'needs-review');
  assert.equal(result.reason, 'too-short-to-score');
});
|
||||
|
||||
test('verdict 8: markdown emphasis is stripped before tokenization', () => {
  // Build a draft of unique tokens then wrap parts in **bold** and *italic*
  const baseWords = Array.from({ length: 350 }, (_, i) => `tok${i}`);
  const decorate = (w, i) => {
    if (i % 5 === 0) return `**${w}**`;
    if (i % 7 === 0) return `*${w}*`;
    return w;
  };
  const plainResult = analyze(baseWords.join(' '), SOURCE_LONG);
  const wrappedResult = analyze(baseWords.map(decorate).join(' '), SOURCE_LONG);
  // After stripping, both should yield the same containment / longestRun
  assert.equal(plainResult.containment, wrappedResult.containment,
    'markdown emphasis should not change containment after stripping');
  assert.equal(plainResult.longestRun, wrappedResult.longestRun,
    'markdown emphasis should not change longestRun after stripping');
});
|
||||
|
||||
// === Integration: fixtures (Step 5 will create these; skip if missing) ===
|
||||
|
||||
const FIXTURE_DIR = 'tests/fixtures/skill-factory';
const SCRIPT = 'scripts/ngram-overlap.mjs';

// Run the CLI entry point as a child process on a draft/source file pair
// and parse the JSON report it prints to stdout. Throws if the process
// exits non-zero or the output is not valid JSON.
function runCli(draft, source) {
  const stdout = execFileSync('node', [SCRIPT, draft, source], { encoding: 'utf8' });
  return JSON.parse(stdout);
}
|
||||
|
||||
// One data-driven registration per fixture pair. Each iteration produces a
// test with the same name, skip condition, and failure message as before;
// a pair is skipped when its draft fixture has not been created yet.
for (const expected of ['accepted', 'needs-review', 'rejected']) {
  const draftPath = `${FIXTURE_DIR}/draft-${expected}.md`;
  const sourcePath = `${FIXTURE_DIR}/source-${expected}.md`;
  test(`integration: ${expected} fixture pair → verdict ${expected}`,
    { skip: !existsSync(draftPath) },
    () => {
      const result = runCli(draftPath, sourcePath);
      assert.equal(result.verdict, expected,
        `expected ${expected}, got ${result.verdict} (containment=${result.containment.toFixed(3)}, longestRun=${result.longestRun})`);
    });
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue