ktg-plugin-marketplace/plugins/ultraplan-local/scripts/ngram-overlap.test.mjs
2026-04-18 15:08:41 +02:00

281 lines
14 KiB
JavaScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

// node:test suite for scripts/ngram-overlap.mjs
//
// Run: node --test scripts/ngram-overlap.test.mjs
//
// Covers: identical text, disjoint text, partial overlap bands,
// longest-run override, fenced-code stripping, short-source fallback,
// markdown-emphasis stripping, fixture integration.
import { test } from 'node:test';
import assert from 'node:assert/strict';
import { execFileSync } from 'node:child_process';
import { existsSync } from 'node:fs';
import { analyze, tokenize, shingles, overlap, verdict, stripMarkdown } from './ngram-overlap.mjs';
// === Fixtures (inline prose to control word counts and overlap) ===
// 600+ word source on a generic technical topic (Claude Code hooks).
// Reused across multiple tests with different drafts. Sentences are kept
// in an array literal and joined once — no IIFE needed for a pure constant.
const SOURCE_LONG = [
  'Hooks in Claude Code allow you to intercept events emitted by the agent runtime',
  'These events fire at specific lifecycle points such as before a tool call runs',
  'or after the agent completes a turn or when a session starts up for the first time',
  'A hook is configured by adding an entry to the settings file under the hooks key',
  'Each hook entry binds a matcher pattern to a shell command that the runtime executes',
  'The matcher uses simple glob syntax to select which tool calls trigger the hook',
  'When a tool call matches the pattern the hook runs synchronously before the call proceeds',
  'A non-zero exit code from a hook script blocks the underlying tool call entirely',
  'This blocking behavior makes hooks useful for security policy enforcement and audit logging',
  'For example a pre-bash-executor hook can scan command strings against a denylist',
  'Hooks receive structured JSON input on standard input describing the event payload',
  'The schema includes the tool name the parameters and the working directory among other fields',
  'Hooks can emit JSON output on standard output to add additional context for the model',
  'Output is appended to the conversation as a system message before the next turn begins',
  'Plugin hooks live inside the plugin directory and apply only when the plugin is enabled',
  'User hooks live in the home directory under dot claude and apply across every project',
  'Project hooks live in the project root and apply only when working in that project',
  'Conflicts between hook layers resolve in a documented precedence order favoring user settings',
  'Hooks are written as plain executable scripts in any language that the system can run',
  'Common languages include shell python and node although any executable will work fine',
  'Best practice is to keep hooks fast and deterministic so they do not slow down the agent',
  'Slow hooks add latency to every tool call which compounds across long agent turns',
  'Hook scripts should also avoid making destructive changes during their execution',
  'Read-only checks fail safely while write operations from hooks are very hard to debug',
  'Testing hooks is straightforward by invoking them directly with the same input json',
  'Capture the output and exit code and verify they match the expected values',
  'Document hook behavior in the project readme so other contributors understand the constraints',
  'Hook misconfigurations often manifest as mysterious blocked tool calls during normal use',
  'Always include a clear error message in stderr when a hook intentionally blocks a call',
  'This makes debugging easier when the user wonders why their command did not run',
  'When designing a hook you should think first about what event you actually need to intercept',
  'Pre-tool-use events fire before any tool runs and can block dangerous operations early',
  'Post-tool-use events fire after a tool returns and can log results or trigger follow-up actions',
  'Session-start events fire when the agent begins a new conversation in a fresh context window',
  'Session-end events fire when the user closes the session and are useful for cleanup tasks',
  'Stop events fire whenever the agent finishes generating a response and yields back to the user',
  'Compaction events fire when the conversation history grows too large and must be summarized',
  'Each event type passes a different payload shape so you must read the schema documentation carefully',
  'A common pattern is to write a small dispatcher hook that routes events to language-specific handlers',
  'The dispatcher pattern keeps individual handlers simple and lets you add new ones without rewriting glue code',
  'Avoid putting business logic directly in the dispatcher because it becomes a bottleneck for testing',
  'Instead keep the dispatcher pure and delegate all real work to small focused single-purpose handler scripts',
  'Hook timeouts matter because slow handlers block the agent indefinitely until they return or error out',
  'Set a strict timeout in your handler implementation rather than relying on the runtime to kill it',
  'Use exit code two for hard errors and exit code zero for normal pass-through with no policy violation',
  'Reserve exit code one for soft warnings that should appear in the conversation but not block execution',
].join('. ') + '.';
const wordCount = (s) => (s.match(/[\p{L}\p{N}]+/gu) || []).length;
// === Unit tests on pure functions ===
test('tokenize: lowercases and splits on word boundaries', () => {
  // Punctuation and hyphens act as separators; case is folded.
  const expected = ['hello', 'world', 'foo', 'bar'];
  assert.deepEqual(tokenize('Hello, World! Foo-bar.'), expected);
});
test('tokenize: NFKC normalizes', () => {
  // Use explicit escapes so the normalization is unambiguously exercised
  // (a plain ASCII 'café 123' input would test nothing):
  // - 'cafe\u0301' (e + combining acute) composes to 'café' under NFKC
  // - full-width digits \uFF11\uFF12\uFF13 fold to ASCII '123'
  const tokens = tokenize('cafe\u0301 \uFF11\uFF12\uFF13');
  assert.deepEqual(tokens, ['caf\u00e9', '123']);
});
test('shingles: returns empty when input shorter than n', () => {
  // Three tokens cannot form a single 5-gram.
  const tooFew = ['a', 'b', 'c'];
  assert.deepEqual(shingles(tooFew, 5), []);
});
test('shingles: returns sliding window of size n', () => {
  // Five tokens with n=3 yield three space-joined windows.
  const windows = shingles(['a', 'b', 'c', 'd', 'e'], 3);
  assert.deepEqual(windows, ['a b c', 'b c d', 'c d e']);
});
test('stripMarkdown: removes fenced code blocks', () => {
  const doc = 'Before\n```js\nconst x = 1;\n```\nAfter';
  const result = stripMarkdown(doc);
  // Code body disappears; surrounding prose survives.
  assert.ok(!result.includes('const x'));
  assert.ok(result.includes('Before'));
  assert.ok(result.includes('After'));
});
test('stripMarkdown: removes inline code', () => {
  const result = stripMarkdown('Use `npm install` to set up.');
  // Backtick spans are dropped entirely.
  assert.ok(!result.includes('npm install'));
});
test('stripMarkdown: removes heading markers but keeps text', () => {
  const result = stripMarkdown('# Title\nBody');
  // The hash marker goes; the heading text stays.
  assert.ok(!result.includes('#'));
  assert.ok(result.includes('Title'));
});
test('stripMarkdown: removes emphasis markers', () => {
  const result = stripMarkdown('This **is bold** and *italic* and ~~strike~~');
  // Markers are gone, emphasized words remain.
  for (const marker of ['**', '~~']) {
    assert.ok(!result.includes(marker));
  }
  for (const kept of ['is bold', 'italic']) {
    assert.ok(result.includes(kept));
  }
});
test('stripMarkdown: links keep text only', () => {
  const input = 'See [docs](https://example.com) for info.';
  const result = stripMarkdown(input);
  // URL target is dropped, link text is retained.
  assert.ok(!result.includes('https'));
  assert.ok(result.includes('docs'));
});
test('stripMarkdown: removes YAML frontmatter at start', () => {
  const doc = ['---', 'name: foo', '---', 'Body text here'].join('\n');
  const result = stripMarkdown(doc);
  // Frontmatter keys are removed; the body is kept.
  assert.ok(!result.includes('name: foo'));
  assert.ok(result.includes('Body text here'));
});
// === Overlap behavior ===
test('overlap: identical token streams give containment 1.0', () => {
  const stream = tokenize(SOURCE_LONG);
  const metrics = overlap(stream, stream, 5);
  // A text compared against itself is fully contained with a long run.
  assert.equal(metrics.containment, 1);
  assert.ok(metrics.longestRun > 15);
});
test('overlap: completely disjoint streams give containment 0', () => {
  const left = 'alpha beta gamma delta epsilon zeta eta theta iota kappa'.split(' ');
  const right = 'xray yankee zulu whiskey victor uniform tango sierra romeo quebec'.split(' ');
  const metrics = overlap(left, right, 5);
  // No shared 5-grams at all: both metrics bottom out at zero.
  assert.equal(metrics.containment, 0);
  assert.equal(metrics.longestRun, 0);
});
// === Verdict bands ===
test('verdict 1: identical text → rejected (containment 1.0)', () => {
  // Self-comparison is the maximal-overlap case.
  const result = analyze(SOURCE_LONG, SOURCE_LONG);
  assert.equal(result.containment, 1);
  assert.equal(result.verdict, 'rejected');
});
test('verdict 2: completely disjoint text → accepted (low containment, low run)', () => {
  // A draft of 350 unrelated synthetic words stays above the ≥300-word
  // threshold, so the too-short fallback does not kick in.
  const draft = Array.from({ length: 350 }, (_, i) => `uniqueword${i}`).join(' ');
  const result = analyze(draft, SOURCE_LONG);
  assert.equal(result.verdict, 'accepted');
  assert.equal(result.containment, 0);
  assert.equal(result.longestRun, 0);
});
test('verdict 3: partial overlap (mid-band) → needs-review', () => {
  // Construct a draft where ~25% of 5-grams match the source but no run is
  // long. Strategy: alternate 6-token source chunks with 2-token padding.
  // Each chunk yields exactly 2 source 5-grams (longestRun = 2). Both draft
  // and source must stay ≥500 tokens so shingleSize stays 5 (no fallback):
  // 65 chunks × 8 = 520 draft tokens; SOURCE_LONG is ~600 tokens.
  const sourceTokens = tokenize(SOURCE_LONG);
  const pieces = [];
  for (let chunk = 0; chunk < 65; chunk++) {
    const start = chunk * 6;
    pieces.push(sourceTokens.slice(start, start + 6).join(' '));
    pieces.push(`padword${chunk * 2} padword${chunk * 2 + 1}`);
  }
  const result = analyze(pieces.join(' '), SOURCE_LONG);
  assert.equal(result.shingleSize, 5,
    `precondition: expected shingleSize=5 (no fallback), got ${result.shingleSize}`);
  assert.equal(result.verdict, 'needs-review',
    `expected needs-review, got ${result.verdict} (containment=${result.containment.toFixed(3)}, longestRun=${result.longestRun})`);
});
test('verdict 4: high overlap → rejected (containment ≥0.35)', () => {
  // Draft is 60% source + 40% padding.
  const head = tokenize(SOURCE_LONG).slice(0, 200);
  const tail = Array.from({ length: 130 }, (_, i) => `pad${i}`);
  const result = analyze([...head, ...tail].join(' '), SOURCE_LONG);
  assert.equal(result.verdict, 'rejected',
    `expected rejected, got ${result.verdict} (containment=${result.containment.toFixed(3)}, longestRun=${result.longestRun})`);
});
test('verdict 5: long verbatim run triggers rejection even with low containment', () => {
  // Mostly unique words (low containment) but one 25-word verbatim sentence
  // from the source — a longestRun ≥15 should reject on its own.
  const verbatim = tokenize(SOURCE_LONG).slice(50, 75).join(' ');
  const padding = Array.from({ length: 500 }, (_, i) => `unique${i}`);
  const draft = [
    padding.slice(0, 250).join(' '),
    verbatim,
    padding.slice(250).join(' '),
  ].join(' ');
  const result = analyze(draft, SOURCE_LONG);
  assert.equal(result.verdict, 'rejected',
    `expected rejected, got ${result.verdict} (containment=${result.containment.toFixed(3)}, longestRun=${result.longestRun})`);
  assert.ok(result.longestRun >= 15, `longestRun ${result.longestRun} should be ≥15`);
});
test('verdict 6: fenced code block in source → stripped → not counted as match', () => {
  const draftBody = Array.from({ length: 350 }, (_, i) => `uniq${i}`);
  const draft = draftBody.join(' ');
  // Source ends with a fenced code block containing some of the draft's words.
  const fence = '```';
  const sourceWithCode =
    SOURCE_LONG + '\n' + fence + '\n' + draftBody.slice(0, 100).join(' ') + '\n' + fence + '\n';
  const result = analyze(draft, sourceWithCode);
  // The code-block words are stripped from the source, so the draft stays disjoint.
  assert.equal(result.containment, 0,
    `code-block words should be stripped (got containment ${result.containment})`);
});
test('verdict 7: short draft (<300 words) → needs-review with too-short reason', () => {
  // Well under the 300-word minimum, so scoring is skipped entirely.
  const shortNote = 'This is a short note. It has fewer than three hundred words. Just a quick sketch.';
  const result = analyze(shortNote, SOURCE_LONG);
  assert.equal(result.verdict, 'needs-review');
  assert.equal(result.reason, 'too-short-to-score');
});
test('verdict 8: markdown emphasis is stripped before tokenization', () => {
  // Build a draft of unique tokens, then a variant with parts wrapped in
  // **bold** (every 5th) and *italic* (every remaining 7th).
  const baseWords = Array.from({ length: 350 }, (_, i) => `tok${i}`);
  const decorate = (w, i) => {
    if (i % 5 === 0) return `**${w}**`;
    if (i % 7 === 0) return `*${w}*`;
    return w;
  };
  const plainResult = analyze(baseWords.join(' '), SOURCE_LONG);
  const wrappedResult = analyze(baseWords.map(decorate).join(' '), SOURCE_LONG);
  // After stripping, both variants must score identically.
  assert.equal(plainResult.containment, wrappedResult.containment,
    'markdown emphasis should not change containment after stripping');
  assert.equal(plainResult.longestRun, wrappedResult.longestRun,
    'markdown emphasis should not change longestRun after stripping');
});
// === Integration: fixtures (Step 5 will create these; skip if missing) ===
const FIXTURE_DIR = 'tests/fixtures/skill-factory';
const SCRIPT = 'scripts/ngram-overlap.mjs';

// Invoke the CLI on a draft/source file pair and parse its JSON report.
function runCli(draft, source) {
  const stdout = execFileSync('node', [SCRIPT, draft, source], { encoding: 'utf8' });
  return JSON.parse(stdout);
}
// The three fixture pairs differ only in the expected verdict, so register
// them from a single data-driven loop. Titles, skip conditions, paths, and
// assertion messages are identical to spelling each test out by hand.
for (const expected of ['accepted', 'needs-review', 'rejected']) {
  const draftPath = `${FIXTURE_DIR}/draft-${expected}.md`;
  const sourcePath = `${FIXTURE_DIR}/source-${expected}.md`;
  test(`integration: ${expected} fixture pair → verdict ${expected}`, { skip: !existsSync(draftPath) }, () => {
    const result = runCli(draftPath, sourcePath);
    assert.equal(result.verdict, expected,
      `expected ${expected}, got ${result.verdict} (containment=${result.containment.toFixed(3)}, longestRun=${result.longestRun})`);
  });
}