feat(config-audit): add 'mcp' kind to estimateTokens (v5 F2)

Differentiate MCP servers from generic 'item' (flat 15 tokens) — they actually
cost 500+ tokens per turn for protocol metadata and tool schemas.

estimateTokens(bytes, 'mcp', {toolCount}) returns max of:
- 500 token floor (base overhead)
- ceil(bytes / 3.5) (json-rate when bytes known)
- 500 + toolCount * 200 (when tool count is detected; Step 14 wires this)

Caller-side migration in next commit (Step 5).

Tests: +4 cases for mcp kind.
This commit is contained in:
Kjell Tore Guttormsen 2026-05-01 06:21:30 +02:00
commit 48d560a209
2 changed files with 42 additions and 2 deletions

View file

@ -22,12 +22,27 @@ const SCHEMA_VERSION = '1.0.0';
* Estimate tokens for a given byte count and content kind.
* Deterministic heuristic — see feature plan §4 for rationale.
*
* MCP (v5 F2): an active MCP server consumes a base overhead of ~500 tokens
* for protocol metadata + tool schemas, even before any tool is described.
* When tool count is known (Step 14 wires this up), we estimate ~200 tokens
* per tool description.
*
* @param {number} bytes - Byte count (or item count for kind='item')
* @param {'markdown'|'frontmatter'|'json'|'item'} kind
* @param {'markdown'|'frontmatter'|'json'|'item'|'mcp'} kind
* @param {{toolCount?: number}} [opts] - kind-specific options (mcp: toolCount)
* @returns {number} Integer token count (rounded up)
*/
export function estimateTokens(bytes, kind = 'markdown') {
export function estimateTokens(bytes, kind = 'markdown', opts = {}) {
if (kind === 'item') return 15;
if (kind === 'mcp') {
const base = 500;
const perTool = 200;
const toolCount = typeof opts.toolCount === 'number' && opts.toolCount > 0 ? opts.toolCount : 0;
const safeBytes = typeof bytes === 'number' && bytes > 0 && Number.isFinite(bytes) ? bytes : 0;
const fromBytes = Math.ceil(safeBytes / 3.5);
const fromTools = base + toolCount * perTool;
return Math.max(base, fromBytes, fromTools);
}
if (typeof bytes !== 'number' || bytes < 0 || !Number.isFinite(bytes)) return 0;
if (kind === 'frontmatter') {
const capped = Math.min(bytes, 600);

View file

@ -165,6 +165,31 @@ describe('estimateTokens', () => {
assert.equal(estimateTokens(-1, 'markdown'), 0);
assert.equal(estimateTokens(NaN, 'markdown'), 0);
});
// v5 F2: differentiated MCP estimate
it('mcp: 0 bytes → at least 500 (base overhead floor)', () => {
assert.ok(estimateTokens(0, 'mcp') >= 500,
`expected >= 500, got ${estimateTokens(0, 'mcp')}`);
});
it('mcp: with toolCount: 10 → at least 2000', () => {
assert.ok(estimateTokens(0, 'mcp', { toolCount: 10 }) >= 2000,
`expected >= 2000, got ${estimateTokens(0, 'mcp', { toolCount: 10 })}`);
});
it('mcp: ratio mcp/item ≥ 30 for 10-tool server', () => {
const mcp = estimateTokens(0, 'mcp', { toolCount: 10 });
const item = estimateTokens(0, 'item');
assert.ok(mcp / item >= 30,
`expected ratio >= 30, got mcp=${mcp} item=${item} ratio=${mcp / item}`);
});
it('mcp: with bytes uses json-rate floor', () => {
// 700 bytes JSON ≈ 200 tokens, but mcp keeps 500 floor
assert.equal(estimateTokens(700, 'mcp'), 500);
// 3500 bytes JSON = 1000 tokens, exceeds floor
assert.equal(estimateTokens(3500, 'mcp'), 1000);
});
});
// ─────────────────────────────────────────────────────────────────────────