feat(config-audit): add 'mcp' kind to estimateTokens (v5 F2)
Differentiate MCP servers from the generic 'item' kind (flat 15 tokens) — they
actually cost 500+ tokens per turn for protocol metadata and tool schemas.
estimateTokens(bytes, 'mcp', {toolCount}) returns max of:
- 500 token floor (base overhead)
- ceil(bytes / 3.5) (json-rate when bytes known)
- 500 + toolCount * 200 (when a positive toolCount is passed in opts; Step 14 wires up detection)
Caller-side migration in next commit (Step 5).
Tests: +4 cases for mcp kind.
This commit is contained in:
parent
8ca391fdb2
commit
48d560a209
2 changed files with 42 additions and 2 deletions
|
|
@ -22,12 +22,27 @@ const SCHEMA_VERSION = '1.0.0';
|
|||
* Estimate tokens for a given byte count and content kind.
|
||||
* Deterministic heuristic — see feature plan §4 for rationale.
|
||||
*
|
||||
* MCP (v5 F2): an active MCP server consumes a base overhead of ~500 tokens
|
||||
* for protocol metadata + tool schemas, even before any tool is described.
|
||||
* When tool count is known (Step 14 wires this up), we estimate ~200 tokens
|
||||
* per tool description.
|
||||
*
|
||||
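* @param {number} bytes - Byte count (or item count for kind='item', where the value is ignored and a flat 15 is returned; for kind='mcp', non-numeric, non-positive or non-finite values count as 0)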
|
||||
* @param {'markdown'|'frontmatter'|'json'|'item'} kind
|
||||
* @param {'markdown'|'frontmatter'|'json'|'item'|'mcp'} kind
|
||||
* @param {{toolCount?: number}} [opts] - kind-specific options (mcp: toolCount)
|
||||
* @returns {number} Integer token count (rounded up)
|
||||
*/
|
||||
export function estimateTokens(bytes, kind = 'markdown') {
|
||||
export function estimateTokens(bytes, kind = 'markdown', opts = {}) {
|
||||
if (kind === 'item') return 15;
|
||||
if (kind === 'mcp') {
|
||||
const base = 500;
|
||||
const perTool = 200;
|
||||
const toolCount = typeof opts.toolCount === 'number' && opts.toolCount > 0 ? opts.toolCount : 0;
|
||||
const safeBytes = typeof bytes === 'number' && bytes > 0 && Number.isFinite(bytes) ? bytes : 0;
|
||||
const fromBytes = Math.ceil(safeBytes / 3.5);
|
||||
const fromTools = base + toolCount * perTool;
|
||||
return Math.max(base, fromBytes, fromTools);
|
||||
}
|
||||
if (typeof bytes !== 'number' || bytes < 0 || !Number.isFinite(bytes)) return 0;
|
||||
if (kind === 'frontmatter') {
|
||||
const capped = Math.min(bytes, 600);
|
||||
|
|
|
|||
|
|
@ -165,6 +165,31 @@ describe('estimateTokens', () => {
|
|||
assert.equal(estimateTokens(-1, 'markdown'), 0);
|
||||
assert.equal(estimateTokens(NaN, 'markdown'), 0);
|
||||
});
|
||||
|
||||
// v5 F2: differentiated MCP estimate
|
||||
it('mcp: 0 bytes → at least 500 (base overhead floor)', () => {
|
||||
assert.ok(estimateTokens(0, 'mcp') >= 500,
|
||||
`expected >= 500, got ${estimateTokens(0, 'mcp')}`);
|
||||
});
|
||||
|
||||
it('mcp: with toolCount: 10 → at least 2000', () => {
|
||||
assert.ok(estimateTokens(0, 'mcp', { toolCount: 10 }) >= 2000,
|
||||
`expected >= 2000, got ${estimateTokens(0, 'mcp', { toolCount: 10 })}`);
|
||||
});
|
||||
|
||||
it('mcp: ratio mcp/item ≥ 30 for 10-tool server', () => {
|
||||
const mcp = estimateTokens(0, 'mcp', { toolCount: 10 });
|
||||
const item = estimateTokens(0, 'item');
|
||||
assert.ok(mcp / item >= 30,
|
||||
`expected ratio >= 30, got mcp=${mcp} item=${item} ratio=${mcp / item}`);
|
||||
});
|
||||
|
||||
it('mcp: with bytes uses json-rate floor', () => {
|
||||
// 700 bytes JSON = 200 tokens (700 / 3.5), below the floor, so mcp returns the 500 floor
|
||||
assert.equal(estimateTokens(700, 'mcp'), 500);
|
||||
// 3500 bytes JSON = 1000 tokens, exceeds floor
|
||||
assert.equal(estimateTokens(3500, 'mcp'), 1000);
|
||||
});
|
||||
});
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue