feat(config-audit): --accurate-tokens API calibration (v5 N5) [skip-docs]
This commit is contained in:
parent
1d12231748
commit
b7414303de
3 changed files with 386 additions and 3 deletions
|
|
@ -6,22 +6,46 @@
|
|||
*
|
||||
* Usage:
|
||||
* node token-hotspots-cli.mjs [path] [--json] [--output-file <path>] [--global]
|
||||
* [--with-telemetry-recipe]
|
||||
* [--with-telemetry-recipe] [--accurate-tokens]
|
||||
*
|
||||
* Exit codes: 0=ok, 3=unrecoverable error.
|
||||
* Zero external dependencies.
|
||||
*/
|
||||
|
||||
import { resolve, dirname, join } from 'node:path';
|
||||
import { resolve, dirname } from 'node:path';
|
||||
import { fileURLToPath } from 'node:url';
|
||||
import { writeFile, stat } from 'node:fs/promises';
|
||||
import { writeFile, readFile, stat } from 'node:fs/promises';
|
||||
import { discoverConfigFiles } from './lib/file-discovery.mjs';
|
||||
import { resetCounter } from './lib/output.mjs';
|
||||
import { scan } from './token-hotspots.mjs';
|
||||
import * as tokenizerApi from './lib/tokenizer-api.mjs';
|
||||
|
||||
const __dirname = dirname(fileURLToPath(import.meta.url));
|
||||
const TELEMETRY_RECIPE_PATH = resolve(__dirname, '..', 'knowledge', 'cache-telemetry-recipe.md');
|
||||
|
||||
// Upper bound on how many leading hotspot entries calibrateAgainstApi() samples.
const ACCURATE_TOKENS_SAMPLE_SIZE = 3;
|
||||
|
||||
/**
 * Sum API-reported token counts for the leading hotspots.
 *
 * Takes the first ACCURATE_TOKENS_SAMPLE_SIZE entries of `hotspots`, reads each
 * entry's file as UTF-8, passes the content to
 * `tokenizerApi.callCountTokensApi`, and adds up the returned `input_tokens`.
 * Requests are issued one at a time, in order.
 *
 * @param {Array<{path?: string}>} hotspots - Hotspot entries; only `path` is read here.
 * @param {string} apiKey - Forwarded unchanged to `callCountTokensApi`.
 * @returns {Promise<{actual_tokens: number, source: string, sampled_hotspots: number}>}
 *   `sampled_hotspots` is the number of entries selected for sampling. Entries
 *   that were skipped (no `path`, or file unreadable) are still included in
 *   that count but contribute nothing to `actual_tokens`.
 * @throws {Error} When `callCountTokensApi` rejects, or when its result has no
 *   finite numeric `input_tokens`.
 */
async function calibrateAgainstApi(hotspots, apiKey) {
  const sampled = hotspots.slice(0, ACCURATE_TOKENS_SAMPLE_SIZE);
  let actualTokens = 0;

  for (const hotspot of sampled) {
    if (!hotspot?.path) continue;

    let content;
    try {
      content = await readFile(hotspot.path, 'utf-8');
    } catch {
      // Deliberate best effort: an unreadable hotspot is skipped rather than
      // aborting the whole calibration.
      continue;
    }

    const result = await tokenizerApi.callCountTokensApi(content, apiKey);
    const inputTokens = result?.input_tokens;

    // Without this guard a malformed response turns the total into NaN (which
    // JSON.stringify emits as null) or silently string-concatenates.
    if (typeof inputTokens !== 'number' || !Number.isFinite(inputTokens)) {
      throw new Error('count_tokens API response is missing a numeric input_tokens field');
    }

    actualTokens += inputTokens;
  }

  return {
    actual_tokens: actualTokens,
    source: 'count_tokens_api',
    sampled_hotspots: sampled.length,
  };
}
|
||||
|
||||
async function main() {
|
||||
const args = process.argv.slice(2);
|
||||
let targetPath = '.';
|
||||
|
|
@ -29,11 +53,13 @@ async function main() {
|
|||
let jsonMode = false;
|
||||
let includeGlobal = false;
|
||||
let withTelemetryRecipe = false;
|
||||
let accurateTokens = false;
|
||||
|
||||
for (let i = 0; i < args.length; i++) {
|
||||
if (args[i] === '--json') jsonMode = true;
|
||||
else if (args[i] === '--global') includeGlobal = true;
|
||||
else if (args[i] === '--with-telemetry-recipe') withTelemetryRecipe = true;
|
||||
else if (args[i] === '--accurate-tokens') accurateTokens = true;
|
||||
else if (args[i] === '--output-file' && args[i + 1]) outputFile = args[++i];
|
||||
else if (!args[i].startsWith('-')) targetPath = args[i];
|
||||
}
|
||||
|
|
@ -69,6 +95,22 @@ async function main() {
|
|||
payload.telemetry_recipe_path = TELEMETRY_RECIPE_PATH;
|
||||
}
|
||||
|
||||
if (accurateTokens) {
|
||||
const apiKey = process.env.ANTHROPIC_API_KEY;
|
||||
if (!apiKey || apiKey.length === 0) {
|
||||
process.stderr.write('ANTHROPIC_API_KEY not set — skipping API calibration\n');
|
||||
payload.calibration = { skipped: 'no-api-key' };
|
||||
} else {
|
||||
try {
|
||||
payload.calibration = await calibrateAgainstApi(result.hotspots || [], apiKey);
|
||||
} catch (err) {
|
||||
// Error message is already key-masked by tokenizer-api.mjs.
|
||||
process.stderr.write(`Calibration error: ${err.message}\n`);
|
||||
payload.calibration = { skipped: 'api-error', error: err.message };
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const json = JSON.stringify(payload, null, 2);
|
||||
|
||||
if (outputFile) {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue