// Step 9 of v4.1-execute (Wave 2, Session 3).
//
// Pure function transformToPrometheus(records) → Prometheus text-format 0.0.4.
//
// Hard rules:
// - NO client-side timestamps (research/01 node_exporter#1284 mitigation)
// - Allowlist-redacted records ONLY (caller responsibility — Step 11 enforces)
// - UTF-8 metric names normalized: lowercase, [.\-\s] → _, voyage_ prefix
// - Empty input → empty string output
// - Sorted output for determinism (snapshot-test-friendly)
//
// Heuristic metric typing:
// - counter: *_total, *_count, *_passed, *_failed, *_skipped
// - histogram: *_ms, *_duration, *_p\d+, *_seconds
// - gauge: everything else (Prometheus convention)
//
// Snapshot: tests/fixtures/expected.prom byte-for-byte match.
// Regenerate: node scripts/gen-expected-prom.mjs > tests/fixtures/expected.prom
//
// Tests (6 new, baseline 400 → 406):
// - Snapshot byte-for-byte match (SC #12)
// - Empty input handling (null, undefined, [])
// - Allowlist-redaction sanity (post-bash-stats without command_excerpt)
// - NO client-side timestamps (token-count assertion per line)
// - normalizeMetricName edge cases
// - Determinism (identical input → identical output)
//
// Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
// File info: 135 lines · 5.1 KiB · JavaScript
// lib/exporters/textfile-format.mjs
// Pure transform: voyage JSONL stats records → Prometheus text-format 0.0.4.
//
// Output contract (Prometheus exposition format 0.0.4):
//   # HELP voyage_<metric_name> <description>
//   # TYPE voyage_<metric_name> {gauge|counter|histogram}
//   voyage_<metric_name>{label="value",...} <numeric_value>
//
// Hard rules:
//   - NO client-side timestamps (per research/01 — node_exporter#1284 known issue
//     where stale textfile samples re-emit with old timestamps).
//   - Allowlist-redacted records ONLY (caller must apply field-allowlist first).
//   - UTF-8 metric names normalized: dots/dashes/whitespace → underscore, lowercase,
//     prefixed `voyage_`.
//   - Empty input → empty string output (no headers, no errors).

// Prefix prepended to every emitted metric name.
const METRIC_PREFIX = 'voyage_';

/**
 * Normalize a JSONL field name to a Prometheus-safe metric name.
 *
 * Target grammar (Prometheus metric names): [a-zA-Z_:][a-zA-Z0-9_:]*.
 * The fixed `voyage_` prefix guarantees a valid first character even when the
 * input starts with a digit or is empty.
 *
 * @param {*} name Field name; coerced with String() before processing.
 * @returns {string} Lower-cased, sanitized name prefixed with `voyage_`.
 */
function normalizeMetricName(name) {
  const lowered = String(name).toLowerCase();
  // A whole run of dots, dashes and whitespace collapses into ONE underscore...
  const collapsed = lowered.replace(/[.\-\s]+/g, '_');
  // ...whereas every other disallowed character becomes its own underscore
  // (so "a!!b" yields "a__b"). Colons are left untouched.
  const safe = collapsed.replace(/[^a-zA-Z0-9_:]/g, '_');
  return METRIC_PREFIX + safe;
}

/**
 * Escape a value for use inside a double-quoted Prometheus label value.
 *
 * Three characters are escaped: backslash → `\\`, double-quote → `\"`,
 * line feed → `\n` (a literal backslash followed by `n`). Everything else is
 * passed through unchanged.
 *
 * @param {*} v Label value; coerced with String() before escaping.
 * @returns {string} Escaped text, WITHOUT the surrounding quotes.
 */
function escapeLabel(v) {
  const ESCAPES = { '\\': '\\\\', '"': '\\"', '\n': '\\n' };
  // Single pass over the input, so backslashes introduced by an escape are
  // never escaped a second time.
  return String(v).replace(/[\\"\n]/g, (ch) => ESCAPES[ch]);
}

/**
 * Split a record's fields into numeric samples and string labels.
 *
 * Field handling:
 *   - `ts`      → always dropped (never emitted as label or metric, so no
 *                 client-side timestamp can leak into the output).
 *   - number    → metric, value kept as-is.
 *   - boolean   → metric, `true` → 1 and `false` → 0.
 *   - string    → label.
 *   - otherwise → skipped (null, arrays, objects, …); the caller's allowlist
 *                 is expected to have flattened nested data already.
 *
 * @param {object} record A single stats record.
 * @returns {{labels: Object<string, string>, metrics: Object<string, number>}}
 *   The two partitions. A null, undefined or non-object record yields two
 *   empty partitions instead of throwing.
 */
function partitionRecord(record) {
  const labels = {};
  const metrics = {};

  // Object.entries(null | undefined) throws; treat such input as "no fields".
  if (record === null || typeof record !== 'object') {
    return { labels, metrics };
  }

  for (const [key, value] of Object.entries(record)) {
    if (key === 'ts') continue;

    switch (typeof value) {
      case 'number':
        metrics[key] = value;
        break;
      case 'boolean':
        metrics[key] = value ? 1 : 0;
        break;
      case 'string':
        labels[key] = value;
        break;
      default:
        // Not representable as a sample or a label — ignore.
        break;
    }
  }

  return { labels, metrics };
}

/**
 * Group records by their `_schema_id` field.
 *
 * A record whose `_schema_id` is not a string — including null/undefined
 * records — is filed under the key 'unknown'. Groups appear in the Map in
 * order of first occurrence, and records keep their input order within a group.
 *
 * @param {Iterable<object>} records Records to group.
 * @returns {Map<string, Array<object>>} Schema id → records with that id.
 */
function groupBySchema(records) {
  const groups = new Map();

  for (const record of records) {
    const hasId = Boolean(record) && typeof record._schema_id === 'string';
    const key = hasId ? record._schema_id : 'unknown';

    const bucket = groups.get(key);
    if (bucket === undefined) {
      groups.set(key, [record]);
    } else {
      bucket.push(record);
    }
  }

  return groups;
}

/**
 * Transform JSONL records into Prometheus text-format. Pure function.
 *
 * For every numeric/boolean field of every record one sample line is emitted,
 * named `normalizeMetricName(<schema id>_<field>)` and labelled with the
 * record's string fields. Output is grouped per metric: `# HELP`, `# TYPE`,
 * then that metric's samples. Metric names and the samples within a metric are
 * sorted (default string order) so identical input gives identical output.
 * No timestamps are appended to samples.
 *
 * @param {Array<object>} records  Allowlist-redacted records (caller responsibility).
 *   Null or non-object entries are skipped.
 * @param {{help?: object}} [opts] Optional: `help` maps a full metric name
 *   (including the `voyage_` prefix) to its HELP text; a falsy entry falls
 *   back to the generated default text.
 * @returns {string} Prometheus text-format ending in a newline, or '' when
 *   `records` is not an array, is empty, or yields no samples.
 */
export function transformToPrometheus(records, opts = {}) {
  if (!Array.isArray(records) || records.length === 0) return '';

  // Tolerate an explicit `null` opts (the default only covers `undefined`).
  const helpMap = (opts && opts.help) || {};

  // metric name → sample lines, so HELP/TYPE can precede all of a metric's samples.
  const samplesByMetric = new Map();

  for (const [schemaId, group] of groupBySchema(records).entries()) {
    for (const record of group) {
      // groupBySchema files null / non-object entries under 'unknown'; they
      // carry no fields, so skip them rather than crash on Object.entries().
      if (record === null || typeof record !== 'object') continue;

      const { labels, metrics } = partitionRecord(record);
      const labelStr = Object.entries(labels)
        .map(([k, v]) => `${sanitizeLabelName(k)}="${escapeLabel(v)}"`)
        .join(',');
      const labelBlock = labelStr ? `{${labelStr}}` : '';

      for (const [metricField, value] of Object.entries(metrics)) {
        const metricName = normalizeMetricName(`${schemaId}_${metricField}`);
        let samples = samplesByMetric.get(metricName);
        if (samples === undefined) {
          samples = [];
          samplesByMetric.set(metricName, samples);
        }
        samples.push(`${metricName}${labelBlock} ${value}`);
      }
    }
  }

  const lines = [];

  // Map keys are unique, so each metric gets exactly one HELP/TYPE pair.
  for (const metricName of [...samplesByMetric.keys()].sort()) {
    const help = helpMap[metricName] || `voyage stats — ${metricName.slice(METRIC_PREFIX.length)}`;
    lines.push(`# HELP ${metricName} ${escapeHelpText(help)}`);
    lines.push(`# TYPE ${metricName} ${inferMetricType(metricName)}`);
    for (const sample of samplesByMetric.get(metricName).sort()) lines.push(sample);
  }

  return lines.join('\n') + (lines.length > 0 ? '\n' : '');
}

/**
 * Make a record key usable as a Prometheus label name ([a-zA-Z_][a-zA-Z0-9_]*).
 * Keys that are already valid are returned unchanged.
 */
function sanitizeLabelName(key) {
  const cleaned = String(key).replace(/[^a-zA-Z0-9_]/g, '_');
  // A label name may not be empty or start with a digit.
  return /^[a-zA-Z_]/.test(cleaned) ? cleaned : `_${cleaned}`;
}

/**
 * Escape HELP text: backslash → `\\` and line feed → `\n`, so a multi-line
 * help override cannot break the line-oriented output.
 */
function escapeHelpText(text) {
  return String(text).replace(/[\\\n]/g, (ch) => (ch === '\n' ? '\\n' : '\\\\'));
}

/**
 * Heuristically pick the Prometheus metric type from a metric name.
 *
 *   - counter:   name ends in _total, _count, _passed, _failed or _skipped
 *   - histogram: name ends in _ms, _p<digits> or _seconds, or contains
 *                _duration
 *   - gauge:     everything else
 *
 * The counter check runs first, so e.g. `x_duration_total` is a counter.
 * Pure & deterministic.
 *
 * NOTE(review): `_duration` is the only pattern without a `$` anchor, so it
 * matches anywhere in the name (e.g. `x_duration_avg`). Left as-is because the
 * byte-for-byte snapshot may depend on it — confirm whether a suffix-only
 * match was intended.
 *
 * @param {string} metricName Normalized metric name.
 * @returns {'counter'|'histogram'|'gauge'} Inferred type keyword.
 */
function inferMetricType(metricName) {
  const isCounter = /_total$|_count$|_passed$|_failed$|_skipped$/.test(metricName);
  if (isCounter) return 'counter';

  const isHistogram = /_ms$|_duration|_p\d+$|_seconds$/.test(metricName);
  return isHistogram ? 'histogram' : 'gauge';
}

export { normalizeMetricName, partitionRecord, inferMetricType };
|