// lib/exporters/textfile-format.mjs
// Pure transform: voyage JSONL stats records → Prometheus text-format 0.0.4.
//
// Output contract (Prometheus exposition format 0.0.4):
//   # HELP voyage_<metric> <help text>
//   # TYPE voyage_<metric> {gauge|counter}
//   voyage_<metric>{label="value",...} <value>
//
// Hard rules:
//   - NO client-side timestamps (per research/01 — node_exporter#1284 known issue
//     where stale textfile samples re-emit with old timestamps).
//   - Allowlist-redacted records ONLY (caller must apply field-allowlist first).
//   - UTF-8 metric names normalized: dots/dashes → underscore, lowercase, prefixed `voyage_`.
//   - Empty input → empty string output (no headers, no errors).

const METRIC_PREFIX = 'voyage_';

/**
 * Normalize a JSONL field name to a Prometheus-safe metric name.
 * Allowed characters are [a-zA-Z_:][a-zA-Z0-9_:]*. The name is lowercased, each
 * run of dots/dashes/whitespace collapses to a single '_', every other
 * disallowed character becomes '_', and the result is prefixed with `voyage_`.
 *
 * @param {*} name Raw field name (coerced with String()).
 * @returns {string} Prefixed, normalized metric name.
 */
function normalizeMetricName(name) {
  const safe = String(name)
    .toLowerCase()
    .replace(/[.\-\s]+/g, '_')
    .replace(/[^a-zA-Z0-9_:]/g, '_');
  return METRIC_PREFIX + safe;
}

/**
 * Normalize a record key to a valid Prometheus label name
 * ([a-zA-Z_][a-zA-Z0-9_]*). Case is preserved so keys that are already valid
 * pass through unchanged.
 *
 * @param {*} name Raw record key.
 * @returns {string} Valid label name.
 */
function normalizeLabelName(name) {
  const safe = String(name).replace(/[^a-zA-Z0-9_]/g, '_');
  if (safe === '') return '_';
  // A label name must not start with a digit.
  return /^[0-9]/.test(safe) ? `_${safe}` : safe;
}

/**
 * Escape a Prometheus label value: backslash, double-quote, newline.
 *
 * @param {*} v Label value (coerced with String()).
 * @returns {string} Escaped value, without surrounding quotes.
 */
function escapeLabel(v) {
  return String(v)
    .replace(/\\/g, '\\\\')
    .replace(/"/g, '\\"')
    .replace(/\n/g, '\\n');
}

/**
 * Escape HELP docstring text: only backslash and newline are escaped there
 * (double quotes are literal in HELP lines).
 *
 * @param {*} text Help text (coerced with String()).
 * @returns {string} Escaped help text.
 */
function escapeHelp(text) {
  return String(text).replace(/\\/g, '\\\\').replace(/\n/g, '\\n');
}

/**
 * Render a sample value. The text format spells infinities as +Inf / -Inf;
 * every other number uses JavaScript's default string form (NaN → "NaN").
 *
 * @param {number} value Sample value.
 * @returns {string} Text-format value.
 */
function formatSampleValue(value) {
  if (value === Infinity) return '+Inf';
  if (value === -Infinity) return '-Inf';
  return String(value);
}

/**
 * Build the `{name="value",...}` block for a sample, in record key order.
 * If two keys normalize to the same label name, the first one wins so the
 * line never carries a duplicate label name.
 *
 * @param {object} labels Map of raw key → string value.
 * @returns {string} Label block, or '' when there are no labels.
 */
function formatLabelBlock(labels) {
  const seen = new Set();
  const pairs = [];
  for (const [key, value] of Object.entries(labels)) {
    const name = normalizeLabelName(key);
    if (seen.has(name)) continue;
    seen.add(name);
    pairs.push(`${name}="${escapeLabel(value)}"`);
  }
  return pairs.length > 0 ? `{${pairs.join(',')}}` : '';
}

/**
 * Determine which fields in a record are numeric metrics vs string labels.
 * Number → metric sample. Boolean → metric sample (1/0). String → label.
 * The `ts` field is always dropped; arrays/objects/null values are skipped
 * (the caller's allowlist should have flattened these).
 *
 * @param {object|null|undefined} record One stats record.
 * @returns {{labels: object, metrics: object}} Both empty for a null/undefined record.
 */
function partitionRecord(record) {
  const labels = {};
  const metrics = {};
  // Object.entries throws on null/undefined; treat those as an empty record.
  if (record == null) return { labels, metrics };

  for (const [k, v] of Object.entries(record)) {
    if (k === 'ts') continue; // never emit ts as label nor metric (Prom server timestamps)
    if (typeof v === 'number') metrics[k] = v;
    else if (typeof v === 'boolean') metrics[k] = v ? 1 : 0;
    else if (typeof v === 'string') labels[k] = v;
  }
  return { labels, metrics };
}

/**
 * Group records by their `_schema_id` field. Records without a string
 * `_schema_id` (including null/undefined entries) go to the 'unknown' group.
 *
 * @param {Array} records Input records.
 * @returns {Map<string, Array>} Groups in first-seen order.
 */
function groupBySchema(records) {
  const groups = new Map();
  for (const r of records) {
    const id = (r && typeof r._schema_id === 'string') ? r._schema_id : 'unknown';
    if (!groups.has(id)) groups.set(id, []);
    groups.get(id).push(r);
  }
  return groups;
}

/**
 * Transform JSONL records into Prometheus text-format. Pure function.
 *
 * Each numeric/boolean field becomes a sample named
 * `voyage_<schema>_<field>`; string fields become labels on every sample of
 * that record. Metrics are emitted in sorted name order, each with one
 * HELP/TYPE pair followed by its samples (also sorted) for deterministic
 * output. null/undefined entries in `records` are ignored.
 *
 * NOTE(review): records that share a schema and an identical label set yield
 * repeated series lines for the same metric; confirm whether callers
 * de-duplicate before calling.
 *
 * @param {Array} records Allowlist-redacted records (caller responsibility).
 * @param {{help?: object}} [opts] Optional: help-text overrides keyed by full metric name.
 * @returns {string} Prometheus text-format. Empty input → empty string.
 */
export function transformToPrometheus(records, opts = {}) {
  if (!Array.isArray(records) || records.length === 0) return '';

  const helpMap = (opts && opts.help) || {};

  // Collect sample lines per metric so HELP/TYPE precede all samples of that metric.
  const samplesByMetric = new Map();
  for (const [schemaId, group] of groupBySchema(records)) {
    for (const record of group) {
      const { labels, metrics } = partitionRecord(record);
      const labelBlock = formatLabelBlock(labels);
      for (const [metricField, value] of Object.entries(metrics)) {
        const metricName = normalizeMetricName(`${schemaId}_${metricField}`);
        let samples = samplesByMetric.get(metricName);
        if (!samples) {
          samples = [];
          samplesByMetric.set(metricName, samples);
        }
        samples.push(`${metricName}${labelBlock} ${formatSampleValue(value)}`);
      }
    }
  }

  const lines = [];
  // Sort metric names for deterministic output (snapshot-test-friendly).
  const sortedMetrics = [...samplesByMetric.keys()].sort();
  for (const metricName of sortedMetrics) {
    const help = helpMap[metricName] || `voyage stats — ${metricName.slice(METRIC_PREFIX.length)}`;
    // A histogram family consists of _bucket{le=...}/_sum/_count series. Only
    // bare single-value samples are produced here, so a name the heuristic
    // classifies as histogram is declared as a gauge instead.
    const inferred = inferMetricType(metricName);
    const type = inferred === 'histogram' ? 'gauge' : inferred;

    lines.push(`# HELP ${metricName} ${escapeHelp(help)}`);
    lines.push(`# TYPE ${metricName} ${type}`);
    // Sort samples for determinism.
    lines.push(...[...samplesByMetric.get(metricName)].sort());
  }

  return lines.length > 0 ? `${lines.join('\n')}\n` : '';
}

/**
 * Heuristic metric-type classification by name. Pure & deterministic.
 *   - counter:   name ends in _total / _count / _passed / _failed / _skipped
 *   - histogram: name ends in _ms / _p<digits> / _seconds, or contains
 *                _duration anywhere (that alternative is not anchored)
 *   - gauge:     everything else
 * The counter check runs first.
 *
 * @param {string} metricName Normalized metric name.
 * @returns {'counter'|'histogram'|'gauge'} Inferred type.
 */
function inferMetricType(metricName) {
  if (/_total$|_count$|_passed$|_failed$|_skipped$/.test(metricName)) return 'counter';
  if (/_ms$|_duration|_p\d+$|_seconds$/.test(metricName)) return 'histogram';
  return 'gauge';
}

export { normalizeMetricName, partitionRecord, inferMetricType };