ktg-plugin-marketplace/plugins/voyage/lib/exporters/textfile-format.mjs

// lib/exporters/textfile-format.mjs
// Pure transform: voyage JSONL stats records → Prometheus text-format 0.0.4.
//
// Output contract (Prometheus exposition format 0.0.4):
//   # HELP voyage_<metric_name> <description>
//   # TYPE voyage_<metric_name> {gauge|counter|histogram}
//   voyage_<metric_name>{label="value",...} <numeric_value>
//
// Hard rules:
//   - NO client-side timestamps (per research/01 — node_exporter#1284 known issue
//     where stale textfile samples re-emit with old timestamps).
//   - Allowlist-redacted records ONLY (caller must apply field-allowlist first).
//   - UTF-8 metric names normalized: dots/dashes → underscore, lowercase, prefixed `voyage_`.
//   - Empty input → empty string output (no headers, no errors).

// Namespace prepended to every metric name this module produces.
const METRIC_PREFIX = 'voyage_';

/**
 * Build a prefixed metric name from an arbitrary field name.
 *
 * The input is stringified and lowercased; each run of dots, dashes or
 * whitespace collapses to a single '_'; every remaining character outside
 * [a-zA-Z0-9_:] becomes '_' (one underscore per character). The result is
 * prefixed with METRIC_PREFIX, so it always starts with a letter.
 *
 * @param {*} name  Field name (coerced with String()).
 * @returns {string} Prefixed, sanitized metric name.
 */
function normalizeMetricName(name) {
  const lowered = String(name).toLowerCase();
  const separatorsCollapsed = lowered.replace(/[.\-\s]+/g, '_');
  const sanitized = separatorsCollapsed.replace(/[^a-zA-Z0-9_:]/g, '_');
  return `${METRIC_PREFIX}${sanitized}`;
}

/**
 * Escape a value for use inside a double-quoted Prometheus label value.
 *
 * Backslash becomes `\\`, double-quote becomes `\"`, and line feed becomes
 * the two characters `\n`. The value is coerced with String() first.
 *
 * @param {*} v  Raw label value.
 * @returns {string} Escaped text (without the surrounding quotes).
 */
function escapeLabel(v) {
  const replacements = { '\\': '\\\\', '"': '\\"', '\n': '\\n' };
  // Single pass: each special character maps independently to its escape.
  return String(v).replace(/[\\"\n]/g, (ch) => replacements[ch]);
}

/**
 * Split one record into numeric sample values and string label values.
 *
 * - number  → metrics[key] = value
 * - boolean → metrics[key] = 1 (true) or 0 (false)
 * - string  → labels[key] = value
 * - anything else (null, arrays, objects, undefined) is skipped; the
 *   caller's allowlist is expected to have flattened nested values.
 *
 * The `ts` key is always dropped so no client-side timestamp is emitted as
 * either a label or a sample.
 *
 * @param {object} record  One stats record. null, primitives and arrays are
 *   treated as "no fields" instead of throwing.
 * @returns {{labels: object, metrics: object}}
 */
function partitionRecord(record) {
  const labels = {};
  const metrics = {};

  // groupBySchema files null / non-object entries under 'unknown' rather than
  // rejecting them, so this function must not crash on them
  // (Object.entries(null) throws). Arrays are excluded too: their indices
  // would otherwise be reported as metric fields.
  if (record === null || typeof record !== 'object' || Array.isArray(record)) {
    return { labels, metrics };
  }

  for (const [key, value] of Object.entries(record)) {
    if (key === 'ts') continue; // never emit ts as label nor metric

    switch (typeof value) {
      case 'number':
        metrics[key] = value;
        break;
      case 'boolean':
        metrics[key] = value ? 1 : 0;
        break;
      case 'string':
        labels[key] = value;
        break;
      default:
        // Nested or null values are intentionally ignored.
        break;
    }
  }

  return { labels, metrics };
}

/**
 * Bucket records by their `_schema_id` field.
 *
 * A record whose `_schema_id` is a string goes into the bucket for that id;
 * every other entry (missing or non-string `_schema_id`, null, primitives)
 * goes into the 'unknown' bucket. Buckets appear in first-seen order and
 * records keep their input order within a bucket.
 *
 * @param {Iterable<*>} records  Records to group.
 * @returns {Map<string, Array<*>>} Schema id → records.
 */
function groupBySchema(records) {
  const bySchema = new Map();
  for (const entry of records) {
    let key = 'unknown';
    if (entry && typeof entry._schema_id === 'string') {
      key = entry._schema_id;
    }

    const bucket = bySchema.get(key);
    if (bucket === undefined) {
      bySchema.set(key, [entry]);
    } else {
      bucket.push(entry);
    }
  }
  return bySchema;
}

/**
 * Coerce a record key into a valid label name: [a-zA-Z_][a-zA-Z0-9_]*.
 * Invalid characters become '_'; an empty or digit-leading result gets a
 * leading '_'.
 */
function sanitizeLabelName(key) {
  const cleaned = String(key).replace(/[^a-zA-Z0-9_]/g, '_');
  return /^[a-zA-Z_]/.test(cleaned) ? cleaned : `_${cleaned}`;
}

/**
 * Escape HELP text: the exposition format requires backslash and line feed
 * to be written as `\\` and `\n` so the comment stays on one line.
 */
function escapeHelpText(text) {
  return String(text).replace(/\\/g, '\\\\').replace(/\n/g, '\\n');
}

/**
 * Render a sample value. Finite numbers use JavaScript's default formatting;
 * non-finite values use the exposition-format spellings NaN / +Inf / -Inf.
 */
function formatSampleValue(value) {
  if (Number.isNaN(value)) return 'NaN';
  if (value === Infinity) return '+Inf';
  if (value === -Infinity) return '-Inf';
  return String(value);
}

/**
 * Render a labels object as `{name="value",...}`, or '' when there are none.
 * Names are sanitized; if two keys sanitize to the same name the first one
 * wins, because a sample must not repeat a label name.
 */
function buildLabelBlock(labels) {
  const pairs = new Map();
  for (const [key, raw] of Object.entries(labels)) {
    const labelName = sanitizeLabelName(key);
    if (!pairs.has(labelName)) pairs.set(labelName, escapeLabel(raw));
  }
  if (pairs.size === 0) return '';
  const rendered = [...pairs].map(([labelName, escaped]) => `${labelName}="${escaped}"`);
  return `{${rendered.join(',')}}`;
}

/**
 * Transform JSONL records into Prometheus text-format. Pure function.
 *
 * Records are grouped by schema id; each numeric/boolean field becomes a
 * sample named `voyage_<schema>_<field>`, and each string field becomes a
 * label on every sample of that record. Output is sorted by metric name and,
 * within a metric, by sample line, so it is deterministic. HELP and TYPE are
 * written once per metric, immediately before its samples. No timestamps are
 * emitted.
 *
 * NOTE(review): records that share a schema and label set produce repeated
 * lines for the same series; callers presumably pre-aggregate — confirm.
 *
 * @param {Array<object>} records  Allowlist-redacted records (caller responsibility).
 *   Entries that are not plain objects are skipped.
 * @param {{help?: object}} [opts]  Optional: help-text overrides keyed by full
 *   metric name (including the `voyage_` prefix). null is treated as {}.
 * @returns {string}                Prometheus text-format. Empty input → empty string.
 */
export function transformToPrometheus(records, opts = {}) {
  if (!Array.isArray(records) || records.length === 0) return '';

  const helpMap = (opts && opts.help) || {};

  // Collect sample lines per metric name so HELP/TYPE can precede all of
  // a metric's samples regardless of record order.
  const samplesByMetric = new Map();

  for (const [schemaId, group] of groupBySchema(records).entries()) {
    for (const record of group) {
      // Skip null / primitive / array entries instead of crashing on them.
      if (record === null || typeof record !== 'object' || Array.isArray(record)) continue;

      const { labels, metrics } = partitionRecord(record);
      const labelBlock = buildLabelBlock(labels);

      for (const [metricField, value] of Object.entries(metrics)) {
        const metricName = normalizeMetricName(`${schemaId}_${metricField}`);
        if (!samplesByMetric.has(metricName)) {
          samplesByMetric.set(metricName, []);
        }
        samplesByMetric
          .get(metricName)
          .push(`${metricName}${labelBlock} ${formatSampleValue(value)}`);
      }
    }
  }

  const lines = [];

  // Sort metric names for deterministic output (snapshot-test-friendly).
  for (const metricName of [...samplesByMetric.keys()].sort()) {
    const help = helpMap[metricName] || `voyage stats — ${metricName.slice(METRIC_PREFIX.length)}`;
    lines.push(`# HELP ${metricName} ${escapeHelpText(help)}`);
    lines.push(`# TYPE ${metricName} ${inferMetricType(metricName)}`);

    // Sort samples for determinism.
    for (const sample of samplesByMetric.get(metricName).sort()) {
      lines.push(sample);
    }
  }

  return lines.length > 0 ? `${lines.join('\n')}\n` : '';
}

/**
 * Heuristic TYPE for a metric name. Pure & deterministic.
 *
 * - counter: names ending in _total / _count / _passed / _failed / _skipped
 * - gauge:   everything else, including duration-like names
 *            (*_ms, *_duration, *_p50, *_p99, *_seconds)
 *
 * 'histogram' is never returned. In the Prometheus exposition format a
 * histogram family is exposed as `<name>_bucket{le="..."}`, `<name>_sum` and
 * `<name>_count` series, whereas this module writes a single scalar sample
 * under the metric's own name. Declaring such a metric as a histogram yields
 * a malformed family, so scalar durations and percentiles are typed as gauge.
 *
 * @param {string} metricName  Full (already normalized) metric name.
 * @returns {'counter'|'gauge'}
 */
function inferMetricType(metricName) {
  if (/_(?:total|count|passed|failed|skipped)$/.test(metricName)) return 'counter';
  return 'gauge';
}

export { normalizeMetricName, partitionRecord, inferMetricType };