ktg-plugin-marketplace/plugins/voyage/lib/parsers/annotation-digest.mjs

// lib/parsers/annotation-digest.mjs
// Canonical SHA-256 digest for an annotation set (v4.2).
//
// Determinism contract: two semantically identical annotation arrays
// MUST produce the same digest, regardless of input array order or
// JS object key insertion order. The digest is the first 16 hex chars
// of SHA-256 over a canonical line-joined serialization.
//
// Canonicalization rules (per risk-assessor H3):
//   1. Sort annotations ascending by `id` (lexicographic — ANN-NNNN collates correctly)
//   2. For each annotation, serialize fields in fixed order:
//        id | target_artifact | target_anchor | intent | comment | timestamp
//      (pipe-separated, undefined/null normalized to empty string)
//   3. Join all serialized rows with "\n"
//   4. UTF-8 encode -> SHA-256 -> first 16 hex chars
//
// Brief SC4: "annotation_digest: <sha256-prefix>" — SHA-256 (not SHA-1
// from research-05; brief wins).

import { createHash } from 'node:crypto';

// Fixed order in which annotation fields are serialized into a row. The digest
// depends on this exact order, so the array is frozen to rule out accidental
// mutation at runtime.
const FIELD_ORDER = Object.freeze([
  'id',
  'target_artifact',
  'target_anchor',
  'intent',
  'comment',
  'timestamp',
]);
// Delimiter placed between the serialized fields of a single row.
const SEPARATOR = '|';

/**
 * Coerce a field value to the string form used in the canonical row.
 *
 * @param {*} v - raw field value
 * @returns {string} empty string for null/undefined, otherwise `String(v)`
 */
function normalize(v) {
  const isMissing = v === undefined || v === null;
  return isMissing ? '' : String(v);
}

/**
 * Compute the canonical SHA-256 digest of an annotation set.
 *
 * Each annotation is serialized as its FIELD_ORDER values (null/undefined
 * become ''), joined with SEPARATOR. Rows are ordered by `id` and joined with
 * "\n"; the result is hashed as UTF-8 and truncated to 16 hex characters.
 *
 * Annotations that share an `id` (or lack one) are additionally ordered by
 * their full serialized row, so the digest does not depend on input order
 * even when ids collide. Sets with unique ids hash exactly as before.
 *
 * NOTE: field values are not escaped, so a value containing the separator or
 * a newline can make two different sets serialize to the same text.
 *
 * @param {Array<{id, target_artifact, target_anchor, intent?, comment?, timestamp?}>} annotations
 * @returns {string} 16-char lowercase hex prefix of SHA-256
 * @throws {Error} if `annotations` is not an array
 */
export function computeAnnotationDigest(annotations) {
  if (!Array.isArray(annotations)) {
    throw new Error('annotations must be an array');
  }

  // One canonical row per annotation; `a && ...` tolerates null entries.
  const serialize = (a) => FIELD_ORDER.map((f) => normalize(a && a[f])).join(SEPARATOR);
  // Plain code-unit comparison: locale-independent, hence reproducible.
  const compare = (x, y) => (x < y ? -1 : x > y ? 1 : 0);

  // Sort a copy so the caller's array is left untouched. The row tie-break is
  // only evaluated when two ids compare equal.
  const sorted = [...annotations].sort(
    (a, b) =>
      compare(normalize(a && a.id), normalize(b && b.id)) ||
      compare(serialize(a), serialize(b)),
  );

  const canonical = sorted.map(serialize).join('\n');
  const hash = createHash('sha256').update(canonical, 'utf8').digest('hex');
  return hash.slice(0, 16);
}