// lib/parsers/annotation-digest.mjs
// Canonical SHA-256 digest for an annotation set (v4.2).
//
// Determinism contract: two semantically identical annotation arrays
// MUST produce the same digest, regardless of input array order or
// JS object key insertion order. The digest is the first 16 hex chars
// of SHA-256 over a canonical line-joined serialization.
//
// Canonicalization rules (per risk-assessor H3):
//   1. Sort annotations ascending by `id` (lexicographic — ANN-NNNN collates
//      correctly). Annotations sharing an `id` are ordered by their
//      serialized row, so duplicate ids cannot make the digest depend on
//      input order.
//   2. For each annotation, serialize fields in fixed order:
//        id | target_artifact | target_anchor | intent | comment | timestamp
//      (pipe-separated, undefined/null normalized to empty string)
//   3. Join all serialized rows with "\n"
//   4. UTF-8 encode -> SHA-256 -> first 16 hex chars
//
// NOTE(review): field values are not escaped, so a value containing "|" or
// "\n" can serialize identically to a different annotation set. Escaping
// would change existing digests, so it is intentionally left as-is here.
//
// Brief SC4: "annotation_digest: " — SHA-256 (not SHA-1
// from research-05; brief wins).

import { createHash } from 'node:crypto';

// Fixed serialization order; never derived from object key order.
const FIELD_ORDER = ['id', 'target_artifact', 'target_anchor', 'intent', 'comment', 'timestamp'];
const SEPARATOR = '|';

/**
 * Normalize a field value for serialization.
 *
 * @param {*} v - Raw field value.
 * @returns {string} Empty string for null/undefined, otherwise `String(v)`.
 */
function normalize(v) {
  if (v === null || v === undefined) return '';
  return String(v);
}

/**
 * Serialize one annotation into its canonical pipe-separated row.
 *
 * @param {*} annotation - Annotation object; falsy entries are tolerated.
 * @returns {string} Fields in FIELD_ORDER joined by SEPARATOR.
 */
function serializeRow(annotation) {
  // `annotation && annotation[field]` (not `?.`) keeps the historical output
  // for falsy non-null entries such as 0, which stringify to themselves.
  return FIELD_ORDER.map((field) => normalize(annotation && annotation[field])).join(SEPARATOR);
}

/**
 * Three-way comparison of two strings using the `<` / `>` operators
 * (UTF-16 code-unit order, locale independent).
 *
 * @param {string} left
 * @param {string} right
 * @returns {number} -1, 0, or 1.
 */
function compareCodeUnits(left, right) {
  if (left < right) return -1;
  if (left > right) return 1;
  return 0;
}

/**
 * Compute canonical SHA-256 digest of an annotation set.
 *
 * The input array is not mutated; a copy is sorted.
 *
 * @param {Array<{id, target_artifact, target_anchor, intent?, comment?, timestamp?}>} annotations
 * @returns {string} 16-char lowercase hex prefix of SHA-256
 * @throws {Error} If `annotations` is not an array.
 */
export function computeAnnotationDigest(annotations) {
  if (!Array.isArray(annotations)) {
    throw new Error('annotations must be an array');
  }

  const sorted = [...annotations].sort((a, b) => {
    const byId = compareCodeUnits(normalize(a && a.id), normalize(b && b.id));
    if (byId !== 0) return byId;
    // Tie-break on the full canonical row: without this, entries sharing an
    // id keep their input order and the digest becomes order-dependent.
    return compareCodeUnits(serializeRow(a), serializeRow(b));
  });

  const canonical = sorted.map((annotation) => serializeRow(annotation)).join('\n');
  const hash = createHash('sha256').update(canonical, 'utf8').digest('hex');
  return hash.slice(0, 16);
}