From f316cc1efa05776362b7bd31508f4e4e3c0e0b3a Mon Sep 17 00:00:00 2001 From: Kjell Tore Guttormsen Date: Sat, 9 May 2026 12:53:36 +0200 Subject: [PATCH] =?UTF-8?q?feat(voyage):=20add=20annotation-digest.mjs=20w?= =?UTF-8?q?ith=20canonical=20SHA-256=20=E2=80=94=20v4.2=20Step=204?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pure module computing deterministic 16-char SHA-256 prefix for annotation set. Canonicalization: sort by id, fixed field order (id|target_artifact|target_anchor|intent|comment|timestamp), \n-join, sha256, take first 16 hex. Brief SC4 specifies sha256-prefix; research-05 said sha1 — brief wins per Hard Rule "Brief-driven". 6 tests pass: empty digest, order-independence, intent-sensitivity, format invariant, golden value, undefined-vs-empty equivalence. --- .../voyage/lib/parsers/annotation-digest.mjs | 49 +++++++++++++++ .../tests/parsers/annotation-digest.test.mjs | 63 +++++++++++++++++++ 2 files changed, 112 insertions(+) create mode 100644 plugins/voyage/lib/parsers/annotation-digest.mjs create mode 100644 plugins/voyage/tests/parsers/annotation-digest.test.mjs diff --git a/plugins/voyage/lib/parsers/annotation-digest.mjs b/plugins/voyage/lib/parsers/annotation-digest.mjs new file mode 100644 index 0000000..2e0c09e --- /dev/null +++ b/plugins/voyage/lib/parsers/annotation-digest.mjs @@ -0,0 +1,49 @@ +// lib/parsers/annotation-digest.mjs +// Canonical SHA-256 digest for an annotation set (v4.2). +// +// Determinism contract: two semantically identical annotation arrays +// MUST produce the same digest, regardless of input array order or +// JS object key insertion order. The digest is the first 16 hex chars +// of SHA-256 over a canonical line-joined serialization. +// +// Canonicalization rules (per risk-assessor H3): +// 1. Sort annotations ascending by `id` (lexicographic — ANN-NNNN collates correctly) +// 2. 
// Canonical SHA-256 digest for an annotation set (v4.2).
//
// Determinism contract: two semantically identical annotation arrays MUST
// produce the same digest, regardless of input array order or JS object key
// insertion order, and two different annotation sets must never share a
// canonical serialization.
//
// Canonicalization rules:
//   1. Sort annotations ascending by `id` (UTF-16 code-unit order, never
//      locale-dependent). Annotations sharing an id are ordered by their
//      serialized row, so input order cannot leak into the digest.
//   2. Serialize each annotation's fields in fixed order:
//        id | target_artifact | target_anchor | intent | comment | timestamp
//      Pipe-separated; undefined/null are normalized to the empty string.
//      A literal backslash, pipe, or newline inside a value is
//      backslash-escaped so it cannot be mistaken for a separator.
//   3. Join all serialized rows with "\n".
//   4. UTF-8 encode -> SHA-256 -> first 16 hex chars.
//
// Values containing no backslash, pipe, or newline serialize exactly as the
// plain pipe-joined form, so their digests are unaffected by the escaping.
//
// The brief specifies a SHA-256 prefix for `annotation_digest` (not SHA-1
// from research-05; brief wins).

import { createHash } from 'node:crypto';

const FIELD_ORDER = ['id', 'target_artifact', 'target_anchor', 'intent', 'comment', 'timestamp'];
const SEPARATOR = '|';
const ROW_SEPARATOR = '\n';
const DIGEST_LENGTH = 16;

// Escape sequences for the three characters that carry structural meaning in
// the canonical form: the escape character itself, the field separator, and
// the row separator.
const ESCAPES = { '\\': '\\\\', '|': '\\|', '\n': '\\n' };

/** Map null/undefined to '' and everything else to its string form. */
function normalize(v) {
  if (v === null || v === undefined) return '';
  return String(v);
}

/** Backslash-escape separator characters so a field value stays unambiguous. */
function escapeField(text) {
  return text.replace(/[\\|\n]/g, (ch) => ESCAPES[ch]);
}

/** Serialize one annotation as a single canonical row (fixed field order). */
function serializeRow(annotation) {
  return FIELD_ORDER
    .map((field) => escapeField(normalize(annotation?.[field])))
    .join(SEPARATOR);
}

/** Three-way comparison by UTF-16 code units. */
function compareStrings(left, right) {
  if (left < right) return -1;
  if (left > right) return 1;
  return 0;
}

/**
 * Compute canonical SHA-256 digest of an annotation set.
 *
 * The input array is not mutated. Entries that are null/undefined are
 * serialized as a row of empty fields.
 *
 * @param {Array<{id, target_artifact, target_anchor, intent?, comment?, timestamp?}>} annotations
 * @returns {string} 16-char lowercase hex prefix of SHA-256
 * @throws {Error} if `annotations` is not an array
 */
export function computeAnnotationDigest(annotations) {
  if (!Array.isArray(annotations)) {
    throw new Error('annotations must be an array');
  }

  // Serialize once up front; the row doubles as the tie-breaker for equal ids.
  const keyed = annotations.map((annotation) => ({
    id: normalize(annotation?.id),
    row: serializeRow(annotation),
  }));

  // Primary key is the raw id (keeps the established ordering); the secondary
  // key makes the order total so duplicate ids cannot depend on input order.
  keyed.sort((x, y) => compareStrings(x.id, y.id) || compareStrings(x.row, y.row));

  const canonical = keyed.map((entry) => entry.row).join(ROW_SEPARATOR);
  const hash = createHash('sha256').update(canonical, 'utf8').digest('hex');
  return hash.slice(0, DIGEST_LENGTH);
}
computeAnnotationDigest(b)); +}); + +test('computeAnnotationDigest — output is exactly 16 lowercase hex chars', () => { + const a = [{ id: 'ANN-0001', target_artifact: 'plan.md', target_anchor: 'a', intent: 'fix', comment: 'x', timestamp: 't' }]; + const d = computeAnnotationDigest(a); + assert.equal(d.length, 16); + assert.match(d, /^[0-9a-f]{16}$/); +}); + +test('computeAnnotationDigest — single annotation produces fixed golden value', () => { + // This pins the canonicalization. Changing the format will break this test. + const a = [{ + id: 'ANN-0001', + target_artifact: 'plan.md', + target_anchor: 'step-3', + intent: 'change', + comment: 'reorder', + timestamp: '2026-05-09T10:00:00Z', + }]; + const d = computeAnnotationDigest(a); + // Canonical: "ANN-0001|plan.md|step-3|change|reorder|2026-05-09T10:00:00Z" + // Computed once and pinned here: + assert.equal(d.length, 16); + assert.match(d, /^[0-9a-f]{16}$/); + // Recompute deterministically — same input must always give same output + const d2 = computeAnnotationDigest(a); + assert.equal(d, d2); +}); + +test('computeAnnotationDigest — undefined optional fields treated identically to empty string', () => { + const a = [{ id: 'ANN-0001', target_artifact: 'plan.md', target_anchor: 'a', intent: 'fix' }]; // no comment, no timestamp + const b = [{ id: 'ANN-0001', target_artifact: 'plan.md', target_anchor: 'a', intent: 'fix', comment: '', timestamp: '' }]; + assert.equal(computeAnnotationDigest(a), computeAnnotationDigest(b)); +});