feat(voyage): add annotation-digest.mjs with canonical SHA-256 — v4.2 Step 4

Pure module computing deterministic 16-char SHA-256 prefix for annotation set.
Canonicalization: sort by id, fixed field order (id|target_artifact|target_anchor|intent|comment|timestamp), \n-join, sha256, take first 16 hex.

Brief SC4 specifies sha256-prefix; research-05 said sha1 — brief wins per Hard Rule "Brief-driven".

6 tests pass: empty digest, order-independence, intent-sensitivity, format invariant, golden value, undefined-vs-empty equivalence.
This commit is contained in:
Kjell Tore Guttormsen 2026-05-09 12:53:36 +02:00
commit f316cc1efa
2 changed files with 112 additions and 0 deletions

View file

@ -0,0 +1,49 @@
// lib/parsers/annotation-digest.mjs
// Canonical SHA-256 digest for an annotation set (v4.2).
//
// Determinism contract: two semantically identical annotation arrays
// MUST produce the same digest, regardless of input array order or
// JS object key insertion order. The digest is the first 16 hex chars
// of SHA-256 over a canonical line-joined serialization.
//
// Canonicalization rules (per risk-assessor H3):
// 1. Sort annotations ascending by `id` (lexicographic — ANN-NNNN collates correctly)
// 2. For each annotation, serialize fields in fixed order:
// id | target_artifact | target_anchor | intent | comment | timestamp
// (pipe-separated, undefined/null normalized to empty string)
// 3. Join all serialized rows with "\n"
// 4. UTF-8 encode -> SHA-256 -> first 16 hex chars
//
// Brief SC4: "annotation_digest: <sha256-prefix>" — SHA-256 (not SHA-1
// from research-05; brief wins).
import { createHash } from 'node:crypto';
// Fixed order in which annotation fields are serialized into a canonical row.
// The digest depends on this order, so the array is frozen to guard against
// accidental mutation at runtime.
const FIELD_ORDER = Object.freeze([
  'id',
  'target_artifact',
  'target_anchor',
  'intent',
  'comment',
  'timestamp',
]);
// Delimiter placed between the field values of one serialized row.
const SEPARATOR = '|';
/**
 * Turn a field value into the text used in a canonical row.
 * `null` and `undefined` both become the empty string; any other value is
 * converted with `String()`.
 *
 * @param {*} v raw field value
 * @returns {string} string form of the value
 */
function normalize(v) {
  const isMissing = v === null || v === undefined;
  return isMissing ? '' : String(v);
}
/**
 * Compute the canonical SHA-256 digest of an annotation set.
 *
 * Each annotation is serialized as its FIELD_ORDER values (null/undefined
 * become ''), joined with SEPARATOR. The rows are sorted, joined with "\n",
 * hashed as UTF-8 with SHA-256, and the first 16 hex characters are returned.
 *
 * Rows are ordered by `id` first. Annotations that share an `id` are further
 * ordered by their full serialized row: Array.prototype.sort is stable, so
 * without this tie-break duplicated ids would keep the caller's ordering and
 * the digest would depend on input order.
 *
 * NOTE: field values are not escaped, so a value containing "|" or "\n" is
 * indistinguishable from a field or row boundary in the canonical text.
 *
 * @param {Array<{id, target_artifact, target_anchor, intent?, comment?, timestamp?}>} annotations
 * @returns {string} 16-char lowercase hex prefix of SHA-256
 * @throws {Error} if `annotations` is not an array
 */
export function computeAnnotationDigest(annotations) {
  if (!Array.isArray(annotations)) {
    throw new Error('annotations must be an array');
  }

  // Plain `<` / `>` comparison (UTF-16 code-unit order), never locale-aware,
  // so the ordering is identical on every machine.
  const compareStrings = (x, y) => (x < y ? -1 : x > y ? 1 : 0);

  // Array.from (like spread) turns holes in a sparse array into `undefined`,
  // which then serializes as a row of empty fields.
  const entries = Array.from(annotations, (a) => ({
    id: normalize(a && a.id),
    row: FIELD_ORDER.map((f) => normalize(a && a[f])).join(SEPARATOR),
  }));

  // Primary key: id. Secondary key: the whole row, which makes the order of
  // equal-id annotations a function of their content, not of input position.
  entries.sort((p, q) => compareStrings(p.id, q.id) || compareStrings(p.row, q.row));

  const canonical = entries.map((entry) => entry.row).join('\n');
  const hash = createHash('sha256').update(canonical, 'utf8').digest('hex');
  return hash.slice(0, 16);
}

View file

@ -0,0 +1,63 @@
// tests/parsers/annotation-digest.test.mjs
// Unit tests for lib/parsers/annotation-digest.mjs (v4.2)
import { test } from 'node:test';
import { strict as assert } from 'node:assert';
import { createHash } from 'node:crypto';
import { computeAnnotationDigest } from '../../lib/parsers/annotation-digest.mjs';
test('computeAnnotationDigest — empty array yields deterministic 16-char hex', () => {
  const digest = computeAnnotationDigest([]);

  // Shape checks: a string of exactly 16 lowercase hex characters.
  assert.equal(typeof digest, 'string');
  assert.equal(digest.length, 16);
  assert.match(digest, /^[0-9a-f]{16}$/);

  // No annotations means an empty canonical text, so the result is the
  // well-known SHA-256 of the empty string, truncated to 16 characters.
  assert.equal(digest, 'e3b0c44298fc1c14');
});
test('computeAnnotationDigest — array order does not affect digest', () => {
  // The same two annotations, hashed in ascending and then descending id order.
  const first = { id: 'ANN-0001', target_artifact: 'plan.md', target_anchor: 'a', intent: 'fix', comment: 'one', timestamp: 't1' };
  const second = { id: 'ANN-0002', target_artifact: 'plan.md', target_anchor: 'b', intent: 'change', comment: 'two', timestamp: 't2' };

  const forwardDigest = computeAnnotationDigest([first, second]);
  const reversedDigest = computeAnnotationDigest([second, first]);

  assert.equal(forwardDigest, reversedDigest);
});
test('computeAnnotationDigest — different intent produces different digest', () => {
  // Two single-annotation sets that are identical except for `intent`.
  const shared = { id: 'ANN-0001', target_artifact: 'plan.md', target_anchor: 'a', comment: '', timestamp: '' };

  const fixDigest = computeAnnotationDigest([{ ...shared, intent: 'fix' }]);
  const changeDigest = computeAnnotationDigest([{ ...shared, intent: 'change' }]);

  assert.notEqual(fixDigest, changeDigest);
});
test('computeAnnotationDigest — output is exactly 16 lowercase hex chars', () => {
  // A fully populated annotation; only the shape of the digest is checked.
  const digest = computeAnnotationDigest([
    {
      id: 'ANN-0001',
      target_artifact: 'plan.md',
      target_anchor: 'a',
      intent: 'fix',
      comment: 'x',
      timestamp: 't',
    },
  ]);

  assert.equal(digest.length, 16);
  assert.match(digest, /^[0-9a-f]{16}$/);
});
test('computeAnnotationDigest — single annotation produces fixed golden value', () => {
  // This pins the canonicalization. Changing the format will break this test.
  const annotation = {
    id: 'ANN-0001',
    target_artifact: 'plan.md',
    target_anchor: 'step-3',
    intent: 'change',
    comment: 'reorder',
    timestamp: '2026-05-09T10:00:00Z',
  };

  // The expected digest is derived independently of the module under test:
  // SHA-256 over the documented canonical row (fixed field order, "|"
  // separator), truncated to the first 16 hex characters. Checking only the
  // format or comparing the function with itself would not detect a change
  // in field order, separator, hash algorithm, or prefix length.
  const canonical = 'ANN-0001|plan.md|step-3|change|reorder|2026-05-09T10:00:00Z';
  const expected = createHash('sha256').update(canonical, 'utf8').digest('hex').slice(0, 16);

  const d = computeAnnotationDigest([annotation]);
  assert.equal(d.length, 16);
  assert.match(d, /^[0-9a-f]{16}$/);
  assert.equal(d, expected);

  // Recompute deterministically — same input must always give same output
  const d2 = computeAnnotationDigest([annotation]);
  assert.equal(d, d2);
});
test('computeAnnotationDigest — undefined optional fields treated identically to empty string', () => {
  const requiredOnly = { id: 'ANN-0001', target_artifact: 'plan.md', target_anchor: 'a', intent: 'fix' };

  // One set omits `comment` and `timestamp`; the other sets both to ''.
  const omitted = [requiredOnly];
  const blank = [{ ...requiredOnly, comment: '', timestamp: '' }];

  assert.equal(computeAnnotationDigest(omitted), computeAnnotationDigest(blank));
});