#!/usr/bin/env node
// Agent Model-Consistency guard (remediation S11).
//
// The source of truth for an agent's model is its own frontmatter
// (`agents/<name>.md` → `model:`). Every user-facing surface that DECLARES an
// agent's model in a table — README.md, CLAUDE.md, docs/agents-capability-matrix.md,
// and the skills/*/SKILL.md rosters — must declare that same model. The v4.0.0
// Opus promotion of `post-feedback-monitor` reached the agent frontmatter and
// CLAUDE.md but NOT README/SKILL/matrix, so the README publicly stated a false
// fact (Haiku) about a shipped Opus agent. The structure lint had version/count/
// stat guards but no per-agent model-consistency guard, so nothing failed on it.
// This closes that meta-gap: agent-model drift now fails the same suite that
// defines the registration contract.
//
// Two checks:
//   1. MODEL-CORRECTNESS — every agent row in ANY model-table surface (canonical
//      rosters + the curated domain SKILLs) must declare the frontmatter model.
//   2. ROSTER-COMPLETENESS — the canonical complete-roster surfaces must mention
//      EVERY agent (this is what catches the matrix frozen at "14 specialized
//      agents"; the domain SKILLs are deliberately curated subsets and are
//      exempt from completeness, checked for correctness only).
//
// A permanent non-vacuity self-test runs BEFORE the real scan on every
// invocation (mirrors Section 8's STALE_STATS self-test): a checker that cannot
// catch a deliberately-mismatched probe — or that false-flags a correct one —
// is not enforcing the criterion, so it fails the suite instead of silently
// certifying nothing. This is the S7→S10 lesson (a proof run once by hand and
// never committed lets a survivor slip) applied to the model axis.
//
// Zero third-party dependencies (Node built-ins only: node:fs, node:url,
// node:path). bash 3.2-safe caller: invoked from
// scripts/test-runner.sh Section 10, exit code mapped to pass/fail.
import { readdirSync, readFileSync, existsSync } from "node:fs";
import { fileURLToPath } from "node:url";
import { dirname, join } from "node:path";

// Repository root: the parent of the directory holding this script.
const ROOT = join(dirname(fileURLToPath(import.meta.url)), "..");

// The model tiers a table cell may declare (whole word, case-insensitive).
const MODEL_RE = /\b(opus|sonnet|haiku)\b/i;

// Canonical surfaces that MUST list every agent (complete rosters).
const CANONICAL = [
  "README.md",
  "CLAUDE.md",
  "docs/agents-capability-matrix.md",
  "skills/linkedin-studio/SKILL.md",
];

// Additional surfaces that declare some agent models but are curated subsets
// (per-domain SKILLs). Checked for model-correctness only, not completeness.
const SUBSET = [
  "skills/linkedin-analytics/SKILL.md",
  "skills/linkedin-content-creation/SKILL.md",
  "skills/linkedin-networking/SKILL.md",
  "skills/linkedin-strategy/SKILL.md",
  "skills/linkedin-voice/SKILL.md",
];

// --- Truth from agent frontmatter ---

/**
 * Extract the `model:` value from a markdown file's leading frontmatter.
 *
 * The frontmatter is the text between the first two lines consisting solely of
 * `---`. The value is trimmed, stripped of one pair of surrounding YAML quotes,
 * and lower-cased.
 *
 * @param {string} source - Full text of an agent definition file.
 * @returns {string} The normalised model, or "" when there is no frontmatter,
 *   no `model:` key, or the key has an empty value.
 */
function parseFrontmatterModel(source) {
  const frontmatter = source.split(/^---$/m)[1] ?? "";
  // `[ \t]*` (not `\s*`) keeps the match on the `model:` line itself; `\s*`
  // would step over the newline of an empty `model:` entry and capture the
  // following key as if it were the model.
  const match = frontmatter.match(/^model:[ \t]*(.*)$/m);
  if (!match) return "";
  return match[1]
    .trim()
    .replace(/^(["'])(.*)\1$/, "$2") // `model: "opus"` is the same scalar as `model: opus`
    .trim()
    .toLowerCase();
}

/**
 * Build the agent → model truth map from the agent definition files.
 *
 * Every `*.md` file directly inside `agentsDir` is treated as one agent whose
 * name is the file name without the `.md` suffix.
 *
 * @param {string} [agentsDir] - Directory to scan; defaults to `<ROOT>/agents`.
 * @returns {Object<string, string>} Map of agent name to lower-cased model
 *   ("" when the file declares none).
 * @throws {Error} Propagates filesystem errors from `readdirSync` /
 *   `readFileSync` (e.g. the directory does not exist).
 */
function loadTruth(agentsDir = join(ROOT, "agents")) {
  const truth = {};
  for (const file of readdirSync(agentsDir)) {
    if (!file.endsWith(".md")) continue;
    const source = readFileSync(join(agentsDir, file), "utf8");
    truth[file.replace(/\.md$/, "")] = parseFrontmatterModel(source);
  }
  return truth;
}

// --- Core, testable primitives (exercised by the self-test on synthetic input) ---

// Every table row that names an agent AND carries a short model cell must match.
/**
 * Find markdown table rows whose declared model disagrees with the truth map.
 *
 * A row is considered only when it contains `|`, one of its cells (after
 * removing backticks/asterisks) is exactly a known agent name, and another
 * cell shorter than 12 characters contains a model word.
 *
 * @param {string} text - Contents of a surface file.
 * @param {Object<string, string>} truth - Agent name → lower-cased model.
 * @returns {Array<{line: number, agent: string, declared: string, truth: string}>}
 *   One entry per mismatching row; `line` is 1-based.
 */
function modelMismatches(text, truth) {
  const known = new Set(Object.keys(truth));
  const stripMarkup = (cell) => cell.replace(/[`*]/g, "").trim();
  const out = [];
  text.split("\n").forEach((ln, i) => {
    if (!ln.includes("|")) return;
    const cells = ln.split("|").map((c) => c.trim());
    const nameIdx = cells.findIndex((c) => known.has(stripMarkup(c)));
    if (nameIdx === -1) return;
    const agent = stripMarkup(cells[nameIdx]);
    // Skip the name cell itself: a short agent name that contains a model word
    // (e.g. `opus-qa`) must not be mistaken for the row's model declaration.
    const modelCell = cells.find(
      (c, idx) => idx !== nameIdx && c.length < 12 && MODEL_RE.test(c),
    );
    if (!modelCell) return;
    const declared = modelCell.match(MODEL_RE)[1].toLowerCase();
    if (declared !== truth[agent]) {
      out.push({ line: i + 1, agent, declared, truth: truth[agent] });
    }
  });
  return out;
}

/**
 * List the agents that are not mentioned anywhere in a surface's text.
 *
 * A mention must be delimited on both sides by the start/end of the text or a
 * character that cannot continue an agent name (lower-case letter, digit or
 * hyphen), so one agent's name appearing inside a longer name does not count.
 *
 * @param {string} text - Contents of a canonical roster surface.
 * @param {string[]} names - All agent names.
 * @returns {string[]} The names with no delimited mention in `text`.
 */
function missingAgents(text, names) {
  return names.filter((name) => {
    // Names come from file names, so neutralise any regex metacharacters.
    const escaped = name.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
    const mention = new RegExp(`(^|[^a-z0-9-])${escaped}([^a-z0-9-]|$)`);
    return !mention.test(text);
  });
}

// --- Permanent non-vacuity self-test (runs every invocation, before the scan) ---

/**
 * Prove on synthetic input that both primitives still detect what they must
 * and stay silent on correct input.
 *
 * @param {Object<string, string>} truth - Agent name → lower-cased model.
 * @returns {string[]} Descriptions of failed probes; empty when all pass.
 */
function selfTest(truth) {
  const names = Object.keys(truth);
  if (names.length === 0) {
    // Without at least one agent there is nothing to build probes from; report
    // that instead of crashing while formatting an undefined model.
    return ["no agents found in frontmatter truth (nothing to probe with)"];
  }
  const a = names[0];
  const cap = (m) => m.charAt(0).toUpperCase() + m.slice(1);
  // Any model that is guaranteed to differ from the probe agent's real one.
  const other = truth[a] === "opus" ? "sonnet" : "opus";
  const failures = [];

  // POSITIVE (correctness): a row with the WRONG model must be flagged.
  const wrongRow = `| \`${a}\` | ${cap(other)} | Lime | desc |`;
  if (modelMismatches(wrongRow, truth).length !== 1) {
    failures.push("model mismatch probe not caught");
  }

  // NEGATIVE (correctness): a row with the CORRECT model must NOT be flagged.
  const rightRow = `| \`${a}\` | ${cap(truth[a])} | Lime | desc |`;
  if (modelMismatches(rightRow, truth).length !== 0) {
    failures.push("correct model probe false-flagged");
  }

  // POSITIVE (completeness): a roster missing one agent must be flagged.
  const rosterMissing = names.slice(1).map((n) => `\`${n}\``).join(" ");
  if (!missingAgents(rosterMissing, names).includes(a)) {
    failures.push("missing-agent probe not caught");
  }

  // NEGATIVE (completeness): a full roster must NOT be flagged.
  const fullRoster = names.map((n) => `\`${n}\``).join(" ");
  if (missingAgents(fullRoster, names).length !== 0) {
    failures.push("full roster false-flagged");
  }

  return failures;
}

// --- Main ---

/**
 * Entry point: run the self-test, then both checks, print a report and exit.
 *
 * Exit code 0 means every surface agrees with the frontmatter; exit code 1
 * means the self-test failed or at least one problem was found.
 */
function main() {
  const truth = loadTruth();
  const names = Object.keys(truth);

  const stFailures = selfTest(truth);
  if (stFailures.length > 0) {
    console.log("SELFTEST FAIL — model-consistency guard is not enforcing the criterion:");
    stFailures.forEach((f) => console.log(" - " + f));
    process.exit(1);
  }
  console.log(
    `self-test OK: model-mismatch + missing-agent probes caught, correct probes ignored (truth = ${names.length} agents)`,
  );

  const problems = [];

  // Check 1: model-correctness across every model-table surface. A surface
  // that does not exist is skipped here; for canonical surfaces its absence is
  // reported by Check 2.
  for (const rel of [...CANONICAL, ...SUBSET]) {
    if (!existsSync(join(ROOT, rel))) continue;
    const text = readFileSync(join(ROOT, rel), "utf8");
    for (const m of modelMismatches(text, truth)) {
      problems.push(`${rel}:${m.line} ${m.agent} declared=${m.declared} but frontmatter=${m.truth}`);
    }
  }

  // Check 2: roster-completeness on the canonical surfaces.
  for (const rel of CANONICAL) {
    if (!existsSync(join(ROOT, rel))) {
      problems.push(`${rel} MISSING (canonical roster surface)`);
      continue;
    }
    const missing = missingAgents(readFileSync(join(ROOT, rel), "utf8"), names);
    if (missing.length > 0) {
      problems.push(`${rel} does not list ${missing.length} agent(s): ${missing.join(", ")}`);
    }
  }

  if (problems.length === 0) {
    console.log(`model-consistency OK: ${names.length} agents, all surface declarations match frontmatter`);
    process.exit(0);
  }
  console.log("model-consistency FAIL — agent model/roster drift:");
  problems.forEach((p) => console.log(" ✗ " + p));
  process.exit(1);
}

main();