Cold full-brief re-review (S10) reached a class the S7->S9 algorithm-stat lens never did: BLOCKER — post-feedback-monitor published as Haiku in four surfaces (README:259, skills/linkedin-studio:159 with wrong color Green too, skills/linkedin-analytics:41, agents-capability-matrix:20) while agents/post-feedback-monitor.md runs Opus. v4.0.0's Opus promotion never reached the user-facing tables. Synced all to Opus/Lime. Refreshed agents-capability-matrix.md (frozen at the v2.0 14-agent era): header 14->19, +5 missing longform agents, tier counts Opus 2->8 / Haiku 1->0, longform-gate diagram updated to the real 8-Opus-agent chain. MAJOR — de-branded docs/plan-fullspektrum-innholdsmotor.md:70 (model brand + jan-2026 asserted as fact -> no-name/no-month relevance-model phrasing). It was the only tracked survivor; the rest live in gitignored ROADMAP.md / .claude/research/ (not shipped, out of honesty scope). META — added Section 10 model-consistency guard (scripts/check-model-consistency.mjs): each agents/*.md model: must match every surface declaration AND the canonical rosters must list all 19 agents. Permanent non-vacuity self-test + e2e mutation-proven. Pre-patch sweep confirmed post-feedback-monitor was the sole drifted agent (89 model rows, 0 other mismatches). test-runner.sh 68/0/0, node --test 94/94. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
174 lines
7.1 KiB
JavaScript
174 lines
7.1 KiB
JavaScript
#!/usr/bin/env node
|
|
// Agent Model-Consistency guard (remediation S11).
//
// The source of truth for an agent's model is its own frontmatter
// (`agents/<name>.md` → `model:`). Every user-facing surface that DECLARES an
// agent's model in a table — README.md, CLAUDE.md, docs/agents-capability-matrix.md,
// and the skills/*/SKILL.md rosters — must declare that same model. The v4.0.0
// Opus promotion of `post-feedback-monitor` reached the agent frontmatter and
// CLAUDE.md but NOT README/SKILL/matrix, so the README publicly stated a false
// fact (Haiku) about a shipped Opus agent. The structure lint had version/count/
// stat guards but no per-agent model-consistency guard, so nothing failed on it.
// This closes that meta-gap: agent-model drift now fails the same suite that
// defines the registration contract.
//
// Two checks:
//   1. MODEL-CORRECTNESS — every agent row in ANY model-table surface (canonical
//      rosters + the curated domain SKILLs) must declare the frontmatter model.
//   2. ROSTER-COMPLETENESS — the canonical complete-roster surfaces must mention
//      EVERY agent (this is what catches the matrix frozen at "14 specialized
//      agents"; the domain SKILLs are deliberately curated subsets and are
//      exempt from completeness, checked for correctness only).
//
// A permanent non-vacuity self-test runs BEFORE the real scan on every
// invocation (mirrors Section 8's STALE_STATS self-test): a checker that cannot
// catch a deliberately-mismatched probe — or that false-flags a correct one —
// is not enforcing the criterion, so it fails the suite instead of silently
// certifying nothing. This is the S7→S10 lesson (a proof run once by hand and
// never committed lets a survivor slip) applied to the model axis.
//
// Zero dependencies (node:fs only). bash 3.2-safe caller: invoked from
// scripts/test-runner.sh Section 10, exit code mapped to pass/fail.
|
|
|
|
import { readdirSync, readFileSync, existsSync } from "node:fs";
|
|
import { fileURLToPath } from "node:url";
|
|
import { dirname, join } from "node:path";
|
|
|
|
// Repository root: the parent of the directory that contains this script.
const ROOT = join(dirname(fileURLToPath(import.meta.url)), "..");

// Recognised model tiers. Case-insensitive, whole-word; capture group 1 is the
// tier name as written in the scanned text.
const MODEL_RE = /\b(opus|sonnet|haiku)\b/i;

// Canonical surfaces that MUST list every agent (complete rosters).
// Frozen so no caller can alter the contract at runtime.
const CANONICAL = Object.freeze([
  "README.md",
  "CLAUDE.md",
  "docs/agents-capability-matrix.md",
  "skills/linkedin-studio/SKILL.md",
]);

// Additional surfaces that declare some agent models but are curated subsets
// (per-domain SKILLs). Checked for model-correctness only, not completeness.
const SUBSET = Object.freeze([
  "skills/linkedin-analytics/SKILL.md",
  "skills/linkedin-content-creation/SKILL.md",
  "skills/linkedin-networking/SKILL.md",
  "skills/linkedin-strategy/SKILL.md",
  "skills/linkedin-voice/SKILL.md",
]);
|
|
|
|
// --- Truth from agent frontmatter ---
|
|
/**
 * Build the agent → model map from the frontmatter of every `*.md` file in
 * the agents directory.
 *
 * @param {string} [agentsDir] Directory holding the agent definitions.
 *   Defaults to `<ROOT>/agents`.
 * @returns {Record<string, string>} Agent name (file name without `.md`)
 *   mapped to its lower-cased `model:` value, or "" when the file has no
 *   frontmatter block or no model value.
 */
function loadTruth(agentsDir = join(ROOT, "agents")) {
  const truth = {};
  for (const file of readdirSync(agentsDir)) {
    if (!file.endsWith(".md")) continue;

    // Frontmatter is the text between the first two `---` delimiter lines;
    // trailing blanks after the dashes are tolerated.
    const source = readFileSync(join(agentsDir, file), "utf8");
    const frontmatter = source.split(/^---[ \t]*$/m)[1] ?? "";

    // `[ \t]*` rather than `\s*`: an empty `model:` must yield "" instead of
    // swallowing the newline and capturing the following frontmatter line.
    const raw = frontmatter.match(/^model:[ \t]*(.*)$/m)?.[1] ?? "";

    truth[file.replace(/\.md$/, "")] = raw
      .replace(/\s+#.*$/, "") // drop a trailing YAML comment
      .trim()
      .replace(/^(["'])(.*)\1$/, "$2") // unwrap a quoted scalar
      .trim()
      .toLowerCase();
  }
  return truth;
}
|
|
|
|
// --- Core, testable primitives (exercised by the self-test on synthetic input) ---
|
|
|
|
// Every table row that names an agent AND carries a short model cell must match.
|
|
/**
 * Find table rows whose declared model disagrees with the agent's frontmatter.
 *
 * A row is considered only when it contains a `|`, one of its cells is exactly
 * a known agent name (backticks/asterisks ignored), and ANOTHER cell is a short
 * model cell (under 12 characters and matching MODEL_RE). Rows without both are
 * skipped.
 *
 * @param {string} text Full text of a surface file.
 * @param {Record<string, string>} truth Agent name → lower-cased frontmatter model.
 * @returns {{line: number, agent: string, declared: string, truth: string}[]}
 *   One entry per mismatching row; `line` is 1-based.
 */
function modelMismatches(text, truth) {
  const MAX_MODEL_CELL_LENGTH = 12;
  const known = new Set(Object.keys(truth));
  const stripMarkup = (cell) => cell.replace(/[`*]/g, "").trim();
  const out = [];

  text.split("\n").forEach((line, index) => {
    if (!line.includes("|")) return;

    const cells = line.split("|").map((cell) => cell.trim());
    const nameIndex = cells.findIndex((cell) => known.has(stripMarkup(cell)));
    if (nameIndex === -1) return;
    const agent = stripMarkup(cells[nameIndex]);

    // The name cell is excluded: a short agent name that contains a tier word
    // (e.g. `haiku-bot`) must not be read as the row's model declaration.
    const modelCell = cells.find(
      (cell, i) => i !== nameIndex && cell.length < MAX_MODEL_CELL_LENGTH && MODEL_RE.test(cell),
    );
    if (modelCell === undefined) return;

    const declared = modelCell.match(MODEL_RE)[1].toLowerCase();
    if (declared !== truth[agent]) {
      out.push({ line: index + 1, agent, declared, truth: truth[agent] });
    }
  });

  return out;
}
|
|
|
|
// Agents not mentioned anywhere in a canonical surface (word-boundaried so
|
|
// `content-reviewer` does not satisfy `content-repurposer`).
|
|
/**
 * List the agents that are not mentioned anywhere in a surface's text.
 *
 * A mention is the agent name bounded on both sides by the start/end of the
 * text or by a character that cannot be part of an agent identifier (letters,
 * digits and `-` are identifier characters). So `content-reviewer` does not
 * satisfy `reviewer`, and `agent-10` does not satisfy `agent-1`.
 *
 * @param {string} text Full text of a surface file.
 * @param {string[]} names Agent names that are expected to appear.
 * @returns {string[]} The names with no mention, in input order.
 */
function missingAgents(text, names) {
  // Names are matched literally, so regex metacharacters must be escaped.
  const escapeRegExp = (s) => s.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
  return names.filter((name) => {
    const mention = new RegExp(`(^|[^A-Za-z0-9-])${escapeRegExp(name)}([^A-Za-z0-9-]|$)`);
    return !mention.test(text);
  });
}
|
|
|
|
// --- Permanent non-vacuity self-test (runs every invocation, before the scan) ---
|
|
/**
 * Non-vacuity self-test: feed synthetic probes to `modelMismatches` and
 * `missingAgents` and confirm each one flags the bad probe and accepts the
 * good one.
 *
 * @param {Record<string, string>} truth Agent name → lower-cased frontmatter model.
 * @returns {string[]} One message per failed probe; empty when every probe
 *   behaved as expected.
 */
function selfTest(truth) {
  const names = Object.keys(truth);

  // Without at least one agent there is nothing to build a probe from; say so
  // explicitly instead of reporting failures for probes built on `undefined`.
  if (names.length === 0) {
    return ["no agents loaded — self-test has nothing to probe"];
  }

  const probe = names[0];
  const capitalise = (model) => model.charAt(0).toUpperCase() + model.slice(1);
  // Any tier that differs from the probe agent's real one.
  const wrongModel = truth[probe] === "opus" ? "sonnet" : "opus";
  const roster = (list) => list.map((n) => `\`${n}\``).join(" ");
  const failures = [];

  // POSITIVE (correctness): a row with the WRONG model must be flagged.
  const wrongRow = `| \`${probe}\` | ${capitalise(wrongModel)} | Lime | desc |`;
  if (modelMismatches(wrongRow, truth).length !== 1) {
    failures.push("model mismatch probe not caught");
  }

  // NEGATIVE (correctness): a row with the CORRECT model must NOT be flagged.
  const rightRow = `| \`${probe}\` | ${capitalise(truth[probe])} | Lime | desc |`;
  if (modelMismatches(rightRow, truth).length !== 0) {
    failures.push("correct model probe false-flagged");
  }

  // POSITIVE (completeness): a roster missing one agent must be flagged.
  if (!missingAgents(roster(names.slice(1)), names).includes(probe)) {
    failures.push("missing-agent probe not caught");
  }

  // NEGATIVE (completeness): a full roster must NOT be flagged.
  if (missingAgents(roster(names), names).length !== 0) {
    failures.push("full roster false-flagged");
  }

  return failures;
}
|
|
|
|
// --- Main ---
|
|
/**
 * Run the guard: load the frontmatter truth, run the self-test, then scan the
 * surfaces. Prints a report to stdout and terminates the process with exit
 * code 0 when no problem was found, 1 otherwise (including self-test failure).
 */
function main() {
  const truth = loadTruth();
  const names = Object.keys(truth);

  const selfTestFailures = selfTest(truth);
  if (selfTestFailures.length > 0) {
    console.log("SELFTEST FAIL — model-consistency guard is not enforcing the criterion:");
    for (const failure of selfTestFailures) console.log(" - " + failure);
    process.exit(1);
  }
  console.log(
    `self-test OK: model-mismatch + missing-agent probes caught, correct probes ignored (truth = ${names.length} agents)`,
  );

  // Read every existing surface once; both checks below share the text.
  const surfaceText = new Map();
  for (const rel of [...CANONICAL, ...SUBSET]) {
    const path = join(ROOT, rel);
    if (existsSync(path)) surfaceText.set(rel, readFileSync(path, "utf8"));
  }

  const problems = [];

  // Check 1: model-correctness across every model-table surface. A missing
  // surface is skipped here; missing canonical surfaces are reported by check 2.
  for (const rel of [...CANONICAL, ...SUBSET]) {
    if (!surfaceText.has(rel)) continue;
    for (const m of modelMismatches(surfaceText.get(rel), truth)) {
      problems.push(`${rel}:${m.line} ${m.agent} declared=${m.declared} but frontmatter=${m.truth}`);
    }
  }

  // Check 2: roster-completeness on the canonical surfaces.
  for (const rel of CANONICAL) {
    if (!surfaceText.has(rel)) {
      problems.push(`${rel} MISSING (canonical roster surface)`);
      continue;
    }
    const missing = missingAgents(surfaceText.get(rel), names);
    if (missing.length > 0) {
      problems.push(`${rel} does not list ${missing.length} agent(s): ${missing.join(", ")}`);
    }
  }

  if (problems.length === 0) {
    console.log(`model-consistency OK: ${names.length} agents, all surface declarations match frontmatter`);
    process.exit(0);
  }

  console.log("model-consistency FAIL — agent model/roster drift:");
  for (const problem of problems) console.log(" ✗ " + problem);
  process.exit(1);
}
|
|
|
|
// Entry point: run the guard; main() ends the process with the suite's exit code.
main();
|