test(ultraplan-local): add plan-determinism + review-determinism synthetic fixtures (SC7 floor)
Adds 6 files in tests/synthetic/ exercising the determinism pipeline at the SC7 brief floor (Jaccard >= 0.833). Plan fixture pair: 40 step titles each with 38 shared (Jaccard 0.905). Review fixture pair: 30 finding-IDs each with 28 shared (Jaccard 0.875). Reuses lib/parsers/jaccard.mjs + lib/parsers/finding-id.mjs. The new pair coexists with tests/lib/review-determinism.test.mjs which holds the older SC4 (0.70) floor against tests/fixtures/ultrareview/. The lower floor protects against pipeline regressions; the higher floor anchors the speedup brief's determinism aspiration. [skip-docs]
This commit is contained in:
parent
b1738b419c
commit
0c0a87e709
6 changed files with 425 additions and 0 deletions
|
|
@ -0,0 +1,63 @@
|
|||
// tests/synthetic/plan-determinism.test.mjs
|
||||
// SC7 plan-determinism floor — Jaccard pipeline test.
|
||||
//
|
||||
// Reads two synthetic plan-run fixtures and asserts that
|
||||
// jaccardSimilarity(stepsA, stepsB) >= 0.833 (stepsX = frontmatter `steps` array of plan-run-X).
|
||||
//
|
||||
// This exercises the determinism pipeline (parser + jaccard) on a known
|
||||
// input pair. It does NOT measure real-LLM determinism — that is deferred
|
||||
// to a future run of the pipeline against examples/01-add-verbose-flag/.
|
||||
|
||||
import { test } from 'node:test';
|
||||
import { strict as assert } from 'node:assert';
|
||||
import { readFileSync } from 'node:fs';
|
||||
import { join, dirname } from 'node:path';
|
||||
import { fileURLToPath } from 'node:url';
|
||||
import { jaccardSimilarity } from '../../lib/parsers/jaccard.mjs';
|
||||
import { parseDocument } from '../../lib/util/frontmatter.mjs';
|
||||
|
||||
// Directory that contains this test file.
const HERE = dirname(fileURLToPath(import.meta.url));

// Two directory levels above this file; fixture paths are resolved against it.
const ROOT = join(HERE, '..', '..');

// Minimum Jaccard similarity the synthetic pair must reach (SC7 floor).
const SC7_THRESHOLD = 0.833;
|
||||
|
||||
/**
 * Load the `steps` array from the frontmatter of a synthetic plan fixture.
 *
 * @param {string} rel - Fixture path, resolved relative to ROOT.
 * @returns {string[]} The entries listed under `steps` in the frontmatter.
 * @throws {AssertionError} If the frontmatter does not parse, `steps` is not
 *   an array, or any entry is not a non-empty string.
 */
function loadSteps(rel) {
  const text = readFileSync(join(ROOT, rel), 'utf-8');
  const doc = parseDocument(text);

  // Tolerate error entries that are not Error-like (e.g. plain strings) so the
  // failure message never degrades to "undefined, undefined".
  const reasons = (doc.errors || []).map((e) => e?.message ?? String(e)).join(', ');
  assert.ok(doc.valid, `frontmatter of ${rel} did not parse: ${reasons}`);

  // Null-safe access: a result without `parsed`/`frontmatter` should fail the
  // descriptive assertion below rather than throw a bare TypeError.
  const steps = doc.parsed?.frontmatter?.steps;
  assert.ok(Array.isArray(steps), `frontmatter.steps of ${rel} is not an array`);

  // The callers compare entries through Set membership, which is only
  // meaningful for primitives; reject anything that is not a usable title.
  for (const [index, step] of steps.entries()) {
    assert.ok(
      typeof step === 'string' && step.trim() !== '',
      `frontmatter.steps[${index}] of ${rel} is not a non-empty string (got ${JSON.stringify(step)})`,
    );
  }

  return steps;
}
|
||||
|
||||
// The synthetic plan pair must stay at or above the SC7 similarity floor.
test('plan determinism — Jaccard of synthetic plan-run-A vs plan-run-B meets SC7 threshold (0.833)', () => {
  const [stepsA, stepsB] = [
    'tests/synthetic/plan-run-A.md',
    'tests/synthetic/plan-run-B.md',
  ].map((rel) => loadSteps(rel));

  const similarity = jaccardSimilarity(stepsA, stepsB);

  const failure = [
    `jaccardSimilarity(stepsTokens(planA), stepsTokens(planB)) = ${similarity} < ${SC7_THRESHOLD} (SC7 floor). `,
    `Fixtures may have drifted — re-tune step titles to restore the overlap.`,
  ].join('');

  assert.ok(similarity >= SC7_THRESHOLD, failure);
});
|
||||
|
||||
// Each plan fixture must carry enough distinct step titles to be a meaningful sample.
test('plan determinism — both fixtures contain at least 30 unique step titles', () => {
  const fixtures = ['tests/synthetic/plan-run-A.md', 'tests/synthetic/plan-run-B.md'];

  fixtures.forEach((rel) => {
    const uniqueCount = new Set(loadSteps(rel)).size;
    assert.ok(
      uniqueCount >= 30,
      `${rel}: < 30 unique step titles (got ${uniqueCount}). Synthetic fixtures must reflect a substantial plan.`,
    );
  });
});
|
||||
|
||||
// A step title may appear only once within a single plan fixture.
test('plan determinism — no duplicate step titles within run', () => {
  const fixtures = ['tests/synthetic/plan-run-A.md', 'tests/synthetic/plan-run-B.md'];

  fixtures.forEach((rel) => {
    const steps = loadSteps(rel);
    const uniqueCount = new Set(steps).size;
    assert.strictEqual(
      uniqueCount,
      steps.length,
      `${rel}: contains duplicate step titles (${steps.length} entries vs ${uniqueCount} unique)`,
    );
  });
});
|
||||
74
plugins/ultraplan-local/tests/synthetic/plan-run-A.md
Normal file
74
plugins/ultraplan-local/tests/synthetic/plan-run-A.md
Normal file
|
|
@ -0,0 +1,74 @@
|
|||
---
|
||||
type: ultraplan-synthetic
|
||||
plan_version: "1.7"
|
||||
created: 2026-05-04
|
||||
task: "Add --verbose flag to CLI"
|
||||
slug: verbose-flag
|
||||
run_id: A
|
||||
steps:
|
||||
- "Add config entry for verbose flag in package.json"
|
||||
- "Define types for verbose mode in types.ts"
|
||||
- "Update parseArgs to recognize --verbose flag"
|
||||
- "Pass verbose context through main entry point"
|
||||
- "Add log level enum (silent, normal, verbose)"
|
||||
- "Wire log level into logger module"
|
||||
- "Replace console.log with logger.info in handler.ts"
|
||||
- "Add tests for parseArgs --verbose recognition"
|
||||
- "Add tests for log level enum mapping"
|
||||
- "Update README with --verbose flag documentation"
|
||||
- "Add CHANGELOG entry for verbose flag"
|
||||
- "Bump package.json minor version"
|
||||
- "Add lint rule blocking direct console usage"
|
||||
- "Run lint and fix new violations"
|
||||
- "Add CLI integration test for --verbose end-to-end"
|
||||
- "Add fixture file for verbose log capture"
|
||||
- "Document verbose output format in docs/cli.md"
|
||||
- "Add jsdoc for new logger API"
|
||||
- "Verify all existing tests pass with verbose disabled"
|
||||
- "Add backward-compat test for legacy quiet behavior"
|
||||
- "Add edge-case test for repeated --verbose flags"
|
||||
- "Add edge-case test for --verbose with --silent collision"
|
||||
- "Update help text to list --verbose flag"
|
||||
- "Add usage example to docs/quickstart.md"
|
||||
- "Verify CI matrix runs on Node 18 and 20"
|
||||
- "Add npm script for verbose mode debugging"
|
||||
- "Run security audit on logger dependency tree"
|
||||
- "Verify no PII leaks in verbose log output"
|
||||
- "Add manual test checklist to CONTRIBUTING.md"
|
||||
- "Update .gitignore for verbose log dump files"
|
||||
- "Add cleanup logic for stale verbose logs"
|
||||
- "Add unit test for cleanup logic"
|
||||
- "Verify exit code on verbose mode error"
|
||||
- "Add stderr routing for warnings in verbose"
|
||||
- "Add timestamp prefix in verbose log lines"
|
||||
- "Add test for timestamp format"
|
||||
- "Update troubleshooting guide with verbose flag"
|
||||
- "Verify version sync across all docs"
|
||||
- "Add benchmark for verbose log emission cost"
|
||||
- "Document benchmark methodology in PERF.md"
|
||||
---
|
||||
|
||||
# Synthetic plan run A — Add --verbose flag to CLI
|
||||
|
||||
This fixture represents one synthesized run of `/ultraplan-local` against a
|
||||
hand-calibrated brief. It is paired with `plan-run-B.md` for the
|
||||
`plan-determinism.test.mjs` Jaccard floor (≥ 0.833).
|
||||
|
||||
## How this fixture is used
|
||||
|
||||
`tests/synthetic/plan-determinism.test.mjs` reads the `steps` array from this
|
||||
file's frontmatter and computes `jaccardSimilarity(stepsA, stepsB)`. The test
|
||||
asserts the similarity is at or above the SC7 brief threshold (0.833).
|
||||
|
||||
This is a SYNTHETIC fixture — it is NOT the output of a real LLM run. The
|
||||
purpose is to exercise the determinism pipeline (parser + jaccard) on a known
|
||||
input pair so regressions in the pipeline are caught even when LLM
|
||||
determinism cannot be cheaply re-measured.
|
||||
|
||||
## Fixture math
|
||||
|
||||
- A has 40 unique step titles
|
||||
- B has 40 unique step titles
|
||||
- Intersection (shared titles): 38
|
||||
- Union: 42
|
||||
- Jaccard: 38/42 ≈ 0.9048 (well above 0.833 floor)
|
||||
77
plugins/ultraplan-local/tests/synthetic/plan-run-B.md
Normal file
77
plugins/ultraplan-local/tests/synthetic/plan-run-B.md
Normal file
|
|
@ -0,0 +1,77 @@
|
|||
---
|
||||
type: ultraplan-synthetic
|
||||
plan_version: "1.7"
|
||||
created: 2026-05-04
|
||||
task: "Add --verbose flag to CLI"
|
||||
slug: verbose-flag
|
||||
run_id: B
|
||||
steps:
|
||||
- "Add config entry for verbose flag in package.json"
|
||||
- "Define types for verbose mode in types.ts"
|
||||
- "Update parseArgs to recognize --verbose flag"
|
||||
- "Pass verbose context through main entry point"
|
||||
- "Add log level enum (silent, normal, verbose)"
|
||||
- "Wire log level into logger module"
|
||||
- "Replace console.log with logger.info in handler.ts"
|
||||
- "Add tests for parseArgs --verbose recognition"
|
||||
- "Add tests for log level enum mapping"
|
||||
- "Update README with --verbose flag documentation"
|
||||
- "Add CHANGELOG entry for verbose flag"
|
||||
- "Bump package.json minor version"
|
||||
- "Add lint rule blocking direct console usage"
|
||||
- "Run lint and fix new violations"
|
||||
- "Add CLI integration test for --verbose end-to-end"
|
||||
- "Add fixture file for verbose log capture"
|
||||
- "Document verbose output format in docs/cli.md"
|
||||
- "Add jsdoc for new logger API"
|
||||
- "Verify all existing tests pass with verbose disabled"
|
||||
- "Add backward-compat test for legacy quiet behavior"
|
||||
- "Add edge-case test for repeated --verbose flags"
|
||||
- "Add edge-case test for --verbose with --silent collision"
|
||||
- "Update help text to list --verbose flag"
|
||||
- "Add usage example to docs/quickstart.md"
|
||||
- "Verify CI matrix runs on Node 18 and 20"
|
||||
- "Add npm script for verbose mode debugging"
|
||||
- "Run security audit on logger dependency tree"
|
||||
- "Verify no PII leaks in verbose log output"
|
||||
- "Add manual test checklist to CONTRIBUTING.md"
|
||||
- "Update .gitignore for verbose log dump files"
|
||||
- "Add cleanup logic for stale verbose logs"
|
||||
- "Add unit test for cleanup logic"
|
||||
- "Verify exit code on verbose mode error"
|
||||
- "Add stderr routing for warnings in verbose"
|
||||
- "Add timestamp prefix in verbose log lines"
|
||||
- "Add test for timestamp format"
|
||||
- "Update troubleshooting guide with verbose flag"
|
||||
- "Verify version sync across all docs"
|
||||
- "Add benchmark for verbose log capture overhead"
|
||||
- "Document overhead methodology in PERF.md"
|
||||
---
|
||||
|
||||
# Synthetic plan run B — Add --verbose flag to CLI
|
||||
|
||||
This fixture represents a second synthesized run of `/ultraplan-local` against
|
||||
the same hand-calibrated brief used for `plan-run-A.md`. The two runs differ
|
||||
on 2 step titles (modeling realistic LLM variation).
|
||||
|
||||
## How this fixture is used
|
||||
|
||||
See `plan-run-A.md` for the determinism contract.
|
||||
|
||||
## Fixture math
|
||||
|
||||
- A has 40 unique step titles
|
||||
- B has 40 unique step titles
|
||||
- Intersection (shared titles): 38
|
||||
- Union: 42
|
||||
- Jaccard: 38/42 ≈ 0.9048 (well above 0.833 floor)
|
||||
|
||||
## Differences from run A
|
||||
|
||||
- A includes "Add benchmark for verbose log emission cost" → B replaces with
|
||||
"Add benchmark for verbose log capture overhead"
|
||||
- A includes "Document benchmark methodology in PERF.md" → B replaces with
|
||||
"Document overhead methodology in PERF.md"
|
||||
|
||||
These represent the kind of paraphrase variation a stochastic planner may
|
||||
produce on consecutive runs against an identical brief.
|
||||
|
|
@ -0,0 +1,79 @@
|
|||
// tests/synthetic/review-determinism.test.mjs
|
||||
// SC7 review-determinism floor — Jaccard pipeline test.
|
||||
//
|
||||
// Reads two synthetic review-run fixtures and asserts that
|
||||
// jaccardSimilarity(findingsA, findingsB) >= 0.833 (findingsX = frontmatter `findings` array of review-run-X).
|
||||
//
|
||||
// This is the SC7 (higher) floor. The companion
|
||||
// tests/lib/review-determinism.test.mjs holds the SC4 (0.70) floor against
|
||||
// tests/fixtures/ultrareview/. Both pairs coexist on purpose: the lower
|
||||
// floor protects against pipeline regressions, the higher one anchors the
|
||||
// determinism aspiration set in the speedup brief.
|
||||
|
||||
import { test } from 'node:test';
|
||||
import { strict as assert } from 'node:assert';
|
||||
import { readFileSync } from 'node:fs';
|
||||
import { join, dirname } from 'node:path';
|
||||
import { fileURLToPath } from 'node:url';
|
||||
import { jaccardSimilarity } from '../../lib/parsers/jaccard.mjs';
|
||||
import { parseFindingId } from '../../lib/parsers/finding-id.mjs';
|
||||
import { parseDocument } from '../../lib/util/frontmatter.mjs';
|
||||
|
||||
// Directory that contains this test file.
const HERE = dirname(fileURLToPath(import.meta.url));

// Two directory levels above this file; fixture paths are resolved against it.
const ROOT = join(HERE, '..', '..');

// Minimum Jaccard similarity the synthetic pair must reach (SC7 floor).
const SC7_THRESHOLD = 0.833;
|
||||
|
||||
/**
 * Load the `findings` array from the frontmatter of a synthetic review fixture.
 *
 * @param {string} rel - Fixture path, resolved relative to ROOT.
 * @returns {Array} The entries listed under `findings` in the frontmatter.
 * @throws {AssertionError} If the frontmatter does not parse or `findings` is
 *   not an array.
 */
function loadFindings(rel) {
  const text = readFileSync(join(ROOT, rel), 'utf-8');
  const doc = parseDocument(text);

  // Tolerate error entries that are not Error-like (e.g. plain strings) so the
  // failure message never degrades to "undefined, undefined".
  const reasons = (doc.errors || []).map((e) => e?.message ?? String(e)).join(', ');
  assert.ok(doc.valid, `frontmatter of ${rel} did not parse: ${reasons}`);

  // Null-safe access: a result without `parsed`/`frontmatter` should fail the
  // descriptive assertion below rather than throw a bare TypeError.
  const findings = doc.parsed?.frontmatter?.findings;
  assert.ok(Array.isArray(findings), `frontmatter.findings of ${rel} is not an array`);

  return findings;
}
|
||||
|
||||
// The synthetic review pair must stay at or above the SC7 similarity floor.
test('review determinism — Jaccard of synthetic review-run-A vs review-run-B meets SC7 threshold (0.833)', () => {
  const [findingsA, findingsB] = [
    'tests/synthetic/review-run-A.md',
    'tests/synthetic/review-run-B.md',
  ].map((rel) => loadFindings(rel));

  const similarity = jaccardSimilarity(findingsA, findingsB);

  const failure = [
    `jaccardSimilarity(findingTokens(reviewA), findingTokens(reviewB)) = ${similarity} < ${SC7_THRESHOLD} (SC7 floor). `,
    `Fixtures may have drifted — recompute IDs via lib/parsers/finding-id.mjs.`,
  ].join('');

  assert.ok(similarity >= SC7_THRESHOLD, failure);
});
|
||||
|
||||
// Every finding ID in both fixtures must be accepted by parseFindingId.
test('review determinism — finding IDs are 40-char hex (parseFindingId valid)', () => {
  const fixtures = ['tests/synthetic/review-run-A.md', 'tests/synthetic/review-run-B.md'];

  for (const rel of fixtures) {
    loadFindings(rel).forEach((id) => {
      const { valid } = parseFindingId(id);
      assert.ok(
        valid,
        `${rel}: ID ${JSON.stringify(id)} is not a 40-char lowercase hex string (parseFindingId rejected it)`,
      );
    });
  }
});
|
||||
|
||||
// Each review fixture must carry enough distinct finding IDs to be a meaningful sample.
test('review determinism — both fixtures contain at least 25 unique finding-IDs', () => {
  const fixtures = ['tests/synthetic/review-run-A.md', 'tests/synthetic/review-run-B.md'];

  fixtures.forEach((rel) => {
    const uniqueCount = new Set(loadFindings(rel)).size;
    assert.ok(
      uniqueCount >= 25,
      `${rel}: < 25 unique finding-IDs (got ${uniqueCount}). Synthetic fixtures must reflect a substantial review.`,
    );
  });
});
|
||||
|
||||
// A finding ID may appear only once within a single review fixture.
test('review determinism — no duplicate IDs within run', () => {
  const fixtures = ['tests/synthetic/review-run-A.md', 'tests/synthetic/review-run-B.md'];

  fixtures.forEach((rel) => {
    const findings = loadFindings(rel);
    const uniqueCount = new Set(findings).size;
    assert.strictEqual(
      uniqueCount,
      findings.length,
      `${rel}: contains duplicate finding-IDs (${findings.length} entries vs ${uniqueCount} unique)`,
    );
  });
});
|
||||
69
plugins/ultraplan-local/tests/synthetic/review-run-A.md
Normal file
69
plugins/ultraplan-local/tests/synthetic/review-run-A.md
Normal file
|
|
@ -0,0 +1,69 @@
|
|||
---
|
||||
type: ultrareview-synthetic
|
||||
review_version: "1.0"
|
||||
created: 2026-05-04
|
||||
task: "Add JWT authentication with refresh-token rotation"
|
||||
slug: jwt-auth-synthetic
|
||||
run_id: A
|
||||
verdict: WARN
|
||||
findings:
|
||||
- 44b18cf6b84fcb23ef1d52682504c2edeed24f66
|
||||
- f7e307a427154c2c15df4c63eaff6fd846e075a7
|
||||
- 31fa81fa5bf9b84c70864ee09aa8d087870c473a
|
||||
- bfc0e3a7c1a5b13dbdc6ed8325140100b02db45d
|
||||
- be76c6dba12bfd9073b1737de5813e316a158dc6
|
||||
- f0928545e7c1dc48796fe857138fab7f100ce8c7
|
||||
- 4189ba4236119184017fd26735bfb582706994e9
|
||||
- 46f07246ff17c013740c0726b7be9a65fff10c67
|
||||
- 5501c54bda4a39df17d66938f4a7fe872e365a0f
|
||||
- 0173116735f75aabab36ecec863cb429d2f30528
|
||||
- 8f7fc683dc78d3adea8d35221915839702869af0
|
||||
- ee986665d695ca46c9a7f0d5c38bab73e73450a9
|
||||
- d863b17426ddec54bf7624405f3b64e206a73ed7
|
||||
- 64ea0bbf43c44dbf0da53f25755e0112ce2eb08b
|
||||
- 6971113644b777a8c164dfd8473739b03d1796be
|
||||
- 65f6edb11fed982b921ff018bd0fb1dcd10a1703
|
||||
- 9133851cf557f5955301803479936733b296f125
|
||||
- ffb170a0d19e4afac6379e64d26485883267bea8
|
||||
- 89f990535da373f5e97a091e5bbbf47a777c13d6
|
||||
- 664d4ec53e90ef6d24525a85b8d4071bfb037da8
|
||||
- 137db625a1ee639698c9e095e25845ef25879599
|
||||
- 6e586f167fac4cd57dc8178ceb4ca265a37404dc
|
||||
- 24671775282593381af4a8fa77eb3f7a36f9f84e
|
||||
- 71dbed32baf440d94f0ccaa6a997a6922cee7679
|
||||
- 5de9b2b26d03590845183d42387fcb22007b3f5d
|
||||
- c9aca8c3a265e2f083d75ac6da3e6d67909091b9
|
||||
- 75f32c9d304b742af2a7bafc354ec3666e53c054
|
||||
- 6547dfd19035bc012a50c19f4321fcfc9535fec8
|
||||
- 7554bc48226406e85282c7daeaba75cc732f4b35
|
||||
- 4f48547385c2d343ee0994d825321e6e6b90c89d
|
||||
---
|
||||
|
||||
# Synthetic review run A — JWT authentication with refresh-token rotation
|
||||
|
||||
This fixture represents one synthesized run of `/ultrareview-local` on a
|
||||
hand-calibrated brief. It is paired with `review-run-B.md` for the
|
||||
`review-determinism.test.mjs` Jaccard floor (≥ 0.833).
|
||||
|
||||
## How this fixture is used
|
||||
|
||||
`tests/synthetic/review-determinism.test.mjs` reads the `findings` array from
|
||||
this file's frontmatter and computes
|
||||
`jaccardSimilarity(findingsA, findingsB)`. The test asserts the similarity is
|
||||
at or above the SC7 brief threshold (0.833).
|
||||
|
||||
This fixture is distinct from `tests/fixtures/ultrareview/review-run-A.md`,
|
||||
which feeds the existing `tests/lib/review-determinism.test.mjs` against the
|
||||
v1.0 SC4 floor (0.70). The synthetic pair pushes the floor higher per SC7.
|
||||
|
||||
## Fixture math
|
||||
|
||||
- A has 30 unique finding-IDs
|
||||
- B has 30 unique finding-IDs
|
||||
- Intersection (shared IDs): 28
|
||||
- Union: 32
|
||||
- Jaccard: 28/32 = 0.875 (above 0.833 floor)
|
||||
|
||||
Each ID is the SHA-1 of a synthetic `file:line:rule_key` triple per
|
||||
`lib/parsers/finding-id.mjs`. The shared 28 represent stable findings; the
|
||||
2 unique-per-side represent off-by-one line drift in `file:line` anchoring.
|
||||
63
plugins/ultraplan-local/tests/synthetic/review-run-B.md
Normal file
63
plugins/ultraplan-local/tests/synthetic/review-run-B.md
Normal file
|
|
@ -0,0 +1,63 @@
|
|||
---
|
||||
type: ultrareview-synthetic
|
||||
review_version: "1.0"
|
||||
created: 2026-05-04
|
||||
task: "Add JWT authentication with refresh-token rotation"
|
||||
slug: jwt-auth-synthetic
|
||||
run_id: B
|
||||
verdict: WARN
|
||||
findings:
|
||||
- 44b18cf6b84fcb23ef1d52682504c2edeed24f66
|
||||
- f7e307a427154c2c15df4c63eaff6fd846e075a7
|
||||
- 31fa81fa5bf9b84c70864ee09aa8d087870c473a
|
||||
- bfc0e3a7c1a5b13dbdc6ed8325140100b02db45d
|
||||
- be76c6dba12bfd9073b1737de5813e316a158dc6
|
||||
- f0928545e7c1dc48796fe857138fab7f100ce8c7
|
||||
- 4189ba4236119184017fd26735bfb582706994e9
|
||||
- 46f07246ff17c013740c0726b7be9a65fff10c67
|
||||
- 5501c54bda4a39df17d66938f4a7fe872e365a0f
|
||||
- 0173116735f75aabab36ecec863cb429d2f30528
|
||||
- 8f7fc683dc78d3adea8d35221915839702869af0
|
||||
- ee986665d695ca46c9a7f0d5c38bab73e73450a9
|
||||
- d863b17426ddec54bf7624405f3b64e206a73ed7
|
||||
- 64ea0bbf43c44dbf0da53f25755e0112ce2eb08b
|
||||
- 6971113644b777a8c164dfd8473739b03d1796be
|
||||
- 65f6edb11fed982b921ff018bd0fb1dcd10a1703
|
||||
- 9133851cf557f5955301803479936733b296f125
|
||||
- ffb170a0d19e4afac6379e64d26485883267bea8
|
||||
- 89f990535da373f5e97a091e5bbbf47a777c13d6
|
||||
- 664d4ec53e90ef6d24525a85b8d4071bfb037da8
|
||||
- 137db625a1ee639698c9e095e25845ef25879599
|
||||
- 6e586f167fac4cd57dc8178ceb4ca265a37404dc
|
||||
- 24671775282593381af4a8fa77eb3f7a36f9f84e
|
||||
- 71dbed32baf440d94f0ccaa6a997a6922cee7679
|
||||
- 5de9b2b26d03590845183d42387fcb22007b3f5d
|
||||
- c9aca8c3a265e2f083d75ac6da3e6d67909091b9
|
||||
- 75f32c9d304b742af2a7bafc354ec3666e53c054
|
||||
- 6547dfd19035bc012a50c19f4321fcfc9535fec8
|
||||
- a5fbe85476128bb67796ecf97a42065b6a0bf9c4
|
||||
- 19ec9d34e1d6560b56f885a5a12ce491354c4b40
|
||||
---
|
||||
|
||||
# Synthetic review run B — JWT authentication with refresh-token rotation
|
||||
|
||||
Companion to `review-run-A.md`. See run A's body for the determinism
|
||||
contract.
|
||||
|
||||
## Fixture math
|
||||
|
||||
- A has 30 unique finding-IDs
|
||||
- B has 30 unique finding-IDs
|
||||
- Intersection (shared IDs): 28
|
||||
- Union: 32
|
||||
- Jaccard: 28/32 = 0.875 (above 0.833 floor)
|
||||
|
||||
## Differences from run A
|
||||
|
||||
- A's last 2 IDs come from `src/auth/jwt.ts:201:rule-1` and
|
||||
`src/auth/refresh.ts:55:rule-3`
|
||||
- B's last 2 IDs come from `src/auth/jwt.ts:202:rule-1` and
|
||||
`src/auth/refresh.ts:56:rule-3`
|
||||
|
||||
The off-by-one line anchoring models realistic post-edit drift between two
|
||||
review runs against subtly different working trees.
|
||||
Loading…
Add table
Add a link
Reference in a new issue