feat(llm-security)!: v7.0.0 commit 6 — tests, docs, version bump
Final commit in the trustworthy-scoring series. Bundles verdict cutoff
alignment, the last suite of tests, and all documentation touch-points
that quote version numbers or describe v7.0.0 behaviour.
Verdict/band co-monotonicity
- `scanners/lib/severity.mjs` — verdict cutoffs moved from 61/21 to 65/15
so that `BLOCK >= 65` and `WARNING >= 15` lock onto the v2 riskBand() boundaries.
This prevents "BLOCK / Medium band" contradictions under the v2 formula.
Scanner hardening (bug fixes from v7.0.0 testing)
- `scanners/entropy-scanner.mjs` — `policy_source` now uses
`existsSync('.llm-security/policy.json')` instead of a value-based check.
The old heuristic always reported 'policy.json' because DEFAULT_POLICY now
carries an `entropy.thresholds` section.
- `scanners/lib/file-discovery.mjs` — `.sass` and GPU shader extensions
(`.glsl, .frag, .vert, .shader, .wgsl`) added to TEXT_EXTENSIONS. Without
this, shader files were invisible to file-discovery, so they were never
counted as skipped by the entropy-scanner extension filter.
Tests
- `tests/scanners/entropy-context.test.mjs` (new, 24 tests) — A. File-ext
skip (4), B. Line-level rules 11-17 (8), C. Policy overrides (3).
Fixtures generate 80-char base64 payloads at runtime via
`crypto.randomBytes` to dodge the plugin's own pre-edit credential hook
on the test source.
- `tests/lib/severity.test.mjs` — rewritten with v2 scoring table (70
tests total, was 52).
- `tests/lib/output.test.mjs:243` — "1 critical = score 80" under v2
(was 25 under v1).
- Full suite: 1485/1485 green (was 1461).
Docs
- `CHANGELOG.md` — v7.0.0 entry with BREAKING CHANGES section.
- `README.md` (plugin + marketplace root) — version badge, history table,
plugin-card version string, test count.
- `CLAUDE.md` — header version, "v7.0.0 — Trustworthy scoring" summary
paragraph at the top.
- `docs/security-hardening-guide.md` — new section 6 "Calibration & false
positives" documenting v2 formula, context-aware entropy scanner,
typosquat allowlist, and §6.4 tuning workflow. Existing "Recommended
baseline" section renumbered to §7.
Version bump
- `6.6.0 -> 7.0.0` across package.json, .claude-plugin/plugin.json,
scanners/ide-extension-scanner.mjs VERSION const, README badge,
CLAUDE.md header, marketplace root README card.
Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
parent
915aca69e4
commit
6f86de937a
14 changed files with 515 additions and 85 deletions
|
|
@ -10,6 +10,8 @@
|
|||
// - OWASP LLM03 (Supply Chain — obfuscated dependencies)
|
||||
// - ToxicSkills research: evasion via base64-wrapped instructions
|
||||
|
||||
import { existsSync } from 'node:fs';
|
||||
import { join } from 'node:path';
|
||||
import { readTextFile } from './lib/file-discovery.mjs';
|
||||
import { finding, scannerResult } from './lib/output.mjs';
|
||||
import { SEVERITY } from './lib/severity.mjs';
|
||||
|
|
@ -436,9 +438,15 @@ export async function scan(targetPath, discovery) {
|
|||
let filesScanned = 0;
|
||||
|
||||
// Load policy for this target and apply overrides to module-level state.
|
||||
// Best-effort — on any error we fall back to built-in defaults.
|
||||
// Best-effort — on any error we fall back to built-in defaults. Provenance
|
||||
// tracked via file-existence check, not by comparing merged values (defaults
|
||||
// always include an entropy section so a value-based check would always
|
||||
// report 'policy.json').
|
||||
let policySource = 'defaults';
|
||||
try {
|
||||
if (existsSync(join(targetPath, '.llm-security', 'policy.json'))) {
|
||||
policySource = 'policy.json';
|
||||
}
|
||||
const policy = loadPolicy(targetPath);
|
||||
const ent = policy?.entropy || {};
|
||||
THRESHOLDS = resolveThresholds(ent.thresholds);
|
||||
|
|
@ -449,19 +457,12 @@ export async function scan(targetPath, discovery) {
|
|||
.filter((e) => typeof e === 'string')
|
||||
.map((e) => e.toLowerCase()),
|
||||
);
|
||||
if (
|
||||
ent.thresholds ||
|
||||
(ent.suppress_line_patterns && ent.suppress_line_patterns.length > 0) ||
|
||||
(ent.suppress_paths && ent.suppress_paths.length > 0) ||
|
||||
(ent.suppress_extensions && ent.suppress_extensions.length > 0)
|
||||
) {
|
||||
policySource = 'policy.json';
|
||||
}
|
||||
} catch {
|
||||
THRESHOLDS = DEFAULT_THRESHOLDS;
|
||||
USER_SUPPRESS_LINE_PATTERNS = [];
|
||||
USER_SUPPRESS_PATHS = [];
|
||||
USER_SUPPRESS_EXTENSIONS = new Set();
|
||||
policySource = 'defaults';
|
||||
}
|
||||
|
||||
let filesSkippedByExtension = 0;
|
||||
|
|
|
|||
|
|
@ -49,7 +49,7 @@ import { scan as scanTaint } from './taint-tracer.mjs';
|
|||
import { scan as scanMemoryPoisoning } from './memory-poisoning-scanner.mjs';
|
||||
import { scan as scanSupplyChain } from './supply-chain-recheck.mjs';
|
||||
|
||||
const VERSION = '6.6.0';
|
||||
const VERSION = '7.0.0';
|
||||
const SCANNER = 'IDE';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
|
|
|
|||
|
|
@ -16,7 +16,8 @@ const TEXT_EXTENSIONS = new Set([
|
|||
'.env', '.env.local', '.env.example',
|
||||
'.cfg', '.ini', '.conf',
|
||||
'.xml', '.html', '.htm', '.svg',
|
||||
'.css', '.scss', '.less',
|
||||
'.css', '.scss', '.sass', '.less',
|
||||
'.glsl', '.frag', '.vert', '.shader', '.wgsl', // GPU shader source
|
||||
'.sql',
|
||||
'.rs', '.go', '.java', '.kt', '.cs', '.c', '.cpp', '.h', '.hpp',
|
||||
'.rb', '.php', '.lua', '.swift', '.m',
|
||||
|
|
|
|||
|
|
@ -63,15 +63,18 @@ export function riskScoreV1(counts) {
|
|||
}
|
||||
|
||||
/**
|
||||
* Derive verdict from severity counts and risk score.
|
||||
* BLOCK if Critical >= 1 OR score >= 61. WARNING if High >= 1 OR score >= 21. Otherwise ALLOW.
|
||||
* Derive verdict from severity counts and risk score (v7.0.0 thresholds).
|
||||
* Aligned to v2 riskBand cutoffs so verdict and band are co-monotonic:
|
||||
* BLOCK if critical >= 1 OR score >= 65 (Critical/Extreme band)
|
||||
* WARNING if high >= 1 OR score >= 15 (Medium/High band)
|
||||
* ALLOW otherwise (Low band)
|
||||
* @param {{ critical: number, high: number, medium: number, low: number, info: number }} counts
|
||||
* @returns {'BLOCK' | 'WARNING' | 'ALLOW'}
|
||||
*/
|
||||
export function verdict(counts) {
|
||||
const score = riskScore(counts);
|
||||
if ((counts.critical || 0) >= 1 || score >= 61) return 'BLOCK';
|
||||
if ((counts.high || 0) >= 1 || score >= 21) return 'WARNING';
|
||||
if ((counts.critical || 0) >= 1 || score >= 65) return 'BLOCK';
|
||||
if ((counts.high || 0) >= 1 || score >= 15) return 'WARNING';
|
||||
return 'ALLOW';
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue