diff --git a/plugins/llm-security/.claude-plugin/plugin.json b/plugins/llm-security/.claude-plugin/plugin.json index 54ad24f..c0d6c15 100644 --- a/plugins/llm-security/.claude-plugin/plugin.json +++ b/plugins/llm-security/.claude-plugin/plugin.json @@ -1,5 +1,5 @@ { "name": "llm-security", "description": "Security scanning, auditing, and threat modeling for Claude Code projects. Detects secrets, validates MCP servers, assesses security posture, and generates threat models aligned with OWASP LLM Top 10.", - "version": "6.0.0" + "version": "6.1.0" } diff --git a/plugins/llm-security/.npmignore b/plugins/llm-security/.npmignore new file mode 100644 index 0000000..deac011 --- /dev/null +++ b/plugins/llm-security/.npmignore @@ -0,0 +1,27 @@ +tests/ +scripts/ +examples/ +.claude/ +.claude-plugin/ +agents/ +commands/ +hooks/ +knowledge/ +templates/ +test-fixtures/ +reports/ +ci/ +docs/ +*.local.md +REMEMBER.md +TODO.md +ROADMAP.md +CHANGELOG.md +CLAUDE.md +SECURITY.md +V3-ANNOUNCEMENT.md +V3-UPGRADE.md +.editorconfig +.llm-security-ignore +.orphaned_at +.DS_Store diff --git a/plugins/llm-security/CHANGELOG.md b/plugins/llm-security/CHANGELOG.md index 26b3de8..9afcf81 100644 --- a/plugins/llm-security/CHANGELOG.md +++ b/plugins/llm-security/CHANGELOG.md @@ -4,6 +4,19 @@ All notable changes to the LLM Security Plugin are documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). +## [6.1.0] - 2026-04-10 + +### Added +- **`--fail-on ` flag** — CI-friendly exit codes: exit 1 when any finding at or above the specified severity exists (critical/high/medium/low). Configurable via `policy.json` `ci.failOn` +- **`--compact` output mode** — One-liner per finding format (`[SEVERITY] scanner: title (file:line)`), reduces CI log noise. 
Configurable via `policy.json` `ci.compact` +- **CI/CD pipeline templates** — Ready-to-use templates in `ci/`: GitHub Actions (`github-action.yml`), Azure DevOps (`azure-pipelines.yml`), GitLab CI (`gitlab-ci.yml`) with SARIF upload, Node 18 setup +- **CI/CD integration guide** — `docs/ci-cd-guide.md` with 5-minute setup per platform, Schrems II/NSM compliance documentation, exit code reference +- **npm publish preparation** — `files` whitelist in `package.json` (only `bin/` + `scanners/`), `.npmignore` safety net, `homepage` field +- **Policy `ci` section** — New `ci: { failOn, compact }` section in `.llm-security/policy.json` for distributable CI configuration + +### Changed +- Version bump: 6.0.0 → 6.1.0 across all files + ## [6.0.0] - 2026-04-10 ### Added diff --git a/plugins/llm-security/CLAUDE.md b/plugins/llm-security/CLAUDE.md index c48f62e..8bf8dad 100644 --- a/plugins/llm-security/CLAUDE.md +++ b/plugins/llm-security/CLAUDE.md @@ -1,6 +1,6 @@ -# LLM Security Plugin (v6.0.0) +# LLM Security Plugin (v6.1.0) -Security scanning, auditing, and threat modeling for Claude Code projects. 5 frameworks: OWASP LLM Top 10, Agentic AI Top 10 (ASI), Skills Top 10 (AST), MCP Top 10, AI Agent Traps (DeepMind). 1242+ tests. +Security scanning, auditing, and threat modeling for Claude Code projects. 5 frameworks: OWASP LLM Top 10, Agentic AI Top 10 (ASI), Skills Top 10 (AST), MCP Top 10, AI Agent Traps (DeepMind). 1264 tests. ## Commands @@ -67,7 +67,8 @@ Post-clone: size check (100MB max), cleanup guarantee (temp dir + evidence file ## Scanners -**Orchestrated (10):** Run via `node scanners/scan-orchestrator.mjs [--output-file ] [--baseline] [--save-baseline]`. +**Orchestrated (10):** Run via `node scanners/scan-orchestrator.mjs [--fail-on ] [--compact] [--output-file ] [--baseline] [--save-baseline]`. +`--fail-on `: exit 1 if findings at/above severity, exit 0 otherwise. `--compact`: one-liner per finding format. Both configurable via `policy.json` `ci` section. 
With `--output-file`: full JSON to file, compact aggregate to stdout. `--baseline` diffs against stored baseline. `--save-baseline` saves results for future diffs. Baselines stored in `reports/baselines/.json`. 10 scanners: unicode, entropy, permission, dep-audit, taint, git-forensics, network, memory-poisoning, supply-chain-recheck, toxic-flow. @@ -79,7 +80,7 @@ Utility: `node scanners/lib/fs-utils.mjs [args] Lib: `sarif-formatter.mjs` — converts scan output to OASIS SARIF 2.1.0 format. Used by `--format sarif` flag. Lib: `audit-trail.mjs` — writes structured JSONL audit events (ISO 8601, OWASP tags, SIEM-ready). Env: `LLM_SECURITY_AUDIT_*`. -Lib: `policy-loader.mjs` — reads `.llm-security/policy.json` for distributable hook configuration. Defaults match hardcoded values. +Lib: `policy-loader.mjs` — reads `.llm-security/policy.json` for distributable hook configuration. Includes `ci` section (`failOn`, `compact`) for CI/CD defaults. Defaults match hardcoded values. **Standalone (7):** `posture-scanner.mjs` — deterministic posture assessment, 16 categories (incl. EU AI Act, NIST AI RMF, ISO 42001), <50ms. NOT in scan-orchestrator (meta-level, not code-level). Run: `node scanners/posture-scanner.mjs [path]` → JSON stdout. Scanner prefix: PST. Used by `/security posture` and `/security audit`. @@ -108,7 +109,13 @@ All commands total ~600 lines. All commands use registered subagent types. `bin/llm-security.mjs` — standalone CLI entry point. Works without Claude Code via `npx llm-security` or `node bin/llm-security.mjs`. Subcommands: `scan`, `deep-scan`, `posture`, `audit-bom`, `benchmark`. Dispatches to scanner scripts via `child_process.spawn`. -`package.json` `bin` field: `"llm-security": "./bin/llm-security.mjs"`. +`package.json` `bin` field: `"llm-security": "./bin/llm-security.mjs"`. `files` whitelist: only `bin/` + `scanners/` published to npm. + +## CI/CD Integration + +Pipeline templates in `ci/`: `github-action.yml`, `azure-pipelines.yml`, `gitlab-ci.yml`. 
Documentation: `docs/ci-cd-guide.md`. +All templates use `--fail-on high --format sarif --output-file results.sarif` with SARIF upload per platform. +Standalone CLI makes zero network calls (except opt-in OSV.dev in supply-chain-recheck). Fully Schrems II compatible. ## Knowledge Files (15) diff --git a/plugins/llm-security/README.md b/plugins/llm-security/README.md index efdf0c2..ac62be9 100644 --- a/plugins/llm-security/README.md +++ b/plugins/llm-security/README.md @@ -4,7 +4,7 @@ *Built for my own Claude Code workflow and shared openly for anyone who finds it useful. This is a solo project — bug reports and feature requests are welcome, but pull requests are not accepted.* -![Version](https://img.shields.io/badge/version-6.0.0-blue) +![Version](https://img.shields.io/badge/version-6.1.0-blue) ![Platform](https://img.shields.io/badge/platform-Claude_Code_Plugin-purple) ![Agents](https://img.shields.io/badge/agents-6-orange) ![Scanners](https://img.shields.io/badge/scanners-16-cyan) @@ -442,6 +442,7 @@ v6.0.0 adds an enterprise governance layer for standards-aware security operatio | **AI-BOM** | CycloneDX 1.6 Bill of Materials for AI components — models, MCP servers, plugins, knowledge files, hooks. `llm-security audit-bom `. | | **Policy-as-Code** | `.llm-security/policy.json` for distributable hook configuration. Teams can enforce consistent security thresholds without per-developer env var setup. | | **Standalone CLI** | `npx llm-security scan ` — runs scanners without Claude Code. Subcommands: `scan`, `posture`, `audit-bom`, `benchmark`. | +| **CI/CD Integration** | `--fail-on ` for threshold-based exit codes, `--compact` for one-liner output. Pipeline templates for GitHub Actions, Azure DevOps, GitLab CI in `ci/`. Guide: `docs/ci-cd-guide.md`. 
| ### Benchmarks @@ -756,6 +757,7 @@ This plugin provides full-stack security hardening (static analysis + supply cha | Version | Date | Highlights | |---------|------|------------| +| **6.1.0** | 2026-04-10 | **CI/CD integration.** `--fail-on ` flag for threshold-based exit codes (exit 1 if findings at/above level). `--compact` output mode (one-liner per finding). Policy `ci` section in `policy.json`. Pipeline templates: GitHub Actions, Azure DevOps, GitLab CI with SARIF upload. CI/CD guide (`docs/ci-cd-guide.md`) with Schrems II/NSM compliance docs. npm publish preparation (`files` whitelist). 1261+ tests. | | **6.0.0** | 2026-04-10 | **CAISS-readiness release.** Enterprise compliance and governance layer: compliance mapping (EU AI Act, NIST AI RMF, ISO 42001, MITRE ATLAS), Norwegian regulatory context (Datatilsynet, NSM, Digitaliseringsdirektoratet), SARIF 2.1.0 output format (`--format sarif`), structured JSONL audit trail (`audit-trail.mjs`), AI-BOM generator (CycloneDX 1.6), policy-as-code (`.llm-security/policy.json`), standalone CLI (`bin/llm-security.mjs` — `npx llm-security scan`). Posture scanner expanded to 16 categories (+EU AI Act, NIST AI RMF, ISO 42001). Attack simulator benchmark mode (`--benchmark`). 15 knowledge docs, 16 scanners, 1242+ tests. | | **5.1.0** | 2026-04-07 | **Sandboxed remote cloning.** Defense-in-depth for `git clone` attack surface: (1) 8 git config flags disable hooks, symlinks, filter/smudge drivers, fsmonitor, local file protocol; 4 env vars isolate from system/user config. (2) OS sandbox: macOS `sandbox-exec` + Linux `bubblewrap` restrict file writes to only the clone temp dir. Graceful fallback on Windows (git config only). Post-clone size check (100MB max). UUID-unique evidence filenames prevent race conditions. Cleanup guarantee in scan/plugin-audit commands. 1147 tests (was 1115). 
| | **5.0.0** | 2026-04-06 | **Prompt Injection Hardening (v5.0).** 8-session defense-in-depth overhaul driven by 7 research papers (2025-2026). MEDIUM advisory for obfuscation signals (leetspeak, homoglyphs, zero-width, multi-language). Unicode Tag steganography detection (U+E0000-E007F). Bash expansion normalization (`bash-normalize.mjs`). Rule of Two enforcement (configurable `LLM_SECURITY_TRIFECTA_MODE=block\|warn\|off`). 100-call long-horizon monitoring window with slow-burn trifecta detection. Behavioral drift via Jensen-Shannon divergence. HITL trap detection (approval urgency, summary suppression, scope minimization). Sub-agent delegation tracking (escalation-after-input advisory). NL indirection patterns. Hybrid attacks (P2SQL, recursive injection, XSS-in-agent). CaMeL-inspired data flow tagging (SHA-256 provenance, output-to-input linking). Adaptive red-team (5 mutation rounds per scenario: homoglyph, encoding, zero-width, case alternation, synonym). Knowledge base expanded: `prompt-injection-research-2025-2026.md`, `deepmind-agent-traps.md`, `attack-mutations.json`. Posture scanner expanded to 13 categories (+Prompt Injection Hardening, Rule of Two, Long-Horizon Monitoring). Defense Philosophy section documenting honest limitations. 1115 tests. 
| diff --git a/plugins/llm-security/bin/llm-security.mjs b/plugins/llm-security/bin/llm-security.mjs index 600c613..da807ee 100755 --- a/plugins/llm-security/bin/llm-security.mjs +++ b/plugins/llm-security/bin/llm-security.mjs @@ -17,9 +17,10 @@ const USAGE = `llm-security v${PKG.version} — AI security scanning for Claude Usage: llm-security [options] Commands: - scan [--format sarif] [--baseline] [--save-baseline] + scan [--fail-on ] [--compact] + [--format sarif] [--output-file ] [--baseline] [--save-baseline] Run deterministic deep-scan (10 scanners) - deep-scan [--format sarif] [--baseline] [--save-baseline] + deep-scan Alias for scan posture Quick security posture assessment (16 categories) diff --git a/plugins/llm-security/ci/azure-pipelines.yml b/plugins/llm-security/ci/azure-pipelines.yml new file mode 100644 index 0000000..b50e541 --- /dev/null +++ b/plugins/llm-security/ci/azure-pipelines.yml @@ -0,0 +1,46 @@ +# llm-security — Azure DevOps pipeline +# Deterministic security scanning for AI/LLM projects. +# No LLM calls. No data leaves your pipeline. Fully Schrems II compatible. +# +# See docs/ci-cd-guide.md for configuration options and detailed setup. +# +# Alternative (without npx): replace the scan script with: +# script: node bin/llm-security.mjs scan . --fail-on high --format sarif --output-file $(Build.ArtifactStagingDirectory)/results.sarif + +trigger: + branches: + include: + - main + +pool: + vmImage: ubuntu-latest + +steps: + - task: NodeTool@0 + displayName: Install Node.js 18 + inputs: + versionSpec: '18.x' + + - script: npx llm-security scan . 
--fail-on high --format sarif --output-file $(Build.ArtifactStagingDirectory)/results.sarif + displayName: Run llm-security scan + + - task: PublishBuildArtifacts@1 + condition: always() + displayName: Publish SARIF results + inputs: + pathToPublish: $(Build.ArtifactStagingDirectory)/results.sarif + artifactName: llm-security-scan + +# For Azure DevOps Advanced Security (if enabled): +# Replace PublishBuildArtifacts with: +# - task: AdvancedSecurity-Publish@1 +# condition: always() +# displayName: Publish to Advanced Security +# +# Configuration: +# --fail-on Exit 1 if findings at or above severity +# --compact One-liner per finding (reduced log noise) +# --format sarif OASIS SARIF 2.1.0 output +# +# Or configure via .llm-security/policy.json: +# { "ci": { "failOn": "high", "compact": true } } diff --git a/plugins/llm-security/ci/github-action.yml b/plugins/llm-security/ci/github-action.yml new file mode 100644 index 0000000..0ae3073 --- /dev/null +++ b/plugins/llm-security/ci/github-action.yml @@ -0,0 +1,47 @@ +# llm-security — GitHub Actions workflow +# Deterministic security scanning for AI/LLM projects. +# No LLM calls. No data leaves your pipeline. Fully Schrems II compatible. +# +# See docs/ci-cd-guide.md for configuration options and detailed setup. +# +# Alternative (without npx): replace the scan step with: +# run: node bin/llm-security.mjs scan . --fail-on high --format sarif --output-file results.sarif + +name: LLM Security Scan + +on: + push: + branches: [main] + pull_request: + branches: [main] + +jobs: + security-scan: + runs-on: ubuntu-latest + permissions: + security-events: write # Required for SARIF upload + steps: + - uses: actions/checkout@v4 + + - uses: actions/setup-node@v4 + with: + node-version: '18' + + - name: Run llm-security scan + run: npx llm-security scan . 
--fail-on high --format sarif --output-file results.sarif + + - name: Upload SARIF to GitHub Advanced Security + uses: github/codeql-action/upload-sarif@v3 + if: always() + with: + sarif_file: results.sarif + +# Configuration: +# --fail-on Exit 1 if findings at or above severity +# --compact One-liner per finding (reduced log noise) +# --format sarif OASIS SARIF 2.1.0 output +# --output-file Write full results to file +# --baseline Diff against stored baseline +# +# Or configure via .llm-security/policy.json: +# { "ci": { "failOn": "high", "compact": true } } diff --git a/plugins/llm-security/ci/gitlab-ci.yml b/plugins/llm-security/ci/gitlab-ci.yml new file mode 100644 index 0000000..54f4339 --- /dev/null +++ b/plugins/llm-security/ci/gitlab-ci.yml @@ -0,0 +1,37 @@ +# llm-security — GitLab CI template +# Deterministic security scanning for AI/LLM projects. +# No LLM calls. No data leaves your pipeline. Fully Schrems II compatible. +# +# Include in your .gitlab-ci.yml: +# include: +# - local: ci/gitlab-ci.yml +# +# See docs/ci-cd-guide.md for configuration options and detailed setup. +# +# Alternative (without npx): replace the script with: +# script: node bin/llm-security.mjs scan . --fail-on high --format sarif --output-file results.sarif + +llm-security-scan: + image: node:18-alpine + stage: test + script: + - npx llm-security scan . 
--fail-on high --format sarif --output-file results.sarif + artifacts: + paths: + - results.sarif + reports: + sast: results.sarif + when: always + +# Notes: +# - GitLab SAST report parsing of SARIF requires GitLab Ultimate +# - The artifact is always available regardless of license tier +# - For GitLab Free/Premium, results are in the downloadable artifact only +# +# Configuration: +# --fail-on Exit 1 if findings at or above severity +# --compact One-liner per finding (reduced log noise) +# --format sarif OASIS SARIF 2.1.0 output +# +# Or configure via .llm-security/policy.json: +# { "ci": { "failOn": "high", "compact": true } } diff --git a/plugins/llm-security/docs/ci-cd-guide.md b/plugins/llm-security/docs/ci-cd-guide.md new file mode 100644 index 0000000..512e503 --- /dev/null +++ b/plugins/llm-security/docs/ci-cd-guide.md @@ -0,0 +1,165 @@ +# CI/CD Integration Guide + +Integrate llm-security into your CI/CD pipeline for automated security scanning of AI/LLM projects. The standalone CLI runs 10 deterministic Node.js scanners — no AI models, no external API calls, no data leaves your pipeline environment. + +## Data Sovereignty + +**The standalone CLI makes zero network calls by default.** All 10 scanners operate locally on your source code using Shannon entropy analysis, regex pattern matching, AST traversal, and git log parsing. No data is transmitted to any external service. + +**Exception: supply-chain-recheck** — When scanning lockfiles for known vulnerabilities, this scanner optionally queries the [OSV.dev](https://osv.dev/) batch API. This sends only package names and versions (not source code) over HTTPS. To disable: set `LLM_SECURITY_SCR_OFFLINE=1`. + +**What about Claude Code integration?** The Claude Code plugin (hooks, agents, commands) uses AI models and sends data to Anthropic. These components are **not included** in the standalone CLI. When you run `npx llm-security scan`, only deterministic scanners execute. 
+ +### Schrems II / NSM Compliance + +- Standalone CLI: **fully compliant** — no cross-border data transfer +- OSV.dev queries (opt-in): sends package metadata to Google-operated API — evaluate per your organization's data classification +- Claude Code plugin: sends code context to Anthropic (US) — requires data processing agreement for regulated environments + +### Norwegian Regulatory Context + +- **NSM Grunnprinsipper:** Automated security scanning fulfills GP 3.1 (vulnerability management) and GP 2.4 (secure development) +- **Digitaliseringsdirektoratet:** Aligns with recommended practices for AI system development lifecycle security +- **EU AI Act (expected Aug 2026):** Directly supports Art. 9 (risk management) and Art. 15 (cybersecurity) requirements + +## 5-Minute Setup + +### GitHub Actions + +Copy `ci/github-action.yml` to `.github/workflows/llm-security.yml`: + +```yaml +name: LLM Security Scan +on: [push, pull_request] +jobs: + security-scan: + runs-on: ubuntu-latest + permissions: + security-events: write + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-node@v4 + with: + node-version: '18' + - run: npx llm-security scan . --fail-on high --format sarif --output-file results.sarif + - uses: github/codeql-action/upload-sarif@v3 + if: always() + with: + sarif_file: results.sarif +``` + +SARIF results appear in the repository's **Security** tab under **Code scanning alerts**. + +### Azure DevOps + +Copy `ci/azure-pipelines.yml` to your pipeline, or include the scan step in an existing pipeline: + +```yaml +steps: + - task: NodeTool@0 + inputs: + versionSpec: '18.x' + - script: npx llm-security scan . 
--fail-on high --format sarif --output-file $(Build.ArtifactStagingDirectory)/results.sarif + displayName: Run llm-security scan + - task: PublishBuildArtifacts@1 + condition: always() + inputs: + pathToPublish: $(Build.ArtifactStagingDirectory)/results.sarif + artifactName: llm-security-scan +``` + +For Azure DevOps Advanced Security, replace `PublishBuildArtifacts@1` with `AdvancedSecurity-Publish@1`. + +### GitLab CI + +Add to `.gitlab-ci.yml`: + +```yaml +llm-security-scan: + image: node:18-alpine + stage: test + script: + - npx llm-security scan . --fail-on high --format sarif --output-file results.sarif + artifacts: + paths: + - results.sarif + reports: + sast: results.sarif + when: always +``` + +SAST report parsing requires GitLab Ultimate. On Free/Premium tiers, download the SARIF artifact manually. + +## Configuration + +### CLI Flags + +| Flag | Description | +|------|-------------| +| `--fail-on <severity>` | Exit 1 if any finding at or above the given severity exists. Values: `critical`, `high`, `medium`, `low` | +| `--compact` | One-liner per finding format. Reduces CI log noise | +| `--format sarif` | Output OASIS SARIF 2.1.0 (default: JSON) | +| `--output-file <path>` | Write full results to file. Stdout gets compact aggregate | +| `--baseline` | Diff against stored baseline (show new/resolved findings) | +| `--save-baseline` | Save current results as baseline for future diffs | + +### Policy File + +Configure defaults in `.llm-security/policy.json`: + +```json +{ + "ci": { + "failOn": "high", + "compact": true + } +} +``` + +A `--fail-on` value given on the command line takes precedence over `ci.failOn`. Compact output is enabled when either `--compact` or `ci.compact` is set; there is no flag to switch it off once the policy enables it. 
+ +### Environment Variables + +| Variable | Description | +|----------|-------------| +| `LLM_SECURITY_SCR_OFFLINE=1` | Disable OSV.dev network calls in supply-chain-recheck | +| `LLM_SECURITY_AUDIT_LOG=<path>` | Write structured JSONL audit trail (SIEM-ready) | + +## Exit Codes + +| Code | Meaning | When | +|------|---------|------| +| `0` | Clean / below threshold | No findings at or above `--fail-on` level, or ALLOW verdict | +| `1` | Threshold exceeded | Findings at or above `--fail-on` level, or WARNING verdict (without `--fail-on`) | +| `2` | Block | BLOCK verdict (only without `--fail-on`) | + +With `--fail-on`, exit codes are binary: 0 (clean) or 1 (threshold exceeded). Without `--fail-on`, the legacy tri-state (0/1/2) is preserved. Usage errors (a missing target or an invalid `--fail-on` value) also exit with code 1. + +## What Gets Scanned + +The 10 deterministic scanners cover: + +| Scanner | Detects | +|---------|---------| +| Unicode | Zero-width characters, homoglyphs, Unicode Tag steganography | +| Entropy | High-entropy strings (potential secrets/tokens) | +| Permission | Overly broad permissions, missing tool justification | +| Dependency | Known vulnerable packages, typosquats | +| Taint | Untrusted input flows to sensitive operations | +| Git forensics | Force pushes, sensitive file history, author anomalies | +| Network | Suspicious URLs, exfiltration endpoints, C2 patterns | +| Memory poisoning | Injection patterns in CLAUDE.md, memory files, rules | +| Supply chain | Lockfile audit, blocklists, OSV.dev (opt-in) | +| Toxic flow | Lethal trifecta correlation (input + access + exfil) | + +## Local Testing + +Try the scan locally, with the same `--fail-on` threshold, before adding it to CI: + +```bash +# With npx (requires the package to be published to npm) +npx llm-security scan . --fail-on high --compact + +# With local clone +node bin/llm-security.mjs scan . 
--fail-on high --compact +``` diff --git a/plugins/llm-security/package.json b/plugins/llm-security/package.json index 080363e..cf73380 100644 --- a/plugins/llm-security/package.json +++ b/plugins/llm-security/package.json @@ -1,11 +1,18 @@ { "name": "llm-security", - "version": "6.0.0", + "version": "6.1.0", "description": "Security scanning, auditing, and threat modeling for Claude Code projects", "type": "module", "bin": { "llm-security": "./bin/llm-security.mjs" }, + "files": [ + "bin/", + "scanners/", + "LICENSE", + "README.md" + ], + "homepage": "https://git.fromaitochitta.com/open/claude-code-llm-security", "engines": { "node": ">=18" }, diff --git a/plugins/llm-security/scanners/lib/policy-loader.mjs b/plugins/llm-security/scanners/lib/policy-loader.mjs index 463c604..1cae6b1 100644 --- a/plugins/llm-security/scanners/lib/policy-loader.mjs +++ b/plugins/llm-security/scanners/lib/policy-loader.mjs @@ -46,6 +46,10 @@ const DEFAULT_POLICY = Object.freeze({ log_path: null, events: ['trifecta', 'injection', 'secrets', 'destructive'], }, + ci: { + failOn: null, + compact: false, + }, }); // Cache loaded policy per project root diff --git a/plugins/llm-security/scanners/scan-orchestrator.mjs b/plugins/llm-security/scanners/scan-orchestrator.mjs index c4028e7..8a4109a 100644 --- a/plugins/llm-security/scanners/scan-orchestrator.mjs +++ b/plugins/llm-security/scanners/scan-orchestrator.mjs @@ -12,6 +12,9 @@ import { discoverFiles } from './lib/file-discovery.mjs'; import { envelope, resetCounter } from './lib/output.mjs'; import { saveBaseline, diffAgainstBaseline, extractFindings } from './lib/diff-engine.mjs'; import { toSARIF } from './lib/sarif-formatter.mjs'; +import { loadPolicy } from './lib/policy-loader.mjs'; + +const FAIL_ON_LEVELS = ['critical', 'high', 'medium', 'low']; // --------------------------------------------------------------------------- // .llm-security-ignore support @@ -123,7 +126,7 @@ const SCANNERS = [ // CLI arg parsing — supports 
--log-file // --------------------------------------------------------------------------- function parseArgs(argv) { - const args = { target: null, logFile: null, outputFile: null, baseline: false, saveBaseline: false, format: 'json' }; + const args = { target: null, logFile: null, outputFile: null, baseline: false, saveBaseline: false, format: 'json', failOn: null, compact: false }; for (let i = 2; i < argv.length; i++) { if (argv[i] === '--log-file' && argv[i + 1]) { args.logFile = argv[++i]; @@ -135,6 +138,10 @@ function parseArgs(argv) { args.baseline = true; } else if (argv[i] === '--save-baseline') { args.saveBaseline = true; + } else if (argv[i] === '--fail-on' && argv[i + 1]) { + args.failOn = argv[++i].toLowerCase(); + } else if (argv[i] === '--compact') { + args.compact = true; } else if (!args.target) { args.target = argv[i]; } @@ -144,6 +151,25 @@ function parseArgs(argv) { async function main() { const args = parseArgs(process.argv); + + // Policy fallback for CI settings (CLI args take precedence) + try { + const policyRoot = args.target ? 
resolve(args.target) : process.cwd(); + const policy = loadPolicy(policyRoot); + if (args.failOn === null && policy.ci && policy.ci.failOn) { + args.failOn = policy.ci.failOn; + } + if (!args.compact && policy.ci && policy.ci.compact) { + args.compact = true; + } + } catch { /* policy loading is best-effort */ } + + // Validate --fail-on value + if (args.failOn !== null && !FAIL_ON_LEVELS.includes(args.failOn)) { + console.error(`--fail-on must be one of: ${FAIL_ON_LEVELS.join(', ')} (got: ${args.failOn})`); + process.exit(1); + } + if (!args.target) { console.error('Usage: node scan-orchestrator.mjs [--log-file ]'); process.exit(1); @@ -254,8 +280,28 @@ async function main() { if (args.outputFile) { writeFileSync(args.outputFile, jsonStr); output.output_file = args.outputFile; - // Stdout gets only the compact aggregate (keeps caller context small) + if (args.compact) { + for (const r of Object.values(results)) { + for (const f of r.findings) { + const loc = f.file ? ` (${f.file}${f.line ? ':' + f.line : ''})` : ''; + process.stderr.write(`[${f.severity.toUpperCase()}] ${f.scanner}: ${f.title}${loc}\n`); + } + } + } process.stdout.write(JSON.stringify({ aggregate: output.aggregate, output_file: args.outputFile }) + '\n'); + } else if (args.compact && args.format === 'json') { + for (const r of Object.values(results)) { + for (const f of r.findings) { + const loc = f.file ? ` (${f.file}${f.line ? 
':' + f.line : ''})` : ''; + process.stdout.write(`[${f.severity.toUpperCase()}] ${f.scanner}: ${f.title}${loc}\n`); + } + } + const a = output.aggregate; + process.stdout.write( + `---\nVerdict: ${a.verdict} | Risk: ${a.risk_score}/100 | ` + + `Findings: ${a.total_findings} (${a.counts.critical}C ${a.counts.high}H ${a.counts.medium}M ${a.counts.low}L ${a.counts.info}I) | ` + + `Duration: ${totalDuration}ms\n` + ); } else { process.stdout.write(jsonStr); } @@ -271,11 +317,18 @@ async function main() { `[deep-scan] Duration: ${totalDuration}ms\n` ); - // Exit code based on verdict — use exitCode instead of exit() to allow - // stdout pipe buffers to drain fully (process.exit() truncates >64KB on macOS) - if (agg.verdict === 'BLOCK') process.exitCode = 2; - else if (agg.verdict === 'WARNING') process.exitCode = 1; - else process.exitCode = 0; + // Exit code — use exitCode instead of exit() to allow stdout pipe buffers + // to drain fully (process.exit() truncates >64KB on macOS) + if (args.failOn !== null) { + const threshold = FAIL_ON_LEVELS.indexOf(args.failOn); + const exceeded = FAIL_ON_LEVELS.slice(0, threshold + 1) + .some(sev => (agg.counts[sev] || 0) > 0); + process.exitCode = exceeded ? 
1 : 0;
+  } else {
+    if (agg.verdict === 'BLOCK') process.exitCode = 2;
+    else if (agg.verdict === 'WARNING') process.exitCode = 1;
+    else process.exitCode = 0;
+  }
 }
 
 main().catch(err => {
diff --git a/plugins/llm-security/tests/lib/policy-loader.test.mjs b/plugins/llm-security/tests/lib/policy-loader.test.mjs
index a1d2ebf..ccc04ac 100644
--- a/plugins/llm-security/tests/lib/policy-loader.test.mjs
+++ b/plugins/llm-security/tests/lib/policy-loader.test.mjs
@@ -100,4 +100,28 @@ describe('policy-loader', () => {
     assert.equal(defaults.trifecta.long_horizon_window, 100);
     assert.equal(defaults.mcp.volume_threshold_bytes, 100_000);
   });
+
+  it('default policy includes ci section with null/false defaults', () => {
+    const defaults = getDefaultPolicy();
+    assert.equal(defaults.ci.failOn, null);
+    assert.equal(defaults.ci.compact, false);
+  });
+
+  it('ci section merges correctly from policy file', () => {
+    writeFileSync(POLICY_FILE, JSON.stringify({
+      ci: { failOn: 'high' },
+    }));
+    const policy = loadPolicy(TEST_ROOT);
+    assert.equal(policy.ci.failOn, 'high');
+    assert.equal(policy.ci.compact, false); // default preserved
+  });
+
+  it('ci section allows compact override', () => {
+    writeFileSync(POLICY_FILE, JSON.stringify({
+      ci: { failOn: 'critical', compact: true },
+    }));
+    const policy = loadPolicy(TEST_ROOT);
+    assert.equal(policy.ci.failOn, 'critical');
+    assert.equal(policy.ci.compact, true);
+  });
 });
diff --git a/plugins/llm-security/tests/scanners/ci-integration.test.mjs b/plugins/llm-security/tests/scanners/ci-integration.test.mjs
new file mode 100644
index 0000000..9c43f69
--- /dev/null
+++ b/plugins/llm-security/tests/scanners/ci-integration.test.mjs
@@ -0,0 +1,164 @@
+// ci-integration.test.mjs — Tests for --fail-on and --compact CI flags
+import { describe, it, afterEach } from 'node:test';
+import { spawn } from 'node:child_process';
+import { strict as assert } from 'node:assert';
+import { resolve, dirname } from 'node:path';
+import { fileURLToPath } from 'node:url';
+import { cpSync, mkdirSync, writeFileSync, rmSync, readFileSync, existsSync } from 'node:fs';
+import { tmpdir } from 'node:os';
+
+const __dirname = dirname(fileURLToPath(import.meta.url));
+const ORCHESTRATOR = resolve(__dirname, '../../scanners/scan-orchestrator.mjs');
+const CLI = resolve(__dirname, '../../bin/llm-security.mjs');
+const POISONED = resolve(__dirname, '../fixtures/memory-scan/poisoned-project');
+const CLEAN = resolve(__dirname, '../fixtures/posture-scan/grade-a-project');
+
+/**
+ * Run the scan orchestrator in a child process and capture its output.
+ *
+ * @param {string[]} args - Arguments appended after the orchestrator script path.
+ * @param {number} [timeout=120000] - Milliseconds before the child is killed.
+ * @returns {Promise<{code: number, stdout: string, stderr: string}>}
+ * @throws {Error} Rejects when the child cannot be spawned or is killed by a
+ *   signal (e.g. the timeout), so an aborted scan is never read as exit code 1.
+ */
+function run(args, timeout = 120000) {
+  return new Promise((done, fail) => {
+    const chunks = [];
+    const errChunks = [];
+    // process.execPath: run the child on the same Node binary as the test runner.
+    const child = spawn(process.execPath, [ORCHESTRATOR, ...args], {
+      timeout,
+      stdio: ['ignore', 'pipe', 'pipe'],
+    });
+    child.stdout.on('data', (c) => chunks.push(c));
+    child.stderr.on('data', (c) => errChunks.push(c));
+    child.on('error', fail);
+    child.on('close', (code, signal) => {
+      if (code === null) {
+        fail(new Error(`scan-orchestrator terminated by signal ${signal}`));
+        return;
+      }
+      done({
+        code,
+        stdout: Buffer.concat(chunks).toString(),
+        stderr: Buffer.concat(errChunks).toString(),
+      });
+    });
+  });
+}
+
+describe('--fail-on flag', () => {
+  it('exit 0 when --fail-on critical and no critical findings', async () => {
+    const { code } = await run([CLEAN, '--fail-on', 'critical']);
+    assert.equal(code, 0, 'should exit 0 — no critical findings in clean fixture');
+  });
+
+  it('exit 1 when --fail-on critical and critical findings exist', async () => {
+    const { code } = await run([POISONED, '--fail-on', 'critical']);
+    assert.equal(code, 1, 'should exit 1 — poisoned fixture has critical findings');
+  });
+
+  it('exit 1 when --fail-on high and high findings exist', async () => {
+    const { code } = await run([POISONED, '--fail-on', 'high']);
+    assert.equal(code, 1, 'should exit 1 — poisoned fixture has high findings');
+  });
+
+  it('exit 1 when --fail-on medium and medium findings exist', async () => {
+    const { code } = await run([POISONED, '--fail-on', 'medium']);
+    assert.equal(code, 1, 'should exit 1 — poisoned fixture has medium+ findings');
+  });
+
+  it('preserves existing exit codes without --fail-on', async () => {
+    const { code } = await run([POISONED]);
+    // Poisoned project produces BLOCK verdict → exit 2
+    assert.equal(code, 2, 'should exit 2 (BLOCK verdict) without --fail-on');
+  });
+
+  it('rejects invalid --fail-on value', async () => {
+    const { code, stderr } = await run(['.', '--fail-on', 'invalid']);
+    assert.equal(code, 1, 'should exit 1 for invalid severity');
+    assert.ok(stderr.includes('--fail-on must be one of'), 'should print validation error');
+  });
+});
+
+describe('--compact flag', () => {
+  it('outputs one-liner format to stdout (not JSON)', async () => {
+    const { stdout } = await run([POISONED, '--compact']);
+    assert.ok(!stdout.includes('"scanners"'), 'should not contain JSON envelope key');
+    assert.ok(stdout.includes('[CRITICAL]') || stdout.includes('[HIGH]'), 'should contain severity prefix');
+    assert.ok(stdout.includes('---'), 'should contain summary separator');
+    assert.ok(stdout.includes('Verdict:'), 'should contain verdict summary line');
+  });
+
+  it('writes full JSON to --output-file, compact aggregate to stdout', async () => {
+    const tmpFile = resolve(tmpdir(), `llm-security-ci-test-${Date.now()}.json`);
+    try {
+      const { stdout } = await run([POISONED, '--compact', '--output-file', tmpFile]);
+      assert.ok(existsSync(tmpFile), 'output file should exist');
+      const content = JSON.parse(readFileSync(tmpFile, 'utf8'));
+      assert.ok(content.scanners, 'file should contain full JSON with scanners key');
+      const stdoutParsed = JSON.parse(stdout);
+      assert.ok(stdoutParsed.aggregate, 'stdout should contain compact aggregate JSON');
+    } finally {
+      if (existsSync(tmpFile)) rmSync(tmpFile);
+    }
+  });
+
+  it('with --output-file writes one-liner findings to stderr', async () => {
+    const tmpFile = resolve(tmpdir(), `llm-security-ci-test-${Date.now()}.json`);
+    try {
+      const { stderr } = await run([POISONED, '--compact', '--output-file', tmpFile]);
+      assert.ok(
+        stderr.includes('[CRITICAL]') || stderr.includes('[HIGH]'),
+        'stderr should contain one-liner findings in compact+output-file mode'
+      );
+    } finally {
+      if (existsSync(tmpFile)) rmSync(tmpFile);
+    }
+  });
+});
+
+describe('--fail-on + --compact combined', () => {
+  it('exit 0 with compact output when below threshold', async () => {
+    const { code, stdout } = await run([CLEAN, '--fail-on', 'critical', '--compact']);
+    assert.equal(code, 0, 'should exit 0 — no critical findings');
+    assert.ok(stdout.includes('Verdict:'), 'should still show compact summary');
+  });
+});
+
+describe('--fail-on via policy.json', () => {
+  const policyRoot = resolve(tmpdir(), `llm-security-policy-ci-${Date.now()}`);
+  const policyDir = resolve(policyRoot, '.llm-security');
+
+  // Copy a fixture into policyRoot and write .llm-security/policy.json into the
+  // copy, so the directory being scanned is the one that carries the policy.
+  // NOTE(review): assumes the fixture yields the same findings once copied
+  // outside the repository — confirm.
+  function stageTarget(fixture, policy) {
+    cpSync(fixture, policyRoot, { recursive: true });
+    mkdirSync(policyDir, { recursive: true });
+    writeFileSync(resolve(policyDir, 'policy.json'), JSON.stringify(policy));
+  }
+
+  afterEach(() => {
+    // force: a test that failed before staging leaves nothing to remove.
+    rmSync(policyRoot, { recursive: true, force: true });
+  });
+
+  it('reads failOn from policy.json ci section', async () => {
+    stageTarget(POISONED, { ci: { failOn: 'critical' } });
+    // No --fail-on flag: without the policy this scan exits 2 (BLOCK verdict),
+    // so exit 1 can only come from ci.failOn being read from the target.
+    const { code } = await run([policyRoot]);
+    assert.equal(code, 1, 'should exit 1 — policy failOn: critical with critical findings');
+  });
+
+  it('CLI --fail-on overrides policy.json', async () => {
+    stageTarget(CLEAN, { ci: { failOn: 'critical' } });
+    // The policy alone would exit 0 (no critical findings in the clean fixture);
+    // the CLI threshold of low must win and turn the low findings into exit 1.
+    const { code } = await run([policyRoot, '--fail-on', 'low']);
+    assert.equal(code, 1, 'CLI --fail-on low should override policy failOn: critical');
+  });
+});