ktg-plugin-marketplace/plugins/llm-security/examples/malicious-skill-demo/run-demo.sh

119 lines
3.4 KiB
Bash
Executable file

#!/usr/bin/env bash
#
# Malicious Skill Demo — Regression test for the deep-scan pipeline
# NOTE: Unix/macOS only. Requires bash. Not available on Windows without WSL.
#
# Usage:
# cd plugins/llm-security
# ./examples/malicious-skill-demo/run-demo.sh
#
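# Expected scan result: BLOCK verdict, 40+ findings across 7 scanners, and a
# scanner exit code of 2. This script itself exits 0 when every assertion
# passes and 1 otherwise.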
# Unset variables are errors and a pipeline reports a failing stage.
# `-e` is deliberately not enabled: the script inspects non-zero exit
# statuses itself instead of aborting on them.
set -uo pipefail

# Absolute path of the directory containing this script.
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
# Two levels above the script directory.
PLUGIN_DIR="$(cd "$SCRIPT_DIR/../.." && pwd)"
# Fixture directory handed to the scanner.
TARGET="$SCRIPT_DIR/evil-project-health"

# Temporary file that receives the scanner output. Because `-e` is off, a
# failed mktemp would otherwise go unnoticed and leave OUTPUT_FILE empty.
if ! OUTPUT_FILE="$(mktemp)"; then
  echo "ERROR: Could not create a temporary output file." >&2
  exit 1
fi

# Running tallies updated by assert().
PASS=0
FAIL=0

# Remove the temporary output file on every exit path.
cleanup() {
  rm -f -- "$OUTPUT_FILE"
}
trap cleanup EXIT
# Record the outcome of one check and print it.
#   $1 - description printed next to the verdict
#   $2 - exit status of the check; 0 counts as a pass, anything else as a fail
# Increments the global PASS or FAIL counter accordingly.
assert() {
  local label="$1"
  local status="$2"

  # Handle the failing case first so the success path reads straight through.
  if ! [ "$status" -eq 0 ]; then
    echo " FAIL: $label"
    FAIL=$((FAIL + 1))
    return 0
  fi

  echo " PASS: $label"
  PASS=$((PASS + 1))
}
# Banner: show what is about to be scanned and which orchestrator is used.
echo "=== LLM Security Deep-Scan Regression Test ==="
echo ""
echo "Target: $TARGET"
echo "Scanners: $PLUGIN_DIR/scanners/scan-orchestrator.mjs"
echo ""

# Preflight checks. Diagnostics go to stderr so they are not mixed into the
# regular report on stdout; each failure ends the script with status 1.
if [ ! -d "$TARGET" ]; then
  echo "ERROR: Target directory not found: $TARGET" >&2
  exit 1
fi
if [ ! -f "$PLUGIN_DIR/scanners/scan-orchestrator.mjs" ]; then
  echo "ERROR: Scan orchestrator not found. Run from the llm-security plugin root." >&2
  exit 1
fi
# The scan and the JSON checks in this script all invoke node with stderr
# discarded, so a missing interpreter would otherwise only show up as a run
# of unexplained failed assertions.
if ! command -v node >/dev/null 2>&1; then
  echo "ERROR: node not found in PATH. Install Node.js to run this test." >&2
  exit 1
fi
printf '%s\n' "Running 7 deterministic scanners..." ""

# Invoke the orchestrator on the target. Its stderr is discarded and its exit
# status is kept in SCAN_EXIT (0 unless the command reports otherwise).
SCAN_EXIT=0
node "$PLUGIN_DIR/scanners/scan-orchestrator.mjs" "$TARGET" \
  --output-file "$OUTPUT_FILE" 2>/dev/null || SCAN_EXIT=$?
echo ""
echo "--- Assertions ---"

# Print one field of the JSON document stored in OUTPUT_FILE.
#   $1 - dotted path of the field, e.g. aggregate.verdict
# The file name and the path are handed to node as arguments instead of being
# spliced into the JavaScript source, so a temp path containing a quote or a
# backslash cannot break the snippet. Nothing is printed when the file cannot
# be read or parsed (node's stderr is discarded).
report_field() {
  node -e '
    const [file, path] = process.argv.slice(1);
    const doc = JSON.parse(require("fs").readFileSync(file, "utf-8"));
    console.log(path.split(".").reduce((obj, key) => obj[key], doc));
  ' "$OUTPUT_FILE" "$1" 2>/dev/null
}

# Succeed when $1 is a non-negative integer that is >= $2.
# Any other value (empty, "undefined", a float) is an ordinary failure rather
# than an "integer expression expected" error from test.
at_least() {
  [[ "$1" =~ ^[0-9]+$ ]] && [ "$1" -ge "$2" ]
}

# Print "true" when the scanner identified by prefix $1 appears in the report:
# either some scanner entry's `scanner` name contains the prefix
# (case-insensitive) or some finding id starts with "DS-<prefix>-".
# Prints "false" otherwise, and nothing if the report cannot be processed.
scanner_present() {
  node -e '
    const [file, prefix] = process.argv.slice(1);
    const doc = JSON.parse(require("fs").readFileSync(file, "utf-8"));
    const scanners = Object.values(doc.scanners);
    const byName = scanners.some(s => s.scanner && s.scanner.toLowerCase().includes(prefix.toLowerCase()));
    // Fall back to finding ids only when no scanner name matched.
    const found = byName || scanners.some(s => s.findings && s.findings.some(f => f.id && f.id.startsWith("DS-" + prefix + "-")));
    console.log(found);
  ' "$OUTPUT_FILE" "$1" 2>/dev/null
}

# 1. Exit code should be 2 (BLOCK)
[ "$SCAN_EXIT" -eq 2 ]
assert "Exit code is 2 (BLOCK)" $?

# 2. Output file exists and has content
[ -s "$OUTPUT_FILE" ]
assert "Output file has content" $?

# 3. Parse JSON and check verdict
VERDICT=$(report_field aggregate.verdict)
[ "$VERDICT" = "BLOCK" ]
assert "Verdict is BLOCK" $?

# 4. Total findings >= 40
TOTAL=$(report_field aggregate.total_findings)
at_least "$TOTAL" 40
assert "Total findings >= 40 (got: $TOTAL)" $?

# 5. Risk score >= 90
SCORE=$(report_field aggregate.risk_score)
at_least "$SCORE" 90
assert "Risk score >= 90 (got: $SCORE)" $?

# 6. All 7 scanner prefixes present
for PREFIX in UNI ENT PRM DEP TNT GIT NET; do
  HAS=$(scanner_present "$PREFIX")
  [ "$HAS" = "true" ]
  assert "Scanner $PREFIX present in output" $?
done

# 7. At least one CRITICAL finding
CRITS=$(report_field aggregate.counts.critical)
at_least "$CRITS" 1
assert "At least 1 CRITICAL finding (got: $CRITS)" $?
# Summary of the tallies collected by the assertions.
printf '\n'
printf '%s\n' "--- Results ---"
printf ' Passed: %s\n' "$PASS"
printf ' Failed: %s\n' "$FAIL"
printf ' Total: %s\n' "$((PASS + FAIL))"
printf '\n'

# Overall verdict: exit 1 if any assertion failed, otherwise exit 0.
if ! [ "$FAIL" -eq 0 ]; then
  echo "=== $FAIL ASSERTION(S) FAILED ==="
  exit 1
fi

echo "=== ALL ASSERTIONS PASSED ==="
exit 0