114 lines
3.9 KiB
Bash
Executable file
114 lines
3.9 KiB
Bash
Executable file
#!/bin/bash
# v5-runner.sh — Automated v5.0 Prompt Injection Hardening runner
# Runs all 8 sessions sequentially with fresh context between each.
# Stops on first failure (tests or commit).
#
# Usage: bash scripts/v5-runner.sh [start_session]
#   start_session: S1-S8 (default: S1). Resume from a specific session.
#
# Exit codes: 0 = every requested session verified,
#             1 = a session failed verification,
#             2 = invalid start_session argument.
#
# Requirements: claude CLI in PATH, Forgejo remote configured.

set -euo pipefail

# Force OAuth/subscription billing — unset API key so claude -p uses Max plan
unset ANTHROPIC_API_KEY

readonly REPO="/Users/ktg/.claude/plugins/marketplaces/plugin-marketplace/plugins/llm-security"
readonly PLAN="/Users/ktg/.claude/plans/ethereal-waddling-rainbow.md"
readonly LOG="$REPO/scripts/v5-runner.log"
readonly START="${1:-S1}"
STARTED=false

sessions=(S1 S2 S3 S4 S5 S6 S7 S8)

#######################################
# Print a message to stdout and append it to the run log.
# Globals:   LOG (read)
# Arguments: $* - message text
#######################################
log() {
  printf '%s\n' "$*" | tee -a "$LOG"
}

#######################################
# Extract a numeric counter from captured `node --test` output.
# Matches summary lines of the form "ℹ <key> <number>"; "# <key> <number>"
# is accepted as well in case the runner emits TAP-style output when piped
# (NOTE(review): confirm which reporter your Node version uses when piped).
# Reads from a here-string rather than a pipeline so that "no match" cannot
# trip `set -e` / `pipefail`.
# Arguments: $1 - counter name (e.g. tests, fail)
#            $2 - captured test output
# Outputs:   the last matching number, or nothing if no line matched
#######################################
test_stat() {
  awk -v key="$1" '
    ($1 == "ℹ" || $1 == "#") && $2 == key && $3 ~ /^[0-9]+$/ { value = $3 }
    END { if (value != "") print value }
  ' <<<"$2"
}

#######################################
# Emit the prompt handed to `claude -p` for one session.
# Globals:   REPO, PLAN (read)
# Arguments: $1 - session id (S1-S8)
# Outputs:   prompt text on stdout
#######################################
build_prompt() {
  local session=$1
  cat <<PROMPT
Working directory: $REPO

Read the v5.0 plan at $PLAN. Execute session $session completely.

Steps:
1. Read the plan's $session section carefully
2. Implement ALL code changes described there
3. Write ALL tests described there
4. Run the full test suite: node --test
5. Fix any failures until all tests pass (both new and existing 782+)
6. Update CLAUDE.md hook/test counts in the same commit
7. Commit: git add <specific files> && git commit -m "feat(llm-security): $session - <description>"
8. Push: git push origin main

CRITICAL RULES:
- Implement ONLY $session — do NOT start other sessions
- All 782+ existing tests MUST still pass
- Do NOT use subtree push (will be done at release)
- If pathguard blocks a write, write to a .tmp file and use: mv file.tmp file
- Report what you implemented and test results at the end
PROMPT
}

# Reject an unknown start session up front. Otherwise the loop below would
# skip every session and the script would still report overall success.
# Done before the log is truncated so a typo does not wipe the previous log.
start_is_valid=false
for candidate in "${sessions[@]}"; do
  if [[ "$candidate" == "$START" ]]; then
    start_is_valid=true
    break
  fi
done
if [[ "$start_is_valid" != true ]]; then
  printf 'Unknown start session: %s (expected one of: %s)\n' \
    "$START" "${sessions[*]}" >&2
  exit 2
fi

# The first line truncates the log (tee without -a); everything after appends.
echo "=== v5.0 Runner started at $(date) ===" | tee "$LOG"
log "Starting from: $START"

# All git/node/claude commands below run from the repository root.
cd "$REPO"

AFTER_TESTS=""

for session in "${sessions[@]}"; do
  # Skip until we reach the start session
  if [[ "$STARTED" == false && "$session" != "$START" ]]; then
    continue
  fi
  STARTED=true

  log ""
  log "=== $session: Starting at $(date) ==="

  # Record pre-session state. The baseline run is only used for its test
  # count, so a non-zero exit here must not abort the runner.
  BEFORE_COMMIT=$(git rev-parse HEAD)
  baseline_output=$(node --test 2>&1) || true
  BEFORE_TESTS=$(test_stat tests "$baseline_output")

  # Run the session with full permissions. The `||` guard keeps `set -e` /
  # `pipefail` from killing the script, and PIPESTATUS[0] is claude's own
  # status rather than tee's. Pass/fail is decided by the checks below.
  CLAUDE_EXIT=0
  claude -p "$(build_prompt "$session")" \
    --dangerously-skip-permissions \
    2>&1 | tee -a "$LOG" || CLAUDE_EXIT=${PIPESTATUS[0]}
  log "claude -p exit code: $CLAUDE_EXIT"

  # Verify: tests pass. Capture the status explicitly so the failure branch
  # is reachable under `set -e`.
  log "--- Verifying tests after $session ---"
  TEST_RESULT=0
  TEST_OUTPUT=$(node --test 2>&1) || TEST_RESULT=$?
  AFTER_TESTS=$(test_stat tests "$TEST_OUTPUT")
  FAILURES=$(test_stat fail "$TEST_OUTPUT")

  # A missing failure counter (empty FAILURES) is treated as a failure too.
  if [[ "$TEST_RESULT" -ne 0 || "$FAILURES" != "0" ]]; then
    log "FAILED: $session — tests did not pass (${FAILURES:-unknown} failures)"
    printf '%s\n' "$TEST_OUTPUT" >>"$LOG"
    exit 1
  fi

  # Verify: HEAD moved, i.e. a new commit exists
  AFTER_COMMIT=$(git rev-parse HEAD)
  COMMIT_MSG=$(git log --oneline -1)
  if [[ "$BEFORE_COMMIT" == "$AFTER_COMMIT" ]]; then
    log "FAILED: $session — no commit was created"
    exit 1
  fi

  # Sanity check on the commit subject: it must mention "llm-security", the
  # scope the prompt asks for. This does not check the session id itself.
  if ! grep -qi "llm-security" <<<"$COMMIT_MSG"; then
    log "FAILED: $session — commit '$COMMIT_MSG' does not appear to be from this session"
    exit 1
  fi

  # Soft check: warn (do not fail) when fewer than 5 tests were added.
  # A count that could not be parsed is treated as 0.
  NEW_TESTS=$((${AFTER_TESTS:-0} - ${BEFORE_TESTS:-0}))
  if [[ "$NEW_TESTS" -lt 5 ]]; then
    log "WARNING: $session — only $NEW_TESTS new tests (expected 15+)"
  fi

  log "$session COMPLETE: $AFTER_TESTS tests (+$NEW_TESTS new), commit $COMMIT_MSG"
  log "=== $session: Done at $(date) ==="
done

log ""
log "=== ALL SESSIONS COMPLETE at $(date) ==="
# Nothing has changed since the last session's verification run, so reuse its
# count instead of running the whole suite again.
log "Final test count: $AFTER_TESTS"
log "Final commit: $(git log --oneline -1)"