diff --git a/scripts/templates/budget/BUDGET.md b/scripts/templates/budget/BUDGET.md
new file mode 100644
index 0000000..f872efd
--- /dev/null
+++ b/scripts/templates/budget/BUDGET.md
@@ -0,0 +1,30 @@
+# Budget Policy: {{PROJECT_NAME}}
+
+## Company Budget
+- window: {{BUDGET_WINDOW}}
+- limit: {{BUDGET_LIMIT_CENTS}} cents
+- warn_percent: 80
+- hard_stop: true
+
+## Agent Budgets
+- {{AGENT_NAME}}: {{AGENT_BUDGET_CENTS}} cents/{{BUDGET_WINDOW}}
+
+## Notification
+- on_warn: log
+- on_hard_stop: pause
+
+## Notes
+
+Budget enforcement is POST-HOC (checked after each run, not before).
+This matches Paperclip's proven approach: check SUM(cost) after run,
+pause if exceeded. No pre-run reservation needed.
+
+Cost estimation currently counts logged tool events (1 event = 1 cent) as a
+rough proxy for spend. For accurate cost data, organizations can use the
+Admin API: `/v1/organizations/cost_report` (requires Admin API key: sk-ant-admin...).
+
+Only `limit`, `warn_percent` and `hard_stop` are read by the hook; the other
+fields are informational. All events in `budget/cost-events.jsonl` count
+toward `limit`, so clear or rotate that file to start a new window.
+
+For headless runs, use `claude -p --max-budget-usd N` as a per-run cap.
diff --git a/scripts/templates/budget/README.md b/scripts/templates/budget/README.md
new file mode 100644
index 0000000..1cd0dc0
--- /dev/null
+++ b/scripts/templates/budget/README.md
@@ -0,0 +1,46 @@
+# Budget Tracking
+
+Post-hoc budget enforcement inspired by Paperclip's budget system.
+
+## How it works
+
+1. `budget-hook.sh` runs as a PostToolUse hook after every tool call
+2. Each call is logged to `budget/cost-events.jsonl`
+3. After logging, cumulative cost is compared against `BUDGET.md` policy
+4. If soft threshold (default 80%) exceeded: warning to stderr
+5. If hard threshold (100%) exceeded and hard_stop=true: creates `budget/PAUSED`
+   flag file; every later hook run exits 2 and reports the pause on stderr
+
+## Why post-hoc, not pre-run?
+
+Paperclip uses the same approach. Pre-run budget reservation requires a
+persistent service or lock file coordination. Post-hoc checking is simpler
+and robust enough in practice — the worst case is one extra run before pause.
+
+## Cost estimation
+
+The current implementation counts events as a rough proxy for cost. For
+accurate cost tracking, you have two options:
+
+1. **Admin API** (org accounts only): Query `/v1/organizations/cost_report`
+   with an Admin API key (`sk-ant-admin...`). This gives actual USD costs.
+2. **Token estimation**: Parse token counts from Claude's responses and
+   multiply by published per-token prices. More accurate than event counting
+   but still an estimate.
+
+For headless runs, `claude -p --max-budget-usd N` provides a per-run
+budget cap directly in the CLI.
+
+## Integration
+
+Add to `.claude/settings.json`:
+```json
+{
+  "hooks": {
+    "PostToolUse": [{
+      "matcher": "*",
+      "hooks": [{"type": "command", "command": "bash budget/budget-hook.sh"}]
+    }]
+  }
+}
+```
diff --git a/scripts/templates/budget/budget-hook.sh b/scripts/templates/budget/budget-hook.sh
new file mode 100644
index 0000000..d548cf1
--- /dev/null
+++ b/scripts/templates/budget/budget-hook.sh
@@ -0,0 +1,110 @@
+#!/bin/bash
+# PostToolUse hook: Log cost events and enforce budget.
+# Bash 3.2 compatible. Uses python3 for JSON parsing.
+#
+# Follows Paperclip's post-hoc enforcement pattern:
+#   1. Log cost event after each tool call
+#   2. Check cumulative cost against budget policy
+#   3. Warn at soft threshold, pause at hard threshold
+#
+# Cost model: every logged event counts as 1 cent (a rough proxy).
+#
+# Exit codes:
+#   0 - within budget (a warning may have been printed to stderr)
+#   2 - budget/PAUSED exists; the reason is printed to stderr
+#
+# Placeholders:
+#   {{WORKING_DIR}} - absolute path to project directory
+
+WORKING_DIR="{{WORKING_DIR}}"
+BUDGET_DIR="$WORKING_DIR/budget"
+COST_LOG="$BUDGET_DIR/cost-events.jsonl"
+BUDGET_FILE="$WORKING_DIR/BUDGET.md"
+PAUSED_FLAG="$BUDGET_DIR/PAUSED"
+
+mkdir -p "$BUDGET_DIR"
+
+# Read hook input (JSON on stdin)
+INPUT=$(cat)
+
+# Log cost event. The payload reaches python3 on stdin and the log path via
+# the environment; nothing is interpolated into the Python source, so quotes,
+# backslashes or newlines in the tool input can neither break JSON parsing
+# nor be executed as code.
+printf '%s' "$INPUT" | COST_LOG="$COST_LOG" python3 -c '
+import json, os, sys, time
+
+try:
+    data = json.load(sys.stdin)
+except ValueError:
+    # Unparseable hook input: skip logging rather than fail the hook.
+    sys.exit(0)
+if not isinstance(data, dict):
+    sys.exit(0)
+
+event = {
+    "timestamp": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()),
+    "tool_name": data.get("tool_name", ""),
+    "agent": os.environ.get("AGENT_NAME", "unknown"),
+    "estimated_tokens": 0,
+}
+
+with open(os.environ["COST_LOG"], "a") as f:
+    f.write(json.dumps(event) + "\n")
+' 2>/dev/null
+
+# Check budget if BUDGET.md exists
+if [ -f "$BUDGET_FILE" ] && [ -f "$COST_LOG" ]; then
+  BUDGET_RESULT=$(BUDGET_FILE="$BUDGET_FILE" COST_LOG="$COST_LOG" PAUSED_FLAG="$PAUSED_FLAG" python3 -c '
+import os, re, sys
+
+budget_file = os.environ.get("BUDGET_FILE", "")
+cost_log = os.environ.get("COST_LOG", "")
+paused_flag = os.environ.get("PAUSED_FLAG", "")
+try:
+    with open(budget_file) as f:
+        content = f.read()
+    limit_m = re.search(r"limit:\s*(\d+)\s*cents", content)
+    if not limit_m:
+        # No numeric limit configured: nothing to enforce.
+        print("ok")
+        sys.exit(0)
+    limit = int(limit_m.group(1))
+    warn_m = re.search(r"warn_percent:\s*(\d+)", content)
+    warn_pct = int(warn_m.group(1)) if warn_m else 80
+    hard_m = re.search(r"hard_stop:\s*(\w+)", content)
+    hard_stop = hard_m.group(1).lower() == "true" if hard_m else True
+    with open(cost_log) as f:
+        event_count = sum(1 for _ in f)
+    # Rough proxy: one logged event is treated as one cent.
+    estimated_cents = event_count
+    pct = (estimated_cents / limit * 100) if limit > 0 else 0
+    if pct >= 100 and hard_stop:
+        with open(paused_flag, "w") as f:
+            f.write("Budget exceeded: " + str(estimated_cents) + "/" + str(limit) + " cents")
+        print("hard_stop")
+    elif pct >= warn_pct:
+        print("warn")
+    else:
+        print("ok")
+except Exception:
+    # Best effort: a broken policy file must not break the hook.
+    print("ok")
+' 2>/dev/null)
+
+  if [ "$BUDGET_RESULT" = "hard_stop" ]; then
+    echo "BUDGET EXCEEDED — agent paused. Check $PAUSED_FLAG" >&2
+  elif [ "$BUDGET_RESULT" = "warn" ]; then
+    echo "BUDGET WARNING — approaching limit" >&2
+  fi
+fi
+
+# Check if agent is paused
+if [ -f "$PAUSED_FLAG" ]; then
+  # Claude Code only reads stderr when a hook exits 2 (stdout JSON is
+  # ignored on that path), so the pause reason must go to stderr.
+  echo "Agent paused: budget exceeded. Remove $PAUSED_FLAG to resume." >&2
+  exit 2
+fi
+
+exit 0
diff --git a/scripts/templates/budget/budget-report.sh b/scripts/templates/budget/budget-report.sh
new file mode 100644
index 0000000..4113eca
--- /dev/null
+++ b/scripts/templates/budget/budget-report.sh
@@ -0,0 +1,88 @@
+#!/bin/bash
+# Budget report: summarize cost events and compare against policy.
+# Bash 3.2 compatible. Uses python3 for aggregation.
+#
+# Usage: ./budget-report.sh
+#
+# Placeholders:
+#   {{WORKING_DIR}} - absolute path to project directory
+
+WORKING_DIR="{{WORKING_DIR}}"
+COST_LOG="$WORKING_DIR/budget/cost-events.jsonl"
+BUDGET_FILE="$WORKING_DIR/BUDGET.md"
+PAUSED_FLAG="$WORKING_DIR/budget/PAUSED"
+
+if [ ! -f "$COST_LOG" ]; then
+  echo "No cost events recorded yet."
+  exit 0
+fi
+
+COST_LOG="$COST_LOG" BUDGET_FILE="$BUDGET_FILE" PAUSED_FLAG="$PAUSED_FLAG" python3 -c '
+import json, os, re
+from collections import Counter
+
+cost_log = os.environ.get("COST_LOG", "")
+budget_file = os.environ.get("BUDGET_FILE", "")
+paused_flag = os.environ.get("PAUSED_FLAG", "")
+
+# Read events; skip blank, malformed or non-object lines
+events = []
+with open(cost_log) as f:
+    for line in f:
+        line = line.strip()
+        if not line:
+            continue
+        try:
+            record = json.loads(line)
+        except ValueError:
+            continue
+        if isinstance(record, dict):
+            events.append(record)
+
+# Aggregate (str() keeps the report printable if a field is not a string)
+by_agent = Counter(str(e.get("agent", "unknown")) for e in events)
+by_day = Counter(str(e.get("timestamp", ""))[:10] for e in events)
+by_tool = Counter(str(e.get("tool_name", "unknown")) for e in events)
+
+print("BUDGET REPORT")
+print("=" * 50)
+print("Total events: " + str(len(events)))
+print()
+
+# Per-agent breakdown, busiest first
+print("By Agent:")
+for agent, count in by_agent.most_common():
+    print("  " + agent + ": " + str(count) + " events")
+print()
+
+# Per-tool breakdown, busiest first
+print("By Tool:")
+for tool, count in by_tool.most_common():
+    print("  " + tool + ": " + str(count) + " events")
+print()
+
+# Per-day breakdown (last 7 days that have events)
+print("By Day (last 7):")
+for day, count in sorted(by_day.items())[-7:]:
+    print("  " + day + ": " + str(count) + " events")
+print()
+
+# Budget comparison
+if os.path.exists(budget_file):
+    with open(budget_file) as f:
+        content = f.read()
+    limit_m = re.search(r"limit:\s*(\d+)\s*cents", content)
+    if limit_m:
+        limit = int(limit_m.group(1))
+        est_cents = len(events)  # rough proxy: 1 event = 1 cent
+        pct = (est_cents / limit * 100) if limit > 0 else 0
+        print("Budget: ~" + str(est_cents) + "/" + str(limit) + " cents (" + str(round(pct)) + "%)")
+
+# Paused status
+if os.path.exists(paused_flag):
+    with open(paused_flag) as f:
+        reason = f.read().strip()
+    print("")
+    print("!! AGENT PAUSED: " + reason)
+    print("   Remove " + paused_flag + " to resume")
+'