feat(templates): add feedback loop and performance scoring templates
Session 5 step 20 — templates for recurring feedback patterns with VFM-compatible scoring. Adds FEEDBACK.md append-only log, PostToolUse hook that detects 3+ recurring pattern tags, and per-agent scoring that tracks trends against prior window. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
2451dd9dfd
commit
d743ec7fbf
4 changed files with 404 additions and 0 deletions
120
scripts/templates/feedback/feedback-collector.sh
Normal file
120
scripts/templates/feedback/feedback-collector.sh
Normal file
|
|
@ -0,0 +1,120 @@
|
|||
#!/bin/bash
# PostToolUse hook: Collect feedback after pipeline completion.
# Bash 3.2 compatible. Uses python3 for JSON parsing and Markdown-table append.
#
# Triggered after a designated "review" tool call completes.
# Reads pipeline output and reviewer score, appends to FEEDBACK.md,
# and detects recurring patterns (3+ rows with same tag = recurring).
#
# Placeholders (substituted when the template is rendered):
#   {{WORKING_DIR}}   - absolute path to project directory
#   {{PIPELINE_NAME}} - name of the pipeline being tracked
# Environment:
#   SCORE_THRESHOLD   - minimum acceptable score (default: 60)
#   AGENT_NAME        - fallback agent name when the tool result omits one

WORKING_DIR="{{WORKING_DIR}}"
PIPELINE_NAME="{{PIPELINE_NAME}}"
SCORE_THRESHOLD="${SCORE_THRESHOLD:-60}"
FEEDBACK_FILE="$WORKING_DIR/FEEDBACK.md"

# Hook input (JSON) arrives on stdin. Capture it once and hand it to python
# via the environment rather than string interpolation, so quotes, backslashes,
# and $-sequences in the payload cannot break -- or inject into -- the Python
# source (the original `"""$HOOK_INPUT"""` heredoc splice did exactly that).
HOOK_INPUT=$(cat)
export HOOK_INPUT FEEDBACK_FILE PIPELINE_NAME SCORE_THRESHOLD

# Only act on review tool calls.
TOOL_NAME=$(python3 -c "
import json, os
try:
    print(json.loads(os.environ.get('HOOK_INPUT', '')).get('tool_name', ''))
except Exception:
    print('')
" 2>/dev/null)

if [ "$TOOL_NAME" != "review_pipeline" ] && [ "$TOOL_NAME" != "score_output" ]; then
    exit 0
fi

# Extract score, agent, issue, resolution, pattern from the hook input and
# append a row to FEEDBACK.md. The heredoc delimiter is QUOTED so the shell
# performs no expansion inside the Python program; all inputs come from env.
python3 << 'PYEOF'
import json, os, re, sys
from datetime import datetime, timezone

hook_input = os.environ.get('HOOK_INPUT', '')
feedback_file = os.environ.get('FEEDBACK_FILE', '')
pipeline_name = os.environ.get('PIPELINE_NAME', '')
try:
    score_threshold = int(os.environ.get('SCORE_THRESHOLD', '60'))
except ValueError:
    score_threshold = 60

try:
    data = json.loads(hook_input)
except Exception:
    # Malformed hook input: silently skip (best-effort hook, never block the tool).
    sys.exit(0)

tool_result = data.get('tool_result', '')
if isinstance(tool_result, dict):
    tool_result = json.dumps(tool_result)

# Parse structured fields from the tool result (JSON preferred, text fallback).
agent_name = os.environ.get('AGENT_NAME', 'unknown')
score = 0
issue = ''
resolution = ''
pattern = ''

try:
    result_data = json.loads(tool_result)
    agent_name = result_data.get('agent', agent_name)
    score = int(result_data.get('score', 0))
    issue = result_data.get('issue', '')
    resolution = result_data.get('resolution', '')
    pattern = result_data.get('pattern', '')
except Exception:
    # Fallback: look for "score: N" / "pattern: tag" in plain text.
    m = re.search(r'score[:\s]+(\d+)', tool_result, re.IGNORECASE)
    if m:
        score = int(m.group(1))
    m = re.search(r'pattern[:\s]+(\S+)', tool_result, re.IGNORECASE)
    if m:
        pattern = m.group(1)

# Nothing worth recording.
if score == 0 and not issue:
    sys.exit(0)

# Timezone-aware UTC date (datetime.utcnow() is deprecated).
date_str = datetime.now(timezone.utc).strftime('%Y-%m-%d')
row = f"| {date_str} | {pipeline_name} | {agent_name} | {score}/100 | {issue} | {resolution} | {pattern} |"

# Append to the feedback table. The file must already exist (rendered from
# the FEEDBACK.md template) -- this hook never creates it.
if not os.path.exists(feedback_file):
    print(f"Warning: {feedback_file} not found -- skipping feedback append")
    sys.exit(0)

with open(feedback_file, 'r') as f:
    content = f.read()

# Insert the new row just under the table's separator line, keeping the
# template's placeholder row in place for the next append.
separator = '|------|----------|-------|-------|-------|------------|---------|'
placeholder_row = '| {{DATE}} | {{PIPELINE_NAME}} | {{AGENT_NAME}} | {{SCORE}}/100 | {{ISSUE_DESCRIPTION}} | {{RESOLUTION}} | {{PATTERN_TAG}} |'

if placeholder_row in content:
    content = content.replace(placeholder_row, row + '\n' + placeholder_row)
elif separator in content:
    content = content.replace(separator, separator + '\n' + row)
else:
    content += '\n' + row + '\n'

with open(feedback_file, 'w') as f:
    f.write(content)

print(f"Feedback recorded: score={score}, pattern={pattern}")

# Detect recurring patterns. Count only table rows whose LAST cell equals the
# tag, so an agent/issue/resolution that merely contains the tag text does
# not inflate the count (a raw substring count over the whole file did).
if pattern:
    pattern_count = 0
    for line in content.splitlines():
        cells = [c.strip() for c in line.strip().strip('|').split('|')]
        if len(cells) == 7 and cells[-1] == pattern:
            pattern_count += 1
    if pattern_count >= 3:
        print(f"RECURRING PATTERN DETECTED: '{pattern}' appears {pattern_count} times")
        print(f"Action required: review prompt or pipeline for '{pipeline_name}'")

# Flag low scores.
if score < score_threshold and score > 0:
    print(f"LOW SCORE: {score} < threshold {score_threshold} for agent {agent_name}")
PYEOF

exit 0
|
||||
Loading…
Add table
Add a link
Reference in a new issue