feat: track consecutive fixture recording failures and auto-escalate (#986)
When a cloud's fixture recording fails 3+ consecutive QA cycles, the system now auto-creates a GitHub issue flagging the persistent failure. This catches stale API keys, changed endpoints, and other silent regressions that would otherwise go unnoticed.

- Persistent tracker at .docs/qa-record-failures.json (git-ignored)
- Counter increments on failure, resets on success
- Deduplicates: skips issue creation if one already exists for that cloud

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
parent 353f20d53a
commit 4e3f77f9bb
1 changed file with 105 additions and 1 deletion
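The tracker the commit message describes is a flat map from cloud name to its current consecutive-failure count. As a rough illustration of the format (the cloud names here are invented, and the jq query is only an assumed convenience for inspection; the script itself uses python3), the file and a threshold check might look like:

$ cat .docs/qa-record-failures.json
{
  "aws": 0,
  "hetzner": 3,
  "vultr": 1
}

$ jq -r 'to_entries[] | select(.value >= 3) | .key' .docs/qa-record-failures.json
hetzner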
@@ -35,7 +35,7 @@ cleanup() {
   cd "${REPO_ROOT}" 2>/dev/null || true
   git worktree prune 2>/dev/null || true
   rm -rf "${WORKTREE_BASE}" 2>/dev/null || true
-  rm -f "${RESULTS_PHASE2}" "${RESULTS_PHASE4}" "/tmp/spawn-qa-record-output.txt" 2>/dev/null || true
+  rm -f "${RESULTS_PHASE2}" "${RESULTS_PHASE4}" "/tmp/spawn-qa-record-output.txt" "/tmp/spawn-qa-escalate.txt" 2>/dev/null || true
   log "=== QA Cycle Done (exit_code=${exit_code}) ==="
   exit $exit_code
 }
@@ -222,8 +222,14 @@ check_timeout || exit 0
 log "=== Phase 1: Record fixtures ==="

 RECORD_OUTPUT="/tmp/spawn-qa-record-output.txt"
+RECORD_FAILURES_FILE="${REPO_ROOT}/.docs/qa-record-failures.json"
 rm -f "${RECORD_OUTPUT}"

+# Initialize persistent failure tracker if missing
+if [[ ! -f "${RECORD_FAILURES_FILE}" ]]; then
+  printf '{}' > "${RECORD_FAILURES_FILE}"
+fi
+
 RECORD_EXIT=0
 bash test/record.sh allsaved 2>&1 | tee -a "${LOG_FILE}" | tee "${RECORD_OUTPUT}" || RECORD_EXIT=$?

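Because the hunk above seeds the tracker with a bare {}, the python3 step later in the cycle can always assume valid JSON on a first run. A minimal standalone sanity check of that initialization, using a throwaway directory (this sketch is not part of the commit):

REPO_ROOT=$(mktemp -d)
mkdir -p "${REPO_ROOT}/.docs"
RECORD_FAILURES_FILE="${REPO_ROOT}/.docs/qa-record-failures.json"
if [[ ! -f "${RECORD_FAILURES_FILE}" ]]; then
  printf '{}' > "${RECORD_FAILURES_FILE}"
fi
# parses cleanly even before any cycle has written a counter
python3 -c 'import json, sys; json.load(open(sys.argv[1]))' "${RECORD_FAILURES_FILE}" \
  && echo "tracker is valid JSON"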
@@ -402,6 +408,104 @@ Only modify ${cloud}/lib/common.sh and test/record.sh if the recording infrastructure
   fi
 fi

+# --- Track consecutive Phase 1 failures per cloud ---
+# Parse the final recording output to determine which clouds failed vs succeeded
+FINAL_RECORD_FAILED=""
+FINAL_RECORD_SUCCEEDED=""
+if [[ -f "${RECORD_OUTPUT}" ]]; then
+  _current_cloud=""
+  _cloud_had_error=""
+  while IFS= read -r line; do
+    clean=$(printf '%s' "$line" | sed 's/\x1b\[[0-9;]*m//g')
+    case "$clean" in
+      *"Recording "*" ━━━"*)
+        # Save previous cloud result
+        if [[ -n "${_current_cloud}" ]]; then
+          if [[ "${_cloud_had_error}" == "true" ]]; then
+            FINAL_RECORD_FAILED="${FINAL_RECORD_FAILED} ${_current_cloud}"
+          else
+            FINAL_RECORD_SUCCEEDED="${FINAL_RECORD_SUCCEEDED} ${_current_cloud}"
+          fi
+        fi
+        _current_cloud=$(printf '%s' "$clean" | sed 's/.*Recording //; s/ ━━━.*//')
+        _cloud_had_error=""
+        ;;
+      *"fail "*)
+        _cloud_had_error="true"
+        ;;
+    esac
+  done < "${RECORD_OUTPUT}"
+  # Handle last cloud
+  if [[ -n "${_current_cloud}" ]]; then
+    if [[ "${_cloud_had_error}" == "true" ]]; then
+      FINAL_RECORD_FAILED="${FINAL_RECORD_FAILED} ${_current_cloud}"
+    else
+      FINAL_RECORD_SUCCEEDED="${FINAL_RECORD_SUCCEEDED} ${_current_cloud}"
+    fi
+  fi
+fi
+FINAL_RECORD_FAILED=$(printf '%s' "${FINAL_RECORD_FAILED}" | sed 's/^ //')
+FINAL_RECORD_SUCCEEDED=$(printf '%s' "${FINAL_RECORD_SUCCEEDED}" | sed 's/^ //')
+
+# Update the persistent failure tracker and escalate if threshold hit
+if [[ -f "${RECORD_FAILURES_FILE}" ]]; then
+  python3 -c "
+import json, sys
+
+tracker_path = sys.argv[1]
+failed = sys.argv[2].split() if sys.argv[2] else []
+succeeded = sys.argv[3].split() if sys.argv[3] else []
+
+try:
+    with open(tracker_path) as f:
+        tracker = json.load(f)
+except (json.JSONDecodeError, FileNotFoundError):
+    tracker = {}
+
+# Increment consecutive failures for failed clouds
+for cloud in failed:
+    tracker[cloud] = tracker.get(cloud, 0) + 1
+
+# Reset counter for clouds that succeeded
+for cloud in succeeded:
+    tracker[cloud] = 0
+
+with open(tracker_path, 'w') as f:
+    json.dump(tracker, f, indent=2, sort_keys=True)
+
+# Output clouds that hit the threshold (3+ consecutive failures)
+escalate = [c for c, count in tracker.items() if count >= 3]
+if escalate:
+    print(' '.join(escalate))
+" "${RECORD_FAILURES_FILE}" "${FINAL_RECORD_FAILED}" "${FINAL_RECORD_SUCCEEDED}" > /tmp/spawn-qa-escalate.txt 2>/dev/null || true
+
+  ESCALATE_CLOUDS=$(cat /tmp/spawn-qa-escalate.txt 2>/dev/null || true)
+  rm -f /tmp/spawn-qa-escalate.txt
+
+  if [[ -n "${ESCALATE_CLOUDS}" ]]; then
+    for cloud in ${ESCALATE_CLOUDS}; do
+      consecutive=$(python3 -c "import json, sys; print(json.load(open(sys.argv[1])).get(sys.argv[2], 0))" "${RECORD_FAILURES_FILE}" "${cloud}" 2>/dev/null || printf "3+")
+      log "Phase 1: ESCALATION — ${cloud} has failed ${consecutive} consecutive cycles"
+
+      # Check if an issue already exists for this cloud
+      existing_issue=$(gh issue list --repo OpenRouterTeam/spawn --state open \
+        --search "fixture recording failing ${cloud}" \
+        --json number --jq '.[0].number' 2>/dev/null) || existing_issue=""
+
+      if [[ -z "${existing_issue}" ]]; then
+        gh issue create --repo OpenRouterTeam/spawn \
+          --title "QA: ${cloud} fixture recording failing for ${consecutive} consecutive cycles" \
+          --body "$(printf 'The automated QA cycle has detected that fixture recording for **%s** has failed for **%s consecutive cycles**.\n\nThis likely indicates a persistent issue with the cloud provider'\''s API or our integration.\n\n## What to check\n- Has the %s API changed? (new auth requirements, endpoint changes, rate limits)\n- Are the API credentials still valid?\n- Check `%s/lib/common.sh` for outdated API calls\n- Run `bash test/record.sh %s` locally to reproduce\n\n## Auto-generated\nThis issue was created automatically by the QA cycle (`qa-cycle.sh`).\n\n-- qa/cycle' "${cloud}" "${consecutive}" "${cloud}" "${cloud}" "${cloud}")" \
+          --label "bug" \
+          2>&1 | tee -a "${LOG_FILE}" || true
+        log "Phase 1: Created GitHub issue for ${cloud} persistent failure"
+      else
+        log "Phase 1: Issue #${existing_issue} already open for ${cloud}, skipping duplicate"
+      fi
+    done
+  fi
+fi
+
 rm -f "${RECORD_OUTPUT}"
 check_timeout || exit 0

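The increment/reset/threshold behaviour in the hunk above can be exercised without running a full QA cycle. A minimal sketch, assuming a made-up cloud name "examplecloud" and a throwaway tracker file; it mirrors only the counter logic, not the gh escalation:

TRACKER=$(mktemp)
printf '{}' > "${TRACKER}"
for _ in 1 2 3; do
  # same increment and >= 3 threshold as the commit's python3 step
  python3 - "${TRACKER}" examplecloud <<'PY'
import json, sys
path, cloud = sys.argv[1], sys.argv[2]
tracker = json.load(open(path))
tracker[cloud] = tracker.get(cloud, 0) + 1
json.dump(tracker, open(path, "w"))
print([c for c, n in tracker.items() if n >= 3])  # escalation set
PY
done
rm -f "${TRACKER}"

Expected output is [], [], then ['examplecloud']: the escalation set only becomes non-empty on the third consecutive failure, matching the threshold in the diff.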