feat: track consecutive fixture recording failures and auto-escalate (#986)

When a cloud's fixture recording fails for 3+ consecutive QA cycles, the
system now auto-creates a GitHub issue flagging the persistent failure.
This catches stale API keys, changed endpoints, and other silent
regressions that would otherwise go unnoticed.

- Persistent tracker at .docs/qa-record-failures.json (git-ignored)
- Counter increments on failure, resets on success
- Deduplicates: skips issue creation if one already exists for that cloud
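
For illustration (cloud names below are hypothetical, not from the repo), the tracker is a flat JSON object mapping each cloud to its current consecutive-failure count, written with two-space indentation and sorted keys; a count of 3 or more triggers escalation, and a successful recording resets the count to 0:

```
{
  "cloud-a": 3,
  "cloud-b": 0
}
```

With this state, cloud-a would be escalated on the current cycle (one issue, deduplicated while it stays open), while cloud-b starts fresh.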

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
Ahmed Abushagur 2026-02-13 11:42:17 -08:00 committed by GitHub
parent 353f20d53a
commit 4e3f77f9bb

@@ -35,7 +35,7 @@ cleanup() {
cd "${REPO_ROOT}" 2>/dev/null || true
git worktree prune 2>/dev/null || true
rm -rf "${WORKTREE_BASE}" 2>/dev/null || true
rm -f "${RESULTS_PHASE2}" "${RESULTS_PHASE4}" "/tmp/spawn-qa-record-output.txt" 2>/dev/null || true
rm -f "${RESULTS_PHASE2}" "${RESULTS_PHASE4}" "/tmp/spawn-qa-record-output.txt" "/tmp/spawn-qa-escalate.txt" 2>/dev/null || true
log "=== QA Cycle Done (exit_code=${exit_code}) ==="
exit $exit_code
}
@@ -222,8 +222,14 @@ check_timeout || exit 0
log "=== Phase 1: Record fixtures ==="
RECORD_OUTPUT="/tmp/spawn-qa-record-output.txt"
RECORD_FAILURES_FILE="${REPO_ROOT}/.docs/qa-record-failures.json"
rm -f "${RECORD_OUTPUT}"
# Initialize persistent failure tracker if missing
if [[ ! -f "${RECORD_FAILURES_FILE}" ]]; then
printf '{}' > "${RECORD_FAILURES_FILE}"
fi
RECORD_EXIT=0
bash test/record.sh allsaved 2>&1 | tee -a "${LOG_FILE}" | tee "${RECORD_OUTPUT}" || RECORD_EXIT=$?
@@ -402,6 +408,104 @@ Only modify ${cloud}/lib/common.sh and test/record.sh if the recording infrastru
fi
fi
# --- Track consecutive Phase 1 failures per cloud ---
# Parse the final recording output to determine which clouds failed vs succeeded
FINAL_RECORD_FAILED=""
FINAL_RECORD_SUCCEEDED=""
if [[ -f "${RECORD_OUTPUT}" ]]; then
_current_cloud=""
_cloud_had_error=""
while IFS= read -r line; do
clean=$(printf '%s' "$line" | sed 's/\x1b\[[0-9;]*m//g')
case "$clean" in
*"Recording "*" ━━━"*)
# Save previous cloud result
if [[ -n "${_current_cloud}" ]]; then
if [[ "${_cloud_had_error}" == "true" ]]; then
FINAL_RECORD_FAILED="${FINAL_RECORD_FAILED} ${_current_cloud}"
else
FINAL_RECORD_SUCCEEDED="${FINAL_RECORD_SUCCEEDED} ${_current_cloud}"
fi
fi
_current_cloud=$(printf '%s' "$clean" | sed 's/.*Recording //; s/ ━━━.*//')
_cloud_had_error=""
;;
*"fail "*)
_cloud_had_error="true"
;;
esac
done < "${RECORD_OUTPUT}"
# Handle last cloud
if [[ -n "${_current_cloud}" ]]; then
if [[ "${_cloud_had_error}" == "true" ]]; then
FINAL_RECORD_FAILED="${FINAL_RECORD_FAILED} ${_current_cloud}"
else
FINAL_RECORD_SUCCEEDED="${FINAL_RECORD_SUCCEEDED} ${_current_cloud}"
fi
fi
fi
FINAL_RECORD_FAILED=$(printf '%s' "${FINAL_RECORD_FAILED}" | sed 's/^ //')
FINAL_RECORD_SUCCEEDED=$(printf '%s' "${FINAL_RECORD_SUCCEEDED}" | sed 's/^ //')
# Update the persistent failure tracker and escalate if threshold hit
if [[ -f "${RECORD_FAILURES_FILE}" ]]; then
python3 -c "
import json, sys
tracker_path = sys.argv[1]
failed = sys.argv[2].split() if sys.argv[2] else []
succeeded = sys.argv[3].split() if sys.argv[3] else []
try:
with open(tracker_path) as f:
tracker = json.load(f)
except (json.JSONDecodeError, FileNotFoundError):
tracker = {}
# Increment consecutive failures for failed clouds
for cloud in failed:
tracker[cloud] = tracker.get(cloud, 0) + 1
# Reset counter for clouds that succeeded
for cloud in succeeded:
tracker[cloud] = 0
with open(tracker_path, 'w') as f:
json.dump(tracker, f, indent=2, sort_keys=True)
# Output clouds that hit the threshold (3+ consecutive failures)
escalate = [c for c, count in tracker.items() if count >= 3]
if escalate:
print(' '.join(escalate))
" "${RECORD_FAILURES_FILE}" "${FINAL_RECORD_FAILED}" "${FINAL_RECORD_SUCCEEDED}" > /tmp/spawn-qa-escalate.txt 2>/dev/null || true
ESCALATE_CLOUDS=$(cat /tmp/spawn-qa-escalate.txt 2>/dev/null || true)
rm -f /tmp/spawn-qa-escalate.txt
if [[ -n "${ESCALATE_CLOUDS}" ]]; then
for cloud in ${ESCALATE_CLOUDS}; do
consecutive=$(python3 -c "import json, sys; print(json.load(open(sys.argv[1])).get(sys.argv[2], 0))" "${RECORD_FAILURES_FILE}" "${cloud}" 2>/dev/null || printf "3+")
log "Phase 1: ESCALATION — ${cloud} has failed ${consecutive} consecutive cycles"
# Check if an issue already exists for this cloud
existing_issue=$(gh issue list --repo OpenRouterTeam/spawn --state open \
--search "fixture recording failing ${cloud}" \
--json number --jq '.[0].number' 2>/dev/null) || existing_issue=""
if [[ -z "${existing_issue}" ]]; then
gh issue create --repo OpenRouterTeam/spawn \
--title "QA: ${cloud} fixture recording failing for ${consecutive} consecutive cycles" \
--body "$(printf 'The automated QA cycle has detected that fixture recording for **%s** has failed for **%s consecutive cycles**.\n\nThis likely indicates a persistent issue with the cloud provider'\''s API or our integration.\n\n## What to check\n- Has the %s API changed? (new auth requirements, endpoint changes, rate limits)\n- Are the API credentials still valid?\n- Check `%s/lib/common.sh` for outdated API calls\n- Run `bash test/record.sh %s` locally to reproduce\n\n## Auto-generated\nThis issue was created automatically by the QA cycle (`qa-cycle.sh`).\n\n-- qa/cycle' "${cloud}" "${consecutive}" "${cloud}" "${cloud}" "${cloud}")" \
--label "bug" \
2>&1 | tee -a "${LOG_FILE}" || true
log "Phase 1: Created GitHub issue for ${cloud} persistent failure"
else
log "Phase 1: Issue #${existing_issue} already open for ${cloud}, skipping duplicate"
fi
done
fi
fi
rm -f "${RECORD_OUTPUT}"
check_timeout || exit 0