diff --git a/.claude/skills/setup-agent-team/qa-cycle.sh b/.claude/skills/setup-agent-team/qa-cycle.sh
index 31807738..f5539544 100644
--- a/.claude/skills/setup-agent-team/qa-cycle.sh
+++ b/.claude/skills/setup-agent-team/qa-cycle.sh
@@ -875,10 +875,12 @@ if [[ -f "${RESULTS_PHASE4}" ]]; then
     RETRY_FAIL=$(grep -c ':fail$' "${RESULTS_PHASE4}" || true)
     log "Phase 4: ${RETRY_PASS} passed, ${RETRY_FAIL} failed"
 
-    python3 test/update-readme.py "${RESULTS_PHASE4}" 2>&1 | tee -a "${LOG_FILE}"
+    # TODO: Rewrite update-readme.py as TypeScript utility
+    # For now, skip README auto-update (removed test/update-readme.py - security theater cleanup)
+    log "Phase 4: Skipping README auto-update (needs TypeScript rewrite)"
 
     # Commit + push if README changed (using PR workflow to avoid race conditions)
-    if [[ -n "$(git diff --name-only README.md 2>/dev/null)" ]]; then
+    if false && [[ -n "$(git diff --name-only README.md 2>/dev/null)" ]]; then
         # Create feature branch for README update (timestamped to avoid collisions)
         README_BRANCH="qa/readme-update-$(date +%s)"
         git checkout -b "${README_BRANCH}" 2>&1 | tee -a "${LOG_FILE}"
diff --git a/test/e2e.sh b/test/e2e.sh
deleted file mode 100644
index 675b22a4..00000000
--- a/test/e2e.sh
+++ /dev/null
@@ -1,1268 +0,0 @@
-#!/bin/bash
-set -eo pipefail
-
-# E2E Tests — Real server provisioning, agent install, and verification
-# By default runs ONE agent per cloud (smoke test). Use --all for the full matrix.
-#
-# Usage:
-#   bash test/e2e.sh                    # One agent per cloud (smoke test)
-#   bash test/e2e.sh --all              # All agents on all clouds (full matrix)
-#   bash test/e2e.sh fly                # One agent on fly
-#   bash test/e2e.sh fly openclaw       # Single combo
-#   bash test/e2e.sh fly --all          # All agents on fly
-#   bash test/e2e.sh --cleanup          # Destroy stale e2e-* servers
-#   bash test/e2e.sh --history          # Show timing history
-#   bash test/e2e.sh --compare openclaw # Compare agent across clouds
-#
-# Environment:
-#   OPENROUTER_API_KEY  — Required for all tests
-#   E2E_CANARY_AGENT    — Agent to use for smoke tests (default: openclaw)
-#   E2E_AUTO_FIX        — Set to "1" to spawn Claude agents for failures (default: 0)
-#   E2E_OPTIMIZE        — Set to "1" to spawn Claude agents for slow-but-passing tests (default: 0)
-#   E2E_TIMEOUT         — Per-combo timeout in seconds (default: 900)
-#
-# Each agent script runs with SPAWN_NON_INTERACTIVE=1 so safe_read() fails
-# immediately instead of hanging on /dev/tty.  Cloud-specific env vars
-# (HETZNER_LOCATION, FLY_REGION, etc.) are auto-set to sane defaults.
-
-REPO_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
-E2E_TIMEOUT="${E2E_TIMEOUT:-900}"
-E2E_AUTO_FIX="${E2E_AUTO_FIX:-0}"
-E2E_OPTIMIZE="${E2E_OPTIMIZE:-0}"
-E2E_ALL=0
-E2E_CANARY_AGENT="${E2E_CANARY_AGENT:-openclaw}"
-E2E_RESULTS_DIR=""
-E2E_SERVER_PREFIX="e2e"
-E2E_PIDS=""
-E2E_TIMINGS_FILE="${REPO_ROOT}/.docs/e2e-timings.json"
-E2E_SLOW_THRESHOLD=180  # seconds — flag as slow even if passing
-
-# --- Logging ---
-
-_e2e_log() {
-    printf '[%s] [e2e] %s\n' "$(date +'%H:%M:%S')" "$*"
-}
-
-_e2e_pass() {
-    printf '  \033[32m✓\033[0m %s\n' "$*"
-}
-
-_e2e_fail() {
-    printf '  \033[31m✗\033[0m %s\n' "$*"
-}
-
-# --- Cloud config lookup (bash 3.2 compatible — no associative arrays) ---
-
-# Get the env var name used for server/app name
-_get_name_env_var() {
-    case "$1" in
-        fly)          echo "FLY_APP_NAME" ;;
-        hetzner)      echo "HETZNER_SERVER_NAME" ;;
-        digitalocean) echo "DO_DROPLET_NAME" ;;
-        aws)          echo "LIGHTSAIL_SERVER_NAME" ;;
-        daytona)      echo "DAYTONA_SANDBOX_NAME" ;;
-        gcp)          echo "GCP_INSTANCE_NAME" ;;
-
-        sprite)       echo "SPRITE_NAME" ;;
-        *)            echo "" ;;
-    esac
-}
-
-# Get the env var name used for cloud token
-_get_token_env_var() {
-    case "$1" in
-        fly)          echo "FLY_API_TOKEN" ;;
-        hetzner)      echo "HCLOUD_TOKEN" ;;
-        digitalocean) echo "DO_API_TOKEN" ;;
-        daytona)      echo "DAYTONA_API_KEY" ;;
-        *)            echo "" ;;
-    esac
-}
-
-# --- Credential helpers ---
-
-# Try to load a token from the spawn config file into the env var.
-# Returns 0 if token was loaded, 1 if not.
-_load_token_from_config() {
-    local cloud="$1"
-    local token_var
-    token_var=$(_get_token_env_var "$cloud")
-    [[ -z "$token_var" ]] && return 1
-
-    # Already set — nothing to do
-    local current="${!token_var:-}"
-    [[ -n "$current" ]] && return 0
-
-    local config_file="${HOME}/.config/spawn/${cloud}.json"
-    [[ -f "$config_file" ]] || return 1
-
-    local saved
-    saved=$(python3 -c "import json, sys; data=json.load(open(sys.argv[1])); print(data.get('api_key','') or data.get('token',''))" "$config_file" 2>/dev/null)
-    if [[ -n "$saved" ]]; then
-        export "$token_var=$saved"
-        return 0
-    fi
-    return 1
-}
-
-# Interactive credential collection — runs BEFORE non-interactive tests.
-# For each token-based cloud, ensures the env var is set by:
-#   1. Checking the env var
-#   2. Loading from ~/.config/spawn/{cloud}.json
-#   3. Prompting the user (Enter to skip)
-_collect_credentials() {
-    local clouds="$1"
-    local collected=""
-    local skipped=""
-
-    for cloud in $clouds; do
-        local token_var
-        token_var=$(_get_token_env_var "$cloud")
-
-        # CLI-auth clouds (aws, gcp, sprite) — no token to collect
-        [[ -z "$token_var" ]] && continue
-
-        # Already in env?
-        if [[ -n "${!token_var:-}" ]]; then
-            collected="${collected} ${cloud}"
-            continue
-        fi
-
-        # Try config file
-        if _load_token_from_config "$cloud"; then
-            _e2e_log "Loaded ${token_var} from ~/.config/spawn/${cloud}.json"
-            collected="${collected} ${cloud}"
-            continue
-        fi
-
-        # Fly: try CLI auth (fly auth token)
-        if [[ "$cloud" == "fly" ]] && _try_fly_cli_token; then
-            _e2e_log "Loaded FLY_API_TOKEN from fly CLI auth"
-            collected="${collected} ${cloud}"
-            continue
-        fi
-
-        # No TTY? Can't prompt — skip
-        if ! echo -n "" > /dev/tty 2>/dev/null; then
-            skipped="${skipped} ${cloud}"
-            continue
-        fi
-
-        # Interactive prompt
-        printf '  %s: paste %s (Enter to skip): ' "$cloud" "$token_var"
-        local token=""
-        read -r token </dev/tty
-        if [[ -n "$token" ]]; then
-            export "$token_var=$token"
-            collected="${collected} ${cloud}"
-        else
-            skipped="${skipped} ${cloud}"
-        fi
-    done
-
-    if [[ -n "$skipped" ]]; then
-        _e2e_log "Skipped (no credentials):${skipped}"
-    fi
-}
-
-# Try to get FLY_API_TOKEN from the flyctl CLI (fly auth token)
-_try_fly_cli_token() {
-    local fly_cmd=""
-    if command -v fly &>/dev/null; then
-        fly_cmd="fly"
-    elif command -v flyctl &>/dev/null; then
-        fly_cmd="flyctl"
-    else
-        return 1
-    fi
-    local token
-    token=$("$fly_cmd" auth token 2>/dev/null) || return 1
-    if [[ -n "$token" ]]; then
-        export FLY_API_TOKEN="$token"
-        return 0
-    fi
-    return 1
-}
-
-# --- Credential check ---
-
-# Check if a cloud has credentials available (non-interactive)
-_cloud_has_credentials() {
-    local cloud="$1"
-    local token_var
-    token_var=$(_get_token_env_var "$cloud")
-
-    # Clouds that use CLI auth rather than env var tokens
-    case "$cloud" in
-        aws)    command -v aws &>/dev/null && aws sts get-caller-identity &>/dev/null 2>&1; return $? ;;
-        gcp)    command -v gcloud &>/dev/null && gcloud auth print-access-token &>/dev/null 2>&1; return $? ;;
-
-        sprite) command -v sprite &>/dev/null; return $? ;;
-        local)  return 0 ;;
-    esac
-
-    # Token-based clouds: check env var, then spawn config file, then CLI
-    if [[ -n "$token_var" ]]; then
-        local token_val="${!token_var:-}"
-        if [[ -n "$token_val" ]]; then
-            return 0
-        fi
-        # Check spawn config file
-        local config_file="${HOME}/.config/spawn/${cloud}.json"
-        if [[ -f "$config_file" ]]; then
-            return 0
-        fi
-        # Fly: also check CLI auth
-        if [[ "$cloud" == "fly" ]]; then
-            _try_fly_cli_token &>/dev/null && return 0
-        fi
-    fi
-    return 1
-}
-
-# --- Cleanup ---
-
-_cleanup_e2e() {
-    local exit_code=$?
-    # Kill any remaining background test jobs
-    if [[ -n "${E2E_PIDS:-}" ]]; then
-        for pid in ${E2E_PIDS}; do
-            kill "$pid" 2>/dev/null || true
-        done
-    fi
-    # Clean up results dir
-    if [[ -n "${E2E_RESULTS_DIR:-}" ]] && [[ -d "${E2E_RESULTS_DIR}" ]]; then
-        rm -rf "${E2E_RESULTS_DIR}"
-    fi
-    exit "$exit_code"
-}
-trap _cleanup_e2e EXIT SIGTERM SIGINT
-
-# --- macOS-compatible timeout ---
-
-_run_with_timeout() {
-    local secs="$1"; shift
-    "$@" &
-    local pid=$!
-    local elapsed=0
-    while kill -0 "$pid" 2>/dev/null; do
-        if [[ "$elapsed" -ge "$secs" ]]; then
-            kill "$pid" 2>/dev/null
-            sleep 1
-            kill -9 "$pid" 2>/dev/null || true
-            wait "$pid" 2>/dev/null || true
-            return 124
-        fi
-        sleep 1
-        elapsed=$((elapsed + 1))
-    done
-    wait "$pid" 2>/dev/null
-}
-
-# --- Stale server cleanup ---
-
-_cleanup_stale_servers() {
-    _e2e_log "Skipping bash-based cleanup (clouds use TypeScript)"
-    return 0
-}
-
-# Destroy a specific e2e test server by name.
-# Clouds that take a name directly are easy; others need a name→ID lookup.
-_destroy_e2e_server() {
-    return 0
-}
-
-# --- Non-interactive env setup ---
-
-# Export all env vars needed to run agent scripts without any interactive prompts.
-# Called by both preflight and per-combo tests.
-_setup_noninteractive_env() {
-    local cloud="$1"
-
-    export SPAWN_NON_INTERACTIVE=1
-    export MODEL_ID="${MODEL_ID:-openrouter/auto}"
-    export SPAWN_SKIP_GITHUB_AUTH=1
-
-    case "$cloud" in
-        hetzner)
-            export HETZNER_LOCATION="${HETZNER_LOCATION:-fsn1}"
-            export HETZNER_SERVER_TYPE="${HETZNER_SERVER_TYPE:-cx23}"
-            ;;
-        fly)
-            export FLY_REGION="${FLY_REGION:-iad}"
-            export FLY_VM_SIZE="${FLY_VM_SIZE:-shared-cpu-1x}"
-            export FLY_VM_MEMORY="${FLY_VM_MEMORY:-1024}"
-            ;;
-        gcp)
-            export GCP_ZONE="${GCP_ZONE:-us-central1-a}"
-            export GCP_MACHINE_TYPE="${GCP_MACHINE_TYPE:-e2-micro}"
-            ;;
-    esac
-}
-
-# --- Per-cloud preflight ---
-
-# Run cloud_authenticate() once per cloud BEFORE parallel agent tests.
-# This installs CLIs, imports SSH keys, and validates tokens so that
-# 15 parallel agent scripts don't race on the same shared resources.
-_preflight_cloud() {
-    local cloud="$1"
-    local log_file="${E2E_RESULTS_DIR}/preflight_${cloud}.log"
-    local env_file="${E2E_RESULTS_DIR}/preflight_${cloud}.env"
-
-    _e2e_log "Pre-flight: ${cloud}..."
-
-    # Run cloud_authenticate in a subshell, then dump the validated token
-    # so the parent can export it for agent scripts.
-    local token_var
-    token_var=$(_get_token_env_var "$cloud")
-
-    (
-        _setup_noninteractive_env "$cloud"
-
-        # Write token from env to env file for parent to pick up
-        if [[ -n "$token_var" ]] && [[ -n "${!token_var:-}" ]]; then
-            printf '%s' "${!token_var}" > "$env_file"
-        fi
-    ) > "$log_file" 2>&1
-
-    local rc=$?
-    if [[ $rc -ne 0 ]]; then
-        local last_err
-        last_err=$(grep -iE "error|fail|cannot|not found|invalid" "$log_file" 2>/dev/null | tail -1 || true)
-        _e2e_fail "pre-flight ${cloud}: ${last_err:-exit code $rc}"
-        return 1
-    fi
-
-    # Import validated token into parent so agent scripts skip re-validation
-    if [[ -n "$token_var" ]] && [[ -f "$env_file" ]] && [[ -s "$env_file" ]]; then
-        local token_val
-        token_val=$(cat "$env_file")
-        export "$token_var=$token_val"
-        rm -f "$env_file"
-    fi
-
-    _e2e_pass "pre-flight ${cloud}"
-    return 0
-}
-
-# --- Per-combo test function ---
-
-run_e2e_test() {
-    local cloud="$1" agent="$2"
-    local server_name="${E2E_SERVER_PREFIX}-${agent}-$(date +%s)-$$"
-    local log_file="${E2E_RESULTS_DIR}/${cloud}_${agent}.log"
-    local start_time
-    start_time=$(date +%s)
-
-    _e2e_log "  ▶ ${cloud}/${agent} starting..."
-
-    # Set the cloud-specific server name env var so the script skips interactive prompt
-    local name_var
-    name_var=$(_get_name_env_var "$cloud")
-    if [[ -n "$name_var" ]]; then
-        export "$name_var"="$server_name"
-    fi
-
-    _setup_noninteractive_env "$cloud"
-
-    # Run the agent script with stdin from /dev/null (no interactive prompts)
-    local exit_code=0
-    _run_with_timeout "$E2E_TIMEOUT" bash "${REPO_ROOT}/${cloud}/${agent}.sh" \
-        < /dev/null > "$log_file" 2>&1 || exit_code=$?
-
-    local elapsed=$(( $(date +%s) - start_time ))
-
-    # Determine result
-    # The script will always "fail" at the interactive session step (no TTY),
-    # but "setup completed successfully" printed before that means everything
-    # up to session launch worked.
-    local result="fail"
-    local reason=""
-
-    if [[ "$exit_code" -eq 124 ]]; then
-        reason="timeout (${E2E_TIMEOUT}s)"
-    elif grep -q "setup completed successfully" "$log_file" 2>/dev/null; then
-        result="pass"
-        reason="setup complete (session expected to fail without TTY)"
-    else
-        reason="exit code ${exit_code}"
-        # Try to extract last meaningful error
-        local last_error
-        last_error=$(grep -iE "error|fail|fatal|cannot|not found" "$log_file" 2>/dev/null | tail -3 || true)
-        if [[ -n "$last_error" ]]; then
-            reason="${reason}: $(printf '%s' "$last_error" | head -1)"
-        fi
-    fi
-
-    # Write results
-    printf '%s\n' "$result" > "${E2E_RESULTS_DIR}/${cloud}_${agent}.result"
-    printf '%s\n' "$elapsed" > "${E2E_RESULTS_DIR}/${cloud}_${agent}.timing"
-    printf '%s\n' "$reason" > "${E2E_RESULTS_DIR}/${cloud}_${agent}.reason"
-
-    # Destroy the test server — don't leak cloud resources
-    _destroy_e2e_server "$cloud" "$server_name"
-
-    # Progress output
-    if [[ "$result" == "pass" ]]; then
-        _e2e_pass "${cloud}/${agent}  ${elapsed}s"
-    else
-        _e2e_fail "${cloud}/${agent}  ${elapsed}s  (${reason})"
-    fi
-}
-
-# --- Auto-fix function ---
-
-_find_working_reference() {
-    local agent="$1" exclude_cloud="$2"
-    for cloud_dir in "${REPO_ROOT}"/*/; do
-        local cloud_name
-        cloud_name=$(basename "$cloud_dir")
-        [[ "$cloud_name" == "$exclude_cloud" ]] && continue
-        [[ -f "${cloud_dir}${agent}.sh" ]] || continue
-        printf '%s' "${cloud_dir}${agent}.sh"
-        return 0
-    done
-    return 1
-}
-
-# Build the prompt for a single failing combo (used by per-cloud agent)
-_build_failure_context() {
-    local cloud="$1" agent="$2"
-    local log_file="${E2E_RESULTS_DIR}/${cloud}_${agent}.log"
-    local script="${REPO_ROOT}/${cloud}/${agent}.sh"
-
-    printf '### %s/%s\n\n' "$cloud" "$agent"
-
-    printf 'Last 50 lines of output:\n```\n'
-    if [[ -f "$log_file" ]]; then
-        tail -50 "$log_file"
-    else
-        printf '(no log file)\n'
-    fi
-    printf '```\n\n'
-
-    printf 'Script (%s/%s.sh):\n```bash\n' "$cloud" "$agent"
-    if [[ -f "$script" ]]; then
-        cat "$script"
-    fi
-    printf '```\n\n'
-
-    local ref_script=""
-    ref_script=$(_find_working_reference "$agent" "$cloud" 2>/dev/null) || true
-    if [[ -n "$ref_script" ]] && [[ -f "$ref_script" ]]; then
-        printf 'Reference (working on another cloud — %s):\n```bash\n' "$(basename "$(dirname "$ref_script")")"
-        cat "$ref_script"
-        printf '```\n\n'
-    fi
-}
-
-# Spawn one Claude agent to fix a single failing combo
-auto_fix_combo() {
-    local cloud="$1" agent="$2"
-
-    if ! command -v claude &>/dev/null; then
-        _e2e_log "claude CLI not found — skipping auto-fix for ${cloud}/${agent}"
-        return 1
-    fi
-
-    local prompt
-    prompt=$(_build_failure_context "$cloud" "$agent")
-
-    local cloud_lib=""
-    if [[ -f "${REPO_ROOT}/${cloud}/lib/common.sh" ]]; then
-        cloud_lib=$(cat "${REPO_ROOT}/${cloud}/lib/common.sh")
-    fi
-
-    _e2e_log "Spawning Claude agent for ${cloud}/${agent}..."
-
-    claude -p "You are fixing an E2E test failure for **${cloud}/${agent}**.
-
-## Cloud Library (${cloud}/lib/common.sh)
-\`\`\`bash
-${cloud_lib}
-\`\`\`
-
-## Failure
-
-${prompt}
-
-## Instructions
-
-Fix the failing script: ${cloud}/${agent}.sh
-
-1. Read the error output to understand what went wrong
-2. Compare with the reference script (working on another cloud) if available
-3. Fix the issue — common problems: wrong install command, missing PATH, timeout in non-TTY
-4. Run \`bash -n\` on every modified file
-
-Only modify files under ${cloud}/. Do not modify lib/common.sh or shared/." 2>&1 | tee -a "${E2E_RESULTS_DIR}/autofix_${cloud}_${agent}.log" || true
-}
-
-# --- Timing history ---
-
-# Save a test result to the timings JSON file
-# Usage: _save_timing cloud/agent elapsed status
-_save_timing() {
-    local combo="$1" elapsed="$2" status="$3"
-    local today
-    today=$(date +%Y-%m-%d)
-
-    mkdir -p "$(dirname "$E2E_TIMINGS_FILE")"
-
-    python3 -c "
-import json, sys, os
-
-combo = sys.argv[1]
-elapsed = int(sys.argv[2])
-status = sys.argv[3]
-today = sys.argv[4]
-path = sys.argv[5]
-
-data = {}
-if os.path.exists(path):
-    try:
-        with open(path) as f:
-            data = json.load(f)
-    except (json.JSONDecodeError, IOError):
-        data = {}
-
-if combo not in data:
-    data[combo] = {'runs': [], 'best': {}}
-
-entry = {'date': today, 'total': elapsed, 'status': status}
-data[combo]['runs'].insert(0, entry)
-# Keep last 10 runs
-data[combo]['runs'] = data[combo]['runs'][:10]
-
-# Update best if this is a pass and faster
-if status == 'pass':
-    best = data[combo].get('best', {})
-    if not best.get('total') or elapsed < best['total']:
-        data[combo]['best'] = {'total': elapsed, 'date': today}
-
-with open(path, 'w') as f:
-    json.dump(data, f, indent=2)
-" "$combo" "$elapsed" "$status" "$today" "$E2E_TIMINGS_FILE" 2>/dev/null || true
-}
-
-# Show timing history from the JSON file
-_show_history() {
-    if [[ ! -f "$E2E_TIMINGS_FILE" ]]; then
-        _e2e_log "No timing history found at ${E2E_TIMINGS_FILE}"
-        return 0
-    fi
-
-    python3 -c "
-import json, sys
-
-path = sys.argv[1]
-with open(path) as f:
-    data = json.load(f)
-
-if not data:
-    print('No timing data recorded yet.')
-    sys.exit(0)
-
-for combo in sorted(data.keys()):
-    info = data[combo]
-    best = info.get('best', {})
-    best_total = best.get('total', '-')
-    best_date = best.get('date', '-')
-    runs = info.get('runs', [])
-    print(f'\\n━━━ {combo} ━━━')
-    print(f'  Best: {best_total}s ({best_date})')
-    print(f'  Recent runs:')
-    for r in runs[:5]:
-        status_icon = '✓' if r['status'] == 'pass' else '✗'
-        print(f'    {status_icon} {r[\"date\"]}  {r[\"total\"]}s  ({r[\"status\"]})')
-" "$E2E_TIMINGS_FILE"
-}
-
-# Compare a single agent across all clouds
-_show_compare() {
-    local agent="$1"
-    if [[ ! -f "$E2E_TIMINGS_FILE" ]]; then
-        _e2e_log "No timing history found at ${E2E_TIMINGS_FILE}"
-        return 0
-    fi
-
-    python3 -c "
-import json, sys
-
-agent = sys.argv[1]
-path = sys.argv[2]
-with open(path) as f:
-    data = json.load(f)
-
-matches = {k: v for k, v in data.items() if k.endswith('/' + agent)}
-if not matches:
-    print(f'No timing data for agent: {agent}')
-    sys.exit(0)
-
-print(f'\\n━━━ {agent} across clouds ━━━')
-print(f'{\"CLOUD\":<15} {\"BEST\":<10} {\"LATEST\":<10} {\"STATUS\":<8}')
-print('-' * 45)
-
-for combo in sorted(matches.keys()):
-    cloud = combo.split('/')[0]
-    info = matches[combo]
-    best = info.get('best', {}).get('total', '-')
-    runs = info.get('runs', [])
-    if runs:
-        latest = runs[0]['total']
-        status = runs[0]['status']
-    else:
-        latest = '-'
-        status = '-'
-    best_s = f'{best}s' if isinstance(best, int) else best
-    latest_s = f'{latest}s' if isinstance(latest, int) else latest
-    print(f'{cloud:<15} {best_s:<10} {latest_s:<10} {status:<8}')
-" "$agent" "$E2E_TIMINGS_FILE"
-}
-
-# Check if a passing combo is slow and needs optimization
-# Returns 0 (true) if optimization is needed, 1 if not
-# Prints the reason to stdout
-_check_slow() {
-    local combo="$1" elapsed="$2"
-
-    python3 -c "
-import json, sys, os
-
-combo = sys.argv[1]
-elapsed = int(sys.argv[2])
-threshold = int(sys.argv[3])
-path = sys.argv[4]
-agent = combo.split('/')[1]
-cloud = combo.split('/')[0]
-
-reasons = []
-
-# Trigger 1: Absolute slow
-if elapsed > threshold:
-    reasons.append(f'absolute_slow: {elapsed}s exceeds {threshold}s threshold')
-
-# Load history for regression + peer comparison
-data = {}
-if os.path.exists(path):
-    try:
-        with open(path) as f:
-            data = json.load(f)
-    except (json.JSONDecodeError, IOError):
-        pass
-
-# Trigger 2: Regression vs best
-if combo in data:
-    best = data[combo].get('best', {}).get('total')
-    if best and elapsed > best * 1.5:
-        reasons.append(f'regression: {elapsed}s is >50%% slower than best {best}s')
-
-# Trigger 3: Slow vs peers (same agent on other clouds)
-peer_times = []
-for key, val in data.items():
-    if key.endswith('/' + agent) and key != combo:
-        peer_best = val.get('best', {}).get('total')
-        if peer_best:
-            peer_times.append((key.split('/')[0], peer_best))
-
-if peer_times:
-    fastest_cloud, fastest_time = min(peer_times, key=lambda x: x[1])
-    if elapsed > fastest_time * 2:
-        reasons.append(f'slow_vs_peers: {elapsed}s is >2x slower than {fastest_cloud} ({fastest_time}s)')
-
-if reasons:
-    print('|'.join(reasons))
-    sys.exit(0)
-else:
-    sys.exit(1)
-" "$combo" "$elapsed" "$E2E_SLOW_THRESHOLD" "$E2E_TIMINGS_FILE" 2>/dev/null
-}
-
-# Build context for optimization agent (peer timings, history)
-_build_optimization_context() {
-    local combo="$1" elapsed="$2"
-
-    python3 -c "
-import json, sys, os
-
-combo = sys.argv[1]
-elapsed = int(sys.argv[2])
-path = sys.argv[3]
-agent = combo.split('/')[1]
-cloud = combo.split('/')[0]
-
-data = {}
-if os.path.exists(path):
-    try:
-        with open(path) as f:
-            data = json.load(f)
-    except (json.JSONDecodeError, IOError):
-        pass
-
-lines = []
-
-# Best time
-best = '-'
-if combo in data:
-    b = data[combo].get('best', {}).get('total')
-    if b:
-        best = f'{b}s'
-lines.append(f'- Total time: {elapsed}s (best ever: {best})')
-
-# Peer timings
-lines.append(f'- Same agent on other clouds:')
-for key in sorted(data.keys()):
-    if key.endswith('/' + agent) and key != combo:
-        peer_cloud = key.split('/')[0]
-        peer_best = data[key].get('best', {}).get('total', '?')
-        lines.append(f'  - {peer_cloud}: {peer_best}s')
-
-# History
-if combo in data:
-    runs = data[combo].get('runs', [])
-    if runs:
-        lines.append(f'- History:')
-        for r in runs[:5]:
-            lines.append(f'  - {r[\"date\"]}: {r[\"total\"]}s ({r[\"status\"]})')
-
-print('\\n'.join(lines))
-" "$combo" "$elapsed" "$E2E_TIMINGS_FILE" 2>/dev/null || true
-}
-
-# Build optimization context for a single slow combo (used by per-cloud agent)
-_build_slow_context() {
-    local cloud="$1" agent="$2" elapsed="$3" reasons="$4"
-    local script="${REPO_ROOT}/${cloud}/${agent}.sh"
-
-    printf '### %s/%s (%ss)\n\n' "$cloud" "$agent" "$elapsed"
-
-    printf 'Why flagged:\n'
-    printf '%s\n' "$reasons" | while IFS= read -r r; do
-        printf '- %s\n' "$r"
-    done
-    printf '\n'
-
-    local timing_context
-    timing_context=$(_build_optimization_context "${cloud}/${agent}" "$elapsed")
-    printf 'Timings:\n%s\n\n' "$timing_context"
-
-    printf 'Script (%s/%s.sh):\n```bash\n' "$cloud" "$agent"
-    if [[ -f "$script" ]]; then
-        cat "$script"
-    fi
-    printf '```\n\n'
-
-    local ref_script=""
-    ref_script=$(_find_working_reference "$agent" "$cloud" 2>/dev/null) || true
-    if [[ -n "$ref_script" ]] && [[ -f "$ref_script" ]]; then
-        printf 'Reference (fastest peer — %s):\n```bash\n' "$(basename "$(dirname "$ref_script")")"
-        cat "$ref_script"
-        printf '```\n\n'
-    fi
-}
-
-# Spawn one Claude agent to optimize a single slow combo
-optimize_slow_combo() {
-    local cloud="$1" agent="$2" elapsed="$3" reasons="$4"
-
-    if ! command -v claude &>/dev/null; then
-        _e2e_log "claude CLI not found — skipping optimization for ${cloud}/${agent}"
-        return 1
-    fi
-
-    local prompt
-    prompt=$(_build_slow_context "$cloud" "$agent" "$elapsed" "$reasons")
-
-    local cloud_lib=""
-    if [[ -f "${REPO_ROOT}/${cloud}/lib/common.sh" ]]; then
-        cloud_lib=$(cat "${REPO_ROOT}/${cloud}/lib/common.sh")
-    fi
-
-    _e2e_log "Spawning Claude agent for ${cloud}/${agent} (${elapsed}s)..."
-
-    claude -p "You are optimizing a slow E2E test for **${cloud}/${agent}**.
-The script PASSES but is too slow.
-
-## Cloud Library (${cloud}/lib/common.sh)
-\`\`\`bash
-${cloud_lib}
-\`\`\`
-
-## Slow Script
-
-${prompt}
-
-## Instructions
-
-Optimize the script: ${cloud}/${agent}.sh
-
-1. Compare timings with the fastest peer cloud for the same agent
-2. Identify what makes it slow (heavy installer, compiling native deps, unnecessary steps)
-3. Make it faster — use lighter install methods, skip unnecessary setup, parallelize where possible
-4. Run \`bash -n\` on every modified file
-5. Don't break anything — the script must still pass E2E
-
-Only modify files under ${cloud}/. Do not modify lib/common.sh or shared/." 2>&1 | tee -a "${E2E_RESULTS_DIR}/optimize_${cloud}_${agent}.log" || true
-}
-
-# --- Main ---
-
-main() {
-    local filter_cloud="" filter_agent=""
-
-    # Parse args: strip --all flag, assign positional cloud/agent
-    for arg in "$@"; do
-        case "$arg" in
-            --all) E2E_ALL=1 ;;
-            *)
-                if [[ -z "$filter_cloud" ]]; then
-                    filter_cloud="$arg"
-                else
-                    filter_agent="$arg"
-                fi
-                ;;
-        esac
-    done
-
-    # Handle --cleanup
-    if [[ "$filter_cloud" == "--cleanup" ]]; then
-        _e2e_log "Running stale server cleanup..."
-        for cloud in fly hetzner digitalocean; do
-            if _cloud_has_credentials "$cloud"; then
-                _cleanup_stale_servers "$cloud"
-            fi
-        done
-        _e2e_log "Cleanup complete"
-        return 0
-    fi
-
-    # Handle --history
-    if [[ "$filter_cloud" == "--history" ]]; then
-        _show_history
-        return 0
-    fi
-
-    # Handle --compare AGENT
-    if [[ "$filter_cloud" == "--compare" ]]; then
-        if [[ -z "$filter_agent" ]]; then
-            _e2e_log "Usage: bash test/e2e.sh --compare AGENT_NAME"
-            return 1
-        fi
-        _show_compare "$filter_agent"
-        return 0
-    fi
-
-    # Get OPENROUTER_API_KEY
-    if [[ -z "${OPENROUTER_API_KEY:-}" ]]; then
-        # Non-interactive: fail fast with a clear message
-        if ! echo -n "" > /dev/tty 2>/dev/null; then
-            _e2e_log "ERROR: OPENROUTER_API_KEY not set and no TTY available"
-            _e2e_log "Export it before running:  export OPENROUTER_API_KEY=sk-or-v1-..."
-            return 1
-        fi
-
-        # Interactive: offer OAuth or paste
-        source "${REPO_ROOT}/shared/common.sh" 2>/dev/null || true
-
-        _e2e_log "OPENROUTER_API_KEY not set — let's grab one"
-        echo ""
-        printf '  1) Open browser (OAuth)  — quickest, logs you in via openrouter.ai\n'
-        printf '  2) Paste a key           — get one from https://openrouter.ai/settings/keys\n'
-        printf '  3) Quit\n'
-        echo ""
-        printf '  Pick [1/2/3]: '
-        read -r _choice </dev/tty
-
-        case "${_choice}" in
-            1)
-                _e2e_log "Starting OAuth flow..."
-                OPENROUTER_API_KEY=$(try_oauth_flow 5180) || {
-                    _e2e_log "OAuth failed — falling back to manual paste"
-                    printf '  Paste your API key: '
-                    read -r OPENROUTER_API_KEY </dev/tty
-                }
-                ;;
-            2)
-                printf '  Paste your API key: '
-                read -r OPENROUTER_API_KEY </dev/tty
-                ;;
-            *)
-                _e2e_log "Aborted."
-                return 1
-                ;;
-        esac
-
-        if [[ -z "${OPENROUTER_API_KEY:-}" ]]; then
-            _e2e_log "ERROR: No API key provided"
-            return 1
-        fi
-        export OPENROUTER_API_KEY
-        _e2e_log "API key set — continuing"
-    fi
-
-    # Create results directory
-    E2E_RESULTS_DIR=$(mktemp -d "${TMPDIR:-/tmp}/e2e-results-XXXXXX")
-
-    # Testable clouds (excludes local, sprite which don't provision real servers the same way)
-    local testable_clouds="fly hetzner digitalocean aws daytona gcp"
-
-    # --- Credential collection (interactive) ---
-    # Load tokens from config files and prompt for any missing ones
-    # BEFORE we go non-interactive. This lets the user provide tokens
-    # that aren't in env vars or config files.
-    echo ""
-    _e2e_log "━━━ Credential Collection ━━━"
-    echo ""
-    _collect_credentials "$testable_clouds"
-    echo ""
-
-    # Discover clouds with available credentials
-    local available_clouds=""
-    if [[ -n "$filter_cloud" ]]; then
-        if _cloud_has_credentials "$filter_cloud"; then
-            available_clouds="$filter_cloud"
-        else
-            _e2e_log "ERROR: No credentials found for ${filter_cloud}"
-            _e2e_log "Set the appropriate token env var or configure via the cloud's CLI"
-            return 1
-        fi
-    else
-        for cloud in $testable_clouds; do
-            if _cloud_has_credentials "$cloud"; then
-                available_clouds="${available_clouds} ${cloud}"
-            fi
-        done
-        available_clouds=$(printf '%s' "$available_clouds" | sed 's/^ //')
-    fi
-
-    if [[ -z "$available_clouds" ]]; then
-        _e2e_log "No cloud credentials available. Set token env vars for at least one cloud."
-        _e2e_log "Supported clouds: ${testable_clouds}"
-        return 1
-    fi
-
-    _e2e_log "Available clouds: ${available_clouds}"
-
-    # --- Pre-flight: validate each cloud once ---
-    # Installs CLIs, imports SSH keys, validates tokens sequentially so that
-    # the parallel agent tests don't race on shared resources.
-    echo ""
-    _e2e_log "━━━ Pre-flight ━━━"
-    echo ""
-    local ready_clouds=""
-    local preflight_skipped=""
-    for cloud in $available_clouds; do
-        if _preflight_cloud "$cloud"; then
-            ready_clouds="${ready_clouds} ${cloud}"
-        else
-            preflight_skipped="${preflight_skipped} ${cloud}"
-        fi
-    done
-    ready_clouds=$(printf '%s' "$ready_clouds" | sed 's/^ //')
-
-    if [[ -n "$preflight_skipped" ]]; then
-        echo ""
-        _e2e_log "Skipped clouds (pre-flight failed):${preflight_skipped}"
-        _e2e_log "Check logs in ${E2E_RESULTS_DIR}/preflight_*.log"
-    fi
-
-    if [[ -z "$ready_clouds" ]]; then
-        _e2e_log "All clouds failed pre-flight. Check credentials and CLIs."
-        return 1
-    fi
-
-    # Collect combos for clouds that passed pre-flight.
-    # Default: one canary agent per cloud.  --all or explicit agent: full set.
-    local combos=""
-    local combo_count=0
-    for cloud in $ready_clouds; do
-        if [[ -n "$filter_agent" ]]; then
-            # Explicit agent requested
-            if [[ -f "${REPO_ROOT}/${cloud}/${filter_agent}.sh" ]]; then
-                combos="${combos} ${cloud}/${filter_agent}"
-                combo_count=$((combo_count + 1))
-            fi
-        elif [[ "$E2E_ALL" == "1" ]]; then
-            # --all: every agent on this cloud
-            for script in "${REPO_ROOT}/${cloud}"/*.sh; do
-                [[ -f "$script" ]] || continue
-                local agent
-                agent=$(basename "$script" .sh)
-                [[ "$agent" == "lib" ]] && continue
-                combos="${combos} ${cloud}/${agent}"
-                combo_count=$((combo_count + 1))
-            done
-        else
-            # Smoke test: one canary agent per cloud
-            local canary="${E2E_CANARY_AGENT}"
-            if [[ ! -f "${REPO_ROOT}/${cloud}/${canary}.sh" ]]; then
-                # Canary not available on this cloud — pick the first agent
-                canary=""
-                for script in "${REPO_ROOT}/${cloud}"/*.sh; do
-                    [[ -f "$script" ]] || continue
-                    local a
-                    a=$(basename "$script" .sh)
-                    [[ "$a" == "lib" ]] && continue
-                    canary="$a"
-                    break
-                done
-            fi
-            if [[ -n "$canary" ]]; then
-                combos="${combos} ${cloud}/${canary}"
-                combo_count=$((combo_count + 1))
-            fi
-        fi
-    done
-    combos=$(printf '%s' "$combos" | sed 's/^ //')
-
-    if [[ -z "$combos" ]]; then
-        _e2e_log "No test combos found for ready clouds: ${ready_clouds}"
-        return 1
-    fi
-
-    local mode_label="smoke test"
-    [[ "$E2E_ALL" == "1" ]] && mode_label="full matrix"
-    [[ -n "$filter_agent" ]] && mode_label="filtered"
-    _e2e_log "Testing ${combo_count} combo(s) [${mode_label}]: ${combos}"
-    echo ""
-
-    # Pre-cleanup: destroy stale e2e-* servers
-    for cloud in $ready_clouds; do
-        _cleanup_stale_servers "$cloud" 2>/dev/null || true
-    done
-
-    # Run all combos in parallel (background subshells)
-    E2E_PIDS=""
-    for combo in $combos; do
-        local cloud="${combo%%/*}"
-        local agent="${combo##*/}"
-        (
-            run_e2e_test "$cloud" "$agent"
-        ) &
-        E2E_PIDS="${E2E_PIDS} $!"
-    done
-
-    # Wait for all to finish
-    _e2e_log "Waiting for ${combo_count} test(s) to complete (timeout: ${E2E_TIMEOUT}s each)..."
-    for pid in ${E2E_PIDS}; do
-        wait "$pid" 2>/dev/null || true
-    done
-    E2E_PIDS=""
-
-    # Collect and report results
-    echo ""
-    _e2e_log "━━━ E2E Results ━━━"
-    echo ""
-
-    local total_pass=0
-    local total_fail=0
-    local failed_combos=""
-
-    for combo in $combos; do
-        local cloud="${combo%%/*}"
-        local agent="${combo##*/}"
-        local result_file="${E2E_RESULTS_DIR}/${cloud}_${agent}.result"
-        local timing_file="${E2E_RESULTS_DIR}/${cloud}_${agent}.timing"
-        local reason_file="${E2E_RESULTS_DIR}/${cloud}_${agent}.reason"
-
-        local result="fail"
-        local elapsed="?"
-        local reason="no result file"
-
-        [[ -f "$result_file" ]] && result=$(cat "$result_file")
-        [[ -f "$timing_file" ]] && elapsed=$(cat "$timing_file")
-        [[ -f "$reason_file" ]] && reason=$(cat "$reason_file")
-
-        if [[ "$result" == "pass" ]]; then
-            _e2e_pass "${cloud}/${agent}  ${elapsed}s"
-            total_pass=$((total_pass + 1))
-        else
-            _e2e_fail "${cloud}/${agent}  ${elapsed}s  (${reason})"
-            total_fail=$((total_fail + 1))
-            failed_combos="${failed_combos} ${combo}"
-        fi
-    done
-
-    echo ""
-    local summary="Total: ${total_pass} passed, ${total_fail} failed out of ${combo_count}"
-    if [[ -n "${preflight_skipped:-}" ]]; then
-        summary="${summary} (skipped:${preflight_skipped})"
-    fi
-    _e2e_log "$summary"
-
-    # Save timings to history
-    for combo in $combos; do
-        local cloud="${combo%%/*}"
-        local agent="${combo##*/}"
-        local result_file="${E2E_RESULTS_DIR}/${cloud}_${agent}.result"
-        local timing_file="${E2E_RESULTS_DIR}/${cloud}_${agent}.timing"
-        local result="fail"
-        local elapsed="0"
-        [[ -f "$result_file" ]] && result=$(cat "$result_file")
-        [[ -f "$timing_file" ]] && elapsed=$(cat "$timing_file")
-        _save_timing "$combo" "$elapsed" "$result"
-    done
-
-    # Optimization phase: check passing combos for slowness
-    local slow_combos=""
-    if [[ "$E2E_OPTIMIZE" == "1" ]]; then
-        for combo in $combos; do
-            local cloud="${combo%%/*}"
-            local agent="${combo##*/}"
-            local result_file="${E2E_RESULTS_DIR}/${cloud}_${agent}.result"
-            local timing_file="${E2E_RESULTS_DIR}/${cloud}_${agent}.timing"
-            local result="fail"
-            local elapsed="0"
-            [[ -f "$result_file" ]] && result=$(cat "$result_file")
-            [[ -f "$timing_file" ]] && elapsed=$(cat "$timing_file")
-
-            if [[ "$result" == "pass" ]]; then
-                local slow_reasons=""
-                slow_reasons=$(_check_slow "$combo" "$elapsed") || true
-                if [[ -n "$slow_reasons" ]]; then
-                    slow_combos="${slow_combos} ${combo}:${elapsed}:${slow_reasons}"
-                fi
-            fi
-        done
-    fi
-
-    if [[ -n "${slow_combos}" ]]; then
-        echo ""
-        _e2e_log "━━━ Optimization Phase ━━━"
-        echo ""
-
-        # Print all slow combos
-        for entry in $slow_combos; do
-            local combo="${entry%%:*}"
-            local rest="${entry#*:}"
-            local elapsed="${rest%%:*}"
-            local reasons="${rest#*:}"
-            printf '  \033[33m⚡\033[0m %s  %ss  (%s)\n' "$combo" "$elapsed" "$(printf '%s' "$reasons" | tr '|' ', ')"
-        done
-        echo ""
-
-        # Spawn one Claude agent per slow combo, all in parallel
-        local opt_pids=""
-        for entry in $slow_combos; do
-            local combo="${entry%%:*}"
-            local rest="${entry#*:}"
-            local elapsed="${rest%%:*}"
-            local reasons
-            reasons=$(printf '%s' "${rest#*:}" | tr '|' '\n')
-            local cloud="${combo%%/*}"
-            local agent="${combo##*/}"
-
-            (
-                optimize_slow_combo "$cloud" "$agent" "$elapsed" "$reasons"
-            ) &
-            opt_pids="${opt_pids} $!"
-        done
-
-        # Wait for all optimization agents
-        for pid in $opt_pids; do
-            wait "$pid" 2>/dev/null || true
-        done
-
-        # Re-run optimized combos to verify
-        echo ""
-        _e2e_log "━━━ Re-running Optimized Combos ━━━"
-        echo ""
-
-        for entry in $slow_combos; do
-            local combo="${entry%%:*}"
-            local old_elapsed="${entry#*:}"
-            old_elapsed="${old_elapsed%%:*}"
-            local cloud="${combo%%/*}"
-            local agent="${combo##*/}"
-
-            run_e2e_test "$cloud" "$agent" || true
-
-            local result_file="${E2E_RESULTS_DIR}/${cloud}_${agent}.result"
-            local timing_file="${E2E_RESULTS_DIR}/${cloud}_${agent}.timing"
-            local result="fail"
-            local new_elapsed="?"
-            [[ -f "$result_file" ]] && result=$(cat "$result_file")
-            [[ -f "$timing_file" ]] && new_elapsed=$(cat "$timing_file")
-
-            if [[ "$result" == "pass" ]]; then
-                _e2e_pass "${combo}  ${new_elapsed}s  (was ${old_elapsed}s)"
-                _save_timing "$combo" "$new_elapsed" "$result"
-            else
-                _e2e_fail "${combo}  ${new_elapsed}s  (optimization broke it — was ${old_elapsed}s)"
-            fi
-        done
-    fi
-
-    # Auto-fix failures — one Claude agent per combo, all in parallel
-    if [[ "$total_fail" -gt 0 ]] && [[ "$E2E_AUTO_FIX" == "1" ]]; then
-        echo ""
-        _e2e_log "━━━ Auto-Fix Phase ━━━"
-        echo ""
-
-        # Spawn one agent per failing combo in parallel
-        local fix_pids=""
-        for combo in $failed_combos; do
-            local cloud="${combo%%/*}"
-            local agent="${combo##*/}"
-
-            (
-                auto_fix_combo "$cloud" "$agent"
-            ) &
-            fix_pids="${fix_pids} $!"
-        done
-
-        # Wait for all fix agents
-        for pid in $fix_pids; do
-            wait "$pid" 2>/dev/null || true
-        done
-
-        # Re-run fixed combos
-        echo ""
-        _e2e_log "━━━ Re-running Fixed Combos ━━━"
-        echo ""
-
-        local rerun_pass=0
-        local rerun_fail=0
-
-        for combo in $failed_combos; do
-            local cloud="${combo%%/*}"
-            local agent="${combo##*/}"
-
-            run_e2e_test "$cloud" "$agent" || true
-
-            local result_file="${E2E_RESULTS_DIR}/${cloud}_${agent}.result"
-            local timing_file="${E2E_RESULTS_DIR}/${cloud}_${agent}.timing"
-            local result="fail"
-            local elapsed="?"
-
-            [[ -f "$result_file" ]] && result=$(cat "$result_file")
-            [[ -f "$timing_file" ]] && elapsed=$(cat "$timing_file")
-
-            if [[ "$result" == "pass" ]]; then
-                _e2e_pass "${cloud}/${agent}  ${elapsed}s  (FIXED)"
-                rerun_pass=$((rerun_pass + 1))
-            else
-                _e2e_fail "${cloud}/${agent}  ${elapsed}s  (still failing)"
-                rerun_fail=$((rerun_fail + 1))
-            fi
-        done
-
-        echo ""
-        _e2e_log "Auto-fix: ${rerun_pass} fixed, ${rerun_fail} still failing"
-    fi
-
-    echo ""
-    _e2e_log "━━━ E2E Complete ━━━"
-
-    # Exit with failure if any tests failed (and weren't fixed)
-    if [[ "$total_fail" -gt 0 ]]; then
-        if [[ "$E2E_AUTO_FIX" == "1" ]] && [[ "${rerun_fail:-0}" -eq 0 ]]; then
-            return 0
-        fi
-        return 1
-    fi
-    return 0
-}
-
-main "$@"
diff --git a/test/mock-curl-script.sh b/test/mock-curl-script.sh
deleted file mode 100644
index 64c96e37..00000000
--- a/test/mock-curl-script.sh
+++ /dev/null
@@ -1,222 +0,0 @@
-#!/bin/bash
-# Mock curl — returns fixture data based on URL
-# Env vars from parent: MOCK_LOG, MOCK_FIXTURE_DIR, MOCK_CLOUD
-
-# --- Helper functions ---
-
-_parse_args() {
-    METHOD="GET"
-    URL=""
-    BODY=""
-    HAS_WRITE_OUT=false
-    local prev_flag=""
-
-    for arg in "$@"; do
-        case "$prev_flag" in
-            -X) METHOD="$arg"; prev_flag=""; continue ;;
-            -w)
-                case "$arg" in
-                    *http_code*) HAS_WRITE_OUT=true ;;
-                esac
-                prev_flag=""; continue
-                ;;
-            -d) BODY="$arg"; prev_flag=""; continue ;;
-            -H|-o|-u|-K|--connect-timeout|--max-time|--retry|--retry-delay) prev_flag=""; continue ;;
-        esac
-        case "$arg" in
-            -X|-w|-d|-H|-o|-u|-K|--connect-timeout|--max-time|--retry|--retry-delay) prev_flag="$arg"; continue ;;
-            -s|-f|-S|-L|-k|-#|-fsSL|-fsS|-sS) continue ;;
-            http://*|https://*) URL="$arg" ;;
-        esac
-    done
-}
-
-_maybe_inject_error() {
-    [ -n "${MOCK_ERROR_SCENARIO:-}" ] || return 1
-    case "$URL" in
-        *openrouter.ai*|*raw.githubusercontent.com*|*claude.ai/install*|*bun.sh*|*nodesource*|*nodejs.org*|*openclaw.ai*|*opencode*|*zeroclaw*|*pip.pypa.io*|*get.docker.com*|*npmjs.org*|*github.com/*/releases*)
-            return 1 ;;
-    esac
-    case "${MOCK_ERROR_SCENARIO}" in
-        auth_failure)
-            printf '{"error":"Unauthorized"}'
-            if [ "$HAS_WRITE_OUT" = "true" ]; then printf '\n401'; fi
-            exit 1 ;;
-        rate_limit)
-            printf '{"error":"Rate limit exceeded"}'
-            if [ "$HAS_WRITE_OUT" = "true" ]; then printf '\n429'; fi
-            exit 1 ;;
-        server_error)
-            printf '{"error":"Internal server error"}'
-            if [ "$HAS_WRITE_OUT" = "true" ]; then printf '\n500'; fi
-            exit 1 ;;
-        create_failure)
-            if [ "$METHOD" = "POST" ]; then
-                case "$URL" in
-                    *servers*|*droplets*|*instances*|*machines*)
-                        printf '{"error":"Unprocessable entity"}'
-                        if [ "$HAS_WRITE_OUT" = "true" ]; then printf '\n422'; fi
-                        exit 1 ;;
-                esac
-            fi ;;
-    esac
-    return 1
-}
-
-_handle_special_urls() {
-    case "$URL" in
-        *claude.ai/install*|*bun.sh*|*nodesource*|*nodejs.org*|*openclaw.ai*|*opencode*install*|*zeroclaw*install*|\
-        *pip.pypa.io*|*get.docker.com*|*install.python-poetry.org*|\
-        *npmjs.org*|*deb.nodesource.com*|*github.com/*/releases*|*cli.github.com*)
-            printf '#!/bin/bash\nexit 0\n'
-            exit 0 ;;
-        *raw.githubusercontent.com/OpenRouterTeam/spawn/*)
-            local_path="${MOCK_REPO_ROOT}/${URL##*spawn/main/}"
-            if [ -f "$local_path" ]; then cat "$local_path"; fi
-            exit 0 ;;
-        *openrouter.ai*)
-            printf '{"key":"sk-or-v1-mock"}\n'
-            if [ "$HAS_WRITE_OUT" = "true" ]; then printf '\n200'; fi
-            exit 0 ;;
-    esac
-}
-
-_strip_api_base() {
-    ENDPOINT="$URL"
-    case "$URL" in
-        https://api.hetzner.cloud/v1*)     ENDPOINT="${URL#https://api.hetzner.cloud/v1}" ;;
-        https://api.digitalocean.com/v2*)   ENDPOINT="${URL#https://api.digitalocean.com/v2}" ;;
-        https://api.machines.dev/v1*)       ENDPOINT="${URL#https://api.machines.dev/v1}" ;;
-    esac
-    EP_CLEAN=$(echo "$ENDPOINT" | sed 's|?.*||')
-}
-
-_check_fields() {
-    local fields="$1"
-    for field in $fields; do
-        if ! printf '%s' "$BODY" | python3 -c "import json,sys; d=json.loads(sys.stdin.read()); assert '$field' in d" 2>/dev/null; then
-            echo "BODY_ERROR:missing_field:${field}:${URL}" >> "${MOCK_LOG}"
-        fi
-    done
-}
-
-_validate_body() {
-    [ "${MOCK_VALIDATE_BODY:-}" = "1" ] && [ -n "$BODY" ] && [ "$METHOD" = "POST" ] || return 0
-    if ! printf '%s' "$BODY" | python3 -c "import json,sys; json.loads(sys.stdin.read())" 2>/dev/null; then
-        echo "BODY_ERROR:invalid_json:${URL}" >> "${MOCK_LOG}"
-        return 0
-    fi
-    case "${MOCK_CLOUD}" in
-        hetzner)     case "$EP_CLEAN" in /servers)          _check_fields "name server_type image location" ;; esac ;;
-        digitalocean) case "$EP_CLEAN" in /droplets)        _check_fields "name region size image" ;; esac ;;
-        fly)         case "$EP_CLEAN" in */machines)        _check_fields "name region config" ;; esac ;;
-    esac
-}
-
-_try_fixture() {
-    local f="${MOCK_FIXTURE_DIR}/$1.json"
-    if [ -f "$f" ]; then cat "$f"; return 0; fi
-    return 1
-}
-
-_synthetic_active_response() {
-    case "$MOCK_CLOUD" in
-        digitalocean) printf '{"droplet":{"id":12345678,"name":"test-srv","status":"active","networks":{"v4":[{"ip_address":"10.0.0.1","type":"public"}]}}}' ;;
-        hetzner)      printf '{"server":{"id":99999,"name":"test-srv","status":"running","public_net":{"ipv4":{"ip":"10.0.0.1"}}}}' ;;
-        fly)          printf '{"id":"d890e84b0d3089","name":"test-app","state":"started","region":"iad","private_ip":"fdaa:0:0:0:a7b:0:0:2"}' ;;
-        *)            printf '{}' ;;
-    esac
-}
-
-_respond_get() {
-    local FIXTURE_NAME
-    FIXTURE_NAME=$(echo "$EP_CLEAN" | sed 's|^/||; s|/|_|g')
-
-    local LAST_SEG HAS_ID_SUFFIX=false
-    LAST_SEG=$(echo "$EP_CLEAN" | sed 's|.*/||')
-    case "$LAST_SEG" in *[0-9]*) HAS_ID_SUFFIX=true ;; esac
-
-    if _try_fixture "$FIXTURE_NAME"; then
-        :
-    elif [ "$HAS_ID_SUFFIX" = "false" ]; then
-        local FIXTURE_NAME_BASE
-        FIXTURE_NAME_BASE=$(echo "$FIXTURE_NAME" | sed 's|_[0-9a-f-]*$||')
-        if ! _try_fixture "$FIXTURE_NAME_BASE"; then
-            echo "NO_FIXTURE:GET:${EP_CLEAN}:${FIXTURE_NAME}" >> "${MOCK_LOG}"
-            printf '{}'
-        fi
-    else
-        # ID-suffixed GET (e.g., /servers/12345) — use synthetic for status polling
-        _synthetic_active_response
-    fi
-}
-
-_respond_post() {
-    case "$EP_CLEAN" in
-        /ssh_keys|/ssh-keys|/account/keys|/profile/sshkeys|/sshkeys|*/sshkey)
-            printf '{"ssh_key":{"id":99999,"name":"test-key","fingerprint":"af:0d:c5:57:a8:fd:b2:82:5e:d4:c1:65:f0:0c:8a:9d"}}'
-            ;;
-        /apps)
-            printf '{"id":"test-app","name":"test-app","status":"deployed","organization":{"slug":"personal"}}'
-            ;;
-        *)
-            if _try_fixture "create_server"; then
-                :
-            else
-                echo "NO_FIXTURE:POST:${EP_CLEAN}:create_server" >> "${MOCK_LOG}"
-                case "$MOCK_CLOUD" in
-                    hetzner)      printf '{"server":{"id":99999,"name":"test-srv","public_net":{"ipv4":{"ip":"10.0.0.1"}}},"action":{"id":1,"status":"running"}}' ;;
-                    digitalocean) printf '{"droplet":{"id":12345678,"name":"test-srv","status":"new","networks":{"v4":[{"ip_address":"10.0.0.1","type":"public"}]}}}' ;;
-                    *)            printf '{"id":"test-id","status":"active","ip":"10.0.0.1"}' ;;
-                esac
-            fi
-            ;;
-    esac
-}
-
-_track_state() {
-    [ "${MOCK_TRACK_STATE:-}" = "1" ] && [ -n "${MOCK_STATE_FILE:-}" ] || return 0
-    local TS
-    TS=$(date +%s)
-    case "$METHOD" in
-        POST)
-            case "$EP_CLEAN" in
-                /servers|/droplets|/instances|/instance-operations/launch|*/machines)
-                    echo "CREATED:${MOCK_CLOUD}:${TS}" >> "${MOCK_STATE_FILE}" ;;
-            esac ;;
-        DELETE)
-            echo "DELETED:${MOCK_CLOUD}:${TS}" >> "${MOCK_STATE_FILE}" ;;
-    esac
-}
-
-# --- Main logic ---
-
-_parse_args "$@"
-
-echo "curl ${METHOD} ${URL}" >> "${MOCK_LOG}"
-if [ -n "$BODY" ]; then
-    echo "BODY:${BODY}" >> "${MOCK_LOG}"
-fi
-
-_maybe_inject_error
-_handle_special_urls
-
-if [ -z "$URL" ]; then exit 0; fi
-
-_strip_api_base
-_validate_body
-
-case "$METHOD" in
-    GET)    _respond_get ;;
-    POST)   _respond_post ;;
-    DELETE) _try_fixture "delete_server" || printf '{}' ;;
-    *)      printf '{}' ;;
-esac
-
-_track_state
-
-if [ "$HAS_WRITE_OUT" = "true" ]; then
-    printf '\n200'
-fi
-
-exit 0
diff --git a/test/mock.sh b/test/mock.sh
deleted file mode 100644
index 0977d48c..00000000
--- a/test/mock.sh
+++ /dev/null
@@ -1,1036 +0,0 @@
-#!/bin/bash
-# Fixture-based mock test suite for cloud provider agent scripts
-#
-# Uses recorded API responses from test/fixtures/{cloud}/ to test
-# every agent script without making real API calls.
-#
-# Usage:
-#   bash test/mock.sh                    # Test all clouds with fixtures
-#   bash test/mock.sh hetzner            # Test all agents on one cloud
-#   bash test/mock.sh hetzner claude     # Test one agent on one cloud
-
-set -eo pipefail
-
-if [[ "${BASH_VERSINFO[0]}" -lt 4 ]]; then
-    printf 'WARNING: bash %s detected. Some features may need bash 4+.\n' "${BASH_VERSION}" >&2
-fi
-
-REPO_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
-FIXTURES_DIR="${REPO_ROOT}/test/fixtures"
-TEST_DIR=$(mktemp -d)
-MOCK_LOG="${TEST_DIR}/mock_calls.log"
-
-# Colors (respect NO_COLOR standard: https://no-color.org/)
-if [[ -n "${NO_COLOR:-}" ]]; then
-    RED='' GREEN='' YELLOW='' CYAN='' NC=''
-else
-    RED='\033[0;31m'
-    GREEN='\033[0;32m'
-    YELLOW='\033[1;33m'
-    CYAN='\033[0;36m'
-    NC='\033[0m'
-fi
-
-# Counters
-PASSED=0
-FAILED=0
-SKIPPED=0
-
-# Cleanup on exit
-cleanup() {
-    rm -rf "${TEST_DIR}"
-    rm -f /tmp/spawn_* 2>/dev/null || true
-}
-trap cleanup EXIT
-
-# ============================================================
-# Assertions (same pattern as test/run.sh)
-# ============================================================
-
-assert_exit_code() {
-    local actual="$1"
-    local expected="$2"
-    local msg="$3"
-    if [[ "${actual}" -eq "${expected}" ]]; then
-        printf '%b\n' "    ${GREEN}✓${NC} ${msg}"
-        PASSED=$((PASSED + 1))
-    else
-        printf '%b\n' "    ${RED}✗${NC} ${msg} (got exit code ${actual})"
-        FAILED=$((FAILED + 1))
-    fi
-}
-
-assert_log_contains() {
-    local pattern="$1"
-    local msg="$2"
-    if grep -qE "${pattern}" "${MOCK_LOG}" 2>/dev/null; then
-        printf '%b\n' "    ${GREEN}✓${NC} ${msg}"
-        PASSED=$((PASSED + 1))
-    else
-        printf '%b\n' "    ${RED}✗${NC} ${msg}"
-        FAILED=$((FAILED + 1))
-    fi
-}
-
-assert_api_called() {
-    local method="$1"
-    local endpoint_pattern="$2"
-    local msg="${3:-calls ${method} ${endpoint_pattern}}"
-    if grep -qE "curl ${method} .*${endpoint_pattern}" "${MOCK_LOG}" 2>/dev/null; then
-        printf '%b\n' "    ${GREEN}✓${NC} ${msg}"
-        PASSED=$((PASSED + 1))
-    else
-        printf '%b\n' "    ${RED}✗${NC} ${msg}"
-        FAILED=$((FAILED + 1))
-    fi
-}
-
-assert_env_injected() {
-    local var_name="$1"
-    local msg="${2:-injects ${var_name}}"
-    # Check mock log (ssh/scp commands may reference the var) and output log.
-    # Also check case-insensitively: OPENROUTER_API_KEY → "openrouter" appears
-    # in output like "Using OpenRouter API key from environment".
-    local first_word
-    first_word=$(printf '%s' "$var_name" | sed 's/_.*//' | tr '[:upper:]' '[:lower:]')
-    if grep -qE "${var_name}" "${MOCK_LOG}" 2>/dev/null || \
-       grep -qE "${var_name}" "${TEST_DIR}/output.log" 2>/dev/null || \
-       grep -qi "${first_word}" "${TEST_DIR}/output.log" 2>/dev/null || \
-       grep -qi "${first_word}" "${MOCK_LOG}" 2>/dev/null; then
-        printf '%b\n' "    ${GREEN}✓${NC} ${msg}"
-        PASSED=$((PASSED + 1))
-    else
-        printf '%b\n' "    ${RED}✗${NC} ${msg}"
-        FAILED=$((FAILED + 1))
-    fi
-}
-
-assert_file_created() {
-    local path_pattern="$1"
-    local msg="${2:-creates file matching ${path_pattern}}"
-    if grep -qE "(scp|upload|file).*${path_pattern}" "${MOCK_LOG}" 2>/dev/null; then
-        printf '%b\n' "    ${GREEN}✓${NC} ${msg}"
-        PASSED=$((PASSED + 1))
-    else
-        printf '%b\n' "    ${RED}✗${NC} ${msg}"
-        FAILED=$((FAILED + 1))
-    fi
-}
-
-assert_no_body_errors() {
-    local msg="${1:-no request body validation errors}"
-    if grep -qE "BODY_ERROR:" "${MOCK_LOG}" 2>/dev/null; then
-        local errors
-        errors=$(grep "BODY_ERROR:" "${MOCK_LOG}" 2>/dev/null)
-        printf '%b\n' "    ${RED}✗${NC} ${msg}"
-        printf '%b\n' "    ${RED}  Errors:${NC}"
-        printf '%s\n' "$errors" | while IFS= read -r line; do
-            printf '      %s\n' "$line"
-        done
-        FAILED=$((FAILED + 1))
-    else
-        printf '%b\n' "    ${GREEN}✓${NC} ${msg}"
-        PASSED=$((PASSED + 1))
-    fi
-}
-
-assert_server_cleaned_up() {
-    local state_file="$1"
-    local msg="${2:-server lifecycle tracked}"
-    if [[ ! -f "$state_file" ]]; then
-        printf '%b\n' "    ${YELLOW}⚠${NC} ${msg} (no state file)"
-        return 0
-    fi
-    local created deleted
-    created=$(grep -c "^CREATED:" "$state_file" 2>/dev/null || true)
-    deleted=$(grep -c "^DELETED:" "$state_file" 2>/dev/null || true)
-    if [[ "$created" -gt 0 ]]; then
-        printf '%b\n' "    ${GREEN}✓${NC} ${msg} (created=${created}, deleted=${deleted})"
-        PASSED=$((PASSED + 1))
-        if [[ "$deleted" -lt "$created" ]]; then
-            printf '%b\n' "    ${YELLOW}⚠${NC} warning: ${created} created but only ${deleted} deleted (expected — user takes over)"
-        fi
-    else
-        printf '%b\n' "    ${YELLOW}⚠${NC} ${msg} (no server creation tracked)"
-    fi
-}
-
-# ============================================================
-# Mock setup
-# ============================================================
-
-setup_mock_curl() {
-    local SCRIPT_DIR
-    SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
-    cp "${SCRIPT_DIR}/mock-curl-script.sh" "${TEST_DIR}/curl"
-    chmod +x "${TEST_DIR}/curl"
-}
-
-setup_mock_ssh() {
-    # Mock ssh — log and succeed
-    cat > "${TEST_DIR}/ssh" << 'MOCKSSH'
-#!/bin/bash
-echo "ssh $*" >> "${MOCK_LOG}"
-exit 0
-MOCKSSH
-    chmod +x "${TEST_DIR}/ssh"
-
-    # Mock scp — log and succeed
-    cat > "${TEST_DIR}/scp" << 'MOCKSCP'
-#!/bin/bash
-echo "scp $*" >> "${MOCK_LOG}"
-exit 0
-MOCKSCP
-    chmod +x "${TEST_DIR}/scp"
-}
-
-# Create a mock that logs its invocation and exits 0
-# Usage: _create_logging_mock NAME [NAME...]
-_create_logging_mock() {
-    local name
-    for name in "$@"; do
-        cat > "${TEST_DIR}/${name}" << MOCK
-#!/bin/bash
-echo "${name} \$*" >> "\${MOCK_LOG}"
-exit 0
-MOCK
-        chmod +x "${TEST_DIR}/${name}"
-    done
-}
-
-# Create a mock that silently exits 0 (no logging)
-# Usage: _create_silent_mock NAME [NAME...]
-_create_silent_mock() {
-    local name
-    for name in "$@"; do
-        cat > "${TEST_DIR}/${name}" << 'MOCK'
-#!/bin/bash
-exit 0
-MOCK
-        chmod +x "${TEST_DIR}/${name}"
-    done
-}
-
-# Create the ssh-keygen mock script
-_create_ssh_keygen_mock() {
-    cat > "${TEST_DIR}/ssh-keygen" << 'MOCK'
-#!/bin/bash
-echo "ssh-keygen $*" >> "${MOCK_LOG}"
-# Check for -l flag (fingerprint listing)
-for arg in "$@"; do
-    case "$arg" in
-        -l*) echo "256 MD5:af:0d:c5:57:a8:fd:b2:82:5e:d4:c1:65:f0:0c:8a:9d test@test (ED25519)"; exit 0 ;;
-    esac
-done
-# Parse -f flag for key creation
-KEY_PATH=""
-prev=""
-for arg in "$@"; do
-    if [ "$prev" = "-f" ]; then
-        KEY_PATH="$arg"
-    fi
-    prev="$arg"
-done
-if [ -n "$KEY_PATH" ]; then
-    mkdir -p "$(dirname "$KEY_PATH")"
-    touch "$KEY_PATH"
-    echo "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIHmcVdzydp72a/B69nmENZvCvjuk7xGpKdi5CvhkmNsv test@test" > "${KEY_PATH}.pub"
-fi
-exit 0
-MOCK
-    chmod +x "${TEST_DIR}/ssh-keygen"
-}
-
-setup_mock_agents() {
-    # Agent binaries
-    _create_logging_mock claude openclaw zeroclaw codex opencode kilocode q
-
-    # Tools used during agent install and file upload
-    _create_logging_mock pip pip3 npm npx node openssl shred cargo go git base64
-
-    # bun: pass `bun -e "..."` (JSON processing) through to the real binary;
-    # log all other invocations as no-ops.
-    # Fallback chain: real bun → node (with Bun.stdin polyfill) → exit 0
-    # CI (GitHub Actions ubuntu-latest) has node but not bun, so the node
-    # fallback is essential for _fly_json / _fly_list_orgs / list_servers.
-    cat > "${TEST_DIR}/bun" << 'MOCKBUN'
-#!/bin/bash
-echo "bun $*" >> "${MOCK_LOG}"
-
-# Find the real bun binary (skip our mock directory)
-_find_real_bun() {
-    local _self_dir
-    _self_dir="$(cd "$(dirname "$0")" && pwd)"
-    IFS=: read -ra _path_dirs <<< "$PATH"
-    for _d in "${_path_dirs[@]}"; do
-        if [[ "$_d" != "$_self_dir" && -x "$_d/bun" ]]; then
-            echo "$_d/bun"
-            return 0
-        fi
-    done
-    return 1
-}
-
-# Delegate `bun run <file>` and `bun test <file>` to the real bun.
-# fly/ agent shims use `bun run main.ts` — must pass through.
-if [[ "$1" == "run" || "$1" == "test" ]]; then
-    _real_bun=$(_find_real_bun) || { echo "real bun not found" >&2; exit 1; }
-    exec "$_real_bun" "$@"
-fi
-
-if [[ "$1" == "-e" ]]; then
-    _code="$2"
-    shift 2  # remove -e and the code, leaving extra args (e.g. -- field default)
-    _real_bun=$(_find_real_bun)
-    if [[ -n "$_real_bun" ]]; then
-        exec "$_real_bun" -e "$_code" "$@"
-    fi
-    # No real bun found — try node with a Bun.stdin polyfill
-    _self_dir="$(cd "$(dirname "$0")" && pwd)"
-    IFS=: read -ra _path_dirs <<< "$PATH"
-    _real_node=""
-    for _d in "${_path_dirs[@]}"; do
-        if [[ "$_d" != "$_self_dir" && -x "$_d/node" ]]; then
-            _real_node="$_d/node"
-            break
-        fi
-    done
-    if [[ -n "$_real_node" ]]; then
-        # Polyfill Bun.stdin.text() for node: read all of stdin as a string.
-        # --input-type=module enables top-level await (used by fly/lib scripts).
-        _polyfill='globalThis.Bun={stdin:{text:()=>new Promise(r=>{let d="";process.stdin.setEncoding("utf8");process.stdin.on("data",c=>d+=c);process.stdin.on("end",()=>r(d))})}};'
-        # Strip TypeScript type annotations for node compatibility.
-        _js_code=$(printf '%s' "$_code" | sed -E 's/: (any\[\]|any|string|number|void)//g; s/ as any//g')
-        exec "$_real_node" --input-type=module -e "${_polyfill}${_js_code}" "$@"
-    fi
-fi
-exit 0
-MOCKBUN
-    chmod +x "${TEST_DIR}/bun"
-
-    # Silent mocks (no logging needed)
-    _create_silent_mock clear sleep
-
-    # Mock timeout/gtimeout to just run the command (skip the timeout value)
-    cat > "${TEST_DIR}/timeout" << 'MOCK'
-#!/bin/bash
-shift
-exec "$@"
-MOCK
-    chmod +x "${TEST_DIR}/timeout"
-    cp "${TEST_DIR}/timeout" "${TEST_DIR}/gtimeout"
-
-    # Mock python3 — delegate to real python3 for JSON parsing
-    cat > "${TEST_DIR}/python3" << 'MOCK'
-#!/bin/bash
-exec /usr/bin/python3 "$@"
-MOCK
-    chmod +x "${TEST_DIR}/python3"
-
-    # Mock 'ssh-keygen' — returns MD5 fingerprint matching fixture data
-    _create_ssh_keygen_mock
-
-    # Mock fly/flyctl CLI — handles ssh console, auth token, version
-    _create_fly_mock
-}
-
-_create_fly_mock() {
-    cat > "${TEST_DIR}/fly" << 'MOCK'
-#!/bin/bash
-echo "fly $*" >> "${MOCK_LOG}"
-
-# Simulate fly CLI failures when MOCK_ERROR_SCENARIO is set
-case "${MOCK_ERROR_SCENARIO:-}" in
-    ssh_tunnel_failure)
-        case "$1" in
-            ssh)
-                echo "Error: failed to connect to tunnel: context deadline exceeded" >&2
-                exit 1 ;;
-            machine)
-                case "${2:-}" in
-                    exec)
-                        echo "Error: machine not reachable" >&2
-                        exit 1 ;;
-                esac ;;
-        esac ;;
-    ssh_timeout)
-        case "$1" in
-            ssh|machine)
-                # Never return "ok" — simulates SSH not becoming ready
-                exit 1 ;;
-        esac ;;
-esac
-
-case "$1" in
-    auth)
-        case "${2:-}" in
-            token) echo "test-token-fly" ;;
-        esac ;;
-    machine)
-        case "${2:-}" in
-            exec)
-                # fly machine exec MACHINE_ID --app APP -- bash -c CMD
-                all_args="$*"
-                if [[ "$all_args" == *"echo ok"* ]] || [[ "$all_args" == *'echo\ ok'* ]]; then
-                    echo "ok"
-                fi
-                ;;
-            list) echo "[]" ;;
-        esac ;;
-    ssh)
-        # fly ssh console -a APP -C "bash -c CMD" --quiet
-        # Extract the command and simulate its output
-        all_args="$*"
-        # Check for "echo ok" (may be escaped as echo\ ok by printf %q)
-        if [[ "$all_args" == *"echo ok"* ]] || [[ "$all_args" == *'echo\ ok'* ]]; then
-            echo "ok"
-        fi
-        ;;
-    version)
-        echo "fly v0.3.50" ;;
-esac
-exit 0
-MOCK
-    chmod +x "${TEST_DIR}/fly"
-    cp "${TEST_DIR}/fly" "${TEST_DIR}/flyctl"
-}
-
-setup_fake_home() {
-    local fake_home="${TEST_DIR}/fakehome"
-    mkdir -p "${fake_home}/.ssh"
-    mkdir -p "${fake_home}/.config/spawn"
-    mkdir -p "${fake_home}/.claude"
-    mkdir -p "${fake_home}/.local/bin"
-    # Create dummy SSH key pair
-    echo "-----BEGIN OPENSSH PRIVATE KEY-----" > "${fake_home}/.ssh/id_ed25519"
-    echo "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIHmcVdzydp72a/B69nmENZvCvjuk7xGpKdi5CvhkmNsv test@test" > "${fake_home}/.ssh/id_ed25519.pub"
-    chmod 600 "${fake_home}/.ssh/id_ed25519"
-    echo "${fake_home}"
-}
-
-# ============================================================
-# Cloud API helpers (for use by test infra tests)
-# ============================================================
-
-# Remove an API base from a URL, leaving just the endpoint path.
-# Used by test/test-infra-sync.test.ts to validate cloud coverage.
-# Args: url pattern
-#   pattern is spliced into a sed "s|pattern||" expression, so sed treats it
-#   as a regex and removes only the first match.
-_strip_simple_base() {
-    local url="$1"
-    local pattern="$2"
-    local sed_expr="s|${pattern}||"
-    printf '%s\n' "$url" | sed "$sed_expr"
-}
-
-# Run an arbitrary sed script over a URL and print the result.
-# Args: url sed_pattern
-_strip_pattern_base() {
-    local url="$1"
-    local sed_pattern="$2"
-    sed "$sed_pattern" <<< "$url"
-}
-
-
-# Reduce a full cloud API URL to its endpoint path.
-# Strips the Hetzner, DigitalOcean or Fly Machines API base when the URL
-# starts with one of them, then drops everything from the first "?" on.
-# URLs with an unrecognised base keep their prefix.
-# Args: url
-_strip_api_base() {
-    local url="$1"
-    local path="$url"
-    local base
-
-    for base in \
-        "https://api.hetzner.cloud/v1" \
-        "https://api.digitalocean.com/v2" \
-        "https://api.machines.dev/v1"; do
-        if [[ "$url" == "${base}"* ]]; then
-            path="${url#"${base}"}"
-            break
-        fi
-    done
-
-    # Remove the query string, if any.
-    printf '%s\n' "${path%%\?*}"
-}
-
-# Print the space-separated POST body fields required by a cloud endpoint.
-# Prints nothing for cloud/endpoint pairs that have no known requirements.
-# Args: cloud endpoint
-_get_required_fields() {
-    local cloud="$1"
-    local endpoint="$2"
-    local key="${cloud}:${endpoint}"
-
-    if [[ "$key" == "hetzner:/servers" ]]; then
-        echo "name server_type image location"
-    elif [[ "$key" == "digitalocean:/droplets" ]]; then
-        echo "name region size image"
-    elif [[ "$key" == fly:*/machines ]]; then
-        # Any fly endpoint ending in /machines.
-        echo "name region config"
-    fi
-}
-
-# Validate that a POST request body contains the required fields for the
-# major clouds. Used during mock script execution to catch invalid API requests.
-# Args: cloud method endpoint body
-# Globals: MOCK_LOG (appended to with BODY_ERROR lines)
-# Returns: 1 when the body is not valid JSON; 0 otherwise. Missing fields are
-#          logged to MOCK_LOG but do not change the return status.
-_validate_body() {
-    local cloud="$1"
-    local method="$2"
-    local endpoint="$3"
-    local body="$4"
-    local required_fields
-    # Loop variable kept local so it does not leak into the caller's scope.
-    local field
-
-    # Only non-empty POST bodies are validated.
-    [[ "$method" == "POST" ]] || return 0
-    [[ -n "$body" ]] || return 0
-
-    required_fields=$(_get_required_fields "$cloud" "$endpoint")
-    [[ -n "$required_fields" ]] || return 0
-
-    # Check if body is valid JSON
-    if ! printf '%s' "$body" | python3 -c "import json,sys; json.loads(sys.stdin.read())" 2>/dev/null; then
-        echo "BODY_ERROR:invalid_json:${endpoint}" >> "${MOCK_LOG}"
-        return 1
-    fi
-
-    # Check for required fields. The field name is passed as an argument
-    # rather than interpolated into the Python source.
-    for field in $required_fields; do
-        if ! printf '%s' "$body" | python3 -c 'import json,sys; sys.exit(0 if sys.argv[1] in json.loads(sys.stdin.read()) else 1)' "$field" 2>/dev/null; then
-            echo "BODY_ERROR:missing_field:${field}:${endpoint}" >> "${MOCK_LOG}"
-        fi
-    done
-
-    return 0
-}
-
-# ============================================================
-# Cloud-specific env var setup
-# ============================================================
-
-# Export the environment a script under test expects.
-# Always exports a placeholder OPENROUTER_API_KEY and a zero poll delay, then
-# sources the cloud's fixture _env.sh when one exists.
-# Args: cloud
-# Globals: FIXTURES_DIR (read)
-setup_env_for_cloud() {
-    local cloud="$1"
-    local env_file="${FIXTURES_DIR}/${cloud}/_env.sh"
-
-    # Settings shared by every cloud.
-    export OPENROUTER_API_KEY="sk-or-v1-0000000000000000000000000000000000000000000000000000000000000000"
-    export INSTANCE_STATUS_POLL_DELAY=0
-
-    # Per-cloud settings are optional.
-    [[ -f "$env_file" ]] || return 0
-
-    # shellcheck disable=SC1090
-    source "$env_file"
-}
-
-# ============================================================
-# Discovery
-# ============================================================
-
-# List clouds that have fixture data: one name per line for every directory
-# under FIXTURES_DIR that contains a _metadata.json file.
-# Globals: FIXTURES_DIR (read)
-discover_clouds() {
-    local trimmed
-    for fixture_dir in "${FIXTURES_DIR}"/*/; do
-        [[ -f "${fixture_dir}/_metadata.json" ]] || continue
-        # Directory name without the trailing slash or leading path.
-        trimmed="${fixture_dir%/}"
-        echo "${trimmed##*/}"
-    done
-}
-
-# List the agents available for a cloud: the basename (without .sh) of every
-# regular *.sh file in REPO_ROOT/<cloud>/, one per line.
-# Args: cloud
-# Globals: REPO_ROOT (read)
-discover_agents() {
-    local cloud="$1"
-    local name
-    for script in "${REPO_ROOT}/${cloud}"/*.sh; do
-        if [[ -f "$script" ]]; then
-            name="${script##*/}"
-            echo "${name%.sh}"
-        fi
-    done
-}
-
-# ============================================================
-# Test runner helpers
-# ============================================================
-
-# Wait for a background process to finish, killing it after a timeout.
-# Args: pid timeout_seconds exit_code_var
-#   exit_code_var is the NAME of a variable in the caller's scope. It is set
-#   to 124 when the process had to be killed, or to the process's exit status
-#   when that status is non-zero. It is left untouched on success, so callers
-#   should initialise it to 0 beforehand.
-# The process is polled once per second.
-_wait_with_timeout() {
-    local pid="$1"
-    local timeout="$2"
-    local exit_code_var="$3"
-    local i=0
-    # Unusual name so it cannot shadow the caller's result variable.
-    local _wwt_status=0
-
-    while kill -0 "$pid" 2>/dev/null; do
-        if [[ "$i" -ge "$timeout" ]]; then
-            # The process may exit between the probe above and this kill;
-            # tolerate that instead of tripping `set -e`.
-            kill -9 "$pid" 2>/dev/null || true
-            wait "$pid" 2>/dev/null || true
-            printf -v "$exit_code_var" '%s' 124
-            return 0
-        fi
-        sleep 1
-        i=$((i + 1))
-    done
-
-    wait "$pid" 2>/dev/null || _wwt_status=$?
-    if [[ "$_wwt_status" -ne 0 ]]; then
-        # printf -v assigns by name without re-parsing the name as shell code.
-        printf -v "$exit_code_var" '%s' "$_wwt_status"
-    fi
-    return 0
-}
-
-# Run an agent script in the background inside the mock sandbox and give it
-# 4 seconds to finish.
-# The script sees the mock binaries first on PATH, the fake HOME, and the
-# MOCK_* variables that configure the mocks. Its stdin is /dev/null and its
-# combined output goes to TEST_DIR/output.log.
-# Args: script_path cloud state_file fake_home
-# Outputs: sets `exit_code` in the caller's scope (deliberately not local here).
-run_script_with_timeout() {
-    local script_path="$1"
-    local cloud="$2"
-    local state_file="$3"
-    local fake_home="$4"
-    local child_pid
-
-    exit_code=0
-
-    HOME="${fake_home}" \
-    PATH="${TEST_DIR}:${PATH}" \
-    MOCK_CLOUD="${cloud}" \
-    MOCK_FIXTURE_DIR="${FIXTURES_DIR}/${cloud}" \
-    MOCK_REPO_ROOT="${REPO_ROOT}" \
-    MOCK_LOG="${MOCK_LOG}" \
-    MOCK_STATE_FILE="${state_file}" \
-    MOCK_VALIDATE_BODY="${MOCK_VALIDATE_BODY:-}" \
-    MOCK_TRACK_STATE="${MOCK_TRACK_STATE:-}" \
-    MOCK_ERROR_SCENARIO="${MOCK_ERROR_SCENARIO:-}" \
-        bash "${script_path}" < /dev/null > "${TEST_DIR}/output.log" 2>&1 &
-    child_pid=$!
-
-    _wait_with_timeout "$child_pid" 4 "exit_code"
-}
-
-# When a script failed, print the last 20 lines of its captured output,
-# indented and framed by red marker lines. Prints nothing for exit code 0.
-# Args: exit_code
-# Globals: TEST_DIR, RED, NC (read)
-show_failure_output() {
-    local exit_code="$1"
-    local line
-
-    [[ "${exit_code}" -ne 0 ]] || return 0
-
-    printf '%b\n' "    ${RED}--- output (last 20 lines) ---${NC}"
-    tail -20 "${TEST_DIR}/output.log" 2>/dev/null | while IFS= read -r line; do
-        printf '    %s\n' "$line"
-    done
-    printf '%b\n' "    ${RED}--- end output ---${NC}"
-}
-
-# In error-injection mode, check that the script under test failed.
-# Args: exit_code cloud agent
-# Returns: 1 without recording anything when MOCK_ERROR_SCENARIO is unset or
-#          empty; otherwise 0 after bumping PASSED or FAILED and recording
-#          the result.
-assert_error_scenario() {
-    local exit_code="$1"
-    local cloud="$2"
-    local agent="$3"
-
-    if [[ -z "${MOCK_ERROR_SCENARIO:-}" ]]; then
-        return 1
-    fi
-
-    if [[ "${exit_code}" -eq 0 ]]; then
-        # The injected error was swallowed: that is the failure case.
-        printf '%b\n' "    ${RED}✗${NC} should fail on ${MOCK_ERROR_SCENARIO} but exited 0"
-        FAILED=$((FAILED + 1))
-        record_test_result "${cloud}" "${agent}" "fail"
-    else
-        printf '%b\n' "    ${GREEN}✓${NC} fails on ${MOCK_ERROR_SCENARIO} (exit code ${exit_code})"
-        PASSED=$((PASSED + 1))
-        record_test_result "${cloud}" "${agent}" "pass"
-    fi
-    return 0
-}
-
-# Check that the script made the API calls expected for its cloud.
-# Sources FIXTURES_DIR/<cloud>/_api_assertions.sh when that file exists;
-# otherwise falls back to a generic check that some curl GET/POST to an
-# https:// URL was logged.
-# Args: cloud
-assert_cloud_api_calls() {
-    local cloud="$1"
-    local assertions_file="${FIXTURES_DIR}/${cloud}/_api_assertions.sh"
-
-    if [[ ! -f "$assertions_file" ]]; then
-        assert_log_contains "curl (GET|POST) https://" "makes API calls"
-        return
-    fi
-
-    # shellcheck disable=SC1090
-    source "$assertions_file"
-}
-
-# Append one result line to RESULTS_FILE; does nothing when RESULTS_FILE is
-# unset or empty.
-# Args: cloud agent result [reason]
-# Line format: cloud/agent:pass or cloud/agent:fail[:reason]
-# Reasons: exit_code, missing_api_call, missing_env, no_fixture
-record_test_result() {
-    local cloud="$1"
-    local agent="$2"
-    local result="$3"
-    local reason="${4:-}"
-    local entry="${cloud}/${agent}:${result}"
-
-    if [[ -z "${RESULTS_FILE:-}" ]]; then
-        return 0
-    fi
-
-    # The reason suffix is only added when one was supplied.
-    [[ -z "$reason" ]] || entry="${entry}:${reason}"
-    printf '%s\n' "$entry" >> "${RESULTS_FILE}"
-}
-
-# ============================================================
-# Test runner
-# ============================================================
-
-# Run an assertion command and publish how many failures it added.
-# Usage: _tracked_assert <assertion_command> [args...]
-# The command runs in the current shell (no subshell), so its updates to
-# PASSED/FAILED are visible here. The increase in FAILED is stored in
-# _ASSERT_DELTA.
-_tracked_assert() {
-    local failures_at_start="$FAILED"
-    "$@"
-    _ASSERT_DELTA=$(( FAILED - failures_at_start ))
-}
-
-# Pick the single most significant failure reason from per-category counts.
-# Args: has_no_fixture exit_fails api_fails ssh_fails env_fails
-# Precedence: no_fixture, exit_code, missing_api_call, missing_env,
-# missing_ssh; "unknown" when every count is zero.
-# Outputs: the reason string on stdout.
-_categorize_failure() {
-    local has_no_fixture="$1"
-    local exit_fails="$2"
-    local api_fails="$3"
-    local ssh_fails="$4"
-    local env_fails="$5"
-    local reason="unknown"
-
-    if [[ "$has_no_fixture" -gt 0 ]]; then
-        reason="no_fixture"
-    elif [[ "$exit_fails" -gt 0 ]]; then
-        reason="exit_code"
-    elif [[ "$api_fails" -gt 0 ]]; then
-        reason="missing_api_call"
-    elif [[ "$env_fails" -gt 0 ]]; then
-        # env failures outrank ssh failures despite the argument order.
-        reason="missing_env"
-    elif [[ "$ssh_fails" -gt 0 ]]; then
-        reason="missing_ssh"
-    fi
-
-    echo "$reason"
-}
-
-# Run the standard assertions for one script and record which categories failed.
-# Args: exit_code cloud [state_file]
-#   The third argument is only forwarded to assert_server_cleaned_up when
-#   MOCK_TRACK_STATE is "1".
-# Outputs (caller scope): _exit_failed, _api_failed, _ssh_failed, _env_failed,
-#   each holding the number of failures the matching assertion added.
-# The optional body and cleanup assertions run untracked.
-_run_assertions_and_track() {
-    local exit_code="$1"
-    local cloud="$2"
-    # Local so each _tracked_assert result stays inside this function.
-    local _ASSERT_DELTA=0
-
-    _tracked_assert assert_exit_code "${exit_code}" 0 "exits successfully"
-    _exit_failed=$_ASSERT_DELTA
-
-    _tracked_assert assert_cloud_api_calls "$cloud"
-    _api_failed=$_ASSERT_DELTA
-
-    _tracked_assert assert_log_contains "ssh " "uses SSH"
-    _ssh_failed=$_ASSERT_DELTA
-
-    _tracked_assert assert_env_injected "OPENROUTER_API_KEY"
-    _env_failed=$_ASSERT_DELTA
-
-    if [[ "${MOCK_VALIDATE_BODY:-}" == "1" ]]; then
-        assert_no_body_errors
-    fi
-
-    if [[ "${MOCK_TRACK_STATE:-}" == "1" ]]; then
-        assert_server_cleaned_up "$3"
-    fi
-}
-
-# Print 1 when MOCK_LOG contains a "NO_FIXTURE:" marker, 0 otherwise
-# (including when the log cannot be read).
-_has_missing_fixture() {
-    if grep -q "NO_FIXTURE:" "${MOCK_LOG}" 2>/dev/null; then
-        echo 1
-    else
-        echo 0
-    fi
-}
-
-# Reset per-test state before a script runs: truncate the mock call log,
-# export the environment for the cloud, then truncate the state file.
-# Args: cloud state_file
-# Globals: MOCK_LOG (truncated)
-_setup_test_env() {
-    local cloud="$1"
-    local state_file="$2"
-
-    : > "${MOCK_LOG}"
-
-    setup_env_for_cloud "$cloud"
-
-    : > "${state_file}"
-}
-
-# Record the overall pass/fail result for one cloud/agent combination.
-# Args: cloud agent pre_failed
-#   pre_failed is the value FAILED had before the test started; any increase
-#   since then marks the test as failed.
-# Reads _exit_failed, _api_failed, _ssh_failed and _env_failed from the
-# caller's scope to choose the failure reason.
-_record_categorized_result() {
-    local cloud="$1"
-    local agent="$2"
-    local pre_failed="$3"
-    local new_failures=$((FAILED - pre_failed))
-
-    if [[ "$new_failures" -le 0 ]]; then
-        record_test_result "${cloud}" "${agent}" "pass"
-        return
-    fi
-
-    local fixture_missing
-    fixture_missing=$(_has_missing_fixture)
-    local fail_reason
-    fail_reason=$(_categorize_failure "$fixture_missing" "$_exit_failed" "$_api_failed" "$_ssh_failed" "$_env_failed")
-    record_test_result "${cloud}" "${agent}" "fail" "${fail_reason}"
-}
-
-# Run the per-agent install assertions, when a shared assertions file exists.
-# Sources FIXTURES_DIR/_shared_agent_assertions.sh, then the cloud's optional
-# _agent_assertions.sh overrides, and finally calls assert_agent_install.
-# Does nothing when the shared file is absent.
-# Args: cloud agent
-_run_agent_assertions() {
-    local cloud="$1"
-    local agent="$2"
-    local shared_file="${FIXTURES_DIR}/_shared_agent_assertions.sh"
-    local cloud_file="${FIXTURES_DIR}/${cloud}/_agent_assertions.sh"
-
-    [[ -f "$shared_file" ]] || return 0
-
-    # shellcheck disable=SC1090
-    source "$shared_file"
-
-    # Per-cloud overrides are sourced after the shared definitions.
-    if [[ -f "$cloud_file" ]]; then
-        # shellcheck disable=SC1090
-        source "$cloud_file"
-    fi
-
-    assert_agent_install "$cloud" "$agent"
-}
-
-# Run the mock test for one cloud/agent script.
-# Args: cloud agent
-# Skips (and counts a skip) when the script does not exist, or when the cloud
-# is a TypeScript provider and no error scenario is active. Otherwise runs the
-# script in the sandbox and either checks that the injected error made it
-# fail, or runs the standard and per-agent assertions and records the result.
-run_test() {
-    local cloud="$1"
-    local agent="$2"
-    local script_path="${REPO_ROOT}/${cloud}/${agent}.sh"
-    local skip_reason=""
-
-    if [[ ! -f "$script_path" ]]; then
-        skip_reason="file not found"
-    elif [[ -z "${MOCK_ERROR_SCENARIO:-}" ]] && [[ "$cloud" == "fly" || "$cloud" == "digitalocean" ]]; then
-        # TypeScript-based providers (fly, digitalocean) use bun with native
-        # fetch() for API calls, so fixture-based mocks that intercept curl
-        # don't apply. Coverage comes from: bun test + failure mode tests.
-        skip_reason="TS provider (tested via bun test + failure modes)"
-    fi
-
-    if [[ -n "$skip_reason" ]]; then
-        printf '%b\n' "  ${YELLOW}skip${NC} ${cloud}/${agent}.sh — ${skip_reason}"
-        SKIPPED=$((SKIPPED + 1))
-        return 0
-    fi
-
-    printf '%b\n' "  ${CYAN}test${NC} ${cloud}/${agent}.sh"
-
-    # Snapshot FAILED so the result can be derived from its increase.
-    local _pre_failed="${FAILED}"
-    local fake_home
-    fake_home=$(setup_fake_home)
-    local state_file="${TEST_DIR}/state_${cloud}_${agent}.log"
-
-    _setup_test_env "$cloud" "$state_file"
-
-    # Assigned by run_script_with_timeout; it must keep this exact name.
-    local exit_code
-    run_script_with_timeout "${script_path}" "${cloud}" "${state_file}" "${fake_home}"
-    show_failure_output "${exit_code}"
-
-    # Error scenario mode only checks that the script failed. Normal mode
-    # runs the standard assertions, the per-agent install assertions, and
-    # records the categorized result.
-    if ! assert_error_scenario "${exit_code}" "${cloud}" "${agent}"; then
-        _run_assertions_and_track "${exit_code}" "${cloud}" "${state_file}"
-        _run_agent_assertions "$cloud" "$agent"
-        _record_categorized_result "${cloud}" "${agent}" "$_pre_failed"
-    fi
-
-    printf '\n'
-}
-
-# ============================================================
-# Fly.io failure-mode tests (#1579)
-# ============================================================
-
-# Run one Fly.io agent script with an error scenario injected and expect it
-# to fail.
-# The scenario is handed to the mocks through MOCK_ERROR_SCENARIO; the script
-# gets up to 10 seconds. Returns quietly when the agent script does not exist.
-# Bumps PASSED when the script exits non-zero, FAILED when it exits 0.
-# Args: scenario agent
-_run_fly_error_test() {
-    local scenario="$1"
-    local agent="$2"
-    local script_path="${REPO_ROOT}/fly/${agent}.sh"
-
-    if [[ ! -f "$script_path" ]]; then
-        return 0
-    fi
-
-    printf '%b\n' "  ${CYAN}test${NC} fly/${agent}.sh [${scenario}]"
-
-    local fake_home
-    fake_home=$(setup_fake_home)
-    local state_file="${TEST_DIR}/state_fly_${agent}_${scenario}.log"
-
-    # Fresh log, environment and state for every scenario.
-    : > "${MOCK_LOG}"
-    setup_env_for_cloud "fly"
-    : > "${state_file}"
-
-    # Re-create fly mock so it picks up the error scenario
-    _create_fly_mock
-
-    local exit_code=0
-    HOME="${fake_home}" \
-    PATH="${TEST_DIR}:${PATH}" \
-    SPAWN_NON_INTERACTIVE=1 \
-    MOCK_CLOUD="fly" \
-    MOCK_FIXTURE_DIR="${FIXTURES_DIR}/fly" \
-    MOCK_REPO_ROOT="${REPO_ROOT}" \
-    MOCK_LOG="${MOCK_LOG}" \
-    MOCK_STATE_FILE="${state_file}" \
-    MOCK_ERROR_SCENARIO="${scenario}" \
-        bash "${script_path}" < /dev/null > "${TEST_DIR}/output.log" 2>&1 &
-    local child_pid=$!
-    _wait_with_timeout "$child_pid" 10 "exit_code"
-
-    if [[ "${exit_code}" -eq 0 ]]; then
-        printf '%b\n' "    ${RED}✗${NC} should fail on ${scenario} but exited 0"
-        FAILED=$((FAILED + 1))
-    else
-        printf '%b\n' "    ${GREEN}✓${NC} fails on ${scenario} (exit code ${exit_code})"
-        PASSED=$((PASSED + 1))
-    fi
-    printf '\n'
-}
-
-# Run every Fly.io failure-mode scenario against one representative agent.
-# Uses claude.sh as the test subject; counts a single skip when it is missing.
-# Scenarios, in order:
-#   rate_limit         - API rate limit (429) from mock curl
-#   create_failure     - machine creation failure (422) on POST to */machines*
-#   ssh_tunnel_failure - fly ssh console / fly machine exec exit non-zero
-#   ssh_timeout        - fly CLI never returns "ok", so SSH waiting exhausts retries
-run_fly_failure_tests() {
-    printf '%b\n' "${CYAN}━━━ fly failure modes (#1579) ━━━${NC}"
-
-    local test_agent="claude"
-    local scenario
-
-    if [[ ! -f "${REPO_ROOT}/fly/${test_agent}.sh" ]]; then
-        printf '%b\n' "  ${YELLOW}skip${NC} fly/${test_agent}.sh not found"
-        SKIPPED=$((SKIPPED + 1))
-        return 0
-    fi
-
-    for scenario in rate_limit create_failure ssh_tunnel_failure ssh_timeout; do
-        _run_fly_error_test "$scenario" "$test_agent"
-    done
-}
-
-# ============================================================
-# Main
-# ============================================================
-
-# Banner.
-printf '%b\n' \
-    "${CYAN}===============================${NC}" \
-    "${CYAN} Spawn Mock Test Suite${NC}" \
-    "${CYAN}===============================${NC}"
-printf '\n'
-
-# Optional positional filters: [cloud] [agent].
-FILTER_CLOUD="${1:-}"
-FILTER_AGENT="${2:-}"
-
-# Install the mock binaries.
-setup_mock_curl
-setup_mock_ssh
-setup_mock_agents
-
-# Decide which clouds to test: the requested one (which must have a fixtures
-# directory) or every cloud with fixture metadata.
-if [[ -z "$FILTER_CLOUD" ]]; then
-    CLOUDS=$(discover_clouds)
-else
-    CLOUDS="$FILTER_CLOUD"
-    if [[ ! -d "${FIXTURES_DIR}/${FILTER_CLOUD}" ]]; then
-        printf '%b\n' "${RED}No fixtures for cloud: ${FILTER_CLOUD}${NC}"
-        printf "Available: %s\n" "$(discover_clouds | tr '\n' ' ')"
-        exit 1
-    fi
-fi
-
-# Nothing recorded yet is not an error.
-if [[ -z "$CLOUDS" ]]; then
-    printf '%b\n' "${YELLOW}No fixture data found in ${FIXTURES_DIR}/${NC}"
-    printf "Run test/record.sh first to record API fixtures.\n"
-    exit 0
-fi
-
-printf "Fixtures dir: %s\nClouds:       %s\n\n" "${FIXTURES_DIR}" "$CLOUDS"
-
-# --- Run clouds in parallel ---
-# Each cloud runs in its own background subshell with a private temp dir,
-# mock log and counters. The subshell writes its output to <cloud>.log and
-# its "passed failed skipped" totals to <cloud>.counts in CLOUD_RESULTS_DIR.
-CLOUD_RESULTS_DIR="${TEST_DIR}/cloud_results"
-mkdir -p "${CLOUD_RESULTS_DIR}"
-
-CLOUD_PIDS=""
-for cloud in $CLOUDS; do
-    (
-        # Isolated per-cloud state
-        CLOUD_TEST_DIR=$(mktemp -d)
-        MOCK_LOG="${CLOUD_TEST_DIR}/mock_calls.log"
-        CLOUD_PASSED=0
-        CLOUD_FAILED=0
-        CLOUD_SKIPPED=0
-
-        # Re-create mocks in per-cloud temp dir (curl/ssh/agents need own copies)
-        TEST_DIR="${CLOUD_TEST_DIR}"
-        setup_mock_curl
-        setup_mock_ssh
-        setup_mock_agents
-
-        # Override counters used by assertions (they modify PASSED/FAILED/SKIPPED)
-        PASSED=0
-        FAILED=0
-        SKIPPED=0
-
-        printf '%b\n' "${CYAN}━━━ ${cloud} ━━━${NC}"
-
-        if [[ -n "$FILTER_AGENT" ]]; then
-            AGENTS="$FILTER_AGENT"
-        else
-            AGENTS=$(discover_agents "$cloud")
-        fi
-
-        if [[ -z "$AGENTS" ]]; then
-            printf '%b\n' "  ${YELLOW}skip${NC} no agent scripts found in ${cloud}/"
-            SKIPPED=$((SKIPPED + 1))
-        else
-            for agent in $AGENTS; do
-                run_test "$cloud" "$agent"
-            done
-        fi
-        printf '\n'
-
-        # Write counts to results file for aggregation
-        printf '%d %d %d\n' "$PASSED" "$FAILED" "$SKIPPED" > "${CLOUD_RESULTS_DIR}/${cloud}.counts"
-
-        rm -rf "${CLOUD_TEST_DIR}"
-    ) > "${CLOUD_RESULTS_DIR}/${cloud}.log" 2>&1 &
-    CLOUD_PIDS="${CLOUD_PIDS} $!"
-done
-
-# Wait for all clouds to finish
-for pid in $CLOUD_PIDS; do
-    wait "$pid" 2>/dev/null || true
-done
-
-# Print output from each cloud (in discovery order for consistent output)
-for cloud in $CLOUDS; do
-    if [[ -f "${CLOUD_RESULTS_DIR}/${cloud}.log" ]]; then
-        cat "${CLOUD_RESULTS_DIR}/${cloud}.log"
-    fi
-done
-
-# Aggregate results from all clouds
-for cloud in $CLOUDS; do
-    if [[ -f "${CLOUD_RESULTS_DIR}/${cloud}.counts" ]]; then
-        read -r p f s < "${CLOUD_RESULTS_DIR}/${cloud}.counts"
-        PASSED=$((PASSED + p))
-        FAILED=$((FAILED + f))
-        SKIPPED=$((SKIPPED + s))
-    else
-        # The counts file is the last thing a cloud subshell writes, so its
-        # absence means the subshell died early. Count that as a failure
-        # instead of silently dropping the cloud from the totals.
-        printf '%b\n' "${RED}${cloud}: no test counts recorded (test subshell exited early)${NC}"
-        FAILED=$((FAILED + 1))
-    fi
-done
-
-# --- Fly.io failure-mode tests (#1579) ---
-# Runs only when fly fixtures exist, no agent filter is active, and the cloud
-# filter is either empty or "fly". The tests run in a foreground subshell with
-# their own temp dir and counters; output and counts are folded in afterwards.
-if [[ -d "${FIXTURES_DIR}/fly" && -z "$FILTER_AGENT" ]] && [[ -z "$FILTER_CLOUD" || "$FILTER_CLOUD" == "fly" ]]; then
-    (
-        FLY_FAIL_TEST_DIR=$(mktemp -d)
-        TEST_DIR="${FLY_FAIL_TEST_DIR}"
-        MOCK_LOG="${FLY_FAIL_TEST_DIR}/mock_calls.log"
-
-        # Private counters for this subshell.
-        PASSED=0
-        FAILED=0
-        SKIPPED=0
-
-        setup_mock_curl
-        setup_mock_ssh
-        setup_mock_agents
-
-        run_fly_failure_tests
-
-        printf '%d %d %d\n' "$PASSED" "$FAILED" "$SKIPPED" > "${CLOUD_RESULTS_DIR}/fly_failures.counts"
-        rm -rf "${FLY_FAIL_TEST_DIR}"
-    ) > "${CLOUD_RESULTS_DIR}/fly_failures.log" 2>&1
-
-    # Replay the captured output.
-    if [[ -f "${CLOUD_RESULTS_DIR}/fly_failures.log" ]]; then
-        cat "${CLOUD_RESULTS_DIR}/fly_failures.log"
-    fi
-
-    # Add the subshell's counts to the global totals.
-    if [[ -f "${CLOUD_RESULTS_DIR}/fly_failures.counts" ]]; then
-        read -r p f s < "${CLOUD_RESULTS_DIR}/fly_failures.counts"
-        PASSED=$((PASSED + p))
-        FAILED=$((FAILED + f))
-        SKIPPED=$((SKIPPED + s))
-    fi
-fi
-
-# --- Summary ---
-# Print the totals and exit 1 when anything failed, 0 otherwise.
-TOTAL=$((PASSED + FAILED + SKIPPED))
-printf '%b\n' \
-    "${CYAN}===============================${NC}" \
-    " Results: ${GREEN}${PASSED} passed${NC}, ${RED}${FAILED} failed${NC}, ${YELLOW}${SKIPPED} skipped${NC}, ${TOTAL} total" \
-    "${CYAN}===============================${NC}"
-
-if [[ "$FAILED" -le 0 ]]; then
-    exit 0
-fi
-exit 1
diff --git a/test/qa-dry-run.sh b/test/qa-dry-run.sh
deleted file mode 100644
index c83e17dc..00000000
--- a/test/qa-dry-run.sh
+++ /dev/null
@@ -1,671 +0,0 @@
-#!/bin/bash
-set -eo pipefail
-
-# QA Dry Run — Local-only version of qa-cycle.sh
-# Does everything qa-cycle.sh does but with NO git/gh commands.
-# All output goes to .docs/qa-dry-run-latest/.
-#
-# Usage:
-#   bash test/qa-dry-run.sh
-
-# Resolve the repository root and work from there.
-# Prefer git's idea of the toplevel. The `|| true` matters: under `set -e` a
-# failing command substitution in an assignment would abort the script before
-# the fallback below could run.
-REPO_ROOT="$(git -C "$(dirname "${BASH_SOURCE[0]}")" rev-parse --show-toplevel 2>/dev/null || true)"
-if [[ -z "${REPO_ROOT}" ]]; then
-    # Fallback: the parent of this script's directory.
-    REPO_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
-fi
-cd "${REPO_ROOT}"
-
-# Everything this dry run produces lives under DRY_RUN_DIR.
-DRY_RUN_DIR="${REPO_ROOT}/.docs/qa-dry-run-latest"
-LOG_FILE="${DRY_RUN_DIR}/qa-dry-run.log"
-WOULD_COMMIT_LOG="${DRY_RUN_DIR}/would-commit.txt"
-
-# Time budgets, in seconds.
-CYCLE_TIMEOUT=2700  # 45 min total
-AGENT_TIMEOUT=600   # 10 min per agent
-
-# Per-phase result files written by the test runs.
-RESULTS_PHASE2="${DRY_RUN_DIR}/results-phase2.txt"
-RESULTS_PHASE4="${DRY_RUN_DIR}/results-phase4.txt"
-
-# Start from an empty output directory with empty log files.
-rm -rf "${DRY_RUN_DIR}"
-mkdir -p "${DRY_RUN_DIR}"
-: > "${LOG_FILE}"
-: > "${WOULD_COMMIT_LOG}"
-
-# Print a timestamped "[qa-dry]" message to stdout and append it to LOG_FILE.
-# Args: message words (joined with spaces)
-log() {
-    local stamp
-    stamp="$(date +'%Y-%m-%d %H:%M:%S')"
-    printf '[%s] [qa-dry] %s\n' "${stamp}" "$*" | tee -a "${LOG_FILE}"
-}
-
-# Exit/signal handler: log the final exit code of the dry run.
-cleanup() {
-    # Must be the first statement so $? is still the status being reported.
-    local final_status=$?
-    log "=== QA Dry Run Done (exit_code=${final_status}) ==="
-}
-trap cleanup EXIT TERM INT
-
-# macOS-compatible replacement for timeout(1).
-# Usage: run_with_timeout SECONDS COMMAND [ARGS...]
-# Runs COMMAND in the background and polls it once per second. After SECONDS
-# it sends TERM, waits one second, sends KILL, and returns 124. Otherwise it
-# returns the command's own exit status.
-run_with_timeout() {
-    local limit="$1"
-    shift
-
-    "$@" &
-    local child=$!
-    local waited=0
-
-    while kill -0 "$child" 2>/dev/null; do
-        if [[ "$waited" -lt "$limit" ]]; then
-            sleep 1
-            waited=$((waited + 1))
-            continue
-        fi
-
-        # Out of time: polite TERM first, then KILL, then reap the child.
-        kill "$child" 2>/dev/null
-        sleep 1
-        kill -9 "$child" 2>/dev/null || true
-        wait "$child" 2>/dev/null || true
-        return 124
-    done
-
-    wait "$child" 2>/dev/null
-}
-
-# Announce the run and its settings.
-log "=== Starting QA Dry Run ==="
-log "Repo root: ${REPO_ROOT}"
-log "Output dir: ${DRY_RUN_DIR}"
-log "Timeout: ${CYCLE_TIMEOUT}s"
-
-# Epoch seconds at start; check_timeout measures the whole cycle against this.
-CYCLE_START="$(date +%s)"
-
-# Check whether the whole cycle is still inside its time budget.
-# Returns: 0 while fewer than CYCLE_TIMEOUT seconds have passed since
-#          CYCLE_START; otherwise logs a TIMEOUT message and returns 1.
-check_timeout() {
-    local elapsed
-    elapsed=$(( $(date +%s) - CYCLE_START ))
-
-    if [[ "$elapsed" -lt "$CYCLE_TIMEOUT" ]]; then
-        return 0
-    fi
-
-    log "TIMEOUT: Cycle exceeded ${CYCLE_TIMEOUT}s, stopping"
-    return 1
-}
-
-# Record a command the real QA cycle would have run, without running it.
-# Appends "[would-run] <args>" to WOULD_COMMIT_LOG.
-would_commit() {
-    local described="$*"
-    printf '[would-run] %s\n' "${described}" >> "${WOULD_COMMIT_LOG}"
-}
-
-# ============================================================
-# Phase 0: Key Preflight
-# ============================================================
-# Load cloud keys through shared/key-request.sh when it exists and, if any
-# are missing, request them through the key server when KEY_SERVER_URL is set.
-log "=== Phase 0: Key Preflight ==="
-
-if [[ ! -f "${REPO_ROOT}/shared/key-request.sh" ]]; then
-    log "Phase 0: shared/key-request.sh not found, skipping key preflight"
-else
-    source "${REPO_ROOT}/shared/key-request.sh"
-    load_cloud_keys_from_config
-
-    if [[ -z "${MISSING_KEY_PROVIDERS:-}" ]]; then
-        log "Phase 0: All cloud keys available"
-    else
-        log "Phase 0: Missing keys for: ${MISSING_KEY_PROVIDERS}"
-        if [[ -z "${KEY_SERVER_URL:-}" ]]; then
-            log "Phase 0: KEY_SERVER_URL not set — skipping email notification"
-            log "Phase 0: Set KEY_SERVER_URL and KEY_SERVER_SECRET to enable email flow"
-        else
-            log "Phase 0: Requesting keys via key-server (will trigger email notification)"
-            request_missing_cloud_keys
-        fi
-    fi
-fi
-
-if ! check_timeout; then
-    exit 0
-fi
-
-# ============================================================
-# Phase 0.5: macOS Compatibility Lint
-# ============================================================
-# Run test/macos-compat.sh when present, capture its output, and log the
-# counts of lines starting with "error " and "warn ". Lint failures are only
-# logged; they do not stop the run.
-log "=== Phase 0.5: macOS Compatibility Lint ==="
-
-LINT_OUTPUT="${DRY_RUN_DIR}/macos-compat-output.txt"
-LINT_ERRORS=0
-LINT_WARNS=0
-
-if [[ ! -f "${REPO_ROOT}/test/macos-compat.sh" ]]; then
-    log "Phase 0.5: test/macos-compat.sh not found, skipping"
-else
-    LINT_EXIT=0
-    bash "${REPO_ROOT}/test/macos-compat.sh" > "${LINT_OUTPUT}" 2>&1 || LINT_EXIT=$?
-
-    if [[ -f "${LINT_OUTPUT}" ]]; then
-        # grep -c exits non-zero when it counts 0 matches, hence the || true.
-        LINT_ERRORS=$(grep -c "^error " "${LINT_OUTPUT}" 2>/dev/null || true)
-        LINT_WARNS=$(grep -c "^warn " "${LINT_OUTPUT}" 2>/dev/null || true)
-    fi
-
-    if [[ "${LINT_EXIT}" -ne 0 ]]; then
-        log "Phase 0.5: macOS compat lint found ${LINT_ERRORS} error(s), ${LINT_WARNS} warning(s)"
-        log "Phase 0.5: Continuing (lint is advisory for now)"
-    else
-        log "Phase 0.5: macOS compat lint passed (${LINT_WARNS} warning(s))"
-    fi
-fi
-
-if ! check_timeout; then
-    exit 0
-fi
-
-# ============================================================
-# Phase 1: Record fixtures
-# ============================================================
-# Phase 1 records API fixtures with test/record.sh. When recording fails it
-# works out which clouds failed, separates auth failures (stale keys) from
-# other failures, runs a `claude` agent per non-auth failure in a temporary
-# copy of the repo, copies the agent's changes back, re-records, and finally
-# requests fresh keys for providers whose keys looked stale.
-log "=== Phase 1: Record fixtures ==="
-
-# Full recorder output is kept so the failed clouds can be parsed out of it.
-RECORD_OUTPUT="${DRY_RUN_DIR}/record-output.txt"
-
-RECORD_EXIT=0
-bash test/record.sh allsaved 2>&1 | tee -a "${LOG_FILE}" | tee "${RECORD_OUTPUT}" || RECORD_EXIT=$?
-
-if [[ "${RECORD_EXIT}" -eq 0 ]]; then
-    log "Phase 1: All fixtures recorded successfully"
-else
-    log "Phase 1: Some fixture recordings failed, identifying failed clouds..."
-
-    # Parse which clouds had failures
-    # Each line has ANSI color codes stripped first. A "Recording <cloud> ━━━"
-    # header sets the current cloud; a later line containing "fail " adds that
-    # cloud to the space-separated list once.
-    RECORD_FAILED_CLOUDS=""
-    current_cloud=""
-    while IFS= read -r line; do
-        clean=$(printf '%s' "$line" | sed 's/\x1b\[[0-9;]*m//g')
-        case "$clean" in
-            *"Recording "*" ━━━"*)
-                current_cloud=$(printf '%s' "$clean" | sed 's/.*Recording //; s/ ━━━.*//')
-                ;;
-            *"fail "*)
-                if [[ -n "${current_cloud}" ]]; then
-                    case " ${RECORD_FAILED_CLOUDS} " in
-                        *" ${current_cloud} "*) ;;
-                        *) RECORD_FAILED_CLOUDS="${RECORD_FAILED_CLOUDS} ${current_cloud}" ;;
-                    esac
-                fi
-                ;;
-        esac
-    done < "${RECORD_OUTPUT}"
-    # Drop the leading space left by the append above.
-    RECORD_FAILED_CLOUDS=$(printf '%s' "${RECORD_FAILED_CLOUDS}" | sed 's/^ //')
-
-    if [[ -n "${RECORD_FAILED_CLOUDS}" ]]; then
-        log "Phase 1: Failed clouds: ${RECORD_FAILED_CLOUDS}"
-
-        # Separate auth failures from code failures
-        NON_AUTH_FAILED_CLOUDS=""
-        STALE_KEY_PROVIDERS=""
-        # Extended regex (matched case-insensitively below) for output that
-        # indicates an authentication or authorization problem.
-        AUTH_PATTERN="401|403|[Uu]nauthorized|[Ff]orbidden|[Ii]nvalid.*(token|key|api)|[Aa]ccess.denied|[Aa]uthentication.failed"
-
-        for cloud in ${RECORD_FAILED_CLOUDS}; do
-            # Up to 50 lines of recorder output starting at this cloud's section.
-            error_output=$(sed -n "/Recording ${cloud}/,/Recording \|━━━ \|Results:/p" "${RECORD_OUTPUT}" | head -50 || true)
-
-            if printf '%s' "${error_output}" | grep -iqE "${AUTH_PATTERN}"; then
-                log "Phase 1: Auth failure for ${cloud} — key is stale"
-                # invalidate_cloud_key is only called when it is defined
-                # (presumably by shared/key-request.sh — verify).
-                if type invalidate_cloud_key &>/dev/null; then
-                    invalidate_cloud_key "${cloud}"
-                    # Unset every env var reported for this cloud.
-                    while IFS= read -r var_name; do
-                        [[ -n "${var_name}" ]] && unset "${var_name}" 2>/dev/null || true
-                    done <<< "$(get_cloud_env_vars "${cloud}")"
-                fi
-                STALE_KEY_PROVIDERS="${STALE_KEY_PROVIDERS} ${cloud}"
-            else
-                NON_AUTH_FAILED_CLOUDS="${NON_AUTH_FAILED_CLOUDS} ${cloud}"
-            fi
-        done
-        NON_AUTH_FAILED_CLOUDS=$(printf '%s' "${NON_AUTH_FAILED_CLOUDS}" | sed 's/^ //')
-        STALE_KEY_PROVIDERS=$(printf '%s' "${STALE_KEY_PROVIDERS}" | sed 's/^ //')
-
-        if [[ -n "${STALE_KEY_PROVIDERS}" ]]; then
-            log "Phase 1: Stale keys detected: ${STALE_KEY_PROVIDERS}"
-        fi
-
-        # Spawn all record-fix agents in parallel (one per non-auth failed cloud)
-        RECORD_FIX_PIDS=""
-        RECORD_FIX_WORK_DIRS=""
-
-        for cloud in ${NON_AUTH_FAILED_CLOUDS}; do
-            check_timeout || break
-
-            # Up to 30 lines of context for the agent prompt.
-            error_lines=$(sed -n "/Recording ${cloud}/,/Recording \|━━━ \|Results:/p" "${RECORD_OUTPUT}" | head -30 || true)
-
-            log "Phase 1: Spawning agent to debug ${cloud} recording failure (async)"
-            would_commit "git worktree add ... -b qa/record-fix-${cloud} origin/main"
-
-            # Dry-run stand-in for a git worktree: a full copy of the repo
-            # in a temp dir.
-            WORK_DIR=$(mktemp -d "/tmp/spawn-qa-dry-XXXXXX")
-            cp -r "${REPO_ROOT}/." "${WORK_DIR}/" 2>/dev/null || true
-
-            # HEAD of the copy; the diff below is taken against it.
-            ORIG_HEAD=$(cd "${WORK_DIR}" && git rev-parse HEAD 2>/dev/null) || ORIG_HEAD=""
-
-            # Background subshell: run the agent inside the copy, bounded by
-            # AGENT_TIMEOUT, then copy back whatever files it changed.
-            (
-                cd "${WORK_DIR}"
-                run_with_timeout "${AGENT_TIMEOUT}" claude -p "The API fixture recording for cloud '${cloud}' is failing in test/record.sh.
-
-Error output:
-${error_lines}
-
-Investigate and fix. Only modify ${cloud}/lib/common.sh and test/record.sh." \
-                    2>&1 | tee -a "${DRY_RUN_DIR}/agent-record-fix-${cloud}.log" || true
-
-                # Copy changed files directly back to repo
-                changed=$(git diff --name-only "${ORIG_HEAD}" 2>/dev/null || true)
-                if [[ -n "$changed" ]]; then
-                    printf '%s\n' "$changed" | while IFS= read -r f; do
-                        [[ -f "$f" ]] || continue
-                        mkdir -p "${REPO_ROOT}/$(dirname "$f")"
-                        cp "$f" "${REPO_ROOT}/$f"
-                    done
-                fi
-            ) &
-            RECORD_FIX_PIDS="${RECORD_FIX_PIDS} $!"
-            RECORD_FIX_WORK_DIRS="${RECORD_FIX_WORK_DIRS} ${WORK_DIR}"
-        done
-
-        # Wait for all record-fix agents
-        if [[ -n "${RECORD_FIX_PIDS}" ]]; then
-            log "Phase 1: Waiting for record-fix agents..."
-            for pid in ${RECORD_FIX_PIDS}; do
-                wait "$pid" 2>/dev/null || true
-            done
-        fi
-
-        # Log what changed and clean up work dirs
-        for cloud in ${NON_AUTH_FAILED_CLOUDS}; do
-            would_commit "git add ${cloud}/lib/common.sh test/record.sh && git commit && git push && gh pr create && gh pr merge"
-        done
-        for work_dir in ${RECORD_FIX_WORK_DIRS}; do
-            rm -rf "${work_dir}"
-        done
-
-        # Re-record after fixes
-        log "Phase 1: Re-recording after fixes..."
-        bash test/record.sh allsaved 2>&1 | tee -a "${LOG_FILE}" || {
-            log "Phase 1: Re-record still has failures — continuing with existing fixtures"
-        }
-    fi
-
-    # Request fresh keys for stale providers (triggers email via key-server)
-    if [[ -n "${STALE_KEY_PROVIDERS:-}" ]] && type request_missing_cloud_keys &>/dev/null; then
-        MISSING_KEY_PROVIDERS="${STALE_KEY_PROVIDERS}"
-        log "Phase 1: Requesting fresh keys for stale providers: ${STALE_KEY_PROVIDERS}"
-        request_missing_cloud_keys
-        log "Phase 1: Key request sent (email notification will be sent if KEY_SERVER_URL is configured)"
-    fi
-fi
-
-# The parsed recorder output is no longer needed.
-rm -f "${RECORD_OUTPUT}"
-check_timeout || exit 0
-
-# ============================================================
-# Phase 2: Run mock tests
-# ============================================================
-# Run the mock test suite with RESULTS_FILE pointing at the phase-2 results
-# file, then tally passes and failures from that file.
-log "=== Phase 2: Run mock tests ==="
-
-rm -f "${RESULTS_PHASE2}"
-MOCK_EXIT=0
-RESULTS_FILE="${RESULTS_PHASE2}" bash test/mock.sh 2>&1 | tee -a "${LOG_FILE}" || MOCK_EXIT=$?
-
-PASS_COUNT=0
-FAIL_COUNT=0
-if [[ -f "${RESULTS_PHASE2}" ]]; then
-    TOTAL_TESTS=$(wc -l < "${RESULTS_PHASE2}" | tr -d ' ')
-    # grep -c exits non-zero when it counts 0 matches, hence the || true.
-    PASS_COUNT=$(grep -c ':pass$' "${RESULTS_PHASE2}" || true)
-    # Failure lines are "cloud/agent:fail" or "cloud/agent:fail:reason";
-    # anchoring on ':fail$' alone would miss every failure that has a reason.
-    FAIL_COUNT=$(grep -cE ':fail(:|$)' "${RESULTS_PHASE2}" || true)
-    log "Phase 2: ${PASS_COUNT} passed, ${FAIL_COUNT} failed, ${TOTAL_TESTS} total"
-else
-    log "Phase 2: No results file generated"
-fi
-
-check_timeout || exit 0
-
-# ============================================================
-# Phase 3: Fix mock failures
-# ============================================================
-log "=== Phase 3: Fix failures ==="
-
-if [[ "${FAIL_COUNT:-0}" -eq 0 ]]; then
-    log "Phase 3: No failures to fix"
-else
-    FAILURES=""
-    FAILED_CLOUDS=""
-    if [[ -f "${RESULTS_PHASE2}" ]]; then
-        FAILURES=$(grep ':fail$' "${RESULTS_PHASE2}" | sed 's/:fail$//' || true)
-        FAILED_CLOUDS=$(grep ':fail$' "${RESULTS_PHASE2}" | sed 's/:fail$//' | cut -d/ -f1 | sort -u || true)
-    fi
-
-    # Spawn all fix agents in parallel (one per failed cloud)
-    FIX_PIDS=""
-    FIX_WORK_DIRS=""
-    FIX_ORIG_HEADS=""
-
-    for cloud in $FAILED_CLOUDS; do
-        check_timeout || break
-
-        cloud_failures=$(printf '%s\n' $FAILURES | grep "^${cloud}/" || true)
-        failing_scripts=""
-        error_context=""
-        for combo in $cloud_failures; do
-            agent=$(printf '%s' "$combo" | cut -d/ -f2)
-            script_path="${cloud}/${agent}.sh"
-            failing_scripts="${failing_scripts} ${script_path}"
-            if [[ -f "${LOG_FILE}" ]]; then
-                ctx=$(grep -A 10 "test ${script_path}" "${LOG_FILE}" | tail -10 || true)
-                if [[ -n "$ctx" ]]; then
-                    error_context="${error_context}
---- ${script_path} ---
-${ctx}
-"
-                fi
-            fi
-        done
-        failing_scripts=$(printf '%s' "$failing_scripts" | sed 's/^ //')
-
-        fail_count=$(printf '%s\n' $cloud_failures | wc -l | tr -d ' ')
-        log "Phase 3: Spawning agent to fix ${fail_count} failing script(s) in ${cloud} (async)"
-        would_commit "git worktree add ... -b qa/fix-${cloud} origin/main"
-
-        WORK_DIR=$(mktemp -d "/tmp/spawn-qa-dry-XXXXXX")
-        cp -r "${REPO_ROOT}/." "${WORK_DIR}/" 2>/dev/null || true
-
-        ORIG_HEAD=$(cd "${WORK_DIR}" && git rev-parse HEAD 2>/dev/null) || ORIG_HEAD=""
-
-        # Run agent in background subshell — log to per-cloud file to avoid interleaving
-        (
-            cd "${WORK_DIR}"
-            run_with_timeout 900 claude -p "Fix the failing mock tests for cloud '${cloud}' in the spawn codebase.
-
-Failing scripts: ${failing_scripts}
-
-Error context from test run:
-${error_context}
-
-Investigate the root cause and fix. You can modify: scripts in ${cloud}/, test/fixtures/${cloud}/, and test/mock.sh." \
-                2>&1 | tee -a "${DRY_RUN_DIR}/agent-fix-${cloud}.log" || true
-
-            # Copy changed files directly back to repo
-            changed=$(git diff --name-only "${ORIG_HEAD}" 2>/dev/null || true)
-            if [[ -n "$changed" ]]; then
-                printf '%s\n' "$changed" | while IFS= read -r f; do
-                    [[ -f "$f" ]] || continue
-                    mkdir -p "${REPO_ROOT}/$(dirname "$f")"
-                    cp "$f" "${REPO_ROOT}/$f"
-                done
-            fi
-        ) &
-        FIX_PIDS="${FIX_PIDS} $!"
-        FIX_WORK_DIRS="${FIX_WORK_DIRS} ${WORK_DIR}"
-    done
-
-    # Wait for all agents to finish
-    if [[ -n "${FIX_PIDS}" ]]; then
-        log "Phase 3: Waiting for ${FAILED_CLOUDS} fix agents..."
-        for pid in ${FIX_PIDS}; do
-            wait "$pid" 2>/dev/null || true
-        done
-    fi
-
-    # Log and clean up work dirs
-    for cloud in $FAILED_CLOUDS; do
-        would_commit "git add ${cloud}/ test/fixtures/${cloud}/ test/mock.sh && git commit && git push && gh pr create && gh pr merge"
-    done
-    for work_dir in ${FIX_WORK_DIRS}; do
-        rm -rf "${work_dir}"
-    done
-
-    log "Phase 3: Fix agents complete"
-fi
-
-check_timeout || exit 0
-
-# ============================================================
-# Phase 4: Re-run mock tests + update README (no commit)
-# ============================================================
-log "=== Phase 4: Re-run tests and update README ==="
-
-rm -f "${RESULTS_PHASE4}"
-RESULTS_FILE="${RESULTS_PHASE4}" bash test/mock.sh 2>&1 | tee -a "${LOG_FILE}" || true
-
-RETRY_PASS=0
-RETRY_FAIL=0
-if [[ -f "${RESULTS_PHASE4}" ]]; then
-    RETRY_PASS=$(grep -c ':pass$' "${RESULTS_PHASE4}" || true)
-    RETRY_FAIL=$(grep -c ':fail$' "${RESULTS_PHASE4}" || true)
-    log "Phase 4: ${RETRY_PASS} passed, ${RETRY_FAIL} failed"
-
-    if [[ -f "test/update-readme.py" ]]; then
-        python3 test/update-readme.py "${RESULTS_PHASE4}" 2>&1 | tee -a "${LOG_FILE}" || true
-
-        if [[ -n "$(git diff --name-only README.md 2>/dev/null)" ]]; then
-            would_commit "git checkout -b qa/readme-update-\$(date +%s) && git add README.md && git commit && git push && gh pr create && gh pr merge"
-            # Show the diff but don't commit
-            git diff README.md > "${DRY_RUN_DIR}/diff-readme.patch" 2>/dev/null || true
-            # Revert README changes (dry run) - use git restore to avoid checkout pollution
-            git restore README.md 2>/dev/null || git checkout -- README.md 2>/dev/null || true
-            log "Phase 4: README diff saved to diff-readme.patch (not committed)"
-        else
-            log "Phase 4: No README changes needed"
-        fi
-    fi
-else
-    log "Phase 4: No results file generated"
-fi
-
-# ============================================================
-# Phase 5: E2E Tests (optional — requires cloud credentials)
-# ============================================================
-E2E_PASS=0
-E2E_FAIL=0
-E2E_SKIPPED=0
-
-if [[ -f "${REPO_ROOT}/test/e2e.sh" ]]; then
-    # Check if any cloud credentials are available
-    HAS_CLOUD_CREDS=0
-    for _var in FLY_API_TOKEN HCLOUD_TOKEN DO_API_TOKEN DAYTONA_API_KEY OVH_APP_KEY; do
-        if [[ -n "${!_var:-}" ]]; then
-            HAS_CLOUD_CREDS=1
-            break
-        fi
-    done
-
-    if [[ "${HAS_CLOUD_CREDS}" -eq 1 ]] && [[ -n "${OPENROUTER_API_KEY:-}" ]]; then
-        log "=== Phase 5: E2E Tests ==="
-
-        E2E_OUTPUT="${DRY_RUN_DIR}/e2e-output.txt"
-        E2E_EXIT=0
-        # Stream live so failures are visible immediately, not after the full run
-        E2E_AUTO_FIX=0 bash "${REPO_ROOT}/test/e2e.sh" \
-            2>&1 | tee "${E2E_OUTPUT}" | tee -a "${LOG_FILE}" || E2E_EXIT=$?
-
-        # Count only cloud/agent lines (contain "/"), not pre-flight checkmarks
-        if [[ -f "${E2E_OUTPUT}" ]]; then
-            E2E_PASS=$(grep '✓' "${E2E_OUTPUT}" | grep -c '/' 2>/dev/null || true)
-            E2E_FAIL=$(grep '✗' "${E2E_OUTPUT}" | grep -c '/' 2>/dev/null || true)
-        fi
-
-        if [[ "${E2E_EXIT}" -eq 0 ]]; then
-            log "Phase 5: E2E tests passed (${E2E_PASS} passed)"
-        else
-            log "Phase 5: E2E tests had ${E2E_FAIL} failure(s), ${E2E_PASS} passed"
-        fi
-
-        # --- Phase 5b: Fix E2E failures (dry run — copies, no git/PR) ---
-        if [[ "${E2E_FAIL}" -gt 0 ]] && [[ -f "${E2E_OUTPUT}" ]]; then
-            check_timeout || true
-
-            log "=== Phase 5b: Fix E2E failures ==="
-
-            # Parse failing combos — only lines with "/" (skip pre-flight)
-            E2E_FAILED_COMBOS=""
-            E2E_FAILED_AGENTS=""
-            while IFS= read -r line; do
-                clean=$(printf '%s' "$line" | sed 's/\x1b\[[0-9;]*m//g')
-                case "$clean" in
-                    *"✗ "*"/"*)
-                        combo=$(printf '%s' "$clean" | sed 's/.*✗ //; s/  .*//')
-                        reason=$(printf '%s' "$clean" | sed 's/.*(\(.*\))/\1/' || true)
-                        cloud="${combo%%/*}"
-                        agent="${combo##*/}"
-                        E2E_FAILED_COMBOS="${E2E_FAILED_COMBOS} ${cloud}/${agent}|${reason}"
-                        case " ${E2E_FAILED_AGENTS} " in
-                            *" ${agent} "*) ;;
-                            *) E2E_FAILED_AGENTS="${E2E_FAILED_AGENTS} ${agent}" ;;
-                        esac
-                        ;;
-                esac
-            done < "${E2E_OUTPUT}"
-            E2E_FAILED_COMBOS=$(printf '%s' "${E2E_FAILED_COMBOS}" | sed 's/^ //')
-            E2E_FAILED_AGENTS=$(printf '%s' "${E2E_FAILED_AGENTS}" | sed 's/^ //')
-
-            if [[ -n "${E2E_FAILED_AGENTS}" ]]; then
-                log "Phase 5b: Failing agents: ${E2E_FAILED_AGENTS}"
-
-                E2E_FIX_PIDS=""
-                E2E_FIX_WORK_DIRS=""
-
-                for agent in ${E2E_FAILED_AGENTS}; do
-                    check_timeout || break
-
-                    # Collect failing clouds and reasons
-                    failing_clouds=""
-                    failure_summary=""
-                    for entry in ${E2E_FAILED_COMBOS}; do
-                        entry_combo="${entry%%|*}"
-                        entry_reason="${entry#*|}"
-                        entry_agent="${entry_combo##*/}"
-                        entry_cloud="${entry_combo%%/*}"
-                        if [[ "${entry_agent}" == "${agent}" ]]; then
-                            failing_clouds="${failing_clouds} ${entry_cloud}"
-                            failure_summary="${failure_summary}  - ${entry_cloud}/${agent}.sh: ${entry_reason}\n"
-                        fi
-                    done
-                    failing_clouds=$(printf '%s' "${failing_clouds}" | sed 's/^ //')
-
-                    # Find ALL clouds with this agent
-                    all_clouds_for_agent=""
-                    other_cloud_scripts=""
-                    for cloud_dir in "${REPO_ROOT}"/*/; do
-                        cname=$(basename "${cloud_dir}")
-                        [[ "${cname}" == "shared" || "${cname}" == "cli" || "${cname}" == "test" || "${cname}" == ".claude" || "${cname}" == ".github" || "${cname}" == ".docs" ]] && continue
-                        if [[ -f "${cloud_dir}${agent}.sh" ]]; then
-                            all_clouds_for_agent="${all_clouds_for_agent} ${cname}"
-                            case " ${failing_clouds} " in
-                                *" ${cname} "*) ;;
-                                *) other_cloud_scripts="${other_cloud_scripts} ${cname}/${agent}.sh" ;;
-                            esac
-                        fi
-                    done
-                    all_clouds_for_agent=$(printf '%s' "${all_clouds_for_agent}" | sed 's/^ //')
-                    other_cloud_scripts=$(printf '%s' "${other_cloud_scripts}" | sed 's/^ //')
-
-                    fail_count=0
-                    for _c in ${failing_clouds}; do fail_count=$((fail_count + 1)); done
-
-                    log "Phase 5b: Spawning agent for '${agent}' (${fail_count} failure(s), propagating to: ${other_cloud_scripts:-none})"
-                    would_commit "git worktree add ... -b qa/e2e-fix-${agent} origin/main"
-
-                    WORK_DIR=$(mktemp -d "/tmp/spawn-qa-dry-XXXXXX")
-                    cp -r "${REPO_ROOT}/." "${WORK_DIR}/" 2>/dev/null || true
-                    ORIG_HEAD=$(cd "${WORK_DIR}" && git rev-parse HEAD 2>/dev/null) || ORIG_HEAD=""
-
-                    modify_files=""
-                    for _c in ${all_clouds_for_agent}; do
-                        modify_files="${modify_files} ${_c}/${agent}.sh ${_c}/lib/common.sh"
-                    done
-
-                    (
-                        cd "${WORK_DIR}"
-                        run_with_timeout "${AGENT_TIMEOUT}" claude -p "Fix E2E test failures for agent **${agent}** and propagate fixes to all clouds.
-
-## E2E Failure Summary
-$(printf '%b' "${failure_summary}")
-## All clouds with ${agent}
-${all_clouds_for_agent}
-
-## What happened
-These scripts were run with real cloud servers (SPAWN_NON_INTERACTIVE=1, no TTY).
-A script passes if it prints 'setup completed successfully' before the session step.
-Common E2E failure causes:
-- Install command fails (wrong package name, missing repo, network timeout)
-- Config file written to wrong path or with wrong permissions
-- Env var injection missing (OPENROUTER_API_KEY, ANTHROPIC_BASE_URL, etc.)
-- Script hangs on an interactive prompt that wasn't guarded by SPAWN_NON_INTERACTIVE
-- SSH wait/connect fails (firewall, wrong port, key not imported)
-
-## Fix Process
-
-1. **Read each failing script** and its cloud's lib/common.sh.
-2. **Compare with working clouds.** Diff the scripts — look for divergence.
-3. **Fix the root cause** in each failing script.
-4. **Propagate to other clouds:** ${other_cloud_scripts:-"(no other clouds)"}
-   Only propagate if the same problematic pattern exists.
-5. **Validate:** Run bash -n on every modified .sh file.
-
-You may modify:${modify_files}" \
-                            2>&1 | tee -a "${DRY_RUN_DIR}/agent-e2e-fix-${agent}.log" || true
-
-                        # Copy changed files back to repo
-                        changed=$(git diff --name-only "${ORIG_HEAD}" 2>/dev/null || true)
-                        uncommitted=$(git status --porcelain 2>/dev/null | sed 's/^.. //' || true)
-                        for f in ${changed} ${uncommitted}; do
-                            [[ -f "$f" ]] || continue
-                            mkdir -p "${REPO_ROOT}/$(dirname "$f")"
-                            cp "$f" "${REPO_ROOT}/$f"
-                        done
-                    ) &
-                    E2E_FIX_PIDS="${E2E_FIX_PIDS} $!"
-                    E2E_FIX_WORK_DIRS="${E2E_FIX_WORK_DIRS} ${WORK_DIR}"
-                done
-
-                # Wait for all E2E fix agents
-                for pid in ${E2E_FIX_PIDS}; do
-                    wait "$pid" 2>/dev/null || true
-                done
-
-                for agent in ${E2E_FAILED_AGENTS}; do
-                    would_commit "git add */\${agent}.sh && git commit && git push && gh pr create && gh pr merge"
-                done
-                for work_dir in ${E2E_FIX_WORK_DIRS}; do
-                    rm -rf "${work_dir}"
-                done
-
-                log "Phase 5b: E2E fix agents complete"
-            fi
-        fi
-    else
-        E2E_SKIPPED=1
-        log "=== Phase 5: E2E Tests (Skipped — no cloud credentials or OPENROUTER_API_KEY) ==="
-    fi
-else
-    E2E_SKIPPED=1
-    log "=== Phase 5: E2E Tests (Skipped — test/e2e.sh not found) ==="
-fi
-
-check_timeout || exit 0
-
-# ============================================================
-# Summary
-# ============================================================
-log ""
-log "=== QA Dry Run Summary ==="
-log "Phase 0.5 (lint):    ${LINT_ERRORS:-0} error(s) / ${LINT_WARNS:-0} warning(s)"
-log "Phase 2 (initial):   ${PASS_COUNT:-0} pass / ${FAIL_COUNT:-0} fail"
-log "Phase 4 (after fix): ${RETRY_PASS:-0} pass / ${RETRY_FAIL:-0} fail"
-if [[ "${FAIL_COUNT:-0}" -gt 0 ]] && [[ "${RETRY_FAIL:-0}" -lt "${FAIL_COUNT:-0}" ]]; then
-    FIXED=$(( ${FAIL_COUNT:-0} - ${RETRY_FAIL:-0} ))
-    log "Fixed ${FIXED} failure(s) this cycle"
-fi
-if [[ "${E2E_SKIPPED:-0}" -eq 0 ]]; then
-    log "Phase 5 (e2e):       ${E2E_PASS:-0} pass / ${E2E_FAIL:-0} fail"
-else
-    log "Phase 5 (e2e):       skipped"
-fi
-log ""
-log "Output files:"
-log "  ${DRY_RUN_DIR}/qa-dry-run.log          — full log"
-log "  ${DRY_RUN_DIR}/macos-compat-output.txt  — macOS compat lint output"
-log "  ${DRY_RUN_DIR}/results-phase2.txt       — mock test results (initial)"
-log "  ${DRY_RUN_DIR}/results-phase4.txt       — mock test results (after fixes)"
-log "  ${DRY_RUN_DIR}/would-commit.txt         — git/gh commands that would have run"
-
-# List patch files
-PATCH_COUNT=0
-for pf in "${DRY_RUN_DIR}"/diff-*.patch; do
-    [[ -f "$pf" ]] || continue
-    if [[ -s "$pf" ]]; then
-        log "  $(basename "$pf")  — $(wc -l < "$pf" | tr -d ' ') lines"
-        PATCH_COUNT=$((PATCH_COUNT + 1))
-    fi
-done
-if [[ "$PATCH_COUNT" -eq 0 ]]; then
-    log "  (no patches generated)"
-fi
-
-log ""
-log "=== QA Dry Run Complete ==="
diff --git a/test/record.sh b/test/record.sh
deleted file mode 100644
index 5416cefc..00000000
--- a/test/record.sh
+++ /dev/null
@@ -1,959 +0,0 @@
-#!/bin/bash
-# Record real API responses from cloud providers as test fixtures
-#
-# Hits safe GET-only endpoints using each cloud's existing API wrapper,
-# validates the response, and saves it as pretty-printed JSON.
-#
-# Usage:
-#   bash test/record.sh hetzner          # Record one cloud
-#   bash test/record.sh hetzner digitalocean  # Record multiple
-#   bash test/record.sh all              # All clouds with available credentials
-#   bash test/record.sh --list           # Show recordable clouds + credential status
-
-set -eo pipefail
-
-REPO_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
-FIXTURES_DIR="${REPO_ROOT}/test/fixtures"
-
-# Sandbox: Use test-specific config directory if TEST_CONFIG_DIR is set
-# This prevents polluting production ~/.config/spawn/ during tests
-if [[ -n "${TEST_CONFIG_DIR:-}" ]]; then
-    export HOME="${TEST_CONFIG_DIR}"
-fi
-
-# Colors
-RED='\033[0;31m'
-GREEN='\033[0;32m'
-YELLOW='\033[1;33m'
-CYAN='\033[0;36m'
-NC='\033[0m'
-
-# Counters
-RECORDED=0
-SKIPPED=0
-ERRORS=0
-
-# Whether to prompt for missing credentials (set by 'all' vs 'allsaved')
-PROMPT_FOR_CREDS=true
-
-# All clouds with REST APIs that we can record from
-ALL_RECORDABLE_CLOUDS="hetzner digitalocean fly"
-
-# --- Endpoint registry ---
-# Declare endpoints as string literal for each cloud
-# Format: "fixture_name:endpoint" (one per line, indented)
-_ENDPOINTS_hetzner="
-server_types:/server_types?per_page=50
-locations:/locations
-ssh_keys:/ssh_keys
-servers:/servers
-"
-
-_ENDPOINTS_digitalocean="
-account:/account
-ssh_keys:/account/keys
-droplets:/droplets
-sizes:/sizes
-regions:/regions
-"
-
-
-_ENDPOINTS_fly="
-apps:/apps?org_slug=personal
-"
-
-get_endpoints() {
-    local cloud="$1"
-    local var_name="_ENDPOINTS_${cloud}"
-    if [[ -n "${!var_name:-}" ]]; then
-        printf '%s\n' "${!var_name}" | grep -v '^$'
-    fi
-}
-
-# --- Multi-credential cloud specs ---
-# Returns "config_key:env_var" pairs (one per line) for multi-credential clouds.
-# Single-credential clouds return nothing (handled by get_auth_env_var).
-_get_multi_cred_spec() {
-    local cloud="$1"
-    case "$cloud" in
-    esac
-}
-
-# Load multiple fields from a JSON config file and export as env vars.
-# Arguments: CONFIG_FILE SPEC...  (each spec is "config_key:ENV_VAR")
-_load_multi_config_from_file() {
-    local config_file="$1"; shift
-    [[ -f "$config_file" ]] || return 1
-
-    local config_keys=() env_vars=()
-    local spec
-    for spec in "$@"; do
-        config_keys+=("${spec%%:*}")
-        env_vars+=("${spec#*:}")
-    done
-
-    local vals
-    vals=$(python3 -c "
-import json, sys
-try:
-    d = json.load(open(sys.argv[1]))
-    print('\t'.join(d.get(k, '') for k in sys.argv[2:]))
-except: pass
-" "$config_file" "${config_keys[@]}" 2>/dev/null) || return 1
-
-    [[ -n "${vals:-}" ]] || return 1
-
-    local IFS=$'\t'
-    local fields
-    read -ra fields <<< "$vals"
-
-    local i
-    for i in "${!env_vars[@]}"; do
-        if [[ -n "${fields[$i]:-}" ]]; then
-            # SECURITY: Validate env var name before export
-            if [[ ! "${env_vars[$i]}" =~ ^[A-Z_][A-Z0-9_]*$ ]]; then
-                echo "SECURITY: Invalid env var name rejected: ${env_vars[$i]}" >&2
-                return 1
-            fi
-            export "${env_vars[$i]}=${fields[$i]}"
-        fi
-    done
-}
-
-# Save multiple env vars to a JSON config file.
-# Arguments: CONFIG_FILE SPEC...  (each spec is "config_key:ENV_VAR")
-_save_multi_config_to_file() {
-    local config_file="$1"; shift
-
-    local py_args=()
-    local py_keys=""
-    local idx=1
-    local spec
-    for spec in "$@"; do
-        local config_key="${spec%%:*}"
-        local env_var="${spec#*:}"
-        local val="${!env_var:-}"
-        py_args+=("$val")
-        py_keys="${py_keys}'${config_key}': sys.argv[${idx}], "
-        idx=$((idx + 1))
-    done
-
-    python3 -c "
-import json, sys
-print(json.dumps({${py_keys}}, indent=2))
-" "${py_args[@]}" > "$config_file"
-}
-
-# --- Auth env var check ---
-get_auth_env_var() {
-    local cloud="$1"
-    case "$cloud" in
-        hetzner)       printf "HCLOUD_TOKEN" ;;
-        digitalocean)  printf "DO_API_TOKEN" ;;
-        fly)           printf "FLY_API_TOKEN" ;;
-    esac
-}
-
-# Try loading token from ~/.config/spawn/{cloud}.json (same config the agent scripts use)
-# Load a single API token from JSON config and export it.
-# Arguments: ENV_VAR CONFIG_FILE
-_load_single_token_config() {
-    local env_var="$1"
-    local config_file="$2"
-
-    [[ -f "$config_file" ]] || return 0
-
-    # SECURITY: Validate env var name before export
-    if [[ ! "${env_var}" =~ ^[A-Z_][A-Z0-9_]*$ ]]; then
-        echo "SECURITY: Invalid env var name rejected: ${env_var}" >&2
-        return 1
-    fi
-
-    local token
-    token=$(python3 -c "import json,sys; d=json.load(open(sys.argv[1])); print(d.get('api_key','') or d.get('token',''))" "$config_file" 2>/dev/null) || true
-    if [[ -n "${token:-}" ]]; then
-        export "${env_var}=${token}"
-    fi
-}
-
-try_load_config() {
-    local cloud="$1"
-    local env_var
-    env_var=$(get_auth_env_var "$cloud")
-
-    # Already set via env var — nothing to do
-    local current_val="${!env_var:-}"
-    if [[ -n "$current_val" ]]; then
-        return 0
-    fi
-
-    local config_file="$HOME/.config/spawn/${cloud}.json"
-
-    # Multi-credential clouds (OVH, etc.)
-    local specs
-    specs=$(_get_multi_cred_spec "$cloud")
-    if [[ -n "$specs" ]]; then
-        local spec_args=()
-        while IFS= read -r line; do
-            spec_args+=("$line")
-        done <<< "$specs"
-        _load_multi_config_from_file "$config_file" "${spec_args[@]}" || true
-        return 0
-    fi
-
-    # Standard single-token config
-    _load_single_token_config "$env_var" "$config_file"
-}
-
-has_credentials() {
-    local cloud="$1"
-
-    # Try loading from config file first
-    try_load_config "$cloud"
-
-    # Multi-credential clouds: check all env vars from spec
-    local specs
-    specs=$(_get_multi_cred_spec "$cloud")
-    if [[ -n "$specs" ]]; then
-        local line
-        while IFS= read -r line; do
-            local env_var="${line#*:}"
-            [[ -n "${!env_var:-}" ]] || return 1
-        done <<< "$specs"
-        return 0
-    fi
-
-    # Single-credential clouds
-    local env_var
-    env_var=$(get_auth_env_var "$cloud")
-    [[ -n "${!env_var:-}" ]]
-}
-
-# Save credentials to ~/.config/spawn/{cloud}.json for future use
-save_config() {
-    local cloud="$1"
-    local config_dir="$HOME/.config/spawn"
-    local config_file="${config_dir}/${cloud}.json"
-    mkdir -p "$config_dir"
-
-    # Multi-credential clouds
-    local specs
-    specs=$(_get_multi_cred_spec "$cloud")
-    if [[ -n "$specs" ]]; then
-        local spec_args=()
-        while IFS= read -r line; do
-            spec_args+=("$line")
-        done <<< "$specs"
-        _save_multi_config_to_file "$config_file" "${spec_args[@]}"
-    else
-        # Standard single-token config
-        local env_var
-        env_var=$(get_auth_env_var "$cloud")
-        local val="${!env_var:-}"
-        python3 -c "import json, sys; print(json.dumps({'api_key': sys.argv[1]}, indent=2))" "$val" > "$config_file"
-    fi
-    printf '%b\n' "  ${GREEN}saved${NC} → ${config_file}"
-}
-
-# Prompt user for missing credentials, export them, and save to config
-prompt_credentials() {
-    local cloud="$1"
-    local vars_needed=""
-    local val=""
-
-    # Multi-credential clouds: extract env var names from spec
-    local specs
-    specs=$(_get_multi_cred_spec "$cloud")
-    if [[ -n "$specs" ]]; then
-        local line
-        while IFS= read -r line; do
-            vars_needed="${vars_needed} ${line#*:}"
-        done <<< "$specs"
-    else
-        vars_needed=$(get_auth_env_var "$cloud")
-    fi
-
-    for var_name in $vars_needed; do
-        # SECURITY: Validate env var name before using in indirect expansion or export
-        if [[ ! "${var_name}" =~ ^[A-Z_][A-Z0-9_]*$ ]]; then
-            echo "SECURITY: Invalid env var name rejected: ${var_name}" >&2
-            return 1
-        fi
-        local current="${!var_name:-}"
-        if [[ -n "$current" ]]; then
-            continue
-        fi
-        printf "  Enter %s (press Enter to skip %s): " "$var_name" "$cloud" >&2
-        read -r val
-        if [[ -z "$val" ]]; then
-            return 1
-        fi
-        export "${var_name}=${val}"
-    done
-
-    # Save so they don't have to enter again
-    save_config "$cloud"
-    return 0
-}
-
-# --- API call dispatcher ---
-# Each cloud sources its lib and calls its wrapper function
-call_api() {
-    local cloud="$1"
-    local endpoint="$2"
-    case "$cloud" in
-        hetzner)       hetzner_api GET "$endpoint" ;;
-        digitalocean)  do_api GET "$endpoint" ;;
-        fly)           curl -fsSL -H "Authorization: ${FLY_API_TOKEN}" "https://api.machines.dev/v1${endpoint}" ;;
-    esac
-}
-
-# --- Validation ---
-is_valid_json() {
-    python3 -c "import json,sys; json.loads(sys.stdin.read())" 2>/dev/null
-}
-
-has_api_error() {
-    local cloud="$1"
-    local response="$2"
-
-    _RESPONSE="$response" _CLOUD="$cloud" python3 << 'VALIDATION_EOF' 2>/dev/null
-import json, sys, os
-d = json.loads(os.environ['_RESPONSE'])
-cloud = os.environ['_CLOUD']
-
-# Helper: data keys that indicate success responses (not errors)
-success_keys = {'servers','images','ssh_keys','flavors','sizes','regions','count','results','id','name','slug','status','ipv4'}
-
-error_checks = {
-    'hetzner': lambda d: d.get('error') and isinstance(d.get('error'), dict),
-    'digitalocean': lambda d: 'id' in d and isinstance(d.get('id'), str) and 'message' in d,
-    'fly': lambda d: 'error' in d and isinstance(d.get('error'), str),
-}
-
-if cloud in error_checks:
-    sys.exit(0 if error_checks[cloud](d) else 1)
-else:
-    sys.exit(1)
-VALIDATION_EOF
-}
-
-# --- Pretty print JSON ---
-pretty_json() {
-    python3 -c "import json,sys; print(json.dumps(json.loads(sys.stdin.read()), indent=2, sort_keys=True))"
-}
-
-# --- Live create+delete cycle (captures real POST/DELETE responses) ---
-# Creates a server with a timestamped name, records the response, then deletes it.
-# These functions access cloud_recorded, cloud_errors, metadata_entries from the
-# calling scope (record_cloud) via bash dynamic scoping — no namerefs needed.
-_record_live_cycle() {
-    local cloud="$1"
-    local fixture_dir="$2"
-
-    # Source cloud lib so API wrappers are available (dynamic scoping
-    # lets _live_* functions update caller's counters/metadata)
-    source "${REPO_ROOT}/${cloud}/lib/common.sh" 2>/dev/null || true
-
-    case "$cloud" in
-        hetzner)       _live_hetzner "$fixture_dir" ;;
-        digitalocean)  _live_digitalocean "$fixture_dir" ;;
-        fly)           _live_fly "$fixture_dir" ;;
-        *)  return 0 ;;  # No live cycle for this cloud yet
-    esac
-}
-
-# Validate response is not empty
-_validate_response_not_empty() {
-    local fixture_name="$1"
-    local response="$2"
-    if [[ -z "$response" ]]; then
-        printf '%b\n' "  ${RED}fail${NC} ${fixture_name} — empty response"
-        cloud_errors=$((cloud_errors + 1))
-        return 1
-    fi
-    return 0
-}
-
-# Validate response is valid JSON
-_validate_response_json() {
-    local fixture_name="$1"
-    local response="$2"
-    if ! echo "$response" | is_valid_json; then
-        printf '%b\n' "  ${RED}fail${NC} ${fixture_name} — invalid JSON"
-        cloud_errors=$((cloud_errors + 1))
-        return 1
-    fi
-    return 0
-}
-
-# Validate response is not an API error
-_validate_response_no_error() {
-    local fixture_name="$1"
-    local response="$2"
-    if has_api_error "$cloud" "$response"; then
-        printf '%b\n' "  ${RED}fail${NC} ${fixture_name} — API error response"
-        cloud_errors=$((cloud_errors + 1))
-        return 1
-    fi
-    return 0
-}
-
-# Record fixture metadata entry
-_record_fixture_metadata() {
-    local fixture_name="$1"
-    local endpoint="$2"
-    local ts
-    ts=$(date -u +"%Y-%m-%dT%H:%M:%SZ")
-    metadata_entries="${metadata_entries}    \"${fixture_name}\": {\"endpoint\": \"${endpoint}\", \"type\": \"live\", \"recorded_at\": \"${ts}\"},
-"
-}
-
-# Save a live fixture and update the caller's counters/metadata
-_save_live_fixture() {
-    local fixture_dir="$1"
-    local fixture_name="$2"
-    local endpoint="$3"
-    local response="$4"
-
-    _validate_response_not_empty "$fixture_name" "$response" || return 1
-    _validate_response_json "$fixture_name" "$response" || return 1
-    _validate_response_no_error "$fixture_name" "$response" || return 1
-
-    echo "$response" | pretty_json > "${fixture_dir}/${fixture_name}.json"
-    printf '%b\n' "  ${GREEN}  ok${NC} ${fixture_name} (live)"
-
-    _record_fixture_metadata "$fixture_name" "$endpoint"
-    cloud_recorded=$((cloud_recorded + 1))
-    return 0
-}
-
-# Generic live create+delete cycle for any cloud provider.
-# Calls a per-cloud builder function that prints the API body to stdout,
-# then runs the shared create -> save -> extract-id -> delete -> save flow.
-#
-# Usage: _live_create_delete_cycle FIXTURE_DIR API_FUNC CREATE_ENDPOINT \
-#          DELETE_ENDPOINT_TEMPLATE ID_PY_EXPR BUILDER_FUNC \
-#          [DELETE_DELAY] [EMPTY_DELETE_FALLBACK]
-#
-# Arguments:
-#   FIXTURE_DIR              - Directory for fixture JSON files
-#   API_FUNC                 - Cloud API function (e.g., "hetzner_api")
-#   CREATE_ENDPOINT          - POST endpoint (e.g., "/servers")
-#   DELETE_ENDPOINT_TEMPLATE - DELETE endpoint with {id} placeholder
-#   ID_PY_EXPR               - Python expression to extract ID from response (receives 'd')
-#   BUILDER_FUNC             - Function that prints the JSON create body to stdout
-#   DELETE_DELAY             - Seconds to sleep before delete (default: 3)
-#   EMPTY_DELETE_FALLBACK    - JSON to use when DELETE returns empty body (optional)
-# Extract resource ID from API response using Python expression
-# Sets global resource_id; returns 0 on success, 1 on failure
-_extract_resource_id() {
-    local response="$1" id_py_expr="$2"
-
-    resource_id=$(echo "$response" | python3 -c "import json,sys; d=json.loads(sys.stdin.read()); print(${id_py_expr})" 2>/dev/null) || true
-
-    if [[ -z "${resource_id:-}" ]]; then
-        printf '%b\n' "  ${RED}fail${NC} Could not extract resource ID from create response"
-        cloud_errors=$((cloud_errors + 1))
-        return 1
-    fi
-
-    return 0
-}
-
-# Handle delete response, using fallback if empty
-_handle_delete_response() {
-    local response="$1" empty_delete_fallback="$2"
-
-    if [[ -z "$response" && -n "$empty_delete_fallback" ]]; then
-        echo "$empty_delete_fallback"
-    else
-        echo "$response"
-    fi
-}
-
-_live_create_delete_cycle() {
-    local fixture_dir="$1"
-    local api_func="$2"
-    local create_endpoint="$3"
-    local delete_endpoint_template="$4"
-    local id_py_expr="$5"
-    local builder_func="$6"
-    local delete_delay="${7:-3}"
-    local empty_delete_fallback="${8:-}"
-
-    local body
-    body=$("${builder_func}" "${fixture_dir}") || return 0
-
-    local create_response
-    create_response=$("${api_func}" POST "${create_endpoint}" "$body")
-
-    _save_live_fixture "$fixture_dir" "create_server" "POST ${create_endpoint}" "$create_response" || {
-        printf '%b\n' "  ${RED}fail${NC} Could not create — skipping delete fixture"
-        return 0
-    }
-
-    local resource_id
-    _extract_resource_id "$create_response" "$id_py_expr" || return 0
-
-    printf '%b\n' "  ${CYAN}live${NC} Created (ID: ${resource_id}). Deleting..."
-    sleep "$delete_delay"
-
-    local delete_endpoint="${delete_endpoint_template/\{id\}/${resource_id}}"
-    local delete_response
-    delete_response=$("${api_func}" DELETE "${delete_endpoint}")
-
-    delete_response=$(_handle_delete_response "$delete_response" "$empty_delete_fallback")
-
-    _save_live_fixture "$fixture_dir" "delete_server" "DELETE ${delete_endpoint_template}" "$delete_response"
-    printf '%b\n' "  ${CYAN}live${NC} Resource ${resource_id} deleted"
-}
-
-# --- Per-cloud body builders ---
-# Each prints the JSON create body to stdout and logs setup info to stderr.
-
-_live_hetzner_body() {
-    local fixture_dir="$1"
-    local name="spawn-record-$(date +%s)"
-    printf '%b\n' "  ${CYAN}live${NC} Creating test server '${name}' (cx23, nbg1)..." >&2
-
-    local ssh_keys_response
-    ssh_keys_response=$(hetzner_api GET "/ssh_keys")
-    local ssh_key_ids
-    ssh_key_ids=$(echo "$ssh_keys_response" | python3 -c "
-import json, sys
-d = json.loads(sys.stdin.read())
-print(json.dumps([k['id'] for k in d.get('ssh_keys', [])]))
-" 2>/dev/null) || ssh_key_ids="[]"
-
-    python3 -c "
-import json, sys
-print(json.dumps({
-    'name': sys.argv[1], 'server_type': 'cx23', 'location': 'nbg1',
-    'image': 'ubuntu-24.04', 'ssh_keys': json.loads(sys.argv[2]),
-    'start_after_create': True
-}))
-" "$name" "$ssh_key_ids"
-}
-
-_live_hetzner() {
-    _live_create_delete_cycle "$1" hetzner_api "/servers" "/servers/{id}" \
-        "d['server']['id']" _live_hetzner_body 2
-}
-
-_live_digitalocean_body() {
-    local fixture_dir="$1"
-    local name="spawn-record-$(date +%s)"
-    printf '%b\n' "  ${CYAN}live${NC} Creating test droplet '${name}' (s-1vcpu-512mb-10gb, nyc3)..." >&2
-
-    local ssh_keys_response
-    ssh_keys_response=$(do_api GET "/account/keys")
-    local ssh_key_ids
-    ssh_key_ids=$(echo "$ssh_keys_response" | python3 -c "
-import json, sys
-d = json.loads(sys.stdin.read())
-print(json.dumps([k['id'] for k in d.get('ssh_keys', [])]))
-" 2>/dev/null) || ssh_key_ids="[]"
-
-    python3 -c "
-import json, sys
-print(json.dumps({
-    'name': sys.argv[1], 'region': 'nyc3', 'size': 's-1vcpu-512mb-10gb',
-    'image': 'ubuntu-24-04-x64', 'ssh_keys': json.loads(sys.argv[2])
-}))
-" "$name" "$ssh_key_ids"
-}
-
-_live_digitalocean() {
-    _live_create_delete_cycle "$1" do_api "/droplets" "/droplets/{id}" \
-        "d['droplet']['id']" _live_digitalocean_body 3 \
-        '{"status":"deleted","http_code":204}'
-}
-
-_live_fly_body() {
-    local fixture_dir="$1"
-    local name="spawn-record-$(date +%s)"
-    printf '%b\n' "  ${CYAN}live${NC} Creating test app+machine '${name}' (shared-cpu-1x, iad)..." >&2
-
-    python3 -c "
-import json, sys
-print(json.dumps({
-    'name': sys.argv[1], 'region': 'iad',
-    'config': {
-        'image': 'ubuntu:24.04', 'auto_destroy': True,
-        'guest': {'cpu_kind': 'shared', 'cpus': 1, 'memory_mb': 256}
-    }
-}))
-" "$name"
-}
-
-_live_fly() {
-    local fixture_dir="$1"
-    local name="spawn-record-$(date +%s)"
-    local fly_api_base="https://api.machines.dev/v1"
-    local auth_header="Authorization: ${FLY_API_TOKEN}"
-
-    # Detect FlyV1 tokens (dashboard/deploy tokens use FlyV1 scheme, not Bearer)
-    if [[ "$FLY_API_TOKEN" == FlyV1\ * ]]; then
-        auth_header="Authorization: ${FLY_API_TOKEN}"
-    else
-        auth_header="Authorization: Bearer ${FLY_API_TOKEN}"
-    fi
-
-    # Create app
-    printf '%b\n' "  ${CYAN}live${NC} Creating Fly.io app '${name}'..."
-    local app_resp
-    app_resp=$(curl -fsSL -X POST "${fly_api_base}/apps" \
-        -H "${auth_header}" \
-        -H "Content-Type: application/json" \
-        -d "{\"app_name\":\"${name}\",\"org_slug\":\"personal\"}") || true
-
-    if [[ -n "$app_resp" ]]; then
-        _save_live_fixture "$fixture_dir" "create_app" "POST /apps" "$app_resp" || {
-            printf '%b\n' "  ${RED}fail${NC} App creation failed — skipping machine"
-            return 0
-        }
-    fi
-
-    # Create machine
-    local body
-    body=$(_live_fly_body "$fixture_dir")
-    local machine_resp
-    machine_resp=$(curl -fsSL -X POST "${fly_api_base}/apps/${name}/machines" \
-        -H "${auth_header}" \
-        -H "Content-Type: application/json" \
-        -d "$body") || true
-
-    _save_live_fixture "$fixture_dir" "create_server" "POST /apps/{name}/machines" "$machine_resp" || {
-        # Cleanup app even if machine failed
-        curl -fsSL -X DELETE "${fly_api_base}/apps/${name}" -H "${auth_header}" >/dev/null 2>&1 || true
-        return 0
-    }
-
-    local machine_id
-    machine_id=$(echo "$machine_resp" | python3 -c "import json,sys; print(json.loads(sys.stdin.read())['id'])" 2>/dev/null) || true
-
-    # Cleanup: stop + delete machine, delete app
-    printf '%b\n' "  ${CYAN}live${NC} Cleaning up..."
-    if [[ -n "$machine_id" ]]; then
-        curl -fsSL -X POST "${fly_api_base}/apps/${name}/machines/${machine_id}/stop" \
-            -H "${auth_header}" >/dev/null 2>&1 || true
-        sleep 3
-        local del_resp
-        del_resp=$(curl -fsSL -X DELETE "${fly_api_base}/apps/${name}/machines/${machine_id}?force=true" \
-            -H "${auth_header}" 2>/dev/null) || true
-        if [[ -n "$del_resp" ]]; then
-            _save_live_fixture "$fixture_dir" "delete_server" "DELETE /apps/{name}/machines/{id}" "$del_resp" || true
-        fi
-    fi
-    curl -fsSL -X DELETE "${fly_api_base}/apps/${name}" -H "${auth_header}" >/dev/null 2>&1 || true
-    printf '%b\n' "  ${CYAN}live${NC} Cleanup complete"
-}
-
-# --- Record one cloud ---
-# Check credentials and prompt if needed; returns 1 to skip this cloud
-_record_ensure_credentials() {
-    local cloud="$1"
-    if has_credentials "$cloud"; then
-        return 0
-    fi
-
-    local env_var
-    env_var=$(get_auth_env_var "$cloud")
-    if [[ "$PROMPT_FOR_CREDS" == "true" ]]; then
-        printf '%b\n' "${CYAN}━━━ ${cloud} ━━━${NC}"
-        printf '%b\n' "  ${YELLOW}missing${NC} ${env_var}"
-        if prompt_credentials "$cloud"; then
-            return 0
-        fi
-        printf '%b\n' "  ${YELLOW}skip${NC} ${cloud}"
-    else
-        printf '%b\n' "  ${YELLOW}skip${NC} ${cloud} — ${env_var} not set"
-    fi
-    SKIPPED=$((SKIPPED + 1))
-    return 1
-}
-
-# Record a single endpoint fixture; increments cloud_recorded/cloud_errors
-# Usage: _record_endpoint CLOUD FIXTURE_DIR FIXTURE_NAME ENDPOINT
-# Validate API response and report errors
-# Returns 0 if valid, 1 if invalid/error
-_validate_endpoint_response() {
-    local cloud="$1" fixture_name="$2" response="$3"
-
-    if [[ -z "$response" ]]; then
-        printf '%b\n' "  ${RED}fail${NC} ${fixture_name} — empty response"
-        cloud_errors=$((cloud_errors + 1))
-        return 1
-    fi
-
-    if ! echo "$response" | is_valid_json; then
-        printf '%b\n' "  ${RED}fail${NC} ${fixture_name} — invalid JSON"
-        cloud_errors=$((cloud_errors + 1))
-        return 1
-    fi
-
-    if has_api_error "$cloud" "$response"; then
-        printf '%b\n' "  ${RED}fail${NC} ${fixture_name} — API error response"
-        cloud_errors=$((cloud_errors + 1))
-        return 1
-    fi
-
-    return 0
-}
-
-# Record endpoint response to fixture file and update metadata
-_save_endpoint_fixture() {
-    local fixture_dir="$1" fixture_name="$2" endpoint="$3" response="$4"
-
-    echo "$response" | pretty_json > "${fixture_dir}/${fixture_name}.json"
-    printf '%b\n' "  ${GREEN}  ok${NC} ${fixture_name} → fixtures/${cloud}/${fixture_name}.json"
-    cloud_recorded=$((cloud_recorded + 1))
-
-    local timestamp
-    timestamp=$(date -u +"%Y-%m-%dT%H:%M:%SZ")
-    metadata_entries="${metadata_entries}    \"${fixture_name}\": {\"endpoint\": \"${endpoint}\", \"recorded_at\": \"${timestamp}\"},
-"
-}
-
-_record_endpoint() {
-    local cloud="$1" fixture_dir="$2" fixture_name="$3" endpoint="$4"
-
-    # Call API in a subshell that sources the cloud lib
-    local tmp_response
-    tmp_response=$(mktemp /tmp/spawn-record-XXXXXX)
-
-    (
-        source "${REPO_ROOT}/${cloud}/lib/common.sh" 2>/dev/null
-        call_api "$cloud" "$endpoint" 2>/dev/null
-    ) > "$tmp_response" 2>/dev/null || true
-
-    local response
-    response=$(cat "$tmp_response")
-    rm -f "$tmp_response"
-
-    _validate_endpoint_response "$cloud" "$fixture_name" "$response" || return 0
-    _save_endpoint_fixture "$fixture_dir" "$fixture_name" "$endpoint" "$response"
-}
-
-# Write the _metadata.json file for a cloud's fixtures
-_record_write_metadata() {
-    local cloud="$1" fixture_dir="$2"
-
-    local meta_timestamp
-    meta_timestamp=$(date -u +"%Y-%m-%dT%H:%M:%SZ")
-
-    # Remove trailing comma and newline from metadata_entries
-    metadata_entries=$(printf '%s' "$metadata_entries" | sed '$ s/,$//')
-
-    cat > "${fixture_dir}/_metadata.json" << METADATA_EOF
-{
-  "cloud": "${cloud}",
-  "recorded_at": "${meta_timestamp}",
-  "fixtures": {
-${metadata_entries}
-  }
-}
-METADATA_EOF
-}
-
-record_cloud() {
-    local cloud="$1"
-
-    _record_ensure_credentials "$cloud" || return 0
-
-    printf '%b\n' "${CYAN}━━━ Recording ${cloud} ━━━${NC}"
-
-    local fixture_dir="${FIXTURES_DIR}/${cloud}"
-    mkdir -p "$fixture_dir"
-
-    local endpoints
-    endpoints=$(get_endpoints "$cloud")
-
-    local cloud_recorded=0
-    local cloud_errors=0
-    local metadata_entries=""
-
-    while IFS=: read -r fixture_name endpoint; do
-        [[ -z "$fixture_name" ]] && continue
-        _record_endpoint "$cloud" "$fixture_dir" "$fixture_name" "$endpoint"
-    done <<< "$endpoints"
-
-    # Live create+delete cycle for write endpoint fixtures
-    _record_live_cycle "$cloud" "$fixture_dir" cloud_recorded cloud_errors metadata_entries || true
-
-    _record_write_metadata "$cloud" "$fixture_dir"
-
-    RECORDED=$((RECORDED + cloud_recorded))
-    ERRORS=$((ERRORS + cloud_errors))
-
-    if [[ "$cloud_errors" -eq 0 ]]; then
-        printf '%b\n' "  ${GREEN}done${NC} ${cloud_recorded} fixtures recorded"
-    else
-        printf '%b\n' "  ${YELLOW}done${NC} ${cloud_recorded} recorded, ${cloud_errors} failed"
-    fi
-    printf '\n'
-}
-
-# Format env var name for list display
-# Args: cloud
-_format_env_var_display() {
-    local cloud="$1"
-    local env_var
-    env_var=$(get_auth_env_var "$cloud")
-
-    # For multi-var clouds, show required env vars from spec
-    local specs
-    specs=$(_get_multi_cred_spec "$cloud")
-    if [[ -n "$specs" ]]; then
-        local first_var var_count
-        first_var=$(head -1 <<< "$specs")
-        first_var="${first_var#*:}"
-        var_count=$(wc -l <<< "$specs" | tr -d ' ')
-        if [[ "$var_count" -gt 1 ]]; then
-            env_var="${first_var} + $((var_count - 1)) more"
-        else
-            env_var="$first_var"
-        fi
-    fi
-    printf '%s' "$env_var"
-}
-
-# --- List mode ---
-list_clouds() {
-    printf '%b\n' "${CYAN}Recordable clouds:${NC}"
-    printf '\n'
-    printf "  %-15s %-30s %s\n" "CLOUD" "AUTH ENV VAR" "STATUS"
-    printf "  %-15s %-30s %s\n" "-----" "------------" "------"
-
-    local ready_count=0
-    for cloud in $ALL_RECORDABLE_CLOUDS; do
-        local env_var
-        env_var=$(_format_env_var_display "$cloud")
-        local status
-
-        if has_credentials "$cloud"; then
-            status=$(printf '%b' "${GREEN}ready${NC}")
-            ready_count=$((ready_count + 1))
-        else
-            status=$(printf '%b' "${RED}not set${NC}")
-        fi
-
-        printf "  %-15s %-30s %b\n" "$cloud" "$env_var" "$status"
-    done
-
-    printf '\n'
-    local total_count
-    total_count=$(echo "$ALL_RECORDABLE_CLOUDS" | wc -w | tr -d ' ')
-    printf '%b\n' "  ${ready_count}/${total_count} clouds have credentials set"
-    printf '\n'
-    printf "  CLI-based clouds (not recordable): sprite, gcp, daytona, aws, local\n"
-}
-
-# --- Main ---
-printf '%b\n' "${CYAN}===============================${NC}"
-printf '%b\n' "${CYAN} Spawn API Response Recorder${NC}"
-printf '%b\n' "${CYAN}===============================${NC}"
-printf '\n'
-
-if [[ $# -eq 0 ]]; then
-    printf "Usage:\n"
-    printf "  bash test/record.sh CLOUD [CLOUD...]   Record fixtures for specified clouds\n"
-    printf "  bash test/record.sh all                Record all clouds (prompts for missing keys)\n"
-    printf "  bash test/record.sh allsaved           Record clouds that already have keys saved\n"
-    printf "  bash test/record.sh --list             Show recordable clouds\n"
-    printf '\n'
-    exit 0
-fi
-
-case "$1" in
-    --list|-l)
-        list_clouds
-        exit 0
-        ;;
-    --help|-h)
-        printf "Usage:\n"
-        printf "  bash test/record.sh CLOUD [CLOUD...]   Record fixtures for specified clouds\n"
-        printf "  bash test/record.sh all                Record all clouds with credentials\n"
-        printf "  bash test/record.sh --list             Show recordable clouds\n"
-        printf '\n'
-        exit 0
-        ;;
-esac
-
-# Determine which clouds to record
-CLOUDS_TO_RECORD=""
-if [[ "$1" == "all" ]]; then
-    CLOUDS_TO_RECORD="$ALL_RECORDABLE_CLOUDS"
-elif [[ "$1" == "allsaved" ]]; then
-    PROMPT_FOR_CREDS=false
-    CLOUDS_TO_RECORD="$ALL_RECORDABLE_CLOUDS"
-else
-    CLOUDS_TO_RECORD="$*"
-fi
-
-# Validate cloud names
-for cloud in $CLOUDS_TO_RECORD; do
-    if ! echo "$ALL_RECORDABLE_CLOUDS" | grep -qw "$cloud"; then
-        printf '%b\n' "${RED}Unknown cloud: ${cloud}${NC}"
-        printf "Recordable clouds: %s\n" "$ALL_RECORDABLE_CLOUDS"
-        exit 1
-    fi
-done
-
-printf "Fixtures dir: %s\n" "$FIXTURES_DIR"
-printf "Clouds:       %s\n" "$CLOUDS_TO_RECORD"
-printf '\n'
-
-mkdir -p "$FIXTURES_DIR"
-
-# --- Run clouds in parallel ---
-RECORD_RESULTS_DIR=$(mktemp -d)
-RECORD_PIDS=""
-
-for cloud in $CLOUDS_TO_RECORD; do
-    (
-        # Reset counters for this cloud (subshell isolation)
-        RECORDED=0
-        SKIPPED=0
-        ERRORS=0
-        record_cloud "$cloud"
-        printf '%d %d %d\n' "$RECORDED" "$SKIPPED" "$ERRORS" > "${RECORD_RESULTS_DIR}/${cloud}.counts"
-    ) > "${RECORD_RESULTS_DIR}/${cloud}.log" 2>&1 &
-    RECORD_PIDS="${RECORD_PIDS} $!"
-done
-
-# Wait for all clouds to finish
-for pid in $RECORD_PIDS; do
-    wait "$pid" 2>/dev/null || true
-done
-
-# Print output from each cloud (in order)
-for cloud in $CLOUDS_TO_RECORD; do
-    if [[ -f "${RECORD_RESULTS_DIR}/${cloud}.log" ]]; then
-        cat "${RECORD_RESULTS_DIR}/${cloud}.log"
-    fi
-done
-
-# Aggregate results
-for cloud in $CLOUDS_TO_RECORD; do
-    if [[ -f "${RECORD_RESULTS_DIR}/${cloud}.counts" ]]; then
-        read -r r s e < "${RECORD_RESULTS_DIR}/${cloud}.counts"
-        RECORDED=$((RECORDED + r))
-        SKIPPED=$((SKIPPED + s))
-        ERRORS=$((ERRORS + e))
-    fi
-done
-
-rm -rf "${RECORD_RESULTS_DIR}"
-
-# --- Summary ---
-printf '%b\n' "${CYAN}===============================${NC}"
-TOTAL=$((RECORDED + SKIPPED + ERRORS))
-printf '%b\n' " Results: ${GREEN}${RECORDED} recorded${NC}, ${YELLOW}${SKIPPED} skipped${NC}, ${RED}${ERRORS} failed${NC}"
-printf '%b\n' "${CYAN}===============================${NC}"
-
-if [[ "$ERRORS" -gt 0 ]]; then
-    exit 1
-fi
-exit 0
diff --git a/test/run.sh b/test/run.sh
deleted file mode 100644
index a4a173d9..00000000
--- a/test/run.sh
+++ /dev/null
@@ -1,755 +0,0 @@
-#!/bin/bash
-# shellcheck disable=SC2154
-# Test harness for spawn scripts
-#
-# Tests the shared library and cloud provider scripts:
-#   1. shared/common.sh sources correctly (local + remote)
-#   2. All shared functions resolve
-#   3. Env var flow works (OPENROUTER_API_KEY)
-#   4. Temp files are created and cleaned up
-#   5. Each script reaches its final launch command
-#
-# Note: sprite/ cloud provider was converted to TypeScript (PR #1692).
-# The sprite/*.sh files are now thin shims that exec bun - shell-level
-# integration tests for sprite are covered by bun test instead.
-#
-# Usage:
-#   bash test/run.sh              # test all scripts
-#   bash test/run.sh claude       # test one script
-#   bash test/run.sh --remote     # test remote source (from GitHub)
-
-set -eo pipefail
-
-REPO_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
-TEST_DIR=$(mktemp -d)
-MOCK_LOG="${TEST_DIR}/sprite_calls.log"
-PASSED=0
-FAILED=0
-FILTER="${1:-}"
-REMOTE=false
-
-if [[ "${FILTER}" == "--remote" ]]; then
-    REMOTE=true
-    FILTER="${2:-}"
-fi
-
-# Colors
-RED='\033[0;31m'
-GREEN='\033[0;32m'
-YELLOW='\033[1;33m'
-NC='\033[0m'
-
-cleanup() {
-    rm -rf "${TEST_DIR}"
-    # Clean up any /tmp pollution from mock sprite state files and spawn temp files
-    rm -f /tmp/sprite_mock_created_* /tmp/sprite_mock_created 2>/dev/null || true
-    rm -f /tmp/spawn_* 2>/dev/null || true
-}
-trap 'cleanup' EXIT
-
-# --- Mock sprite CLI ---
-# Records every call to a log, returns success for expected commands
-setup_mocks() {
-    export TEST_DIR
-    cat > "${TEST_DIR}/sprite" << 'MOCK'
-#!/bin/bash
-echo "sprite $*" >> "${MOCK_LOG}"
-
-case "$1" in
-    org)    exit 0 ;;                          # auth check passes
-    list)
-        echo "existing-sprite"
-        # After create, also return the test sprite name so provisioning poll succeeds
-        if [[ -f "${TEST_DIR}/sprite_mock_created" ]]; then
-            echo "${SPRITE_NAME:-}"
-        fi
-        exit 0
-        ;;
-    create)
-        touch "${TEST_DIR}/sprite_mock_created"
-        exit 0
-        ;;
-    exec)
-        # If there's a -file flag, just pretend to upload
-        if [[ "$*" == *"-file"* ]]; then
-            exit 0
-        fi
-        # If -tty, this is the final interactive launch — signal success and exit
-        if [[ "$*" == *"-tty"* ]]; then
-            echo "[MOCK] Would launch interactive session: $*" >> "${MOCK_LOG}"
-            exit 0
-        fi
-        # Regular exec — just succeed
-        exit 0
-        ;;
-    login)  exit 0 ;;
-    *)      exit 0 ;;
-esac
-MOCK
-    chmod +x "${TEST_DIR}/sprite"
-}
-
-# --- Mock other commands that shouldn't run for real ---
-setup_extra_mocks() {
-    # mock claude (for claude.sh install step)
-    cat > "${TEST_DIR}/claude" << 'MOCK'
-#!/bin/bash
-echo "claude $*" >> "${MOCK_LOG}"
-exit 0
-MOCK
-    chmod +x "${TEST_DIR}/claude"
-
-    # mock openssl
-    cat > "${TEST_DIR}/openssl" << 'MOCK'
-#!/bin/bash
-echo "mock-gateway-token-abc123"
-MOCK
-    chmod +x "${TEST_DIR}/openssl"
-
-    # mock sleep to avoid polling delays
-    cat > "${TEST_DIR}/sleep" << 'MOCK'
-#!/bin/bash
-exit 0
-MOCK
-    chmod +x "${TEST_DIR}/sleep"
-
-    # mock timeout/gtimeout to just run the command
-    cat > "${TEST_DIR}/timeout" << 'MOCK'
-#!/bin/bash
-# Skip the timeout value, run the rest
-shift
-exec "$@"
-MOCK
-    chmod +x "${TEST_DIR}/timeout"
-    cp "${TEST_DIR}/timeout" "${TEST_DIR}/gtimeout"
-
-    # mock python3 for JSON parsing used by shared/common.sh
-    cat > "${TEST_DIR}/python3" << 'MOCK'
-#!/bin/bash
-# Read the python script from -c argument
-script=""
-for arg in "$@"; do
-    if [[ "$prev" == "-c" ]]; then
-        script="$arg"
-        break
-    fi
-    prev="$arg"
-done
-# Delegate to real python3 for JSON operations
-exec /usr/bin/python3 "$@"
-MOCK
-    chmod +x "${TEST_DIR}/python3"
-}
-
-# --- Assertions ---
-assert_contains() {
-    local file="$1" pattern="$2" msg="$3"
-    if grep -qE "${pattern}" "${file}" 2>/dev/null; then
-        printf '%b\n' "  ${GREEN}✓${NC} ${msg}"
-        PASSED=$((PASSED + 1))
-    else
-        printf '%b\n' "  ${RED}✗${NC} ${msg}"
-        printf '%b\n' "    expected pattern: ${pattern}"
-        printf '%b\n' "    in: ${file}"
-        FAILED=$((FAILED + 1))
-    fi
-}
-
-assert_not_contains() {
-    local file="$1" pattern="$2" msg="$3"
-    if ! grep -qE "${pattern}" "${file}" 2>/dev/null; then
-        printf '%b\n' "  ${GREEN}✓${NC} ${msg}"
-        PASSED=$((PASSED + 1))
-    else
-        printf '%b\n' "  ${RED}✗${NC} ${msg}"
-        FAILED=$((FAILED + 1))
-    fi
-}
-
-assert_exit_code() {
-    local actual="$1" expected="$2" msg="$3"
-    if [[ "${actual}" -eq "${expected}" ]]; then
-        printf '%b\n' "  ${GREEN}✓${NC} ${msg}"
-        PASSED=$((PASSED + 1))
-    else
-        printf '%b\n' "  ${RED}✗${NC} ${msg} (got exit code ${actual}, expected ${expected})"
-        FAILED=$((FAILED + 1))
-    fi
-}
-
-# Assert that a value equals an expected string
-# Usage: assert_equals ACTUAL EXPECTED MSG
-assert_equals() {
-    local actual="$1" expected="$2" msg="$3"
-    if [[ "${actual}" == "${expected}" ]]; then
-        printf '%b\n' "  ${GREEN}✓${NC} ${msg}"
-        PASSED=$((PASSED + 1))
-    else
-        printf '%b\n' "  ${RED}✗${NC} ${msg} (got '${actual}')"
-        FAILED=$((FAILED + 1))
-    fi
-}
-
-# Assert that a value contains a substring pattern (glob match)
-# Usage: assert_match ACTUAL PATTERN MSG
-# PATTERN uses glob syntax: *substring* for contains, prefix* for starts-with, etc.
-assert_match() {
-    local actual="$1" pattern="$2" msg="$3"
-    # Use a case statement for glob matching (compatible with bash 3.x)
-    case "${actual}" in
-        ${pattern})
-            printf '%b\n' "  ${GREEN}✓${NC} ${msg}"
-            PASSED=$((PASSED + 1))
-            ;;
-        *)
-            printf '%b\n' "  ${RED}✗${NC} ${msg} (got '${actual}')"
-            FAILED=$((FAILED + 1))
-            ;;
-    esac
-}
-
-# Run a shared/common.sh function and assert it succeeds (exit 0)
-assert_common_succeeds() {
-    local msg="$1" cmd="$2"
-    local result
-    result=$(bash -c 'source "'"${REPO_ROOT}"'/shared/common.sh" && '"${cmd}" 2>/dev/null)
-    if [[ "${result}" == "valid" ]]; then
-        printf '%b\n' "  ${GREEN}✓${NC} ${msg}"
-        PASSED=$((PASSED + 1))
-    else
-        printf '%b\n' "  ${RED}✗${NC} ${msg}"
-        FAILED=$((FAILED + 1))
-    fi
-}
-
-# Run a shared/common.sh function and assert it fails (exit non-zero)
-assert_common_fails() {
-    local msg="$1" cmd="$2"
-    local rc=0
-    bash -c 'source "'"${REPO_ROOT}"'/shared/common.sh" && '"${cmd}" </dev/null >/dev/null 2>&1 || rc=$?
-    if [[ "${rc}" -ne 0 ]]; then
-        printf '%b\n' "  ${GREEN}✓${NC} ${msg}"
-        PASSED=$((PASSED + 1))
-    else
-        printf '%b\n' "  ${RED}✗${NC} ${msg}"
-        FAILED=$((FAILED + 1))
-    fi
-}
-
-# --- Sprite command assertions ---
-# Assert that a sprite script follows the standard command lifecycle:
-# auth check -> list -> create -> exec -> env upload -> interactive launch
-_assert_sprite_common_commands() {
-    local script_name="$1"
-    assert_contains "${MOCK_LOG}" "sprite org list" "Checks sprite authentication"
-    assert_contains "${MOCK_LOG}" "sprite list" "Checks if sprite exists"
-    assert_contains "${MOCK_LOG}" "sprite create.*test-sprite-${script_name}" "Creates sprite with correct name"
-    assert_contains "${MOCK_LOG}" "sprite exec.*test-sprite-${script_name}" "Runs commands on sprite"
-    assert_contains "${MOCK_LOG}" "sprite exec.*-file.*/tmp/spawn_" "Uploads env config to sprite"
-    assert_contains "${MOCK_LOG}" "sprite exec.*-tty.*" "Launches interactive session"
-}
-
-# Assert that a sprite script installs agent-specific components
-_assert_agent_specific() {
-    local script_name="$1"
-    case "${script_name}" in
-        claude)
-            assert_contains "${MOCK_LOG}" "sprite exec.*command -v claude" "Checks Claude Code installation"
-            assert_contains "${MOCK_LOG}" "sprite exec.*-file.*/tmp/.*spawn_config" "Uploads Claude config file"
-            assert_contains "${MOCK_LOG}" "sprite exec.*mv.*settings.json" "Moves settings.json to final path"
-            assert_contains "${MOCK_LOG}" "sprite exec.*mv.*\.claude\.json" "Moves .claude.json to final path"
-            ;;
-        openclaw)
-            assert_contains "${MOCK_LOG}" "sprite exec.*bun.*openclaw" "Installs openclaw via bun"
-            assert_contains "${MOCK_LOG}" "sprite exec.*openclaw gateway" "Starts openclaw gateway"
-            ;;
-    esac
-}
-
-# Assert no temp files were leaked during script execution
-_assert_no_temp_leaks() {
-    local leaked_temps
-    leaked_temps=$(find /tmp -maxdepth 1 -name "tmp.*" -newer "${MOCK_LOG}" 2>/dev/null | wc -l)
-    if [[ "${leaked_temps}" -eq 0 ]]; then
-        printf '%b\n' "  ${GREEN}✓${NC} No temp files leaked"
-        PASSED=$((PASSED + 1))
-    else
-        printf '%b\n' "  ${RED}✗${NC} Temp files leaked (${leaked_temps} found in /tmp)"
-        FAILED=$((FAILED + 1))
-    fi
-}
-
-# --- Test runner for a single script ---
-run_script_test() {
-    local script_name="$1"
-    local script_path="${REPO_ROOT}/sprite/${script_name}.sh"
-    local output_file="${TEST_DIR}/${script_name}_output.log"
-
-    echo ""
-    printf '%b\n' "${YELLOW}━━━ Testing ${script_name}.sh ━━━${NC}"
-
-    # Reset mock state
-    : > "${MOCK_LOG}"
-    rm -f "${TEST_DIR}/sprite_mock_created" 2>/dev/null || true
-
-    # Run the script with mocked PATH and env vars (timeout 30s)
-    local exit_code=0
-    MOCK_LOG="${MOCK_LOG}" \
-    TEST_DIR="${TEST_DIR}" \
-    SPRITE_NAME="test-sprite-${script_name}" \
-    OPENROUTER_API_KEY="sk-or-v1-0000000000000000000000000000000000000000000000000000000000000000" \
-    SPAWN_SKIP_API_VALIDATION=1 \
-    SPAWN_SKIP_GITHUB_AUTH=1 \
-    PATH="${TEST_DIR}:${PATH}" \
-    HOME="${TEST_DIR}/fakehome" \
-        timeout 30 bash "${script_path}" > "${output_file}" 2>&1 || exit_code=$?
-
-    assert_exit_code "${exit_code}" 0 "Script exits successfully"
-    _assert_sprite_common_commands "${script_name}"
-    _assert_agent_specific "${script_name}"
-    _assert_no_temp_leaks
-}
-
-# --- Test shared/common.sh sourcing ---
-# (sprite/lib/common.sh was removed when sprite/ was converted to TypeScript)
-_test_shared_functions_and_syntax() {
-    # Source locally and check all shared functions exist
-    local output
-    output=$(bash -c '
-        source "'"${REPO_ROOT}"'/shared/common.sh"
-        for fn in log_info log_warn log_error safe_read \
-                  get_openrouter_api_key_manual try_oauth_flow \
-                  get_openrouter_api_key_oauth open_browser \
-                  json_escape validate_model_id generate_ssh_key_if_missing \
-                  generic_ssh_wait; do
-            type "${fn}" &>/dev/null && echo "OK:${fn}" || echo "MISSING:${fn}"
-        done
-    ' 2>/dev/null)
-
-    local missing
-    missing=$(echo "${output}" | grep "^MISSING:" || true)
-    assert_equals "${missing}" "" "All shared functions defined"
-
-    # Syntax check
-    local rc=0
-    bash -n "${REPO_ROOT}/shared/common.sh" 2>/dev/null || rc=$?
-    assert_exit_code "${rc}" 0 "shared/common.sh syntax valid"
-}
-
-_test_shared_log_functions() {
-    # log functions write to stderr, not stdout
-    local stdout stderr
-    stdout=$(timeout 5 bash -c 'source "'"${REPO_ROOT}"'/shared/common.sh" && log_info "test"' </dev/null 2>/dev/null)
-    stderr=$(timeout 5 bash -c 'source "'"${REPO_ROOT}"'/shared/common.sh" && log_info "test"' </dev/null 2>&1 >/dev/null)
-    assert_equals "${stdout}" "" "Log functions write to stderr (no stdout)"
-    assert_match "${stderr}" "?*" "Log functions produce stderr output"
-}
-
-_test_shared_remote_source() {
-    if [[ "${REMOTE}" != true ]]; then
-        return 0
-    fi
-    local remote_fns
-    remote_fns=$(bash -c '
-        eval "$(curl -fsSL https://raw.githubusercontent.com/OpenRouterTeam/spawn/main/shared/common.sh)"
-        type log_info &>/dev/null && echo "OK" || echo "FAIL"
-    ' 2>/dev/null)
-    assert_equals "${remote_fns}" "OK" "Remote source from GitHub works"
-}
-
-test_common_source() {
-    echo ""
-    printf '%b\n' "${YELLOW}━━━ Testing shared/common.sh ━━━${NC}"
-
-    _test_shared_functions_and_syntax
-    _test_shared_log_functions
-    _test_shared_remote_source
-}
-
-# --- Test shared/common.sh functions ---
-# --- shared/common.sh sub-tests (grouped by feature) ---
-
-_test_model_validation() {
-    assert_common_succeeds "validate_model_id accepts valid model IDs" \
-        'validate_model_id "anthropic/claude-3.5-sonnet" && echo "valid"'
-    assert_common_fails "validate_model_id rejects invalid characters" \
-        'validate_model_id "bad;model"'
-    assert_common_succeeds "validate_model_id accepts empty string" \
-        'validate_model_id "" && echo "valid"'
-    assert_common_succeeds "validate_model_id accepts openrouter/auto" \
-        'validate_model_id "openrouter/auto" && echo "valid"'
-    assert_common_succeeds "validate_model_id accepts model IDs with colons" \
-        'validate_model_id "provider/model:version" && echo "valid"'
-
-    # Bulk test: all shell metacharacters must be rejected
-    # Note: backtick excluded due to shell escaping complexity
-    local dangerous_chars=('$' '&' '|' '>' '<' '(' ')' '{' '}' ';' '*' '?' '[' ']')
-    local rejected_count=0
-    local rc
-    for char in "${dangerous_chars[@]}"; do
-        rc=0
-        local test_str
-        test_str=$(printf 'bad%smodel' "${char}")
-        bash -c 'source "'"${REPO_ROOT}"'/shared/common.sh" && validate_model_id '"$(printf '%q' "${test_str}")" </dev/null >/dev/null 2>&1 || rc=$?
-        [[ "${rc}" -ne 0 ]] && rejected_count=$((rejected_count + 1))
-    done
-    assert_equals "${rejected_count}" "${#dangerous_chars[@]}" \
-        "validate_model_id rejects shell metacharacters (${rejected_count}/${#dangerous_chars[@]})"
-}
-
-_test_json_escape() {
-    local result
-    result=$(bash -c 'source "'"${REPO_ROOT}"'/shared/common.sh" && json_escape "test\"quote"' 2>/dev/null)
-    # json_escape should produce escaped quotes (\\") in the output
-    assert_match "${result}" '*\\"*' "json_escape handles special characters"
-
-    # Test the bash fallback path (without python3) escapes control characters
-    # The fallback must escape newlines, carriage returns, and tabs to produce valid JSON
-    result=$(bash -c '
-        json_escape_fallback() {
-            local string="${1}"
-            local escaped="${string//\\/\\\\}"
-            escaped="${escaped//\"/\\\"}"
-            escaped="${escaped//$'"'"'\n'"'"'/\\n}"
-            escaped="${escaped//$'"'"'\r'"'"'/\\r}"
-            escaped="${escaped//$'"'"'\t'"'"'/\\t}"
-            echo "\"${escaped}\""
-        }
-        json_escape_fallback "line1
-line2"
-    ' 2>/dev/null)
-    assert_match "${result}" '*\\n*' "json_escape fallback escapes newlines"
-
-    result=$(bash -c '
-        json_escape_fallback() {
-            local string="${1}"
-            local escaped="${string//\\/\\\\}"
-            escaped="${escaped//\"/\\\"}"
-            escaped="${escaped//$'"'"'\n'"'"'/\\n}"
-            escaped="${escaped//$'"'"'\r'"'"'/\\r}"
-            escaped="${escaped//$'"'"'\t'"'"'/\\t}"
-            echo "\"${escaped}\""
-        }
-        json_escape_fallback $'"'"'hello\tworld'"'"'
-    ' 2>/dev/null)
-    assert_match "${result}" '*\\t*' "json_escape fallback escapes tabs"
-}
-
-_test_ssh_key_utils() {
-    # generate_ssh_key_if_missing - creates key
-    local test_key="${TEST_DIR}/test_id_ed25519"
-    bash -c 'source "'"${REPO_ROOT}"'/shared/common.sh" && generate_ssh_key_if_missing "'"${test_key}"'"' >/dev/null 2>&1
-    local key_exists="no"
-    [[ -f "${test_key}" && -f "${test_key}.pub" ]] && key_exists="yes"
-    assert_equals "${key_exists}" "yes" "generate_ssh_key_if_missing creates key"
-
-    # generate_ssh_key_if_missing - skips existing
-    local mtime_before
-    mtime_before=$(stat -c %Y "${test_key}" 2>/dev/null || stat -f %m "${test_key}" 2>/dev/null)
-    sleep 1
-    bash -c 'source "'"${REPO_ROOT}"'/shared/common.sh" && generate_ssh_key_if_missing "'"${test_key}"'"' >/dev/null 2>&1
-    local mtime_after
-    mtime_after=$(stat -c %Y "${test_key}" 2>/dev/null || stat -f %m "${test_key}" 2>/dev/null)
-    assert_equals "${mtime_before}" "${mtime_after}" "generate_ssh_key_if_missing skips existing key"
-
-    # get_ssh_fingerprint
-    local result
-    result=$(bash -c 'source "'"${REPO_ROOT}"'/shared/common.sh" && get_ssh_fingerprint "'"${test_key}.pub"'"' 2>/dev/null)
-    assert_match "${result}" "*:*" "get_ssh_fingerprint returns valid fingerprint"
-
-    # extract_ssh_key_ids
-    local mock_json='{"ssh_keys":[{"id":123},{"id":456}]}'
-    result=$(bash -c 'source "'"${REPO_ROOT}"'/shared/common.sh" && echo '"'${mock_json}'"' | extract_ssh_key_ids "$(cat)" "ssh_keys"' 2>/dev/null)
-    assert_match "${result}" "*123*456*" "extract_ssh_key_ids parses JSON correctly"
-}
-
-_test_syntax_and_logging() {
-    local rc=0
-    bash -n "${REPO_ROOT}/shared/common.sh" 2>/dev/null || rc=$?
-    assert_exit_code "${rc}" 0 "shared/common.sh syntax valid"
-
-    local output missing
-    output=$(bash -c '
-        source "'"${REPO_ROOT}"'/shared/common.sh"
-        for fn in log_info log_warn log_error; do
-            type "${fn}" &>/dev/null && echo "OK:${fn}" || echo "MISSING:${fn}"
-        done
-    ' 2>/dev/null)
-    missing=$(echo "${output}" | grep "^MISSING:" || true)
-    assert_equals "${missing}" "" "All logging functions exist in shared/common.sh"
-}
-
-_test_open_browser() {
-    # open_browser: termux
-    local result
-    result=$(bash -c '
-        source "'"${REPO_ROOT}"'/shared/common.sh"
-        termux-open-url() { echo "termux: $*"; }
-        export -f termux-open-url
-        open_browser "https://example.com"
-    ' 2>/dev/null)
-    assert_equals "${result}" "termux: https://example.com" "open_browser detects termux-open-url"
-
-    # open_browser: macOS open
-    result=$(bash -c '
-        source "'"${REPO_ROOT}"'/shared/common.sh"
-        open() { echo "macOS: $*"; }
-        export -f open
-        open_browser "https://example.com"
-    ' 2>/dev/null)
-    assert_equals "${result}" "macOS: https://example.com" "open_browser detects macOS open"
-
-    # open_browser: fallback message
-    local stderr_output
-    stderr_output=$(bash -c '
-        PATH="/usr/bin:/bin"
-        source "'"${REPO_ROOT}"'/shared/common.sh"
-        command() {
-            if [[ "$2" == "termux-open-url" || "$2" == "open" || "$2" == "xdg-open" ]]; then
-                return 1
-            fi
-            builtin command "$@"
-        }
-        export -f command
-        open_browser "https://example.com"
-    ' 2>&1 >/dev/null)
-    assert_match "${stderr_output}" "*Please open: https://example.com*" \
-        "open_browser shows fallback message when browsers unavailable"
-}
-
-_test_cloud_init() {
-    # get_cloud_init_userdata
-    local result
-    result=$(bash -c 'source "'"${REPO_ROOT}"'/shared/common.sh" && get_cloud_init_userdata' 2>/dev/null)
-    assert_match "${result}" "*#cloud-config*" "get_cloud_init_userdata returns valid YAML"
-    assert_match "${result}" "*curl*" "get_cloud_init_userdata includes curl"
-    assert_match "${result}" "*git*" "get_cloud_init_userdata includes git"
-    assert_match "${result}" "*zsh*" "get_cloud_init_userdata includes zsh"
-    assert_match "${result}" "*bun.sh/install*" "get_cloud_init_userdata includes Bun installation"
-    assert_match "${result}" "*claude.ai/install*" "get_cloud_init_userdata includes Claude installation"
-
-    # check_openrouter_connectivity -- accepts success or graceful failure
-    if command -v curl &> /dev/null; then
-        local connectivity_result
-        connectivity_result=$(bash -c 'source "'"${REPO_ROOT}"'/shared/common.sh" && check_openrouter_connectivity && echo "reachable"' 2>/dev/null)
-        # Accept both "reachable" and empty (network unavailable) -- just shouldn't crash
-        assert_match "${connectivity_result:-ok}" "*" "check_openrouter_connectivity handles connectivity check"
-    else
-        printf '%b\n' "  ${YELLOW}⚠${NC} check_openrouter_connectivity test skipped (curl not available)"
-    fi
-}
-
-_test_oauth_functions() {
-    local rc
-
-    # wait_for_oauth_code - success
-    local code_test_file="${TEST_DIR}/oauth_code_test"
-    echo "test_code" > "${code_test_file}"
-    rc=0
-    bash -c 'source "'"${REPO_ROOT}"'/shared/common.sh" && wait_for_oauth_code "'"${code_test_file}"'" 1' >/dev/null 2>&1 || rc=$?
-    assert_exit_code "${rc}" 0 "wait_for_oauth_code returns success when file exists"
-
-    # wait_for_oauth_code - timeout
-    local missing_file="${TEST_DIR}/missing_oauth_code"
-    rc=0
-    bash -c 'source "'"${REPO_ROOT}"'/shared/common.sh" && wait_for_oauth_code "'"${missing_file}"'" 1' >/dev/null 2>&1 || rc=$?
-    assert_match "${rc}" "[1-9]*" "wait_for_oauth_code returns failure on timeout"
-
-    # cleanup_oauth_session
-    local cleanup_test_dir="${TEST_DIR}/oauth_cleanup_test"
-    mkdir -p "${cleanup_test_dir}"
-    bash -c 'source "'"${REPO_ROOT}"'/shared/common.sh" && cleanup_oauth_session "" "'"${cleanup_test_dir}"'"' >/dev/null 2>&1
-    local dir_removed="yes"
-    [[ -d "${cleanup_test_dir}" ]] && dir_removed="no"
-    assert_equals "${dir_removed}" "yes" "cleanup_oauth_session removes directory"
-}
-
-_test_ssh_wait() {
-    # generic_ssh_wait - success
-    local result
-    result=$(bash -c '
-        source "'"${REPO_ROOT}"'/shared/common.sh"
-        ssh() { return 0; }
-        export -f ssh
-        generic_ssh_wait "root" "1.2.3.4" "-o Test" "true" "test" 2 1 2>&1
-        echo $?
-    ' 2>/dev/null | tail -1)
-    assert_equals "${result}" "0" "generic_ssh_wait succeeds when command passes"
-
-    # generic_ssh_wait - failure
-    result=$(bash -c '
-        source "'"${REPO_ROOT}"'/shared/common.sh"
-        ssh() { return 1; }
-        export -f ssh
-        generic_ssh_wait "root" "1.2.3.4" "-o Test" "false" "test" 2 1 2>&1
-        echo $?
-    ' 2>/dev/null | tail -1)
-    assert_equals "${result}" "1" "generic_ssh_wait fails after max attempts"
-}
-
-_test_input_and_server_validation() {
-    # safe_read without TTY
-    assert_common_fails "safe_read fails when no TTY available" \
-        'safe_read "test: " </dev/null'
-
-    # validate_server_name
-    assert_common_succeeds "validate_server_name accepts valid names" \
-        'validate_server_name "dev-server-01" && echo "valid"'
-    assert_common_fails "validate_server_name rejects names too short" \
-        'validate_server_name "ab"'
-
-    local long_name
-    long_name=$(printf 'a%.0s' {1..64})
-    assert_common_fails "validate_server_name rejects names too long" \
-        'validate_server_name "'"${long_name}"'"'
-    assert_common_fails "validate_server_name rejects leading dash" \
-        'validate_server_name "-server"'
-    assert_common_fails "validate_server_name rejects trailing dash" \
-        'validate_server_name "server-"'
-    assert_common_fails "validate_server_name rejects invalid characters" \
-        'validate_server_name "server_01"'
-    assert_common_fails "validate_server_name rejects empty string" \
-        'validate_server_name ""'
-}
-
-test_shared_common() {
-    echo ""
-    printf '%b\n' "${YELLOW}━━━ Testing shared/common.sh ━━━${NC}"
-
-    _test_model_validation
-    _test_json_escape
-    _test_ssh_key_utils
-    _test_syntax_and_logging
-    _test_open_browser
-    _test_cloud_init
-    _test_oauth_functions
-    _test_ssh_wait
-    _test_input_and_server_validation
-}
-
-# --- Test source detection in each script ---
-# All cloud provider scripts are now thin bun shims that delegate to TypeScript.
-# Verify each shim: (1) delegates to bun, (2) valid syntax.
-test_source_detection() {
-    echo ""
-    printf '%b\n' "${YELLOW}━━━ Testing source detection (bun shims) ━━━${NC}"
-
-    local cloud script script_path
-    for cloud in sprite fly hetzner digitalocean aws gcp daytona local; do
-        for script in claude openclaw codex opencode kilocode zeroclaw; do
-            script_path="${REPO_ROOT}/${cloud}/${script}.sh"
-            [[ -f "${script_path}" ]] || continue
-
-            # Verify shim delegates to TypeScript via bun
-            assert_contains "${script_path}" 'exec bun run' \
-                "${cloud}/${script}.sh delegates to bun"
-
-            # Verify syntax
-            local rc=0
-            bash -n "${script_path}" 2>/dev/null || rc=$?
-            assert_exit_code "${rc}" 0 "${cloud}/${script}.sh syntax valid"
-        done
-    done
-}
-
-# --- Static analysis with shellcheck ---
-
-# Discover all shell scripts in the repo: agent scripts, lib files, shared, and test harness.
-# Populates the DISCOVERED_SCRIPTS array.
-_discover_shell_scripts() {
-    DISCOVERED_SCRIPTS=()
-    local dir
-    for dir in "${REPO_ROOT}"/*/; do
-        local cloud
-        cloud=$(basename "${dir}")
-        case "${cloud}" in
-            cli|shared|test|node_modules|.git|.github|.claude|.docs) continue ;;
-        esac
-        local f
-        for f in "${dir}"*.sh; do
-            [[ -f "${f}" ]] && DISCOVERED_SCRIPTS+=("${f}")
-        done
-        [[ -f "${dir}lib/common.sh" ]] && DISCOVERED_SCRIPTS+=("${dir}lib/common.sh")
-    done
-    DISCOVERED_SCRIPTS+=("${REPO_ROOT}/shared/common.sh" "${REPO_ROOT}/test/run.sh")
-}
-
-# Run shellcheck on each discovered script and report results.
-_run_shellcheck_on_scripts() {
-    local issue_count=0
-    local checked_count=0
-
-    for script in "${DISCOVERED_SCRIPTS[@]}"; do
-        [[ -f "${script}" ]] || continue
-        checked_count=$((checked_count + 1))
-
-        # SC1090: Can't follow non-constant source
-        # SC2312: Consider invoking this command separately to avoid masking its return value
-        local output
-        output=$(shellcheck --severity=warning --exclude=SC1090,SC2312 "${script}" 2>&1) || true
-
-        if [[ -n "${output}" ]]; then
-            issue_count=$((issue_count + 1))
-            printf '%b\n' "  ${YELLOW}⚠${NC} $(basename "${script}"): found issues"
-            echo "${output}" | sed 's/^/    /'
-        fi
-    done
-
-    if [[ "${issue_count}" -eq 0 ]]; then
-        printf '%b\n' "  ${GREEN}✓${NC} No issues found in ${checked_count} scripts"
-        PASSED=$((PASSED + 1))
-    else
-        printf '%b\n' "  ${YELLOW}⚠${NC} Found issues in ${issue_count}/${checked_count} scripts (advisory only)"
-    fi
-}
-
-run_shellcheck() {
-    echo ""
-    printf '%b\n' "${YELLOW}━━━ Running shellcheck (static analysis) ━━━${NC}"
-
-    if ! command -v shellcheck &> /dev/null; then
-        printf '%b\n' "  ${YELLOW}⚠${NC} shellcheck not found (install with: apt install shellcheck / brew install shellcheck)"
-        printf '%b\n' "  ${YELLOW}⚠${NC} Skipping static analysis"
-        return 0
-    fi
-
-    _discover_shell_scripts
-    _run_shellcheck_on_scripts
-}
-
-# --- Main ---
-echo "==============================="
-echo " Spawn Script Test Suite"
-echo "==============================="
-echo ""
-echo "Repo:     ${REPO_ROOT}"
-echo "Temp dir: ${TEST_DIR}"
-echo "Filter:   ${FILTER:-all}"
-echo "Remote:   ${REMOTE}"
-
-setup_mocks
-setup_extra_mocks
-
-# Create fake home for sprite script tests
-mkdir -p "${TEST_DIR}/fakehome/.ssh"
-mkdir -p "${TEST_DIR}/fakehome/.config/spawn"
-mkdir -p "${TEST_DIR}/fakehome/.claude"
-mkdir -p "${TEST_DIR}/fakehome/.local/bin"
-
-run_shellcheck
-test_common_source
-test_shared_common
-test_source_detection
-
-# Note: sprite/ cloud provider scripts depend on sprite/lib/common.sh which was
-# removed when sprite was converted to TypeScript (PR #1692). Integration tests
-# for sprite agent scripts are covered by bun test (cli/src/__tests__/) instead.
-
-# --- Summary ---
-echo ""
-echo "==============================="
-TOTAL=$((PASSED + FAILED))
-printf '%b\n' " Results: ${GREEN}${PASSED} passed${NC}, ${RED}${FAILED} failed${NC}, ${TOTAL} total"
-echo "==============================="
-
-[[ "${FAILED}" -eq 0 ]] && exit 0 || exit 1
diff --git a/test/test-sandbox.sh b/test/test-sandbox.sh
deleted file mode 100755
index da2f1a78..00000000
--- a/test/test-sandbox.sh
+++ /dev/null
@@ -1,200 +0,0 @@
-#!/bin/bash
-# Test that all bash test scripts are properly sandboxed
-# Verifies no production environment pollution
-
-set -eo pipefail
-
-REPO_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
-
-# Colors
-RED='\033[0;31m'
-GREEN='\033[0;32m'
-YELLOW='\033[1;33m'
-NC='\033[0m'
-
-PASSED=0
-FAILED=0
-
-# Capture initial state of agent directories before running tests
-INITIAL_OPENCLAW_EXISTS=false
-INITIAL_SPRITE_EXISTS=false
-INITIAL_CLAUDE_DIR_EXISTS=false
-INITIAL_CLAUDE_JSON_EXISTS=false
-INITIAL_CLAUDE_SETTINGS_EXISTS=false
-INITIAL_CLAUDE_JSON_MTIME=""
-INITIAL_CLAUDE_SETTINGS_MTIME=""
-
-[[ -d "$HOME/.openclaw" ]] && INITIAL_OPENCLAW_EXISTS=true
-[[ -d "$HOME/.sprite" ]] && INITIAL_SPRITE_EXISTS=true
-[[ -d "$HOME/.claude" ]] && INITIAL_CLAUDE_DIR_EXISTS=true
-
-if [[ -f "$HOME/.claude.json" ]]; then
-    INITIAL_CLAUDE_JSON_EXISTS=true
-    INITIAL_CLAUDE_JSON_MTIME=$(stat -c %Y "$HOME/.claude.json" 2>/dev/null || stat -f %m "$HOME/.claude.json" 2>/dev/null)
-fi
-
-if [[ -f "$HOME/.claude/settings.json" ]]; then
-    INITIAL_CLAUDE_SETTINGS_EXISTS=true
-    INITIAL_CLAUDE_SETTINGS_MTIME=$(stat -c %Y "$HOME/.claude/settings.json" 2>/dev/null || stat -f %m "$HOME/.claude/settings.json" 2>/dev/null)
-fi
-
-assert_no_file() {
-    local pattern="$1"
-    local msg="$2"
-    if ls ${pattern} 2>/dev/null | grep -q .; then
-        printf '%b\n' "  ${RED}✗${NC} ${msg}"
-        printf '%b\n' "    Found: $(ls ${pattern} 2>/dev/null | head -3)"
-        FAILED=$((FAILED + 1))
-    else
-        printf '%b\n' "  ${GREEN}✓${NC} ${msg}"
-        PASSED=$((PASSED + 1))
-    fi
-}
-
-assert_config_not_modified() {
-    local config_path="$HOME/.config/spawn"
-    local msg="$1"
-
-    # If config doesn't exist, that's fine
-    if [[ ! -d "$config_path" ]]; then
-        printf '%b\n' "  ${GREEN}✓${NC} ${msg} (dir doesn't exist)"
-        PASSED=$((PASSED + 1))
-        return 0
-    fi
-
-    # If it exists, check if any files were modified in last 5 minutes
-    local recent_files
-    recent_files=$(find "$config_path" -type f -mmin -5 2>/dev/null)
-    if [[ -n "$recent_files" ]]; then
-        printf '%b\n' "  ${RED}✗${NC} ${msg}"
-        printf '%b\n' "    Modified: $recent_files"
-        FAILED=$((FAILED + 1))
-    else
-        printf '%b\n' "  ${GREEN}✓${NC} ${msg}"
-        PASSED=$((PASSED + 1))
-    fi
-}
-
-assert_no_directory() {
-    local dir_path="$1"
-    local msg="$2"
-    if [[ -d "$dir_path" ]]; then
-        printf '%b\n' "  ${RED}✗${NC} ${msg}"
-        printf '%b\n' "    Found: $dir_path"
-        FAILED=$((FAILED + 1))
-    else
-        printf '%b\n' "  ${GREEN}✓${NC} ${msg}"
-        PASSED=$((PASSED + 1))
-    fi
-}
-
-echo "========================================"
-echo " Bash Test Sandboxing Verification"
-echo "========================================"
-echo ""
-
-# Test 1: Run test/run.sh and verify no /tmp pollution
-echo "${YELLOW}Test 1: test/run.sh sandboxing${NC}"
-cd "${REPO_ROOT}"
-timeout 60 bash test/run.sh >/dev/null 2>&1 || true
-assert_no_file "/tmp/sprite_mock_created*" "No sprite mock files in /tmp after test/run.sh"
-assert_config_not_modified "Production config not modified by test/run.sh"
-
-# Test 2: Verify test/record.sh respects TEST_CONFIG_DIR
-echo ""
-echo "${YELLOW}Test 2: test/record.sh sandboxing${NC}"
-TEST_CONFIG_DIR=$(mktemp -d)
-export TEST_CONFIG_DIR
-timeout 10 bash test/record.sh --list >/dev/null 2>&1 || true
-assert_no_file "$HOME/.config/spawn/*.json.test-*" "No test files in production config"
-rm -rf "${TEST_CONFIG_DIR}"
-unset TEST_CONFIG_DIR
-
-# Test 3: Verify mock.sh uses isolated temp directories
-echo ""
-echo "${YELLOW}Test 3: test/mock.sh sandboxing${NC}"
-# Mock test runs in parallel with isolated TEST_DIR per cloud
-# Just verify it doesn't leave artifacts in /tmp or production dirs
-timeout 10 bash test/mock.sh hetzner claude 2>/dev/null || true
-assert_config_not_modified "Production config not modified by test/mock.sh"
-
-# Test 4: Verify no agent-specific directories created in HOME
-echo ""
-echo "${YELLOW}Test 4: Agent directory residue check${NC}"
-
-# Check if .openclaw was created by tests
-if [[ "$INITIAL_OPENCLAW_EXISTS" == "false" ]]; then
-    assert_no_directory "$HOME/.openclaw" "No ~/.openclaw directory created"
-else
-    printf '%b\n' "  ${YELLOW}⊘${NC} Skipped ~/.openclaw check (existed before tests)"
-fi
-
-# Check if .sprite was created by tests
-if [[ "$INITIAL_SPRITE_EXISTS" == "false" ]]; then
-    assert_no_directory "$HOME/.sprite" "No ~/.sprite directory created"
-else
-    printf '%b\n' "  ${YELLOW}⊘${NC} Skipped ~/.sprite check (existed before tests)"
-fi
-
-# Check if .claude was created by tests
-if [[ "$INITIAL_CLAUDE_DIR_EXISTS" == "false" ]]; then
-    assert_no_directory "$HOME/.claude" "No ~/.claude directory created"
-else
-    printf '%b\n' "  ${YELLOW}⊘${NC} Skipped ~/.claude check (existed before tests)"
-fi
-
-# Test 5: Verify Claude settings not mutated in production config
-echo ""
-echo "${YELLOW}Test 5: Claude settings integrity${NC}"
-
-# Check .claude.json mutation only if it existed before tests
-if [[ "$INITIAL_CLAUDE_JSON_EXISTS" == "true" ]]; then
-    # Compare modification time before and after tests
-    CURRENT_MTIME=$(stat -c %Y "$HOME/.claude.json" 2>/dev/null || stat -f %m "$HOME/.claude.json" 2>/dev/null)
-    if [[ "$CURRENT_MTIME" != "$INITIAL_CLAUDE_JSON_MTIME" ]]; then
-        printf '%b\n' "  ${RED}✗${NC} Production ~/.claude.json was modified by tests"
-        printf '%b\n' "    File: $HOME/.claude.json"
-        FAILED=$((FAILED + 1))
-    else
-        printf '%b\n' "  ${GREEN}✓${NC} Production ~/.claude.json not modified by tests"
-        PASSED=$((PASSED + 1))
-    fi
-elif [[ -f "$HOME/.claude.json" ]]; then
-    # File was created by tests
-    printf '%b\n' "  ${RED}✗${NC} ~/.claude.json should not be created by tests"
-    printf '%b\n' "    Created: $HOME/.claude.json"
-    FAILED=$((FAILED + 1))
-else
-    printf '%b\n' "  ${GREEN}✓${NC} ~/.claude.json not created by tests"
-    PASSED=$((PASSED + 1))
-fi
-
-# Check settings.json mutation only if it existed before tests
-if [[ "$INITIAL_CLAUDE_SETTINGS_EXISTS" == "true" ]]; then
-    # Compare modification time before and after tests
-    CURRENT_MTIME=$(stat -c %Y "$HOME/.claude/settings.json" 2>/dev/null || stat -f %m "$HOME/.claude/settings.json" 2>/dev/null)
-    if [[ "$CURRENT_MTIME" != "$INITIAL_CLAUDE_SETTINGS_MTIME" ]]; then
-        printf '%b\n' "  ${RED}✗${NC} Production ~/.claude/settings.json was modified by tests"
-        printf '%b\n' "    File: $HOME/.claude/settings.json"
-        FAILED=$((FAILED + 1))
-    else
-        printf '%b\n' "  ${GREEN}✓${NC} Production ~/.claude/settings.json not modified by tests"
-        PASSED=$((PASSED + 1))
-    fi
-elif [[ -f "$HOME/.claude/settings.json" ]]; then
-    # File was created by tests
-    printf '%b\n' "  ${RED}✗${NC} ~/.claude/settings.json should not be created by tests"
-    printf '%b\n' "    Created: $HOME/.claude/settings.json"
-    FAILED=$((FAILED + 1))
-else
-    printf '%b\n' "  ${GREEN}✓${NC} ~/.claude/settings.json not created by tests"
-    PASSED=$((PASSED + 1))
-fi
-
-echo ""
-echo "========================================"
-TOTAL=$((PASSED + FAILED))
-printf '%b\n' " Results: ${GREEN}${PASSED} passed${NC}, ${RED}${FAILED} failed${NC}, ${TOTAL} total"
-echo "========================================"
-
-[[ "${FAILED}" -eq 0 ]] && exit 0 || exit 1
diff --git a/test/update-readme.py b/test/update-readme.py
deleted file mode 100644
index b0fbc3ad..00000000
--- a/test/update-readme.py
+++ /dev/null
@@ -1,135 +0,0 @@
-#!/usr/bin/env python3
-"""Update README.md matrix cells based on test results.
-
-Usage:
-    python3 test/update-readme.py results.txt
-
-Results file format (one per line):
-    cloud/agent:pass
-    cloud/agent:fail
-
-Only touches cells that have test results; untested combinations stay unchanged.
-"""
-import json
-import re
-import sys
-import os
-
-def main():
-    if len(sys.argv) < 2:
-        print("Usage: python3 test/update-readme.py RESULTS_FILE", file=sys.stderr)
-        sys.exit(1)
-
-    results_file = sys.argv[1]
-    repo_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
-    readme_path = os.path.join(repo_root, "README.md")
-    manifest_path = os.path.join(repo_root, "manifest.json")
-
-    # Parse results
-    results = {}
-    with open(results_file) as f:
-        for line in f:
-            line = line.strip()
-            if not line or ":" not in line:
-                continue
-            combo, status = line.rsplit(":", 1)
-            results[combo] = status  # cloud/agent -> pass|fail
-
-    if not results:
-        print("No results to apply.")
-        return
-
-    # Load manifest to map agent keys to display names
-    with open(manifest_path) as f:
-        manifest = json.load(f)
-
-    # Build agent key -> name mapping for row matching
-    agent_names = {}
-    for key, info in manifest["agents"].items():
-        agent_names[info["name"]] = key  # "Claude Code" -> "claude"
-
-    # Read README
-    with open(readme_path) as f:
-        lines = f.readlines()
-
-    # Find the matrix table: header row starts with "| |"
-    header_idx = None
-    for i, line in enumerate(lines):
-        if line.startswith("| |") or line.startswith("| | "):
-            header_idx = i
-            break
-
-    if header_idx is None:
-        print("Could not find matrix table header in README.md", file=sys.stderr)
-        sys.exit(1)
-
-    # Parse cloud columns from header
-    # Header: | | [Sprite](sprite/) | [Hetzner Cloud](hetzner/) | ...
-    header = lines[header_idx]
-    header_cells = [c.strip() for c in header.split("|")]
-    # header_cells[0] = "", header_cells[1] = "" (row label), header_cells[2:] = cloud cells
-
-    cloud_columns = {}  # cloud_dir -> column index (0-based within cells)
-    for col_idx, cell in enumerate(header_cells):
-        # Extract dir from [Name](dir/)
-        m = re.search(r'\[.*?\]\(([^/)]+)/?[^)]*\)', cell)
-        if m:
-            cloud_columns[m.group(1)] = col_idx
-
-    # Process data rows (skip header and separator)
-    changed = False
-    for i in range(header_idx + 2, len(lines)):
-        line = lines[i]
-        if not line.startswith("|"):
-            break
-
-        cells = line.split("|")
-        if len(cells) < 3:
-            continue
-
-        # Extract agent key from first data cell
-        # e.g. " [**Claude Code**](https://claude.ai) " -> "Claude Code"
-        row_label = cells[1].strip()
-        name_match = re.search(r'\[\*\*(.*?)\*\*\]', row_label)
-        if not name_match:
-            continue
-        display_name = name_match.group(1)
-        agent_key = agent_names.get(display_name)
-        if not agent_key:
-            continue
-
-        row_changed = False
-        for cloud_dir, col_idx in cloud_columns.items():
-            combo = f"{cloud_dir}/{agent_key}"
-            if combo not in results:
-                continue
-            if col_idx >= len(cells):
-                continue
-
-            status = results[combo]
-            old_cell = cells[col_idx]
-            # Preserve whitespace padding
-            stripped = old_cell.strip()
-            if status == "pass" and stripped != "\u2713":
-                cells[col_idx] = old_cell.replace(stripped, "\u2713") if stripped else " \u2713 "
-                row_changed = True
-            elif status == "fail" and stripped != "\u2717":
-                cells[col_idx] = old_cell.replace(stripped, "\u2717") if stripped else " \u2717 "
-                row_changed = True
-
-        if row_changed:
-            lines[i] = "|".join(cells)
-            if not lines[i].endswith("\n"):
-                lines[i] += "\n"
-            changed = True
-
-    if changed:
-        with open(readme_path, "w") as f:
-            f.writelines(lines)
-        print(f"README.md updated with {len(results)} test results.")
-    else:
-        print("No changes needed in README.md.")
-
-
-if __name__ == "__main__":
-    main()