# mirror of https://github.com/OpenRouterTeam/spawn.git
# synced 2026-05-10 04:09:40 +00:00
#
# Upstream commit summary: replaced fly/lib/common.sh (741 lines of bash)
# with a TypeScript implementation on the Bun runtime. The fly/ provider was
# the most complex bash code in the project; recent fixes (#1597, #1599,
# #1600) highlighted the pain of debugging HTTP calls, JSON parsing, and
# multi-step auth flows in shell. New TypeScript modules:
#   - fly/lib/ui.ts     — logging, prompts, validation (zero deps)
#   - fly/lib/fly.ts    — API client (fetch), auth chain, org listing, provisioning
#   - fly/lib/oauth.ts  — OpenRouter OAuth via Bun.serve(), key management
#   - fly/lib/agents.ts — typed agent configs for all 6 agents
#   - fly/main.ts       — orchestrator entry point
# Agent .sh files become thin shims (~30 lines) that install bun if needed,
# download TS sources for curl|bash execution, and delegate to main.ts.
# Test coverage: 44 TypeScript unit tests (bun test), 4 fly failure-mode
# tests (mock.sh), and all existing suites pass (110 run.sh, 76 mock.sh).
# Co-authored-by: lab <6723574+louisgv@users.noreply.github.com>
# Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
#!/bin/bash
#
# Fixture-based mock test suite for cloud provider agent scripts
#
# Uses recorded API responses from test/fixtures/{cloud}/ to test
# every agent script without making real API calls.
#
# Usage:
#   bash test/mock.sh                  # Test all clouds with fixtures
#   bash test/mock.sh hetzner          # Test all agents on one cloud
#   bash test/mock.sh hetzner claude   # Test one agent on one cloud

set -eo pipefail

# Warn (but keep going) on pre-4 bash such as macOS's stock 3.2.
if [[ "${BASH_VERSINFO[0]}" -lt 4 ]]; then
  printf 'WARNING: bash %s detected. Some features may need bash 4+.\n' "${BASH_VERSION}" >&2
fi

# Repo root = parent of the directory containing this script.
REPO_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
FIXTURES_DIR="${REPO_ROOT}/test/fixtures"
# Sandbox for mock binaries; prepended to PATH when running agent scripts.
TEST_DIR=$(mktemp -d)
# Every mock invocation is appended here; assertions grep this file.
MOCK_LOG="${TEST_DIR}/mock_calls.log"

# Colors (respect NO_COLOR standard: https://no-color.org/)
if [[ -n "${NO_COLOR:-}" ]]; then
  RED='' GREEN='' YELLOW='' CYAN='' NC=''
else
  RED='\033[0;31m'
  GREEN='\033[0;32m'
  YELLOW='\033[1;33m'
  CYAN='\033[0;36m'
  NC='\033[0m'
fi

# Counters (mutated by the assertion helpers below)
PASSED=0
FAILED=0
SKIPPED=0

# Cleanup on exit: drop the sandbox and any stray /tmp/spawn_* artifacts
# left behind by agent scripts.
cleanup() {
  rm -rf "${TEST_DIR}"
  rm -f /tmp/spawn_* 2>/dev/null || true
}
trap cleanup EXIT
# ============================================================
# Assertions (same pattern as test/run.sh)
# ============================================================

# Assert that an observed exit code matches the expected one.
# $1 actual, $2 expected, $3 message. Increments PASSED or FAILED.
assert_exit_code() {
  local got="$1" want="$2" label="$3"
  if [[ "${got}" -ne "${want}" ]]; then
    printf '%b\n' "  ${RED}✗${NC} ${label} (got exit code ${got})"
    FAILED=$((FAILED + 1))
    return 0
  fi
  printf '%b\n' "  ${GREEN}✓${NC} ${label}"
  PASSED=$((PASSED + 1))
}
# Assert that MOCK_LOG matches an extended-regex pattern.
# $1 pattern (ERE), $2 message. Increments PASSED or FAILED.
assert_log_contains() {
  local pattern="$1" label="$2"
  if ! grep -qE "${pattern}" "${MOCK_LOG}" 2>/dev/null; then
    printf '%b\n' "  ${RED}✗${NC} ${label}"
    FAILED=$((FAILED + 1))
    return 0
  fi
  printf '%b\n' "  ${GREEN}✓${NC} ${label}"
  PASSED=$((PASSED + 1))
}
# Assert that the mock curl logged a call "<method> <endpoint>".
# $1 HTTP method, $2 endpoint regex, $3 optional message.
assert_api_called() {
  local method="$1" endpoint_pattern="$2"
  local label="${3:-calls ${method} ${endpoint_pattern}}"
  if ! grep -qE "curl ${method} .*${endpoint_pattern}" "${MOCK_LOG}" 2>/dev/null; then
    printf '%b\n' "  ${RED}✗${NC} ${label}"
    FAILED=$((FAILED + 1))
    return 0
  fi
  printf '%b\n' "  ${GREEN}✓${NC} ${label}"
  PASSED=$((PASSED + 1))
}
# Assert that an env var was injected into the remote environment.
# The variable may appear verbatim in ssh/scp arguments (MOCK_LOG) or in the
# script's captured output; a case-insensitive mention of its first word
# ("openrouter" for OPENROUTER_API_KEY, as in "Using OpenRouter API key
# from environment") in either file also counts.
# $1 variable name, $2 optional message.
assert_env_injected() {
  local var_name="$1"
  local label="${2:-injects ${var_name}}"
  local first_word
  first_word=$(printf '%s' "${var_name%%_*}" | tr '[:upper:]' '[:lower:]')

  local found=0
  if grep -qE "${var_name}" "${MOCK_LOG}" 2>/dev/null; then
    found=1
  elif grep -qE "${var_name}" "${TEST_DIR}/output.log" 2>/dev/null; then
    found=1
  elif grep -qi "${first_word}" "${TEST_DIR}/output.log" 2>/dev/null; then
    found=1
  elif grep -qi "${first_word}" "${MOCK_LOG}" 2>/dev/null; then
    found=1
  fi

  if [[ "$found" -eq 1 ]]; then
    printf '%b\n' "  ${GREEN}✓${NC} ${label}"
    PASSED=$((PASSED + 1))
  else
    printf '%b\n' "  ${RED}✗${NC} ${label}"
    FAILED=$((FAILED + 1))
  fi
}
# Assert that a file upload/creation matching the pattern was logged
# (scp/upload/file entries in MOCK_LOG). $1 path regex, $2 optional message.
assert_file_created() {
  local path_pattern="$1"
  local label="${2:-creates file matching ${path_pattern}}"
  if ! grep -qE "(scp|upload|file).*${path_pattern}" "${MOCK_LOG}" 2>/dev/null; then
    printf '%b\n' "  ${RED}✗${NC} ${label}"
    FAILED=$((FAILED + 1))
    return 0
  fi
  printf '%b\n' "  ${GREEN}✓${NC} ${label}"
  PASSED=$((PASSED + 1))
}
# Assert that _validate_body logged no BODY_ERROR entries; on failure,
# print each offending log line indented. $1 optional message.
assert_no_body_errors() {
  local label="${1:-no request body validation errors}"
  if ! grep -qE "BODY_ERROR:" "${MOCK_LOG}" 2>/dev/null; then
    printf '%b\n' "  ${GREEN}✓${NC} ${label}"
    PASSED=$((PASSED + 1))
    return 0
  fi
  printf '%b\n' "  ${RED}✗${NC} ${label}"
  printf '%b\n' "  ${RED}  Errors:${NC}"
  grep "BODY_ERROR:" "${MOCK_LOG}" 2>/dev/null | while IFS= read -r line; do
    printf '    %s\n' "$line"
  done
  FAILED=$((FAILED + 1))
}
# Check server lifecycle tracking in a state file of CREATED:/DELETED: lines.
# Passes when at least one creation was tracked; prints warnings (without
# failing) when the file is missing, empty, or servers outlive the test —
# the latter is expected, since the user takes over the server.
# $1 state file path, $2 optional message.
assert_server_cleaned_up() {
  local state_file="$1"
  local label="${2:-server lifecycle tracked}"

  if [[ ! -f "$state_file" ]]; then
    printf '%b\n' "  ${YELLOW}⚠${NC} ${label} (no state file)"
    return 0
  fi

  local created deleted
  created=$(grep -c "^CREATED:" "$state_file" 2>/dev/null || true)
  deleted=$(grep -c "^DELETED:" "$state_file" 2>/dev/null || true)

  if [[ "$created" -eq 0 ]]; then
    printf '%b\n' "  ${YELLOW}⚠${NC} ${label} (no server creation tracked)"
    return 0
  fi

  printf '%b\n' "  ${GREEN}✓${NC} ${label} (created=${created}, deleted=${deleted})"
  PASSED=$((PASSED + 1))
  if [[ "$deleted" -lt "$created" ]]; then
    printf '%b\n' "  ${YELLOW}⚠${NC} warning: ${created} created but only ${deleted} deleted (expected — user takes over)"
  fi
}
# ============================================================
# Mock setup
# ============================================================

# Install the shared mock curl (test/mock-curl-script.sh, which serves
# recorded fixtures instead of doing real HTTP) into the sandbox as `curl`.
setup_mock_curl() {
  local script_dir
  script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
  cp "${script_dir}/mock-curl-script.sh" "${TEST_DIR}/curl"
  chmod +x "${TEST_DIR}/curl"
}
# Put mock `ssh` and `scp` binaries on the sandbox PATH. Each one logs its
# full argument list to MOCK_LOG and exits 0.
setup_mock_ssh() {
  local tool
  for tool in ssh scp; do
    {
      printf '#!/bin/bash\n'
      printf 'echo "%s $*" >> "${MOCK_LOG}"\n' "$tool"
      printf 'exit 0\n'
    } > "${TEST_DIR}/${tool}"
    chmod +x "${TEST_DIR}/${tool}"
  done
}
# Create a mock that logs its invocation to MOCK_LOG and exits 0.
# Usage: _create_logging_mock NAME [NAME...]
_create_logging_mock() {
  local name
  for name in "$@"; do
    {
      printf '#!/bin/bash\n'
      printf 'echo "%s $*" >> "${MOCK_LOG}"\n' "$name"
      printf 'exit 0\n'
    } > "${TEST_DIR}/${name}"
    chmod +x "${TEST_DIR}/${name}"
  done
}
# Create a mock that silently exits 0 (no logging).
# Usage: _create_silent_mock NAME [NAME...]
_create_silent_mock() {
  local name
  for name in "$@"; do
    printf '#!/bin/bash\nexit 0\n' > "${TEST_DIR}/${name}"
    chmod +x "${TEST_DIR}/${name}"
  done
}
# Create the ssh-keygen mock script.
# Handles the two call shapes agent scripts use:
#   * `ssh-keygen -l ...` — prints a fixed MD5 fingerprint that matches the
#     recorded fixture data, then exits 0.
#   * `ssh-keygen ... -f PATH ...` — simulates key generation: creates an
#     empty private key at PATH and a PATH.pub with a fixed ed25519 key.
# Every invocation is appended to MOCK_LOG.
_create_ssh_keygen_mock() {
  cat > "${TEST_DIR}/ssh-keygen" << 'MOCK'
#!/bin/bash
echo "ssh-keygen $*" >> "${MOCK_LOG}"
# Check for -l flag (fingerprint listing)
for arg in "$@"; do
  case "$arg" in
    -l*) echo "256 MD5:af:0d:c5:57:a8:fd:b2:82:5e:d4:c1:65:f0:0c:8a:9d test@test (ED25519)"; exit 0 ;;
  esac
done
# Parse -f flag for key creation
KEY_PATH=""
prev=""
for arg in "$@"; do
  if [ "$prev" = "-f" ]; then
    KEY_PATH="$arg"
  fi
  prev="$arg"
done
if [ -n "$KEY_PATH" ]; then
  mkdir -p "$(dirname "$KEY_PATH")"
  touch "$KEY_PATH"
  echo "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIHmcVdzydp72a/B69nmENZvCvjuk7xGpKdi5CvhkmNsv test@test" > "${KEY_PATH}.pub"
fi
exit 0
MOCK
  chmod +x "${TEST_DIR}/ssh-keygen"
}
# Install mock agent binaries and supporting tools into the sandbox PATH.
setup_mock_agents() {
  # Agent binaries
  _create_logging_mock claude openclaw zeroclaw codex opencode kilocode q

  # Tools used during agent install and file upload
  _create_logging_mock pip pip3 npm npx node openssl shred cargo go git base64

  # bun: pass `bun -e "..."` (JSON processing) through to the real binary;
  # log all other invocations as no-ops.
  # Fallback chain: real bun → node (with Bun.stdin polyfill) → exit 0
  # CI (GitHub Actions ubuntu-latest) has node but not bun, so the node
  # fallback is essential for _fly_json / _fly_list_orgs / list_servers.
  cat > "${TEST_DIR}/bun" << 'MOCKBUN'
#!/bin/bash
echo "bun $*" >> "${MOCK_LOG}"

# Find the real bun binary (skip our mock directory)
_find_real_bun() {
  local _self_dir
  _self_dir="$(cd "$(dirname "$0")" && pwd)"
  IFS=: read -ra _path_dirs <<< "$PATH"
  for _d in "${_path_dirs[@]}"; do
    if [[ "$_d" != "$_self_dir" && -x "$_d/bun" ]]; then
      echo "$_d/bun"
      return 0
    fi
  done
  return 1
}

# Delegate `bun run <file>` and `bun test <file>` to the real bun.
# fly/ agent shims use `bun run main.ts` — must pass through.
if [[ "$1" == "run" || "$1" == "test" ]]; then
  _real_bun=$(_find_real_bun) || { echo "real bun not found" >&2; exit 1; }
  exec "$_real_bun" "$@"
fi

if [[ "$1" == "-e" ]]; then
  _code="$2"
  shift 2 # remove -e and the code, leaving extra args (e.g. -- field default)
  _real_bun=$(_find_real_bun)
  if [[ -n "$_real_bun" ]]; then
    exec "$_real_bun" -e "$_code" "$@"
  fi
  # No real bun found — try node with a Bun.stdin polyfill
  _self_dir="$(cd "$(dirname "$0")" && pwd)"
  IFS=: read -ra _path_dirs <<< "$PATH"
  _real_node=""
  for _d in "${_path_dirs[@]}"; do
    if [[ "$_d" != "$_self_dir" && -x "$_d/node" ]]; then
      _real_node="$_d/node"
      break
    fi
  done
  if [[ -n "$_real_node" ]]; then
    # Polyfill Bun.stdin.text() for node: read all of stdin as a string.
    # --input-type=module enables top-level await (used by fly/lib scripts).
    _polyfill='globalThis.Bun={stdin:{text:()=>new Promise(r=>{let d="";process.stdin.setEncoding("utf8");process.stdin.on("data",c=>d+=c);process.stdin.on("end",()=>r(d))})}};'
    # Strip TypeScript type annotations for node compatibility.
    _js_code=$(printf '%s' "$_code" | sed -E 's/: (any\[\]|any|string|number|void)//g; s/ as any//g')
    exec "$_real_node" --input-type=module -e "${_polyfill}${_js_code}" "$@"
  fi
fi
exit 0
MOCKBUN
  chmod +x "${TEST_DIR}/bun"

  # Silent mocks (no logging needed)
  _create_silent_mock clear sleep

  # Mock timeout/gtimeout to just run the command (skip the timeout value)
  cat > "${TEST_DIR}/timeout" << 'MOCK'
#!/bin/bash
shift
exec "$@"
MOCK
  chmod +x "${TEST_DIR}/timeout"
  cp "${TEST_DIR}/timeout" "${TEST_DIR}/gtimeout"

  # Mock python3 — delegate to the real python3 for JSON parsing.
  # FIX: search PATH for an interpreter outside our mock directory instead
  # of hardcoding /usr/bin/python3, which does not exist on some systems
  # (e.g. Homebrew-only macOS or pyenv installs). Same technique as
  # _find_real_bun above.
  cat > "${TEST_DIR}/python3" << 'MOCK'
#!/bin/bash
_self_dir="$(cd "$(dirname "$0")" && pwd)"
IFS=: read -ra _path_dirs <<< "$PATH"
for _d in "${_path_dirs[@]}"; do
  if [[ "$_d" != "$_self_dir" && -x "$_d/python3" ]]; then
    exec "$_d/python3" "$@"
  fi
done
echo "mock python3: no real python3 found on PATH" >&2
exit 127
MOCK
  chmod +x "${TEST_DIR}/python3"

  # Mock 'ssh-keygen' — returns MD5 fingerprint matching fixture data
  _create_ssh_keygen_mock

  # Mock fly/flyctl CLI — handles ssh console, auth token, version
  _create_fly_mock
}
# Create the mock `fly` CLI (and an identical `flyctl` copy).
# Every invocation is logged to MOCK_LOG. Honors MOCK_ERROR_SCENARIO to
# inject failures:
#   ssh_tunnel_failure — `fly ssh …` and `fly machine exec …` exit 1 with
#                        tunnel/unreachable errors on stderr
#   ssh_timeout        — ssh/machine never report "ok" (always exit 1),
#                        so _fly_wait_for_ssh-style loops exhaust retries
# Normal mode answers just enough of the CLI surface for the agent scripts:
# `auth token`, `machine exec` (echoes "ok" for readiness probes),
# `machine list`, `ssh console`, and `version`.
_create_fly_mock() {
  cat > "${TEST_DIR}/fly" << 'MOCK'
#!/bin/bash
echo "fly $*" >> "${MOCK_LOG}"

# Simulate fly CLI failures when MOCK_ERROR_SCENARIO is set
case "${MOCK_ERROR_SCENARIO:-}" in
  ssh_tunnel_failure)
    case "$1" in
      ssh)
        echo "Error: failed to connect to tunnel: context deadline exceeded" >&2
        exit 1 ;;
      machine)
        case "${2:-}" in
          exec)
            echo "Error: machine not reachable" >&2
            exit 1 ;;
        esac ;;
    esac ;;
  ssh_timeout)
    case "$1" in
      ssh|machine)
        # Never return "ok" — simulates SSH not becoming ready
        exit 1 ;;
    esac ;;
esac

case "$1" in
  auth)
    case "${2:-}" in
      token) echo "test-token-fly" ;;
    esac ;;
  machine)
    case "${2:-}" in
      exec)
        # fly machine exec MACHINE_ID --app APP -- bash -c CMD
        all_args="$*"
        if [[ "$all_args" == *"echo ok"* ]] || [[ "$all_args" == *'echo\ ok'* ]]; then
          echo "ok"
        fi
        ;;
      list) echo "[]" ;;
    esac ;;
  ssh)
    # fly ssh console -a APP -C "bash -c CMD" --quiet
    # Extract the command and simulate its output
    all_args="$*"
    # Check for "echo ok" (may be escaped as echo\ ok by printf %q)
    if [[ "$all_args" == *"echo ok"* ]] || [[ "$all_args" == *'echo\ ok'* ]]; then
      echo "ok"
    fi
    ;;
  version)
    echo "fly v0.3.50" ;;
esac
exit 0
MOCK
  chmod +x "${TEST_DIR}/fly"
  cp "${TEST_DIR}/fly" "${TEST_DIR}/flyctl"
}
# Build a disposable $HOME under TEST_DIR with the directories and dummy
# SSH key pair agent scripts expect. Prints the fake home path.
setup_fake_home() {
  local fake_home="${TEST_DIR}/fakehome"
  local subdir
  for subdir in .ssh .config/spawn .claude .local/bin; do
    mkdir -p "${fake_home}/${subdir}"
  done
  # Dummy SSH key pair — contents only need to look plausible.
  printf '%s\n' "-----BEGIN OPENSSH PRIVATE KEY-----" > "${fake_home}/.ssh/id_ed25519"
  printf '%s\n' "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIHmcVdzydp72a/B69nmENZvCvjuk7xGpKdi5CvhkmNsv test@test" > "${fake_home}/.ssh/id_ed25519.pub"
  chmod 600 "${fake_home}/.ssh/id_ed25519"
  echo "${fake_home}"
}
# ============================================================
# Cloud API helpers (for use by test infra tests)
# ============================================================

# Strip API base URL to get just the endpoint path.
# Used by test/test-infra-sync.test.ts to validate cloud coverage.
# $1 url, $2 base to remove (interpreted by sed as a BRE).
_strip_simple_base() {
  local url="$1" pattern="$2"
  sed "s|${pattern}||" <<< "$url"
}
# Apply an arbitrary sed substitution to a URL and print the result.
# $1 url, $2 full sed expression (e.g. 's|…||').
_strip_pattern_base() {
  local url="$1" sed_pattern="$2"
  sed "$sed_pattern" <<< "$url"
}
# Reduce a full request URL to its endpoint path: remove a known cloud API
# base prefix (if any), then drop everything from the first '?'.
_strip_api_base() {
  local url="$1"
  local endpoint="$url"
  local base

  for base in \
    "https://api.hetzner.cloud/v1" \
    "https://api.digitalocean.com/v2" \
    "https://api.machines.dev/v1"; do
    if [[ "$url" == "${base}"* ]]; then
      endpoint="${url#"${base}"}"
      break
    fi
  done

  printf '%s\n' "${endpoint%%\?*}"
}
# Get required POST body fields for a cloud endpoint.
# Prints a space-separated field list, or nothing for unknown endpoints.
_get_required_fields() {
  local key="${1}:${2}"
  if [[ "$key" == "hetzner:/servers" ]]; then
    echo "name server_type image location"
  elif [[ "$key" == "digitalocean:/droplets" ]]; then
    echo "name region size image"
  elif [[ "$key" == fly:*/machines ]]; then
    echo "name region config"
  fi
}
# Validate POST request body contains required fields for major clouds.
# Used during mock script execution to catch invalid API requests.
# Logs BODY_ERROR:invalid_json:… (and returns 1) for malformed JSON, and
# BODY_ERROR:missing_field:… (return stays 0) per absent required field.
# Args: cloud method endpoint body
_validate_body() {
  local cloud="$1"
  local method="$2"
  local endpoint="$3"
  local body="$4"

  # Only POST requests carrying a body are validated.
  if [[ "$method" != "POST" || -z "$body" ]]; then
    return 0
  fi

  local required_fields
  required_fields=$(_get_required_fields "$cloud" "$endpoint")
  [[ -n "$required_fields" ]] || return 0

  # Reject bodies that aren't valid JSON at all.
  if ! printf '%s' "$body" | python3 -c "import json,sys; json.loads(sys.stdin.read())" 2>/dev/null; then
    echo "BODY_ERROR:invalid_json:${endpoint}" >> "${MOCK_LOG}"
    return 1
  fi

  # Log (but don't fail on) each missing required top-level field.
  local field
  for field in $required_fields; do
    if ! printf '%s' "$body" | python3 -c "import json,sys; d=json.loads(sys.stdin.read()); assert '$field' in d" 2>/dev/null; then
      echo "BODY_ERROR:missing_field:${field}:${endpoint}" >> "${MOCK_LOG}"
    fi
  done

  return 0
}
# ============================================================
# Cloud-specific env var setup
# ============================================================

# Export the env vars an agent script expects: a dummy OpenRouter key and
# zero poll delay, plus cloud-specific vars sourced from the fixture's
# _env.sh (if present).
setup_env_for_cloud() {
  local cloud="$1"

  # Universal env vars
  export OPENROUTER_API_KEY="sk-or-v1-0000000000000000000000000000000000000000000000000000000000000000"
  export INSTANCE_STATUS_POLL_DELAY=0

  # Cloud-specific env vars from fixture data
  local env_file="${FIXTURES_DIR}/${cloud}/_env.sh"
  # shellcheck disable=SC1090
  [[ ! -f "$env_file" ]] || source "$env_file"
}
# ============================================================
# Discovery
# ============================================================

# Print the name of every fixture directory that contains recorded data
# (marked by a _metadata.json file), one per line.
discover_clouds() {
  local fixture_dir
  for fixture_dir in "${FIXTURES_DIR}"/*/; do
    [[ -f "${fixture_dir}/_metadata.json" ]] || continue
    basename "$fixture_dir"
  done
}
# Print the agent name (basename without .sh) of every script in the
# given cloud's directory, one per line.
discover_agents() {
  local cloud="$1" script
  for script in "${REPO_ROOT}/${cloud}"/*.sh; do
    [[ -f "$script" ]] || continue
    basename "$script" .sh
  done
}
# ============================================================
# Test runner helpers
# ============================================================

# Wait for a process to complete or timeout.
# Stores the exit status into the variable named by $3: 124 on timeout
# (mirroring coreutils `timeout`), the child's status on failure. On
# success the variable is left untouched — callers pre-initialize it to 0.
# Args: pid timeout_seconds exit_code_var
_wait_with_timeout() {
  local pid="$1"
  local timeout="$2"
  local exit_code_var="$3"
  local i=0

  while kill -0 "$pid" 2>/dev/null; do
    if [[ "$i" -ge "$timeout" ]]; then
      # FIX: the child can exit between the kill -0 probe and this kill;
      # without `|| true` the failed kill would abort the whole suite
      # under `set -e`.
      kill -9 "$pid" 2>/dev/null || true
      wait "$pid" 2>/dev/null || true
      eval "${exit_code_var}=124"
      return
    fi
    sleep 1
    i=$((i + 1))
  done
  # $? inside the eval string is expanded before eval runs, so it captures
  # wait's (i.e. the child's) exit status.
  wait "$pid" 2>/dev/null || eval "${exit_code_var}=$?"
}
# Run a script in a sandboxed environment with a 4-second timeout.
# Sets exit_code variable in the caller's scope.
# Sandbox: mock binaries first on PATH, HOME pointed at the fake home,
# stdin closed, stdout+stderr captured to ${TEST_DIR}/output.log, and the
# MOCK_* variables telling the mock curl where fixtures live and which
# optional checks (body validation, state tracking, error injection) apply.
# Args: script_path cloud state_file fake_home
run_script_with_timeout() {
  local script_path="$1"
  local cloud="$2"
  local state_file="$3"
  local fake_home="$4"

  # Deliberately NOT local: read by the caller after we return.
  exit_code=0

  MOCK_LOG="${MOCK_LOG}" \
    MOCK_FIXTURE_DIR="${FIXTURES_DIR}/${cloud}" \
    MOCK_CLOUD="${cloud}" \
    MOCK_REPO_ROOT="${REPO_ROOT}" \
    MOCK_VALIDATE_BODY="${MOCK_VALIDATE_BODY:-}" \
    MOCK_TRACK_STATE="${MOCK_TRACK_STATE:-}" \
    MOCK_STATE_FILE="${state_file}" \
    MOCK_ERROR_SCENARIO="${MOCK_ERROR_SCENARIO:-}" \
    PATH="${TEST_DIR}:${PATH}" \
    HOME="${fake_home}" \
    bash "${script_path}" < /dev/null > "${TEST_DIR}/output.log" 2>&1 &
  local pid=$!
  _wait_with_timeout "$pid" 4 "exit_code"
}
# Print last 20 lines of the captured script output when it failed;
# print nothing on success. Args: exit_code
show_failure_output() {
  local exit_code="$1"
  [[ "${exit_code}" -ne 0 ]] || return 0
  printf '%b\n' "  ${RED}--- output (last 20 lines) ---${NC}"
  tail -20 "${TEST_DIR}/output.log" 2>/dev/null | while IFS= read -r line; do
    printf '    %s\n' "$line"
  done
  printf '%b\n' "  ${RED}--- end output ---${NC}"
}
# Assert that the script failed when an error scenario was injected.
# Returns 0 (with result recorded) if an error scenario is active, 1
# otherwise so the caller falls through to the normal assertions.
# Args: exit_code cloud agent
assert_error_scenario() {
  local exit_code="$1" cloud="$2" agent="$3"

  # Not in error-injection mode: tell the caller to run normal assertions.
  [[ -n "${MOCK_ERROR_SCENARIO:-}" ]] || return 1

  if [[ "${exit_code}" -eq 0 ]]; then
    printf '%b\n' "  ${RED}✗${NC} should fail on ${MOCK_ERROR_SCENARIO} but exited 0"
    FAILED=$((FAILED + 1))
    record_test_result "${cloud}" "${agent}" "fail"
  else
    printf '%b\n' "  ${GREEN}✓${NC} fails on ${MOCK_ERROR_SCENARIO} (exit code ${exit_code})"
    PASSED=$((PASSED + 1))
    record_test_result "${cloud}" "${agent}" "pass"
  fi
  return 0
}
# Assert that the expected cloud-specific API calls were made.
# Reads assertions from test/fixtures/{cloud}/_api_assertions.sh if present,
# otherwise falls back to a generic API call check.
# Args: cloud
assert_cloud_api_calls() {
  local cloud="$1"
  local assertions_file="${FIXTURES_DIR}/${cloud}/_api_assertions.sh"
  if [[ ! -f "$assertions_file" ]]; then
    assert_log_contains "curl (GET|POST) https://" "makes API calls"
    return 0
  fi
  # shellcheck disable=SC1090
  source "$assertions_file"
}
# Write pass/fail result to RESULTS_FILE if set (no-op otherwise).
# Args: cloud agent result [reason]
# Result format: cloud/agent:pass or cloud/agent:fail[:reason]
# Reasons: exit_code, missing_api_call, missing_env, no_fixture
record_test_result() {
  local cloud="$1" agent="$2" result="$3" reason="${4:-}"
  [[ -n "${RESULTS_FILE:-}" ]] || return 0
  local line="${cloud}/${agent}:${result}"
  [[ -z "$reason" ]] || line="${line}:${reason}"
  printf '%s\n' "$line" >> "${RESULTS_FILE}"
}
# ============================================================
# Test runner
# ============================================================

# Run an assertion and store the number of new failures in _ASSERT_DELTA.
# Usage: _tracked_assert <assertion_command> [args...]
# The assertion runs in the current shell so PASSED/FAILED propagate.
_tracked_assert() {
  local _fail_snapshot=$FAILED
  "$@"
  _ASSERT_DELTA=$((FAILED - _fail_snapshot))
}
# Determine the primary failure reason from tracked failure counts,
# checked in priority order: fixture gaps first, then exit code, API,
# env injection, SSH; "unknown" when nothing matched.
# Args: has_no_fixture exit_fails api_fails ssh_fails env_fails
# Prints the reason string to stdout.
_categorize_failure() {
  local has_no_fixture="$1" exit_fails="$2" api_fails="$3" ssh_fails="$4" env_fails="$5"
  if (( has_no_fixture > 0 )); then echo "no_fixture"; return 0; fi
  if (( exit_fails > 0 )); then echo "exit_code"; return 0; fi
  if (( api_fails > 0 )); then echo "missing_api_call"; return 0; fi
  if (( env_fails > 0 )); then echo "missing_env"; return 0; fi
  if (( ssh_fails > 0 )); then echo "missing_ssh"; return 0; fi
  echo "unknown"
}
# Run assertions for a script and track which categories failed.
# Outputs: _exit_failed, _api_failed, _ssh_failed, _env_failed (as 0/1) —
# deliberately NOT local, read later by _record_categorized_result.
# $3 (state file) is only used when MOCK_TRACK_STATE=1.
_run_assertions_and_track() {
  local exit_code="$1" cloud="$2"
  # local here so _tracked_assert (bash dynamic scoping) assigns into
  # this function's frame rather than a global.
  local _ASSERT_DELTA=0

  _tracked_assert assert_exit_code "${exit_code}" 0 "exits successfully"
  _exit_failed=$_ASSERT_DELTA

  _tracked_assert assert_cloud_api_calls "$cloud"
  _api_failed=$_ASSERT_DELTA

  _tracked_assert assert_log_contains "ssh " "uses SSH"
  _ssh_failed=$_ASSERT_DELTA

  _tracked_assert assert_env_injected "OPENROUTER_API_KEY"
  _env_failed=$_ASSERT_DELTA

  # Optional extra checks, enabled via env flags.
  if [[ "${MOCK_VALIDATE_BODY:-}" == "1" ]]; then
    assert_no_body_errors
  fi
  if [[ "${MOCK_TRACK_STATE:-}" == "1" ]]; then
    assert_server_cleaned_up "$3"
  fi
}
# Print 1 if the mock curl logged any NO_FIXTURE: entries, else 0.
_has_missing_fixture() {
  if grep -q "NO_FIXTURE:" "${MOCK_LOG}" 2>/dev/null; then
    echo 1
  else
    echo 0
  fi
}
# Reset per-test state: truncate the mock call log and the server state
# file, then export the env vars for the given cloud.
# Args: cloud state_file
_setup_test_env() {
  local cloud="$1" state_file="$2"
  : > "${MOCK_LOG}"
  : > "${state_file}"
  setup_env_for_cloud "$cloud"
}
# Record test result based on failure categories.
# Compares the global FAILED counter against its value from before the test
# ($3): any growth means this test contributed failures. The failure reason
# is derived from the _*_failed trackers set by _run_assertions_and_track.
# Args: cloud agent pre_failed
_record_categorized_result() {
  local cloud="$1"
  local agent="$2"
  local pre_failed="$3"

  # Number of assertion failures contributed by this test alone.
  local pre_fail=$((FAILED - pre_failed))
  if [[ "$pre_fail" -gt 0 ]]; then
    local _has_no_fixture
    _has_no_fixture=$(_has_missing_fixture)
    local _reason
    _reason=$(_categorize_failure "$_has_no_fixture" "$_exit_failed" "$_api_failed" "$_ssh_failed" "$_env_failed")
    record_test_result "${cloud}" "${agent}" "fail" "${_reason}"
  else
    record_test_result "${cloud}" "${agent}" "pass"
  fi
}
# Run per-agent install assertions.
# Sources the shared assertions file (which must define
# assert_agent_install), lets an optional per-cloud file override it,
# then invokes assert_agent_install. Silently no-ops when the shared
# file is absent.
_run_agent_assertions() {
  local cloud="$1" agent="$2"
  local shared_file="${FIXTURES_DIR}/_shared_agent_assertions.sh"
  local cloud_file="${FIXTURES_DIR}/${cloud}/_agent_assertions.sh"

  [[ -f "$shared_file" ]] || return 0
  # shellcheck disable=SC1090
  source "$shared_file"
  # Apply per-cloud overrides if they exist
  if [[ -f "$cloud_file" ]]; then
    # shellcheck disable=SC1090
    source "$cloud_file"
  fi
  assert_agent_install "$cloud" "$agent"
}
# Run the full mock test for one cloud/agent pair: sandbox the script,
# run it with a timeout, then apply either error-scenario or normal-mode
# assertions and record the categorized result.
# Skips when the script file is missing, or for fly/ in normal mode.
# Args: cloud agent
run_test() {
  local cloud="$1"
  local agent="$2"
  local script_path="${REPO_ROOT}/${cloud}/${agent}.sh"

  if [[ ! -f "$script_path" ]]; then
    printf '%b\n' "  ${YELLOW}skip${NC} ${cloud}/${agent}.sh — file not found"
    SKIPPED=$((SKIPPED + 1))
    return 0
  fi

  # fly/ scripts use TypeScript (bun) with native fetch() for API calls.
  # Fixture-based mock tests (which intercept curl) don't apply.
  # Fly coverage comes from: bun test (44 tests) + fly failure mode tests (4 tests).
  if [[ "$cloud" == "fly" && -z "${MOCK_ERROR_SCENARIO:-}" ]]; then
    printf '%b\n' "  ${YELLOW}skip${NC} ${cloud}/${agent}.sh — TS provider (tested via bun test + failure modes)"
    SKIPPED=$((SKIPPED + 1))
    return 0
  fi

  printf '%b\n' "  ${CYAN}test${NC} ${cloud}/${agent}.sh"

  # Snapshot FAILED so _record_categorized_result can tell whether this
  # particular test added failures.
  local _pre_failed="${FAILED}"
  local fake_home
  fake_home=$(setup_fake_home)
  local state_file="${TEST_DIR}/state_${cloud}_${agent}.log"

  _setup_test_env "$cloud" "$state_file"

  local exit_code
  run_script_with_timeout "${script_path}" "${cloud}" "${state_file}" "${fake_home}"
  show_failure_output "${exit_code}"

  # Error scenario mode: just check that script failed, then return
  if assert_error_scenario "${exit_code}" "${cloud}" "${agent}"; then
    printf '\n'
    return 0
  fi

  # Normal mode: run standard assertions and track failures per category
  _run_assertions_and_track "${exit_code}" "${cloud}" "${state_file}"

  # Per-agent install assertions
  _run_agent_assertions "$cloud" "$agent"

  _record_categorized_result "${cloud}" "${agent}" "$_pre_failed"

  printf '\n'
}
# ============================================================
# Fly.io failure-mode tests (#1579)
# ============================================================

# Run a single Fly.io agent script under a specific error scenario.
# Expects MOCK_ERROR_SCENARIO to trigger error injection in mock curl and/or fly CLI.
# Passes when the script exits non-zero (the injected failure was detected
# and propagated); fails when it exits 0 despite the injected error.
# Args: scenario agent
_run_fly_error_test() {
  local scenario="$1"
  local agent="$2"
  local script_path="${REPO_ROOT}/fly/${agent}.sh"

  [[ -f "$script_path" ]] || return 0

  printf '%b\n' "  ${CYAN}test${NC} fly/${agent}.sh [${scenario}]"

  local fake_home
  fake_home=$(setup_fake_home)
  local state_file="${TEST_DIR}/state_fly_${agent}_${scenario}.log"

  : > "${MOCK_LOG}"
  setup_env_for_cloud "fly"
  : > "${state_file}"

  # Re-create fly mock so it picks up the error scenario
  _create_fly_mock

  local exit_code=0
  MOCK_LOG="${MOCK_LOG}" \
    MOCK_FIXTURE_DIR="${FIXTURES_DIR}/fly" \
    MOCK_CLOUD="fly" \
    MOCK_REPO_ROOT="${REPO_ROOT}" \
    MOCK_ERROR_SCENARIO="${scenario}" \
    MOCK_STATE_FILE="${state_file}" \
    SPAWN_NON_INTERACTIVE=1 \
    PATH="${TEST_DIR}:${PATH}" \
    HOME="${fake_home}" \
    bash "${script_path}" < /dev/null > "${TEST_DIR}/output.log" 2>&1 &
  local pid=$!
  # Larger budget than the normal 4s: fly shims may need to launch bun.
  _wait_with_timeout "$pid" 10 "exit_code"

  if [[ "${exit_code}" -ne 0 ]]; then
    printf '%b\n' "  ${GREEN}✓${NC} fails on ${scenario} (exit code ${exit_code})"
    PASSED=$((PASSED + 1))
  else
    printf '%b\n' "  ${RED}✗${NC} should fail on ${scenario} but exited 0"
    FAILED=$((FAILED + 1))
  fi
  printf '\n'
}
# Run all Fly.io failure-mode tests using a single representative agent.
# Uses claude.sh as the test subject since it exercises the full
# provisioning path. Scenarios:
#   rate_limit         — cloud API returns 429
#   create_failure     — POST to */machines* returns 422
#   ssh_tunnel_failure — fly ssh console / fly machine exec exit non-zero
#   ssh_timeout        — fly CLI never reports "ok"; retries are exhausted
run_fly_failure_tests() {
  printf '%b\n' "${CYAN}━━━ fly failure modes (#1579) ━━━${NC}"

  local test_agent="claude"
  if [[ ! -f "${REPO_ROOT}/fly/${test_agent}.sh" ]]; then
    printf '%b\n' "  ${YELLOW}skip${NC} fly/${test_agent}.sh not found"
    SKIPPED=$((SKIPPED + 1))
    return 0
  fi

  local scenario
  for scenario in rate_limit create_failure ssh_tunnel_failure ssh_timeout; do
    _run_fly_error_test "$scenario" "$test_agent"
  done
}
# ============================================================
# Main
# ============================================================

printf '%b\n' "${CYAN}===============================${NC}"
printf '%b\n' "${CYAN} Spawn Mock Test Suite${NC}"
printf '%b\n' "${CYAN}===============================${NC}"
printf '\n'

# Parse arguments
FILTER_CLOUD="${1:-}"
FILTER_AGENT="${2:-}"

# Set up mocks once
setup_mock_curl
setup_mock_ssh
setup_mock_agents

# Discover what to test
if [[ -n "$FILTER_CLOUD" ]]; then
  CLOUDS="$FILTER_CLOUD"
  if [[ ! -d "${FIXTURES_DIR}/${FILTER_CLOUD}" ]]; then
    printf '%b\n' "${RED}No fixtures for cloud: ${FILTER_CLOUD}${NC}"
    printf "Available: %s\n" "$(discover_clouds | tr '\n' ' ')"
    exit 1
  fi
else
  CLOUDS=$(discover_clouds)
fi

if [[ -z "$CLOUDS" ]]; then
  printf '%b\n' "${YELLOW}No fixture data found in ${FIXTURES_DIR}/${NC}"
  printf "Run test/record.sh first to record API fixtures.\n"
  exit 0
fi

printf "Fixtures dir: %s\n" "${FIXTURES_DIR}"
printf "Clouds: %s\n" "$CLOUDS"
printf '\n'

# --- Run clouds in parallel ---
# Each cloud runs in its own backgrounded subshell with an isolated
# TEST_DIR, mock set and counters; output and counts go to
# CLOUD_RESULTS_DIR and are merged after all subshells finish.
CLOUD_RESULTS_DIR="${TEST_DIR}/cloud_results"
mkdir -p "${CLOUD_RESULTS_DIR}"

CLOUD_PIDS=""
for cloud in $CLOUDS; do
  (
    # Isolated per-cloud state
    CLOUD_TEST_DIR=$(mktemp -d)
    MOCK_LOG="${CLOUD_TEST_DIR}/mock_calls.log"
    CLOUD_PASSED=0
    CLOUD_FAILED=0
    CLOUD_SKIPPED=0

    # Re-create mocks in per-cloud temp dir (curl/ssh/agents need own copies)
    TEST_DIR="${CLOUD_TEST_DIR}"
    setup_mock_curl
    setup_mock_ssh
    setup_mock_agents

    # Override counters used by assertions (they modify PASSED/FAILED/SKIPPED)
    PASSED=0
    FAILED=0
    SKIPPED=0

    printf '%b\n' "${CYAN}━━━ ${cloud} ━━━${NC}"

    if [[ -n "$FILTER_AGENT" ]]; then
      AGENTS="$FILTER_AGENT"
    else
      AGENTS=$(discover_agents "$cloud")
    fi

    if [[ -z "$AGENTS" ]]; then
      printf '%b\n' "  ${YELLOW}skip${NC} no agent scripts found in ${cloud}/"
      SKIPPED=$((SKIPPED + 1))
    else
      for agent in $AGENTS; do
        run_test "$cloud" "$agent"
      done
    fi
    printf '\n'

    # Write counts to results file for aggregation
    printf '%d %d %d\n' "$PASSED" "$FAILED" "$SKIPPED" > "${CLOUD_RESULTS_DIR}/${cloud}.counts"

    rm -rf "${CLOUD_TEST_DIR}"
  ) > "${CLOUD_RESULTS_DIR}/${cloud}.log" 2>&1 &
  CLOUD_PIDS="${CLOUD_PIDS} $!"
done

# Wait for all clouds to finish
for pid in $CLOUD_PIDS; do
  wait "$pid" 2>/dev/null || true
done

# Print output from each cloud (in discovery order for consistent output)
for cloud in $CLOUDS; do
  if [[ -f "${CLOUD_RESULTS_DIR}/${cloud}.log" ]]; then
    cat "${CLOUD_RESULTS_DIR}/${cloud}.log"
  fi
done

# Aggregate results from all clouds
for cloud in $CLOUDS; do
  if [[ -f "${CLOUD_RESULTS_DIR}/${cloud}.counts" ]]; then
    read -r p f s < "${CLOUD_RESULTS_DIR}/${cloud}.counts"
    PASSED=$((PASSED + p))
    FAILED=$((FAILED + f))
    SKIPPED=$((SKIPPED + s))
  fi
done

# --- Fly.io failure-mode tests (#1579) ---
# Run only when fly fixtures exist and no agent filter is active
if [[ -d "${FIXTURES_DIR}/fly" && ( -z "$FILTER_CLOUD" || "$FILTER_CLOUD" == "fly" ) && -z "$FILTER_AGENT" ]]; then
  # Same isolation pattern as the per-cloud subshells above, but run in
  # the foreground since it is the last test group.
  (
    FLY_FAIL_TEST_DIR=$(mktemp -d)
    TEST_DIR="${FLY_FAIL_TEST_DIR}"
    MOCK_LOG="${FLY_FAIL_TEST_DIR}/mock_calls.log"
    PASSED=0
    FAILED=0
    SKIPPED=0

    setup_mock_curl
    setup_mock_ssh
    setup_mock_agents

    run_fly_failure_tests

    printf '%d %d %d\n' "$PASSED" "$FAILED" "$SKIPPED" > "${CLOUD_RESULTS_DIR}/fly_failures.counts"
    rm -rf "${FLY_FAIL_TEST_DIR}"
  ) > "${CLOUD_RESULTS_DIR}/fly_failures.log" 2>&1

  if [[ -f "${CLOUD_RESULTS_DIR}/fly_failures.log" ]]; then
    cat "${CLOUD_RESULTS_DIR}/fly_failures.log"
  fi
  if [[ -f "${CLOUD_RESULTS_DIR}/fly_failures.counts" ]]; then
    read -r p f s < "${CLOUD_RESULTS_DIR}/fly_failures.counts"
    PASSED=$((PASSED + p))
    FAILED=$((FAILED + f))
    SKIPPED=$((SKIPPED + s))
  fi
fi

# --- Summary ---
printf '%b\n' "${CYAN}===============================${NC}"
TOTAL=$((PASSED + FAILED + SKIPPED))
printf '%b\n' "  Results: ${GREEN}${PASSED} passed${NC}, ${RED}${FAILED} failed${NC}, ${YELLOW}${SKIPPED} skipped${NC}, ${TOTAL} total"
printf '%b\n' "${CYAN}===============================${NC}"

# Non-zero exit when any assertion failed (for CI).
if [[ "$FAILED" -gt 0 ]]; then
  exit 1
fi
exit 0