spawn/shared/common.sh

#!/bin/bash
# shellcheck disable=SC2154
# Shared bash functions used across all spawn scripts
# Provider-agnostic utilities for logging, input, OAuth, etc.
#
# This file is meant to be sourced by cloud provider-specific common.sh files.
# It does not set bash flags (like set -eo pipefail) as those should be set
# by the scripts that source this file.

# ============================================================
# Debug mode
# ============================================================

# Enable debug output if SPAWN_DEBUG is set
if [[ -n "${SPAWN_DEBUG:-}" ]]; then
    set -x
fi

# ============================================================
# Color definitions and logging
# ============================================================

# Use non-readonly vars to avoid errors if sourced multiple times
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
CYAN='\033[0;36m'
NC='\033[0m' # No Color

# Print colored messages (to stderr so they don't pollute command substitution output)
log_info() {
    printf '%b\n' "${GREEN}${1}${NC}" >&2
}

log_warn() {
    printf '%b\n' "${YELLOW}${1}${NC}" >&2
}

log_error() {
    printf '%b\n' "${RED}${1}${NC}" >&2
}

# Progress/status messages (use instead of log_warn for non-warning status updates)
log_step() {
    printf '%b\n' "${CYAN}${1}${NC}" >&2
}

# Print a structured diagnostic: header, possible causes, and how-to-fix steps.
# Arguments: HEADER CAUSE... --- FIX...
# The literal "---" separates causes from fixes.
_log_diagnostic() {
    local header="${1}"; shift
    log_error "${header}"
    log_error ""
    log_error "Possible causes:"
    while [[ $# -gt 0 && "${1}" != "---" ]]; do
        log_error "  - ${1}"; shift
    done
    if [[ $# -gt 0 ]]; then
        shift  # skip ---
        log_error ""
        log_error "How to fix:"
        local i=1
        while [[ $# -gt 0 ]]; do
            log_error "  ${i}. ${1}"; shift
            i=$((i + 1))
        done
    fi
}

# Log actionable guidance when agent installation verification fails.
# Usage: log_install_failed AGENT_NAME [INSTALL_CMD] [SERVER_IP]
# Example: log_install_failed "Claude Code" "curl -fsSL https://claude.ai/install.sh | bash" "$IP"
log_install_failed() {
    local agent_name="${1}"
    local install_cmd="${2:-}"
    local server_ip="${3:-}"

    log_error "${agent_name} installation failed"
    log_error ""
    log_error "The agent could not be installed or verified on the server."
    log_error ""
    printf '%b\n' "${YELLOW}Common causes:${NC}" >&2
    log_error "  • Network timeout downloading packages (npm, pip, etc.)"
    log_error "  • Insufficient disk space or memory on the server"
    log_error "  • Missing system dependencies for ${agent_name}"
    log_error "  • Cloud provider's package mirror temporarily unavailable"
    log_error ""
    printf '%b\n' "${YELLOW}Next steps:${NC}" >&2
    if [[ -n "${server_ip}" ]]; then
        log_error "  1. SSH into the server to investigate:"
        log_error "     ${CYAN}ssh root@${server_ip}${NC}"
        log_error "     ${CYAN}df -h${NC}   # Check disk space"
        log_error "     ${CYAN}free -h${NC}  # Check memory"
    fi
    if [[ -n "${install_cmd}" ]]; then
        log_error "  2. Try manual installation:"
        log_error "     ${CYAN}${install_cmd}${NC}"
    fi
    log_error "  3. Retry with a fresh server (many failures are transient)"
    log_error "     ${CYAN}spawn <agent> <cloud>${NC}"
}

# ============================================================
# Configurable timing constants
# ============================================================

# Polling interval for OAuth code waiting and other wait loops
# Set SPAWN_POLL_INTERVAL=0.1 for faster testing, or higher for slow networks
POLL_INTERVAL="${SPAWN_POLL_INTERVAL:-1}"

# ============================================================
# Dependency checks
# ============================================================

# Check if Python 3 is available (required for JSON parsing throughout Spawn)
check_python_available() {
    if ! command -v python3 &> /dev/null; then
        log_error "Python 3 is required but not installed"
        log_error ""
        log_error "Spawn uses Python 3 for JSON parsing and API interactions."
        log_error ""
        printf '%b\n' "${YELLOW}Install Python 3:${NC}" >&2
        log_error "  ${CYAN}# Ubuntu/Debian${NC}"
        log_error "  sudo apt-get update && sudo apt-get install -y python3"
        log_error ""
        log_error "  ${CYAN}# Fedora/RHEL${NC}"
        log_error "  sudo dnf install -y python3"
        log_error ""
        log_error "  ${CYAN}# macOS${NC}"
        log_error "  brew install python3"
        log_error ""
        log_error "  ${CYAN}# Arch Linux${NC}"
        log_error "  sudo pacman -S python"
        log_error ""
        return 1
    fi
    return 0
}

# Install jq if not already present (required by some cloud providers)
# Platform-specific jq install helpers
_install_jq_brew() {
    if command -v brew &>/dev/null; then
        brew install jq || { log_error "Failed to install jq via Homebrew. Run 'brew install jq' manually."; return 1; }
    else
        log_error "jq is required but not installed"
        log_error "Install it with: brew install jq"
        log_error "If Homebrew is not available: https://jqlang.github.io/jq/download/"
        return 1
    fi
}

_install_jq_apt() {
    sudo apt-get update -qq && sudo apt-get install -y jq || {
        log_error "Failed to install jq via apt. Run 'sudo apt-get install -y jq' manually."
        return 1
    }
}

_install_jq_dnf() {
    sudo dnf install -y jq || {
        log_error "Failed to install jq via dnf. Run 'sudo dnf install -y jq' manually."
        return 1
    }
}

_install_jq_apk() {
    sudo apk add jq || {
        log_error "Failed to install jq via apk. Run 'sudo apk add jq' manually."
        return 1
    }
}

_report_jq_not_found() {
    log_error "jq is required but not installed"
    log_error ""
    printf '%b\n' "${YELLOW}Install jq for your system:${NC}" >&2
    log_error "  ${CYAN}# Ubuntu/Debian${NC}"
    log_error "  sudo apt-get install -y jq"
    log_error ""
    log_error "  ${CYAN}# Fedora/RHEL${NC}"
    log_error "  sudo dnf install -y jq"
    log_error ""
    log_error "  ${CYAN}# macOS${NC}"
    log_error "  brew install jq"
    log_error ""
    log_error "  ${CYAN}# Other systems${NC}"
    log_error "  https://jqlang.github.io/jq/download/"
}

ensure_jq() {
    if command -v jq &>/dev/null; then
        return 0
    fi

    log_step "Installing jq..."

    if [[ "$OSTYPE" == "darwin"* ]]; then
        _install_jq_brew || return 1
    elif command -v apt-get &>/dev/null; then
        _install_jq_apt || return 1
    elif command -v dnf &>/dev/null; then
        _install_jq_dnf || return 1
    elif command -v apk &>/dev/null; then
        _install_jq_apk || return 1
    else
        _report_jq_not_found
        return 1
    fi

    if ! command -v jq &>/dev/null; then
        log_error "jq was installed but is not found in PATH"
        log_error "Try opening a new terminal or run: hash -r"
        return 1
    fi

    log_info "jq installed"
}

# ============================================================
# Input handling
# ============================================================

# Safe read function that works in both interactive and non-interactive modes
# Set SPAWN_NON_INTERACTIVE=1 to force immediate failure (for CI/CD, e2e tests)
safe_read() {
    local prompt="${1}"
    local result=""

    # Honour explicit non-interactive flag (e2e tests, CI/CD pipelines)
    if [[ "${SPAWN_NON_INTERACTIVE:-}" == "1" ]]; then
        log_error "Cannot prompt for input: SPAWN_NON_INTERACTIVE is set"
        log_error "Set all required environment variables before launching spawn."
        return 1
    fi

    if [[ -t 0 ]]; then
        # stdin is a terminal - read directly
        read -r -p "${prompt}" result || return 1
    elif echo -n "" > /dev/tty 2>/dev/null; then
        # /dev/tty is functional - use it
        read -r -p "${prompt}" result < /dev/tty || return 1
    else
        # No interactive input available
        log_error "Cannot prompt for input: no interactive terminal available"
        log_error ""
        log_error "You're running spawn in non-interactive mode (piped input, background job, or CI/CD)."
        log_error "Set all required environment variables before launching spawn."
        log_error ""
        log_error "Example:"
        log_error "  export OPENROUTER_API_KEY=sk-or-v1-..."
        log_error "  export CLOUD_API_TOKEN=..."
        log_error "  spawn <agent> <cloud>"
        log_error ""
        log_error "Or use inline variables:"
        log_error "  OPENROUTER_API_KEY=sk-or-v1-... spawn <agent> <cloud>"
        return 1
    fi

    echo "${result}"
}

# ============================================================
# Network utilities
# ============================================================

# Listen on a port with netcat (handles busybox/Termux nc requiring -p flag)
# Find a working Node.js runtime (bun preferred, then node)
find_node_runtime() {
    if command -v bun &>/dev/null; then echo "bun"; return 0; fi
    if command -v node &>/dev/null; then echo "node"; return 0; fi
    return 1
}

# Open browser to URL (supports macOS, Linux, Termux)
open_browser() {
    local url=${1}
    if command -v termux-open-url &> /dev/null; then
        termux-open-url "${url}" </dev/null
    elif command -v open &> /dev/null; then
        open "${url}" </dev/null
    elif command -v xdg-open &> /dev/null; then
        xdg-open "${url}" </dev/null
    else
        log_step "Please open: ${url}"
    fi
}

# Validate model ID to prevent command injection
validate_model_id() {
    local model_id="${1}"
    if [[ -z "${model_id}" ]]; then return 0; fi
    if [[ ! "${model_id}" =~ ^[a-zA-Z0-9/_:.-]+$ ]]; then
        log_error "Invalid model ID: '${model_id}'"
        log_error ""
        log_error "Model IDs can only contain:"
        log_error "  - Letters (a-z, A-Z)"
        log_error "  - Numbers (0-9)"
        log_error "  - Special characters: / - _ : ."
        log_error ""
        log_error "Examples of valid model IDs:"
        log_error "  - anthropic/claude-3.5-sonnet"
        log_error "  - openai/gpt-4-turbo"
        log_error "  - openrouter/auto"
        log_error ""
        log_error "Browse all models at: https://openrouter.ai/models"
        return 1
    fi
    return 0
}

# Verify an OpenRouter API key is valid by hitting the /auth/key endpoint
# Returns 0 if valid, 1 if invalid/expired/network error
verify_openrouter_key() {
    local api_key="${1}"
    if [[ -z "${api_key}" ]]; then return 1; fi
    if [[ -n "${SPAWN_SKIP_API_VALIDATION:-}" || "${BUN_ENV:-}" == "test" || "${NODE_ENV:-}" == "test" ]]; then return 0; fi
    if ! command -v curl &>/dev/null; then return 0; fi  # skip if no curl

    local response http_code
    # Pass auth header via stdin (-K -) so the API key isn't visible in ps output
    response=$(printf 'header = "Authorization: Bearer %s"\n' "${api_key}" | \
        curl -s --connect-timeout 5 --max-time 10 -w "\n%{http_code}" \
        -K - \
        "https://openrouter.ai/api/v1/auth/key" 2>/dev/null) || return 0  # network error = skip
    http_code=$(printf '%s' "${response}" | tail -1)

    case "${http_code}" in
        200) return 0 ;;
        401|403)
            log_error "OpenRouter API key is invalid or expired"
            log_error "Get a new key at: https://openrouter.ai/settings/keys"
            return 1 ;;
        *)  return 0 ;;  # unknown status = don't block
    esac
}

# Verify a model ID exists on OpenRouter
# Returns 0 if valid (or can't check), 1 if model not found
verify_openrouter_model() {
    local model_id="${1}"
    if [[ -z "${model_id}" ]]; then return 0; fi
    if [[ "${model_id}" == "openrouter/auto" || "${model_id}" == "openrouter/free" ]]; then return 0; fi
    if [[ -n "${SPAWN_SKIP_API_VALIDATION:-}" || "${BUN_ENV:-}" == "test" || "${NODE_ENV:-}" == "test" ]]; then return 0; fi
    if ! command -v curl &>/dev/null; then return 0; fi
    if ! command -v python3 &>/dev/null; then return 0; fi

    local models_json
    models_json=$(curl -s --connect-timeout 5 --max-time 15 \
        "https://openrouter.ai/api/v1/models" 2>/dev/null) || return 0

    # Extract model IDs and check for exact match
    local found
    found=$(printf '%s' "${models_json}" | python3 -c "
import sys, json
try:
    data = json.load(sys.stdin)
    ids = [m['id'] for m in data.get('data', [])]
    print('yes' if sys.argv[1] in ids else 'no')
except: print('skip')
" "${model_id}" 2>/dev/null)

    if [[ "${found}" == "no" ]]; then
        log_warn "Model '${model_id}' not found on OpenRouter"
        log_info "Browse available models at: https://openrouter.ai/models"
        return 1
    fi
    return 0
}

# Helper to show server name validation requirements
show_server_name_requirements() {
    log_error ""
    log_error "Server name requirements:"
    log_error "  - Length: 3-63 characters"
    log_error "  - Characters: letters (a-z, A-Z), numbers (0-9), dashes (-)"
    log_error "  - No leading or trailing dashes"
    log_error ""
    log_error "Examples of valid names:"
    log_error "  - my-server"
    log_error "  - dev-box-01"
    log_error "  - spawn-agent"
}

# Validate server/sprite name to prevent injection and ensure cloud provider compatibility
# Server names must be 3-63 characters, alphanumeric + dash, no leading/trailing dash
validate_server_name() {
    local server_name="${1}"

    if [[ -z "${server_name}" ]]; then
        log_error "Server name cannot be empty"
        return 1
    fi

    local name_length=${#server_name}

    # Check length (3-63 characters)
    if [[ ${name_length} -lt 3 ]] || [[ ${name_length} -gt 63 ]]; then
        local constraint
        if [[ ${name_length} -lt 3 ]]; then
            constraint="too short (minimum 3)"
        else
            constraint="too long (maximum 63)"
        fi
        log_error "Server name ${constraint}: '${server_name}'"
        show_server_name_requirements
        return 1
    fi

    # Check for valid characters (alphanumeric + dash only)
    if [[ ! "${server_name}" =~ ^[a-zA-Z0-9-]+$ ]]; then
        log_error "Invalid server name: '${server_name}' (must contain only alphanumeric characters and dashes)"
        show_server_name_requirements
        return 1
    fi

    # Check no leading or trailing dash
    if [[ "${server_name}" =~ ^- ]] || [[ "${server_name}" =~ -$ ]]; then
        log_error "Invalid server name: '${server_name}' (cannot start or end with dash)"
        show_server_name_requirements
        return 1
    fi

    return 0
}

# Validate API token to prevent command injection
# Allows alphanumeric, dashes, underscores, and common token separators
# Blocks shell metacharacters: ; ' " < > | & $ ` \ ( )
validate_api_token() {
    local token="${1}"

    if [[ -z "${token}" ]]; then
        log_error "API token cannot be empty"
        log_error "Please provide a valid API token"
        return 1
    fi

    # Block shell metacharacters that could enable command injection
    if [[ "${token}" =~ [\;\'\"\<\>\|\&\$\`\\\(\)] ]]; then
        log_error "Invalid token format: contains special characters"
        log_error "API tokens should only contain letters, numbers, dashes, and underscores."
        log_error "Copy the token directly from your provider's dashboard without extra characters."
        return 1
    fi

    return 0
}

# Validate region/location name (cloud provider regions, datacenters, zones)
# Alphanumeric, hyphens, underscores only, 1-63 chars
validate_region_name() {
    local region="${1}"

    if [[ -z "${region}" ]]; then
        log_error "Region name cannot be empty"
        return 1
    fi

    if [[ ! "${region}" =~ ^[a-zA-Z0-9_-]{1,63}$ ]]; then
        log_error "Invalid region name: '${region}'"
        log_error "Region names must be 1-63 characters: alphanumeric, hyphens, underscores only"
        return 1
    fi

    return 0
}

# Validate resource name (generic: server types, sizes, plans, etc.)
# Alphanumeric, hyphens, underscores, dots, 1-63 chars
validate_resource_name() {
    local name="${1}"

    if [[ -z "${name}" ]]; then
        log_error "Resource name cannot be empty"
        return 1
    fi

    if [[ ! "${name}" =~ ^[a-zA-Z0-9_.-]{1,63}$ ]]; then
        log_error "Invalid resource name: '${name}'"
        log_error "Resource names must be 1-63 characters: alphanumeric, hyphens, underscores, dots only"
        return 1
    fi

    return 0
}

# Validated read wrapper - reads input and validates it with a validator function
# Usage: validated_read "prompt" validator_function_name
# Returns: Validated input via stdout, or exits on error/empty input
# Example: api_key=$(validated_read "Enter API key: " validate_api_token)
validated_read() {
    local prompt="${1}"
    local validator="${2}"
    local value

    while true; do
        value=$(safe_read "${prompt}") || return 1

        if [[ -z "${value}" ]]; then
            return 1
        fi

        if "${validator}" "${value}"; then
            echo "${value}"
            return 0
        fi

        log_warn "Please try again."
    done
}

# Convert a display name to a valid kebab-case resource identifier.
# "My Dev Box" → "my-dev-box"   "Claude 2024!" → "claude-2024"
_to_kebab_case() {
    printf '%s' "${1}" \
        | tr '[:upper:]' '[:lower:]' \
        | sed 's/[^a-z0-9-]/-/g' \
        | sed 's/-\{2,\}/-/g' \
        | sed 's/^-//;s/-$//'
}

# Ask for a human-readable spawn name upfront, then derive a kebab-case
# default used for resource naming on every cloud.
# Idempotent — safe to call multiple times; skips prompt if already done.
# Respects SPAWN_NAME when set by the CLI (e.g. spawn gcp claude --name "My Box").
# Exports: SPAWN_NAME_DISPLAY, SPAWN_NAME_KEBAB
prompt_spawn_name() {
    # Already prompted this session — nothing to do
    if [[ -n "${SPAWN_NAME_KEBAB:-}" ]]; then
        return 0
    fi

    local display_name
    if [[ -n "${SPAWN_NAME:-}" ]]; then
        display_name="${SPAWN_NAME}"
        log_info "Spawn name: ${display_name}"
    else
        echo "" >&2
        display_name=$(safe_read 'Spawn name (e.g. "My Dev Box"): ') || display_name=""
        [[ -z "${display_name}" ]] && display_name="spawn"
    fi

    local kebab
    kebab=$(_to_kebab_case "${display_name}")
    [[ -z "${kebab}" ]] && kebab="spawn"

    export SPAWN_NAME_DISPLAY="${display_name}"
    export SPAWN_NAME_KEBAB="${kebab}"
    log_info "Resource name: ${kebab}"
}

# Generic function to get resource name from environment or prompt
# Usage: get_resource_name ENV_VAR_NAME PROMPT_TEXT
# Returns: Resource name via stdout
# Example: get_resource_name "LIGHTSAIL_SERVER_NAME" "Enter Lightsail instance name: "
get_resource_name() {
    local env_var_name="${1}"
    local prompt_text="${2}"
    local resource_value="${!env_var_name}"

    # Platform-specific env var takes absolute precedence
    if [[ -n "${resource_value}" ]]; then
        log_info "Using ${prompt_text%:*} from environment: ${resource_value}"
        echo "${resource_value}"
        return 0
    fi

    # Show spawn name kebab as a pre-filled default (press Enter to accept)
    local default_name="${SPAWN_NAME_KEBAB:-}"
    local effective_prompt="${prompt_text}"
    if [[ -n "${default_name}" ]]; then
        effective_prompt="${prompt_text%:*} [${default_name}]: "
    fi

    local name
    name=$(safe_read "${effective_prompt}") || name=""
    [[ -z "${name}" && -n "${default_name}" ]] && name="${default_name}"

    if [[ -z "${name}" ]]; then
        log_error "${prompt_text%:*} is required but not provided"
        log_error ""
        log_error "For non-interactive usage, set the environment variable:"
        log_error "  ${env_var_name}=your-value spawn ..."
        return 1
    fi
    echo "${name}"
}

# Get server name from environment or prompt, with validation
# Usage: get_validated_server_name ENV_VAR_NAME PROMPT_TEXT
# Returns: Validated server name via stdout
# Example: get_validated_server_name "HETZNER_SERVER_NAME" "Enter server name: "
get_validated_server_name() {
    local server_name
    server_name=$(get_resource_name "$1" "$2") || return 1

    if ! validate_server_name "$server_name"; then
        return 1
    fi

    echo "$server_name"
}

# Interactively prompt for model ID with validation
# Usage: get_model_id_interactive [default_model] [agent_name]
# Returns: Model ID via stdout
# Example: MODEL_ID=$(get_model_id_interactive "openrouter/auto" "gptme")
get_model_id_interactive() {
    local default_model="${1:-openrouter/auto}"
    local agent_name="${2:-}"

    # If MODEL_ID is already set in the environment, validate and use it without prompting
    if [[ -n "${MODEL_ID:-}" ]]; then
        if ! validate_model_id "${MODEL_ID}"; then
            log_error "MODEL_ID environment variable contains invalid characters"
            return 1
        fi
        if ! verify_openrouter_model "${MODEL_ID}"; then
            log_warn "MODEL_ID from environment not found on OpenRouter, prompting..."
            MODEL_ID=""
        else
            echo "${MODEL_ID}"
            return 0
        fi
    fi

    local max_attempts=3 attempt=0
    while true; do
        attempt=$((attempt + 1))
        if [[ ${attempt} -gt ${max_attempts} ]]; then
            log_error "No valid model after ${max_attempts} attempts"
            return 1
        fi

        echo "" >&2
        log_info "Browse models at: https://openrouter.ai/models"
        if [[ -n "${agent_name}" ]]; then
            log_info "Which model would you like to use with ${agent_name}?"
        else
            log_info "Which model would you like to use?"
        fi

        local model_id=""
        model_id=$(safe_read "Enter model ID [${default_model}]: ") || model_id=""
        model_id="${model_id:-${default_model}}"

        if ! validate_model_id "${model_id}"; then
            log_error "Invalid characters in model ID, try again"
            continue
        fi

        if ! verify_openrouter_model "${model_id}"; then
            local confirm
            confirm=$(safe_read "Use '${model_id}' anyway? (y/N): ") || confirm=""
            if [[ "${confirm}" =~ ^[Yy]$ ]]; then
                echo "${model_id}"
                return 0
            fi
            continue
        fi

        echo "${model_id}"
        return 0
    done
}

# ============================================================
# OpenRouter authentication
# ============================================================

# Manually prompt for API key
# Prompt user for API key with format validation (max 3 attempts)
# Returns: API key via stdout on success, exits with 1 on failure
_prompt_and_validate_api_key() {
    local api_key=""
    local attempts=0
    local max_attempts=3

    while [[ -z "${api_key}" ]]; do
        attempts=$((attempts + 1))
        if [[ ${attempts} -gt ${max_attempts} ]]; then
            log_error "Too many failed attempts."
            log_error ""
            log_error "How to fix:"
            log_error "  1. Get your key from: https://openrouter.ai/settings/keys"
            log_error "  2. Set it before running spawn: export OPENROUTER_API_KEY=sk-or-v1-..."
            log_error "  3. Then re-run: spawn <agent> <cloud>"
            return 1
        fi

        api_key=$(safe_read "Enter your OpenRouter API key: ") || return 1
        [[ -n "${api_key}" ]] || { log_error "API key cannot be empty"; continue; }

        # Validate format and confirm if invalid
        if [[ ! "${api_key}" =~ ^sk-or-v1-[a-f0-9]{64}$ ]]; then
            log_warn "This doesn't look like an OpenRouter API key (expected format: sk-or-v1-...)"
            local confirm
            confirm=$(safe_read "Use this key anyway? (y/N): ") || return 1
            [[ "${confirm}" =~ ^[Yy]$ ]] && break
            api_key=""
        fi
    done

    echo "${api_key}"
}

get_openrouter_api_key_manual() {
    echo "" >&2
    log_info "Manual API Key Entry"
    printf '%b\n' "${GREEN}Get your API key from: https://openrouter.ai/settings/keys${NC}" >&2
    echo "" >&2

    _prompt_and_validate_api_key
}

# Validate port number for OAuth server
# SECURITY: Prevents injection attacks via port parameter
validate_oauth_port() {
    local port="${1}"

    # Ensure port is a valid integer
    if [[ ! "${port}" =~ ^[0-9]+$ ]]; then
        log_error "Invalid port number: '${port}' (must be numeric)"
        return 1
    fi

    # Ensure port is in valid range (1024-65535, avoiding privileged ports)
    if [[ "${port}" -lt 1024 ]] || [[ "${port}" -gt 65535 ]]; then
        log_error "Invalid port number: ${port} (must be between 1024-65535)"
        return 1
    fi

    return 0
}

# Generate OAuth callback HTML pages (success and error)
# Sets OAUTH_SUCCESS_HTML and OAUTH_ERROR_HTML variables
_generate_oauth_html() {
    local css='*{margin:0;padding:0;box-sizing:border-box}body{font-family:system-ui,-apple-system,sans-serif;display:flex;justify-content:center;align-items:center;min-height:100vh;background:#fff;color:#090a0b}@media(prefers-color-scheme:dark){body{background:#090a0b;color:#fafafa}}.card{text-align:center;max-width:400px;padding:2rem}.icon{font-size:2.5rem;margin-bottom:1rem}h1{font-size:1.25rem;font-weight:600;margin-bottom:.5rem}p{font-size:.875rem;color:#6b7280}@media(prefers-color-scheme:dark){p{color:#9ca3af}}'
    OAUTH_SUCCESS_HTML="<html><head><meta name=\"viewport\" content=\"width=device-width,initial-scale=1\"><style>${css}</style></head><body><div class=\"card\"><div class=\"icon\">&#10003;</div><h1>Authentication Successful</h1><p>You can close this tab and return to your terminal.</p></div><script>setTimeout(function(){try{window.close()}catch(e){}},3000)</script></body></html>"
    OAUTH_ERROR_HTML="<html><head><meta name=\"viewport\" content=\"width=device-width,initial-scale=1\"><style>${css}h1{color:#dc2626}@media(prefers-color-scheme:dark){h1{color:#ef4444}}</style></head><body><div class=\"card\"><div class=\"icon\">&#10007;</div><h1>Authentication Failed</h1><p>Invalid or missing state parameter (CSRF protection). Please try again.</p></div></body></html>"
}

# Validate OAuth server prerequisites (port, state token, runtime)
# Sets OAUTH_RUNTIME and OAUTH_STATE variables on success
# $1=starting_port $2=state_file
_validate_oauth_server_args() {
    local starting_port="${1}"
    local state_file="${2}"

    OAUTH_RUNTIME=$(find_node_runtime) || { log_warn "No Node.js runtime found"; return 1; }

    # SECURITY: Validate port number to prevent injection
    if ! validate_oauth_port "${starting_port}"; then
        log_error "OAuth server port validation failed"
        return 1
    fi

    # SECURITY: Read CSRF state token for validation
    OAUTH_STATE=$(cat "${state_file}" 2>/dev/null || echo "")
    if [[ -z "${OAUTH_STATE}" ]]; then
        log_error "CSRF state token file is missing or empty"
        return 1
    fi
}

# Generate the Node.js script for the OAuth callback server
# $1=expected_state $2=success_html $3=error_html $4=code_file $5=port_file $6=starting_port
_generate_oauth_server_script() {
    local expected_state="${1}" success_html="${2}" error_html="${3}"
    local code_file="${4}" port_file="${5}" starting_port="${6}"

    # SECURITY: Escape single quotes in all parameters to prevent injection
    # When parameters are embedded in the Node.js script string, unescaped quotes
    # could break out of the string context and execute arbitrary code
    expected_state="${expected_state//\'/\\\'}"
    success_html="${success_html//\'/\\\'}"
    error_html="${error_html//\'/\\\'}"
    code_file="${code_file//\'/\\\'}"
    port_file="${port_file//\'/\\\'}"

    printf '%s' "
const http = require('http');
const fs = require('fs');
const url = require('url');
const expectedState = '${expected_state}';
const html = '${success_html}';
const errorHtml = '${error_html}';
const server = http.createServer((req, res) => {
  const parsed = url.parse(req.url, true);
  if (parsed.pathname === '/callback' && parsed.query.code) {
    if (!parsed.query.state || parsed.query.state !== expectedState) {
      res.writeHead(403, {'Content-Type':'text/html','Connection':'close'});
      res.end(errorHtml);
      setTimeout(() => { server.close(); process.exit(1); }, 500);
      return;
    }
    // SECURITY: Validate OAuth code format before writing to file
    // OpenRouter OAuth codes are alphanumeric with hyphens/underscores, typically 32-64 chars
    const code = String(parsed.query.code || '');
    if (!/^[a-zA-Z0-9_-]{16,128}$/.test(code)) {
      res.writeHead(400, {'Content-Type':'text/html','Connection':'close'});
      res.end('<html><body><h1>Invalid OAuth Code</h1><p>The authorization code format is invalid.</p></body></html>');
      setTimeout(() => { server.close(); process.exit(1); }, 500);
      return;
    }
    fs.writeFileSync('${code_file}', code);
    res.writeHead(200, {'Content-Type':'text/html','Connection':'close'});
    res.end(html);
    setTimeout(() => { server.close(); process.exit(0); }, 500);
  } else {
    res.writeHead(200, {'Content-Type':'text/html'});
    res.end('<html><body>Waiting for OAuth callback...</body></html>');
  }
});
let currentPort = ${starting_port};
const maxPort = ${starting_port} + 10;
function tryListen() {
  server.listen(currentPort, '127.0.0.1', () => {
    fs.writeFileSync('${port_file}', currentPort.toString());
    fs.writeFileSync('/dev/fd/1', '');
  });
}
server.on('error', (err) => {
  if (err.code === 'EADDRINUSE' && currentPort < maxPort) {
    currentPort++;
    tryListen();
  } else {
    process.exit(1);
  }
});
setTimeout(() => process.exit(0), 300000);
tryListen();
"
}

# Start OAuth callback server using Node.js/Bun HTTP server
# Proper HTTP server — handles multiple connections, favicon requests, etc.
# Tries a range of ports if the initial port is busy
# $1=starting_port $2=code_file $3=port_file (writes actual port used) $4=state_file (CSRF token)
# Returns: server PID
# SECURITY: Validates port number and CSRF state parameter
start_oauth_server() {
    local starting_port="${1}"
    local code_file="${2}"
    local port_file="${3}"
    local state_file="${4}"

    _validate_oauth_server_args "${starting_port}" "${state_file}" || return 1

    _generate_oauth_html
    local script
    script=$(_generate_oauth_server_script "${OAUTH_STATE}" "${OAUTH_SUCCESS_HTML}" "${OAUTH_ERROR_HTML}" \
        "${code_file}" "${port_file}" "${starting_port}")

    "${OAUTH_RUNTIME}" -e "${script}" </dev/null >/dev/null 2>&1 &

    echo $!
}

# Wait for OAuth code with timeout, returns 0 if code received
wait_for_oauth_code() {
    local code_file="${1}"
    local timeout="${2:-120}"
    local elapsed=0

    log_step "Waiting for authentication in browser (this usually takes 10-30 seconds, timeout: ${timeout}s)..."
    while [[ ! -f "${code_file}" ]] && [[ ${elapsed} -lt ${timeout} ]]; do
        sleep "${POLL_INTERVAL}"
        # Use python3 for float addition since bash arithmetic only handles integers
        # If POLL_INTERVAL is 0.5, bash $(( )) would fail. Fallback keeps timeout working.
        if command -v python3 &>/dev/null; then
            elapsed=$(python3 -c "print(int(${elapsed} + ${POLL_INTERVAL}))" 2>/dev/null || echo "$((elapsed + 1))")
        else
            # No python3 available - fall back to integer seconds (may timeout early with fractional POLL_INTERVAL)
            elapsed=$((elapsed + 1))
        fi
    done

    [[ -f "${code_file}" ]]
}

# Exchange OAuth code for API key
exchange_oauth_code() {
    local oauth_code="${1}"

    # SECURITY: Use json_escape to prevent JSON injection via crafted OAuth codes
    local escaped_code
    escaped_code=$(json_escape "${oauth_code}")

    local key_response curl_exit
    key_response=$(curl -s --max-time 30 -X POST "https://openrouter.ai/api/v1/auth/keys" \
        -H "Content-Type: application/json" \
        -d "{\"code\": ${escaped_code}}" 2>&1)
    curl_exit=$?

    if [[ ${curl_exit} -ne 0 ]]; then
        log_error "Failed to contact OpenRouter API (curl exit code: ${curl_exit})"
        log_warn "This may indicate a network issue or temporary service outage"
        log_warn "Please check your internet connection and try again"
        return 1
    fi

    local api_key
    api_key=$(echo "${key_response}" | grep -o '"key":"[^"]*"' | sed 's/"key":"//;s/"$//')

    if [[ -z "${api_key}" ]]; then
        log_error "Failed to exchange OAuth code for API key"
        log_warn "Server response: ${key_response}"
        log_warn "This may indicate the OAuth code expired or was already used"
        log_warn "Please try again, or set OPENROUTER_API_KEY manually"
        return 1
    fi

    echo "${api_key}"
}

# Clean up OAuth session resources
cleanup_oauth_session() {
    local server_pid="${1}"
    local oauth_dir="${2}"

    if [[ -n "${server_pid}" ]]; then
        # Verify PID still exists before killing to prevent race conditions
        if kill -0 "${server_pid}" 2>/dev/null; then
            # Kill process group to catch any child processes (netcat listeners, etc)
            kill -TERM "-${server_pid}" 2>/dev/null || kill "${server_pid}" 2>/dev/null || true
            # Give it time to shut down gracefully
            sleep 0.5
            # Force kill if still running
            kill -KILL "-${server_pid}" 2>/dev/null || true
            wait "${server_pid}" 2>/dev/null || true
        fi
    fi

    # SAFETY: Validate path before rm -rf to prevent accidental deletion of system directories
    # Only delete if:
    # 1. Variable is non-empty
    # 2. Directory exists
    # 3. Path starts with /tmp/ (mktemp always creates in /tmp)
    # 4. Path contains more than just /tmp (prevent rm -rf /tmp)
    if [[ -n "${oauth_dir}" && -d "${oauth_dir}" && "${oauth_dir}" == /tmp/* && "${oauth_dir}" != "/tmp" && "${oauth_dir}" != "/tmp/" ]]; then
        rm -rf "${oauth_dir}"
    fi
}

# Check network connectivity to OpenRouter
# Returns 0 if reachable, 1 if network is unreachable
check_openrouter_connectivity() {
    local host="openrouter.ai"
    local port="443"
    local timeout=5

    # Try curl with short timeout if available
    if command -v curl &> /dev/null; then
        if curl -s --connect-timeout "${timeout}" --max-time "${timeout}" "https://${host}" -o /dev/null 2>/dev/null; then
            return 0
        fi
    fi

    # Fallback to nc/telnet test
    if command -v nc &> /dev/null; then
        if timeout "${timeout}" nc -z "${host}" "${port}" 2>/dev/null; then
            return 0
        fi
    elif command -v timeout &> /dev/null && command -v bash &> /dev/null; then
        # Bash TCP socket test as last resort
        if timeout "${timeout}" bash -c "exec 3<>/dev/tcp/${host}/${port}" 2>/dev/null; then
            return 0
        fi
    fi

    return 1
}

# Start OAuth server and wait for it to be ready
# Returns: "port_number" on success, "" on failure (cleanup handled by caller)
start_and_verify_oauth_server() {
    local callback_port="${1}"
    local code_file="${2}"
    local port_file="${3}"
    local state_file="${4}"
    local server_pid="${5}"

    sleep "${POLL_INTERVAL}"
    if ! kill -0 "${server_pid}" 2>/dev/null; then
        log_warn "Failed to start OAuth server - ports ${callback_port}-$((callback_port + 10)) may be in use"
        log_warn "Try closing other dev servers or set OPENROUTER_API_KEY to skip OAuth"
        return 1
    fi

    # Wait for port file to be created (server successfully bound to a port)
    local wait_count=0
    while [[ ! -f "${port_file}" ]] && [[ ${wait_count} -lt 10 ]]; do
        sleep 0.2
        wait_count=$((wait_count + 1))
    done

    if [[ ! -f "${port_file}" ]]; then
        log_warn "OAuth server failed to allocate a port after 2 seconds"
        log_warn "Another process may be using ports ${callback_port}-$((callback_port + 10))"
        return 1
    fi

    cat "${port_file}"
}

# Validate OAuth prerequisites (network, Node.js runtime)
# Returns 0 if all checks pass, 1 otherwise
_check_oauth_prerequisites() {
    if ! check_openrouter_connectivity; then
        log_warn "Cannot reach openrouter.ai - network may be unavailable"
        log_warn "Please check your internet connection and try again"
        log_warn "Alternatively, set OPENROUTER_API_KEY in your environment to skip OAuth"
        return 1
    fi

    local runtime
    runtime=$(find_node_runtime)
    if [[ -z "${runtime}" ]]; then
        log_warn "No Node.js runtime (bun/node) found - required for the OAuth callback server"
        log_warn "Install one with: brew install node  OR  curl -fsSL https://bun.sh/install | bash"
        return 1
    fi

    return 0
}

# Start OAuth server and return actual port, cleanup on failure
# Sets server_pid and returns 0 on success, 1 on failure
_setup_oauth_server() {
    local callback_port="${1}"
    local code_file="${2}"
    local port_file="${3}"
    local state_file="${4}"
    local pid_file="${5}"

    log_step "Starting local OAuth server (trying ports ${callback_port}-$((callback_port + 10)))..."
    local server_pid
    server_pid=$(start_oauth_server "${callback_port}" "${code_file}" "${port_file}" "${state_file}")

    # Persist server PID to file for reliable retrieval
    if [[ -n "${pid_file}" && -n "${server_pid}" ]]; then
        printf '%s' "${server_pid}" > "${pid_file}"
    fi

    local actual_port
    actual_port=$(start_and_verify_oauth_server "${callback_port}" "${code_file}" "${port_file}" "${state_file}" "${server_pid}")
    if [[ -z "${actual_port}" ]]; then
        return 1
    fi

    log_info "OAuth server listening on port ${actual_port}"
    echo "${actual_port}"
    return 0
}

# Wait for OAuth code with timeout and cleanup on failure
# Returns 0 on success, 1 on failure
_wait_for_oauth() {
    local code_file="${1}"

    if ! wait_for_oauth_code "${code_file}" 120; then
        log_warn "OAuth timeout - no response received"
        return 1
    fi
    return 0
}

# Try OAuth flow (orchestrates the helper functions above)
# SECURITY: Generates CSRF state token to prevent OAuth code interception
_generate_csrf_state() {
    if command -v openssl &>/dev/null; then
        openssl rand -hex 16
    elif [[ -r /dev/urandom ]]; then
        od -An -N16 -tx1 /dev/urandom | tr -d ' \n'
    else
        log_error "Cannot generate secure CSRF token: neither openssl nor /dev/urandom available"
        log_error "Install openssl or ensure /dev/urandom is readable"
        return 1
    fi
}

# Create temp directory with OAuth session files and CSRF state
_init_oauth_session() {
    local oauth_dir
    oauth_dir=$(mktemp -d) || {
        log_error "Failed to create temporary directory for OAuth session"
        log_error "Check disk space and /tmp permissions"
        return 1
    }

    # SAFETY: Verify mktemp succeeded before proceeding
    if [[ -z "${oauth_dir}" || ! -d "${oauth_dir}" ]]; then
        log_error "Failed to create temporary directory for OAuth session"
        log_error "Check disk space and /tmp permissions"
        return 1
    fi

    # SECURITY: Generate random CSRF state token (32 hex chars = 128 bits)
    local csrf_state
    csrf_state=$(_generate_csrf_state)
    printf '%s' "${csrf_state}" > "${oauth_dir}/state" || {
        rm -rf "${oauth_dir}"
        log_error "Failed to write OAuth state file"
        return 1
    }
    chmod 600 "${oauth_dir}/state"

    echo "${oauth_dir}"
}

# Open browser and wait for OAuth callback, returning the auth code
# Outputs the OAuth code on success, returns 1 on timeout
_await_oauth_callback() {
    local code_file="${1}"
    local server_pid="${2}"
    local oauth_dir="${3}"
    local actual_port="${4}"
    local csrf_state="${5}"

    local callback_url="http://localhost:${actual_port}/callback"
    local auth_url="https://openrouter.ai/auth?callback_url=${callback_url}&state=${csrf_state}"
    log_step "Opening browser to authenticate with OpenRouter..."
    open_browser "${auth_url}"

    if ! _wait_for_oauth "${code_file}"; then
        cleanup_oauth_session "${server_pid}" "${oauth_dir}"
        log_error "OAuth authentication timed out after 120 seconds"
        log_error ""
        log_error "The authentication flow was not completed in time."
        log_error ""
        log_error "Troubleshooting:"
        log_error "  1. Check if your browser opened to openrouter.ai"
        log_error "  2. Complete the authentication and allow the redirect"
        log_error "  3. Ensure port ${actual_port} is not blocked by firewall/proxy"
        log_error ""
        log_error "Alternative: Use a manual API key instead"
        log_error "  export OPENROUTER_API_KEY=sk-or-v1-..."
        log_error "  Get a key at: https://openrouter.ai/settings/keys"
        return 1
    fi

    cat "${code_file}"
}

# Helper: Start OAuth server and get session details
# Returns: "port|pid|oauth_dir" on success, "" on failure
_start_oauth_session_with_server() {
    local callback_port="${1}"

    local oauth_dir
    oauth_dir=$(_init_oauth_session)
    local code_file="${oauth_dir}/code"
    local pid_file="${oauth_dir}/server_pid"

    local actual_port
    actual_port=$(_setup_oauth_server "${callback_port}" "${code_file}" "${oauth_dir}/port" "${oauth_dir}/state" "${pid_file}") || {
        cleanup_oauth_session "" "${oauth_dir}"
        return 1
    }

    local server_pid
    server_pid=$(cat "${pid_file}" 2>/dev/null || echo "")
    if [[ -z "${server_pid}" ]]; then
        log_error "Failed to retrieve OAuth server PID"
        cleanup_oauth_session "" "${oauth_dir}"
        return 1
    fi

    echo "${actual_port}|${server_pid}|${oauth_dir}"
}

try_oauth_flow() {
    local callback_port=${1:-5180}

    log_step "Attempting OAuth authentication..."

    if ! _check_oauth_prerequisites; then
        return 1
    fi

    local session_info
    session_info=$(_start_oauth_session_with_server "${callback_port}") || return 1

    local actual_port server_pid oauth_dir
    IFS='|' read -r actual_port server_pid oauth_dir <<< "${session_info}"

    local csrf_state
    csrf_state=$(cat "${oauth_dir}/state")

    # Open browser and wait for callback
    local oauth_code
    oauth_code=$(_await_oauth_callback "${oauth_dir}/code" "${server_pid}" "${oauth_dir}" "${actual_port}" "${csrf_state}") || return 1
    cleanup_oauth_session "${server_pid}" "${oauth_dir}"

    # Exchange code for API key
    log_step "Exchanging OAuth code for API key..."
    local api_key
    api_key=$(exchange_oauth_code "${oauth_code}") || return 1

    log_info "Successfully obtained OpenRouter API key via OAuth!"
    echo "${api_key}"
}

# Main function: Try OAuth, fallback to manual entry
get_openrouter_api_key_oauth() {
    local callback_port=${1:-5180}

    # Try OAuth flow first
    local api_key
    api_key=$(try_oauth_flow "${callback_port}")

    if [[ -n "${api_key}" ]]; then
        echo "${api_key}"
        return 0
    fi

    # OAuth failed, offer manual entry
    echo "" >&2
    log_warn "Browser-based OAuth login was not completed."
    log_warn "This is normal on remote servers, SSH sessions, or headless environments."
    log_info "You can paste an API key instead. Create one at: https://openrouter.ai/settings/keys"
    echo "" >&2
    local manual_choice
    manual_choice=$(safe_read "Paste your API key manually? (Y/n): ") || {
        log_error "Cannot prompt for manual entry in non-interactive mode"
        log_warn "Set OPENROUTER_API_KEY environment variable before running spawn"
        return 1
    }

    if [[ "${manual_choice}" =~ ^[Nn]$ ]]; then
        log_error "Authentication cancelled. An OpenRouter API key is required to use spawn."
        log_warn "To authenticate, either:"
        log_warn "  - Re-run this command and complete the OAuth flow in your browser"
        log_warn "  - Set OPENROUTER_API_KEY=sk-or-v1-... before running spawn"
        log_warn "  - Create a key at: https://openrouter.ai/settings/keys"
        return 1
    fi

    api_key=$(get_openrouter_api_key_manual)
    echo "${api_key}"
}

# ============================================================
# Environment injection helpers
# ============================================================

# Generate environment variable config content
# Usage: generate_env_config KEY1=val1 KEY2=val2 ...
# Outputs the env config to stdout
# SECURITY: Values are single-quoted to prevent shell injection when sourced.
# Single quotes prevent all interpretation of special characters ($, `, \, etc.)
generate_env_config() {
    echo ""
    echo "# [spawn:env]"
    # All spawn environments are disposable cloud VMs — mark as sandbox
    echo "export IS_SANDBOX='1'"
    for env_pair in "$@"; do
        local key="${env_pair%%=*}"
        local value="${env_pair#*=}"

        # SECURITY: Validate environment variable names to prevent injection
        # Only allow uppercase letters, numbers, and underscores (standard env var format)
        if [[ ! "${key}" =~ ^[A-Z_][A-Z0-9_]*$ ]]; then
            log_error "SECURITY: Invalid environment variable name rejected: ${key}"
            continue
        fi

        # Escape any single quotes in the value: replace ' with '\''
        local escaped_value="${value//\'/\'\\\'\'}"
        echo "export ${key}='${escaped_value}'"
    done
}

# Inject environment variables into remote server's shell config (SSH-based clouds)
# Usage: inject_env_vars_ssh SERVER_IP UPLOAD_FUNC RUN_FUNC KEY1=val1 KEY2=val2 ...
# Example: inject_env_vars_ssh "$DO_SERVER_IP" upload_file run_server \
#            "OPENROUTER_API_KEY=$OPENROUTER_API_KEY" \
#            "ANTHROPIC_BASE_URL=https://openrouter.ai/api"
inject_env_vars_ssh() {
    local server_ip="${1}"
    local upload_func="${2}"
    local run_func="${3}"
    shift 3

    local env_temp
    env_temp=$(mktemp)
    chmod 600 "${env_temp}"
    track_temp_file "${env_temp}"

    generate_env_config "$@" > "${env_temp}"

    # SECURITY: Use unpredictable temp file name to prevent race condition
    # Attacker could create symlink at /tmp/env_config to exfiltrate credentials
    local rand_suffix
    rand_suffix=$(basename "${env_temp}")
    local temp_remote="/tmp/spawn_env_${rand_suffix}"

    # Append to .bashrc and .zshrc only — do NOT write to .profile or .bash_profile
    "${upload_func}" "${server_ip}" "${env_temp}" "${temp_remote}"
    "${run_func}" "${server_ip}" "cat '${temp_remote}' >> ~/.bashrc && cat '${temp_remote}' >> ~/.zshrc && rm '${temp_remote}'"

    # Note: temp file will be cleaned up by trap handler

    # Offer optional GitHub CLI setup
    offer_github_auth "${run_func} ${server_ip}"
}

# Inject environment variables for providers without SSH (modal, e2b, sprite)
# For providers where upload_file and run_server don't take server_ip as first arg
# Usage: inject_env_vars_local upload_file run_server KEY1=VAL1 KEY2=VAL2 ...
# Example: inject_env_vars_local upload_file run_server \
#            "OPENROUTER_API_KEY=$OPENROUTER_API_KEY" \
#            "ANTHROPIC_BASE_URL=https://openrouter.ai/api"
inject_env_vars_local() {
    local upload_func="${1}"
    local run_func="${2}"
    shift 2

    local env_temp
    env_temp=$(mktemp)
    chmod 600 "${env_temp}"
    track_temp_file "${env_temp}"

    generate_env_config "$@" > "${env_temp}"

    # SECURITY: Use unpredictable temp file name to prevent race condition
    local rand_suffix
    rand_suffix=$(basename "${env_temp}")
    local temp_remote="/tmp/spawn_env_${rand_suffix}"

    # Append to .bashrc and .zshrc only
    "${upload_func}" "${env_temp}" "${temp_remote}"
    "${run_func}" "cat '${temp_remote}' >> ~/.bashrc && cat '${temp_remote}' >> ~/.zshrc && rm '${temp_remote}'"

    # Note: temp file will be cleaned up by trap handler

    # Offer optional GitHub CLI setup
    offer_github_auth "${run_func}"
}

# Prompt user about GitHub CLI setup BEFORE provisioning.
# Stores the answer so the actual install can happen later (after the
# server is up) without re-prompting.
# Usage: prompt_github_auth   (call before create_server)
prompt_github_auth() {
    SPAWN_GITHUB_AUTH_PROMPTED=1

    # Skip in non-interactive or if user opted out
    if [[ -n "${SPAWN_SKIP_GITHUB_AUTH:-}" ]]; then
        return 0
    fi

    printf '\n'
    local choice
    choice=$(safe_read "Set up GitHub CLI (gh) on this machine? (y/N): ") || return 0
    if [[ "${choice}" =~ ^[Yy]$ ]]; then
        SPAWN_GITHUB_AUTH_REQUESTED=1

        # Capture local GitHub token for passthrough to remote VM
        if [[ -n "${GITHUB_TOKEN:-}" ]]; then
            SPAWN_GITHUB_TOKEN="${GITHUB_TOKEN}"
        elif command -v gh &>/dev/null && gh auth status &>/dev/null 2>&1; then
            SPAWN_GITHUB_TOKEN="$(gh auth token 2>/dev/null)" || true
        fi
    fi
}

# Run GitHub CLI setup on remote VM if previously requested via prompt_github_auth.
# If prompt_github_auth was never called, falls back to prompting interactively.
# Usage (SSH clouds): offer_github_auth "run_server SERVER_IP"
# Usage (local):      offer_github_auth "run_server"
offer_github_auth() {
    local run_callback="${1}"

    # Skip if user opted out via env var
    if [[ -n "${SPAWN_SKIP_GITHUB_AUTH:-}" ]]; then
        return 0
    fi

    # Build the remote command with optional token export.
    # Prefer the local copy (running from a checkout) so fixes don't wait for
    # a merge to main. Base64-encode it for safe inline transport.
    local gh_cmd
    local _local_gh="${SCRIPT_DIR:-}/../../shared/github-auth.sh"
    if [[ -n "${SCRIPT_DIR:-}" && -f "${_local_gh}" && ! -L "${_local_gh}" ]]; then
        local _gh_b64
        _gh_b64=$(base64 < "${_local_gh}" | tr -d '\n')
        gh_cmd="printf '%s' '${_gh_b64}' | base64 -d | bash"
    else
        gh_cmd="curl -fsSL https://raw.githubusercontent.com/OpenRouterTeam/spawn/main/shared/github-auth.sh | bash"
    fi
    if [[ -n "${SPAWN_GITHUB_TOKEN:-}" ]]; then
        local escaped_token
        escaped_token=$(printf '%q' "${SPAWN_GITHUB_TOKEN}")
        gh_cmd="export GITHUB_TOKEN=${escaped_token}; ${gh_cmd}"
    fi

    # If prompt_github_auth was already called, use its stored answer
    if [[ "${SPAWN_GITHUB_AUTH_PROMPTED:-}" == "1" ]]; then
        if [[ "${SPAWN_GITHUB_AUTH_REQUESTED:-}" == "1" ]]; then
            log_step "Installing and authenticating GitHub CLI..."
            ${run_callback} "${gh_cmd}" || log_warn "GitHub CLI setup failed (non-fatal, continuing)"
        fi
        return 0
    fi

    # Fallback: prompt_github_auth was never called, ask now
    printf '\n'
    local choice
    choice=$(safe_read "Set up GitHub CLI (gh) on this machine? (y/N): ") || return 0
    if [[ ! "${choice}" =~ ^[Yy]$ ]]; then
        return 0
    fi

    # Attempt token capture in fallback path too
    if [[ -z "${SPAWN_GITHUB_TOKEN:-}" ]]; then
        if [[ -n "${GITHUB_TOKEN:-}" ]]; then
            SPAWN_GITHUB_TOKEN="${GITHUB_TOKEN}"
        elif command -v gh &>/dev/null && gh auth status &>/dev/null 2>&1; then
            SPAWN_GITHUB_TOKEN="$(gh auth token 2>/dev/null)" || true
        fi
        if [[ -n "${SPAWN_GITHUB_TOKEN:-}" ]]; then
            local escaped_token
            escaped_token=$(printf '%q' "${SPAWN_GITHUB_TOKEN}")
            gh_cmd="export GITHUB_TOKEN=${escaped_token}; ${gh_cmd}"
        fi
    fi

    log_step "Installing and authenticating GitHub CLI..."
    ${run_callback} "${gh_cmd}" || log_warn "GitHub CLI setup failed (non-fatal, continuing)"
}

# ============================================================
# Resource cleanup trap handlers
# ============================================================

# Array to track temporary files for cleanup
CLEANUP_TEMP_FILES=()

# Track a temporary file for cleanup on exit
# Usage: track_temp_file PATH
track_temp_file() {
    local temp_file="${1}"
    CLEANUP_TEMP_FILES+=("${temp_file}")
}

# Cleanup function for temporary files
# Called automatically on EXIT, INT, TERM signals
cleanup_temp_files() {
    local exit_code=$?

    for temp_file in "${CLEANUP_TEMP_FILES[@]}"; do
        if [[ -f "${temp_file}" ]]; then
            # Securely remove temp files (may contain credentials)
            shred -f -u "${temp_file}" 2>/dev/null || rm -f "${temp_file}"
        fi
    done

    return "${exit_code}"
}

# Register cleanup trap handler
# Call this at the start of scripts that create temp files
register_cleanup_trap() {
    trap cleanup_temp_files EXIT INT TERM
}

# ============================================================
# Agent setup helpers (composable, callback-based)
# ============================================================
# These helpers accept pre-applied RUN/UPLOAD/SESSION callbacks,
# following the same callback pattern used by offer_github_auth
# and setup_claude_code_config.
#
# Usage pattern in agent scripts:
#   RUN="run_server ${SERVER_IP}"
#   UPLOAD="upload_file ${SERVER_IP}"
#   SESSION="interactive_session ${SERVER_IP}"
#
#   install_agent "Codex" "npm install -g @openai/codex" "$RUN"
#   verify_agent "Codex" "command -v codex" "npm install -g @openai/codex" "$RUN"
#   get_or_prompt_api_key
#   inject_env_vars_cb "$RUN" "$UPLOAD" "OPENROUTER_API_KEY=${OPENROUTER_API_KEY}"
#   launch_session "Hetzner server" "$SESSION" "source ~/.zshrc && codex"

# Run an agent's install command on the target machine
# Usage: install_agent AGENT_NAME INSTALL_CMD RUN_CB
install_agent() {
    local agent_name="$1" install_cmd="$2" run_cb="$3"
    log_step "Installing ${agent_name}..."
    # Pass the raw command to the run callback — do NOT use printf '%q' + bash -c
    # here. The run callback (run_server, run_sprite, ssh) already handles escaping
    # for remote transport. Double-escaping breaks shell operators (&&, ||, >, |)
    # inside install commands.
    if ! ${run_cb} "${install_cmd}"; then
        log_install_failed "${agent_name}" "${install_cmd}"
        return 1
    fi
    log_info "${agent_name} installation completed"
}

# Verify an agent installed correctly; exit 1 on failure
# Usage: verify_agent AGENT_NAME VERIFY_CMD INSTALL_CMD RUN_CB
verify_agent() {
    local agent_name="$1" verify_cmd="$2" install_cmd="$3" run_cb="$4"
    if ! ${run_cb} "${verify_cmd}" >/dev/null 2>&1; then
        log_install_failed "${agent_name}" "${install_cmd}"
        exit 1
    fi
    log_info "${agent_name} installation verified successfully"
}

# Install Claude Code with multi-method fallback and detailed error reporting.
# Tries: 1) curl installer (standalone binary)  2) bun  3) npm
# The curl installer bundles its own runtime. npm/bun install a Node.js package
# whose shebang needs 'node', so we ensure a node runtime exists after those.
# Usage: install_claude_code RUN_CB
_finalize_claude_install() {
    local run_cb="$1"
    local claude_path="$2"
    log_step "Setting up Claude Code shell integration..."
    ${run_cb} "${claude_path} && claude install --force" >/dev/null 2>&1 || true
    # Write claude PATH to .bashrc and .zshrc
    ${run_cb} "for rc in ~/.bashrc ~/.zshrc; do grep -q '.claude/local/bin' \"\$rc\" 2>/dev/null || printf '\\n# Claude Code PATH\\nexport PATH=\"\$HOME/.claude/local/bin:\$HOME/.local/bin:\$HOME/.bun/bin:\$PATH\"\\n' >> \"\$rc\"; done" >/dev/null 2>&1 || true
}

_verify_claude_installed() {
    local run_cb="$1"
    local claude_path="$2"
    ${run_cb} "${claude_path} && command -v claude" >/dev/null 2>&1
}

_install_via_curl() {
    local run_cb="$1"
    local claude_path="$2"
    log_step "Installing Claude Code (method 1/2: curl installer)..."
    if ${run_cb} "curl -fsSL https://claude.ai/install.sh | bash" 2>&1; then
        if _verify_claude_installed "$run_cb" "$claude_path"; then
            log_info "Claude Code installed via curl installer"
            _finalize_claude_install "$run_cb" "$claude_path"
            return 0
        fi
        log_warn "curl installer exited 0 but claude not found on PATH"
    else
        log_warn "curl installer failed (site may be temporarily unavailable)"
    fi
    return 1
}

_ensure_nodejs_runtime() {
    local run_cb="$1"
    local claude_path="$2"
    if ! ${run_cb} "${claude_path} && command -v node" >/dev/null 2>&1; then
        log_step "Installing Node.js runtime (required for claude package)..."
        if ${run_cb} "apt-get install -y nodejs npm && npm install -g n && n 22 && ln -sf /usr/local/bin/node /usr/bin/node && ln -sf /usr/local/bin/npm /usr/bin/npm && ln -sf /usr/local/bin/npx /usr/bin/npx" >/dev/null 2>&1; then
            log_info "Node.js installed via n"
        else
            log_warn "Could not install Node.js - bun method may fail"
        fi
    fi
}

_install_via_bun() {
    local run_cb="$1"
    local claude_path="$2"
    log_step "Installing Claude Code (method 2/2: bun)..."
    if ${run_cb} "${claude_path} && bun i -g @anthropic-ai/claude-code 2>&1" 2>&1; then
        if _verify_claude_installed "$run_cb" "$claude_path"; then
            log_info "Claude Code installed via bun"
            _finalize_claude_install "$run_cb" "$claude_path"
            return 0
        fi
        log_warn "bun install exited 0 but claude binary not found"
    else
        log_warn "bun install failed"
    fi
    return 1
}

install_claude_code() {
    local run_cb="$1"
    local claude_path='export PATH=$HOME/.claude/local/bin:$HOME/.local/bin:$HOME/.bun/bin:$PATH'

    # Clean up ~/.bash_profile if it was created by a previous broken deployment.
    ${run_cb} "if [ -f ~/.bash_profile ] && grep -q 'spawn:env\|Claude Code PATH\|spawn:path' ~/.bash_profile 2>/dev/null; then rm -f ~/.bash_profile; fi" >/dev/null 2>&1 || true

    # Already installed?
    if _verify_claude_installed "$run_cb" "$claude_path"; then
        log_info "Claude Code already installed"
        _finalize_claude_install "$run_cb" "$claude_path"
        return 0
    fi

    # Try curl installer first
    if _install_via_curl "$run_cb" "$claude_path"; then
        return 0
    fi

    # Ensure Node.js runtime for bun method
    _ensure_nodejs_runtime "$run_cb" "$claude_path"

    # Try bun installer
    if _install_via_bun "$run_cb" "$claude_path"; then
        return 0
    fi

    # All methods failed
    log_install_failed "Claude Code" "curl -fsSL https://claude.ai/install.sh | bash"
    exit 1
}

# Get OpenRouter API key from environment or prompt via OAuth
# Sets the global OPENROUTER_API_KEY variable
get_or_prompt_api_key() {
    echo ""
    if [[ -n "${OPENROUTER_API_KEY:-}" ]]; then
        log_info "Using OpenRouter API key from environment"
        if ! verify_openrouter_key "${OPENROUTER_API_KEY}"; then
            log_warn "Environment key failed validation, prompting for a new one..."
            OPENROUTER_API_KEY=""
        fi
    fi

    local max_attempts=3 attempt=0
    while [[ -z "${OPENROUTER_API_KEY:-}" ]]; do
        attempt=$((attempt + 1))
        if [[ ${attempt} -gt ${max_attempts} ]]; then
            log_error "No valid API key after ${max_attempts} attempts"
            exit 1
        fi
        OPENROUTER_API_KEY=$(get_openrouter_api_key_oauth 5180) || true
        if [[ -n "${OPENROUTER_API_KEY:-}" ]] && ! verify_openrouter_key "${OPENROUTER_API_KEY}"; then
            OPENROUTER_API_KEY=""
        fi
    done
}

# Inject environment variables using pre-applied callbacks
# Usage: inject_env_vars_cb RUN_CB UPLOAD_CB KEY1=val1 KEY2=val2 ...
# Example: inject_env_vars_cb "$RUN" "$UPLOAD" \
#            "OPENROUTER_API_KEY=$OPENROUTER_API_KEY" \
#            "ANTHROPIC_BASE_URL=https://openrouter.ai/api"
inject_env_vars_cb() {
    local run_cb="$1" upload_cb="$2"
    shift 2

    log_step "Setting up environment variables..."

    local env_temp
    env_temp=$(mktemp)
    chmod 600 "${env_temp}"
    track_temp_file "${env_temp}"

    generate_env_config "$@" > "${env_temp}"

    # SECURITY: Use unpredictable temp file name to prevent race condition
    local rand_suffix
    rand_suffix=$(basename "${env_temp}")
    local temp_remote="/tmp/spawn_env_${rand_suffix}"

    ${upload_cb} "${env_temp}" "${temp_remote}"
    ${run_cb} "cat '${temp_remote}' >> ~/.bashrc && cat '${temp_remote}' >> ~/.zshrc && rm '${temp_remote}'"

    # Offer optional GitHub CLI setup
    offer_github_auth "${run_cb}"
}

# Print success message and launch an interactive agent session
# Usage: launch_session CLOUD_MSG SESSION_CB LAUNCH_CMD
launch_session() {
    local cloud_msg="$1" session_cb="$2" launch_cmd="$3"
    echo ""
    log_info "${cloud_msg} setup completed successfully!"
    echo ""
    log_step "Starting agent..."
    sleep 1
    clear 2>/dev/null || true
    ${session_cb} "${launch_cmd}"
}

# ============================================================
# Cloud adapter runner (spawn_agent)
# ============================================================
# Orchestrates the standard agent deployment flow using cloud_* adapter
# functions. Agent scripts define hooks (agent_install, agent_env_vars,
# agent_launch_cmd, etc.) and call spawn_agent to run them.
#
# Required cloud_* functions (defined in {cloud}/lib/common.sh):
#   cloud_authenticate, cloud_provision, cloud_wait_ready,
#   cloud_run, cloud_upload, cloud_interactive, cloud_label
#
# Required agent hooks:
#   agent_env_vars   — print env config lines to stdout (via generate_env_config)
#   agent_launch_cmd — print the shell command to launch the agent
#
# Optional agent hooks:
#   agent_pre_provision — run before provisioning (e.g., prompt_github_auth)
#   agent_install       — install the agent on the server
#   agent_configure     — agent-specific config (settings files, etc.)
#   agent_save_connection — save connection info for `spawn list`
#   agent_pre_launch    — run before launching (e.g., start daemon)
#
# Optional agent variables:
#   AGENT_MODEL_PROMPT  — if set, prompt for model selection
#   AGENT_MODEL_DEFAULT — default model ID (default: openrouter/auto)

# Check if a function is defined (bash 3.2 compatible)
_fn_exists() { type "$1" 2>/dev/null | head -1 | grep -q 'function'; }

# Inject env vars using cloud_* adapter functions
_spawn_inject_env_vars() {
    log_step "Setting up environment variables..."
    local env_temp
    env_temp=$(mktemp)
    chmod 600 "${env_temp}"
    track_temp_file "${env_temp}"

    agent_env_vars > "${env_temp}"

    # SECURITY: Use unpredictable temp file name to prevent symlink attacks
    local rand_suffix
    rand_suffix=$(basename "${env_temp}")
    local temp_remote="/tmp/spawn_env_${rand_suffix}"

    cloud_upload "${env_temp}" "${temp_remote}"

    # Write env vars to ~/.spawnrc instead of inlining into .bashrc/.zshrc.
    # Ubuntu's default .bashrc has an interactive-shell guard that exits early —
    # anything appended after the guard is never loaded when SSH runs a command string.
    cloud_run "cp '${temp_remote}' ~/.spawnrc && chmod 600 ~/.spawnrc && rm '${temp_remote}'"

    # Hook .spawnrc into .bashrc and .zshrc so interactive shells pick up the vars too
    cloud_run "grep -q 'source ~/.spawnrc' ~/.bashrc 2>/dev/null || echo '[ -f ~/.spawnrc ] && source ~/.spawnrc' >> ~/.bashrc" || log_warn "Could not hook .spawnrc into .bashrc"
    cloud_run "grep -q 'source ~/.spawnrc' ~/.zshrc 2>/dev/null || echo '[ -f ~/.spawnrc ] && source ~/.spawnrc' >> ~/.zshrc" || log_warn "Could not hook .spawnrc into .zshrc"

    offer_github_auth cloud_run
}

# Main orchestration runner for agent deployment
# Usage: spawn_agent AGENT_DISPLAY_NAME
spawn_agent() {
    local agent_name="$1"

    # 1. Authenticate with cloud provider
    cloud_authenticate

    # 2. Pre-provision hooks (e.g., prompt for GitHub auth)
    if _fn_exists agent_pre_provision; then agent_pre_provision || true; fi

    # 3. Get API key (before provisioning so user isn't waiting on server)
    get_or_prompt_api_key

    # 4. Model selection (if agent needs it)
    if [[ -n "${AGENT_MODEL_PROMPT:-}" ]]; then
        MODEL_ID=$(get_model_id_interactive "${AGENT_MODEL_DEFAULT:-openrouter/auto}" "${agent_name}") || exit 1
    fi

    # 5. Provision server
    local server_name
    server_name=$(get_server_name)
    cloud_provision "${server_name}"

    # 6. Wait for readiness (may already be done after OAuth)
    cloud_wait_ready

    # 7. Install agent
    if _fn_exists agent_install; then
        agent_install || exit 1
    fi

    # 8. Inject environment variables
    _spawn_inject_env_vars

    # 9. Agent-specific configuration (non-fatal — agent may work with defaults)
    if _fn_exists agent_configure; then agent_configure || log_warn "Agent configuration failed (continuing with defaults)"; fi

    # 10. Save connection info (non-fatal — convenience feature only)
    if _fn_exists agent_save_connection; then agent_save_connection || log_warn "Could not save connection info"; fi

    # 11. Pre-launch hooks (non-fatal — e.g., gateway daemon may start slowly)
    if _fn_exists agent_pre_launch; then agent_pre_launch || log_warn "Pre-launch hook failed (continuing)"; fi

    # 12. Launch interactive session
    log_info "${agent_name} is ready"
    local launch_cmd
    launch_cmd=$(agent_launch_cmd)
    launch_session "$(cloud_label)" cloud_interactive "${launch_cmd}"
}

# ============================================================
# SSH configuration
# ============================================================

# Validate SSH_OPTS to prevent command injection
# Only allow safe SSH option patterns (dash-prefixed flags and values)
_validate_ssh_opts() {
    local opts="${1}"
    # Allow empty
    if [[ -z "${opts}" ]]; then
        return 0
    fi
    # Pattern: SSH opts must start with dash and contain only safe characters
    # Allows: -o Option=value -i /path/to/key -p 22 etc.
    # Blocks: semicolons, pipes, backticks, $() and other shell metacharacters
    if [[ "${opts}" =~ [\;\|\&\`\$\(\)\<\>] ]]; then
        log_error "SECURITY: SSH_OPTS contains shell metacharacters"
        log_error "Rejected value: ${opts}"
        return 1
    fi
    return 0
}

# Default SSH options for all cloud providers
# Clouds can override this if they need provider-specific settings
if [[ -z "${SSH_OPTS:-}" ]]; then
    SSH_OPTS="-o StrictHostKeyChecking=accept-new -o UserKnownHostsFile=/dev/null -o LogLevel=ERROR -o ServerAliveInterval=15 -o ServerAliveCountMax=3 -o ConnectTimeout=10 -i ${HOME}/.ssh/id_ed25519"
else
    # Validate user-provided SSH_OPTS for security
    if ! _validate_ssh_opts "${SSH_OPTS}"; then
        log_error "Invalid SSH_OPTS provided. Using secure defaults."
        SSH_OPTS="-o StrictHostKeyChecking=accept-new -o UserKnownHostsFile=/dev/null -o LogLevel=ERROR -o ServerAliveInterval=15 -o ServerAliveCountMax=3 -o ConnectTimeout=10 -i ${HOME}/.ssh/id_ed25519"
    fi
fi

# ============================================================
# SSH key management helpers
# ============================================================

# Generate SSH key if it doesn't exist
# Usage: generate_ssh_key_if_missing KEY_PATH
generate_ssh_key_if_missing() {
    local key_path="${1}"
    if [[ -f "${key_path}" ]]; then
        return 0
    fi
    log_step "Generating SSH key at ${key_path}..."
    mkdir -p "$(dirname "${key_path}")" || {
        log_error "Failed to create SSH key directory: $(dirname "${key_path}")"
        log_error "Check that you have write permissions to this directory."
        return 1
    }
    ssh-keygen -t ed25519 -f "${key_path}" -N "" -q || {
        log_error "Failed to generate SSH key at ${key_path}"
        log_error ""
        log_error "How to fix:"
        log_error "  1. Check disk space: df -h $(dirname "${key_path}")"
        log_error "  2. Check permissions: ls -la $(dirname "${key_path}")"
        log_error "  3. Generate manually: ssh-keygen -t ed25519 -f ${key_path}"
        return 1
    }
    log_info "SSH key generated at ${key_path}"
}

# Get MD5 fingerprint of SSH public key
# Usage: get_ssh_fingerprint PUB_KEY_PATH
get_ssh_fingerprint() {
    local pub_path="${1}"
    if [[ ! -f "${pub_path}" ]]; then
        log_error "SSH public key not found: ${pub_path}"
        log_error "Expected a public key file alongside your private key."
        log_error "Regenerate with: ssh-keygen -t ed25519 -f ${pub_path%.pub}"
        return 1
    fi
    local fingerprint
    fingerprint=$(ssh-keygen -lf "${pub_path}" -E md5 2>/dev/null | awk '{print $2}' | sed 's/MD5://')
    if [[ -z "${fingerprint}" ]]; then
        log_error "Failed to read SSH public key fingerprint from ${pub_path}"
        log_error "The key file may be corrupted or in an unsupported format."
        log_error "Regenerate with: ssh-keygen -t ed25519 -f ${pub_path%.pub}"
        return 1
    fi
    echo "${fingerprint}"
}

# JSON-escape a string (for embedding in JSON bodies)
# Usage: json_escape STRING
json_escape() {
    local string="${1}"
    python3 -c "import json, sys; print(json.dumps(sys.stdin.read().rstrip('\n')))" <<< "${string}" 2>/dev/null || {
        # Fallback: manually escape backslashes, quotes, and JSON control characters
        local escaped="${string//\\/\\\\}"
        escaped="${escaped//\"/\\\"}"
        escaped="${escaped//$'\n'/\\n}"
        escaped="${escaped//$'\r'/\\r}"
        escaped="${escaped//$'\t'/\\t}"
        echo "\"${escaped}\""
    }
}

# Extract SSH key IDs from cloud provider API response
# Usage: extract_ssh_key_ids API_RESPONSE KEY_FIELD
# KEY_FIELD: "ssh_keys" (DigitalOcean/Vultr) or "data" (Linode)
extract_ssh_key_ids() {
    local api_response="${1}"
    local key_field="${2:-ssh_keys}"
    python3 -c "
import json, sys
data = json.loads(sys.stdin.read())
ids = [k['id'] for k in data.get('${key_field}', [])]
print(json.dumps(ids))
" <<< "${api_response}" 2>/dev/null || {
        log_error "Failed to parse SSH key IDs from API response"
        log_error "The API response may be malformed or python3 is unavailable"
        return 1
    }
}

# ============================================================
# Cloud provisioning helpers
# ============================================================

# Generate cloud-init userdata YAML for server provisioning
# This is the default userdata used by all cloud providers
# Clouds can override this function if they need provider-specific cloud-init config
get_cloud_init_userdata() {
    cat << 'CLOUD_INIT_EOF'
#cloud-config
package_update: true
packages:
  - curl
  - unzip
  - git
  - zsh
  - nodejs
  - npm

runcmd:
  # Set up 2G swap to prevent OOM kills on small VMs
  - fallocate -l 2G /swapfile
  - chmod 600 /swapfile
  - mkswap /swapfile
  - swapon /swapfile
  # Upgrade Node.js to v22 LTS (apt has v18, agents like Cline need v20+)
  # n installs to /usr/local/bin but apt's v18 at /usr/bin can shadow it, so symlink over
  - npm install -g n && n 22 && ln -sf /usr/local/bin/node /usr/bin/node && ln -sf /usr/local/bin/npm /usr/bin/npm && ln -sf /usr/local/bin/npx /usr/bin/npx
  # Install Bun
  - su - root -c 'curl -fsSL https://bun.sh/install | bash'
  # Install Claude Code
  - su - root -c 'curl -fsSL https://claude.ai/install.sh | bash'
  # Mark as sandbox environment (disposable cloud VM)
  - echo 'export IS_SANDBOX=1' >> /root/.bashrc
  - echo 'export IS_SANDBOX=1' >> /root/.zshrc
  # Configure PATH in .bashrc and .zshrc (include claude installer path)
  - echo 'export PATH="${HOME}/.claude/local/bin:${HOME}/.local/bin:${HOME}/.bun/bin:${PATH}"' >> /root/.bashrc
  - echo 'export PATH="${HOME}/.claude/local/bin:${HOME}/.local/bin:${HOME}/.bun/bin:${PATH}"' >> /root/.zshrc
  # Signal completion
  - touch /root/.cloud-init-complete
CLOUD_INIT_EOF
}

# ============================================================
# Cloud API helpers
# ============================================================

# Calculate exponential backoff with jitter for retry logic
# Usage: calculate_retry_backoff CURRENT_INTERVAL MAX_INTERVAL
# Returns: backoff interval with ±20% jitter
calculate_retry_backoff() {
    local interval="${1}"
    local max_interval="${2}"

    # Validate inputs to prevent empty or invalid intervals
    if [[ -z "${interval}" ]] || [[ "${interval}" -lt 1 ]]; then
        echo "1"
        return 0
    fi

    # Calculate next interval with exponential backoff
    local next_interval=$((interval * 2))
    if [[ "${next_interval}" -gt "${max_interval}" ]]; then
        next_interval="${max_interval}"
    fi

    # Add jitter: ±20% randomization to prevent thundering herd
    # Fallback to no-jitter interval if python3 is unavailable
    python3 -c "import random; print(int(${interval} * (0.8 + random.random() * 0.4)))" 2>/dev/null || printf '%s' "${interval}"
}

# Handle API retry decision with backoff - extracted to reduce duplication across API wrappers
# Usage: _api_should_retry_on_error ATTEMPT MAX_RETRIES INTERVAL MAX_INTERVAL MESSAGE
# Returns: 0 to continue/retry, 1 to fail
# Caller updates interval and attempt variables after success
_api_should_retry_on_error() {
    local attempt="${1}"
    local max_retries="${2}"
    local interval="${3}"
    local max_interval="${4}"
    local message="${5}"

    if [[ "${attempt}" -ge "${max_retries}" ]]; then
        return 1  # Don't retry - max attempts exhausted
    fi

    local jitter
    jitter=$(calculate_retry_backoff "${interval}" "${max_interval}")
    log_warn "${message} (attempt ${attempt}/${max_retries}), retrying in ${jitter}s..."
    sleep "${jitter}"

    return 0  # Do retry
}

# Helper to update retry interval with backoff
# Usage: _update_retry_interval INTERVAL_VAR MAX_INTERVAL_VAR
# This eliminates repeated interval update logic across API wrappers
_update_retry_interval() {
    local interval_var="${1}"
    local max_interval_var="${2}"

    local current_interval=${!interval_var}
    local max_interval=${!max_interval_var}

    current_interval=$((current_interval * 2))
    if [[ "${current_interval}" -gt "${max_interval}" ]]; then
        current_interval="${max_interval}"
    fi

    printf -v "${interval_var}" '%s' "${current_interval}"
}

# Helper to extract HTTP status code and response body from curl output
# Curl is called with "-w \n%{http_code}" so last line is the code
# Returns: http_code on stdout, response_body via global variable
_parse_api_response() {
    local response="${1}"
    local http_code
    http_code=$(echo "${response}" | tail -1)
    local response_body
    response_body=$(echo "${response}" | sed '$d')

    API_HTTP_CODE="${http_code}"
    API_RESPONSE_BODY="${response_body}"
}

# Core curl wrapper for API requests - builds args, executes, parses response
# Usage: _curl_api URL METHOD BODY AUTH_ARGS...
# Returns: 0 on curl success, 1 on curl failure
# Sets: API_HTTP_CODE and API_RESPONSE_BODY globals
_curl_api() {
    local url="${1}"
    local method="${2}"
    local body="${3:-}"
    shift 3

    local args=(
        -s
        -w "\n%{http_code}"
        -X "${method}"
        -H "Content-Type: application/json"
        "$@"
    )

    if [[ -n "${body}" ]]; then
        args+=(-d "${body}")
    fi

    local response
    response=$(curl "${args[@]}" "${url}" 2>&1)
    local curl_exit_code=$?

    _parse_api_response "${response}"

    return ${curl_exit_code}
}

# Helper to handle a single API request attempt with Bearer auth
# Returns: 0 on curl success, 1 on curl failure
# Sets: API_HTTP_CODE and API_RESPONSE_BODY globals
_make_api_request() {
    local base_url="${1}"
    local auth_token="${2}"
    local method="${3}"
    local endpoint="${4}"
    local body="${5:-}"

    _curl_api "${base_url}${endpoint}" "${method}" "${body}" -H "Authorization: Bearer ${auth_token}"
}

# Generic cloud API wrapper - centralized curl wrapper for all cloud providers
# Includes automatic retry logic with exponential backoff for transient failures
# Usage: generic_cloud_api BASE_URL AUTH_TOKEN METHOD ENDPOINT [BODY] [MAX_RETRIES]
# Example: generic_cloud_api "$DO_API_BASE" "$DO_API_TOKEN" GET "/account"
# Example: generic_cloud_api "$DO_API_BASE" "$DO_API_TOKEN" POST "/droplets" "$body"
# Example: generic_cloud_api "$DO_API_BASE" "$DO_API_TOKEN" GET "/account" "" 5
# Retries on: 429 (rate limit), 503 (service unavailable), network errors
# Internal retry loop shared by generic_cloud_api and generic_cloud_api_custom_auth
# Usage: _cloud_api_retry_loop REQUEST_FUNC MAX_RETRIES API_DESCRIPTION [REQUEST_FUNC_ARGS...]
# Classify the result of an API request attempt.
# Returns a retry reason string on stdout if the request failed with a retryable error,
# or empty string on success. Caller checks the return string.
_classify_api_result() {
    local curl_ok="${1}"
    if [[ "${curl_ok}" != "0" ]]; then
        echo "Cloud API network error"
    elif [[ "${API_HTTP_CODE}" == "429" ]]; then
        echo "Cloud API returned rate limit (HTTP 429)"
    elif [[ "${API_HTTP_CODE}" == "503" ]]; then
        echo "Cloud API returned service unavailable (HTTP 503)"
    fi
}

# Report a final API failure after retries are exhausted
_report_api_failure() {
    local retry_reason="${1}"
    local max_retries="${2}"
    log_error "${retry_reason} after ${max_retries} attempts"
    if [[ "${retry_reason}" == "Cloud API network error" ]]; then
        log_warn "Could not reach the cloud provider's API."
        log_warn ""
        log_warn "How to fix:"
        log_warn "  1. Check your internet connection: curl -s https://httpbin.org/ip"
        log_warn "  2. Check DNS resolution: nslookup the provider's API hostname"
        log_warn "  3. If behind a proxy or firewall, ensure HTTPS traffic is allowed"
        log_warn "  4. Try again in a few moments (the API may be temporarily down)"
    else
        log_warn "This is usually caused by rate limiting or temporary provider issues."
        log_warn "Wait a minute and try again, or check the provider's status page."
        echo "${API_RESPONSE_BODY}"
    fi
}

_cloud_api_retry_loop() {
    local request_func="${1}"
    local max_retries="${2}"
    local api_description="${3}"
    shift 3

    local attempt=1
    local interval=2
    local max_interval=30

    while [[ "${attempt}" -le "${max_retries}" ]]; do
        local curl_ok=0
        "${request_func}" "$@" || curl_ok=$?

        local retry_reason
        retry_reason=$(_classify_api_result "${curl_ok}")

        if [[ -z "${retry_reason}" ]]; then
            echo "${API_RESPONSE_BODY}"
            return 0
        fi

        if ! _api_should_retry_on_error "${attempt}" "${max_retries}" "${interval}" "${max_interval}" "${retry_reason}"; then
            _report_api_failure "${retry_reason}" "${max_retries}"
            return 1
        fi
        _update_retry_interval interval max_interval
        attempt=$((attempt + 1))
    done

    log_error "Cloud API request failed after ${max_retries} attempts (${api_description})"
    log_warn "This is usually caused by rate limiting or temporary provider issues."
    log_warn "Wait a minute and try again, or check the provider's status page."
    return 1
}

generic_cloud_api() {
    local base_url="${1}"
    local auth_token="${2}"
    local method="${3}"
    local endpoint="${4}"
    local body="${5:-}"
    local max_retries="${6:-3}"

    _cloud_api_retry_loop _make_api_request "${max_retries}" "${method} ${endpoint}" "${base_url}" "${auth_token}" "${method}" "${endpoint}" "${body}"
}

# Helper to make API request with custom curl auth args (e.g., Basic Auth, custom headers)
# Returns: 0 on curl success, 1 on curl failure
# Sets: API_HTTP_CODE and API_RESPONSE_BODY globals
_make_api_request_custom_auth() {
    local url="${1}"
    local method="${2}"
    local body="${3:-}"
    shift 3

    _curl_api "${url}" "${method}" "${body}" "$@"
}

# Generic cloud API wrapper with custom curl auth args
# Like generic_cloud_api but accepts arbitrary curl flags for authentication
# Usage: generic_cloud_api_custom_auth BASE_URL METHOD ENDPOINT BODY MAX_RETRIES AUTH_ARGS...
# Example: generic_cloud_api_custom_auth "$API_BASE" GET "/account" "" 3 -H "X-Auth-Token: $TOKEN"
# Example: generic_cloud_api_custom_auth "$API_BASE" POST "/servers" "$body" 3 -u "$USER:$PASS"
generic_cloud_api_custom_auth() {
    local base_url="${1}"
    local method="${2}"
    local endpoint="${3}"
    local body="${4:-}"
    local max_retries="${5:-3}"
    shift 5
    # Remaining args are custom curl auth flags

    _cloud_api_retry_loop _make_api_request_custom_auth "${max_retries}" "${method} ${endpoint}" "${base_url}${endpoint}" "${method}" "${body}" "$@"
}

# ============================================================
# Agent verification helpers
# ============================================================

# Check if agent command exists in PATH
_check_agent_in_path() {
    local agent_cmd="$1"
    local agent_name="$2"
    if ! command -v "${agent_cmd}" &> /dev/null; then
        _log_diagnostic \
            "${agent_name} installation failed: command '${agent_cmd}' not found in PATH" \
            "The installation script encountered an error (check logs above)" \
            "The binary was installed to a directory not in PATH" \
            "Network issues prevented the download from completing" \
            --- \
            "Re-run the script to retry the installation" \
            "Install ${agent_name} manually and ensure it is in PATH"
        return 1
    fi
    return 0
}

# Check if agent command executes without error
_check_agent_runs() {
    local agent_cmd="$1"
    local verify_arg="$2"
    local agent_name="$3"
    if ! "${agent_cmd}" "${verify_arg}" &> /dev/null; then
        _log_diagnostic \
            "${agent_name} verification failed: '${agent_cmd} ${verify_arg}' returned an error" \
            "Missing runtime dependencies (Python, Node.js, etc.)" \
            "Incompatible system architecture or OS version" \
            --- \
            "Check ${agent_name}'s installation docs for prerequisites" \
            "Run '${agent_cmd} ${verify_arg}' manually to see the error"
        return 1
    fi
    return 0
}

# Verify that an agent is properly installed by checking if its command exists
# Usage: verify_agent_installed AGENT_COMMAND [VERIFICATION_ARG] [ERROR_MESSAGE]
# Examples:
#   verify_agent_installed "claude" "--version" "Claude Code"
#   verify_agent_installed "codex" "--version" "Codex"
#   verify_agent_installed "cline" "--version" "Cline"
# Returns 0 if agent is installed and working, 1 otherwise
verify_agent_installed() {
    local agent_cmd="${1}"
    local verify_arg="${2:---version}"
    local agent_name="${3:-${agent_cmd}}"

    log_step "Verifying ${agent_name} installation..."

    _check_agent_in_path "${agent_cmd}" "${agent_name}" || return 1
    _check_agent_runs "${agent_cmd}" "${verify_arg}" "${agent_name}" || return 1

    log_info "${agent_name} installation verified successfully"
    return 0
}

# ============================================================
# Non-interactive agent execution
# ============================================================

# Execute an agent in non-interactive mode with a prompt
# Usage: execute_agent_non_interactive SPRITE_NAME AGENT_NAME AGENT_FLAGS PROMPT
# Arguments:
#   SPRITE_NAME    - Name of the sprite/server to execute on
#   AGENT_NAME     - Name of the agent command (e.g., "claude", "codex")
#   AGENT_FLAGS    - Agent-specific flags for non-interactive execution (e.g., "-p" for claude, "--prompt" for codex)
#   PROMPT         - User prompt to execute
#   EXEC_CALLBACK  - Function to execute commands: func(sprite_name, command)
#
# Example (Sprite):
#   execute_agent_non_interactive "$SPRITE_NAME" "claude" "-p" "$PROMPT" "sprite_exec"
#
# Example (SSH):
#   execute_agent_non_interactive "$SERVER_IP" "codex" "--prompt" "$PROMPT" "ssh_exec"
execute_agent_non_interactive() {
    local sprite_name="${1}"
    local agent_name="${2}"
    local agent_flags="${3}"
    local prompt="${4}"
    local exec_callback="${5}"

    log_step "Executing ${agent_name} with prompt in non-interactive mode..."

    # Do NOT use printf '%q' here — the run callback (run_server, sprite exec,
    # ssh) already handles escaping for remote transport. Double-escaping breaks
    # prompts containing quotes, spaces, or special characters on Fly.io.
    # Single-quote the prompt to protect it from shell expansion.
    local safe_prompt
    safe_prompt="'$(printf '%s' "${prompt}" | sed "s/'/'\\\\''/g")'"

    # Build the command based on exec callback type
    if [[ "${exec_callback}" == *"sprite"* ]]; then
        # Sprite execution (no -tty flag for non-interactive)
        sprite exec -s "${sprite_name}" -- zsh -c "source ~/.zshrc && ${agent_name} ${agent_flags} ${safe_prompt}"
    else
        # Generic SSH execution
        ${exec_callback} "${sprite_name}" "source ~/.zshrc && ${agent_name} ${agent_flags} ${safe_prompt}"
    fi
}

# ============================================================
# SSH connectivity helpers
# ============================================================

# Generic SSH wait function - polls until a remote command succeeds with exponential backoff
# Usage: generic_ssh_wait USERNAME IP SSH_OPTS TEST_CMD DESCRIPTION MAX_ATTEMPTS [INITIAL_INTERVAL]
# Implements exponential backoff: starts at INITIAL_INTERVAL (default 5s), doubles up to max 30s
# Adds jitter (±20%) to prevent thundering herd when multiple instances retry simultaneously
# Log progress message based on elapsed time
_log_ssh_wait_progress() {
    local description="${1}"
    local elapsed_time="${2}"

    if [[ ${elapsed_time} -lt 60 ]]; then
        log_step "Waiting for ${description}... (${elapsed_time}s elapsed, still within normal range)"
    elif [[ ${elapsed_time} -lt 120 ]]; then
        log_step "Waiting for ${description}... (${elapsed_time}s elapsed, taking longer than usual)"
    else
        log_warn "Still waiting for ${description}... (${elapsed_time}s elapsed, this is unusually slow)"
    fi
}

# Log timeout error message with troubleshooting steps
_log_ssh_wait_timeout_error() {
    local description="${1}"
    local elapsed_time="${2}"
    local username="${3}"
    local ip="${4}"

    log_error "${description} timed out after ${elapsed_time}s (server: ${ip})"
    log_error ""
    log_error "The server failed to become ready within the expected timeframe."
    log_error ""
    log_error "Common causes:"
    log_error "  - Server is still booting (some cloud providers take 2-3 minutes)"
    log_error "  - Cloud provider API delays or maintenance"
    log_error "  - Firewall blocking SSH on port 22"
    log_error "  - Network connectivity issues"
    log_error ""
    log_error "Troubleshooting steps:"
    log_error "  1. Test SSH manually:  ssh ${username}@${ip}"
    log_error "  2. Check firewall rules in your cloud provider dashboard"
    if [[ -n "${SPAWN_DASHBOARD_URL:-}" ]]; then
        log_error "     Dashboard: ${SPAWN_DASHBOARD_URL}"
    fi
    log_error "  3. Re-run this command to retry (the server may need more time)"
    if [[ -n "${SPAWN_RETRY_CMD:-}" ]]; then
        log_error "     ${SPAWN_RETRY_CMD}"
    fi
}

generic_ssh_wait() {
    local username="${1}"
    local ip="${2}"
    local ssh_opts="${3}"
    local test_cmd="${4}"
    local description="${5}"
    local max_attempts="${6:-30}"
    local initial_interval="${7:-5}"

    local attempt=1
    local interval="${initial_interval}"
    local max_interval=30
    local elapsed_time=0

    log_step "Waiting for ${description} to ${ip} (this usually takes 30-90 seconds)..."
    while [[ "${attempt}" -le "${max_attempts}" ]]; do
        # shellcheck disable=SC2086
        if ssh ${ssh_opts} "${username}@${ip}" "${test_cmd}" < /dev/null >/dev/null 2>&1; then
            log_info "${description} ready (took ${elapsed_time}s)"
            return 0
        fi

        local jitter
        jitter=$(calculate_retry_backoff "${interval}" "${max_interval}")

        _log_ssh_wait_progress "${description}" "${elapsed_time}"
        sleep "${jitter}"

        elapsed_time=$((elapsed_time + jitter))
        _update_retry_interval interval max_interval
        attempt=$((attempt + 1))
    done

    _log_ssh_wait_timeout_error "${description}" "${elapsed_time}" "${username}" "${ip}"
    return 1
}

# Wait for cloud-init to complete on a server
# Usage: wait_for_cloud_init <ip> [max_attempts]
# Default max_attempts is 60 (~5 minutes with exponential backoff)
wait_for_cloud_init() {
    local ip="${1}"
    local max_attempts=${2:-60}
    generic_ssh_wait "root" "${ip}" "${SSH_OPTS}" "test -f /root/.cloud-init-complete" "cloud-init" "${max_attempts}" 5
}

# ============================================================
# Standard SSH server operations
# ============================================================

# Most SSH-based cloud providers share identical implementations for
# run_server, upload_file, interactive_session, and verify_server_connectivity.
# These helpers let providers set SSH_USER (default: root) and get all four
# functions automatically, eliminating ~20 lines of copy-paste per provider.

# Run a command on a remote server via SSH
# Usage: ssh_run_server IP COMMAND
# Requires: SSH_USER (default: root), SSH_OPTS
# SECURITY: Command is properly quoted to prevent shell injection.
# Note: $cmd is always a shell command string (with pipes, semicolons, etc.)
# that is intentionally interpreted by the remote shell. All callers pass
# static command strings — never user-controlled input.
ssh_run_server() {
    local ip="${1}"
    local cmd="${2}"
    # Single-quoted so $HOME/$PATH expand on the remote side, not locally.
    # .npm-global/bin: user-writable npm prefix (AWS Lightsail runs as ubuntu, not root)
    local path_prefix='export PATH="$HOME/.npm-global/bin:$HOME/.local/bin:$HOME/.bun/bin:$PATH"'
    if [[ -n "${SPAWN_DEBUG:-}" ]]; then
        cmd="set -x; ${cmd}"
    fi
    # shellcheck disable=SC2086
    # < /dev/null prevents SSH from consuming the parent script's stdin.
    # Without this, sequential SSH calls can steal input meant for later
    # commands (e.g., safe_read prompts), causing hangs.
    ssh $SSH_OPTS "${SSH_USER:-root}@${ip}" -- "${path_prefix} && ${cmd}" < /dev/null
}

# Upload a file to a remote server via SCP
# Usage: ssh_upload_file IP LOCAL_PATH REMOTE_PATH
# Requires: SSH_USER (default: root), SSH_OPTS
ssh_upload_file() {
    local ip="${1}"
    local local_path="${2}"
    local remote_path="${3}"
    # shellcheck disable=SC2086
    scp $SSH_OPTS "${local_path}" "${SSH_USER:-root}@${ip}:${remote_path}"
}

# Show a post-session summary reminding the user their server is still running.
# Called automatically by ssh_interactive_session after the SSH session ends.
# Uses optional env vars for richer output:
#   SPAWN_DASHBOARD_URL - Cloud provider dashboard URL for managing servers
#   SERVER_NAME         - Server name (set by individual cloud scripts)
# Arguments: IP
_show_post_session_summary() {
    local ip="${1}"
    local dashboard_url="${SPAWN_DASHBOARD_URL:-}"
    local server_name="${SERVER_NAME:-}"

    printf '\n'
    if [[ -n "${server_name}" ]]; then
        log_warn "Session ended. Your server '${server_name}' is still running at ${ip}."
    else
        log_warn "Session ended. Your server is still running at ${ip}."
    fi
    log_warn "Remember to delete it when you're done to avoid ongoing charges."
    log_warn ""
    if [[ -n "${dashboard_url}" ]]; then
        log_warn "Manage or delete it in your dashboard:"
        log_warn "  ${dashboard_url}"
    else
        log_warn "Check your cloud provider dashboard to stop or delete the server."
    fi
    log_warn ""
    log_info "To reconnect:"
    log_info "  ssh ${SSH_USER:-root}@${ip}"
}

# Show a post-session summary for exec-based (non-SSH) cloud providers.
# These use CLI exec commands instead of direct SSH, so the reconnect
# hint differs from the SSH variant.
# Uses optional env vars for richer output:
#   SPAWN_DASHBOARD_URL  - Cloud provider dashboard URL for managing services
#   SERVER_NAME          - Service/sandbox name
#   SPAWN_RECONNECT_CMD  - CLI command to reconnect (shown as reconnect hint)
_show_exec_post_session_summary() {
    local dashboard_url="${SPAWN_DASHBOARD_URL:-}"
    local server_name="${SERVER_NAME:-}"
    local reconnect_cmd="${SPAWN_RECONNECT_CMD:-}"

    printf '\n'
    if [[ -n "${server_name}" ]]; then
        log_warn "Session ended. Your service '${server_name}' is still running."
    else
        log_warn "Session ended. Your service is still running."
    fi
    log_warn "Remember to delete it when you're done to avoid ongoing charges."
    log_warn ""
    if [[ -n "${dashboard_url}" ]]; then
        log_warn "Manage or delete it in your dashboard:"
        log_warn "  ${dashboard_url}"
    else
        log_warn "Check your cloud provider dashboard to stop or delete the service."
    fi
    if [[ -n "${reconnect_cmd}" ]]; then
        log_warn ""
        log_info "To reconnect:"
        log_info "  ${reconnect_cmd}"
    fi
}

# Start an interactive SSH session
# Usage: ssh_interactive_session IP COMMAND
# Requires: SSH_USER (default: root), SSH_OPTS
# SECURITY: Command is properly quoted to prevent shell injection
ssh_interactive_session() {
    local ip="${1}"
    local cmd="${2}"
    local ssh_exit=0
    # shellcheck disable=SC2086
    ssh -t $SSH_OPTS "${SSH_USER:-root}@${ip}" -- "${cmd}" || ssh_exit=$?
    _show_post_session_summary "${ip}"
    return "${ssh_exit}"
}

# Wait for SSH connectivity to a server
# Usage: ssh_verify_connectivity IP [MAX_ATTEMPTS] [INITIAL_INTERVAL]
# Requires: SSH_USER (default: root), SSH_OPTS
ssh_verify_connectivity() {
    local ip="${1}"
    local max_attempts=${2:-30}
    local initial_interval=${3:-5}
    # shellcheck disable=SC2154
    generic_ssh_wait "${SSH_USER:-root}" "${ip}" "$SSH_OPTS -o ConnectTimeout=5" "echo ok" "SSH connectivity" "${max_attempts}" "${initial_interval}"
}

# Extract a value from a JSON response using a Python expression
# Usage: _extract_json_field JSON_STRING PYTHON_EXPR [DEFAULT]
# The Python expression receives 'd' as the parsed JSON dict.
# Returns DEFAULT (or empty string) on parse failure.
_extract_json_field() {
    local json="${1}"
    local py_expr="${2}"
    local default="${3:-}"

    printf '%s' "${json}" | python3 -c "import json,sys; d=json.loads(sys.stdin.read()); print(${py_expr})" 2>/dev/null || echo "${default}"
}

# Extract an error message from a JSON API response.
# Tries common error field patterns used by cloud provider APIs:
#   message, error, error.message, error.error_message, reason
# Falls back to the raw response if no known field matches.
# Usage: extract_api_error_message JSON_STRING [FALLBACK]
extract_api_error_message() {
    local json="${1}"
    local fallback="${2:-Unknown error}"

    printf '%s' "${json}" | python3 -c "
import json, sys
try:
    d = json.loads(sys.stdin.read())
    e = d.get('error', '')
    msg = (
        (isinstance(e, dict) and (e.get('message') or e.get('error_message')))
        or d.get('message')
        or d.get('reason')
        or (isinstance(e, str) and e)
        or ''
    )
    if msg:
        print(msg)
    else:
        sys.exit(1)
except:
    sys.exit(1)
" 2>/dev/null || echo "${fallback}"
}

# Generic instance status polling loop
# Polls an API endpoint until the instance reaches the target status, then extracts the IP.
# Usage: generic_wait_for_instance API_FUNC ENDPOINT TARGET_STATUS STATUS_PY IP_PY IP_VAR DESCRIPTION [MAX_ATTEMPTS]
#
# Arguments:
#   API_FUNC       - Cloud API function name (e.g., "vultr_api", "do_api")
#   ENDPOINT       - API endpoint path (e.g., "/instances/$id")
#   TARGET_STATUS  - Status value that means "ready" (e.g., "active", "running")
#   STATUS_PY      - Python expression to extract status from JSON (receives 'd' as parsed dict)
#   IP_PY          - Python expression to extract IP from JSON (receives 'd' as parsed dict)
#   IP_VAR         - Environment variable name to export with the IP (e.g., "VULTR_SERVER_IP")
#   DESCRIPTION    - Human-readable label for logging (e.g., "Vultr instance")
#   MAX_ATTEMPTS   - Optional, defaults to 60
#
# Example:
#   generic_wait_for_instance vultr_api "/instances/$id" "active" \
#       "d['instance']['status']" "d['instance']['main_ip']" \
#       VULTR_SERVER_IP "Instance" 60
# Single polling attempt: fetch status, check readiness, log progress.
# Returns 0 if instance is ready (IP exported), 1 to keep polling, 2 on status mismatch.
# Arguments: API_FUNC ENDPOINT TARGET_STATUS STATUS_PY IP_PY IP_VAR DESCRIPTION ATTEMPT POLL_DELAY
_poll_instance_once() {
    local api_func="${1}" endpoint="${2}" target_status="${3}"
    local status_py="${4}" ip_py="${5}" ip_var="${6}"
    local description="${7}" attempt="${8}" poll_delay="${9}"

    local response
    response=$("${api_func}" GET "${endpoint}" 2>/dev/null) || true

    local status
    status=$(_extract_json_field "${response}" "${status_py}" "unknown")

    if [[ "${status}" != "${target_status}" ]]; then
        log_step "${description} status: ${status} ($((attempt * poll_delay))s elapsed)"
        return 2
    fi

    local ip
    ip=$(_extract_json_field "${response}" "${ip_py}")
    if [[ -n "${ip}" ]]; then
        # SECURITY: Validate ip_var to prevent command injection
        if [[ ! "${ip_var}" =~ ^[A-Z_][A-Z0-9_]*$ ]]; then
            log_error "SECURITY: Invalid env var name rejected: ${ip_var}"
            return 1
        fi
        export "${ip_var}=${ip}"
        log_info "${description} ready (IP: ${ip})"
        return 0
    fi

    log_step "${description} status: ${status} ($((attempt * poll_delay))s elapsed)"
    return 1
}

# Report timeout when instance polling exhausts all attempts.
_report_instance_timeout() {
    local description="${1}" target_status="${2}" total_time="${3}"
    log_error "${description} did not become ${target_status} within ${total_time}s"
    log_error ""
    log_error "The cloud provider API reported the instance is not yet ready."
    log_error ""
    log_error "This usually means:"
    log_error "  - Cloud provider is experiencing delays (high load, maintenance)"
    log_error "  - The region or instance type has limited capacity"
    log_error "  - The instance failed to provision but the API hasn't reported it yet"
    log_error ""
    log_error "Next steps:"
    log_error "  1. Check your cloud dashboard for instance status and error messages"
    if [[ -n "${SPAWN_DASHBOARD_URL:-}" ]]; then
        log_error "     ${SPAWN_DASHBOARD_URL}"
    fi
    log_error "  2. Wait 2-3 minutes and retry the spawn command"
    log_error "  3. Try a different region or instance size if this persists"
}

generic_wait_for_instance() {
    local api_func="${1}" endpoint="${2}" target_status="${3}"
    local status_py="${4}" ip_py="${5}" ip_var="${6}"
    local description="${7}" max_attempts="${8:-60}"
    local poll_delay="${INSTANCE_STATUS_POLL_DELAY:-5}"

    local attempt=1
    log_step "Waiting for ${description} to become ${target_status}..."

    while [[ "${attempt}" -le "${max_attempts}" ]]; do
        _poll_instance_once "${api_func}" "${endpoint}" "${target_status}" \
            "${status_py}" "${ip_py}" "${ip_var}" \
            "${description}" "${attempt}" "${poll_delay}" && return 0
        sleep "${poll_delay}"
        attempt=$((attempt + 1))
    done

    _report_instance_timeout "${description}" "${target_status}" "$((max_attempts * poll_delay))"
    return 1
}

# ============================================================
# API token management helpers
# ============================================================

# Try to load API token from environment variable
# Returns 0 if found and sets env var, 1 otherwise
_load_token_from_env() {
    local env_var_name="${1}"
    local provider_name="${2}"

    local env_value="${!env_var_name}"
    if [[ -n "${env_value}" ]]; then
        log_info "Using ${provider_name} API token from environment"
        return 0
    fi
    return 1
}

# Try to load API token from config file
# Returns 0 if found and exports env var, 1 otherwise
_load_token_from_config() {
    local config_file="${1}"
    local env_var_name="${2}"
    local provider_name="${3}"

    # SECURITY: Validate env_var_name to prevent command injection
    if [[ ! "${env_var_name}" =~ ^[A-Z_][A-Z0-9_]*$ ]]; then
        log_error "SECURITY: Invalid env var name rejected: ${env_var_name}"
        return 1
    fi

    if [[ ! -f "${config_file}" ]]; then
        return 1
    fi

    local saved_token
    saved_token=$(python3 -c "import json, sys; data=json.load(open(sys.argv[1])); print(data.get('api_key','') or data.get('token',''))" "${config_file}" 2>/dev/null)
    if [[ -z "${saved_token}" ]]; then
        return 1
    fi

    export "${env_var_name}=${saved_token}"
    log_info "Using ${provider_name} API token from ${config_file}"
    return 0
}

# Validate token with provider API if test function provided
# Returns 0 on success, 1 on validation failure
_validate_token_with_provider() {
    local test_func="${1}"
    local env_var_name="${2}"
    local provider_name="${3}"
    local help_url="${4:-}"

    if [[ -z "${test_func}" ]]; then
        return 0  # No validation needed
    fi

    if ! "${test_func}"; then
        log_error "Authentication failed: Invalid ${provider_name} API token"
        log_error "The token may be expired, revoked, or incorrectly copied."
        log_error ""
        log_error "How to fix:"
        if [[ -n "${help_url}" ]]; then
            log_error "  1. Get a new token from: ${help_url}"
            log_error "  2. Re-run the command and paste the new token"
            log_error "  3. Or set it directly: ${env_var_name}=your-token spawn ..."
        else
            log_error "  1. Re-run the command to enter a new token"
            log_error "  2. Or set it directly: ${env_var_name}=your-token spawn ..."
        fi
        unset "${env_var_name}"
        return 1
    fi
    return 0
}

# Save API token to config file
_save_token_to_config() {
    local config_file="${1}"
    local token="${2}"

    local config_dir
    config_dir=$(dirname "${config_file}")
    mkdir -p "${config_dir}"

    local escaped_token
    escaped_token=$(json_escape "${token}")
    printf '{\n  "api_key": %s,\n  "token": %s\n}\n' "${escaped_token}" "${escaped_token}" > "${config_file}"
    chmod 600 "${config_file}"
    log_info "API token saved to ${config_file}"
}

# Generic ensure API token function - eliminates duplication across providers
# Usage: ensure_api_token_with_provider PROVIDER_NAME ENV_VAR_NAME CONFIG_FILE HELP_URL TEST_FUNC
# Example: ensure_api_token_with_provider "Lambda" "LAMBDA_API_KEY" "$HOME/.config/spawn/lambda.json" \
#            "https://cloud.lambdalabs.com/api-keys" test_lambda_token
# TEST_FUNC should be a function that validates the token and returns 0 on success, 1 on failure
# TEST_FUNC is optional - if empty, no validation is performed
_prompt_for_api_token() {
    local provider_name="${1}"
    local help_url="${2}"

    echo "" >&2
    log_step "${provider_name} API Token Required"
    log_step "Get your token from: ${help_url}"
    echo "" >&2

    validated_read "Enter your ${provider_name} API token: " validate_api_token
}

_validate_env_var_name() {
    local env_var_name="${1}"
    if [[ ! "${env_var_name}" =~ ^[A-Z_][A-Z0-9_]*$ ]]; then
        log_error "SECURITY: Invalid env var name rejected: ${env_var_name}"
        return 1
    fi
    return 0
}

ensure_api_token_with_provider() {
    local provider_name="${1}"
    local env_var_name="${2}"
    local config_file="${3}"
    local help_url="${4}"
    local test_func="${5:-}"

    check_python_available || return 1

    # Try environment variable
    if _load_token_from_env "${env_var_name}" "${provider_name}"; then
        return 0
    fi

    # Try config file (validate if test function provided, fall through to prompt on failure)
    if _load_token_from_config "${config_file}" "${env_var_name}" "${provider_name}"; then
        if [[ -z "${test_func}" ]] || "${test_func}" 2>/dev/null; then
            return 0
        fi
        log_warn "Saved ${provider_name} token is invalid or expired, requesting a new one..."
        unset "${env_var_name}"
    fi

    # Prompt for new token
    local token
    token=$(_prompt_for_api_token "${provider_name}" "${help_url}") || return 1

    # SECURITY: Validate env_var_name to prevent command injection
    _validate_env_var_name "${env_var_name}" || return 1

    export "${env_var_name}=${token}"

    # Validate with provider API
    if ! _validate_token_with_provider "${test_func}" "${env_var_name}" "${provider_name}" "${help_url}"; then
        return 1
    fi

    # Save to config file
    _save_token_to_config "${config_file}" "${token}"
    return 0
}

# ============================================================
# Multi-credential configuration helpers
# ============================================================

# Load multiple fields from a JSON config file in a single python3 call.
# Outputs each field value on a separate line. Returns 1 if file missing or parse fails.
# Usage: local creds; creds=$(_load_json_config_fields CONFIG_FILE field1 field2 ...)
# Then:  { read -r var1; read -r var2; ... } <<< "${creds}"
_load_json_config_fields() {
    local config_file="${1}"; shift
    [[ -f "${config_file}" ]] || return 1

    local py_fields=""
    for field in "$@"; do
        py_fields="${py_fields}print(d.get('${field}', ''));"
    done

    python3 -c "
import json, sys
d = json.load(open(sys.argv[1]))
${py_fields}
" "${config_file}" 2>/dev/null || return 1
}

# Save key-value pairs to a JSON config file using json_escape for safe encoding.
# Usage: _save_json_config CONFIG_FILE key1 val1 key2 val2 ...
_save_json_config() {
    local config_file="${1}"; shift

    mkdir -p "$(dirname "${config_file}")"

    # Build JSON object from key=value pairs
    local json="{"
    local first=true
    while [[ $# -ge 2 ]]; do
        local key="${1}"; shift
        local val="${1}"; shift
        if [[ "${first}" == "true" ]]; then
            first=false
        else
            json="${json},"
        fi
        json="${json}
  \"${key}\": $(json_escape "${val}")"
    done
    json="${json}
}
"

    printf '%s\n' "${json}" > "${config_file}"
    chmod 600 "${config_file}"
    log_info "Credentials saved to ${config_file}"
}

# Check if all env vars in a list are set (non-empty)
# Returns 0 if all set, 1 if any missing
_multi_creds_all_env_set() {
    local var
    for var in "$@"; do
        if [[ -z "${!var:-}" ]]; then
            return 1
        fi
    done
    return 0
}

# Load multi-credentials from a JSON config file into env vars.
# Returns 0 if all fields loaded, 1 if any missing.
# Usage: _multi_creds_load_config CONFIG_FILE env_vars[@] config_keys[@]
_multi_creds_load_config() {
    local config_file="${1}"
    shift
    local env_count="${1}"
    shift
    local env_vars=("${@:1:$env_count}")
    shift "${env_count}"
    local config_keys=("$@")

    local creds
    creds=$(_load_json_config_fields "${config_file}" "${config_keys[@]}") || return 1

    local i=0
    while IFS= read -r value; do
        if [[ -z "${value}" ]]; then
            return 1
        fi
        # SECURITY: Validate env var name before export
        if [[ ! "${env_vars[$i]}" =~ ^[A-Z_][A-Z0-9_]*$ ]]; then
            log_error "SECURITY: Invalid env var name rejected: ${env_vars[$i]}"
            return 1
        fi
        export "${env_vars[$i]}=${value}"
        i=$((i + 1))
    done <<< "${creds}"

    [[ "${i}" -eq "${#env_vars[@]}" ]] || return 1
    return 0
}

# Prompt user for each credential interactively.
# Returns 1 if any input is empty or read fails.
_multi_creds_prompt() {
    local provider_name="${1}"
    local help_url="${2}"
    shift 2
    local env_count="${1}"
    shift
    local env_vars=("${@:1:$env_count}")
    shift "${env_count}"
    local labels=("$@")

    echo ""
    log_step "${provider_name} API Credentials Required"
    log_step "Get your credentials from: ${help_url}"
    echo ""

    local idx
    for idx in $(seq 0 $((${#env_vars[@]} - 1))); do
        # SECURITY: Validate env var name before export
        if [[ ! "${env_vars[$idx]}" =~ ^[A-Z_][A-Z0-9_]*$ ]]; then
            log_error "SECURITY: Invalid env var name rejected: ${env_vars[$idx]}"
            return 1
        fi
        local val
        val=$(safe_read "Enter ${provider_name} ${labels[$idx]}: ") || return 1
        if [[ -z "${val}" ]]; then
            log_error "${labels[$idx]} is required"
            return 1
        fi
        export "${env_vars[$idx]}=${val}"
    done
    return 0
}

# Validate multi-credentials using a test function.
# Unsets all env vars on failure.
_multi_creds_validate() {
    local test_func="${1}"
    local provider_name="${2}"
    local help_url="${3}"
    shift 3

    if [[ -z "${test_func}" ]]; then
        return 0
    fi

    log_step "Testing ${provider_name} credentials..."
    if ! "${test_func}"; then
        log_error "Invalid ${provider_name} credentials"
        log_error "The credentials may be expired, revoked, or incorrectly copied."
        log_error ""
        log_error "How to fix:"
        log_error "  1. Get new credentials from: ${help_url}"
        log_error "  2. Re-run the command and enter the new credentials"
        local v
        for v in "$@"; do
            unset "${v}"
        done
        return 1
    fi
    return 0
}

# Generic multi-credential ensure function
# Eliminates duplicated env-var/config/prompt/test/save logic across providers
# that need more than one credential (username+password, client_id+secret, etc.)
#
# Usage: ensure_multi_credentials PROVIDER_NAME CONFIG_FILE HELP_URL TEST_FUNC \
#          "ENV_VAR:config_key:Prompt Label" ...
#
# Each credential spec is a colon-delimited triple:
#   ENV_VAR    - Environment variable name (e.g., CONTABO_CLIENT_ID)
#   config_key - JSON key in the config file (e.g., client_id)
#   Prompt Label - Human-readable label for prompting (e.g., "Client ID")
ensure_multi_credentials() {
    local provider_name="${1}"
    local config_file="${2}"
    local help_url="${3}"
    local test_func="${4:-}"
    shift 4

    check_python_available || return 1

    # Parse credential specs into parallel arrays
    local env_vars=() config_keys=() labels=()
    local spec
    for spec in "$@"; do
        env_vars+=("${spec%%:*}")
        local rest="${spec#*:}"
        config_keys+=("${rest%%:*}")
        labels+=("${rest#*:}")
    done

    local n="${#env_vars[@]}"

    # 1. All env vars already set?
    if _multi_creds_all_env_set "${env_vars[@]}"; then
        log_info "Using ${provider_name} credentials from environment"
        return 0
    fi

    # 2. Try loading from config file
    if _multi_creds_load_config "${config_file}" "${n}" "${env_vars[@]}" "${config_keys[@]}"; then
        log_info "Using ${provider_name} credentials from ${config_file}"
        return 0
    fi

    # 3. Prompt for each credential
    _multi_creds_prompt "${provider_name}" "${help_url}" "${n}" "${env_vars[@]}" "${labels[@]}" || return 1

    # 4. Validate credentials
    _multi_creds_validate "${test_func}" "${provider_name}" "${help_url}" "${env_vars[@]}" || return 1

    # 5. Save to config file
    local save_args=()
    local idx
    for idx in $(seq 0 $((n - 1))); do
        save_args+=("${config_keys[$idx]}" "${!env_vars[$idx]}")
    done
    _save_json_config "${config_file}" "${save_args[@]}"
    return 0
}

# ============================================================
# Configuration file helpers
# ============================================================

# Helper to create, upload, and install a config file from a heredoc or string
# Usage: upload_config_file UPLOAD_CALLBACK RUN_CALLBACK CONTENT REMOTE_PATH
# Example: upload_config_file "$upload_func" "$run_func" "$json_content" "\$HOME/.config/app.json"
upload_config_file() {
    local upload_callback="${1}"
    local run_callback="${2}"
    local content="${3}"
    local remote_path="${4}"

    local temp_file
    temp_file=$(mktemp)
    chmod 600 "${temp_file}"
    track_temp_file "${temp_file}"

    printf '%s\n' "${content}" > "${temp_file}"

    # Use mktemp-derived randomness for the remote temp path to avoid predictable names
    local rand_suffix
    rand_suffix=$(basename "${temp_file}")
    local temp_remote="/tmp/spawn_config_${rand_suffix}"
    ${upload_callback} "${temp_file}" "${temp_remote}"
    # SECURITY: remote_path must be double-quoted to prevent injection via spaces/metacharacters
    # Note: Callers should use $HOME instead of ~ since tilde does not expand inside double quotes
    ${run_callback} "mkdir -p \$(dirname \"${remote_path}\") && chmod 600 '${temp_remote}' && mv '${temp_remote}' \"${remote_path}\""
}

# ============================================================
# Claude Code configuration setup
# ============================================================

# Setup Claude Code configuration files (settings.json, .claude.json, CLAUDE.md)
# This consolidates the config setup pattern used by all claude.sh scripts
# Usage: setup_claude_code_config OPENROUTER_KEY UPLOAD_CALLBACK RUN_CALLBACK
#
# Arguments:
#   OPENROUTER_KEY    - OpenRouter API key to inject into config
#   UPLOAD_CALLBACK   - Function to upload files: func(local_path, remote_path)
#   RUN_CALLBACK      - Function to run commands: func(command)
#
# Example (SSH-based clouds):
#   setup_claude_code_config "$OPENROUTER_API_KEY" \
#     "upload_file $SERVER_IP" \
#     "run_server $SERVER_IP"
#
# Example (Sprite):
#   setup_claude_code_config "$OPENROUTER_API_KEY" \
#     "upload_file_sprite $SPRITE_NAME" \
#     "run_sprite $SPRITE_NAME"

# Generate Claude Code settings.json with API key
_generate_claude_code_settings() {
    local openrouter_key="${1}"
    local escaped_key
    escaped_key=$(json_escape "${openrouter_key}")
    cat << EOF
{
  "theme": "dark",
  "editor": "vim",
  "env": {
    "CLAUDE_CODE_ENABLE_TELEMETRY": "0",
    "ANTHROPIC_BASE_URL": "https://openrouter.ai/api",
    "ANTHROPIC_AUTH_TOKEN": ${escaped_key}
  },
  "permissions": {
    "defaultMode": "bypassPermissions",
    "dangerouslySkipPermissions": true
  }
}
EOF
}

# Generate Claude Code global state JSON
_generate_claude_code_state() {
    cat << EOF
{
  "hasCompletedOnboarding": true,
  "bypassPermissionsModeAccepted": true
}
EOF
}

setup_claude_code_config() {
    local openrouter_key="${1}"
    local upload_callback="${2}"
    local run_callback="${3}"

    log_step "Configuring Claude Code..."

    # Create ~/.claude directory
    ${run_callback} "mkdir -p ~/.claude"

    # Create settings.json
    local settings_json
    settings_json=$(_generate_claude_code_settings "${openrouter_key}")
    upload_config_file "${upload_callback}" "${run_callback}" "${settings_json}" "\$HOME/.claude/settings.json"

    # Create .claude.json global state
    local global_state_json
    global_state_json=$(_generate_claude_code_state)
    upload_config_file "${upload_callback}" "${run_callback}" "${global_state_json}" "\$HOME/.claude.json"

    # Create empty CLAUDE.md
    ${run_callback} "touch ~/.claude/CLAUDE.md"
}

# ============================================================
# OpenClaw configuration setup
# ============================================================

# Setup OpenClaw configuration files (openclaw.json)
# This consolidates the config setup pattern used by all openclaw.sh scripts
# Usage: setup_openclaw_config OPENROUTER_KEY MODEL_ID UPLOAD_CALLBACK RUN_CALLBACK
#
# Arguments:
#   OPENROUTER_KEY    - OpenRouter API key to inject into config
#   MODEL_ID          - Model ID to use (e.g., "openrouter/auto", "anthropic/claude-3.5-sonnet")
#   UPLOAD_CALLBACK   - Function to upload files: func(local_path, remote_path)
#   RUN_CALLBACK      - Function to run commands: func(command)
#
# Example (SSH-based clouds):
#   setup_openclaw_config "$OPENROUTER_API_KEY" "$MODEL_ID" \
#     "upload_file $SERVER_IP" \
#     "run_server $SERVER_IP"
#
# Example (Sprite):
#   setup_openclaw_config "$OPENROUTER_API_KEY" "$MODEL_ID" \
#     "upload_file_sprite $SPRITE_NAME" \
#     "run_sprite $SPRITE_NAME"
# Generate openclaw.json configuration with escaped credentials
_generate_openclaw_json() {
    local openrouter_key="${1}"
    local model_id="${2}"
    local gateway_token="${3}"

    local escaped_key escaped_token escaped_model
    escaped_key=$(json_escape "${openrouter_key}")
    escaped_token=$(json_escape "${gateway_token}")
    escaped_model=$(json_escape "${model_id}")

    cat << EOF
{
  "env": {
    "OPENROUTER_API_KEY": ${escaped_key}
  },
  "gateway": {
    "mode": "local",
    "auth": {
      "token": ${escaped_token}
    }
  },
  "agents": {
    "defaults": {
      "model": {
        "primary": ${escaped_model}
      }
    }
  }
}
EOF
}

setup_openclaw_config() {
    local openrouter_key="${1}"
    local model_id="${2}"
    local upload_callback="${3}"
    local run_callback="${4}"

    log_step "Configuring openclaw..."

    # Create ~/.openclaw directory
    ${run_callback} "mkdir -p ~/.openclaw"

    # Generate a random gateway token
    local gateway_token
    gateway_token=$(openssl rand -hex 16)

    # Create and upload openclaw.json config
    local openclaw_json
    openclaw_json=$(_generate_openclaw_json "${openrouter_key}" "${model_id}" "${gateway_token}")
    upload_config_file "${upload_callback}" "${run_callback}" "${openclaw_json}" "\$HOME/.openclaw/openclaw.json"
}

# Start OpenClaw gateway as a fully detached daemon
# Usage: start_openclaw_gateway RUN_CALLBACK
#
# Arguments:
#   RUN_CALLBACK - Function to run commands: func(command)
#
# SSH/exec channels hang if a backgrounded daemon inherits the session's file
# descriptors. setsid creates a new session, fully detaching the gateway so
# the channel can close. Falls back to nohup where setsid is unavailable
# (e.g. macOS local — no SSH, so the hang doesn't apply).
start_openclaw_gateway() {
    local run_callback="${1}"
    log_step "Starting OpenClaw gateway daemon..."
    ${run_callback} "source ~/.spawnrc 2>/dev/null; export PATH=\$(npm prefix -g 2>/dev/null)/bin:\$HOME/.bun/bin:/.sprite/languages/bun/bin:\$HOME/.local/bin:\$PATH; if command -v setsid >/dev/null 2>&1; then setsid openclaw gateway > /tmp/openclaw-gateway.log 2>&1 < /dev/null & else nohup openclaw gateway > /tmp/openclaw-gateway.log 2>&1 < /dev/null & fi"
}

# Wait for OpenClaw gateway to be ready
# Usage: wait_for_openclaw_gateway RUN_CALLBACK
#
# Arguments:
#   RUN_CALLBACK - Function to run commands: func(command)
#
# Returns:
#   0 if gateway starts successfully, 1 if timeout
wait_for_openclaw_gateway() {
    local run_callback="${1}"
    local max_wait=60
    local elapsed=0

    log_step "Waiting for OpenClaw gateway to start..."

    while [ $elapsed -lt $max_wait ]; do
        if ${run_callback} "nc -z 127.0.0.1 18789 2>/dev/null || (command -v telnet >/dev/null && timeout 1 telnet 127.0.0.1 18789 2>&1 | grep -q Connected)"; then
            log_info "Gateway ready after ${elapsed}s"
            return 0
        fi
        sleep 1
        elapsed=$((elapsed + 1))
    done

    log_error "OpenClaw gateway failed to start after ${max_wait}s"
    log_info "Check gateway logs: cat /tmp/openclaw-gateway.log"
    ${run_callback} "tail -10 /tmp/openclaw-gateway.log 2>/dev/null" || true
    return 1
}

# ============================================================
# Codex CLI configuration setup
# ============================================================

# Setup Codex CLI config.toml for OpenRouter
# Uses the native model_provider config instead of OPENAI_BASE_URL env var,
# which fixes "Invalid Responses API request" errors with OpenRouter.
# Usage: setup_codex_config OPENROUTER_KEY UPLOAD_CALLBACK RUN_CALLBACK
setup_codex_config() {
    local openrouter_key="${1}"
    local upload_callback="${2}"
    local run_callback="${3}"

    log_step "Configuring Codex CLI for OpenRouter..."

    local config_toml
    config_toml=$(cat <<TOML
model = "openai/gpt-5-codex"
model_provider = "openrouter"

[model_providers.openrouter]
name = "OpenRouter"
base_url = "https://openrouter.ai/api/v1"
env_key = "OPENROUTER_API_KEY"
wire_api = "responses"
TOML
)

    upload_config_file "${upload_callback}" "${run_callback}" "${config_toml}" "\$HOME/.codex/config.toml"
}

# ============================================================
# Continue configuration setup
# ============================================================

# Setup Continue configuration files (config.json)
# This consolidates the config setup pattern used by all continue.sh scripts
# Usage: setup_continue_config OPENROUTER_KEY UPLOAD_CALLBACK RUN_CALLBACK
#
# Arguments:
#   OPENROUTER_KEY    - OpenRouter API key to inject into config
#   UPLOAD_CALLBACK   - Function to upload files: func(local_path, remote_path)
#   RUN_CALLBACK      - Function to run commands: func(command)
#
# Example (SSH-based clouds):
#   setup_continue_config "$OPENROUTER_API_KEY" \
#     "upload_file $SERVER_IP" \
#     "run_server $SERVER_IP"
#
# Example (container clouds):
#   setup_continue_config "$OPENROUTER_API_KEY" \
#     "upload_file" \
#     "run_server"
setup_continue_config() {
    local openrouter_key="${1}"
    local upload_callback="${2}"
    local run_callback="${3}"

    log_step "Configuring Continue..."

    # Create ~/.continue directory
    ${run_callback} "mkdir -p ~/.continue"

    # Create config.json with json_escape to prevent injection
    local escaped_key
    escaped_key=$(json_escape "${openrouter_key}")
    local continue_json
    continue_json=$(cat << EOF
{
  "models": [
    {
      "title": "OpenRouter",
      "provider": "openrouter",
      "model": "openrouter/auto",
      "apiBase": "https://openrouter.ai/api/v1",
      "apiKey": ${escaped_key}
    }
  ]
}
EOF
)
    upload_config_file "${upload_callback}" "${run_callback}" "${continue_json}" "\$HOME/.continue/config.json"
}

# ============================================================
# Interactive selection helpers
# ============================================================

# Generic interactive picker for numbered menu selection
# Eliminates duplicate _pick_location/_pick_server_type patterns across providers
#
# Usage: interactive_pick ENV_VAR_NAME DEFAULT_VALUE PROMPT_TEXT LIST_CALLBACK [FORMAT_CALLBACK]
#
# Arguments:
#   ENV_VAR_NAME     - Environment variable to check first (e.g., "HETZNER_LOCATION")
#   DEFAULT_VALUE    - Default value if env var unset and list is empty or choice invalid
#   PROMPT_TEXT      - Label shown above the menu (e.g., "locations", "server types")
#   LIST_CALLBACK    - Function that outputs pipe-delimited lines (first field = ID)
#   DEFAULT_ID       - Optional: ID to pre-select as default (e.g., "cx23")
#
# LIST_CALLBACK must output pipe-delimited lines where the first field is the selectable ID.
# Example output: "fsn1|Falkenstein|DE" or "cx23|2 vCPU|4 GB RAM|40 GB disk"
#
# Display a numbered list and read user selection
# Pipe-delimited items: "id|label". Returns selected id via stdout.
# Usage: _display_and_select PROMPT_TEXT DEFAULT_VALUE DEFAULT_ID <<< "$items"
_fzf_select() {
    local prompt_text="${1}"
    local default_value="${2}"
    local default_id="${3}"
    local fzf_input="${4}"
    local default_line="${5}"

    log_step "Select ${prompt_text%s} (type to filter):"

    # Run fzf with default selection
    local selected
    if [[ -n "${default_line}" ]]; then
        selected=$(printf '%s' "${fzf_input}" | fzf --height=~50% --reverse --prompt="Select > " --query="" --select-1 --exit-0 --header="Press ESC to use default (${default_id})" --print-query --query="${default_line%%$'\t'*}" | tail -1)
    else
        selected=$(printf '%s' "${fzf_input}" | fzf --height=~50% --reverse --prompt="Select > " --select-1 --exit-0)
    fi

    # If fzf was cancelled or returned nothing, use default
    if [[ -z "${selected}" ]]; then
        log_info "Using default: ${default_value}"
        echo "${default_value}"
        return
    fi

    # Extract ID from selected line
    local selected_id="${selected%%$'\t'*}"
    echo "${selected_id}"
}

_prepare_fzf_input() {
    local default_id="${1}"
    shift
    local items_array=("$@")

    local fzf_input=""
    local default_line=""
    for line in "${items_array[@]}"; do
        local id="${line%%|*}"
        local display
        display=$(echo "${line}" | tr '|' '\t')
        fzf_input+="${display}"$'\n'
        if [[ -n "${default_id}" && "${id}" == "${default_id}" ]]; then
            default_line="${display}"
        fi
    done

    # Return via globals (bash doesn't have good multi-value returns)
    FZF_INPUT="${fzf_input}"
    FZF_DEFAULT_LINE="${default_line}"
}

_numbered_list_select() {
    local prompt_text="${1}"
    local default_value="${2}"
    local default_id="${3}"
    shift 3
    local items_array=("$@")

    log_step "Available ${prompt_text}:"
    local i=1
    local ids=()
    local default_idx=1
    for line in "${items_array[@]}"; do
        local id="${line%%|*}"
        printf "  %2d) %s\n" "${i}" "$(echo "${line}" | tr '|' '\t')" >&2
        ids+=("${id}")
        if [[ -n "${default_id}" && "${id}" == "${default_id}" ]]; then
            default_idx=${i}
        fi
        i=$((i + 1))
    done

    local choice
    printf "\n" >&2
    choice=$(safe_read "Select ${prompt_text%s} [${default_idx}]: ") || choice=""
    choice="${choice:-${default_idx}}"

    if [[ "${choice}" -ge 1 && "${choice}" -le "${#ids[@]}" ]] 2>/dev/null; then
        echo "${ids[$((choice - 1))]}"
    else
        log_warn "Invalid selection '${choice}' (enter a number between 1 and ${#ids[@]}). Using default: ${default_value}"
        echo "${default_value}"
    fi
}

_display_and_select() {
    local prompt_text="${1}"
    local default_value="${2}"
    local default_id="${3:-}"

    # Read all items into array
    local items_array=()
    while IFS= read -r line; do
        items_array+=("${line}")
    done

    if [[ "${#items_array[@]}" -eq 0 ]]; then
        log_warn "No ${prompt_text} available, using default: ${default_value}"
        echo "${default_value}"
        return
    fi

    # Try to use fzf for interactive filtering if available and stdin is a TTY
    if command -v fzf >/dev/null 2>&1 && [[ -t 0 ]]; then
        _prepare_fzf_input "${default_id}" "${items_array[@]}"
        _fzf_select "${prompt_text}" "${default_value}" "${default_id}" "${FZF_INPUT}" "${FZF_DEFAULT_LINE}"
        return
    fi

    # Try spawn pick for an arrow-key UI (available when the user ran `spawn`)
    if command -v spawn >/dev/null 2>&1; then
        # Convert pipe-delimited "id|label|extra..." → "id\tid\tlabel · extra · ..."
        # so spawn pick shows the id as label and all detail fields as hint.
        local spawn_input
        spawn_input=$(printf '%s\n' "${items_array[@]}" | awk -F'|' '{
            val=$1; hint="";
            for (i=2; i<=NF; i++) { hint = hint (hint ? " \xc2\xb7 " : "") $i }
            printf "%s\t%s\t%s\n", val, val, hint
        }')
        local picked
        local spawn_default="${default_id:-${default_value}}"
        picked=$(printf '%s\n' "${spawn_input}" | \
            spawn pick --prompt "Select ${prompt_text}" --default "${spawn_default}") && {
            echo "${picked}"
            return
        }
    fi

    # Fallback to numbered list when neither fzf nor spawn pick is available
    _numbered_list_select "${prompt_text}" "${default_value}" "${default_id}" "${items_array[@]}"
}

# Returns: selected ID via stdout
interactive_pick() {
    local env_var_name="${1}"
    local default_value="${2}"
    local prompt_text="${3}"
    local list_callback="${4}"
    local default_id="${5:-}"

    # Check environment variable first
    local env_value="${!env_var_name:-}"
    if [[ -n "${env_value}" ]]; then
        echo "${env_value}"
        return
    fi

    log_step "Fetching available ${prompt_text}..."
    local items
    items=$("${list_callback}")

    if [[ -z "${items}" ]]; then
        log_warn "Could not fetch ${prompt_text}, using default: ${default_value}"
        echo "${default_value}"
        return
    fi

    _display_and_select "${prompt_text}" "${default_value}" "${default_id}" <<< "${items}"
}

# ============================================================
# SSH key registration helpers
# ============================================================

# Generic SSH key check: queries the provider's API and greps for the fingerprint.
# Most providers follow this exact pattern. Use this to avoid duplicating 5-line
# check functions across every cloud lib.
# Usage: check_ssh_key_by_fingerprint API_FUNC ENDPOINT FINGERPRINT
# Example: check_ssh_key_by_fingerprint hetzner_api "/ssh_keys" "$fingerprint"
check_ssh_key_by_fingerprint() {
    local api_func="${1}"
    local endpoint="${2}"
    local fingerprint="${3}"

    local existing_keys
    existing_keys=$("${api_func}" GET "${endpoint}")
    echo "${existing_keys}" | grep -q "${fingerprint}"
}

# Generic SSH key registration pattern used by all cloud providers
# Eliminates ~220 lines of duplicate code across 5 provider libraries
#
# Usage: ensure_ssh_key_with_provider \
#          CHECK_CALLBACK \
#          REGISTER_CALLBACK \
#          PROVIDER_NAME \
#          [KEY_PATH]
#
# Arguments:
#   CHECK_CALLBACK    - Function that checks if SSH key exists with provider
#                       Should return 0 if key exists, 1 if not
#                       Function receives: fingerprint, pub_key_path
#   REGISTER_CALLBACK - Function that registers SSH key with provider
#                       Should return 0 on success, 1 on error
#                       Function receives: key_name, pub_key_path
#   PROVIDER_NAME     - Display name of the provider (for logging)
#   KEY_PATH          - Optional: Path to SSH private key (default: $HOME/.ssh/id_ed25519)
#
# Example:
#   ensure_ssh_key_with_provider \
#     hetzner_check_ssh_key \
#     hetzner_register_ssh_key \
#     "Hetzner"
#
# Callback implementations should use provider-specific API calls but follow
# this contract to enable shared logic for key generation and registration flow.
ensure_ssh_key_with_provider() {
    local check_callback="${1}"
    local register_callback="${2}"
    local provider_name="${3}"
    local key_path="${4:-${HOME}/.ssh/id_ed25519}"
    local pub_path="${key_path}.pub"

    # Generate key if needed (shared function)
    generate_ssh_key_if_missing "${key_path}"

    # Get fingerprint (shared function)
    local fingerprint
    fingerprint=$(get_ssh_fingerprint "${pub_path}")

    # Check if already registered (provider-specific)
    if "${check_callback}" "${fingerprint}" "${pub_path}"; then
        log_info "SSH key already registered with ${provider_name}"
        return 0
    fi

    # Register the key (provider-specific)
    log_step "Registering SSH key with ${provider_name}..."
    local key_name
    key_name="spawn-$(hostname)-$(date +%s)"

    if "${register_callback}" "${key_name}" "${pub_path}"; then
        log_info "SSH key registered with ${provider_name}"
        return 0
    else
        log_error "Failed to register SSH key with ${provider_name}"
        log_error "The API may have rejected the key format or the token lacks write permissions."
        log_error "Verify your API token has SSH key management permissions, then try again."
        return 1
    fi
}

# ============================================================
# Agent install commands (run remotely on provisioned servers)
# ============================================================

# Robust OpenCode install command that downloads to a file first instead of
# piping curl|tar, which breaks in container exec environments (Sprite, E2B,
# Modal, Daytona) where the binary stream can get corrupted through the exec
# layer. The upstream installer's "curl -#" flag also interferes in non-TTY
# environments.
opencode_install_cmd() {
    printf '%s' 'OC_ARCH=$(uname -m); case "$OC_ARCH" in aarch64) OC_ARCH=arm64;; x86_64) OC_ARCH=x64;; esac; OC_OS=$(uname -s | tr A-Z a-z); mkdir -p /tmp/opencode-install "$HOME/.opencode/bin" && curl -fsSL -o /tmp/opencode-install/oc.tar.gz "https://github.com/anomalyco/opencode/releases/latest/download/opencode-${OC_OS}-${OC_ARCH}.tar.gz" && tar xzf /tmp/opencode-install/oc.tar.gz -C /tmp/opencode-install && mv /tmp/opencode-install/opencode "$HOME/.opencode/bin/" && rm -rf /tmp/opencode-install && grep -q ".opencode/bin" "$HOME/.bashrc" 2>/dev/null || echo '"'"'export PATH="$HOME/.opencode/bin:$PATH"'"'"' >> "$HOME/.bashrc"; grep -q ".opencode/bin" "$HOME/.zshrc" 2>/dev/null || echo '"'"'export PATH="$HOME/.opencode/bin:$PATH"'"'"' >> "$HOME/.zshrc" 2>/dev/null; export PATH="$HOME/.opencode/bin:$PATH"'
}

# ============================================================
# VM Connection Tracking
# ============================================================

# Save VM connection info for spawn list reconnect functionality.
# This allows users to reconnect to previously spawned VMs via `spawn list`.
# Usage: save_vm_connection IP USER [SERVER_ID] [SERVER_NAME] [CLOUD] [METADATA_JSON]
# Example: save_vm_connection "$DO_SERVER_IP" "root" "$DO_DROPLET_ID" "$DROPLET_NAME" "digitalocean"
# Example: save_vm_connection "$GCP_IP" "root" "" "$NAME" "gcp" '{"zone":"us-central1-a"}'
save_vm_connection() {
    local ip="${1}"
    local user="${2}"
    local server_id="${3:-}"
    local server_name="${4:-}"
    local cloud="${5:-}"
    local metadata="${6:-}"

    local spawn_dir="${HOME}/.spawn"
    mkdir -p "${spawn_dir}"

    local conn_file="${spawn_dir}/last-connection.json"

    # Build JSON using json_escape to prevent injection via special characters
    local json="{\"ip\":$(json_escape "${ip}"),\"user\":$(json_escape "${user}")"
    if [[ -n "${server_id}" ]]; then
        json="${json},\"server_id\":$(json_escape "${server_id}")"
    fi
    if [[ -n "${server_name}" ]]; then
        json="${json},\"server_name\":$(json_escape "${server_name}")"
    fi
    if [[ -n "${cloud}" ]]; then
        json="${json},\"cloud\":$(json_escape "${cloud}")"
    fi
    if [[ -n "${metadata}" ]]; then
        json="${json},\"metadata\":${metadata}"
    fi
    json="${json}}"

    printf '%s\n' "${json}" > "${conn_file}"
}

# ============================================================
# Auto-initialization
# ============================================================

# Auto-register cleanup trap when this file is sourced
register_cleanup_trap