You can close this tab and return to your terminal.
Invalid or missing state parameter (CSRF protection). Please try again.
The authorization code format is invalid.
'); - setTimeout(() => { server.close(); process.exit(1); }, 500); - return; - } - fs.writeFileSync('${code_file}', code); - res.writeHead(200, {'Content-Type':'text/html','Connection':'close'}); - res.end(html); - setTimeout(() => { server.close(); process.exit(0); }, 500); - } else { - res.writeHead(200, {'Content-Type':'text/html'}); - res.end('Waiting for OAuth callback...'); - } -}); -let currentPort = ${starting_port}; -const maxPort = ${starting_port} + 10; -function tryListen() { - server.listen(currentPort, '127.0.0.1', () => { - fs.writeFileSync('${port_file}', currentPort.toString()); - fs.writeFileSync('/dev/fd/1', ''); - }); -} -server.on('error', (err) => { - if (err.code === 'EADDRINUSE' && currentPort < maxPort) { - currentPort++; - tryListen(); - } else { - process.exit(1); - } -}); -setTimeout(() => process.exit(0), 300000); -tryListen(); -" -} - -# Start OAuth callback server using Node.js/Bun HTTP server -# Proper HTTP server — handles multiple connections, favicon requests, etc. -# Tries a range of ports if the initial port is busy -# $1=starting_port $2=code_file $3=port_file (writes actual port used) $4=state_file (CSRF token) -# Returns: server PID -# SECURITY: Validates port number and CSRF state parameter -start_oauth_server() { - local starting_port="${1}" - local code_file="${2}" - local port_file="${3}" - local state_file="${4}" - - _validate_oauth_server_args "${starting_port}" "${state_file}" || return 1 - - _generate_oauth_html - local script - script=$(_generate_oauth_server_script "${OAUTH_STATE}" "${OAUTH_SUCCESS_HTML}" "${OAUTH_ERROR_HTML}" \ - "${code_file}" "${port_file}" "${starting_port}") - - "${OAUTH_RUNTIME}" -e "${script}" /dev/null 2>&1 & - - echo $! 
-} - -# Wait for OAuth code with timeout, returns 0 if code received -wait_for_oauth_code() { - local code_file="${1}" - local timeout="${2:-120}" - local elapsed=0 - - log_step "Waiting for authentication in browser (this usually takes 10-30 seconds, timeout: ${timeout}s)..." - while [[ ! -f "${code_file}" ]] && [[ ${elapsed} -lt ${timeout} ]]; do - sleep "${POLL_INTERVAL}" - # Use bun for float addition since bash arithmetic only handles integers - # If POLL_INTERVAL is 0.5, bash $(( )) would fail. Fallback keeps timeout working. - if command -v bun &>/dev/null; then - elapsed=$(_E="${elapsed}" _P="${POLL_INTERVAL}" bun -e "process.stdout.write(String(Math.floor(Number(process.env._E) + Number(process.env._P))))" 2>/dev/null || echo "$((elapsed + 1))") - else - # No bun available - fall back to integer seconds (may timeout early with fractional POLL_INTERVAL) - elapsed=$((elapsed + 1)) - fi - done - - [[ -f "${code_file}" ]] -} - -# Exchange OAuth code for API key -exchange_oauth_code() { - local oauth_code="${1}" - - # SECURITY: Use json_escape to prevent JSON injection via crafted OAuth codes - local escaped_code - escaped_code=$(json_escape "${oauth_code}") - - local key_response curl_exit - key_response=$(curl -s --max-time 30 -X POST "https://openrouter.ai/api/v1/auth/keys" \ - -H "Content-Type: application/json" \ - -d "{\"code\": ${escaped_code}}" 2>&1) - curl_exit=$? 
- - if [[ ${curl_exit} -ne 0 ]]; then - log_error "Failed to contact OpenRouter API (curl exit code: ${curl_exit})" - log_warn "This may indicate a network issue or temporary service outage" - log_warn "Please check your internet connection and try again" - return 1 - fi - - local api_key - api_key=$(echo "${key_response}" | grep -o '"key":"[^"]*"' | sed 's/"key":"//;s/"$//') - - if [[ -z "${api_key}" ]]; then - log_error "Failed to exchange OAuth code for API key" - log_warn "Server response: ${key_response}" - log_warn "This may indicate the OAuth code expired or was already used" - log_warn "Please try again, or set OPENROUTER_API_KEY manually" - return 1 - fi - - echo "${api_key}" -} - -# Clean up OAuth session resources -cleanup_oauth_session() { - local server_pid="${1}" - local oauth_dir="${2}" - - if [[ -n "${server_pid}" ]]; then - # Verify PID still exists before killing to prevent race conditions - if kill -0 "${server_pid}" 2>/dev/null; then - # Kill process group to catch any child processes (netcat listeners, etc) - kill -TERM "-${server_pid}" 2>/dev/null || kill "${server_pid}" 2>/dev/null || true - # Give it time to shut down gracefully - sleep 0.5 - # Force kill if still running - kill -KILL "-${server_pid}" 2>/dev/null || true - wait "${server_pid}" 2>/dev/null || true - fi - fi - - # SAFETY: Validate path before rm -rf to prevent accidental deletion of system directories - # Only delete if: - # 1. Variable is non-empty - # 2. Directory exists - # 3. Path starts with /tmp/ (mktemp always creates in /tmp) - # 4. 
Path contains more than just /tmp (prevent rm -rf /tmp) - if [[ -n "${oauth_dir}" && -d "${oauth_dir}" && "${oauth_dir}" == /tmp/* && "${oauth_dir}" != "/tmp" && "${oauth_dir}" != "/tmp/" ]]; then - rm -rf "${oauth_dir}" - fi -} - -# Check network connectivity to OpenRouter -# Returns 0 if reachable, 1 if network is unreachable -check_openrouter_connectivity() { - local host="openrouter.ai" - local port="443" - local timeout=5 - - # Try curl with short timeout if available - if command -v curl &> /dev/null; then - if curl -s --connect-timeout "${timeout}" --max-time "${timeout}" "https://${host}" -o /dev/null 2>/dev/null; then - return 0 - fi - fi - - # Fallback to nc/telnet test - if command -v nc &> /dev/null; then - if timeout "${timeout}" nc -z "${host}" "${port}" 2>/dev/null; then - return 0 - fi - elif command -v timeout &> /dev/null && command -v bash &> /dev/null; then - # Bash TCP socket test as last resort - if timeout "${timeout}" bash -c "exec 3<>/dev/tcp/${host}/${port}" 2>/dev/null; then - return 0 - fi - fi - - return 1 -} - -# Start OAuth server and wait for it to be ready -# Returns: "port_number" on success, "" on failure (cleanup handled by caller) -start_and_verify_oauth_server() { - local callback_port="${1}" - local code_file="${2}" - local port_file="${3}" - local state_file="${4}" - local server_pid="${5}" - - sleep "${POLL_INTERVAL}" - if ! kill -0 "${server_pid}" 2>/dev/null; then - log_warn "Failed to start OAuth server - ports ${callback_port}-$((callback_port + 10)) may be in use" - log_warn "Try closing other dev servers or set OPENROUTER_API_KEY to skip OAuth" - return 1 - fi - - # Wait for port file to be created (server successfully bound to a port) - local wait_count=0 - while [[ ! -f "${port_file}" ]] && [[ ${wait_count} -lt 10 ]]; do - sleep 0.2 - wait_count=$((wait_count + 1)) - done - - if [[ ! 
-f "${port_file}" ]]; then - log_warn "OAuth server failed to allocate a port after 2 seconds" - log_warn "Another process may be using ports ${callback_port}-$((callback_port + 10))" - return 1 - fi - - cat "${port_file}" -} - -# Validate OAuth prerequisites (network, Node.js runtime) -# Returns 0 if all checks pass, 1 otherwise -_check_oauth_prerequisites() { - if ! check_openrouter_connectivity; then - log_warn "Cannot reach openrouter.ai - network may be unavailable" - log_warn "Please check your internet connection and try again" - log_warn "Alternatively, set OPENROUTER_API_KEY in your environment to skip OAuth" - return 1 - fi - - local runtime - runtime=$(find_node_runtime) - if [[ -z "${runtime}" ]]; then - log_warn "No Node.js runtime (bun/node) found - required for the OAuth callback server" - log_warn "Install one with: brew install node OR curl -fsSL https://bun.sh/install | bash" - return 1 - fi - - return 0 -} - -# Start OAuth server and return actual port, cleanup on failure -# Sets server_pid and returns 0 on success, 1 on failure -_setup_oauth_server() { - local callback_port="${1}" - local code_file="${2}" - local port_file="${3}" - local state_file="${4}" - local pid_file="${5}" - - log_step "Starting local OAuth server (trying ports ${callback_port}-$((callback_port + 10)))..." 
- local server_pid - server_pid=$(start_oauth_server "${callback_port}" "${code_file}" "${port_file}" "${state_file}") - - # Persist server PID to file for reliable retrieval - if [[ -n "${pid_file}" && -n "${server_pid}" ]]; then - printf '%s' "${server_pid}" > "${pid_file}" - fi - - local actual_port - actual_port=$(start_and_verify_oauth_server "${callback_port}" "${code_file}" "${port_file}" "${state_file}" "${server_pid}") - if [[ -z "${actual_port}" ]]; then - return 1 - fi - - log_info "OAuth server listening on port ${actual_port}" - echo "${actual_port}" - return 0 -} - -# Wait for OAuth code with timeout and cleanup on failure -# Returns 0 on success, 1 on failure -_wait_for_oauth() { - local code_file="${1}" - - if ! wait_for_oauth_code "${code_file}" 120; then - log_warn "OAuth timeout - no response received" - return 1 - fi - return 0 -} - -# Try OAuth flow (orchestrates the helper functions above) -# SECURITY: Generates CSRF state token to prevent OAuth code interception -_generate_csrf_state() { - if command -v openssl &>/dev/null; then - openssl rand -hex 16 - elif [[ -r /dev/urandom ]]; then - od -An -N16 -tx1 /dev/urandom | tr -d ' \n' - else - log_error "Cannot generate secure CSRF token: neither openssl nor /dev/urandom available" - log_error "Install openssl or ensure /dev/urandom is readable" - return 1 - fi -} - -# Create temp directory with OAuth session files and CSRF state -_init_oauth_session() { - local oauth_dir - oauth_dir=$(mktemp -d) || { - log_error "Failed to create temporary directory for OAuth session" - log_error "Check disk space and /tmp permissions" - return 1 - } - - # SAFETY: Verify mktemp succeeded before proceeding - if [[ -z "${oauth_dir}" || ! 
-d "${oauth_dir}" ]]; then - log_error "Failed to create temporary directory for OAuth session" - log_error "Check disk space and /tmp permissions" - return 1 - fi - - # SECURITY: Generate random CSRF state token (32 hex chars = 128 bits) - local csrf_state - csrf_state=$(_generate_csrf_state) - printf '%s' "${csrf_state}" > "${oauth_dir}/state" || { - rm -rf "${oauth_dir}" - log_error "Failed to write OAuth state file" - return 1 - } - chmod 600 "${oauth_dir}/state" - - echo "${oauth_dir}" -} - -# Open browser and wait for OAuth callback, returning the auth code -# Outputs the OAuth code on success, returns 1 on timeout -_await_oauth_callback() { - local code_file="${1}" - local server_pid="${2}" - local oauth_dir="${3}" - local actual_port="${4}" - local csrf_state="${5}" - local spawn_agent_slug="${6:-}" - local spawn_cloud_slug="${7:-}" - - local callback_url="http://localhost:${actual_port}/callback" - local auth_url="https://openrouter.ai/auth?callback_url=${callback_url}&state=${csrf_state}" - if [[ -n "${spawn_agent_slug}" ]]; then auth_url="${auth_url}&spawn_agent=${spawn_agent_slug}"; fi - if [[ -n "${spawn_cloud_slug}" ]]; then auth_url="${auth_url}&spawn_cloud=${spawn_cloud_slug}"; fi - log_step "Opening browser to authenticate with OpenRouter..." - open_browser "${auth_url}" - - if ! _wait_for_oauth "${code_file}"; then - cleanup_oauth_session "${server_pid}" "${oauth_dir}" - log_error "OAuth authentication timed out after 120 seconds" - log_error "" - log_error "The authentication flow was not completed in time." - log_error "" - log_error "Troubleshooting:" - log_error " 1. Check if your browser opened to openrouter.ai" - log_error " 2. Complete the authentication and allow the redirect" - log_error " 3. Ensure port ${actual_port} is not blocked by firewall/proxy" - log_error "" - log_error "Alternative: Use a manual API key instead" - log_error " export OPENROUTER_API_KEY=sk-or-v1-..." 
- log_error " Get a key at: https://openrouter.ai/settings/keys" - return 1 - fi - - cat "${code_file}" -} - -# Helper: Start OAuth server and get session details -# Returns: "port|pid|oauth_dir" on success, "" on failure -_start_oauth_session_with_server() { - local callback_port="${1}" - - local oauth_dir - oauth_dir=$(_init_oauth_session) - local code_file="${oauth_dir}/code" - local pid_file="${oauth_dir}/server_pid" - - local actual_port - actual_port=$(_setup_oauth_server "${callback_port}" "${code_file}" "${oauth_dir}/port" "${oauth_dir}/state" "${pid_file}") || { - cleanup_oauth_session "" "${oauth_dir}" - return 1 - } - - local server_pid - server_pid=$(cat "${pid_file}" 2>/dev/null || echo "") - if [[ -z "${server_pid}" ]]; then - log_error "Failed to retrieve OAuth server PID" - cleanup_oauth_session "" "${oauth_dir}" - return 1 - fi - - echo "${actual_port}|${server_pid}|${oauth_dir}" -} - -try_oauth_flow() { - local callback_port=${1:-5180} - local spawn_agent_slug="${2:-}" - local spawn_cloud_slug="${3:-}" - - log_step "Attempting OAuth authentication..." - - if ! _check_oauth_prerequisites; then - return 1 - fi - - local session_info - session_info=$(_start_oauth_session_with_server "${callback_port}") || return 1 - - local actual_port server_pid oauth_dir - IFS='|' read -r actual_port server_pid oauth_dir <<< "${session_info}" - - local csrf_state - csrf_state=$(cat "${oauth_dir}/state") - - # Open browser and wait for callback - local oauth_code - oauth_code=$(_await_oauth_callback "${oauth_dir}/code" "${server_pid}" "${oauth_dir}" "${actual_port}" "${csrf_state}" "${spawn_agent_slug}" "${spawn_cloud_slug}") || return 1 - cleanup_oauth_session "${server_pid}" "${oauth_dir}" - - # Exchange code for API key - log_step "Exchanging OAuth code for API key..." - local api_key - api_key=$(exchange_oauth_code "${oauth_code}") || return 1 - - log_info "Successfully obtained OpenRouter API key via OAuth!" 
- echo "${api_key}" -} - -# Main function: Try OAuth, fallback to manual entry -get_openrouter_api_key_oauth() { - local callback_port=${1:-5180} - local spawn_agent_slug="${2:-}" - local spawn_cloud_slug="${3:-}" - - # Try OAuth flow first - local api_key - api_key=$(try_oauth_flow "${callback_port}" "${spawn_agent_slug}" "${spawn_cloud_slug}") - - if [[ -n "${api_key}" ]]; then - echo "${api_key}" - return 0 - fi - - # OAuth failed, offer manual entry - echo "" >&2 - log_warn "Browser-based OAuth login was not completed." - log_warn "This is normal on remote servers, SSH sessions, or headless environments." - log_info "You can paste an API key instead. Create one at: https://openrouter.ai/settings/keys" - echo "" >&2 - local manual_choice - manual_choice=$(safe_read "Paste your API key manually? (Y/n): ") || { - log_error "Cannot prompt for manual entry in non-interactive mode" - log_warn "Set OPENROUTER_API_KEY environment variable before running spawn" - return 1 - } - - if [[ "${manual_choice}" =~ ^[Nn]$ ]]; then - log_error "Authentication cancelled. An OpenRouter API key is required to use spawn." - log_warn "To authenticate, either:" - log_warn " - Re-run this command and complete the OAuth flow in your browser" - log_warn " - Set OPENROUTER_API_KEY=sk-or-v1-... before running spawn" - log_warn " - Create a key at: https://openrouter.ai/settings/keys" - return 1 - fi - - api_key=$(get_openrouter_api_key_manual) - echo "${api_key}" -} - -# ============================================================ -# Environment injection helpers -# ============================================================ - -# Generate environment variable config content -# Usage: generate_env_config KEY1=val1 KEY2=val2 ... -# Outputs the env config to stdout -# SECURITY: Values are single-quoted to prevent shell injection when sourced. -# Single quotes prevent all interpretation of special characters ($, `, \, etc.) 
-generate_env_config() { - echo "" - echo "# [spawn:env]" - # All spawn environments are disposable cloud VMs — mark as sandbox - echo "export IS_SANDBOX='1'" - for env_pair in "$@"; do - local key="${env_pair%%=*}" - local value="${env_pair#*=}" - - # SECURITY: Validate environment variable names to prevent injection - # Only allow uppercase letters, numbers, and underscores (standard env var format) - if [[ ! "${key}" =~ ^[A-Z_][A-Z0-9_]*$ ]]; then - log_error "SECURITY: Invalid environment variable name rejected: ${key}" - continue - fi - - # Escape any single quotes in the value: replace ' with '\'' - # Use sed instead of ${//} pattern substitution for bash 3.2 (macOS) compat - local escaped_value - escaped_value=$(printf '%s' "$value" | sed "s/'/'\\\\''/g") - echo "export ${key}='${escaped_value}'" - done -} - -# Inject environment variables into remote server's shell config (SSH-based clouds) -# Usage: inject_env_vars_ssh SERVER_IP UPLOAD_FUNC RUN_FUNC KEY1=val1 KEY2=val2 ... -# Example: inject_env_vars_ssh "$DO_SERVER_IP" upload_file run_server \ -# "OPENROUTER_API_KEY=$OPENROUTER_API_KEY" \ -# "ANTHROPIC_BASE_URL=https://openrouter.ai/api" -inject_env_vars_ssh() { - local server_ip="${1}" - local upload_func="${2}" - local run_func="${3}" - shift 3 - - local env_temp - env_temp=$(mktemp) - chmod 600 "${env_temp}" - track_temp_file "${env_temp}" - - generate_env_config "$@" > "${env_temp}" - - # SECURITY: Use unpredictable temp file name to prevent race condition - # Attacker could create symlink at /tmp/env_config to exfiltrate credentials - local rand_suffix - rand_suffix=$(basename "${env_temp}") - local temp_remote="/tmp/spawn_env_${rand_suffix}" - - # Append to .bashrc and .zshrc only — do NOT write to .profile or .bash_profile - "${upload_func}" "${server_ip}" "${env_temp}" "${temp_remote}" - "${run_func}" "${server_ip}" "cat '${temp_remote}' >> ~/.bashrc; cat '${temp_remote}' >> ~/.zshrc; rm -f '${temp_remote}'" - - # Note: temp file will be cleaned 
up by trap handler - - # Offer optional GitHub CLI setup - offer_github_auth "${run_func} ${server_ip}" -} - -# Inject environment variables for providers without SSH (modal, e2b, sprite) -# For providers where upload_file and run_server don't take server_ip as first arg -# Usage: inject_env_vars_local upload_file run_server KEY1=VAL1 KEY2=VAL2 ... -# Example: inject_env_vars_local upload_file run_server \ -# "OPENROUTER_API_KEY=$OPENROUTER_API_KEY" \ -# "ANTHROPIC_BASE_URL=https://openrouter.ai/api" -inject_env_vars_local() { - local upload_func="${1}" - local run_func="${2}" - shift 2 - - local env_temp - env_temp=$(mktemp) - chmod 600 "${env_temp}" - track_temp_file "${env_temp}" - - generate_env_config "$@" > "${env_temp}" - - # SECURITY: Use unpredictable temp file name to prevent race condition - local rand_suffix - rand_suffix=$(basename "${env_temp}") - local temp_remote="/tmp/spawn_env_${rand_suffix}" - - # Append to .bashrc and .zshrc only - "${upload_func}" "${env_temp}" "${temp_remote}" - "${run_func}" "cat '${temp_remote}' >> ~/.bashrc; cat '${temp_remote}' >> ~/.zshrc; rm -f '${temp_remote}'" - - # Note: temp file will be cleaned up by trap handler - - # Offer optional GitHub CLI setup - offer_github_auth "${run_func}" -} - -# Prompt user about GitHub CLI setup BEFORE provisioning. -# Stores the answer so the actual install can happen later (after the -# server is up) without re-prompting. -# Usage: prompt_github_auth (call before create_server) -prompt_github_auth() { - SPAWN_GITHUB_AUTH_PROMPTED=1 - - # Skip in non-interactive or if user opted out - if [[ -n "${SPAWN_SKIP_GITHUB_AUTH:-}" ]]; then - return 0 - fi - - printf '\n' - local choice - choice=$(safe_read "Set up GitHub CLI (gh) on this machine? 
(y/N): ") || return 0 - if [[ "${choice}" =~ ^[Yy]$ ]]; then - SPAWN_GITHUB_AUTH_REQUESTED=1 - - # Capture local GitHub token for passthrough to remote VM - if [[ -n "${GITHUB_TOKEN:-}" ]]; then - SPAWN_GITHUB_TOKEN="${GITHUB_TOKEN}" - elif command -v gh &>/dev/null && gh auth status &>/dev/null 2>&1; then - SPAWN_GITHUB_TOKEN="$(gh auth token 2>/dev/null)" || true - fi - fi -} - -# Run GitHub CLI setup on remote VM if previously requested via prompt_github_auth. -# If prompt_github_auth was never called, falls back to prompting interactively. -# Usage (SSH clouds): offer_github_auth "run_server SERVER_IP" -# Usage (local): offer_github_auth "run_server" -offer_github_auth() { - local run_callback="${1}" - - # Skip if user opted out via env var - if [[ -n "${SPAWN_SKIP_GITHUB_AUTH:-}" ]]; then - return 0 - fi - - # Build the remote command with optional token export. - # Prefer the local copy (running from a checkout) so fixes don't wait for - # a merge to main. Base64-encode it for safe inline transport. - local gh_cmd - local _local_gh="${SCRIPT_DIR:-}/../../shared/github-auth.sh" - if [[ -n "${SCRIPT_DIR:-}" && -f "${_local_gh}" && ! -L "${_local_gh}" ]]; then - local _gh_b64 - _gh_b64=$(base64 < "${_local_gh}" | tr -d '\n') - gh_cmd="printf '%s' '${_gh_b64}' | base64 -d | bash" - else - gh_cmd="curl -fsSL https://raw.githubusercontent.com/OpenRouterTeam/spawn/main/shared/github-auth.sh | bash" - fi - if [[ -n "${SPAWN_GITHUB_TOKEN:-}" ]]; then - local escaped_token - escaped_token=$(printf '%q' "${SPAWN_GITHUB_TOKEN}") - gh_cmd="export GITHUB_TOKEN=${escaped_token}; ${gh_cmd}" - fi - - # If prompt_github_auth was already called, use its stored answer - if [[ "${SPAWN_GITHUB_AUTH_PROMPTED:-}" == "1" ]]; then - if [[ "${SPAWN_GITHUB_AUTH_REQUESTED:-}" == "1" ]]; then - log_step "Installing and authenticating GitHub CLI..." 
- ${run_callback} "${gh_cmd}" || log_warn "GitHub CLI setup failed (non-fatal, continuing)" - fi - return 0 - fi - - # Fallback: prompt_github_auth was never called, ask now - printf '\n' - local choice - choice=$(safe_read "Set up GitHub CLI (gh) on this machine? (y/N): ") || return 0 - if [[ ! "${choice}" =~ ^[Yy]$ ]]; then - return 0 - fi - - # Attempt token capture in fallback path too - if [[ -z "${SPAWN_GITHUB_TOKEN:-}" ]]; then - if [[ -n "${GITHUB_TOKEN:-}" ]]; then - SPAWN_GITHUB_TOKEN="${GITHUB_TOKEN}" - elif command -v gh &>/dev/null && gh auth status &>/dev/null 2>&1; then - SPAWN_GITHUB_TOKEN="$(gh auth token 2>/dev/null)" || true - fi - if [[ -n "${SPAWN_GITHUB_TOKEN:-}" ]]; then - local escaped_token - escaped_token=$(printf '%q' "${SPAWN_GITHUB_TOKEN}") - gh_cmd="export GITHUB_TOKEN=${escaped_token}; ${gh_cmd}" - fi - fi - - log_step "Installing and authenticating GitHub CLI..." - ${run_callback} "${gh_cmd}" || log_warn "GitHub CLI setup failed (non-fatal, continuing)" -} - -# ============================================================ -# Resource cleanup trap handlers -# ============================================================ - -# Array to track temporary files for cleanup -CLEANUP_TEMP_FILES=() - -# Track a temporary file for cleanup on exit -# Usage: track_temp_file PATH -track_temp_file() { - local temp_file="${1}" - CLEANUP_TEMP_FILES+=("${temp_file}") -} - -# Cleanup function for temporary files -# Called automatically on EXIT, INT, TERM signals -cleanup_temp_files() { - local exit_code=$? 
- - for temp_file in "${CLEANUP_TEMP_FILES[@]}"; do - if [[ -f "${temp_file}" ]]; then - # Securely remove temp files (may contain credentials) - shred -f -u "${temp_file}" 2>/dev/null || rm -f "${temp_file}" - fi - done - - return "${exit_code}" -} - -# Register cleanup trap handler -# Call this at the start of scripts that create temp files -register_cleanup_trap() { - trap cleanup_temp_files EXIT INT TERM -} - -# ============================================================ -# Agent setup helpers (composable, callback-based) -# ============================================================ -# These helpers accept pre-applied RUN/UPLOAD/SESSION callbacks, -# following the same callback pattern used by offer_github_auth -# and setup_claude_code_config. -# -# Usage pattern in agent scripts: -# RUN="run_server ${SERVER_IP}" -# UPLOAD="upload_file ${SERVER_IP}" -# SESSION="interactive_session ${SERVER_IP}" -# -# install_agent "Codex" "npm install -g @openai/codex" "$RUN" -# verify_agent "Codex" "command -v codex" "npm install -g @openai/codex" "$RUN" -# get_or_prompt_api_key -# inject_env_vars_cb "$RUN" "$UPLOAD" "OPENROUTER_API_KEY=${OPENROUTER_API_KEY}" -# launch_session "Hetzner server" "$SESSION" "source ~/.zshrc && codex" - -# Run an agent's install command on the target machine -# Usage: install_agent AGENT_NAME INSTALL_CMD RUN_CB -install_agent() { - local agent_name="$1" install_cmd="$2" run_cb="$3" - log_step "Installing ${agent_name}..." - # Pass the raw command to the run callback — do NOT use printf '%q' + bash -c - # here. The run callback (run_server, run_sprite, ssh) already handles escaping - # for remote transport. Double-escaping breaks shell operators (&&, ||, >, |) - # inside install commands. - if ! 
${run_cb} "${install_cmd}"; then - log_install_failed "${agent_name}" "${install_cmd}" - return 1 - fi - log_info "${agent_name} installation completed" -} - -# Verify an agent installed correctly; exit 1 on failure -# Usage: verify_agent AGENT_NAME VERIFY_CMD INSTALL_CMD RUN_CB -verify_agent() { - local agent_name="$1" verify_cmd="$2" install_cmd="$3" run_cb="$4" - if ! ${run_cb} "${verify_cmd}" >/dev/null 2>&1; then - log_install_failed "${agent_name}" "${install_cmd}" - exit 1 - fi - log_info "${agent_name} installation verified successfully" -} - -# Install Claude Code with multi-method fallback and detailed error reporting. -# Tries: 1) curl installer (standalone binary) 2) bun 3) npm -# The curl installer bundles its own runtime. npm/bun install a Node.js package -# whose shebang needs 'node', so we ensure a node runtime exists after those. -# Usage: install_claude_code RUN_CB -_finalize_claude_install() { - local run_cb="$1" - local claude_path="$2" - log_step "Setting up Claude Code shell integration..." - ${run_cb} "${claude_path} && claude install --force" >/dev/null 2>&1 || true - # Write claude PATH to .bashrc and .zshrc - ${run_cb} "for rc in ~/.bashrc ~/.zshrc; do grep -q '.claude/local/bin' \"\$rc\" 2>/dev/null || printf '\\n# Claude Code PATH\\nexport PATH=\"\$HOME/.claude/local/bin:\$HOME/.local/bin:\$HOME/.bun/bin:\$PATH\"\\n' >> \"\$rc\"; done" >/dev/null 2>&1 || true -} - -_verify_claude_installed() { - local run_cb="$1" - local claude_path="$2" - ${run_cb} "${claude_path} && command -v claude" >/dev/null 2>&1 -} - -_install_via_curl() { - local run_cb="$1" - local claude_path="$2" - log_step "Installing Claude Code (method 1/2: curl installer)..." 
- if ${run_cb} "curl -fsSL https://claude.ai/install.sh | bash" 2>&1; then - if _verify_claude_installed "$run_cb" "$claude_path"; then - log_info "Claude Code installed via curl installer" - _finalize_claude_install "$run_cb" "$claude_path" - return 0 - fi - log_warn "curl installer exited 0 but claude not found on PATH" - else - log_warn "curl installer failed (site may be temporarily unavailable)" - fi - return 1 -} - -_ensure_nodejs_runtime() { - local run_cb="$1" - local claude_path="$2" - if ! ${run_cb} "${claude_path} && command -v node" >/dev/null 2>&1; then - log_step "Installing Node.js runtime (required for claude package)..." - if ${run_cb} "DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends nodejs npm && npm install -g n && n 22 && ln -sf /usr/local/bin/node /usr/bin/node && ln -sf /usr/local/bin/npm /usr/bin/npm && ln -sf /usr/local/bin/npx /usr/bin/npx" >/dev/null 2>&1; then - log_info "Node.js installed via n" - else - log_warn "Could not install Node.js - bun method may fail" - fi - fi -} - -_install_via_npm() { - local run_cb="$1" - local claude_path="$2" - log_step "Installing Claude Code (method 2/3: npm)..." - if ${run_cb} "${claude_path} && npm install -g @anthropic-ai/claude-code 2>&1" 2>&1; then - if _verify_claude_installed "$run_cb" "$claude_path"; then - log_info "Claude Code installed via npm" - _finalize_claude_install "$run_cb" "$claude_path" - return 0 - fi - log_warn "npm install exited 0 but claude binary not found" - else - log_warn "npm install failed" - fi - return 1 -} - -_install_via_bun() { - local run_cb="$1" - local claude_path="$2" - log_step "Installing Claude Code (method 3/3: bun)..." 
- if ${run_cb} "${claude_path} && bun i -g @anthropic-ai/claude-code 2>&1" 2>&1; then - if _verify_claude_installed "$run_cb" "$claude_path"; then - log_info "Claude Code installed via bun" - _finalize_claude_install "$run_cb" "$claude_path" - return 0 - fi - log_warn "bun install exited 0 but claude binary not found" - else - log_warn "bun install failed" - fi - return 1 -} - -install_claude_code() { - local run_cb="$1" - local claude_path='export PATH=$HOME/.npm-global/bin:$HOME/.claude/local/bin:$HOME/.local/bin:$HOME/.bun/bin:$PATH' - - # Clean up ~/.bash_profile if it was created by a previous broken deployment. - ${run_cb} "if [ -f ~/.bash_profile ] && grep -q 'spawn:env\|Claude Code PATH\|spawn:path' ~/.bash_profile 2>/dev/null; then rm -f ~/.bash_profile; fi" >/dev/null 2>&1 || true - - # Already installed? - if _verify_claude_installed "$run_cb" "$claude_path"; then - log_info "Claude Code already installed" - _finalize_claude_install "$run_cb" "$claude_path" - return 0 - fi - - # Try curl installer first - if _install_via_curl "$run_cb" "$claude_path"; then - return 0 - fi - - # Ensure Node.js runtime for npm/bun methods - _ensure_nodejs_runtime "$run_cb" "$claude_path" - - # Try npm (most reliable for global installs) - if _install_via_npm "$run_cb" "$claude_path"; then - return 0 - fi - - # Try bun as last resort - if _install_via_bun "$run_cb" "$claude_path"; then - return 0 - fi - - # All methods failed - log_install_failed "Claude Code" "npm install -g @anthropic-ai/claude-code" - exit 1 -} - -# Get OpenRouter API key from environment or prompt via OAuth -# Sets the global OPENROUTER_API_KEY variable -get_or_prompt_api_key() { - echo "" - if [[ -n "${OPENROUTER_API_KEY:-}" ]]; then - log_info "Using OpenRouter API key from environment" - if ! verify_openrouter_key "${OPENROUTER_API_KEY}"; then - log_warn "Environment key failed validation, prompting for a new one..." 
- OPENROUTER_API_KEY="" - fi - fi - - local max_attempts=3 attempt=0 - while [[ -z "${OPENROUTER_API_KEY:-}" ]]; do - attempt=$((attempt + 1)) - if [[ ${attempt} -gt ${max_attempts} ]]; then - log_error "No valid API key after ${max_attempts} attempts" - exit 1 - fi - OPENROUTER_API_KEY=$(get_openrouter_api_key_oauth 5180 "${SPAWN_AGENT_SLUG:-}" "${SPAWN_CLOUD_SLUG:-}") || true - if [[ -n "${OPENROUTER_API_KEY:-}" ]] && ! verify_openrouter_key "${OPENROUTER_API_KEY}"; then - OPENROUTER_API_KEY="" - fi - done -} - -# Inject environment variables using pre-applied callbacks -# Usage: inject_env_vars_cb RUN_CB UPLOAD_CB KEY1=val1 KEY2=val2 ... -# Example: inject_env_vars_cb "$RUN" "$UPLOAD" \ -# "OPENROUTER_API_KEY=$OPENROUTER_API_KEY" \ -# "ANTHROPIC_BASE_URL=https://openrouter.ai/api" -inject_env_vars_cb() { - local run_cb="$1" upload_cb="$2" - shift 2 - - log_step "Setting up environment variables..." - - local env_temp - env_temp=$(mktemp) - chmod 600 "${env_temp}" - track_temp_file "${env_temp}" - - generate_env_config "$@" > "${env_temp}" - - # SECURITY: Use unpredictable temp file name to prevent race condition - local rand_suffix - rand_suffix=$(basename "${env_temp}") - local temp_remote="/tmp/spawn_env_${rand_suffix}" - - ${upload_cb} "${env_temp}" "${temp_remote}" - ${run_cb} "cat '${temp_remote}' >> ~/.bashrc; cat '${temp_remote}' >> ~/.zshrc; rm -f '${temp_remote}'" - - # Offer optional GitHub CLI setup - offer_github_auth "${run_cb}" -} - -# Print success message and launch an interactive agent session -# Usage: launch_session CLOUD_MSG SESSION_CB LAUNCH_CMD -launch_session() { - local cloud_msg="$1" session_cb="$2" launch_cmd="$3" - echo "" - log_info "${cloud_msg} setup completed successfully!" - echo "" - log_step "Starting agent..." 
- sleep 1 - clear 2>/dev/null || true - ${session_cb} "${launch_cmd}" -} - -# ============================================================ -# Cloud adapter runner (spawn_agent) -# ============================================================ -# Orchestrates the standard agent deployment flow using cloud_* adapter -# functions. Agent scripts define hooks (agent_install, agent_env_vars, -# agent_launch_cmd, etc.) and call spawn_agent to run them. -# -# Required cloud_* functions (defined in {cloud}/lib/common.sh): -# cloud_authenticate, cloud_provision, cloud_wait_ready, -# cloud_run, cloud_upload, cloud_interactive, cloud_label -# -# Required agent hooks: -# agent_env_vars — print env config lines to stdout (via generate_env_config) -# agent_launch_cmd — print the shell command to launch the agent -# -# Optional agent hooks: -# agent_pre_provision — run before provisioning (e.g., prompt_github_auth) -# agent_install — install the agent on the server -# agent_configure — agent-specific config (settings files, etc.) -# agent_save_connection — save connection info for `spawn list` -# agent_pre_launch — run before launching (e.g., start daemon) -# -# Optional agent variables: -# AGENT_MODEL_PROMPT — if set, prompt for model selection -# AGENT_MODEL_DEFAULT — default model ID (default: openrouter/auto) - -# Check if a function is defined (bash 3.2 compatible) -_fn_exists() { type "$1" 2>/dev/null | head -1 | grep -q 'function'; } - -# Inject env vars using cloud_* adapter functions -_spawn_inject_env_vars() { - log_step "Setting up environment variables..." 
- local env_temp - env_temp=$(mktemp) - chmod 600 "${env_temp}" - track_temp_file "${env_temp}" - - agent_env_vars > "${env_temp}" - - # SECURITY: Use unpredictable temp file name to prevent symlink attacks - local rand_suffix - rand_suffix=$(basename "${env_temp}") - local temp_remote="/tmp/spawn_env_${rand_suffix}" - - cloud_upload "${env_temp}" "${temp_remote}" - - # Write env vars to ~/.spawnrc instead of inlining into .bashrc/.zshrc. - # Ubuntu's default .bashrc has an interactive-shell guard that exits early — - # anything appended after the guard is never loaded when SSH runs a command string. - cloud_run "cp '${temp_remote}' ~/.spawnrc && chmod 600 ~/.spawnrc; rm -f '${temp_remote}'" - - # Hook .spawnrc into .bashrc and .zshrc so interactive shells pick up the vars too - cloud_run "grep -q 'source ~/.spawnrc' ~/.bashrc 2>/dev/null || echo '[ -f ~/.spawnrc ] && source ~/.spawnrc' >> ~/.bashrc" || log_warn "Could not hook .spawnrc into .bashrc" - cloud_run "grep -q 'source ~/.spawnrc' ~/.zshrc 2>/dev/null || echo '[ -f ~/.spawnrc ] && source ~/.spawnrc' >> ~/.zshrc" || log_warn "Could not hook .spawnrc into .zshrc" - - offer_github_auth cloud_run -} - -# Main orchestration runner for agent deployment -# Usage: spawn_agent AGENT_DISPLAY_NAME -spawn_agent() { - local agent_name="$1" - SPAWN_AGENT_SLUG="${2:-}" - SPAWN_CLOUD_SLUG="${3:-}" - - # 1. Authenticate with cloud provider - cloud_authenticate - - # 2. Pre-provision hooks (e.g., prompt for GitHub auth) - if _fn_exists agent_pre_provision; then agent_pre_provision || true; fi - - # 3. Get API key (before provisioning so user isn't waiting on server) - get_or_prompt_api_key - - # 4. Model selection (if agent needs it) - if [[ -n "${AGENT_MODEL_PROMPT:-}" ]]; then - MODEL_ID=$(get_model_id_interactive "${AGENT_MODEL_DEFAULT:-openrouter/auto}" "${agent_name}") || exit 1 - fi - - # 5. Provision server - local server_name - server_name=$(get_server_name) - cloud_provision "${server_name}" - - # 6. 
Wait for readiness (may already be done after OAuth) - cloud_wait_ready - - # 7. Install agent - if _fn_exists agent_install; then - agent_install || exit 1 - fi - - # 8. Inject environment variables - _spawn_inject_env_vars - - # 9. Agent-specific configuration (non-fatal — agent may work with defaults) - if _fn_exists agent_configure; then agent_configure || log_warn "Agent configuration failed (continuing with defaults)"; fi - - # 10. Save connection info (non-fatal — convenience feature only) - if _fn_exists agent_save_connection; then agent_save_connection || log_warn "Could not save connection info"; fi - - # 11. Pre-launch hooks (non-fatal — e.g., gateway daemon may start slowly) - if _fn_exists agent_pre_launch; then agent_pre_launch || log_warn "Pre-launch hook failed (continuing)"; fi - - # 12. Launch interactive session - log_info "${agent_name} is ready" - local launch_cmd - launch_cmd=$(agent_launch_cmd) - - # Save the launch command to connection file for `spawn list` → "Enter agent" - _save_launch_cmd "${launch_cmd}" - - launch_session "$(cloud_label)" cloud_interactive "${launch_cmd}" -} - -# ============================================================ -# SSH configuration -# ============================================================ - -# Validate SSH_OPTS to prevent command injection -# Only allow safe SSH option patterns (dash-prefixed flags and values) -_validate_ssh_opts() { - local opts="${1}" - # Allow empty - if [[ -z "${opts}" ]]; then - return 0 - fi - # Pattern: SSH opts must start with dash and contain only safe characters - # Allows: -o Option=value -i /path/to/key -p 22 etc. 
- # Blocks: semicolons, pipes, backticks, $() and other shell metacharacters - if [[ "${opts}" =~ [\;\|\&\`\$\(\)\<\>] ]]; then - log_error "SECURITY: SSH_OPTS contains shell metacharacters" - log_error "Rejected value: ${opts}" - return 1 - fi - return 0 -} - -# Default SSH options for all cloud providers -# Clouds can override this if they need provider-specific settings -if [[ -z "${SSH_OPTS:-}" ]]; then - SSH_OPTS="-o StrictHostKeyChecking=accept-new -o UserKnownHostsFile=/dev/null -o LogLevel=ERROR -o ServerAliveInterval=15 -o ServerAliveCountMax=3 -o ConnectTimeout=10 -i ${HOME}/.ssh/id_ed25519" -else - # Validate user-provided SSH_OPTS for security - if ! _validate_ssh_opts "${SSH_OPTS}"; then - log_error "Invalid SSH_OPTS provided. Using secure defaults." - SSH_OPTS="-o StrictHostKeyChecking=accept-new -o UserKnownHostsFile=/dev/null -o LogLevel=ERROR -o ServerAliveInterval=15 -o ServerAliveCountMax=3 -o ConnectTimeout=10 -i ${HOME}/.ssh/id_ed25519" - fi -fi - -# ============================================================ -# SSH key management helpers -# ============================================================ - -# Generate SSH key if it doesn't exist -# Usage: generate_ssh_key_if_missing KEY_PATH -generate_ssh_key_if_missing() { - local key_path="${1}" - if [[ -f "${key_path}" ]]; then - return 0 - fi - log_step "Generating SSH key at ${key_path}..." - mkdir -p "$(dirname "${key_path}")" || { - log_error "Failed to create SSH key directory: $(dirname "${key_path}")" - log_error "Check that you have write permissions to this directory." - return 1 - } - ssh-keygen -t ed25519 -f "${key_path}" -N "" -q || { - log_error "Failed to generate SSH key at ${key_path}" - log_error "" - log_error "How to fix:" - log_error " 1. Check disk space: df -h $(dirname "${key_path}")" - log_error " 2. Check permissions: ls -la $(dirname "${key_path}")" - log_error " 3. 
Generate manually: ssh-keygen -t ed25519 -f ${key_path}" - return 1 - } - log_info "SSH key generated at ${key_path}" -} - -# Get MD5 fingerprint of SSH public key -# Usage: get_ssh_fingerprint PUB_KEY_PATH -get_ssh_fingerprint() { - local pub_path="${1}" - if [[ ! -f "${pub_path}" ]]; then - log_error "SSH public key not found: ${pub_path}" - log_error "Expected a public key file alongside your private key." - log_error "Regenerate with: ssh-keygen -t ed25519 -f ${pub_path%.pub}" - return 1 - fi - local fingerprint - fingerprint=$(ssh-keygen -lf "${pub_path}" -E md5 2>/dev/null | awk '{print $2}' | sed 's/MD5://') - if [[ -z "${fingerprint}" ]]; then - log_error "Failed to read SSH public key fingerprint from ${pub_path}" - log_error "The key file may be corrupted or in an unsupported format." - log_error "Regenerate with: ssh-keygen -t ed25519 -f ${pub_path%.pub}" - return 1 - fi - echo "${fingerprint}" -} - -# JSON-escape a string (for embedding in JSON bodies) -# Usage: json_escape STRING -json_escape() { - local string="${1}" - _INPUT="${string}" bun -e "process.stdout.write(JSON.stringify(process.env._INPUT) + '\n')" 2>/dev/null || { - # Fallback: manually escape backslashes, quotes, and JSON control characters - local escaped="${string//\\/\\\\}" - escaped="${escaped//\"/\\\"}" - escaped="${escaped//$'\n'/\\n}" - escaped="${escaped//$'\r'/\\r}" - escaped="${escaped//$'\t'/\\t}" - echo "\"${escaped}\"" - } -} - -# Extract SSH key IDs from cloud provider API response -# Usage: extract_ssh_key_ids API_RESPONSE KEY_FIELD -# KEY_FIELD: "ssh_keys" (DigitalOcean/Vultr) or "data" (Linode) -extract_ssh_key_ids() { - local api_response="${1}" - local key_field="${2:-ssh_keys}" - # Use jq with --arg to safely pass key_field (prevents code injection). 
- if command -v jq &>/dev/null; then - printf '%s' "${api_response}" | jq --arg field "${key_field}" '[.[$field][]?.id]' 2>/dev/null || { - log_error "Failed to parse SSH key IDs from API response" - return 1 - } - else - _DATA="${api_response}" _FIELD="${key_field}" bun -e " -const d = JSON.parse(process.env._DATA); -const ids = (d[process.env._FIELD] || []).map(k => k.id); -process.stdout.write(JSON.stringify(ids) + '\n'); -" 2>/dev/null || { - log_error "Failed to parse SSH key IDs from API response" - log_error "The API response may be malformed or bun is unavailable" - return 1 - } - fi -} - -# ============================================================ -# Cloud provisioning helpers -# ============================================================ - -# Generate cloud-init userdata YAML for server provisioning -# This is the default userdata used by all cloud providers -# Clouds can override this function if they need provider-specific cloud-init config -get_cloud_init_userdata() { - cat << 'CLOUD_INIT_EOF' -#cloud-config -package_update: true -packages: - - curl - - unzip - - git - - zsh - - nodejs - - npm - -runcmd: - # Set up 2G swap to prevent OOM kills on small VMs - - fallocate -l 2G /swapfile - - chmod 600 /swapfile - - mkswap /swapfile - - swapon /swapfile - # Upgrade Node.js to v22 LTS (apt has v18, agents like Cline need v20+) - # n installs to /usr/local/bin but apt's v18 at /usr/bin can shadow it, so symlink over - - npm install -g n && n 22 && ln -sf /usr/local/bin/node /usr/bin/node && ln -sf /usr/local/bin/npm /usr/bin/npm && ln -sf /usr/local/bin/npx /usr/bin/npx - # Install Bun - - su - root -c 'curl -fsSL https://bun.sh/install | bash' - # Install Claude Code - - su - root -c 'curl -fsSL https://claude.ai/install.sh | bash' - # Mark as sandbox environment (disposable cloud VM) - - echo 'export IS_SANDBOX=1' >> /root/.bashrc - - echo 'export IS_SANDBOX=1' >> /root/.zshrc - # Configure PATH in .bashrc and .zshrc (include claude installer path) - 
- echo 'export PATH="${HOME}/.claude/local/bin:${HOME}/.local/bin:${HOME}/.bun/bin:${PATH}"' >> /root/.bashrc - - echo 'export PATH="${HOME}/.claude/local/bin:${HOME}/.local/bin:${HOME}/.bun/bin:${PATH}"' >> /root/.zshrc - # Signal completion - - touch /root/.cloud-init-complete -CLOUD_INIT_EOF -} - -# ============================================================ -# Cloud API helpers -# ============================================================ - -# Calculate exponential backoff with jitter for retry logic -# Usage: calculate_retry_backoff CURRENT_INTERVAL MAX_INTERVAL -# Returns: backoff interval with ±20% jitter -calculate_retry_backoff() { - local interval="${1}" - local max_interval="${2}" - - # Validate inputs to prevent empty or invalid intervals - if [[ -z "${interval}" ]] || [[ "${interval}" -lt 1 ]]; then - echo "1" - return 0 - fi - - # Add jitter: ±20% randomization to prevent thundering herd - # Fallback to no-jitter interval if bun is unavailable - _INTERVAL="${interval}" bun -e "process.stdout.write(String(Math.floor(Number(process.env._INTERVAL) * (0.8 + Math.random() * 0.4))) + '\n')" 2>/dev/null || printf '%s\n' "${interval}" -} - -# Handle API retry decision with backoff - extracted to reduce duplication across API wrappers -# Usage: _api_should_retry_on_error ATTEMPT MAX_RETRIES INTERVAL MAX_INTERVAL MESSAGE -# Returns: 0 to continue/retry, 1 to fail -# Caller updates interval and attempt variables after success -_api_should_retry_on_error() { - local attempt="${1}" - local max_retries="${2}" - local interval="${3}" - local max_interval="${4}" - local message="${5}" - - if [[ "${attempt}" -ge "${max_retries}" ]]; then - return 1 # Don't retry - max attempts exhausted - fi - - local jitter - jitter=$(calculate_retry_backoff "${interval}" "${max_interval}") - log_warn "${message} (attempt ${attempt}/${max_retries}), retrying in ${jitter}s..." 
- sleep "${jitter}" - - return 0 # Do retry -} - -# Helper to update retry interval with backoff -# Usage: _update_retry_interval INTERVAL_VAR MAX_INTERVAL_VAR -# This eliminates repeated interval update logic across API wrappers -_update_retry_interval() { - local interval_var="${1}" - local max_interval_var="${2}" - - local current_interval=${!interval_var} - local max_interval=${!max_interval_var} - - current_interval=$((current_interval * 2)) - if [[ "${current_interval}" -gt "${max_interval}" ]]; then - current_interval="${max_interval}" - fi - - eval "${interval_var}=\${current_interval}" -} - -# Helper to extract HTTP status code and response body from curl output -# Curl is called with "-w \n%{http_code}" so last line is the code -# Returns: http_code on stdout, response_body via global variable -_parse_api_response() { - local response="${1}" - local http_code - http_code=$(echo "${response}" | tail -1) - local response_body - response_body=$(echo "${response}" | sed '$d') - - API_HTTP_CODE="${http_code}" - API_RESPONSE_BODY="${response_body}" -} - -# Core curl wrapper for API requests - builds args, executes, parses response -# Usage: _curl_api URL METHOD BODY AUTH_ARGS... -# Returns: 0 on curl success, 1 on curl failure -# Sets: API_HTTP_CODE and API_RESPONSE_BODY globals -# SECURITY: Authorization headers are passed via curl's -K (config from stdin) -# instead of command-line args, so tokens don't appear in `ps` output. -_curl_api() { - local url="${1}" - local method="${2}" - local body="${3:-}" - shift 3 - - # SECURITY: Separate Authorization headers from other args so we can pass - # them via stdin (-K -) instead of command-line, hiding tokens from `ps`. 
- local auth_header="" - local extra_args=() - while [[ $# -gt 0 ]]; do - if [[ "$1" == "-H" && "${2:-}" == Authorization:* ]]; then - auth_header="$2" - shift 2 - else - extra_args+=("$1") - shift - fi - done - - local args=( - -s - -w "\n%{http_code}" - -X "${method}" - -H "Content-Type: application/json" - "${extra_args[@]}" - ) - - if [[ -n "${body}" ]]; then - args+=(-d "${body}") - fi - - local response - if [[ -n "${auth_header}" ]]; then - # Pass auth header via stdin to keep it out of process argument list - response=$(printf 'header = "%s"\n' "${auth_header}" | curl "${args[@]}" -K - "${url}" 2>&1) - else - response=$(curl "${args[@]}" "${url}" 2>&1) - fi - local curl_exit_code=$? - - _parse_api_response "${response}" - - return ${curl_exit_code} -} - -# Helper to handle a single API request attempt with Bearer auth -# Returns: 0 on curl success, 1 on curl failure -# Sets: API_HTTP_CODE and API_RESPONSE_BODY globals -_make_api_request() { - local base_url="${1}" - local auth_token="${2}" - local method="${3}" - local endpoint="${4}" - local body="${5:-}" - - _curl_api "${base_url}${endpoint}" "${method}" "${body}" -H "Authorization: Bearer ${auth_token}" -} - -# Generic cloud API wrapper - centralized curl wrapper for all cloud providers -# Includes automatic retry logic with exponential backoff for transient failures -# Usage: generic_cloud_api BASE_URL AUTH_TOKEN METHOD ENDPOINT [BODY] [MAX_RETRIES] -# Example: generic_cloud_api "$DO_API_BASE" "$DO_API_TOKEN" GET "/account" -# Example: generic_cloud_api "$DO_API_BASE" "$DO_API_TOKEN" POST "/droplets" "$body" -# Example: generic_cloud_api "$DO_API_BASE" "$DO_API_TOKEN" GET "/account" "" 5 -# Retries on: 429 (rate limit), 503 (service unavailable), network errors -# Internal retry loop shared by generic_cloud_api and generic_cloud_api_custom_auth -# Usage: _cloud_api_retry_loop REQUEST_FUNC MAX_RETRIES API_DESCRIPTION [REQUEST_FUNC_ARGS...] -# Classify the result of an API request attempt. 
# Returns a retry reason string on stdout if the request failed with a retryable error,
# or empty string on success. Caller checks the return string.
# Usage: _classify_api_result CURL_EXIT_STATUS
# Reads the API_HTTP_CODE global. Only curl failures, HTTP 429 and HTTP 503
# produce a reason; every other HTTP status yields an empty string.
_classify_api_result() {
    local curl_ok="${1}"
    if [[ "${curl_ok}" != "0" ]]; then
        echo "Cloud API network error"
    elif [[ "${API_HTTP_CODE}" == "429" ]]; then
        echo "Cloud API returned rate limit (HTTP 429)"
    elif [[ "${API_HTTP_CODE}" == "503" ]]; then
        echo "Cloud API returned service unavailable (HTTP 503)"
    fi
}

# Report a final API failure after retries are exhausted
# Usage: _report_api_failure RETRY_REASON MAX_RETRIES
# For non-network failures the last response body (API_RESPONSE_BODY) is also
# written to stdout.
_report_api_failure() {
    local retry_reason="${1}"
    local max_retries="${2}"
    log_error "${retry_reason} after ${max_retries} attempts"
    if [[ "${retry_reason}" == "Cloud API network error" ]]; then
        log_warn "Could not reach the cloud provider's API."
        log_warn ""
        log_warn "How to fix:"
        log_warn " 1. Check your internet connection: curl -s https://httpbin.org/ip"
        log_warn " 2. Check DNS resolution: nslookup the provider's API hostname"
        log_warn " 3. If behind a proxy or firewall, ensure HTTPS traffic is allowed"
        log_warn " 4. Try again in a few moments (the API may be temporarily down)"
    else
        log_warn "This is usually caused by rate limiting or temporary provider issues."
        log_warn "Wait a minute and try again, or check the provider's status page."
        # printf, not echo: the body is arbitrary data and must be emitted verbatim.
        printf '%s\n' "${API_RESPONSE_BODY}"
    fi
}

# Internal retry loop shared by the generic_cloud_api* wrappers
# Usage: _cloud_api_retry_loop REQUEST_FUNC MAX_RETRIES API_DESCRIPTION [REQUEST_FUNC_ARGS...]
# Calls REQUEST_FUNC with REQUEST_FUNC_ARGS up to MAX_RETRIES times, doubling
# the wait (starting at 2s, capped at 30s) between attempts.
# Prints API_RESPONSE_BODY and returns 0 as soon as an attempt is not
# classified as retryable; returns 1 once retries are exhausted.
_cloud_api_retry_loop() {
    local request_func="${1}"
    local max_retries="${2}"
    local api_description="${3}"
    shift 3

    local attempt=1
    local interval=2
    local max_interval=30
    local curl_ok retry_reason

    while [[ "${attempt}" -le "${max_retries}" ]]; do
        # The request runs in this shell so the API_* globals it sets are visible.
        curl_ok=0
        "${request_func}" "$@" || curl_ok=$?

        retry_reason=$(_classify_api_result "${curl_ok}")

        if [[ -z "${retry_reason}" ]]; then
            # printf, not echo: a body such as "-n" must be printed as-is.
            printf '%s\n' "${API_RESPONSE_BODY}"
            return 0
        fi

        if ! _api_should_retry_on_error "${attempt}" "${max_retries}" "${interval}" "${max_interval}" "${retry_reason}"; then
            _report_api_failure "${retry_reason}" "${max_retries}"
            return 1
        fi
        _update_retry_interval interval max_interval
        attempt=$((attempt + 1))
    done

    # Only reached when the loop body never ran (MAX_RETRIES below 1).
    log_error "Cloud API request failed after ${max_retries} attempts (${api_description})"
    log_warn "This is usually caused by rate limiting or temporary provider issues."
    log_warn "Wait a minute and try again, or check the provider's status page."
    return 1
}

# Cloud API wrapper with Bearer auth and automatic retries
# Usage: generic_cloud_api BASE_URL AUTH_TOKEN METHOD ENDPOINT [BODY] [MAX_RETRIES]
# MAX_RETRIES defaults to 3. Prints the response body on stdout.
generic_cloud_api() {
    local base_url="${1}"
    local auth_token="${2}"
    local method="${3}"
    local endpoint="${4}"
    local body="${5:-}"
    local max_retries="${6:-3}"

    _cloud_api_retry_loop _make_api_request "${max_retries}" "${method} ${endpoint}" "${base_url}" "${auth_token}" "${method}" "${endpoint}" "${body}"
}

# Helper to make API request with custom curl auth args (e.g., Basic Auth, custom headers)
# Usage: _make_api_request_custom_auth URL METHOD [BODY] [AUTH_ARGS...]
# Returns: the status of _curl_api
# Sets: API_HTTP_CODE and API_RESPONSE_BODY globals
_make_api_request_custom_auth() {
    local url="${1}"
    local method="${2}"
    local body="${3:-}"
    # BODY is optional. `shift 3` with fewer than three arguments shifts
    # nothing, which would forward URL/METHOD to curl as extra arguments.
    shift "$(( $# < 3 ? $# : 3 ))"

    _curl_api "${url}" "${method}" "${body}" "$@"
}

# Generic cloud API wrapper with custom curl auth args
# Like generic_cloud_api but accepts arbitrary curl flags for authentication
# Usage: generic_cloud_api_custom_auth BASE_URL METHOD ENDPOINT BODY MAX_RETRIES AUTH_ARGS...
# Example: generic_cloud_api_custom_auth "$API_BASE" GET "/account" "" 3 -H "X-Auth-Token: $TOKEN"
# Example: generic_cloud_api_custom_auth "$API_BASE" POST "/servers" "$body" 3 -u "$USER:$PASS"
# BODY defaults to empty and MAX_RETRIES to 3 when omitted; everything after
# the fifth argument is passed through as curl auth flags.
generic_cloud_api_custom_auth() {
    local base_url="${1}"
    local method="${2}"
    local endpoint="${3}"
    local body="${4:-}"
    local max_retries="${5:-3}"
    # BODY and MAX_RETRIES are optional. `shift 5` with fewer than five
    # arguments shifts nothing, which would re-send the positional arguments
    # as auth flags.
    shift "$(( $# < 5 ? $# : 5 ))"
    # Remaining args are custom curl auth flags

    _cloud_api_retry_loop _make_api_request_custom_auth "${max_retries}" "${method} ${endpoint}" "${base_url}${endpoint}" "${method}" "${body}" "$@"
}

# ============================================================
# Agent verification helpers
# ============================================================

# Check if agent command exists in PATH
# Usage: _check_agent_in_path AGENT_CMD AGENT_NAME
# Returns 0 if `command -v` finds AGENT_CMD, otherwise logs a diagnostic and returns 1.
_check_agent_in_path() {
    local agent_cmd="$1"
    local agent_name="$2"
    if ! command -v "${agent_cmd}" &> /dev/null; then
        _log_diagnostic \
            "${agent_name} installation failed: command '${agent_cmd}' not found in PATH" \
            "The installation script encountered an error (check logs above)" \
            "The binary was installed to a directory not in PATH" \
            "Network issues prevented the download from completing" \
            --- \
            "Re-run the script to retry the installation" \
            "Install ${agent_name} manually and ensure it is in PATH"
        return 1
    fi
    return 0
}

# Check if agent command executes without error
# Usage: _check_agent_runs AGENT_CMD VERIFY_ARG AGENT_NAME
# Runs "AGENT_CMD VERIFY_ARG" with output discarded; logs a diagnostic and
# returns 1 on a non-zero exit.
_check_agent_runs() {
    local agent_cmd="$1"
    local verify_arg="$2"
    local agent_name="$3"
    if ! "${agent_cmd}" "${verify_arg}" &> /dev/null; then
        _log_diagnostic \
            "${agent_name} verification failed: '${agent_cmd} ${verify_arg}' returned an error" \
            "Missing runtime dependencies (Python, Node.js, etc.)" \
            "Incompatible system architecture or OS version" \
            --- \
            "Check ${agent_name}'s installation docs for prerequisites" \
            "Run '${agent_cmd} ${verify_arg}' manually to see the error"
        return 1
    fi
    return 0
}

# Verify that an agent is properly installed by checking if its command exists
# Usage: verify_agent_installed AGENT_COMMAND [VERIFICATION_ARG] [ERROR_MESSAGE]
# Examples:
#   verify_agent_installed "claude" "--version" "Claude Code"
#   verify_agent_installed "codex" "--version" "Codex"
#   verify_agent_installed "cline" "--version" "Cline"
# VERIFICATION_ARG defaults to --version; the third argument is used as the
# display name and defaults to AGENT_COMMAND.
# Returns 0 if agent is installed and working, 1 otherwise
verify_agent_installed() {
    local agent_cmd="${1}"
    local verify_arg="${2:---version}"
    local agent_name="${3:-${agent_cmd}}"

    log_step "Verifying ${agent_name} installation..."

    _check_agent_in_path "${agent_cmd}" "${agent_name}" || return 1
    _check_agent_runs "${agent_cmd}" "${verify_arg}" "${agent_name}" || return 1

    log_info "${agent_name} installation verified successfully"
    return 0
}

# ============================================================
# Non-interactive agent execution
# ============================================================

# Execute an agent in non-interactive mode with a prompt
# Usage: execute_agent_non_interactive SPRITE_NAME AGENT_NAME AGENT_FLAGS PROMPT EXEC_CALLBACK
# Arguments:
#   SPRITE_NAME   - Name of the sprite/server to execute on
#   AGENT_NAME    - Name of the agent command (e.g., "claude", "codex")
#   AGENT_FLAGS   - Agent-specific flags for non-interactive execution (e.g., "-p" for claude, "--prompt" for codex)
#   PROMPT        - User prompt to execute
#   EXEC_CALLBACK - Function to execute commands: func(sprite_name, command)
#
# Example (Sprite):
#   execute_agent_non_interactive "$SPRITE_NAME" "claude" "-p" "$PROMPT" "sprite_exec"
#
# Example (SSH):
#   execute_agent_non_interactive "$SERVER_IP" "codex" "--prompt" "$PROMPT" "ssh_exec"
execute_agent_non_interactive() {
    local sprite_name="${1}"
    local agent_name="${2}"
    local agent_flags="${3}"
    local prompt="${4}"
    local exec_callback="${5}"

    log_step "Executing ${agent_name} with prompt in non-interactive mode..."

    # Do NOT use printf '%q' here — the run callback (run_server, sprite exec,
    # ssh) already handles escaping for remote transport. Double-escaping breaks
    # prompts containing quotes, spaces, or special characters on Fly.io.
    # Single-quote the prompt to protect it from shell expansion; each embedded
    # single quote becomes '\'' (close quote, escaped quote, reopen quote).
    local safe_prompt
    safe_prompt="'$(printf '%s' "${prompt}" | sed "s/'/'\\\\''/g")'"

    # Build the command based on exec callback type
    if [[ "${exec_callback}" == *"sprite"* ]]; then
        # Sprite execution (no -tty flag for non-interactive)
        sprite exec -s "${sprite_name}" -- zsh -c "source ~/.zshrc && ${agent_name} ${agent_flags} ${safe_prompt}"
    else
        # Generic SSH execution
        ${exec_callback} "${sprite_name}" "source ~/.zshrc && ${agent_name} ${agent_flags} ${safe_prompt}"
    fi
}

# ============================================================
# SSH connectivity helpers
# ============================================================

# Log progress message based on elapsed time
# Usage: _log_ssh_wait_progress DESCRIPTION ELAPSED_SECONDS
# Under 60s and under 120s are logged as steps; anything longer as a warning.
_log_ssh_wait_progress() {
    local description="${1}"
    local elapsed_time="${2}"

    if [[ ${elapsed_time} -lt 60 ]]; then
        log_step "Waiting for ${description}... (${elapsed_time}s elapsed, still within normal range)"
    elif [[ ${elapsed_time} -lt 120 ]]; then
        log_step "Waiting for ${description}... (${elapsed_time}s elapsed, taking longer than usual)"
    else
        log_warn "Still waiting for ${description}... (${elapsed_time}s elapsed, this is unusually slow)"
    fi
}

# Log timeout error message with troubleshooting steps
# Usage: _log_ssh_wait_timeout_error DESCRIPTION ELAPSED_SECONDS USERNAME IP
# Also prints SPAWN_DASHBOARD_URL and SPAWN_RETRY_CMD when they are set.
_log_ssh_wait_timeout_error() {
    local description="${1}"
    local elapsed_time="${2}"
    local username="${3}"
    local ip="${4}"

    log_error "${description} timed out after ${elapsed_time}s (server: ${ip})"
    log_error ""
    log_error "The server failed to become ready within the expected timeframe."
    log_error ""
    log_error "Common causes:"
    log_error " - Server is still booting (some cloud providers take 2-3 minutes)"
    log_error " - Cloud provider API delays or maintenance"
    log_error " - Firewall blocking SSH on port 22"
    log_error " - Network connectivity issues"
    log_error ""
    log_error "Troubleshooting steps:"
    log_error " 1. Test SSH manually: ssh ${username}@${ip}"
    log_error " 2. Check firewall rules in your cloud provider dashboard"
    if [[ -n "${SPAWN_DASHBOARD_URL:-}" ]]; then
        log_error " Dashboard: ${SPAWN_DASHBOARD_URL}"
    fi
    log_error " 3. Re-run this command to retry (the server may need more time)"
    if [[ -n "${SPAWN_RETRY_CMD:-}" ]]; then
        log_error " ${SPAWN_RETRY_CMD}"
    fi
}

# Generic SSH wait function - polls until a remote command succeeds with exponential backoff
# Usage: generic_ssh_wait USERNAME IP SSH_OPTS TEST_CMD DESCRIPTION MAX_ATTEMPTS [INITIAL_INTERVAL]
# Implements exponential backoff: starts at INITIAL_INTERVAL (default 5s), doubles up to max 30s
# Adds jitter (±20%) to prevent thundering herd when multiple instances retry simultaneously
# MAX_ATTEMPTS defaults to 30. The reported elapsed time counts only the time
# spent sleeping between attempts.
# Returns 0 once TEST_CMD succeeds over SSH, 1 after MAX_ATTEMPTS failures.
generic_ssh_wait() {
    local username="${1}"
    local ip="${2}"
    local ssh_opts="${3}"
    local test_cmd="${4}"
    local description="${5}"
    local max_attempts="${6:-30}"
    local initial_interval="${7:-5}"

    local attempt=1
    local interval="${initial_interval}"
    local max_interval=30
    local elapsed_time=0
    local jitter

    log_step "Waiting for ${description} to ${ip} (this usually takes 30-90 seconds)..."
    while [[ "${attempt}" -le "${max_attempts}" ]]; do
        # ssh_opts is intentionally unquoted so it splits into separate options;
        # stdin comes from /dev/null so ssh cannot consume the caller's input.
        # shellcheck disable=SC2086
        if ssh ${ssh_opts} "${username}@${ip}" "${test_cmd}" < /dev/null >/dev/null 2>&1; then
            log_info "${description} ready (took ${elapsed_time}s)"
            return 0
        fi

        jitter=$(calculate_retry_backoff "${interval}" "${max_interval}")

        _log_ssh_wait_progress "${description}" "${elapsed_time}"
        sleep "${jitter}"

        elapsed_time=$((elapsed_time + jitter))
        _update_retry_interval interval max_interval
        attempt=$((attempt + 1))
    done

    _log_ssh_wait_timeout_error "${description}" "${elapsed_time}" "${username}" "${ip}"
    return 1
}

# Wait for cloud-init to complete on a server
# Usage: wait_for_cloud_init