#!/bin/bash
# e2e/lib/provision.sh — Provision an agent VM via spawn CLI (cloud-agnostic)
set -eo pipefail

# ---------------------------------------------------------------------------
# provision_agent AGENT APP_NAME LOG_DIR
#
# Runs spawn in headless mode with a timeout. The provision process hangs on
# the interactive SSH session (step 12 of the orchestration), so we kill it
# after PROVISION_TIMEOUT seconds. The install itself usually succeeds; we
# verify via instance existence and .spawnrc presence afterward.
#
# Arguments:
#   $1  AGENT     — agent name, passed to the spawn CLI
#   $2  APP_NAME  — instance/app name; also the basename of the log files
#   $3  LOG_DIR   — directory receiving APP_NAME.{exit,stdout,stderr}
#
# Globals read:
#   ACTIVE_CLOUD       — cloud name, passed to the CLI and used in log output
#   PROVISION_TIMEOUT  — seconds to wait for the CLI before killing it
#   SPAWN_CLI_DIR      — optional; overrides auto-resolution of the CLI checkout
#   MODEL_ID           — optional; defaults to "openrouter/auto"
#   OPENROUTER_API_KEY — forwarded to the CLI
#
# Uses cloud driver functions:
#   cloud_headless_env     — cloud-specific env var exports
#   cloud_provision_verify — check instance exists, write IP + metadata
#   cloud_install_wait     — prints the max seconds to poll for .spawnrc
#   cloud_exec             — remote command execution
#
# Returns:
#   1 if the CLI entry point is missing or the instance cannot be verified;
#   0 otherwise (including when .spawnrc never shows up — later verification
#   is expected to catch real failures).
# ---------------------------------------------------------------------------
provision_agent() {
  local agent="$1"
  local app_name="$2"
  local log_dir="$3"

  local exit_file="${log_dir}/${app_name}.exit"
  local stdout_file="${log_dir}/${app_name}.stdout"
  local stderr_file="${log_dir}/${app_name}.stderr"

  # Resolve CLI entry point
  # SPAWN_CLI_DIR overrides auto-resolution — use this to force local source code
  local cli_entry
  if [ -n "${SPAWN_CLI_DIR:-}" ]; then
    cli_entry="${SPAWN_CLI_DIR}/packages/cli/src/index.ts"
  else
    cli_entry="$(cd "$(dirname "${BASH_SOURCE[0]}")/../../.." && pwd)/packages/cli/src/index.ts"
  fi

  if [ ! -f "${cli_entry}" ]; then
    log_err "CLI entry point not found: ${cli_entry}"
    return 1
  fi

  log_step "Provisioning ${agent} as ${app_name} on ${ACTIVE_CLOUD} (timeout: ${PROVISION_TIMEOUT}s)"

  # Remove stale exit file — its (re)appearance is the completion signal below
  rm -f "${exit_file}"

  # Environment for headless provisioning
  # MODEL_ID bypasses the interactive model selection prompt (required by openclaw)
  # Runs in a background subshell so the exports never leak into the caller.
  (
    export SPAWN_NON_INTERACTIVE=1
    export SPAWN_SKIP_GITHUB_AUTH=1
    export SPAWN_SKIP_API_VALIDATION=1
    export SPAWN_NO_UPDATE_CHECK=1
    export BUN_RUNTIME_TRANSPILER_CACHE_PATH=0
    export SPAWN_CLI_DIR="${SPAWN_CLI_DIR:-}"
    export MODEL_ID="${MODEL_ID:-openrouter/auto}"
    export OPENROUTER_API_KEY="${OPENROUTER_API_KEY}"

    # Apply cloud-specific env vars (safe: only processes export VAR="VALUE" lines)
    # The driver output is parsed, never eval'd; non-matching lines are ignored.
    # NOTE(review): the argument list passed to cloud_headless_env is
    # reconstructed — confirm against the cloud driver's signature.
    while IFS= read -r _env_line; do
      if [[ "${_env_line}" =~ ^export[[:space:]]+([A-Za-z_][A-Za-z0-9_]*)=\"(.*)\"$ ]]; then
        export "${BASH_REMATCH[1]}"="${BASH_REMATCH[2]}"
      fi
    done <<< "$(cloud_headless_env "${app_name}" "${agent}")"

    # Run the CLI. Capture its status with `||` so that errexit cannot end the
    # subshell before the exit file is written — otherwise a fast CLI failure
    # would be indistinguishable from a hang until PROVISION_TIMEOUT expires.
    # NOTE(review): the CLI flags are reconstructed — confirm against the
    # spawn CLI's headless interface.
    cli_rc=0
    bun run "${cli_entry}" "${agent}" "${ACTIVE_CLOUD}" --headless --output json \
      > "${stdout_file}" 2> "${stderr_file}" || cli_rc=$?
    printf '%s' "${cli_rc}" > "${exit_file}"
  ) &
  local pid=$!

  # Poll for completion or timeout (bash 3.2 compatible — no wait -n)
  local waited=0
  while [ "${waited}" -lt "${PROVISION_TIMEOUT}" ]; do
    if [ -f "${exit_file}" ]; then
      break
    fi
    sleep 5
    waited=$((waited + 5))
  done

  # Kill if still running (the interactive SSH/CLI session hangs)
  if [ ! -f "${exit_file}" ]; then
    log_warn "Provision timed out after ${PROVISION_TIMEOUT}s — killing (install may still succeed)"
    # Kill the entire process tree — the subshell spawns bun → sprite exec -tty
    # which won't die from just killing the subshell PID. Without this, orphaned
    # sprite exec sessions keep running and corrupt the sprite config file.
    pkill -P "${pid}" 2>/dev/null || true
    kill "${pid}" 2>/dev/null || true
    wait "${pid}" 2>/dev/null || true
    # Also kill any lingering sprite exec processes for this specific app
    pkill -f "sprite.*exec.*${app_name}" 2>/dev/null || true
    sleep 1
  fi

  # Even if provision "failed" (timeout), the instance may exist and install may have completed.
  # Verify instance existence via cloud driver.
  if ! cloud_provision_verify "${app_name}" "${log_dir}"; then
    log_err "Instance ${app_name} does not exist after provisioning"
    if [ -f "${stderr_file}" ]; then
      log_err "Stderr tail:"
      tail -20 "${stderr_file}" >&2 || true
    fi
    return 1
  fi

  log_ok "Instance ${app_name} verified"

  # Wait for install to complete (.spawnrc is written near the end)
  local effective_install_wait
  effective_install_wait=$(cloud_install_wait)
  log_step "Waiting for install to complete (polling .spawnrc, up to ${effective_install_wait}s)..."
  local install_waited=0
  local install_ok=0
  while [ "${install_waited}" -lt "${effective_install_wait}" ]; do
    if cloud_exec "${app_name}" "test -f ~/.spawnrc" >/dev/null 2>&1; then
      install_ok=1
      break
    fi
    sleep 10
    install_waited=$((install_waited + 10))
  done

  if [ "${install_ok}" -eq 1 ]; then
    # Settle time for agent binary install to finish after .spawnrc is written
    sleep 5
    log_ok "Install completed (.spawnrc found)"
    return 0
  else
    log_warn ".spawnrc not found after ${effective_install_wait}s — install may still be running"
    return 0  # Continue to verification; it will catch real failures
  fi
}