mirror of
https://github.com/OpenRouterTeam/spawn.git
synced 2026-04-29 12:29:31 +00:00
* fix(sprite): fix all 6 Sprite agent installs for E2E - Use `npm install -g --prefix` instead of `npm config set prefix` to avoid creating .npmrc that conflicts with nvm on Sprite VMs - Fix shell environment setup to only modify .bash_profile (not .bashrc) so non-interactive bash -c commands retain PATH config - Add $HOME/.cargo/bin to PATH for zeroclaw (Sprite has no ~/.cargo/env) - Add $HOME/.local/bin to PATH config for Sprite shell environment - Add sprite E2E cloud driver with org detection, config corruption fix, direct command embedding (not $1 positional), and retry logic - Fix provision.sh to kill full process tree after timeout (prevents orphaned sprite exec sessions from corrupting config) - Fix verify.sh zeroclaw check to not rely on ~/.cargo/env existing Tested: 6/6 Sprite agents pass E2E (claude, codex, openclaw, zeroclaw, opencode, kilocode). Hermes is not in the Sprite manifest. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> * fix: biome format - collapse runSprite call to single line Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com> --------- Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com> Co-authored-by: L <6723574+louisgv@users.noreply.github.com>
134 lines
4.9 KiB
Bash
134 lines
4.9 KiB
Bash
#!/bin/bash
# e2e/lib/provision.sh — Provision an agent VM via spawn CLI (cloud-agnostic)
set -eo pipefail

# ---------------------------------------------------------------------------
# provision_agent AGENT APP_NAME LOG_DIR
#
# Runs spawn in headless mode with a timeout. The provision process hangs on
# the interactive SSH session (step 12 of the orchestration), so we kill it
# after PROVISION_TIMEOUT seconds. The install itself usually succeeds; we
# verify via instance existence and .spawnrc presence afterward.
#
# Arguments:
#   AGENT     — agent name passed to the spawn CLI
#   APP_NAME  — instance name; also the basename of the log files
#   LOG_DIR   — directory receiving APP_NAME.stdout / .stderr / .exit
#
# Globals read:
#   SPAWN_CLI_DIR      — optional; overrides auto-resolution of the CLI entry
#   ACTIVE_CLOUD       — cloud name passed to the spawn CLI
#   PROVISION_TIMEOUT  — seconds to wait for the CLI before killing it
#   MODEL_ID           — optional; defaults to openrouter/auto
#   OPENROUTER_API_KEY — forwarded to the CLI
#
# Uses cloud driver functions:
#   cloud_headless_env      — cloud-specific env var exports
#   cloud_provision_verify  — check instance exists, write IP + metadata
#   cloud_install_wait      — prints how many seconds to poll for .spawnrc
#   cloud_exec              — remote command execution
#
# Returns:
#   1 if the CLI entry point is missing or the instance cannot be verified,
#   0 otherwise (including when .spawnrc never appears — later verification
#   is expected to catch real install failures).
# ---------------------------------------------------------------------------
provision_agent() {
  local agent="$1"
  local app_name="$2"
  local log_dir="$3"

  local exit_file="${log_dir}/${app_name}.exit"
  local stdout_file="${log_dir}/${app_name}.stdout"
  local stderr_file="${log_dir}/${app_name}.stderr"

  # Resolve CLI entry point
  # SPAWN_CLI_DIR overrides auto-resolution — use this to force local source code
  local cli_entry
  if [ -n "${SPAWN_CLI_DIR:-}" ]; then
    cli_entry="${SPAWN_CLI_DIR}/packages/cli/src/index.ts"
  else
    cli_entry="$(cd "$(dirname "${BASH_SOURCE[0]}")/../../.." && pwd)/packages/cli/src/index.ts"
  fi

  if [ ! -f "${cli_entry}" ]; then
    log_err "CLI entry point not found: ${cli_entry}"
    return 1
  fi

  log_step "Provisioning ${agent} as ${app_name} on ${ACTIVE_CLOUD} (timeout: ${PROVISION_TIMEOUT}s)"

  # Remove stale exit file so the poll loop below only sees this run's result
  rm -f "${exit_file}"

  # Environment for headless provisioning
  # MODEL_ID bypasses the interactive model selection prompt (required by openclaw)
  (
    export SPAWN_NON_INTERACTIVE=1
    export SPAWN_SKIP_GITHUB_AUTH=1
    export SPAWN_SKIP_API_VALIDATION=1
    export SPAWN_NO_UPDATE_CHECK=1
    export BUN_RUNTIME_TRANSPILER_CACHE_PATH=0
    export SPAWN_CLI_DIR="${SPAWN_CLI_DIR:-}"
    export MODEL_ID="${MODEL_ID:-openrouter/auto}"
    export OPENROUTER_API_KEY="${OPENROUTER_API_KEY}"

    # Apply cloud-specific env vars (safe: only processes export VAR="VALUE" lines)
    while IFS= read -r _env_line; do
      if [[ "${_env_line}" =~ ^export[[:space:]]+([A-Za-z_][A-Za-z0-9_]*)=\"(.*)\"$ ]]; then
        export "${BASH_REMATCH[1]}"="${BASH_REMATCH[2]}"
      fi
    done <<CLOUD_ENV
$(cloud_headless_env "${app_name}" "${agent}")
CLOUD_ENV

    # Capture the status with `||` so that errexit (inherited by this
    # subshell) cannot abort before the exit file is written. Without this a
    # failing CLI run never records its status and the caller waits out the
    # full timeout.
    _bun_status=0
    bun run "${cli_entry}" "${agent}" "${ACTIVE_CLOUD}" --headless --output json \
      > "${stdout_file}" 2> "${stderr_file}" || _bun_status=$?
    printf '%s' "${_bun_status}" > "${exit_file}"
  ) &
  local pid=$!

  # Poll for completion or timeout (bash 3.2 compatible — no wait -n)
  local waited=0
  while [ "${waited}" -lt "${PROVISION_TIMEOUT}" ]; do
    if [ -f "${exit_file}" ]; then
      break
    fi
    # Stop early if the background process is gone; the exit file is
    # re-checked after the loop in case it was written just before exit.
    if ! kill -0 "${pid}" 2>/dev/null; then
      break
    fi
    sleep 5
    waited=$((waited + 5))
  done

  # Clean up if no exit status was recorded (the interactive SSH/CLI session hangs)
  if [ ! -f "${exit_file}" ]; then
    if kill -0 "${pid}" 2>/dev/null; then
      log_warn "Provision timed out after ${PROVISION_TIMEOUT}s — killing (install may still succeed)"
    else
      log_warn "Provision process exited without recording an exit status"
    fi
    # The subshell spawns bun → sprite exec -tty, which won't die from just
    # killing the subshell PID. pkill -P signals the subshell's direct
    # children; deeper descendants are covered by the pattern-based pkill
    # below. Without this, orphaned sprite exec sessions keep running and
    # corrupt the sprite config file.
    pkill -P "${pid}" 2>/dev/null || true
    kill "${pid}" 2>/dev/null || true
    wait "${pid}" 2>/dev/null || true
    # Also kill any lingering sprite exec processes for this specific app
    pkill -f "sprite.*exec.*${app_name}" 2>/dev/null || true
    sleep 1
  fi

  # Even if provision "failed" (timeout), the instance may exist and install may have completed.
  # Verify instance existence via cloud driver.
  if ! cloud_provision_verify "${app_name}" "${log_dir}"; then
    log_err "Instance ${app_name} does not exist after provisioning"
    if [ -f "${stderr_file}" ]; then
      log_err "Stderr tail:"
      tail -20 "${stderr_file}" >&2 || true
    fi
    return 1
  fi

  log_ok "Instance ${app_name} verified"

  # Wait for install to complete (.spawnrc is written near the end)
  local effective_install_wait
  effective_install_wait=$(cloud_install_wait)
  log_step "Waiting for install to complete (polling .spawnrc, up to ${effective_install_wait}s)..."
  local install_waited=0
  local install_ok=0
  while [ "${install_waited}" -lt "${effective_install_wait}" ]; do
    if cloud_exec "${app_name}" "test -f ~/.spawnrc" >/dev/null 2>&1; then
      install_ok=1
      break
    fi
    sleep 10
    install_waited=$((install_waited + 10))
  done

  if [ "${install_ok}" -eq 1 ]; then
    # Settle time for agent binary install to finish after .spawnrc is written
    sleep 5
    log_ok "Install completed (.spawnrc found)"
    return 0
  else
    log_warn ".spawnrc not found after ${effective_install_wait}s — install may still be running"
    return 0  # Continue to verification; it will catch real failures
  fi
}