spawn/sh/e2e/lib/provision.sh
Ahmed Abushagur d5461adc16
feat: SPAWN_CLI_DIR env var to force local source in e2e (#2015)
* feat: SPAWN_CLI_DIR env var to force local source in e2e and shell scripts

When SPAWN_CLI_DIR is set, the entire toolchain uses local TypeScript
source instead of downloading pre-bundled scripts from GitHub releases:

- e2e.sh: auto-sets SPAWN_CLI_DIR to repo root when running locally
- provision.sh: exports SPAWN_CLI_DIR into the headless subshell
- commands.ts: reads local shell scripts instead of fetching from CDN
- All 36 cloud/agent shell scripts: exec local main.ts when set

This enables e2e tests to validate local changes before they're released.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* fix(security): add path traversal defense to SPAWN_CLI_DIR script loading

Canonicalize the path via realpathSync and verify it stays inside the
resolved CLI directory before reading. Prevents SPAWN_CLI_DIR from
being used to read arbitrary files via ../ traversal.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* fix(security): harden SPAWN_CLI_DIR path traversal defense

- Validate cloud/agent names don't contain '..', '/' or '\' before
  constructing file paths
- Fix root-directory edge case in prefix check by handling trailing
  separator correctly

Agent: pr-maintainer
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

---------

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
Co-authored-by: B <6723574+louisgv@users.noreply.github.com>
2026-02-28 04:14:36 -05:00

125 lines
4.3 KiB
Bash

#!/bin/bash
# e2e/lib/provision.sh — Provision an agent VM via spawn CLI (cloud-agnostic)
# Strict mode: abort on any unhandled command failure (-e) and treat a pipeline
# as failed when any of its stages fails (pipefail).
set -eo pipefail
# ---------------------------------------------------------------------------
# provision_agent AGENT APP_NAME LOG_DIR
#
# Runs spawn in headless mode with a timeout. The provision process hangs on
# the interactive SSH session (step 12 of the orchestration), so we kill it
# after PROVISION_TIMEOUT seconds. The install itself usually succeeds; we
# verify via instance existence and .spawnrc presence afterward.
#
# Arguments:
#   $1 AGENT     agent name handed to the spawn CLI
#   $2 APP_NAME  instance/app name; also the basename of the log files
#   $3 LOG_DIR   directory receiving APP_NAME.stdout, .stderr and .exit
#
# Globals read:
#   SPAWN_CLI_DIR       optional; when non-empty, the CLI entry point is
#                       resolved below this directory
#   ACTIVE_CLOUD        cloud name handed to the spawn CLI
#   PROVISION_TIMEOUT   seconds to poll for the CLI to finish
#   MODEL_ID            optional; defaults to openrouter/auto
#   OPENROUTER_API_KEY  exported into the headless subshell
#
# Uses cloud driver functions:
#   cloud_headless_env     — cloud-specific env var exports (eval'd)
#   cloud_provision_verify — check instance exists, write IP + metadata
#   cloud_install_wait     — prints how many seconds to poll for .spawnrc
#   cloud_exec             — remote command execution
#
# Outputs:
#   LOG_DIR/APP_NAME.stdout / .stderr — CLI output streams
#   LOG_DIR/APP_NAME.exit             — CLI exit status (absent on timeout)
#
# Returns:
#   0  instance verified (also when .spawnrc never appeared; later
#      verification is expected to catch real failures)
#   1  CLI entry point missing, or the instance could not be verified
# ---------------------------------------------------------------------------
provision_agent() {
  local agent="$1"
  local app_name="$2"
  local log_dir="$3"

  local exit_file="${log_dir}/${app_name}.exit"
  local stdout_file="${log_dir}/${app_name}.stdout"
  local stderr_file="${log_dir}/${app_name}.stderr"

  # Resolve CLI entry point
  # SPAWN_CLI_DIR overrides auto-resolution — use this to force local source code
  local cli_entry
  if [ -n "${SPAWN_CLI_DIR:-}" ]; then
    cli_entry="${SPAWN_CLI_DIR}/packages/cli/src/index.ts"
  else
    # Three directories above this file, then into the CLI package.
    cli_entry="$(cd "$(dirname "${BASH_SOURCE[0]}")/../../.." && pwd)/packages/cli/src/index.ts"
  fi

  if [ ! -f "${cli_entry}" ]; then
    log_err "CLI entry point not found: ${cli_entry}"
    return 1
  fi

  log_step "Provisioning ${agent} as ${app_name} on ${ACTIVE_CLOUD} (timeout: ${PROVISION_TIMEOUT}s)"

  # Remove stale exit file so the poll loop below only sees this run's result
  rm -f "${exit_file}"

  # Get cloud-specific env var exports
  local cloud_env
  cloud_env=$(cloud_headless_env "${app_name}" "${agent}")

  # Environment for headless provisioning
  # MODEL_ID bypasses the interactive model selection prompt (required by openclaw)
  (
    export SPAWN_NON_INTERACTIVE=1
    export SPAWN_SKIP_GITHUB_AUTH=1
    export SPAWN_SKIP_API_VALIDATION=1
    export SPAWN_NO_UPDATE_CHECK=1
    export BUN_RUNTIME_TRANSPILER_CACHE_PATH=0
    export SPAWN_CLI_DIR="${SPAWN_CLI_DIR:-}"
    export MODEL_ID="${MODEL_ID:-openrouter/auto}"
    export OPENROUTER_API_KEY="${OPENROUTER_API_KEY}"

    # Apply cloud-specific env vars
    eval "${cloud_env}"

    # Capture the CLI status explicitly. This subshell inherits `set -e`, so
    # an unguarded non-zero exit from bun would abort it before the exit file
    # is written, and the poll loop would then wait the full timeout for a
    # run that had already failed.
    local provision_rc=0
    bun run "${cli_entry}" "${agent}" "${ACTIVE_CLOUD}" --headless --output json \
      > "${stdout_file}" 2> "${stderr_file}" || provision_rc=$?
    printf '%s' "${provision_rc}" > "${exit_file}"
  ) &
  local pid=$!

  # Poll for completion or timeout (bash 3.2 compatible — no wait -n)
  local waited=0
  while [ "${waited}" -lt "${PROVISION_TIMEOUT}" ]; do
    if [ -f "${exit_file}" ]; then
      break
    fi
    sleep 5
    waited=$((waited + 5))
  done

  # Kill if still running (the interactive SSH session hangs)
  # NOTE(review): this signals only the wrapper subshell; the bun child may
  # keep running after it — confirm whether that is intended.
  if [ ! -f "${exit_file}" ]; then
    log_warn "Provision timed out after ${PROVISION_TIMEOUT}s — killing (install may still succeed)"
    kill "${pid}" 2>/dev/null || true
    wait "${pid}" 2>/dev/null || true
  fi

  # Even if provision "failed" (timeout), the instance may exist and install may have completed.
  # Verify instance existence via cloud driver.
  if ! cloud_provision_verify "${app_name}" "${log_dir}"; then
    log_err "Instance ${app_name} does not exist after provisioning"
    if [ -f "${stderr_file}" ]; then
      log_err "Stderr tail:"
      tail -20 "${stderr_file}" >&2 || true
    fi
    return 1
  fi

  log_ok "Instance ${app_name} verified"

  # Wait for install to complete (.spawnrc is written near the end)
  local effective_install_wait
  effective_install_wait=$(cloud_install_wait)
  log_step "Waiting for install to complete (polling .spawnrc, up to ${effective_install_wait}s)..."

  local install_waited=0
  local install_ok=0
  while [ "${install_waited}" -lt "${effective_install_wait}" ]; do
    if cloud_exec "${app_name}" "test -f ~/.spawnrc" >/dev/null 2>&1; then
      install_ok=1
      break
    fi
    sleep 10
    install_waited=$((install_waited + 10))
  done

  if [ "${install_ok}" -eq 1 ]; then
    # Settle time for agent binary install to finish after .spawnrc is written
    sleep 5
    log_ok "Install completed (.spawnrc found)"
    return 0
  else
    log_warn ".spawnrc not found after ${effective_install_wait}s — install may still be running"
    return 0 # Continue to verification; it will catch real failures
  fi
}