mirror of
https://github.com/OpenRouterTeam/spawn.git
synced 2026-04-28 03:49:31 +00:00
Set umask 077 before mktemp so the temp .ts file is created with 0600 permissions, preventing other users on shared systems from reading it. Umask is restored immediately after file creation. Agent: code-health Co-authored-by: B <6723574+louisgv@users.noreply.github.com> Co-authored-by: Claude Sonnet 4.5 <noreply@anthropic.com>
898 lines
30 KiB
Bash
Executable file
898 lines
30 KiB
Bash
Executable file
#!/bin/bash
# sh/e2e/e2e.sh — Unified multi-cloud E2E test orchestrator
#
# Usage:
#   e2e.sh --cloud aws                      # AWS only, all agents
#   e2e.sh --cloud hetzner claude codex     # Hetzner, specific agents
#   e2e.sh --cloud aws --cloud hetzner      # Both clouds IN PARALLEL
#   e2e.sh --cloud all                      # ALL clouds IN PARALLEL
#   e2e.sh --cloud all --parallel 3         # All clouds, 3 agents parallel per cloud
#   e2e.sh --cloud aws --skip-input-test    # Skip live input tests
#   e2e.sh --cloud aws --sequential         # Force sequential agents (no parallelism)

# Abort on errors and on any failing pipeline stage. NOTE(review): -u is
# deliberately absent — the sourced libraries below may rely on unset
# variables expanding to empty; confirm before tightening.
set -eo pipefail

# ---------------------------------------------------------------------------
# Resolve script directory and source libraries
# ---------------------------------------------------------------------------
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

# Auto-set SPAWN_CLI_DIR to repo root so shell scripts use local source instead
# of downloading pre-bundled .js from GitHub releases. Can be overridden by env.
if [ -z "${SPAWN_CLI_DIR:-}" ]; then
  _repo_root="$(cd "${SCRIPT_DIR}/../.." && pwd)"
  # Only claim the repo root when the CLI source tree is actually present.
  if [ -f "${_repo_root}/packages/cli/src/index.ts" ]; then
    export SPAWN_CLI_DIR="${_repo_root}"
  fi
  unset _repo_root
fi

# Shared helpers. NOTE(review): ALL_AGENTS, log_* / track_app / make_app_name
# etc. are assumed to come from these libs (used below but not defined here)
# — confirm in lib/common.sh and friends.
source "${SCRIPT_DIR}/lib/common.sh"
source "${SCRIPT_DIR}/lib/provision.sh"
source "${SCRIPT_DIR}/lib/verify.sh"
source "${SCRIPT_DIR}/lib/teardown.sh"
source "${SCRIPT_DIR}/lib/soak.sh"
source "${SCRIPT_DIR}/lib/interactive.sh"
source "${SCRIPT_DIR}/lib/ai-review.sh"

# ---------------------------------------------------------------------------
# Auto-load Resend email credentials when not already set.
# Sources /etc/spawn-key-server-auth.env (QA VM) or ~/.config/spawn/resend.env
# (local dev) to populate RESEND_API_KEY and KEY_REQUEST_EMAIL.
# This ensures send_matrix_email fires on manual runs, not just QA-cycle runs.
# ---------------------------------------------------------------------------
if [ -z "${RESEND_API_KEY:-}" ] || [ -z "${KEY_REQUEST_EMAIL:-}" ]; then
  for _cred_file in /etc/spawn-key-server-auth.env "${HOME}/.config/spawn/resend.env"; do
    if [ -f "${_cred_file}" ]; then
      # shellcheck source=/dev/null # path is dynamic
      # set -a auto-exports every assignment in the cred file; first match wins.
      set -a; source "${_cred_file}" 2>/dev/null; set +a
      break
    fi
  done
  unset _cred_file
fi

# ---------------------------------------------------------------------------
# All supported clouds (excluding local — no infra to provision)
# ---------------------------------------------------------------------------
ALL_CLOUDS="aws hetzner digitalocean gcp daytona sprite"
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Parse arguments
|
|
# ---------------------------------------------------------------------------
|
|
CLOUDS=""
AGENTS_TO_TEST=""
# 99 acts as an "unlimited" sentinel: all agents at once, still subject to
# per-cloud caps resolved later in run_agents_for_cloud.
PARALLEL_COUNT=99
SKIP_CLEANUP=0
# May be pre-seeded from the environment; --skip-input-test also sets it.
SKIP_INPUT_TEST="${SKIP_INPUT_TEST:-0}"
SEQUENTIAL_MODE=0
SOAK_MODE=0
INTERACTIVE_MODE=0
FAST_MODE=0

while [ $# -gt 0 ]; do
  case "$1" in
    --cloud)
      shift
      if [ $# -eq 0 ]; then
        printf "Error: --cloud requires a cloud name\n" >&2
        exit 1
      fi
      if [ "$1" = "all" ]; then
        CLOUDS="${ALL_CLOUDS}"
      else
        # Validate cloud name.
        # NOTE: plain variable despite the name — this runs at top level,
        # not inside a function, so 'local' would not apply here.
        local_valid=0
        for c in ${ALL_CLOUDS}; do
          if [ "$1" = "${c}" ]; then
            local_valid=1
            break
          fi
        done
        if [ "${local_valid}" -eq 0 ]; then
          printf "Unknown cloud: %s\nAvailable: %s all\n" "$1" "${ALL_CLOUDS}" >&2
          exit 1
        fi
        # Append to the space-separated cloud list (repeatable flag).
        if [ -z "${CLOUDS}" ]; then
          CLOUDS="$1"
        else
          CLOUDS="${CLOUDS} $1"
        fi
      fi
      shift
      ;;
    --parallel)
      shift
      if [ $# -eq 0 ]; then
        printf "Error: --parallel requires a number\n" >&2
        exit 1
      fi
      PARALLEL_COUNT="$1"
      # Must be a positive integer in [1, 50]; grep guards against non-numeric
      # input before the arithmetic comparisons run.
      if ! printf '%s' "${PARALLEL_COUNT}" | grep -qE '^[0-9]+$' || [ "${PARALLEL_COUNT}" -lt 1 ] || [ "${PARALLEL_COUNT}" -gt 50 ]; then
        printf "Error: --parallel must be between 1 and 50\n" >&2
        exit 1
      fi
      shift
      ;;
    --sequential)
      SEQUENTIAL_MODE=1
      shift
      ;;
    --skip-cleanup)
      SKIP_CLEANUP=1
      shift
      ;;
    --skip-input-test)
      SKIP_INPUT_TEST=1
      shift
      ;;
    --soak)
      SOAK_MODE=1
      shift
      ;;
    --interactive)
      INTERACTIVE_MODE=1
      shift
      ;;
    --fast)
      FAST_MODE=1
      shift
      ;;
    --help|-h)
      printf "Usage: %s --cloud CLOUD [--cloud CLOUD2 ...] [agents...] [options]\n\n" "$0"
      printf "Clouds: %s\n" "${ALL_CLOUDS}"
      printf " Use --cloud all for all clouds in parallel.\n\n"
      printf "Agents: %s\n\n" "${ALL_AGENTS}"
      printf "Options:\n"
      printf " --cloud CLOUD Cloud to test (repeatable, or 'all')\n"
      printf " --parallel N Run N agents in parallel per cloud (default: all at once)\n"
      printf " --sequential Force sequential agent execution\n"
      printf " --skip-cleanup Skip stale e2e-* instance cleanup\n"
      printf " --skip-input-test Skip live input tests\n"
      printf " --fast Provision with --fast flag (images + tarballs + parallel)\n"
      printf " --soak Run Telegram soak test (OpenClaw on Sprite)\n"
      printf " --interactive AI-driven interactive test (requires ANTHROPIC_API_KEY)\n"
      printf " --help Show this help\n"
      exit 0
      ;;
    -*)
      printf "Unknown option: %s\n" "$1" >&2
      exit 1
      ;;
    *)
      # Bare word: must be a known agent name (ALL_AGENTS from the libs).
      local_valid=0
      for a in ${ALL_AGENTS}; do
        if [ "$1" = "${a}" ]; then
          local_valid=1
          break
        fi
      done
      if [ "${local_valid}" -eq 0 ]; then
        printf "Unknown agent: %s\nAvailable: %s\n" "$1" "${ALL_AGENTS}" >&2
        exit 1
      fi
      # Append to the space-separated agent list.
      if [ -z "${AGENTS_TO_TEST}" ]; then
        AGENTS_TO_TEST="$1"
      else
        AGENTS_TO_TEST="${AGENTS_TO_TEST} $1"
      fi
      shift
      ;;
  esac
done
|
|
|
|
# Soak mode: run Telegram soak test and exit (no --cloud required).
# The EXIT-trap cleanup is not armed yet at this point, so the soak log dir
# is left behind for post-run inspection.
if [ "${SOAK_MODE}" -eq 1 ]; then
  LOG_DIR=$(mktemp -d "${TMPDIR:-/tmp}/spawn-e2e.XXXXXX")
  export LOG_DIR
  run_soak_test "${LOG_DIR}"
  exit $?
fi

# Require at least one cloud
if [ -z "${CLOUDS}" ]; then
  printf "Error: --cloud is required. Use --cloud aws, --cloud all, etc.\n" >&2
  printf "Run %s --help for usage.\n" "$0" >&2
  exit 1
fi

# Default to all agents
if [ -z "${AGENTS_TO_TEST}" ]; then
  AGENTS_TO_TEST="${ALL_AGENTS}"
fi

# Sanity-check list sizes to prevent unbounded string growth (#3190)
_cloud_count=$(printf '%s\n' "${CLOUDS}" | wc -w | tr -d ' ')
_agent_count=$(printf '%s\n' "${AGENTS_TO_TEST}" | wc -w | tr -d ' ')
if [ "${_cloud_count}" -gt 50 ]; then
  printf "Error: too many clouds (%s) — max 50\n" "${_cloud_count}" >&2
  exit 1
fi
if [ "${_agent_count}" -gt 100 ]; then
  printf "Error: too many agents (%s) — max 100\n" "${_agent_count}" >&2
  exit 1
fi
unset _cloud_count _agent_count

# ---------------------------------------------------------------------------
# Count clouds to decide single vs multi-cloud mode
# (single: run in-process; multi: one background subprocess per cloud)
# ---------------------------------------------------------------------------
cloud_count=$(printf '%s\n' "${CLOUDS}" | wc -w | tr -d ' ')
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# run_single_agent AGENT
|
|
#
|
|
# Provisions, verifies, and tears down a single agent.
|
|
# Sets result in a temp file for parallel collection.
|
|
# ---------------------------------------------------------------------------
|
|
# run_single_agent AGENT [RESULT_FILE]
#
# Provisions, verifies, input-tests, and tears down one agent, enforcing a
# per-agent wall-clock timeout. Writes "pass"/"fail" to RESULT_FILE when
# given (used for parallel collection). Always returns 0 so a failing agent
# does not trip set -e in sequential callers.
run_single_agent() {
  local agent="$1"
  # Optional: where the final verdict is written for the caller to collect.
  local result_file="${2:-}"
  local agent_start
  agent_start=$(date +%s)

  log_header "Testing agent: ${agent}"

  local app_name
  app_name=$(make_app_name "${agent}")
  # Register for teardown by the EXIT trap in case we die mid-run.
  track_app "${app_name}"

  local status="fail"

  # -------------------------------------------------------------------------
  # Per-agent timeout: run provision/verify/input_test in a subshell with a
  # wall-clock timeout. This prevents any single step from hanging indefinitely
  # and ensures a result file is always written (pass, fail, or timeout).
  # Fixes #2714: digitalocean-opencode stalling with no result.
  # -------------------------------------------------------------------------
  local effective_agent_timeout
  effective_agent_timeout=$(get_agent_timeout "${agent}")
  log_info "Agent timeout: ${effective_agent_timeout}s"

  local status_file="${LOG_DIR}/${app_name}.agent-status"
  rm -f "${status_file}"

  # Run core logic in a subshell so we can kill it on timeout.
  # Writing the status file at the end is the completion signal the poll
  # loop below watches for.
  (
    local _inner_status="fail"
    if [ "${INTERACTIVE_MODE}" -eq 1 ]; then
      # AI-driven interactive mode: harness drives the CLI through PTY.
      # After harness exits (on "Starting agent..." marker), the install is still
      # running on the remote VM. Run verify_agent to wait for .spawnrc before
      # the input test — same as headless mode.
      if interactive_provision "${agent}" "${app_name}" "${LOG_DIR}"; then
        if verify_agent "${agent}" "${app_name}"; then
          if run_input_test "${agent}" "${app_name}"; then
            _inner_status="pass"
          fi
        fi
      fi
    else
      # Standard headless mode
      if provision_agent "${agent}" "${app_name}" "${LOG_DIR}"; then
        # AI review of provision logs — advisory only, runs regardless of verify result
        ai_review_logs "${agent}" "${app_name}" "${LOG_DIR}" || true
        if verify_agent "${agent}" "${app_name}"; then
          if run_input_test "${agent}" "${app_name}"; then
            _inner_status="pass"
          fi
        fi
      fi
    fi
    printf '%s' "${_inner_status}" > "${status_file}"
  ) &
  local agent_pid=$!

  # Poll for completion or timeout (bash 3.2 compatible — no wait -n)
  local agent_waited=0
  while [ "${agent_waited}" -lt "${effective_agent_timeout}" ]; do
    if [ -f "${status_file}" ]; then
      break
    fi
    # Also break if the subshell exited without writing (crash/error)
    if ! kill -0 "${agent_pid}" 2>/dev/null; then
      break
    fi
    sleep 5
    agent_waited=$((agent_waited + 5))
  done

  # Collect result or handle timeout — three exits from the poll loop:
  # status file written (normal), subshell still alive (timeout), or
  # subshell dead with no status file (crash).
  if [ -f "${status_file}" ]; then
    status=$(cat "${status_file}")
    wait "${agent_pid}" 2>/dev/null || true
  elif kill -0 "${agent_pid}" 2>/dev/null; then
    # Timed out — kill the subshell and its children
    log_err "${agent} timed out after ${effective_agent_timeout}s — killing"
    pkill -P "${agent_pid}" 2>/dev/null || true
    kill "${agent_pid}" 2>/dev/null || true
    wait "${agent_pid}" 2>/dev/null || true
    status="fail"
  else
    # Subshell exited without writing status file (unexpected error)
    log_err "${agent} subshell exited without writing status"
    wait "${agent_pid}" 2>/dev/null || true
    status="fail"
  fi

  rm -f "${status_file}"

  # Teardown (always attempt, even after timeout)
  teardown_agent "${app_name}" || log_warn "Teardown failed for ${app_name}"

  local agent_end
  agent_end=$(date +%s)
  local agent_duration=$((agent_end - agent_start))
  local duration_str
  duration_str=$(format_duration "${agent_duration}")

  if [ "${status}" = "pass" ]; then
    log_ok "${agent} PASSED (${duration_str})"
  else
    log_err "${agent} FAILED (${duration_str})"
  fi

  # Write result to file (for parallel collection)
  if [ -n "${result_file}" ]; then
    printf '%s' "${status}" > "${result_file}"
  fi

  # Always 0 — callers read pass/fail from result_file; a non-zero return
  # here would abort sequential runs under set -e.
  return 0
}
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# run_agents_for_cloud CLOUD LOG_DIR
|
|
#
|
|
# Runs all agents for a single cloud. Supports parallel batching.
|
|
# Writes per-agent results to LOG_DIR/{cloud}-{agent}.result.
|
|
# Writes cloud summary to LOG_DIR/{cloud}.summary.
|
|
# ---------------------------------------------------------------------------
|
|
# ---------------------------------------------------------------------------
# _record_agent_result RESULT_FILE AGENT
#
# Folds one agent's verdict into the caller's cloud_passed / cloud_failed
# accumulators (bash dynamic scoping: both are locals of run_agents_for_cloud
# and remain writable from functions it calls). A missing or non-"pass"
# result file counts as fail.
# ---------------------------------------------------------------------------
_record_agent_result() {
  local rf="$1"
  local agent="$2"
  if [ -f "${rf}" ] && [ "$(cat "${rf}")" = "pass" ]; then
    if [ -z "${cloud_passed}" ]; then cloud_passed="${agent}"; else cloud_passed="${cloud_passed} ${agent}"; fi
  else
    if [ -z "${cloud_failed}" ]; then cloud_failed="${agent}"; else cloud_failed="${cloud_failed} ${agent}"; fi
  fi
}

# ---------------------------------------------------------------------------
# _run_agent_batch CLOUD LOG_DIR BATCH_NUM AGENT...
#
# Runs one batch of agents in parallel, waits for all of them, then records
# each result via _record_agent_result. Factored out of run_agents_for_cloud,
# which previously duplicated this block verbatim for full and partial
# batches.
# ---------------------------------------------------------------------------
_run_agent_batch() {
  local cloud="$1"
  local log_dir="$2"
  local batch_num="$3"
  shift 3

  log_header "Batch ${batch_num} (${cloud})"

  # Refresh auth before each batch — prevents token expiry in long
  # E2E runs (60+ min). No-op for clouds without refresh support. #2934
  cloud_refresh_auth || log_warn "Auth refresh failed before batch ${batch_num}"

  # Fan out: one background run_single_agent per agent in this batch.
  local pids=""
  local ba
  for ba in "$@"; do
    run_single_agent "${ba}" "${log_dir}/${cloud}-${ba}.result" &
    if [ -z "${pids}" ]; then pids="$!"; else pids="${pids} $!"; fi
  done

  # Barrier: wait for the whole batch before collecting results.
  local p
  for p in ${pids}; do
    wait "${p}" 2>/dev/null || true
  done

  for ba in "$@"; do
    _record_agent_result "${log_dir}/${cloud}-${ba}.result" "${ba}"
  done
}

# ---------------------------------------------------------------------------
# run_agents_for_cloud CLOUD LOG_DIR
#
# Runs all agents for a single cloud. Supports parallel batching.
# Writes per-agent results to LOG_DIR/{cloud}-{agent}.result.
# Writes cloud summary to LOG_DIR/{cloud}.summary.
# Returns non-zero when any agent failed (or on zero capacity).
# ---------------------------------------------------------------------------
run_agents_for_cloud() {
  local cloud="$1"
  local log_dir="$2"
  local cloud_start
  cloud_start=$(date +%s)

  # Load the cloud driver (provides require_env and the cloud_* hooks)
  load_cloud_driver "${cloud}"

  # Set log prefix for multi-cloud output
  if [ "${cloud_count}" -gt 1 ]; then
    CLOUD_LOG_PREFIX="[${cloud}] "
  fi

  log_header "E2E Tests: ${cloud}"
  log_info "Agents: ${AGENTS_TO_TEST}"

  # Validate environment for this cloud
  if ! require_env; then
    log_warn "Credentials not configured for ${cloud} — skipping"
    printf 'SKIPPED (no credentials)' > "${log_dir}/${cloud}.summary"
    return 0
  fi

  # Accumulators mutated by _record_agent_result via dynamic scoping.
  local cloud_passed=""
  local cloud_failed=""

  # Pre-run stale cleanup: remove orphaned e2e instances from previous
  # interrupted runs before starting new agents. Uses a shorter max_age (5 min)
  # than the default (30 min) so that orphans from recently-failed runs are
  # cleaned before they can exhaust the account's instance quota (#2793).
  if [ "${SKIP_CLEANUP}" -eq 0 ]; then
    _CLEANUP_MAX_AGE=300 cloud_cleanup_stale || log_warn "Pre-run stale cleanup encountered errors"
  fi

  # Resolve effective parallelism (respect per-cloud cap)
  local effective_parallel="${PARALLEL_COUNT}"
  if [ "${SEQUENTIAL_MODE}" -eq 0 ]; then
    local cloud_max
    cloud_max=$(cloud_max_parallel)
    if [ "${effective_parallel}" -gt "${cloud_max}" ]; then
      effective_parallel="${cloud_max}"
    fi
  fi

  # Bail out early if the cloud reports zero capacity (e.g. droplet limit reached).
  # All agents would fail anyway — skip with an actionable error instead of wasting
  # time on retries that cannot succeed. (#3059)
  if [ "${effective_parallel}" -eq 0 ] && [ "${SEQUENTIAL_MODE}" -eq 0 ]; then
    log_err "No capacity available on ${cloud} — all ${cloud} agents will be marked as failed."
    log_err "Delete existing instances or request a limit increase, then re-run."
    local agent
    for agent in ${AGENTS_TO_TEST}; do
      printf 'fail' > "${log_dir}/${cloud}-${agent}.result"
      _record_agent_result "${log_dir}/${cloud}-${agent}.result" "${agent}"
    done
    printf '%s %s %s %s %s' "0" "$(printf '%s\n' "${AGENTS_TO_TEST}" | wc -w | tr -d ' ')" "0s" "" "|${cloud_failed}" \
      > "${log_dir}/${cloud}.summary"
    return 1
  fi

  local agent
  if [ "${effective_parallel}" -gt 0 ] && [ "${SEQUENTIAL_MODE}" -eq 0 ]; then
    # Parallel mode: batch agents
    log_info "Running agents in parallel (batch size: ${effective_parallel})"

    local batch_agents=""
    local batch_count=0
    local batch_num=0

    for agent in ${AGENTS_TO_TEST}; do
      batch_agents="${batch_agents} ${agent}"
      batch_count=$((batch_count + 1))

      if [ "${batch_count}" -ge "${effective_parallel}" ]; then
        batch_num=$((batch_num + 1))
        # Word-splitting of batch_agents is intentional here.
        _run_agent_batch "${cloud}" "${log_dir}" "${batch_num}" ${batch_agents}
        batch_agents=""
        batch_count=0
      fi
    done

    # Handle remaining agents in last partial batch
    if [ -n "${batch_agents}" ]; then
      batch_num=$((batch_num + 1))
      _run_agent_batch "${cloud}" "${log_dir}" "${batch_num}" ${batch_agents}
    fi

  else
    # Sequential mode: one agent at a time, same result bookkeeping.
    for agent in ${AGENTS_TO_TEST}; do
      run_single_agent "${agent}" "${log_dir}/${cloud}-${agent}.result"
      _record_agent_result "${log_dir}/${cloud}-${agent}.result" "${agent}"
    done
  fi

  # Stale cleanup
  if [ "${SKIP_CLEANUP}" -eq 0 ]; then
    cloud_cleanup_stale || log_warn "Stale cleanup encountered errors"
  fi

  # Write cloud summary: "<pass_count> <fail_count> <duration> <passed> |<failed>"
  local cloud_end
  cloud_end=$(date +%s)
  local cloud_duration=$((cloud_end - cloud_start))
  local cloud_duration_str
  cloud_duration_str=$(format_duration "${cloud_duration}")

  local pass_count=0
  local fail_count=0
  if [ -n "${cloud_passed}" ]; then pass_count=$(printf '%s\n' "${cloud_passed}" | wc -w | tr -d ' '); fi
  if [ -n "${cloud_failed}" ]; then fail_count=$(printf '%s\n' "${cloud_failed}" | wc -w | tr -d ' '); fi

  printf '%s %s %s %s %s' "${pass_count}" "${fail_count}" "${cloud_duration_str}" "${cloud_passed}" "|${cloud_failed}" \
    > "${log_dir}/${cloud}.summary"

  if [ "${fail_count}" -gt 0 ]; then
    return 1
  fi
  return 0
}
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# send_matrix_email LOG_DIR CLOUDS AGENTS TOTAL_PASS TOTAL_FAIL DURATION_STR
|
|
#
|
|
# Sends an agent x cloud matrix report via Resend.
|
|
# Requires: RESEND_API_KEY, KEY_REQUEST_EMAIL env vars (silently skips if absent).
|
|
# ---------------------------------------------------------------------------
|
|
# send_matrix_email LOG_DIR CLOUDS AGENTS TOTAL_PASS TOTAL_FAIL DURATION_STR
#
# Sends an agent x cloud matrix report via Resend. Silently no-ops when
# RESEND_API_KEY / KEY_REQUEST_EMAIL are absent or SPAWN_E2E_SKIP_EMAIL=1.
send_matrix_email() {
  local log_dir="$1"
  local clouds="$2"
  local agents="$3"
  local total_pass="$4"
  local total_fail="$5"
  local duration_str="$6"

  # Skip email for targeted re-runs (partial agent/cloud subset).
  # Set SPAWN_E2E_SKIP_EMAIL=1 to suppress the email (used by quality cycle
  # when re-running only failed agents — a partial email looks like all-passed).
  if [ "${SPAWN_E2E_SKIP_EMAIL:-0}" = "1" ]; then
    log_info "Matrix email skipped (SPAWN_E2E_SKIP_EMAIL=1)"
    return 0
  fi

  local resend_key="${RESEND_API_KEY:-}"
  local to_email="${KEY_REQUEST_EMAIL:-}"

  if [ -z "${resend_key}" ] || [ -z "${to_email}" ]; then
    log_info "Matrix email skipped (RESEND_API_KEY or KEY_REQUEST_EMAIL not set)"
    return 0
  fi

  # Build results string: "cloud:agent:result,..." for bun to process
  # Sanitize cloud/agent names to alphanumeric, dash, underscore only (#3189)
  local results=""
  local cloud agent
  for cloud in ${clouds}; do
    local safe_cloud
    safe_cloud=$(printf '%s' "${cloud}" | tr -cd 'a-zA-Z0-9_-')
    for agent in ${agents}; do
      local safe_agent
      safe_agent=$(printf '%s' "${agent}" | tr -cd 'a-zA-Z0-9_-')
      local result="skip"
      local result_file="${log_dir}/${cloud}-${agent}.result"
      if [ -f "${result_file}" ]; then
        result=$(cat "${result_file}")
      fi
      # Sanitize result to known values only
      case "${result}" in
        pass|fail|skip) ;;
        *) result="skip" ;;
      esac
      if [ -n "${results}" ]; then results="${results},"; fi
      results="${results}${safe_cloud}:${safe_agent}:${result}"
    done
  done

  # Write the report script into a private temp directory created under
  # umask 077 (dir mode 0700, so the .ts file inside is unreadable by other
  # users on shared systems). A directory is used instead of the previous
  # "mktemp /tmp/e2e-email-XXXXXX.ts" template because BSD/macOS mktemp
  # requires the Xs to be trailing — the ".ts" suffix fails there (GNU mktemp
  # only tolerates it by implying --suffix). Also honors TMPDIR instead of
  # hardcoding /tmp.
  local ts_dir ts_file old_umask
  old_umask=$(umask)
  umask 077
  if ! ts_dir=$(mktemp -d "${TMPDIR:-/tmp}/e2e-email-XXXXXX"); then
    umask "${old_umask}"
    log_warn "mktemp failed — matrix email skipped"
    return 0
  fi
  umask "${old_umask}"
  ts_file="${ts_dir}/email.ts"

  # Quoted delimiter: the TS below is written literally, no shell expansion.
  # All dynamic values flow in via the _E2E_* environment variables.
  cat > "${ts_file}" << 'TS_EOF'
const results = (process.env._E2E_RESULTS ?? "").split(",").filter(Boolean);
const clouds = (process.env._E2E_CLOUDS ?? "").split(" ").filter(Boolean);
const agents = (process.env._E2E_AGENTS ?? "").split(" ").filter(Boolean);
const totalPass = process.env._E2E_TOTAL_PASS ?? "0";
const totalFail = process.env._E2E_TOTAL_FAIL ?? "0";
const duration = process.env._E2E_DURATION ?? "?";
const toEmail = process.env.KEY_REQUEST_EMAIL ?? "";
const resendKey = process.env.RESEND_API_KEY ?? "";
const timestamp = new Date().toUTCString();

// Build lookup map: "cloud:agent" -> result
const resultMap: Record<string, string> = {};
for (const entry of results) {
  const parts = entry.split(":");
  resultMap[`${parts[0]}:${parts[1]}`] = parts[2] ?? "skip";
}

// Cell styles per result
const cellStyle = (result: string): string => {
  if (result === "pass") return "background:#22c55e;color:#fff;font-weight:bold;padding:4px 10px;border-radius:4px;";
  if (result === "fail") return "background:#ef4444;color:#fff;font-weight:bold;padding:4px 10px;border-radius:4px;";
  return "background:#e2e8f0;color:#94a3b8;padding:4px 10px;border-radius:4px;";
};

const headerCells = clouds
  .map(c => `<th style="padding:8px 14px;background:#1e293b;color:#fff;text-transform:uppercase;font-size:11px;letter-spacing:.05em;">${c}</th>`)
  .join("");

const bodyRows = agents
  .map(agent => {
    const cells = clouds
      .map(cloud => {
        const r = resultMap[`${cloud}:${agent}`] ?? "skip";
        return `<td style="padding:6px 14px;text-align:center;"><span style="${cellStyle(r)}">${r.toUpperCase()}</span></td>`;
      })
      .join("");
    return `<tr><td style="padding:6px 14px;font-weight:600;white-space:nowrap;color:#1e293b;">${agent}</td>${cells}</tr>`;
  })
  .join("");

const status = totalFail === "0" ? "✅ All Passed" : `❌ ${totalFail} Failed`;

const html = `<!DOCTYPE html>
<html><body style="font-family:system-ui,-apple-system,sans-serif;max-width:860px;margin:0 auto;padding:24px;color:#1e293b;">
<h2 style="margin:0 0 4px;">${status} — Spawn E2E Matrix</h2>
<p style="margin:0 0 20px;color:#64748b;font-size:14px;">Completed ${timestamp}</p>
<table style="border-collapse:collapse;width:100%;">
<thead>
<tr>
<th style="padding:8px 14px;background:#1e293b;color:#fff;text-align:left;font-size:11px;text-transform:uppercase;letter-spacing:.05em;">Agent</th>
${headerCells}
</tr>
</thead>
<tbody>
${bodyRows}
</tbody>
</table>
<p style="margin-top:18px;color:#64748b;font-size:13px;">
<strong style="color:#1e293b;">Total:</strong> ${totalPass} passed, ${totalFail} failed
·
<strong style="color:#1e293b;">Duration:</strong> ${duration}
</p>
</body></html>`;

const subject = totalFail === "0"
  ? `✅ E2E Matrix: ${totalPass} passed · ${duration}`
  : `❌ E2E Matrix: ${totalFail} failed, ${totalPass} passed · ${duration}`;

const res = await fetch("https://api.resend.com/emails", {
  method: "POST",
  headers: {
    "Content-Type": "application/json",
    "Authorization": `Bearer ${resendKey}`,
  },
  body: JSON.stringify({
    from: "Spawn QA <onboarding@resend.dev>",
    to: [toEmail],
    subject,
    html,
  }),
});

if (!res.ok) {
  const body = await res.text();
  console.error(`Resend API error ${res.status}: ${body}`);
  process.exit(1);
}
console.log(`Matrix email sent to ${toEmail}`);
TS_EOF

  log_info "Sending matrix email to ${to_email}..."
  # Pass data via env (not argv) so values never leak through ps listings.
  _E2E_RESULTS="${results}" \
    _E2E_CLOUDS="${clouds}" \
    _E2E_AGENTS="${agents}" \
    _E2E_TOTAL_PASS="${total_pass}" \
    _E2E_TOTAL_FAIL="${total_fail}" \
    _E2E_DURATION="${duration_str}" \
    bun run "${ts_file}" 2>&1 || log_warn "Failed to send matrix email"

  rm -rf "${ts_dir}" 2>/dev/null || true
}
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Final cleanup trap
|
|
# ---------------------------------------------------------------------------
|
|
# EXIT-trap handler: tears down any still-tracked cloud instances, then
# removes LOG_DIR — but only when it can prove LOG_DIR is the exact temp
# directory this script created (symlink-free, owned by us, under the temp
# root).
final_cleanup() {
  # _TRACKED_APPS is appended to by track_app for every instance this
  # process provisioned (see run_single_agent).
  if [ -n "${_TRACKED_APPS}" ]; then
    printf "\n"
    log_warn "Cleaning up tracked instances on exit..."
    for app in ${_TRACKED_APPS}; do
      log_step "Tearing down ${app}..."
      teardown_agent "${app}" 2>/dev/null || log_warn "Failed to tear down ${app}"
    done
  fi
  if [ -n "${LOG_DIR:-}" ] && [ -d "${LOG_DIR:-}" ]; then
    # Only delete a LOG_DIR we created ourselves — guards against an
    # inherited/exported LOG_DIR pointing somewhere precious.
    if [ "${LOG_DIR}" != "${_E2E_CREATED_LOG_DIR:-}" ]; then
      log_warn "Refusing to rm -rf LOG_DIR not created by this script: ${LOG_DIR}"
    else
      # Reject symlinks to prevent TOCTOU races (CWE-367, #3233):
      # Previous code resolved symlinks then operated on the resolved path,
      # but an attacker could swap the symlink target between resolve and rm.
      # Fix: refuse to delete symlinks entirely — LOG_DIR should never be one.
      if [ -L "${LOG_DIR}" ]; then
        log_warn "LOG_DIR is a symlink, refusing deletion to prevent symlink attacks: ${LOG_DIR}"
        return
      fi
      SAFE_TMP_ROOT="${TMP_ROOT:-${TMPDIR:-/tmp}}"
      SAFE_TMP_ROOT="${SAFE_TMP_ROOT%/}"
      # Use realpath -P to resolve, then verify the original path matches
      # (ensures LOG_DIR is not inside a symlinked parent directory).
      # NOTE(review): realpath may be absent on some systems (e.g. older
      # macOS) — the empty-result branch below then skips cleanup rather
      # than failing; confirm this trade-off is intended.
      local resolved_log_dir
      resolved_log_dir=$(realpath -P "${LOG_DIR}" 2>/dev/null)
      if [ -z "${resolved_log_dir}" ]; then
        log_warn "Failed to resolve LOG_DIR path, skipping cleanup"
        return
      fi
      # Re-check symlink after resolve to narrow the TOCTOU window
      if [ -L "${LOG_DIR}" ]; then
        log_warn "LOG_DIR became a symlink during cleanup, aborting: ${LOG_DIR}"
        return
      fi
      # Verify ownership on the original path (not the resolved one)
      if [ ! -O "${LOG_DIR}" ]; then
        log_warn "LOG_DIR not owned by current user, refusing deletion: ${LOG_DIR}"
      else
        # Final gate: resolved path must live directly under the temp root
        # with the spawn-e2e. prefix used by mktemp below in main.
        case "${resolved_log_dir}" in
          "${SAFE_TMP_ROOT}"/spawn-e2e.*)
            # Delete the original path — if it became a symlink between check
            # and here, rm -rf on a symlink just removes the link itself when
            # the target no longer matches. The double -L check above minimizes
            # this window.
            rm -rf "${LOG_DIR}"
            ;;
          *)
            log_warn "Refusing to rm -rf unexpected path: ${resolved_log_dir}"
            ;;
        esac
      fi
    fi
  fi
}
trap final_cleanup EXIT
|
|
|
|
# ---------------------------------------------------------------------------
# Main
# ---------------------------------------------------------------------------
log_header "Spawn E2E Test Suite (Multi-Cloud)"
log_info "Clouds: ${CLOUDS}"
log_info "Agents: ${AGENTS_TO_TEST}"
# Describe the effective parallelism (99 is the "unlimited" sentinel default).
if [ "${SEQUENTIAL_MODE}" -eq 1 ]; then
  log_info "Agent parallelism: sequential"
elif [ "${PARALLEL_COUNT}" -ge 99 ]; then
  log_info "Agent parallelism: all at once (per-cloud caps may apply)"
else
  log_info "Agent parallelism: ${PARALLEL_COUNT} per cloud"
fi
if [ "${SKIP_INPUT_TEST}" -eq 1 ]; then
  log_info "Input tests: SKIPPED"
fi
if [ "${FAST_MODE}" -eq 1 ]; then
  log_info "Fast mode: ENABLED (--fast passed to spawn)"
fi

# Export FAST_MODE so provision.sh can read it
export E2E_FAST_MODE="${FAST_MODE}"

# Create temp log directory. Trailing slash is stripped from TMP_ROOT so the
# path-prefix check in final_cleanup compares consistently.
TMP_ROOT="${TMPDIR:-/tmp}"
TMP_ROOT="${TMP_ROOT%/}"
LOG_DIR=$(mktemp -d "${TMP_ROOT}/spawn-e2e.XXXXXX")
# Remember the exact dir we created — final_cleanup refuses to delete any
# other LOG_DIR value.
_E2E_CREATED_LOG_DIR="${LOG_DIR}"
export LOG_DIR
log_info "Log directory: ${LOG_DIR}"

START_TIME=$(date +%s)
|
|
|
|
# ---------------------------------------------------------------------------
# Execute: single-cloud or multi-cloud
# ---------------------------------------------------------------------------
if [ "${cloud_count}" -eq 1 ]; then
  # Single cloud — run directly in this process
  run_agents_for_cloud "${CLOUDS}" "${LOG_DIR}" || true

else
  # Multi-cloud — each cloud runs as a separate background process
  cloud_pids=""
  for cloud in ${CLOUDS}; do
    (
      # Reset parent's EXIT trap — the main process handles LOG_DIR cleanup
      trap - EXIT
      # Child starts with an empty tracked-apps list so the parent's EXIT
      # trap never tears down instances a child created (and vice versa).
      _TRACKED_APPS=""
      run_agents_for_cloud "${cloud}" "${LOG_DIR}"
    ) > "${LOG_DIR}/${cloud}.log" 2>&1 &
    cloud_pid=$!
    if [ -z "${cloud_pids}" ]; then
      cloud_pids="${cloud_pid}"
    else
      cloud_pids="${cloud_pids} ${cloud_pid}"
    fi
    log_info "Started ${cloud} tests (PID: ${cloud_pid})"
  done

  # Wait for all clouds to finish
  # NOTE(review): any_failed is set here but never read afterwards — the
  # summary below derives failure from per-agent .result files instead.
  any_failed=0
  for pid in ${cloud_pids}; do
    wait "${pid}" 2>/dev/null || any_failed=1
  done

  # Print per-cloud logs (buffered per cloud to avoid interleaved output)
  for cloud in ${CLOUDS}; do
    if [ -f "${LOG_DIR}/${cloud}.log" ]; then
      printf "\n"
      log_header "Output: ${cloud}"
      cat "${LOG_DIR}/${cloud}.log"
    fi
  done
fi
|
|
|
|
# ---------------------------------------------------------------------------
# Unified Summary
# ---------------------------------------------------------------------------
END_TIME=$(date +%s)
TOTAL_DURATION=$((END_TIME - START_TIME))
DURATION_STR=$(format_duration "${TOTAL_DURATION}")

printf "\n"
log_header "E2E Test Summary"

total_pass=0
total_fail=0
any_cloud_failed=0

for cloud in ${CLOUDS}; do
  printf "\n ${BOLD}%s:${NC}\n" "${cloud}"

  # NOTE(review): cloud_skip is initialized but never incremented or read.
  cloud_pass=0
  cloud_fail=0
  cloud_skip=0

  # Check if this cloud was skipped (no credentials) — no result files written
  cloud_has_results=0
  for agent in ${AGENTS_TO_TEST}; do
    if [ -f "${LOG_DIR}/${cloud}-${agent}.result" ]; then
      cloud_has_results=1
      break
    fi
  done

  if [ "${cloud_has_results}" -eq 0 ]; then
    printf " ${YELLOW}(skipped — credentials not configured)${NC}\n"
    continue
  fi

  # A missing or non-"pass" result file counts as FAIL.
  for agent in ${AGENTS_TO_TEST}; do
    result_file="${LOG_DIR}/${cloud}-${agent}.result"
    if [ -f "${result_file}" ] && [ "$(cat "${result_file}")" = "pass" ]; then
      printf " ${GREEN}%-12s PASS${NC}\n" "${agent}"
      cloud_pass=$((cloud_pass + 1))
      total_pass=$((total_pass + 1))
    else
      printf " ${RED}%-12s FAIL${NC}\n" "${agent}"
      cloud_fail=$((cloud_fail + 1))
      total_fail=$((total_fail + 1))
    fi
  done

  if [ "${cloud_fail}" -gt 0 ]; then
    printf " ${RED}%d passed, %d failed${NC}\n" "${cloud_pass}" "${cloud_fail}"
    # NOTE(review): any_cloud_failed is written but never read — the exit
    # decision below uses total_fail only.
    any_cloud_failed=1
  else
    printf " ${GREEN}%d passed, 0 failed${NC}\n" "${cloud_pass}"
  fi
done

printf "\n"
printf " ${BOLD}Total:${NC} ${GREEN}%d passed${NC}" "${total_pass}"
if [ "${total_fail}" -gt 0 ]; then
  printf ", ${RED}%d failed${NC}" "${total_fail}"
fi
printf "\n Duration: %s\n" "${DURATION_STR}"

# Send matrix email report
send_matrix_email "${LOG_DIR}" "${CLOUDS}" "${AGENTS_TO_TEST}" "${total_pass}" "${total_fail}" "${DURATION_STR}"

# Exit with failure if any agent on any cloud failed
if [ "${total_fail}" -gt 0 ]; then
  exit 1
fi

# All tests passed — advance the e2e-last-green tag for diff-aware reviews
mark_e2e_green

exit 0
|