mirror of
https://github.com/OpenRouterTeam/spawn.git
synced 2026-04-28 03:49:31 +00:00
* fix(zeroclaw): remove broken zeroclaw agent (repo 404) The zeroclaw-labs/zeroclaw GitHub repository returns 404 — all installs fail. Remove zeroclaw entirely from the matrix: agent definition, setup code, shell scripts, e2e tests, packer config, skill files, and documentation. Fixes #3102 Agent: code-health Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com> * fix(zeroclaw): remove stale zeroclaw reference from discovery.md ARM agents list Addresses security review on PR #3107 — the last remaining zeroclaw reference in .claude/rules/discovery.md is now removed. Agent: issue-fixer Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com> * fix(zeroclaw): remove remaining stale zeroclaw references from CI/packer Remove zeroclaw from: - .github/workflows/agent-tarballs.yml ARM build matrix - .github/workflows/docker.yml agent matrix - packer/digitalocean.pkr.hcl comment - sh/e2e/e2e.sh comment Addresses all 5 stale references flagged in security review of PR #3107. Agent: issue-fixer Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com> --------- Co-authored-by: B <6723574+louisgv@users.noreply.github.com> Co-authored-by: Claude Sonnet 4.5 <noreply@anthropic.com>
833 lines
28 KiB
Bash
Executable file
833 lines
28 KiB
Bash
Executable file
#!/bin/bash
|
|
# sh/e2e/e2e.sh — Unified multi-cloud E2E test orchestrator
|
|
#
|
|
# Usage:
|
|
# e2e.sh --cloud aws # AWS only, all agents
|
|
# e2e.sh --cloud hetzner claude codex # Hetzner, specific agents
|
|
# e2e.sh --cloud aws --cloud hetzner # Both clouds IN PARALLEL
|
|
# e2e.sh --cloud all # ALL clouds IN PARALLEL
|
|
# e2e.sh --cloud all --parallel 3 # All clouds, 3 agents parallel per cloud
|
|
# e2e.sh --cloud aws --skip-input-test # Skip live input tests
|
|
# e2e.sh --cloud aws --sequential # Force sequential agents (no parallelism)
|
|
set -eo pipefail
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Resolve script directory and source libraries
|
|
# ---------------------------------------------------------------------------
|
|
# Absolute directory containing this script, resolved via BASH_SOURCE so the
# script works regardless of the caller's CWD or how it was invoked.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

# Auto-set SPAWN_CLI_DIR to repo root so shell scripts use local source instead
# of downloading pre-bundled .js from GitHub releases. Can be overridden by env.
if [ -z "${SPAWN_CLI_DIR:-}" ]; then
  _repo_root="$(cd "${SCRIPT_DIR}/../.." && pwd)"
  # Only trust the computed root if it actually looks like a spawn checkout
  # (the CLI entry point exists); otherwise leave SPAWN_CLI_DIR unset so the
  # release-download fallback applies.
  if [ -f "${_repo_root}/packages/cli/src/index.ts" ]; then
    export SPAWN_CLI_DIR="${_repo_root}"
  fi
  unset _repo_root
fi
|
|
|
|
source "${SCRIPT_DIR}/lib/common.sh"
|
|
source "${SCRIPT_DIR}/lib/provision.sh"
|
|
source "${SCRIPT_DIR}/lib/verify.sh"
|
|
source "${SCRIPT_DIR}/lib/teardown.sh"
|
|
source "${SCRIPT_DIR}/lib/soak.sh"
|
|
source "${SCRIPT_DIR}/lib/interactive.sh"
|
|
source "${SCRIPT_DIR}/lib/ai-review.sh"
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Auto-load Resend email credentials when not already set.
|
|
# Sources /etc/spawn-key-server-auth.env (QA VM) or ~/.config/spawn/resend.env
|
|
# (local dev) to populate RESEND_API_KEY and KEY_REQUEST_EMAIL.
|
|
# This ensures send_matrix_email fires on manual runs, not just QA-cycle runs.
|
|
# ---------------------------------------------------------------------------
|
|
# Load Resend credentials from the first available credentials file when the
# caller has not already exported them. Checked in order: the QA VM path, then
# the local-dev path. Exported via `set -a` so RESEND_API_KEY and
# KEY_REQUEST_EMAIL become visible to child processes (bun, curl).
if [ -z "${RESEND_API_KEY:-}" ] || [ -z "${KEY_REQUEST_EMAIL:-}" ]; then
  for _cred_src in /etc/spawn-key-server-auth.env "${HOME}/.config/spawn/resend.env"; do
    [ -f "${_cred_src}" ] || continue
    set -a
    # shellcheck source=/dev/null # path is dynamic
    source "${_cred_src}" 2>/dev/null
    set +a
    break
  done
  unset _cred_src
fi
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# All supported clouds (excluding local — no infra to provision)
|
|
# ---------------------------------------------------------------------------
|
|
# ---------------------------------------------------------------------------
# All supported clouds (excluding local — no infra to provision)
# ---------------------------------------------------------------------------
ALL_CLOUDS="aws hetzner digitalocean gcp sprite"

# ---------------------------------------------------------------------------
# Parse arguments
# ---------------------------------------------------------------------------
CLOUDS=""                                # space-separated clouds chosen via --cloud
AGENTS_TO_TEST=""                        # space-separated agents; empty = all agents (set later)
PARALLEL_COUNT=99                        # 99 acts as "unbounded"; per-cloud caps still apply
SKIP_CLEANUP=0                           # 1 = skip stale e2e-* instance cleanup passes
SKIP_INPUT_TEST="${SKIP_INPUT_TEST:-0}"  # 1 = skip live input tests (also settable via env)
SEQUENTIAL_MODE=0                        # 1 = force one agent at a time (no parallelism)
SOAK_MODE=0                              # 1 = run Telegram soak test and exit
INTERACTIVE_MODE=0                       # 1 = AI-driven interactive provisioning via PTY
FAST_MODE=0                              # 1 = pass --fast to spawn during provisioning
|
|
# Walk argv manually (getopts cannot mix repeatable long options with bare
# agent-name positionals). Unknown flags and unknown agent/cloud names are
# fatal; valid values accumulate into CLOUDS / AGENTS_TO_TEST.
while [ $# -gt 0 ]; do
  case "$1" in
    --cloud)
      shift
      if [ $# -eq 0 ]; then
        printf "Error: --cloud requires a cloud name\n" >&2
        exit 1
      fi
      if [ "$1" = "all" ]; then
        # "all" replaces any previously accumulated cloud list.
        CLOUDS="${ALL_CLOUDS}"
      else
        # Validate cloud name
        local_valid=0
        for c in ${ALL_CLOUDS}; do
          if [ "$1" = "${c}" ]; then
            local_valid=1
            break
          fi
        done
        if [ "${local_valid}" -eq 0 ]; then
          printf "Unknown cloud: %s\nAvailable: %s all\n" "$1" "${ALL_CLOUDS}" >&2
          exit 1
        fi
        # Append to the space-separated cloud list.
        if [ -z "${CLOUDS}" ]; then
          CLOUDS="$1"
        else
          CLOUDS="${CLOUDS} $1"
        fi
      fi
      shift
      ;;
    --parallel)
      shift
      if [ $# -eq 0 ]; then
        printf "Error: --parallel requires a number\n" >&2
        exit 1
      fi
      PARALLEL_COUNT="$1"
      # Must be a positive integer in [1, 50]; the grep guard runs first so
      # the numeric comparisons never see a non-numeric value.
      if ! printf '%s' "${PARALLEL_COUNT}" | grep -qE '^[0-9]+$' || [ "${PARALLEL_COUNT}" -lt 1 ] || [ "${PARALLEL_COUNT}" -gt 50 ]; then
        printf "Error: --parallel must be between 1 and 50\n" >&2
        exit 1
      fi
      shift
      ;;
    --sequential)
      SEQUENTIAL_MODE=1
      shift
      ;;
    --skip-cleanup)
      SKIP_CLEANUP=1
      shift
      ;;
    --skip-input-test)
      SKIP_INPUT_TEST=1
      shift
      ;;
    --soak)
      SOAK_MODE=1
      shift
      ;;
    --interactive)
      INTERACTIVE_MODE=1
      shift
      ;;
    --fast)
      FAST_MODE=1
      shift
      ;;
    --help|-h)
      printf "Usage: %s --cloud CLOUD [--cloud CLOUD2 ...] [agents...] [options]\n\n" "$0"
      printf "Clouds: %s\n" "${ALL_CLOUDS}"
      printf "  Use --cloud all for all clouds in parallel.\n\n"
      printf "Agents: %s\n\n" "${ALL_AGENTS}"
      printf "Options:\n"
      printf "  --cloud CLOUD      Cloud to test (repeatable, or 'all')\n"
      printf "  --parallel N       Run N agents in parallel per cloud (default: all at once)\n"
      printf "  --sequential       Force sequential agent execution\n"
      printf "  --skip-cleanup     Skip stale e2e-* instance cleanup\n"
      printf "  --skip-input-test  Skip live input tests\n"
      printf "  --fast             Provision with --fast flag (images + tarballs + parallel)\n"
      printf "  --soak             Run Telegram soak test (OpenClaw on Sprite)\n"
      printf "  --interactive      AI-driven interactive test (requires ANTHROPIC_API_KEY)\n"
      printf "  --help             Show this help\n"
      exit 0
      ;;
    -*)
      printf "Unknown option: %s\n" "$1" >&2
      exit 1
      ;;
    *)
      # Agent name (bare positional). ALL_AGENTS comes from lib/common.sh.
      local_valid=0
      for a in ${ALL_AGENTS}; do
        if [ "$1" = "${a}" ]; then
          local_valid=1
          break
        fi
      done
      if [ "${local_valid}" -eq 0 ]; then
        printf "Unknown agent: %s\nAvailable: %s\n" "$1" "${ALL_AGENTS}" >&2
        exit 1
      fi
      # Append to the space-separated agent list.
      if [ -z "${AGENTS_TO_TEST}" ]; then
        AGENTS_TO_TEST="$1"
      else
        AGENTS_TO_TEST="${AGENTS_TO_TEST} $1"
      fi
      shift
      ;;
  esac
done
|
|
|
|
# Soak mode: run Telegram soak test and exit (no --cloud required)
if [ "${SOAK_MODE}" -eq 1 ]; then
  # Soak still needs a LOG_DIR for run_soak_test's artifacts.
  LOG_DIR=$(mktemp -d "${TMPDIR:-/tmp}/spawn-e2e.XXXXXX")
  export LOG_DIR
  run_soak_test "${LOG_DIR}"
  exit $?
fi

# Require at least one cloud
if [ -z "${CLOUDS}" ]; then
  printf "Error: --cloud is required. Use --cloud aws, --cloud all, etc.\n" >&2
  printf "Run %s --help for usage.\n" "$0" >&2
  exit 1
fi

# Default to all agents
if [ -z "${AGENTS_TO_TEST}" ]; then
  AGENTS_TO_TEST="${ALL_AGENTS}"
fi

# ---------------------------------------------------------------------------
# Count clouds to decide single vs multi-cloud mode
# ---------------------------------------------------------------------------
cloud_count=$(printf '%s\n' "${CLOUDS}" | wc -w | tr -d ' ')
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# run_single_agent AGENT
|
|
#
|
|
# Provisions, verifies, and tears down a single agent.
|
|
# Sets result in a temp file for parallel collection.
|
|
# ---------------------------------------------------------------------------
|
|
#######################################
# run_single_agent AGENT [RESULT_FILE]
# Provisions, verifies, input-tests, and tears down a single agent, all under
# a per-agent wall-clock timeout. Always attempts teardown and always records
# an outcome.
# Globals:   LOG_DIR (read), INTERACTIVE_MODE (read)
# Arguments: $1 - agent name; $2 - optional file to receive "pass"/"fail"
#            (used for parallel result collection)
# Returns:   always 0 — pass/fail is conveyed via RESULT_FILE, not exit status
#######################################
run_single_agent() {
  local agent="$1"
  local result_file="${2:-}"
  local agent_start
  agent_start=$(date +%s)

  log_header "Testing agent: ${agent}"

  local app_name
  app_name=$(make_app_name "${agent}")
  # Register the instance so the EXIT trap can tear it down if we die early.
  track_app "${app_name}"

  local status="fail"

  # ---------------------------------------------------------------------------
  # Per-agent timeout: run provision/verify/input_test in a subshell with a
  # wall-clock timeout. This prevents any single step from hanging indefinitely
  # and ensures a result file is always written (pass, fail, or timeout).
  # Fixes #2714: digitalocean-opencode stalling with no result.
  # ---------------------------------------------------------------------------
  local effective_agent_timeout
  effective_agent_timeout=$(get_agent_timeout "${agent}")
  log_info "Agent timeout: ${effective_agent_timeout}s"

  # The subshell reports its outcome through this file; its existence doubles
  # as the "finished" signal for the polling loop below.
  local status_file="${LOG_DIR}/${app_name}.agent-status"
  rm -f "${status_file}"

  # Run core logic in a subshell so we can kill it on timeout
  (
    local _inner_status="fail"
    if [ "${INTERACTIVE_MODE}" -eq 1 ]; then
      # AI-driven interactive mode: harness drives the CLI through PTY.
      # After harness exits (on "Starting agent..." marker), the install is still
      # running on the remote VM. Run verify_agent to wait for .spawnrc before
      # the input test — same as headless mode.
      if interactive_provision "${agent}" "${app_name}" "${LOG_DIR}"; then
        if verify_agent "${agent}" "${app_name}"; then
          if run_input_test "${agent}" "${app_name}"; then
            _inner_status="pass"
          fi
        fi
      fi
    else
      # Standard headless mode
      if provision_agent "${agent}" "${app_name}" "${LOG_DIR}"; then
        # AI review of provision logs — advisory only, runs regardless of verify result
        ai_review_logs "${agent}" "${app_name}" "${LOG_DIR}" || true
        if verify_agent "${agent}" "${app_name}"; then
          if run_input_test "${agent}" "${app_name}"; then
            _inner_status="pass"
          fi
        fi
      fi
    fi
    printf '%s' "${_inner_status}" > "${status_file}"
  ) &
  local agent_pid=$!

  # Poll for completion or timeout (bash 3.2 compatible — no wait -n)
  local agent_waited=0
  while [ "${agent_waited}" -lt "${effective_agent_timeout}" ]; do
    if [ -f "${status_file}" ]; then
      break
    fi
    # Also break if the subshell exited without writing (crash/error)
    if ! kill -0 "${agent_pid}" 2>/dev/null; then
      break
    fi
    sleep 5
    agent_waited=$((agent_waited + 5))
  done

  # Collect result or handle timeout
  if [ -f "${status_file}" ]; then
    status=$(cat "${status_file}")
    wait "${agent_pid}" 2>/dev/null || true
  elif kill -0 "${agent_pid}" 2>/dev/null; then
    # Timed out — kill the subshell and its children
    # NOTE(review): pkill -P only reaches direct children; grandchildren
    # (e.g. an ssh spawned by provision) may survive — confirm acceptable.
    log_err "${agent} timed out after ${effective_agent_timeout}s — killing"
    pkill -P "${agent_pid}" 2>/dev/null || true
    kill "${agent_pid}" 2>/dev/null || true
    wait "${agent_pid}" 2>/dev/null || true
    status="fail"
  else
    # Subshell exited without writing status file (unexpected error)
    log_err "${agent} subshell exited without writing status"
    wait "${agent_pid}" 2>/dev/null || true
    status="fail"
  fi

  rm -f "${status_file}"

  # Teardown (always attempt, even after timeout)
  teardown_agent "${app_name}" || log_warn "Teardown failed for ${app_name}"

  local agent_end
  agent_end=$(date +%s)
  local agent_duration=$((agent_end - agent_start))
  local duration_str
  duration_str=$(format_duration "${agent_duration}")

  if [ "${status}" = "pass" ]; then
    log_ok "${agent} PASSED (${duration_str})"
  else
    log_err "${agent} FAILED (${duration_str})"
  fi

  # Write result to file (for parallel collection)
  if [ -n "${result_file}" ]; then
    printf '%s' "${status}" > "${result_file}"
  fi

  return 0
}
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# run_agents_for_cloud CLOUD LOG_DIR
|
|
#
|
|
# Runs all agents for a single cloud. Supports parallel batching.
|
|
# Writes per-agent results to LOG_DIR/{cloud}-{agent}.result.
|
|
# Writes cloud summary to LOG_DIR/{cloud}.summary.
|
|
# ---------------------------------------------------------------------------
|
|
#######################################
# _e2e_collect_result AGENT
# Reads LOG_DIR result file "{cloud}-{agent}.result" and appends AGENT to
# cloud_passed or cloud_failed. Relies on bash dynamic scoping: `cloud`,
# `log_dir`, `cloud_passed`, `cloud_failed` are the caller's locals
# (run_agents_for_cloud), which called functions may read and mutate.
# A missing result file counts as a failure.
#######################################
_e2e_collect_result() {
  local _agent="$1"
  local _rf="${log_dir}/${cloud}-${_agent}.result"
  if [ -f "${_rf}" ] && [ "$(cat "${_rf}")" = "pass" ]; then
    if [ -z "${cloud_passed}" ]; then cloud_passed="${_agent}"; else cloud_passed="${cloud_passed} ${_agent}"; fi
  else
    if [ -z "${cloud_failed}" ]; then cloud_failed="${_agent}"; else cloud_failed="${cloud_failed} ${_agent}"; fi
  fi
}

#######################################
# _e2e_run_batch AGENT...
# Runs the given agents concurrently: starts run_single_agent in the
# background for each, waits for all of them, then classifies each result via
# _e2e_collect_result. Shares the caller's locals through dynamic scoping
# (see _e2e_collect_result). Extracted to remove the previously duplicated
# full-batch / partial-batch code paths.
#######################################
_e2e_run_batch() {
  local _pids=""
  local _ba _p
  for _ba in "$@"; do
    run_single_agent "${_ba}" "${log_dir}/${cloud}-${_ba}.result" &
    if [ -z "${_pids}" ]; then _pids="$!"; else _pids="${_pids} $!"; fi
  done
  for _p in ${_pids}; do
    wait "${_p}" 2>/dev/null || true
  done
  for _ba in "$@"; do
    _e2e_collect_result "${_ba}"
  done
}

#######################################
# run_agents_for_cloud CLOUD LOG_DIR
# Runs all agents for a single cloud. Supports parallel batching.
# Writes per-agent results to LOG_DIR/{cloud}-{agent}.result.
# Writes cloud summary to LOG_DIR/{cloud}.summary
# (format: "PASS_COUNT FAIL_COUNT DURATION PASSED_AGENTS |FAILED_AGENTS").
# Globals:   AGENTS_TO_TEST, PARALLEL_COUNT, SEQUENTIAL_MODE, SKIP_CLEANUP,
#            cloud_count (read); CLOUD_LOG_PREFIX (written)
# Returns:   0 if every agent passed (or the cloud was skipped), 1 otherwise
#######################################
run_agents_for_cloud() {
  local cloud="$1"
  local log_dir="$2"
  local cloud_start
  cloud_start=$(date +%s)

  # Load the cloud driver
  load_cloud_driver "${cloud}"

  # Set log prefix for multi-cloud output
  if [ "${cloud_count}" -gt 1 ]; then
    CLOUD_LOG_PREFIX="[${cloud}] "
  fi

  log_header "E2E Tests: ${cloud}"
  log_info "Agents: ${AGENTS_TO_TEST}"

  # Validate environment for this cloud — a missing-credentials cloud is
  # skipped (return 0), not failed.
  if ! require_env; then
    log_warn "Credentials not configured for ${cloud} — skipping"
    printf 'SKIPPED (no credentials)' > "${log_dir}/${cloud}.summary"
    return 0
  fi

  # Accumulators mutated by _e2e_collect_result / _e2e_run_batch.
  local cloud_passed=""
  local cloud_failed=""
  local agent

  # Pre-run stale cleanup: remove orphaned e2e instances from previous
  # interrupted runs before starting new agents. Uses a shorter max_age (5 min)
  # than the default (30 min) so that orphans from recently-failed runs are
  # cleaned before they can exhaust the account's instance quota (#2793).
  if [ "${SKIP_CLEANUP}" -eq 0 ]; then
    _CLEANUP_MAX_AGE=300 cloud_cleanup_stale || log_warn "Pre-run stale cleanup encountered errors"
  fi

  # Resolve effective parallelism (respect per-cloud cap)
  local effective_parallel="${PARALLEL_COUNT}"
  if [ "${SEQUENTIAL_MODE}" -eq 0 ]; then
    local cloud_max
    cloud_max=$(cloud_max_parallel)
    if [ "${effective_parallel}" -gt "${cloud_max}" ]; then
      effective_parallel="${cloud_max}"
    fi
  fi

  # Bail out early if the cloud reports zero capacity (e.g. droplet limit reached).
  # All agents would fail anyway — skip with an actionable error instead of wasting
  # time on retries that cannot succeed. (#3059)
  if [ "${effective_parallel}" -eq 0 ] && [ "${SEQUENTIAL_MODE}" -eq 0 ]; then
    log_err "No capacity available on ${cloud} — all ${cloud} agents will be marked as failed."
    log_err "Delete existing instances or request a limit increase, then re-run."
    for agent in ${AGENTS_TO_TEST}; do
      printf 'fail' > "${log_dir}/${cloud}-${agent}.result"
      if [ -z "${cloud_failed}" ]; then cloud_failed="${agent}"; else cloud_failed="${cloud_failed} ${agent}"; fi
    done
    printf '%s %s %s %s %s' "0" "$(printf '%s\n' "${AGENTS_TO_TEST}" | wc -w | tr -d ' ')" "0s" "" "|${cloud_failed}" \
      > "${log_dir}/${cloud}.summary"
    return 1
  fi

  if [ "${effective_parallel}" -gt 0 ] && [ "${SEQUENTIAL_MODE}" -eq 0 ]; then
    # Parallel mode: batch agents
    log_info "Running agents in parallel (batch size: ${effective_parallel})"

    local batch_agents=""
    local batch_count=0
    local batch_num=0

    for agent in ${AGENTS_TO_TEST}; do
      batch_agents="${batch_agents} ${agent}"
      batch_count=$((batch_count + 1))

      if [ "${batch_count}" -ge "${effective_parallel}" ]; then
        batch_num=$((batch_num + 1))
        log_header "Batch ${batch_num} (${cloud})"

        # Refresh auth before each batch — prevents token expiry in long
        # E2E runs (60+ min). No-op for clouds without refresh support. #2934
        cloud_refresh_auth || log_warn "Auth refresh failed before batch ${batch_num}"

        # Intentionally unquoted: batch_agents is a space-separated list.
        _e2e_run_batch ${batch_agents}

        batch_agents=""
        batch_count=0
      fi
    done

    # Handle remaining agents in last partial batch
    if [ -n "${batch_agents}" ]; then
      batch_num=$((batch_num + 1))
      log_header "Batch ${batch_num} (${cloud})"

      # Refresh auth before partial batch too — same reason as above. #2934
      cloud_refresh_auth || log_warn "Auth refresh failed before batch ${batch_num}"

      _e2e_run_batch ${batch_agents}
    fi

  else
    # Sequential mode: one agent at a time, in the foreground.
    for agent in ${AGENTS_TO_TEST}; do
      run_single_agent "${agent}" "${log_dir}/${cloud}-${agent}.result"
      _e2e_collect_result "${agent}"
    done
  fi

  # Stale cleanup
  if [ "${SKIP_CLEANUP}" -eq 0 ]; then
    cloud_cleanup_stale || log_warn "Stale cleanup encountered errors"
  fi

  # Write cloud summary
  local cloud_end
  cloud_end=$(date +%s)
  local cloud_duration=$((cloud_end - cloud_start))
  local cloud_duration_str
  cloud_duration_str=$(format_duration "${cloud_duration}")

  local pass_count=0
  local fail_count=0
  if [ -n "${cloud_passed}" ]; then pass_count=$(printf '%s\n' "${cloud_passed}" | wc -w | tr -d ' '); fi
  if [ -n "${cloud_failed}" ]; then fail_count=$(printf '%s\n' "${cloud_failed}" | wc -w | tr -d ' '); fi

  printf '%s %s %s %s %s' "${pass_count}" "${fail_count}" "${cloud_duration_str}" "${cloud_passed}" "|${cloud_failed}" \
    > "${log_dir}/${cloud}.summary"

  if [ "${fail_count}" -gt 0 ]; then
    return 1
  fi
  return 0
}
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# send_matrix_email LOG_DIR CLOUDS AGENTS TOTAL_PASS TOTAL_FAIL DURATION_STR
|
|
#
|
|
# Sends an agent x cloud matrix report via Resend.
|
|
# Requires: RESEND_API_KEY, KEY_REQUEST_EMAIL env vars (silently skips if absent).
|
|
# ---------------------------------------------------------------------------
|
|
#######################################
# send_matrix_email LOG_DIR CLOUDS AGENTS TOTAL_PASS TOTAL_FAIL DURATION_STR
# Sends an agent x cloud matrix report via Resend.
# Requires: RESEND_API_KEY, KEY_REQUEST_EMAIL env vars (silently skips if absent).
# Builds a "cloud:agent:result,..." string from the per-agent result files,
# writes a throwaway TypeScript sender to a temp file, and runs it with bun
# (env vars carry the data). Failures to send are logged, never fatal.
# Returns: 0 in every skip path; otherwise bun's outcome is swallowed.
#######################################
send_matrix_email() {
  local log_dir="$1"
  local clouds="$2"
  local agents="$3"
  local total_pass="$4"
  local total_fail="$5"
  local duration_str="$6"

  # Skip email for targeted re-runs (partial agent/cloud subset).
  # Set SPAWN_E2E_SKIP_EMAIL=1 to suppress the email (used by quality cycle
  # when re-running only failed agents — a partial email looks like all-passed).
  if [ "${SPAWN_E2E_SKIP_EMAIL:-0}" = "1" ]; then
    log_info "Matrix email skipped (SPAWN_E2E_SKIP_EMAIL=1)"
    return 0
  fi

  local resend_key="${RESEND_API_KEY:-}"
  local to_email="${KEY_REQUEST_EMAIL:-}"

  if [ -z "${resend_key}" ] || [ -z "${to_email}" ]; then
    log_info "Matrix email skipped (RESEND_API_KEY or KEY_REQUEST_EMAIL not set)"
    return 0
  fi

  # Build results string: "cloud:agent:result,..." for bun to process.
  # A missing result file (cloud skipped, agent never ran) is recorded as "skip".
  local results=""
  for cloud in ${clouds}; do
    for agent in ${agents}; do
      local result="skip"
      local result_file="${log_dir}/${cloud}-${agent}.result"
      if [ -f "${result_file}" ]; then
        result=$(cat "${result_file}")
      fi
      if [ -n "${results}" ]; then results="${results},"; fi
      results="${results}${cloud}:${agent}:${result}"
    done
  done

  local ts_file
  # NOTE(review): a mktemp template with a suffix after the X's
  # ("...XXXXXX.ts") is a GNU coreutils extension; BSD/macOS mktemp rejects
  # it. Confirm this path only runs on Linux (QA VM) — TODO.
  ts_file=$(mktemp /tmp/e2e-email-XXXXXX.ts)

  # Quoted delimiter ('TS_EOF') — no shell expansion inside; the TS program
  # reads everything through _E2E_* environment variables instead.
  cat > "${ts_file}" << 'TS_EOF'
const results = (process.env._E2E_RESULTS ?? "").split(",").filter(Boolean);
const clouds = (process.env._E2E_CLOUDS ?? "").split(" ").filter(Boolean);
const agents = (process.env._E2E_AGENTS ?? "").split(" ").filter(Boolean);
const totalPass = process.env._E2E_TOTAL_PASS ?? "0";
const totalFail = process.env._E2E_TOTAL_FAIL ?? "0";
const duration = process.env._E2E_DURATION ?? "?";
const toEmail = process.env.KEY_REQUEST_EMAIL ?? "";
const resendKey = process.env.RESEND_API_KEY ?? "";
const timestamp = new Date().toUTCString();

// Build lookup map: "cloud:agent" -> result
const resultMap: Record<string, string> = {};
for (const entry of results) {
  const parts = entry.split(":");
  resultMap[`${parts[0]}:${parts[1]}`] = parts[2] ?? "skip";
}

// Cell styles per result
const cellStyle = (result: string): string => {
  if (result === "pass") return "background:#22c55e;color:#fff;font-weight:bold;padding:4px 10px;border-radius:4px;";
  if (result === "fail") return "background:#ef4444;color:#fff;font-weight:bold;padding:4px 10px;border-radius:4px;";
  return "background:#e2e8f0;color:#94a3b8;padding:4px 10px;border-radius:4px;";
};

const headerCells = clouds
  .map(c => `<th style="padding:8px 14px;background:#1e293b;color:#fff;text-transform:uppercase;font-size:11px;letter-spacing:.05em;">${c}</th>`)
  .join("");

const bodyRows = agents
  .map(agent => {
    const cells = clouds
      .map(cloud => {
        const r = resultMap[`${cloud}:${agent}`] ?? "skip";
        return `<td style="padding:6px 14px;text-align:center;"><span style="${cellStyle(r)}">${r.toUpperCase()}</span></td>`;
      })
      .join("");
    return `<tr><td style="padding:6px 14px;font-weight:600;white-space:nowrap;color:#1e293b;">${agent}</td>${cells}</tr>`;
  })
  .join("");

const status = totalFail === "0" ? "✅ All Passed" : `❌ ${totalFail} Failed`;

const html = `<!DOCTYPE html>
<html><body style="font-family:system-ui,-apple-system,sans-serif;max-width:860px;margin:0 auto;padding:24px;color:#1e293b;">
<h2 style="margin:0 0 4px;">${status} — Spawn E2E Matrix</h2>
<p style="margin:0 0 20px;color:#64748b;font-size:14px;">Completed ${timestamp}</p>
<table style="border-collapse:collapse;width:100%;">
<thead>
<tr>
<th style="padding:8px 14px;background:#1e293b;color:#fff;text-align:left;font-size:11px;text-transform:uppercase;letter-spacing:.05em;">Agent</th>
${headerCells}
</tr>
</thead>
<tbody>
${bodyRows}
</tbody>
</table>
<p style="margin-top:18px;color:#64748b;font-size:13px;">
<strong style="color:#1e293b;">Total:</strong> ${totalPass} passed, ${totalFail} failed
·
<strong style="color:#1e293b;">Duration:</strong> ${duration}
</p>
</body></html>`;

const subject = totalFail === "0"
  ? `✅ E2E Matrix: ${totalPass} passed · ${duration}`
  : `❌ E2E Matrix: ${totalFail} failed, ${totalPass} passed · ${duration}`;

const res = await fetch("https://api.resend.com/emails", {
  method: "POST",
  headers: {
    "Content-Type": "application/json",
    "Authorization": `Bearer ${resendKey}`,
  },
  body: JSON.stringify({
    from: "Spawn QA <onboarding@resend.dev>",
    to: [toEmail],
    subject,
    html,
  }),
});

if (!res.ok) {
  const body = await res.text();
  console.error(`Resend API error ${res.status}: ${body}`);
  process.exit(1);
}
console.log(`Matrix email sent to ${toEmail}`);
TS_EOF

  log_info "Sending matrix email to ${to_email}..."
  # Pass all report data via environment so nothing needs shell-escaping into
  # the generated TS source. Best-effort: a send failure only warns.
  _E2E_RESULTS="${results}" \
    _E2E_CLOUDS="${clouds}" \
    _E2E_AGENTS="${agents}" \
    _E2E_TOTAL_PASS="${total_pass}" \
    _E2E_TOTAL_FAIL="${total_fail}" \
    _E2E_DURATION="${duration_str}" \
    bun run "${ts_file}" 2>&1 || log_warn "Failed to send matrix email"

  rm -f "${ts_file}" 2>/dev/null || true
}
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Final cleanup trap
|
|
# ---------------------------------------------------------------------------
|
|
#######################################
# final_cleanup
# EXIT-trap handler: best-effort teardown of any instances still registered
# via track_app, then removal of the temp log directory. The rm -rf is
# guarded by a pattern match so an unexpected LOG_DIR value can never delete
# an arbitrary path.
# Globals: _TRACKED_APPS (read), LOG_DIR (read), TMPDIR (read)
#######################################
final_cleanup() {
  # ${_TRACKED_APPS:-} guards against the trap firing before lib/common.sh
  # initialized the variable.
  if [ -n "${_TRACKED_APPS:-}" ]; then
    printf "\n"
    log_warn "Cleaning up tracked instances on exit..."
    for app in ${_TRACKED_APPS}; do
      log_step "Tearing down ${app}..."
      teardown_agent "${app}" 2>/dev/null || log_warn "Failed to tear down ${app}"
    done
  fi
  if [ -n "${LOG_DIR:-}" ] && [ -d "${LOG_DIR:-}" ]; then
    # LOG_DIR is created via mktemp -d "${TMPDIR:-/tmp}/spawn-e2e.XXXXXX", so
    # guard against the same root. The previous pattern hard-coded
    # /tmp/spawn-e2e.* and silently leaked the directory whenever TMPDIR
    # pointed elsewhere (e.g. macOS /var/folders/...). The quoted expansion is
    # matched literally; the old /tmp pattern is kept as a fallback.
    case "${LOG_DIR}" in
      "${TMPDIR:-/tmp}"/spawn-e2e.*|/tmp/spawn-e2e.*)
        rm -rf "${LOG_DIR}"
        ;;
      *)
        log_warn "Refusing to rm -rf unexpected path: ${LOG_DIR}"
        ;;
    esac
  fi
}
trap final_cleanup EXIT
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Main
|
|
# ---------------------------------------------------------------------------
|
|
# ---------------------------------------------------------------------------
# Main
# ---------------------------------------------------------------------------
log_header "Spawn E2E Test Suite (Multi-Cloud)"
log_info "Clouds: ${CLOUDS}"
log_info "Agents: ${AGENTS_TO_TEST}"
# Describe the effective parallelism mode (99 is the "unbounded" sentinel).
if [ "${SEQUENTIAL_MODE}" -eq 1 ]; then
  log_info "Agent parallelism: sequential"
elif [ "${PARALLEL_COUNT}" -ge 99 ]; then
  log_info "Agent parallelism: all at once (per-cloud caps may apply)"
else
  log_info "Agent parallelism: ${PARALLEL_COUNT} per cloud"
fi
if [ "${SKIP_INPUT_TEST}" -eq 1 ]; then
  log_info "Input tests: SKIPPED"
fi
if [ "${FAST_MODE}" -eq 1 ]; then
  log_info "Fast mode: ENABLED (--fast passed to spawn)"
fi

# Export FAST_MODE so provision.sh can read it
export E2E_FAST_MODE="${FAST_MODE}"

# Create temp log directory (removed by the EXIT trap in final_cleanup)
LOG_DIR=$(mktemp -d "${TMPDIR:-/tmp}/spawn-e2e.XXXXXX")
export LOG_DIR
log_info "Log directory: ${LOG_DIR}"

START_TIME=$(date +%s)

# ---------------------------------------------------------------------------
# Execute: single-cloud or multi-cloud
# ---------------------------------------------------------------------------
if [ "${cloud_count}" -eq 1 ]; then
  # Single cloud — run directly in this process
  # (|| true: failures are reported via result files, not the exit status).
  run_agents_for_cloud "${CLOUDS}" "${LOG_DIR}" || true

else
  # Multi-cloud — each cloud runs as a separate background process
  cloud_pids=""
  for cloud in ${CLOUDS}; do
    (
      # Reset parent's EXIT trap — the main process handles LOG_DIR cleanup
      trap - EXIT
      _TRACKED_APPS=""
      run_agents_for_cloud "${cloud}" "${LOG_DIR}"
    ) > "${LOG_DIR}/${cloud}.log" 2>&1 &
    cloud_pid=$!
    if [ -z "${cloud_pids}" ]; then
      cloud_pids="${cloud_pid}"
    else
      cloud_pids="${cloud_pids} ${cloud_pid}"
    fi
    log_info "Started ${cloud} tests (PID: ${cloud_pid})"
  done

  # Wait for all clouds to finish
  any_failed=0
  for pid in ${cloud_pids}; do
    wait "${pid}" 2>/dev/null || any_failed=1
  done

  # Print per-cloud logs (captured to files so parallel output never interleaves)
  for cloud in ${CLOUDS}; do
    if [ -f "${LOG_DIR}/${cloud}.log" ]; then
      printf "\n"
      log_header "Output: ${cloud}"
      cat "${LOG_DIR}/${cloud}.log"
    fi
  done
fi

# ---------------------------------------------------------------------------
# Unified Summary — recomputed from the per-agent result files so it works
# identically for single- and multi-cloud runs.
# ---------------------------------------------------------------------------
END_TIME=$(date +%s)
TOTAL_DURATION=$((END_TIME - START_TIME))
DURATION_STR=$(format_duration "${TOTAL_DURATION}")

printf "\n"
log_header "E2E Test Summary"

total_pass=0
total_fail=0
any_cloud_failed=0

for cloud in ${CLOUDS}; do
  printf "\n ${BOLD}%s:${NC}\n" "${cloud}"

  cloud_pass=0
  cloud_fail=0
  cloud_skip=0

  # Check if this cloud was skipped (no credentials) — no result files written
  cloud_has_results=0
  for agent in ${AGENTS_TO_TEST}; do
    if [ -f "${LOG_DIR}/${cloud}-${agent}.result" ]; then
      cloud_has_results=1
      break
    fi
  done

  if [ "${cloud_has_results}" -eq 0 ]; then
    printf " ${YELLOW}(skipped — credentials not configured)${NC}\n"
    continue
  fi

  # Per-agent verdicts: a missing result file counts as a failure.
  for agent in ${AGENTS_TO_TEST}; do
    result_file="${LOG_DIR}/${cloud}-${agent}.result"
    if [ -f "${result_file}" ] && [ "$(cat "${result_file}")" = "pass" ]; then
      printf " ${GREEN}%-12s PASS${NC}\n" "${agent}"
      cloud_pass=$((cloud_pass + 1))
      total_pass=$((total_pass + 1))
    else
      printf " ${RED}%-12s FAIL${NC}\n" "${agent}"
      cloud_fail=$((cloud_fail + 1))
      total_fail=$((total_fail + 1))
    fi
  done

  if [ "${cloud_fail}" -gt 0 ]; then
    printf " ${RED}%d passed, %d failed${NC}\n" "${cloud_pass}" "${cloud_fail}"
    any_cloud_failed=1
  else
    printf " ${GREEN}%d passed, 0 failed${NC}\n" "${cloud_pass}"
  fi
done

printf "\n"
printf " ${BOLD}Total:${NC} ${GREEN}%d passed${NC}" "${total_pass}"
if [ "${total_fail}" -gt 0 ]; then
  printf ", ${RED}%d failed${NC}" "${total_fail}"
fi
printf "\n Duration: %s\n" "${DURATION_STR}"

# Send matrix email report
send_matrix_email "${LOG_DIR}" "${CLOUDS}" "${AGENTS_TO_TEST}" "${total_pass}" "${total_fail}" "${DURATION_STR}"

# Exit with failure if any agent on any cloud failed
if [ "${total_fail}" -gt 0 ]; then
  exit 1
fi

# All tests passed — advance the e2e-last-green tag for diff-aware reviews
mark_e2e_green

exit 0
|