spawn/sh/e2e/e2e.sh
A 5e0144b645
fix(zeroclaw): remove broken zeroclaw agent (repo 404) (#3107)
* fix(zeroclaw): remove broken zeroclaw agent (repo 404)

The zeroclaw-labs/zeroclaw GitHub repository returns 404 — all installs
fail. Remove zeroclaw entirely from the matrix: agent definition,
setup code, shell scripts, e2e tests, packer config, skill files,
and documentation.

Fixes #3102

Agent: code-health
Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>

* fix(zeroclaw): remove stale zeroclaw reference from discovery.md ARM agents list

Addresses security review on PR #3107 — the last remaining zeroclaw
reference in .claude/rules/discovery.md is now removed.

Agent: issue-fixer
Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>

* fix(zeroclaw): remove remaining stale zeroclaw references from CI/packer

Remove zeroclaw from:
- .github/workflows/agent-tarballs.yml ARM build matrix
- .github/workflows/docker.yml agent matrix
- packer/digitalocean.pkr.hcl comment
- sh/e2e/e2e.sh comment

Addresses all 5 stale references flagged in security review of PR #3107.

Agent: issue-fixer
Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>

---------

Co-authored-by: B <6723574+louisgv@users.noreply.github.com>
Co-authored-by: Claude Sonnet 4.5 <noreply@anthropic.com>
2026-03-30 15:35:40 -07:00

833 lines
28 KiB
Bash
Executable file

#!/bin/bash
# sh/e2e/e2e.sh — Unified multi-cloud E2E test orchestrator
#
# Usage:
# e2e.sh --cloud aws # AWS only, all agents
# e2e.sh --cloud hetzner claude codex # Hetzner, specific agents
# e2e.sh --cloud aws --cloud hetzner # Both clouds IN PARALLEL
# e2e.sh --cloud all # ALL clouds IN PARALLEL
# e2e.sh --cloud all --parallel 3 # All clouds, 3 agents parallel per cloud
# e2e.sh --cloud aws --skip-input-test # Skip live input tests
# e2e.sh --cloud aws --sequential # Force sequential agents (no parallelism)
#
# NOTE(review): -u (nounset) is not enabled; the script reads many optional
# env vars and relies on ${VAR:-} defaults where absence matters.
set -eo pipefail
# ---------------------------------------------------------------------------
# Resolve script directory and source libraries
# ---------------------------------------------------------------------------
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
# Auto-set SPAWN_CLI_DIR to repo root so shell scripts use local source instead
# of downloading pre-bundled .js from GitHub releases. Can be overridden by env.
if [ -z "${SPAWN_CLI_DIR:-}" ]; then
_repo_root="$(cd "${SCRIPT_DIR}/../.." && pwd)"
# Only claim the repo root when the CLI source tree actually exists there.
if [ -f "${_repo_root}/packages/cli/src/index.ts" ]; then
export SPAWN_CLI_DIR="${_repo_root}"
fi
unset _repo_root
fi
# NOTE(review): the log_*/provision/verify/teardown helpers, ALL_AGENTS,
# make_app_name, load_cloud_driver, format_duration, mark_e2e_green, etc.
# used throughout this script are presumably defined by these libs — they
# are not visible in this file; confirm against sh/e2e/lib/*.
source "${SCRIPT_DIR}/lib/common.sh"
source "${SCRIPT_DIR}/lib/provision.sh"
source "${SCRIPT_DIR}/lib/verify.sh"
source "${SCRIPT_DIR}/lib/teardown.sh"
source "${SCRIPT_DIR}/lib/soak.sh"
source "${SCRIPT_DIR}/lib/interactive.sh"
source "${SCRIPT_DIR}/lib/ai-review.sh"
# ---------------------------------------------------------------------------
# Auto-load Resend email credentials when not already set.
# Sources /etc/spawn-key-server-auth.env (QA VM) or ~/.config/spawn/resend.env
# (local dev) to populate RESEND_API_KEY and KEY_REQUEST_EMAIL.
# This ensures send_matrix_email fires on manual runs, not just QA-cycle runs.
# ---------------------------------------------------------------------------
if [ -z "${RESEND_API_KEY:-}" ] || [ -z "${KEY_REQUEST_EMAIL:-}" ]; then
# First existing file wins; the break below stops after one is sourced.
for _cred_file in /etc/spawn-key-server-auth.env "${HOME}/.config/spawn/resend.env"; do
if [ -f "${_cred_file}" ]; then
# shellcheck source=/dev/null # path is dynamic
# set -a auto-exports every variable the cred file assigns; set +a restores.
set -a; source "${_cred_file}" 2>/dev/null; set +a
break
fi
done
unset _cred_file
fi
# ---------------------------------------------------------------------------
# All supported clouds (excluding local — no infra to provision)
# ---------------------------------------------------------------------------
ALL_CLOUDS="aws hetzner digitalocean gcp sprite"
# ---------------------------------------------------------------------------
# Parse arguments
# ---------------------------------------------------------------------------
CLOUDS=""
AGENTS_TO_TEST=""
# 99 is a sentinel meaning "all agents at once"; the main section logs it via
# a -ge 99 check, and per-cloud caps still apply in run_agents_for_cloud.
PARALLEL_COUNT=99
SKIP_CLEANUP=0
SKIP_INPUT_TEST="${SKIP_INPUT_TEST:-0}"
SEQUENTIAL_MODE=0
SOAK_MODE=0
INTERACTIVE_MODE=0
FAST_MODE=0
while [ $# -gt 0 ]; do
case "$1" in
--cloud)
shift
if [ $# -eq 0 ]; then
printf "Error: --cloud requires a cloud name\n" >&2
exit 1
fi
if [ "$1" = "all" ]; then
CLOUDS="${ALL_CLOUDS}"
else
# Validate cloud name
# NOTE(review): local_valid is a plain global — we are not inside a
# function here; the "local_" prefix is only a naming convention.
local_valid=0
for c in ${ALL_CLOUDS}; do
if [ "$1" = "${c}" ]; then
local_valid=1
break
fi
done
if [ "${local_valid}" -eq 0 ]; then
printf "Unknown cloud: %s\nAvailable: %s all\n" "$1" "${ALL_CLOUDS}" >&2
exit 1
fi
# Append to the space-separated CLOUDS list (repeatable flag).
if [ -z "${CLOUDS}" ]; then
CLOUDS="$1"
else
CLOUDS="${CLOUDS} $1"
fi
fi
shift
;;
--parallel)
shift
if [ $# -eq 0 ]; then
printf "Error: --parallel requires a number\n" >&2
exit 1
fi
PARALLEL_COUNT="$1"
# The grep runs first, so the numeric -lt/-gt comparisons only execute
# on values already known to be all digits.
if ! printf '%s' "${PARALLEL_COUNT}" | grep -qE '^[0-9]+$' || [ "${PARALLEL_COUNT}" -lt 1 ] || [ "${PARALLEL_COUNT}" -gt 50 ]; then
printf "Error: --parallel must be between 1 and 50\n" >&2
exit 1
fi
shift
;;
--sequential)
SEQUENTIAL_MODE=1
shift
;;
--skip-cleanup)
SKIP_CLEANUP=1
shift
;;
--skip-input-test)
SKIP_INPUT_TEST=1
shift
;;
--soak)
SOAK_MODE=1
shift
;;
--interactive)
INTERACTIVE_MODE=1
shift
;;
--fast)
FAST_MODE=1
shift
;;
--help|-h)
printf "Usage: %s --cloud CLOUD [--cloud CLOUD2 ...] [agents...] [options]\n\n" "$0"
printf "Clouds: %s\n" "${ALL_CLOUDS}"
printf " Use --cloud all for all clouds in parallel.\n\n"
printf "Agents: %s\n\n" "${ALL_AGENTS}"
printf "Options:\n"
printf " --cloud CLOUD Cloud to test (repeatable, or 'all')\n"
printf " --parallel N Run N agents in parallel per cloud (default: all at once)\n"
printf " --sequential Force sequential agent execution\n"
printf " --skip-cleanup Skip stale e2e-* instance cleanup\n"
printf " --skip-input-test Skip live input tests\n"
printf " --fast Provision with --fast flag (images + tarballs + parallel)\n"
printf " --soak Run Telegram soak test (OpenClaw on Sprite)\n"
printf " --interactive AI-driven interactive test (requires ANTHROPIC_API_KEY)\n"
printf " --help Show this help\n"
exit 0
;;
-*)
printf "Unknown option: %s\n" "$1" >&2
exit 1
;;
*)
# Agent name — any bare (non-flag) argument is validated against
# ALL_AGENTS (provided by the sourced libs) and appended to the list.
local_valid=0
for a in ${ALL_AGENTS}; do
if [ "$1" = "${a}" ]; then
local_valid=1
break
fi
done
if [ "${local_valid}" -eq 0 ]; then
printf "Unknown agent: %s\nAvailable: %s\n" "$1" "${ALL_AGENTS}" >&2
exit 1
fi
if [ -z "${AGENTS_TO_TEST}" ]; then
AGENTS_TO_TEST="$1"
else
AGENTS_TO_TEST="${AGENTS_TO_TEST} $1"
fi
shift
;;
esac
done
# Soak mode: run Telegram soak test and exit (no --cloud required)
if [ "${SOAK_MODE}" -eq 1 ]; then
LOG_DIR=$(mktemp -d "${TMPDIR:-/tmp}/spawn-e2e.XXXXXX")
export LOG_DIR
run_soak_test "${LOG_DIR}"
# Propagate the soak test's exit status directly.
exit $?
fi
# Require at least one cloud
if [ -z "${CLOUDS}" ]; then
printf "Error: --cloud is required. Use --cloud aws, --cloud all, etc.\n" >&2
printf "Run %s --help for usage.\n" "$0" >&2
exit 1
fi
# Default to all agents
if [ -z "${AGENTS_TO_TEST}" ]; then
AGENTS_TO_TEST="${ALL_AGENTS}"
fi
# ---------------------------------------------------------------------------
# Count clouds to decide single vs multi-cloud mode
# ---------------------------------------------------------------------------
cloud_count=$(printf '%s\n' "${CLOUDS}" | wc -w | tr -d ' ')
# ---------------------------------------------------------------------------
# run_single_agent AGENT
#
# Provisions, verifies, and tears down a single agent.
# Sets result in a temp file for parallel collection.
# ---------------------------------------------------------------------------
# run_single_agent AGENT [RESULT_FILE]
# Provisions, verifies, input-tests, and tears down one agent under a
# wall-clock timeout. Always returns 0; the pass/fail outcome is written to
# RESULT_FILE (when provided) so parallel callers can collect it.
run_single_agent() {
local agent="$1"
local result_file="${2:-}"
local agent_start
agent_start=$(date +%s)
log_header "Testing agent: ${agent}"
local app_name
app_name=$(make_app_name "${agent}")
# Register the instance so final_cleanup can tear it down on abnormal exit.
track_app "${app_name}"
local status="fail"
# ---------------------------------------------------------------------------
# Per-agent timeout: run provision/verify/input_test in a subshell with a
# wall-clock timeout. This prevents any single step from hanging indefinitely
# and ensures a result file is always written (pass, fail, or timeout).
# Fixes #2714: digitalocean-opencode stalling with no result.
# ---------------------------------------------------------------------------
local effective_agent_timeout
effective_agent_timeout=$(get_agent_timeout "${agent}")
log_info "Agent timeout: ${effective_agent_timeout}s"
local status_file="${LOG_DIR}/${app_name}.agent-status"
rm -f "${status_file}"
# Run core logic in a subshell so we can kill it on timeout.
# ('local' is valid inside the subshell because we are still in function scope.)
(
local _inner_status="fail"
if [ "${INTERACTIVE_MODE}" -eq 1 ]; then
# AI-driven interactive mode: harness drives the CLI through PTY.
# After harness exits (on "Starting agent..." marker), the install is still
# running on the remote VM. Run verify_agent to wait for .spawnrc before
# the input test — same as headless mode.
if interactive_provision "${agent}" "${app_name}" "${LOG_DIR}"; then
if verify_agent "${agent}" "${app_name}"; then
if run_input_test "${agent}" "${app_name}"; then
_inner_status="pass"
fi
fi
fi
else
# Standard headless mode
if provision_agent "${agent}" "${app_name}" "${LOG_DIR}"; then
# AI review of provision logs — advisory only, runs regardless of verify result
ai_review_logs "${agent}" "${app_name}" "${LOG_DIR}" || true
if verify_agent "${agent}" "${app_name}"; then
if run_input_test "${agent}" "${app_name}"; then
_inner_status="pass"
fi
fi
fi
fi
# The presence of this file signals completion to the polling loop below.
printf '%s' "${_inner_status}" > "${status_file}"
) &
local agent_pid=$!
# Poll for completion or timeout (bash 3.2 compatible — no wait -n).
# 5-second granularity, so the effective timeout can overshoot by up to 5s.
local agent_waited=0
while [ "${agent_waited}" -lt "${effective_agent_timeout}" ]; do
if [ -f "${status_file}" ]; then
break
fi
# Also break if the subshell exited without writing (crash/error)
if ! kill -0 "${agent_pid}" 2>/dev/null; then
break
fi
sleep 5
agent_waited=$((agent_waited + 5))
done
# Collect result or handle timeout
if [ -f "${status_file}" ]; then
status=$(cat "${status_file}")
wait "${agent_pid}" 2>/dev/null || true
elif kill -0 "${agent_pid}" 2>/dev/null; then
# Timed out — kill the subshell and its children.
# NOTE(review): pkill -P only signals direct children of the subshell;
# deeper descendants (e.g. processes spawned by those children) may
# survive — confirm whether provision helpers spawn grandchildren.
log_err "${agent} timed out after ${effective_agent_timeout}s — killing"
pkill -P "${agent_pid}" 2>/dev/null || true
kill "${agent_pid}" 2>/dev/null || true
wait "${agent_pid}" 2>/dev/null || true
status="fail"
else
# Subshell exited without writing status file (unexpected error)
log_err "${agent} subshell exited without writing status"
wait "${agent_pid}" 2>/dev/null || true
status="fail"
fi
rm -f "${status_file}"
# Teardown (always attempt, even after timeout)
teardown_agent "${app_name}" || log_warn "Teardown failed for ${app_name}"
local agent_end
agent_end=$(date +%s)
local agent_duration=$((agent_end - agent_start))
local duration_str
duration_str=$(format_duration "${agent_duration}")
if [ "${status}" = "pass" ]; then
log_ok "${agent} PASSED (${duration_str})"
else
log_err "${agent} FAILED (${duration_str})"
fi
# Write result to file (for parallel collection)
if [ -n "${result_file}" ]; then
printf '%s' "${status}" > "${result_file}"
fi
# Always 0 — per-agent failure is reported via the result file, not the
# return status, so a failing agent does not abort a parallel batch.
return 0
}
# ---------------------------------------------------------------------------
# run_agents_for_cloud CLOUD LOG_DIR
#
# Runs all agents for a single cloud. Supports parallel batching.
# Writes per-agent results to LOG_DIR/{cloud}-{agent}.result.
# Writes cloud summary to LOG_DIR/{cloud}.summary.
# ---------------------------------------------------------------------------
#######################################
# _e2e_record_result AGENT
# Appends AGENT to cloud_passed or cloud_failed based on the agent's
# result file ("pass" => passed; missing or anything else => failed).
# Uses bash dynamic scoping: cloud, log_dir, cloud_passed and cloud_failed
# are locals of the calling run_agents_for_cloud.
#######################################
_e2e_record_result() {
  local rf="${log_dir}/${cloud}-$1.result"
  if [ -f "${rf}" ] && [ "$(cat "${rf}")" = "pass" ]; then
    cloud_passed="${cloud_passed:+${cloud_passed} }$1"
  else
    cloud_failed="${cloud_failed:+${cloud_failed} }$1"
  fi
}

#######################################
# _e2e_run_batch BATCH_NUM AGENT...
# Runs the given agents as parallel background jobs, waits for all of them,
# then records each result. Refreshes cloud auth first — prevents token
# expiry in long E2E runs (60+ min). No-op for clouds without refresh
# support. #2934
# Uses bash dynamic scoping for cloud / log_dir and updates
# cloud_passed / cloud_failed via _e2e_record_result.
#######################################
_e2e_run_batch() {
  local batch_num="$1"
  shift
  log_header "Batch ${batch_num} (${cloud})"
  cloud_refresh_auth || log_warn "Auth refresh failed before batch ${batch_num}"
  local pids="" ba p
  for ba in "$@"; do
    run_single_agent "${ba}" "${log_dir}/${cloud}-${ba}.result" &
    pids="${pids} $!"
  done
  for p in ${pids}; do
    wait "${p}" 2>/dev/null || true
  done
  for ba in "$@"; do
    _e2e_record_result "${ba}"
  done
}

#######################################
# run_agents_for_cloud CLOUD LOG_DIR
# Runs all agents for a single cloud. Supports parallel batching.
# Writes per-agent results to LOG_DIR/{cloud}-{agent}.result.
# Writes cloud summary to LOG_DIR/{cloud}.summary
# (format: "<pass_count> <fail_count> <duration> <passed...> |<failed...>").
# Returns 0 when every agent passed (or the cloud was skipped), 1 otherwise.
#######################################
run_agents_for_cloud() {
  local cloud="$1"
  local log_dir="$2"
  local cloud_start
  cloud_start=$(date +%s)
  # Load the cloud driver
  load_cloud_driver "${cloud}"
  # Set log prefix for multi-cloud output
  if [ "${cloud_count}" -gt 1 ]; then
    CLOUD_LOG_PREFIX="[${cloud}] "
  fi
  log_header "E2E Tests: ${cloud}"
  log_info "Agents: ${AGENTS_TO_TEST}"
  # Validate environment for this cloud; a missing-credentials cloud is a
  # skip (return 0), not a failure — the summary parser recognizes SKIPPED.
  if ! require_env; then
    log_warn "Credentials not configured for ${cloud} — skipping"
    printf 'SKIPPED (no credentials)' > "${log_dir}/${cloud}.summary"
    return 0
  fi
  local cloud_passed=""
  local cloud_failed=""
  # Pre-run stale cleanup: remove orphaned e2e instances from previous
  # interrupted runs before starting new agents. Uses a shorter max_age (5 min)
  # than the default (30 min) so that orphans from recently-failed runs are
  # cleaned before they can exhaust the account's instance quota (#2793).
  if [ "${SKIP_CLEANUP}" -eq 0 ]; then
    _CLEANUP_MAX_AGE=300 cloud_cleanup_stale || log_warn "Pre-run stale cleanup encountered errors"
  fi
  # Resolve effective parallelism (respect per-cloud cap)
  local effective_parallel="${PARALLEL_COUNT}"
  if [ "${SEQUENTIAL_MODE}" -eq 0 ]; then
    local cloud_max
    cloud_max=$(cloud_max_parallel)
    if [ "${effective_parallel}" -gt "${cloud_max}" ]; then
      effective_parallel="${cloud_max}"
    fi
  fi
  # Bail out early if the cloud reports zero capacity (e.g. droplet limit reached).
  # All agents would fail anyway — skip with an actionable error instead of wasting
  # time on retries that cannot succeed. (#3059)
  local agent
  if [ "${effective_parallel}" -eq 0 ] && [ "${SEQUENTIAL_MODE}" -eq 0 ]; then
    log_err "No capacity available on ${cloud} — all ${cloud} agents will be marked as failed."
    log_err "Delete existing instances or request a limit increase, then re-run."
    for agent in ${AGENTS_TO_TEST}; do
      printf 'fail' > "${log_dir}/${cloud}-${agent}.result"
      cloud_failed="${cloud_failed:+${cloud_failed} }${agent}"
    done
    printf '%s %s %s %s %s' "0" "$(printf '%s\n' "${AGENTS_TO_TEST}" | wc -w | tr -d ' ')" "0s" "" "|${cloud_failed}" \
      > "${log_dir}/${cloud}.summary"
    return 1
  fi
  if [ "${effective_parallel}" -gt 0 ] && [ "${SEQUENTIAL_MODE}" -eq 0 ]; then
    # Parallel mode: group agents into batches of effective_parallel and hand
    # each batch to _e2e_run_batch (launch, wait, collect).
    log_info "Running agents in parallel (batch size: ${effective_parallel})"
    local batch_agents=""
    local batch_count=0
    local batch_num=0
    for agent in ${AGENTS_TO_TEST}; do
      batch_agents="${batch_agents} ${agent}"
      batch_count=$((batch_count + 1))
      if [ "${batch_count}" -ge "${effective_parallel}" ]; then
        batch_num=$((batch_num + 1))
        # shellcheck disable=SC2086 # batch_agents is a space-separated list
        _e2e_run_batch "${batch_num}" ${batch_agents}
        batch_agents=""
        batch_count=0
      fi
    done
    # Handle remaining agents in last partial batch
    if [ -n "${batch_agents}" ]; then
      batch_num=$((batch_num + 1))
      # shellcheck disable=SC2086 # batch_agents is a space-separated list
      _e2e_run_batch "${batch_num}" ${batch_agents}
    fi
  else
    # Sequential mode: one agent at a time, collecting each result as we go.
    for agent in ${AGENTS_TO_TEST}; do
      run_single_agent "${agent}" "${log_dir}/${cloud}-${agent}.result"
      _e2e_record_result "${agent}"
    done
  fi
  # Stale cleanup
  if [ "${SKIP_CLEANUP}" -eq 0 ]; then
    cloud_cleanup_stale || log_warn "Stale cleanup encountered errors"
  fi
  # Write cloud summary
  local cloud_end
  cloud_end=$(date +%s)
  local cloud_duration=$((cloud_end - cloud_start))
  local cloud_duration_str
  cloud_duration_str=$(format_duration "${cloud_duration}")
  local pass_count=0
  local fail_count=0
  if [ -n "${cloud_passed}" ]; then pass_count=$(printf '%s\n' "${cloud_passed}" | wc -w | tr -d ' '); fi
  if [ -n "${cloud_failed}" ]; then fail_count=$(printf '%s\n' "${cloud_failed}" | wc -w | tr -d ' '); fi
  printf '%s %s %s %s %s' "${pass_count}" "${fail_count}" "${cloud_duration_str}" "${cloud_passed}" "|${cloud_failed}" \
    > "${log_dir}/${cloud}.summary"
  if [ "${fail_count}" -gt 0 ]; then
    return 1
  fi
  return 0
}
# ---------------------------------------------------------------------------
# send_matrix_email LOG_DIR CLOUDS AGENTS TOTAL_PASS TOTAL_FAIL DURATION_STR
#
# Sends an agent x cloud matrix report via Resend.
# Requires: RESEND_API_KEY, KEY_REQUEST_EMAIL env vars (silently skips if absent).
# ---------------------------------------------------------------------------
#######################################
# send_matrix_email LOG_DIR CLOUDS AGENTS TOTAL_PASS TOTAL_FAIL DURATION_STR
#
# Sends an agent x cloud matrix report via Resend (rendered by an embedded
# TypeScript program executed with bun).
# Requires: RESEND_API_KEY, KEY_REQUEST_EMAIL env vars (silently skips if absent).
# Set SPAWN_E2E_SKIP_EMAIL=1 to suppress the email entirely.
# Returns 0 in all cases; a failed send is logged as a warning only.
#######################################
send_matrix_email() {
  local log_dir="$1"
  local clouds="$2"
  local agents="$3"
  local total_pass="$4"
  local total_fail="$5"
  local duration_str="$6"
  # Skip email for targeted re-runs (partial agent/cloud subset).
  # Set SPAWN_E2E_SKIP_EMAIL=1 to suppress the email (used by quality cycle
  # when re-running only failed agents — a partial email looks like all-passed).
  if [ "${SPAWN_E2E_SKIP_EMAIL:-0}" = "1" ]; then
    log_info "Matrix email skipped (SPAWN_E2E_SKIP_EMAIL=1)"
    return 0
  fi
  local resend_key="${RESEND_API_KEY:-}"
  local to_email="${KEY_REQUEST_EMAIL:-}"
  if [ -z "${resend_key}" ] || [ -z "${to_email}" ]; then
    log_info "Matrix email skipped (RESEND_API_KEY or KEY_REQUEST_EMAIL not set)"
    return 0
  fi
  # Build results string: "cloud:agent:result,..." for bun to process.
  # Missing result files (skipped clouds) are reported as "skip".
  local results=""
  local cloud agent
  for cloud in ${clouds}; do
    for agent in ${agents}; do
      local result="skip"
      local result_file="${log_dir}/${cloud}-${agent}.result"
      if [ -f "${result_file}" ]; then
        result=$(cat "${result_file}")
      fi
      if [ -n "${results}" ]; then results="${results},"; fi
      results="${results}${cloud}:${agent}:${result}"
    done
  done
  local ts_file
  # Honor TMPDIR like every other mktemp in this script (previously the
  # template was hard-coded under /tmp).
  ts_file=$(mktemp "${TMPDIR:-/tmp}/e2e-email-XXXXXX.ts")
  # Quoted delimiter ('TS_EOF') — the TS source is written verbatim, no shell
  # expansion. All run parameters are passed via _E2E_* env vars below.
  cat > "${ts_file}" << 'TS_EOF'
const results = (process.env._E2E_RESULTS ?? "").split(",").filter(Boolean);
const clouds = (process.env._E2E_CLOUDS ?? "").split(" ").filter(Boolean);
const agents = (process.env._E2E_AGENTS ?? "").split(" ").filter(Boolean);
const totalPass = process.env._E2E_TOTAL_PASS ?? "0";
const totalFail = process.env._E2E_TOTAL_FAIL ?? "0";
const duration = process.env._E2E_DURATION ?? "?";
const toEmail = process.env.KEY_REQUEST_EMAIL ?? "";
const resendKey = process.env.RESEND_API_KEY ?? "";
const timestamp = new Date().toUTCString();
// Build lookup map: "cloud:agent" -> result
const resultMap: Record<string, string> = {};
for (const entry of results) {
const parts = entry.split(":");
resultMap[`${parts[0]}:${parts[1]}`] = parts[2] ?? "skip";
}
// Cell styles per result
const cellStyle = (result: string): string => {
if (result === "pass") return "background:#22c55e;color:#fff;font-weight:bold;padding:4px 10px;border-radius:4px;";
if (result === "fail") return "background:#ef4444;color:#fff;font-weight:bold;padding:4px 10px;border-radius:4px;";
return "background:#e2e8f0;color:#94a3b8;padding:4px 10px;border-radius:4px;";
};
const headerCells = clouds
.map(c => `<th style="padding:8px 14px;background:#1e293b;color:#fff;text-transform:uppercase;font-size:11px;letter-spacing:.05em;">${c}</th>`)
.join("");
const bodyRows = agents
.map(agent => {
const cells = clouds
.map(cloud => {
const r = resultMap[`${cloud}:${agent}`] ?? "skip";
return `<td style="padding:6px 14px;text-align:center;"><span style="${cellStyle(r)}">${r.toUpperCase()}</span></td>`;
})
.join("");
return `<tr><td style="padding:6px 14px;font-weight:600;white-space:nowrap;color:#1e293b;">${agent}</td>${cells}</tr>`;
})
.join("");
const status = totalFail === "0" ? "✅ All Passed" : `❌ ${totalFail} Failed`;
const html = `<!DOCTYPE html>
<html><body style="font-family:system-ui,-apple-system,sans-serif;max-width:860px;margin:0 auto;padding:24px;color:#1e293b;">
<h2 style="margin:0 0 4px;">${status} — Spawn E2E Matrix</h2>
<p style="margin:0 0 20px;color:#64748b;font-size:14px;">Completed ${timestamp}</p>
<table style="border-collapse:collapse;width:100%;">
<thead>
<tr>
<th style="padding:8px 14px;background:#1e293b;color:#fff;text-align:left;font-size:11px;text-transform:uppercase;letter-spacing:.05em;">Agent</th>
${headerCells}
</tr>
</thead>
<tbody>
${bodyRows}
</tbody>
</table>
<p style="margin-top:18px;color:#64748b;font-size:13px;">
<strong style="color:#1e293b;">Total:</strong> ${totalPass} passed, ${totalFail} failed
&nbsp;·&nbsp;
<strong style="color:#1e293b;">Duration:</strong> ${duration}
</p>
</body></html>`;
const subject = totalFail === "0"
? `✅ E2E Matrix: ${totalPass} passed · ${duration}`
: `❌ E2E Matrix: ${totalFail} failed, ${totalPass} passed · ${duration}`;
const res = await fetch("https://api.resend.com/emails", {
method: "POST",
headers: {
"Content-Type": "application/json",
"Authorization": `Bearer ${resendKey}`,
},
body: JSON.stringify({
from: "Spawn QA <onboarding@resend.dev>",
to: [toEmail],
subject,
html,
}),
});
if (!res.ok) {
const body = await res.text();
console.error(`Resend API error ${res.status}: ${body}`);
process.exit(1);
}
console.log(`Matrix email sent to ${toEmail}`);
TS_EOF
  log_info "Sending matrix email to ${to_email}..."
  # A send failure must not fail the whole run — warn and move on.
  _E2E_RESULTS="${results}" \
  _E2E_CLOUDS="${clouds}" \
  _E2E_AGENTS="${agents}" \
  _E2E_TOTAL_PASS="${total_pass}" \
  _E2E_TOTAL_FAIL="${total_fail}" \
  _E2E_DURATION="${duration_str}" \
  bun run "${ts_file}" 2>&1 || log_warn "Failed to send matrix email"
  rm -f "${ts_file}" 2>/dev/null || true
}
# ---------------------------------------------------------------------------
# Final cleanup trap
# ---------------------------------------------------------------------------
#######################################
# final_cleanup — EXIT trap handler.
# Tears down any instances still registered in _TRACKED_APPS, then removes
# the temporary LOG_DIR created by mktemp.
# Globals: _TRACKED_APPS (read), LOG_DIR (read, directory removed)
#######################################
final_cleanup() {
  if [ -n "${_TRACKED_APPS}" ]; then
    printf "\n"
    log_warn "Cleaning up tracked instances on exit..."
    local app
    for app in ${_TRACKED_APPS}; do
      log_step "Tearing down ${app}..."
      teardown_agent "${app}" 2>/dev/null || log_warn "Failed to tear down ${app}"
    done
  fi
  if [ -n "${LOG_DIR:-}" ] && [ -d "${LOG_DIR:-}" ]; then
    # Guard the rm -rf: only delete paths matching our own mktemp template.
    # The pattern must mirror the template used at creation time
    # ("${TMPDIR:-/tmp}/spawn-e2e.XXXXXX") — a hard-coded /tmp/spawn-e2e.*
    # pattern leaked log dirs whenever TMPDIR pointed elsewhere (e.g. macOS).
    case "${LOG_DIR}" in
      "${TMPDIR:-/tmp}"/spawn-e2e.*)
        rm -rf -- "${LOG_DIR}"
        ;;
      *)
        log_warn "Refusing to rm -rf unexpected path: ${LOG_DIR}"
        ;;
    esac
  fi
}
# Ensure teardown/log cleanup runs on every exit path (success, failure, signal).
trap final_cleanup EXIT
# ---------------------------------------------------------------------------
# Main
# ---------------------------------------------------------------------------
log_header "Spawn E2E Test Suite (Multi-Cloud)"
log_info "Clouds: ${CLOUDS}"
log_info "Agents: ${AGENTS_TO_TEST}"
if [ "${SEQUENTIAL_MODE}" -eq 1 ]; then
log_info "Agent parallelism: sequential"
elif [ "${PARALLEL_COUNT}" -ge 99 ]; then
# 99 is the "no --parallel given" sentinel set during arg parsing.
log_info "Agent parallelism: all at once (per-cloud caps may apply)"
else
log_info "Agent parallelism: ${PARALLEL_COUNT} per cloud"
fi
if [ "${SKIP_INPUT_TEST}" -eq 1 ]; then
log_info "Input tests: SKIPPED"
fi
if [ "${FAST_MODE}" -eq 1 ]; then
log_info "Fast mode: ENABLED (--fast passed to spawn)"
fi
# Export FAST_MODE so provision.sh can read it
export E2E_FAST_MODE="${FAST_MODE}"
# Create temp log directory
LOG_DIR=$(mktemp -d "${TMPDIR:-/tmp}/spawn-e2e.XXXXXX")
export LOG_DIR
log_info "Log directory: ${LOG_DIR}"
START_TIME=$(date +%s)
# ---------------------------------------------------------------------------
# Execute: single-cloud or multi-cloud
# ---------------------------------------------------------------------------
if [ "${cloud_count}" -eq 1 ]; then
# Single cloud — run directly in this process.
# '|| true' so the summary below still renders on failure; overall exit
# status is derived from the per-agent .result files instead.
run_agents_for_cloud "${CLOUDS}" "${LOG_DIR}" || true
else
# Multi-cloud — each cloud runs as a separate background process
cloud_pids=""
for cloud in ${CLOUDS}; do
(
# Reset parent's EXIT trap — the main process handles LOG_DIR cleanup
trap - EXIT
_TRACKED_APPS=""
run_agents_for_cloud "${cloud}" "${LOG_DIR}"
) > "${LOG_DIR}/${cloud}.log" 2>&1 &
cloud_pid=$!
if [ -z "${cloud_pids}" ]; then
cloud_pids="${cloud_pid}"
else
cloud_pids="${cloud_pids} ${cloud_pid}"
fi
log_info "Started ${cloud} tests (PID: ${cloud_pid})"
done
# Wait for all clouds to finish
any_failed=0
# NOTE(review): any_failed is never read after this loop — the overall
# pass/fail is computed from the .result files in the summary below.
for pid in ${cloud_pids}; do
wait "${pid}" 2>/dev/null || any_failed=1
done
# Print per-cloud logs (each cloud's output was redirected to its own file
# so parallel runs don't interleave)
for cloud in ${CLOUDS}; do
if [ -f "${LOG_DIR}/${cloud}.log" ]; then
printf "\n"
log_header "Output: ${cloud}"
cat "${LOG_DIR}/${cloud}.log"
fi
done
fi
# ---------------------------------------------------------------------------
# Unified Summary
# ---------------------------------------------------------------------------
END_TIME=$(date +%s)
TOTAL_DURATION=$((END_TIME - START_TIME))
DURATION_STR=$(format_duration "${TOTAL_DURATION}")
printf "\n"
log_header "E2E Test Summary"
total_pass=0
total_fail=0
any_cloud_failed=0
for cloud in ${CLOUDS}; do
printf "\n ${BOLD}%s:${NC}\n" "${cloud}"
cloud_pass=0
cloud_fail=0
# NOTE(review): cloud_skip is initialized but never incremented or read.
cloud_skip=0
# Check if this cloud was skipped (no credentials) — no result files written
cloud_has_results=0
for agent in ${AGENTS_TO_TEST}; do
if [ -f "${LOG_DIR}/${cloud}-${agent}.result" ]; then
cloud_has_results=1
break
fi
done
if [ "${cloud_has_results}" -eq 0 ]; then
printf " ${YELLOW}(skipped — credentials not configured)${NC}\n"
continue
fi
# A missing result file counts as FAIL here (agent never completed).
for agent in ${AGENTS_TO_TEST}; do
result_file="${LOG_DIR}/${cloud}-${agent}.result"
if [ -f "${result_file}" ] && [ "$(cat "${result_file}")" = "pass" ]; then
printf " ${GREEN}%-12s PASS${NC}\n" "${agent}"
cloud_pass=$((cloud_pass + 1))
total_pass=$((total_pass + 1))
else
printf " ${RED}%-12s FAIL${NC}\n" "${agent}"
cloud_fail=$((cloud_fail + 1))
total_fail=$((total_fail + 1))
fi
done
if [ "${cloud_fail}" -gt 0 ]; then
printf " ${RED}%d passed, %d failed${NC}\n" "${cloud_pass}" "${cloud_fail}"
any_cloud_failed=1
else
printf " ${GREEN}%d passed, 0 failed${NC}\n" "${cloud_pass}"
fi
done
printf "\n"
printf " ${BOLD}Total:${NC} ${GREEN}%d passed${NC}" "${total_pass}"
if [ "${total_fail}" -gt 0 ]; then
printf ", ${RED}%d failed${NC}" "${total_fail}"
fi
printf "\n Duration: %s\n" "${DURATION_STR}"
# Send matrix email report (no-op without Resend credentials)
send_matrix_email "${LOG_DIR}" "${CLOUDS}" "${AGENTS_TO_TEST}" "${total_pass}" "${total_fail}" "${DURATION_STR}"
# Exit with failure if any agent on any cloud failed
if [ "${total_fail}" -gt 0 ]; then
exit 1
fi
# All tests passed — advance the e2e-last-green tag for diff-aware reviews
mark_e2e_green
exit 0