diff --git a/.claude/skills/setup-agent-team/qa-e2e-prompt.md b/.claude/skills/setup-agent-team/qa-e2e-prompt.md new file mode 100644 index 00000000..0786facb --- /dev/null +++ b/.claude/skills/setup-agent-team/qa-e2e-prompt.md @@ -0,0 +1,122 @@ +You are a single-agent QA E2E tester for the spawn codebase. + +## Mission + +Run the Fly.io E2E test suite, investigate any failures, and fix broken provisioning scripts or test infrastructure. + +## Time Budget + +Complete within 15 minutes. At 14 min stop new work and commit whatever progress you have. + +## Worktree Requirement + +**Work in a git worktree — NEVER in the main repo checkout.** + +```bash +git worktree add WORKTREE_BASE_PLACEHOLDER -b qa/e2e-fix origin/main +cd WORKTREE_BASE_PLACEHOLDER +``` + +## Step 1 — Run the E2E Suite + +```bash +cd REPO_ROOT_PLACEHOLDER +chmod +x e2e/fly-e2e.sh +./e2e/fly-e2e.sh --parallel 6 +``` + +Capture the full output. Note which agents passed and which failed. + +## Step 2 — If All Pass + +If every agent passes, you're done. Log the results and exit. No PR needed. + +## Step 3 — If Any Agent Fails + +For each failed agent, investigate the root cause. The failure categories are: + +### Provision failure (app does not exist after provisioning) + +1. Check the stderr log in the temp directory printed at the start of the run +2. Common causes: + - Missing env var for headless mode (e.g., `MODEL_ID` for openclaw) + - Fly.io API auth issues + - Agent-specific install script changed upstream +3. Read the agent's provisioning code: `cli/src/fly/agents.ts` and `cli/src/shared/agent-setup.ts` +4. Read the E2E provision script: `e2e/lib/provision.sh` + +### Verification failure (app exists but checks fail) + +1. SSH into the VM to investigate: + ```bash + flyctl machines list -a APP_NAME --json | jq -r '.[0].id' + flyctl machine exec MACHINE_ID -a APP_NAME --timeout 30 "bash -c 'ls -la ~; cat ~/.spawnrc; echo ---; env'" + ``` +2. 
Check if the binary path changed — read the agent's install script in `cli/src/shared/agent-setup.ts`
+3. Check if the env var names changed — read the agent's config in `manifest.json`
+4. Update the verification checks in `e2e/lib/verify.sh` if they are stale
+
+### Timeout (provision took too long)
+
+1. Check if `PROVISION_TIMEOUT` or `INSTALL_WAIT` need increasing
+2. Check if the agent's install script has a new heavy dependency
+
+## Step 4 — Fix
+
+Make fixes in the worktree at WORKTREE_BASE_PLACEHOLDER. Fixes may be in:
+
+- `e2e/lib/provision.sh` — env vars, timeouts, headless flags
+- `e2e/lib/verify.sh` — binary paths, config file locations, env var checks
+- `e2e/lib/common.sh` — API helpers, constants
+- `e2e/lib/teardown.sh` — cleanup logic
+- `e2e/lib/cleanup.sh` — stale app detection
+
+After fixing:
+1. Run `bash -n` on every modified `.sh` file
+2. From the worktree (so the fixed scripts are the ones that run), re-run the E2E suite for the failed agent(s) only to verify the fix:
+   ```bash
+   ./e2e/fly-e2e.sh AGENT_NAME
+   ```
+
+## Step 5 — Commit and PR
+
+1. Commit with a descriptive message:
+   ```
+   fix(e2e): [description of fix]
+
+   Co-Authored-By: Claude Opus 4.6
+   ```
+
+2. Push and open a PR:
+   ```bash
+   git push -u origin qa/e2e-fix
+   gh pr create --title "fix(e2e): [description]" --body "$(cat <<'EOF'
+   ## Summary
+   - [1-2 bullet points describing what broke and why]
+
+   ## E2E Results
+   - Passed: [list]
+   - Fixed: [list]
+
+   ## Test plan
+   - [ ] Re-ran E2E suite for affected agents
+   - [ ] `bash -n` passes on modified scripts
+
+   -- qa/e2e-tester
+   EOF
+   )"
+   ```
+
+3. Clean up worktree:
+   ```bash
+   cd REPO_ROOT_PLACEHOLDER && git worktree remove WORKTREE_BASE_PLACEHOLDER --force
+   ```
+
+## Safety
+
+- NEVER merge the PR — leave for review
+- Run `bash -n` on all modified scripts before committing
+- Only fix E2E infrastructure under `e2e/` — do NOT modify the agent provisioning code in `cli/src/` (read it for diagnosis only)
+- **SIGN-OFF**: `-- qa/e2e-tester`
+
+Begin now. Run the E2E suite.
diff --git a/.claude/skills/setup-agent-team/qa.sh b/.claude/skills/setup-agent-team/qa.sh index 962a3d86..5520c139 100644 --- a/.claude/skills/setup-agent-team/qa.sh +++ b/.claude/skills/setup-agent-team/qa.sh @@ -1,12 +1,13 @@ #!/bin/bash set -eo pipefail -# QA Service — Single Cycle (Tri-Mode) +# QA Service — Single Cycle (Quad-Mode) # Triggered by trigger-server.ts via GitHub Actions # # RUN_MODE=quality — agent team: test-runner + dedup-scanner + code-quality-reviewer (reason=schedule/workflow_dispatch, 35 min) # RUN_MODE=fixtures — single agent: collect API fixtures from cloud providers (reason=fixtures, 20 min) # RUN_MODE=issue — single agent: investigate and fix a specific issue (reason=issues, 15 min) +# RUN_MODE=e2e — single agent: run Fly.io E2E tests, investigate failures (reason=e2e, 20 min) SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" REPO_ROOT="$(cd "${SCRIPT_DIR}/../../.." && pwd)" @@ -22,7 +23,12 @@ if [[ -n "${SPAWN_ISSUE}" ]] && [[ ! "${SPAWN_ISSUE}" =~ ^[0-9]+$ ]]; then exit 1 fi -if [[ "${SPAWN_REASON}" == "issues" ]] && [[ -n "${SPAWN_ISSUE}" ]]; then +if [[ "${SPAWN_REASON}" == "e2e" ]]; then + RUN_MODE="e2e" + WORKTREE_BASE="/tmp/spawn-worktrees/qa-e2e" + TEAM_NAME="spawn-qa-e2e" + CYCLE_TIMEOUT=1200 # 20 min for E2E tests + investigation +elif [[ "${SPAWN_REASON}" == "issues" ]] && [[ -n "${SPAWN_ISSUE}" ]]; then RUN_MODE="issue" ISSUE_NUM="${SPAWN_ISSUE}" WORKTREE_BASE="/tmp/spawn-worktrees/qa-issue-${ISSUE_NUM}" @@ -217,6 +223,17 @@ elif [[ "${RUN_MODE}" == "issue" ]]; then sed -i "s|WORKTREE_BASE_PLACEHOLDER|${WORKTREE_BASE}|g" "${PROMPT_FILE}" sed -i "s|REPO_ROOT_PLACEHOLDER|${REPO_ROOT}|g" "${PROMPT_FILE}" +elif [[ "${RUN_MODE}" == "e2e" ]]; then + PROMPT_TEMPLATE="${SCRIPT_DIR}/qa-e2e-prompt.md" + if [[ ! 
-f "$PROMPT_TEMPLATE" ]]; then + log "ERROR: qa-e2e-prompt.md not found at $PROMPT_TEMPLATE" + exit 1 + fi + cat "$PROMPT_TEMPLATE" > "${PROMPT_FILE}" + + sed -i "s|WORKTREE_BASE_PLACEHOLDER|${WORKTREE_BASE}|g" "${PROMPT_FILE}" + sed -i "s|REPO_ROOT_PLACEHOLDER|${REPO_ROOT}|g" "${PROMPT_FILE}" + fi # Add grace period: 5 min beyond the prompt timeout diff --git a/.github/workflows/qa.yml b/.github/workflows/qa.yml index 42c5608c..2c1b5d81 100644 --- a/.github/workflows/qa.yml +++ b/.github/workflows/qa.yml @@ -4,7 +4,18 @@ name: Daily QA # schedule: # - cron: '0 6 * * *' # workflow_dispatch: -on: workflow_dispatch +on: + workflow_dispatch: + inputs: + reason: + description: 'QA mode to trigger' + required: false + default: 'e2e' + type: choice + options: + - e2e + - schedule + - fixtures jobs: trigger: runs-on: ubuntu-latest @@ -15,6 +26,7 @@ jobs: SPRITE_URL: ${{ secrets.QA_SPRITE_URL }} TRIGGER_SECRET: ${{ secrets.QA_TRIGGER_SECRET }} run: | + REASON="${{ github.event.inputs.reason || 'e2e' }}" curl -sS --fail-with-body -X POST \ - "${SPRITE_URL}/trigger?reason=${{ github.event_name }}" \ + "${SPRITE_URL}/trigger?reason=${REASON}" \ -H "Authorization: Bearer ${TRIGGER_SECRET}" diff --git a/e2e/fly-e2e.sh b/e2e/fly-e2e.sh new file mode 100755 index 00000000..7c292485 --- /dev/null +++ b/e2e/fly-e2e.sh @@ -0,0 +1,330 @@ +#!/bin/bash +# e2e/fly-e2e.sh — Main E2E test orchestrator for Spawn on Fly.io +# +# Usage: +# ./e2e/fly-e2e.sh # All agents, sequential +# ./e2e/fly-e2e.sh claude # Single agent +# ./e2e/fly-e2e.sh claude codex opencode # Specific agents +# ./e2e/fly-e2e.sh --parallel 2 # Parallel (2 at a time) +# ./e2e/fly-e2e.sh --skip-cleanup # Skip stale app cleanup +set -eo pipefail + +# --------------------------------------------------------------------------- +# Resolve script directory and source libraries +# --------------------------------------------------------------------------- +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +source 
"${SCRIPT_DIR}/lib/common.sh" +source "${SCRIPT_DIR}/lib/provision.sh" +source "${SCRIPT_DIR}/lib/verify.sh" +source "${SCRIPT_DIR}/lib/teardown.sh" +source "${SCRIPT_DIR}/lib/cleanup.sh" + +# --------------------------------------------------------------------------- +# Parse arguments +# --------------------------------------------------------------------------- +AGENTS_TO_TEST="" +PARALLEL_COUNT=0 +SKIP_CLEANUP=0 + +while [ $# -gt 0 ]; do + case "$1" in + --parallel) + shift + if [ $# -eq 0 ]; then + printf "Error: --parallel requires a number\n" >&2 + exit 1 + fi + PARALLEL_COUNT="$1" + shift + ;; + --skip-cleanup) + SKIP_CLEANUP=1 + shift + ;; + --help|-h) + printf "Usage: %s [agent1 agent2 ...] [--parallel N] [--skip-cleanup]\n" "$0" + printf "\nAgents: %s\n" "${ALL_AGENTS}" + printf "\nOptions:\n" + printf " --parallel N Run N agents in parallel (default: sequential)\n" + printf " --skip-cleanup Skip stale e2e-* app cleanup\n" + printf " --help Show this help\n" + exit 0 + ;; + -*) + printf "Unknown option: %s\n" "$1" >&2 + exit 1 + ;; + *) + # Validate agent name + local_valid=0 + for a in ${ALL_AGENTS}; do + if [ "$1" = "${a}" ]; then + local_valid=1 + break + fi + done + if [ "${local_valid}" -eq 0 ]; then + printf "Unknown agent: %s\nAvailable: %s\n" "$1" "${ALL_AGENTS}" >&2 + exit 1 + fi + if [ -z "${AGENTS_TO_TEST}" ]; then + AGENTS_TO_TEST="$1" + else + AGENTS_TO_TEST="${AGENTS_TO_TEST} $1" + fi + shift + ;; + esac +done + +# Default to all agents +if [ -z "${AGENTS_TO_TEST}" ]; then + AGENTS_TO_TEST="${ALL_AGENTS}" +fi + +# --------------------------------------------------------------------------- +# Final cleanup trap — tear down any tracked apps on exit +# --------------------------------------------------------------------------- +final_cleanup() { + if [ -n "${_TRACKED_APPS}" ]; then + printf "\n" + log_warn "Cleaning up tracked apps on exit..." + for app in ${_TRACKED_APPS}; do + log_step "Tearing down ${app}..." 
+      teardown_agent "${app}" 2>/dev/null || log_warn "Failed to tear down ${app}"
+    done
+  fi
+  # Remove the temp log directory, but keep it when any agent failed so the per-agent stdout/stderr logs can still be inspected
+  if [ -n "${LOG_DIR:-}" ] && [ -d "${LOG_DIR:-}" ] && [ -z "${FAILED:-}" ]; then
+    rm -rf "${LOG_DIR}"
+  fi
+}
+trap final_cleanup EXIT
+
+# ---------------------------------------------------------------------------
+# Main
+# ---------------------------------------------------------------------------
+log_header "Spawn E2E Test Suite (Fly.io)"
+log_info "Agents: ${AGENTS_TO_TEST}"
+if [ "${PARALLEL_COUNT}" -gt 0 ] 2>/dev/null; then log_info "Parallel: ${PARALLEL_COUNT}"; else log_info "Parallel: sequential"; fi
+
+# Validate environment
+if ! require_env; then
+  log_err "Environment validation failed"
+  exit 1
+fi
+
+# Create temp log directory
+LOG_DIR=$(mktemp -d "${TMPDIR:-/tmp}/spawn-e2e.XXXXXX")
+log_info "Log directory: ${LOG_DIR}"
+
+START_TIME=$(date +%s)
+
+# Result tracking (space-separated lists)
+PASSED=""
+FAILED=""
+SKIPPED=""
+
+# ---------------------------------------------------------------------------
+# run_single_agent AGENT
+#
+# Provisions, verifies, and tears down a single agent.
+# Sets result in a temp file for parallel collection.
+# --------------------------------------------------------------------------- +run_single_agent() { + local agent="$1" + local result_file="${2:-}" + local agent_start + agent_start=$(date +%s) + + log_header "Testing agent: ${agent}" + + local app_name + app_name=$(make_app_name "${agent}") + track_app "${app_name}" + + local status="fail" + + # Provision + if provision_agent "${agent}" "${app_name}" "${LOG_DIR}"; then + # Verify + if verify_agent "${agent}" "${app_name}"; then + status="pass" + fi + fi + + # Teardown (always attempt) + teardown_agent "${app_name}" || log_warn "Teardown failed for ${app_name}" + + local agent_end + agent_end=$(date +%s) + local agent_duration=$((agent_end - agent_start)) + local duration_str + duration_str=$(format_duration "${agent_duration}") + + if [ "${status}" = "pass" ]; then + log_ok "${agent} PASSED (${duration_str})" + else + log_err "${agent} FAILED (${duration_str})" + fi + + # Write result to file (for parallel collection) + if [ -n "${result_file}" ]; then + printf '%s' "${status}" > "${result_file}" + else + # Sequential mode: update global variables directly + if [ "${status}" = "pass" ]; then + if [ -z "${PASSED}" ]; then PASSED="${agent}"; else PASSED="${PASSED} ${agent}"; fi + else + if [ -z "${FAILED}" ]; then FAILED="${agent}"; else FAILED="${FAILED} ${agent}"; fi + fi + fi +} + +# --------------------------------------------------------------------------- +# Execute tests +# --------------------------------------------------------------------------- +if [ "${PARALLEL_COUNT}" -gt 0 ]; then + # Parallel mode: batch agents into groups of N + log_info "Running in parallel mode (batch size: ${PARALLEL_COUNT})" + + # Convert agent list to indexed array + agent_array="" + agent_count=0 + for a in ${AGENTS_TO_TEST}; do + agent_array="${agent_array} ${a}" + agent_count=$((agent_count + 1)) + done + + batch_num=0 + batch_agents="" + batch_count=0 + + for agent in ${agent_array}; do + batch_agents="${batch_agents} 
${agent}" + batch_count=$((batch_count + 1)) + + if [ "${batch_count}" -ge "${PARALLEL_COUNT}" ]; then + # Run this batch + batch_num=$((batch_num + 1)) + log_header "Batch ${batch_num}" + + pids="" + for ba in ${batch_agents}; do + local_result_file="${LOG_DIR}/${ba}.result" + run_single_agent "${ba}" "${local_result_file}" & + if [ -z "${pids}" ]; then pids="$!"; else pids="${pids} $!"; fi + done + + # Wait for all PIDs in this batch + for p in ${pids}; do + wait "${p}" 2>/dev/null || true + done + + # Collect results + for ba in ${batch_agents}; do + local_result_file="${LOG_DIR}/${ba}.result" + if [ -f "${local_result_file}" ]; then + local_result=$(cat "${local_result_file}") + if [ "${local_result}" = "pass" ]; then + if [ -z "${PASSED}" ]; then PASSED="${ba}"; else PASSED="${PASSED} ${ba}"; fi + else + if [ -z "${FAILED}" ]; then FAILED="${ba}"; else FAILED="${FAILED} ${ba}"; fi + fi + else + if [ -z "${FAILED}" ]; then FAILED="${ba}"; else FAILED="${FAILED} ${ba}"; fi + fi + done + + batch_agents="" + batch_count=0 + fi + done + + # Handle remaining agents in last partial batch + if [ -n "${batch_agents}" ]; then + batch_num=$((batch_num + 1)) + log_header "Batch ${batch_num}" + + pids="" + for ba in ${batch_agents}; do + local_result_file="${LOG_DIR}/${ba}.result" + run_single_agent "${ba}" "${local_result_file}" & + if [ -z "${pids}" ]; then pids="$!"; else pids="${pids} $!"; fi + done + + for p in ${pids}; do + wait "${p}" 2>/dev/null || true + done + + for ba in ${batch_agents}; do + local_result_file="${LOG_DIR}/${ba}.result" + if [ -f "${local_result_file}" ]; then + local_result=$(cat "${local_result_file}") + if [ "${local_result}" = "pass" ]; then + if [ -z "${PASSED}" ]; then PASSED="${ba}"; else PASSED="${PASSED} ${ba}"; fi + else + if [ -z "${FAILED}" ]; then FAILED="${ba}"; else FAILED="${FAILED} ${ba}"; fi + fi + else + if [ -z "${FAILED}" ]; then FAILED="${ba}"; else FAILED="${FAILED} ${ba}"; fi + fi + done + fi + +else + # Sequential mode + 
for agent in ${AGENTS_TO_TEST}; do + run_single_agent "${agent}" + done +fi + +# --------------------------------------------------------------------------- +# Stale cleanup +# --------------------------------------------------------------------------- +if [ "${SKIP_CLEANUP}" -eq 0 ]; then + cleanup_stale_apps || log_warn "Stale cleanup encountered errors" +fi + +# --------------------------------------------------------------------------- +# Summary +# --------------------------------------------------------------------------- +END_TIME=$(date +%s) +TOTAL_DURATION=$((END_TIME - START_TIME)) +DURATION_STR=$(format_duration "${TOTAL_DURATION}") + +# Count results +pass_count=0 +fail_count=0 +skip_count=0 + +for _ in ${PASSED}; do pass_count=$((pass_count + 1)); done +for _ in ${FAILED}; do fail_count=$((fail_count + 1)); done +for _ in ${SKIPPED}; do skip_count=$((skip_count + 1)); done + +printf "\n" +log_header "E2E Test Summary" +printf "${GREEN} Passed: %d${NC}\n" "${pass_count}" +if [ "${fail_count}" -gt 0 ]; then + printf "${RED} Failed: %d${NC}\n" "${fail_count}" +else + printf " Failed: %d\n" "${fail_count}" +fi +if [ "${skip_count}" -gt 0 ]; then + printf "${YELLOW} Skipped: %d${NC}\n" "${skip_count}" +fi +printf " Duration: %s\n" "${DURATION_STR}" + +if [ -n "${PASSED}" ]; then + printf "${GREEN} Passed agents: %s${NC}\n" "${PASSED}" +fi +if [ -n "${FAILED}" ]; then + printf "${RED} Failed agents: %s${NC}\n" "${FAILED}" +fi + +# Exit with failure if any agent failed +if [ "${fail_count}" -gt 0 ]; then + exit 1 +fi + +exit 0 diff --git a/e2e/lib/cleanup.sh b/e2e/lib/cleanup.sh new file mode 100644 index 00000000..d52a4852 --- /dev/null +++ b/e2e/lib/cleanup.sh @@ -0,0 +1,70 @@ +#!/bin/bash +# e2e/lib/cleanup.sh — Find and destroy stale e2e-* apps +set -eo pipefail + +# --------------------------------------------------------------------------- +# cleanup_stale_apps +# +# Lists all apps in the org, filters for e2e-* pattern, and tears down any +# older than 
30 minutes (based on the unix timestamp embedded in the name). +# --------------------------------------------------------------------------- +cleanup_stale_apps() { + log_header "Cleaning up stale e2e apps" + + local now + now=$(date +%s) + local max_age=1800 # 30 minutes in seconds + + # List all apps via REST API + local apps_json + apps_json=$(fly_api GET "/apps?org_slug=personal" 2>/dev/null || true) + + if [ -z "${apps_json}" ] || [ "${apps_json}" = "null" ]; then + log_info "Could not list apps — skipping cleanup" + return 0 + fi + + # Extract app names matching e2e-* pattern + local app_names + app_names=$(printf '%s' "${apps_json}" | jq -r '.apps[]?.name // empty' 2>/dev/null | grep '^e2e-' || true) + + if [ -z "${app_names}" ]; then + log_ok "No stale e2e apps found" + return 0 + fi + + local cleaned=0 + local skipped=0 + + for app_name in ${app_names}; do + # Extract timestamp from name: e2e-AGENT-TIMESTAMP + # The timestamp is the last dash-separated segment + local ts + ts=$(printf '%s' "${app_name}" | sed 's/.*-//') + + # Validate it looks like a unix timestamp (all digits, 10 chars) + if ! 
printf '%s' "${ts}" | grep -qE '^[0-9]{10}$'; then + log_warn "Skipping ${app_name} — cannot parse timestamp" + skipped=$((skipped + 1)) + continue + fi + + local age=$((now - ts)) + if [ "${age}" -gt "${max_age}" ]; then + local age_str + age_str=$(format_duration "${age}") + log_step "Destroying stale app ${app_name} (age: ${age_str})" + teardown_agent "${app_name}" || log_warn "Failed to tear down ${app_name}" + cleaned=$((cleaned + 1)) + else + skipped=$((skipped + 1)) + fi + done + + if [ "${cleaned}" -gt 0 ]; then + log_ok "Cleaned ${cleaned} stale app(s)" + fi + if [ "${skipped}" -gt 0 ]; then + log_info "Skipped ${skipped} recent app(s)" + fi +} diff --git a/e2e/lib/common.sh b/e2e/lib/common.sh new file mode 100644 index 00000000..c9b0e9e3 --- /dev/null +++ b/e2e/lib/common.sh @@ -0,0 +1,184 @@ +#!/bin/bash +# e2e/lib/common.sh — Constants, logging, env validation, Fly API helpers +set -eo pipefail + +# --------------------------------------------------------------------------- +# Constants +# --------------------------------------------------------------------------- +ALL_AGENTS="claude openclaw zeroclaw codex opencode kilocode" +FLY_API_BASE="https://api.machines.dev/v1" +PROVISION_TIMEOUT="${PROVISION_TIMEOUT:-480}" +INSTALL_WAIT="${INSTALL_WAIT:-120}" +FLY_REGION="${FLY_REGION:-iad}" +FLY_VM_MEMORY="${FLY_VM_MEMORY:-2048}" + +# Colors +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +BLUE='\033[0;34m' +CYAN='\033[0;36m' +BOLD='\033[1m' +NC='\033[0m' + +# Tracked apps for cleanup on exit +_TRACKED_APPS="" + +# --------------------------------------------------------------------------- +# Logging +# --------------------------------------------------------------------------- +log_header() { + printf "\n${BOLD}${BLUE}=== %s ===${NC}\n" "$1" +} + +log_step() { + printf "${CYAN} -> %s${NC}\n" "$1" +} + +log_ok() { + printf "${GREEN} [PASS] %s${NC}\n" "$1" +} + +log_err() { + printf "${RED} [FAIL] %s${NC}\n" "$1" +} + +log_warn() { + printf 
"${YELLOW} [WARN] %s${NC}\n" "$1" +} + +log_info() { + printf "${BLUE} [INFO] %s${NC}\n" "$1" +} + +# --------------------------------------------------------------------------- +# Environment validation +# --------------------------------------------------------------------------- +require_env() { + local missing=0 + + # Check required tools + if ! command -v flyctl >/dev/null 2>&1; then + log_err "flyctl not found. Install from https://fly.io/docs/flyctl/install/" + missing=1 + fi + + if ! command -v jq >/dev/null 2>&1; then + log_err "jq not found. Install via: brew install jq / apt install jq" + missing=1 + fi + + if ! command -v bun >/dev/null 2>&1; then + log_err "bun not found. Install from https://bun.sh" + missing=1 + fi + + # Check OPENROUTER_API_KEY + if [ -z "${OPENROUTER_API_KEY:-}" ]; then + log_err "OPENROUTER_API_KEY is not set" + missing=1 + fi + + # Check / generate FLY_API_TOKEN + if [ -z "${FLY_API_TOKEN:-}" ]; then + log_info "FLY_API_TOKEN not set, generating via flyctl..." + FLY_API_TOKEN=$(flyctl tokens create org personal --expiry 2h 2>/dev/null || true) + if [ -z "${FLY_API_TOKEN:-}" ]; then + log_warn "Could not generate token. Falling back to flyctl stored credentials." + # Validate flyctl is authenticated + if ! flyctl auth whoami >/dev/null 2>&1; then + log_err "flyctl is not authenticated. Run: flyctl auth login" + missing=1 + fi + else + export FLY_API_TOKEN + log_ok "Generated FLY_API_TOKEN (expires in 2h)" + fi + fi + + if [ "${missing}" -eq 1 ]; then + return 1 + fi + + log_ok "Environment validated" + return 0 +} + +# --------------------------------------------------------------------------- +# Fly API helper +# --------------------------------------------------------------------------- +# fly_api METHOD ENDPOINT [BODY] +# Calls the Fly Machines REST API. 
+fly_api() { + local method="$1" + local endpoint="$2" + local body="${3:-}" + local url="${FLY_API_BASE}${endpoint}" + local auth_header + + # Detect token format for auth header + local token="${FLY_API_TOKEN:-}" + if [ -z "${token}" ]; then + # If no token, try to get one from flyctl + token=$(flyctl auth token 2>/dev/null || true) + fi + + if [ -z "${token}" ]; then + log_err "No Fly API token available" + return 1 + fi + + # FlyV1 tokens start with FlyV1, otherwise use Bearer + case "${token}" in + FlyV1\ *) auth_header="Authorization: ${token}" ;; + *) auth_header="Authorization: Bearer ${token}" ;; + esac + + local curl_args=("-s" "-X" "${method}" "-H" "${auth_header}" "-H" "Content-Type: application/json") + if [ -n "${body}" ]; then + curl_args+=("-d" "${body}") + fi + + curl "${curl_args[@]}" "${url}" +} + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- +make_app_name() { + local agent="$1" + local ts + ts=$(date +%s) + printf "e2e-%s-%s" "${agent}" "${ts}" +} + +format_duration() { + local seconds="$1" + local mins=$((seconds / 60)) + local secs=$((seconds % 60)) + printf "%dm %ds" "${mins}" "${secs}" +} + +track_app() { + local app_name="$1" + if [ -z "${_TRACKED_APPS}" ]; then + _TRACKED_APPS="${app_name}" + else + _TRACKED_APPS="${_TRACKED_APPS} ${app_name}" + fi +} + +untrack_app() { + local app_name="$1" + local new_list="" + for app in ${_TRACKED_APPS}; do + if [ "${app}" != "${app_name}" ]; then + if [ -z "${new_list}" ]; then + new_list="${app}" + else + new_list="${new_list} ${app}" + fi + fi + done + _TRACKED_APPS="${new_list}" +} diff --git a/e2e/lib/provision.sh b/e2e/lib/provision.sh new file mode 100644 index 00000000..09ce5be5 --- /dev/null +++ b/e2e/lib/provision.sh @@ -0,0 +1,125 @@ +#!/bin/bash +# e2e/lib/provision.sh — Provision an agent VM via spawn CLI (headless) +set -eo pipefail + +# 
--------------------------------------------------------------------------- +# provision_agent AGENT APP_NAME LOG_DIR +# +# Runs spawn in headless mode with a timeout. The provision process hangs on +# the interactive SSH session (step 12 of the orchestration), so we kill it +# after PROVISION_TIMEOUT seconds. The install itself usually succeeds; we +# verify via app existence and .spawnrc presence afterward. +# --------------------------------------------------------------------------- +provision_agent() { + local agent="$1" + local app_name="$2" + local log_dir="$3" + + local exit_file="${log_dir}/${agent}.exit" + local stdout_file="${log_dir}/${agent}.stdout" + local stderr_file="${log_dir}/${agent}.stderr" + + # Resolve CLI entry point (relative to this script's location in e2e/lib/) + local cli_entry + cli_entry="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)/cli/src/index.ts" + + if [ ! -f "${cli_entry}" ]; then + log_err "CLI entry point not found: ${cli_entry}" + return 1 + fi + + log_step "Provisioning ${agent} as ${app_name} (timeout: ${PROVISION_TIMEOUT}s)" + + # Remove stale exit file + rm -f "${exit_file}" + + # Environment for headless provisioning + # FLY_API_TOKEN="" forces spawn to use flyctl stored credentials (see plan section 6) + # MODEL_ID bypasses the interactive model selection prompt (required by openclaw) + ( + SPAWN_NON_INTERACTIVE=1 \ + SPAWN_SKIP_GITHUB_AUTH=1 \ + SPAWN_SKIP_API_VALIDATION=1 \ + MODEL_ID="${MODEL_ID:-openrouter/auto}" \ + FLY_APP_NAME="${app_name}" \ + FLY_REGION="${FLY_REGION}" \ + FLY_VM_MEMORY="${FLY_VM_MEMORY}" \ + FLY_API_TOKEN="" \ + OPENROUTER_API_KEY="${OPENROUTER_API_KEY}" \ + bun run "${cli_entry}" "${agent}" fly --headless --output json \ + > "${stdout_file}" 2> "${stderr_file}" + printf '%s' "$?" > "${exit_file}" + ) & + local pid=$! 
+ + # Poll for completion or timeout (bash 3.2 compatible — no wait -n) + local waited=0 + while [ "${waited}" -lt "${PROVISION_TIMEOUT}" ]; do + if [ -f "${exit_file}" ]; then + break + fi + sleep 5 + waited=$((waited + 5)) + done + + # Kill if still running (the interactive SSH session hangs) + if [ ! -f "${exit_file}" ]; then + log_warn "Provision timed out after ${PROVISION_TIMEOUT}s — killing (install may still succeed)" + kill "${pid}" 2>/dev/null || true + wait "${pid}" 2>/dev/null || true + fi + + # Check if the provision process exited cleanly + local exit_code="" + if [ -f "${exit_file}" ]; then + exit_code=$(cat "${exit_file}") + fi + + # Even if provision "failed" (timeout), the app may exist and install may have completed. + # Verify app existence via flyctl + REST API fallback. + local app_exists=0 + if flyctl status -a "${app_name}" >/dev/null 2>&1; then + app_exists=1 + else + # REST API fallback + local api_result + api_result=$(fly_api GET "/apps/${app_name}/machines" 2>/dev/null || true) + if printf '%s' "${api_result}" | jq -e '.[0].id' >/dev/null 2>&1; then + app_exists=1 + fi + fi + + if [ "${app_exists}" -eq 0 ]; then + log_err "App ${app_name} does not exist after provisioning" + if [ -f "${stderr_file}" ]; then + log_err "Stderr tail:" + tail -20 "${stderr_file}" >&2 || true + fi + return 1 + fi + + log_ok "App ${app_name} exists" + + # Wait for install to complete (.spawnrc is written near the end) + log_step "Waiting for install to complete (polling .spawnrc, up to ${INSTALL_WAIT}s)..." 
+ local install_waited=0 + local install_ok=0 + while [ "${install_waited}" -lt "${INSTALL_WAIT}" ]; do + if fly_ssh "${app_name}" "test -f ~/.spawnrc" >/dev/null 2>&1; then + install_ok=1 + break + fi + sleep 10 + install_waited=$((install_waited + 10)) + done + + if [ "${install_ok}" -eq 1 ]; then + # Settle time for agent binary install to finish after .spawnrc is written + sleep 5 + log_ok "Install completed (.spawnrc found)" + return 0 + else + log_warn ".spawnrc not found after ${INSTALL_WAIT}s — install may still be running" + return 0 # Continue to verification; it will catch real failures + fi +} diff --git a/e2e/lib/teardown.sh b/e2e/lib/teardown.sh new file mode 100644 index 00000000..45d24279 --- /dev/null +++ b/e2e/lib/teardown.sh @@ -0,0 +1,62 @@ +#!/bin/bash +# e2e/lib/teardown.sh — Tear down a Fly.io app via REST API +set -eo pipefail + +# --------------------------------------------------------------------------- +# teardown_agent APP_NAME +# +# 1. List machines in the app +# 2. Stop each machine +# 3. Delete each machine (force) +# 4. Delete the app +# --------------------------------------------------------------------------- +teardown_agent() { + local app="$1" + + log_step "Tearing down ${app}..." + + # Get machines list + local machines_json + machines_json=$(fly_api GET "/apps/${app}/machines" 2>/dev/null || true) + + if [ -z "${machines_json}" ] || [ "${machines_json}" = "null" ]; then + log_warn "No machines response for ${app} — attempting app delete anyway" + fly_api DELETE "/apps/${app}" >/dev/null 2>&1 || true + untrack_app "${app}" + return 0 + fi + + # Extract machine IDs + local machine_ids + machine_ids=$(printf '%s' "${machines_json}" | jq -r '.[].id // empty' 2>/dev/null || true) + + if [ -n "${machine_ids}" ]; then + # Stop each machine + for mid in ${machine_ids}; do + log_step "Stopping machine ${mid}..." 
+ fly_api POST "/apps/${app}/machines/${mid}/stop" '{}' >/dev/null 2>&1 || true + done + + # Brief wait for stop to propagate + sleep 2 + + # Force-delete each machine + for mid in ${machine_ids}; do + log_step "Deleting machine ${mid}..." + fly_api DELETE "/apps/${app}/machines/${mid}?force=true" >/dev/null 2>&1 || true + done + fi + + # Delete the app + log_step "Deleting app ${app}..." + fly_api DELETE "/apps/${app}" >/dev/null 2>&1 || true + + # Verify deletion + if flyctl status -a "${app}" >/dev/null 2>&1; then + log_warn "App ${app} may still exist (flyctl still reports it)" + else + log_ok "App ${app} torn down" + fi + + untrack_app "${app}" +} diff --git a/e2e/lib/verify.sh b/e2e/lib/verify.sh new file mode 100644 index 00000000..ece21f41 --- /dev/null +++ b/e2e/lib/verify.sh @@ -0,0 +1,317 @@ +#!/bin/bash +# e2e/lib/verify.sh — SSH helpers and per-agent verification +set -eo pipefail + +# --------------------------------------------------------------------------- +# Machine ID cache (avoid repeated API calls) +# --------------------------------------------------------------------------- +_FLY_MACHINE_ID="" +_FLY_MACHINE_APP="" + +# --------------------------------------------------------------------------- +# fly_ssh APP_NAME COMMAND +# +# Resolves machine ID, escapes single quotes, and runs the command via +# flyctl machine exec. Returns the exit code of the remote command. 
+# ---------------------------------------------------------------------------
+fly_ssh() {
+  local app="$1"
+  local cmd="$2"
+
+  # Resolve machine ID (cached per app); '// empty' maps a missing machine to "" so a failed lookup is retried instead of caching "null"
+  if [ "${_FLY_MACHINE_APP}" != "${app}" ] || [ -z "${_FLY_MACHINE_ID}" ]; then
+    _FLY_MACHINE_ID=$(flyctl machines list -a "${app}" --json 2>/dev/null | jq -r '.[0].id // empty')
+    _FLY_MACHINE_APP="${app}"
+    if [ -z "${_FLY_MACHINE_ID}" ] || [ "${_FLY_MACHINE_ID}" = "null" ]; then
+      log_err "Could not resolve machine ID for app ${app}"
+      return 1
+    fi
+  fi
+
+  # Escape single quotes in command: each ' becomes '\''
+  local escaped_cmd
+  escaped_cmd=$(printf '%s' "${cmd}" | sed "s/'/'\\\\''/g")
+
+  flyctl machine exec "${_FLY_MACHINE_ID}" -a "${app}" --timeout 30 "bash -c '${escaped_cmd}'"
+}
+
+# ---------------------------------------------------------------------------
+# verify_common APP_NAME AGENT
+#
+# Checks that apply to ALL agents:
+#   1. SSH connectivity
+#   2. .spawnrc exists
+#   3. .spawnrc contains OPENROUTER_API_KEY
+# ---------------------------------------------------------------------------
+verify_common() {
+  local app="$1"
+  local agent="$2"
+  local failures=0
+
+  # 1. SSH connectivity
+  log_step "Checking SSH connectivity..."
+  if fly_ssh "${app}" "echo e2e-ssh-ok" 2>/dev/null | grep -q "e2e-ssh-ok"; then
+    log_ok "SSH connectivity"
+  else
+    log_err "SSH connectivity failed"
+    failures=$((failures + 1))
+  fi
+
+  # 2. .spawnrc exists
+  log_step "Checking .spawnrc exists..."
+  if fly_ssh "${app}" "test -f ~/.spawnrc" >/dev/null 2>&1; then
+    log_ok ".spawnrc exists"
+  else
+    log_err ".spawnrc not found"
+    failures=$((failures + 1))
+  fi
+
+  # 3. .spawnrc has OPENROUTER_API_KEY
+  log_step "Checking OPENROUTER_API_KEY in .spawnrc..."
+ if fly_ssh "${app}" "grep -q OPENROUTER_API_KEY ~/.spawnrc" >/dev/null 2>&1; then + log_ok "OPENROUTER_API_KEY present in .spawnrc" + else + log_err "OPENROUTER_API_KEY not found in .spawnrc" + failures=$((failures + 1)) + fi + + return "${failures}" +} + +# --------------------------------------------------------------------------- +# Per-agent verify functions +# All checks are EXIT-CODE BASED (never capture and compare stdout). +# --------------------------------------------------------------------------- + +verify_claude() { + local app="$1" + local failures=0 + + # Binary check + log_step "Checking claude binary..." + if fly_ssh "${app}" "PATH=\$HOME/.claude/local/bin:\$HOME/.local/bin:\$HOME/.bun/bin:\$PATH command -v claude" >/dev/null 2>&1; then + log_ok "claude binary found" + else + log_err "claude binary not found" + failures=$((failures + 1)) + fi + + # Config check + log_step "Checking claude config..." + if fly_ssh "${app}" "test -f ~/.claude/settings.json" >/dev/null 2>&1; then + log_ok "~/.claude/settings.json exists" + else + log_err "~/.claude/settings.json not found" + failures=$((failures + 1)) + fi + + # Env check + log_step "Checking claude env (openrouter base url)..." + if fly_ssh "${app}" "grep -q openrouter.ai ~/.spawnrc" >/dev/null 2>&1; then + log_ok "openrouter.ai configured in .spawnrc" + else + log_err "openrouter.ai not found in .spawnrc" + failures=$((failures + 1)) + fi + + return "${failures}" +} + +verify_openclaw() { + local app="$1" + local failures=0 + + # Binary check + log_step "Checking openclaw binary..." + if fly_ssh "${app}" "PATH=\$HOME/.bun/bin:\$HOME/.local/bin:\$PATH command -v openclaw" >/dev/null 2>&1; then + log_ok "openclaw binary found" + else + log_err "openclaw binary not found" + failures=$((failures + 1)) + fi + + # Env check + log_step "Checking openclaw env (ANTHROPIC_API_KEY)..." 
+ if fly_ssh "${app}" "grep -q ANTHROPIC_API_KEY ~/.spawnrc" >/dev/null 2>&1; then + log_ok "ANTHROPIC_API_KEY present in .spawnrc" + else + log_err "ANTHROPIC_API_KEY not found in .spawnrc" + failures=$((failures + 1)) + fi + + return "${failures}" +} + +verify_zeroclaw() { + local app="$1" + local failures=0 + + # Binary check (requires cargo env) + log_step "Checking zeroclaw binary..." + if fly_ssh "${app}" "source ~/.cargo/env 2>/dev/null; command -v zeroclaw" >/dev/null 2>&1; then + log_ok "zeroclaw binary found" + else + log_err "zeroclaw binary not found" + failures=$((failures + 1)) + fi + + # Env check: ZEROCLAW_PROVIDER + log_step "Checking zeroclaw env (ZEROCLAW_PROVIDER)..." + if fly_ssh "${app}" "grep -q ZEROCLAW_PROVIDER ~/.spawnrc" >/dev/null 2>&1; then + log_ok "ZEROCLAW_PROVIDER present in .spawnrc" + else + log_err "ZEROCLAW_PROVIDER not found in .spawnrc" + failures=$((failures + 1)) + fi + + # Env check: provider is openrouter + log_step "Checking zeroclaw uses openrouter..." + if fly_ssh "${app}" "grep ZEROCLAW_PROVIDER ~/.spawnrc | grep -q openrouter" >/dev/null 2>&1; then + log_ok "ZEROCLAW_PROVIDER set to openrouter" + else + log_err "ZEROCLAW_PROVIDER not set to openrouter" + failures=$((failures + 1)) + fi + + return "${failures}" +} + +verify_codex() { + local app="$1" + local failures=0 + + # Binary check + log_step "Checking codex binary..." + if fly_ssh "${app}" "source ~/.spawnrc 2>/dev/null; source ~/.zshrc 2>/dev/null; command -v codex" >/dev/null 2>&1; then + log_ok "codex binary found" + else + log_err "codex binary not found" + failures=$((failures + 1)) + fi + + # Config check + log_step "Checking codex config..." + if fly_ssh "${app}" "test -f ~/.codex/config.toml" >/dev/null 2>&1; then + log_ok "~/.codex/config.toml exists" + else + log_err "~/.codex/config.toml not found" + failures=$((failures + 1)) + fi + + # Env check + log_step "Checking codex env (OPENROUTER_API_KEY)..." 
+ if fly_ssh "${app}" "grep -q OPENROUTER_API_KEY ~/.spawnrc" >/dev/null 2>&1; then + log_ok "OPENROUTER_API_KEY present in .spawnrc" + else + log_err "OPENROUTER_API_KEY not found in .spawnrc" + failures=$((failures + 1)) + fi + + return "${failures}" +} + +verify_opencode() { + local app="$1" + local failures=0 + + # Binary check + log_step "Checking opencode binary..." + if fly_ssh "${app}" "PATH=\$HOME/.opencode/bin:\$PATH command -v opencode" >/dev/null 2>&1; then + log_ok "opencode binary found" + else + log_err "opencode binary not found" + failures=$((failures + 1)) + fi + + # Env check + log_step "Checking opencode env (OPENROUTER_API_KEY)..." + if fly_ssh "${app}" "grep -q OPENROUTER_API_KEY ~/.spawnrc" >/dev/null 2>&1; then + log_ok "OPENROUTER_API_KEY present in .spawnrc" + else + log_err "OPENROUTER_API_KEY not found in .spawnrc" + failures=$((failures + 1)) + fi + + return "${failures}" +} + +verify_kilocode() { + local app="$1" + local failures=0 + + # Binary check + log_step "Checking kilocode binary..." + if fly_ssh "${app}" "source ~/.spawnrc 2>/dev/null; source ~/.zshrc 2>/dev/null; command -v kilocode" >/dev/null 2>&1; then + log_ok "kilocode binary found" + else + log_err "kilocode binary not found" + failures=$((failures + 1)) + fi + + # Env check: KILO_PROVIDER_TYPE + log_step "Checking kilocode env (KILO_PROVIDER_TYPE)..." + if fly_ssh "${app}" "grep -q KILO_PROVIDER_TYPE ~/.spawnrc" >/dev/null 2>&1; then + log_ok "KILO_PROVIDER_TYPE present in .spawnrc" + else + log_err "KILO_PROVIDER_TYPE not found in .spawnrc" + failures=$((failures + 1)) + fi + + # Env check: provider is openrouter + log_step "Checking kilocode uses openrouter..." 
+ if fly_ssh "${app}" "grep KILO_PROVIDER_TYPE ~/.spawnrc | grep -q openrouter" >/dev/null 2>&1; then + log_ok "KILO_PROVIDER_TYPE set to openrouter" + else + log_err "KILO_PROVIDER_TYPE not set to openrouter" + failures=$((failures + 1)) + fi + + return "${failures}" +} + +# --------------------------------------------------------------------------- +# verify_agent AGENT APP_NAME +# +# Dispatch: common checks + agent-specific checks. +# Returns 0 if all pass, 1 if any fail. +# --------------------------------------------------------------------------- +verify_agent() { + local agent="$1" + local app="$2" + local total_failures=0 + + # Reset machine ID cache for each agent + _FLY_MACHINE_ID="" + _FLY_MACHINE_APP="" + + log_header "Verifying ${agent} (${app})" + + # Common checks + local common_failures=0 + verify_common "${app}" "${agent}" || common_failures=$? + total_failures=$((total_failures + common_failures)) + + # Agent-specific checks + local agent_failures=0 + case "${agent}" in + claude) verify_claude "${app}" || agent_failures=$? ;; + openclaw) verify_openclaw "${app}" || agent_failures=$? ;; + zeroclaw) verify_zeroclaw "${app}" || agent_failures=$? ;; + codex) verify_codex "${app}" || agent_failures=$? ;; + opencode) verify_opencode "${app}" || agent_failures=$? ;; + kilocode) verify_kilocode "${app}" || agent_failures=$? ;; + *) + log_err "Unknown agent: ${agent}" + return 1 + ;; + esac + total_failures=$((total_failures + agent_failures)) + + if [ "${total_failures}" -eq 0 ]; then + log_ok "All checks passed for ${agent}" + return 0 + else + log_err "${total_failures} check(s) failed for ${agent}" + return 1 + fi +}