spawn/sh/e2e/lib/common.sh
A 5e0144b645
fix(zeroclaw): remove broken zeroclaw agent (repo 404) (#3107)
* fix(zeroclaw): remove broken zeroclaw agent (repo 404)

The zeroclaw-labs/zeroclaw GitHub repository returns 404 — all installs
fail. Remove zeroclaw entirely from the matrix: agent definition,
setup code, shell scripts, e2e tests, packer config, skill files,
and documentation.

Fixes #3102

Agent: code-health
Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>

* fix(zeroclaw): remove stale zeroclaw reference from discovery.md ARM agents list

Addresses security review on PR #3107 — the last remaining zeroclaw
reference in .claude/rules/discovery.md is now removed.

Agent: issue-fixer
Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>

* fix(zeroclaw): remove remaining stale zeroclaw references from CI/packer

Remove zeroclaw from:
- .github/workflows/agent-tarballs.yml ARM build matrix
- .github/workflows/docker.yml agent matrix
- packer/digitalocean.pkr.hcl comment
- sh/e2e/e2e.sh comment

Addresses all 5 stale references flagged in security review of PR #3107.

Agent: issue-fixer
Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>

---------

Co-authored-by: B <6723574+louisgv@users.noreply.github.com>
Co-authored-by: Claude Sonnet 4.5 <noreply@anthropic.com>
2026-03-30 15:35:40 -07:00

317 lines
10 KiB
Bash

#!/bin/bash
# e2e/lib/common.sh — Constants, logging, env validation for multi-cloud E2E
set -eo pipefail
# ---------------------------------------------------------------------------
# Constants
# ---------------------------------------------------------------------------
# Space-separated list of every agent name known to the E2E suite.
ALL_AGENTS="claude openclaw codex opencode kilocode hermes junie cursor"

# Timeouts (seconds). A value already present in the environment wins;
# otherwise the default after ":=" is assigned.
: "${PROVISION_TIMEOUT:=720}"
: "${INSTALL_WAIT:=600}"
: "${INPUT_TEST_TIMEOUT:=120}"
# Upper bound on wall-clock time for one agent (provision + verify + input
# test), so a result file is still written when a step hangs.
: "${AGENT_TIMEOUT:=1800}"

# These values end up inside remote command strings, so anything that is
# not a plain run of digits is discarded and replaced by the default —
# a non-numeric value could otherwise inject shell syntax over SSH.
case "${PROVISION_TIMEOUT}" in
  '' | *[!0-9]*) PROVISION_TIMEOUT=720 ;;
esac
case "${INSTALL_WAIT}" in
  '' | *[!0-9]*) INSTALL_WAIT=600 ;;
esac
case "${INPUT_TEST_TIMEOUT}" in
  '' | *[!0-9]*) INPUT_TEST_TIMEOUT=120 ;;
esac
case "${AGENT_TIMEOUT}" in
  '' | *[!0-9]*) AGENT_TIMEOUT=1800 ;;
esac
# ---------------------------------------------------------------------------
# OpenRouter API key fallback
#
# On QA VMs that run Claude Code via OpenRouter, the API key is stored as
# ANTHROPIC_AUTH_TOKEN (because Claude Code uses ANTHROPIC_BASE_URL + token).
# Export OPENROUTER_API_KEY from ANTHROPIC_AUTH_TOKEN when using OpenRouter.
# ---------------------------------------------------------------------------
# Only act when the Anthropic base URL points at OpenRouter. In that case,
# if no OPENROUTER_API_KEY is set but an ANTHROPIC_AUTH_TOKEN is, export the
# token under the OpenRouter name. An existing key is never overwritten.
case "${ANTHROPIC_BASE_URL:-}" in
  *openrouter*)
    if [ -z "${OPENROUTER_API_KEY:-}" ] && [ -n "${ANTHROPIC_AUTH_TOKEN:-}" ]; then
      export OPENROUTER_API_KEY="${ANTHROPIC_AUTH_TOKEN}"
    fi
    ;;
esac
# Active cloud (set by load_cloud_driver)
# --- Mutable state -------------------------------------------------------
# Name of the currently loaded cloud driver (assigned by load_cloud_driver).
ACTIVE_CLOUD=''
# Text placed in front of every log message; used to tell clouds apart in
# multi-cloud parallel output.
CLOUD_LOG_PREFIX=''
# Space-separated list of instances tracked for cleanup on exit
# (maintained by track_app / untrack_app).
_TRACKED_APPS=''

# --- ANSI colour sequences -----------------------------------------------
# Stored as literal backslash escapes; the log_* helpers expand them via
# printf's %b conversion.
NC="\033[0m"
BOLD="\033[1m"
RED="\033[0;31m"
GREEN="\033[0;32m"
YELLOW="\033[1;33m"
BLUE="\033[0;34m"
CYAN="\033[0;36m"
# ---------------------------------------------------------------------------
# Logging (with optional cloud prefix for parallel output)
# ---------------------------------------------------------------------------
# _log_tagged COLOR TAG MESSAGE
# Shared formatter for the tagged loggers: prints
#   <COLOR><CLOUD_LOG_PREFIX> [TAG] MESSAGE<NC>
# followed by a newline, in a single printf call. COLOR and NC go through
# %b so their backslash escapes are expanded; the message is printed
# verbatim with %s.
_log_tagged() {
  printf '%b%s [%s] %s%b\n' "$1" "${CLOUD_LOG_PREFIX}" "$2" "$3" "${NC}"
}

# log_header TITLE — blank line, then a bold blue "=== TITLE ===" banner.
log_header() {
  local title="$1"
  printf '\n%b%b%s=== %s ===%b\n' "${BOLD}" "${BLUE}" "${CLOUD_LOG_PREFIX}" "${title}" "${NC}"
}

# log_step MESSAGE — cyan " -> MESSAGE" progress line.
log_step() {
  local message="$1"
  printf '%b%s -> %s%b\n' "${CYAN}" "${CLOUD_LOG_PREFIX}" "${message}" "${NC}"
}

# log_ok MESSAGE — green [PASS] line.
log_ok() {
  _log_tagged "${GREEN}" 'PASS' "$1"
}

# log_err MESSAGE — red [FAIL] line.
log_err() {
  _log_tagged "${RED}" 'FAIL' "$1"
}

# log_warn MESSAGE — yellow [WARN] line.
log_warn() {
  _log_tagged "${YELLOW}" 'WARN' "$1"
}

# log_info MESSAGE — blue [INFO] line.
log_info() {
  _log_tagged "${BLUE}" 'INFO' "$1"
}
# ---------------------------------------------------------------------------
# load_cloud_driver CLOUD
#
# Sources the cloud-specific driver and sets ACTIVE_CLOUD for wrapper dispatch.
# NOTE: Uses BASH_SOURCE and source with a filesystem path. This is intentional —
# e2e scripts are always run from the filesystem, never via bash <(curl ...).
# ---------------------------------------------------------------------------
load_cloud_driver() {
  # $1 = cloud name; selects <this file's directory>/clouds/<cloud>.sh.
  # ACTIVE_CLOUD is assigned up front, before the driver file is checked,
  # so it holds the requested name even when the driver turns out to be
  # missing (in which case the function returns 1).
  local cloud="$1"
  local driver_dir
  local driver_file
  ACTIVE_CLOUD="${cloud}"

  # The driver directory is resolved relative to this script on disk via
  # BASH_SOURCE[0]; the e2e scripts are run from the filesystem, never
  # through curl|bash, so that path is meaningful.
  driver_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)/clouds"
  driver_file="${driver_dir}/${cloud}.sh"

  if [ -f "${driver_file}" ]; then
    # shellcheck source=/dev/null # driver path is dynamic
    source "${driver_file}"
    log_step "Loaded cloud driver: ${cloud}"
  else
    log_err "Cloud driver not found: ${driver_file}"
    return 1
  fi
}
# ---------------------------------------------------------------------------
# Cloud wrapper functions — use ACTIVE_CLOUD for indirection (set by load_cloud_driver)
# ---------------------------------------------------------------------------
# Each wrapper forwards all of its arguments to the function named
# _<ACTIVE_CLOUD>_<operation> and returns that function's exit status.
# The name is built at call time, so the wrappers follow whatever value
# ACTIVE_CLOUD currently holds.

# Dispatches to _<ACTIVE_CLOUD>_validate_env.
cloud_validate_env() {
  "_${ACTIVE_CLOUD}_validate_env" "$@"
}

# Dispatches to _<ACTIVE_CLOUD>_headless_env.
cloud_headless_env() {
  "_${ACTIVE_CLOUD}_headless_env" "$@"
}

# Dispatches to _<ACTIVE_CLOUD>_provision_verify.
cloud_provision_verify() {
  "_${ACTIVE_CLOUD}_provision_verify" "$@"
}

# Dispatches to _<ACTIVE_CLOUD>_exec.
cloud_exec() {
  "_${ACTIVE_CLOUD}_exec" "$@"
}

# Dispatches to _<ACTIVE_CLOUD>_teardown.
cloud_teardown() {
  "_${ACTIVE_CLOUD}_teardown" "$@"
}

# Dispatches to _<ACTIVE_CLOUD>_cleanup_stale.
cloud_cleanup_stale() {
  "_${ACTIVE_CLOUD}_cleanup_stale" "$@"
}
# Prints the parallelism limit for the active cloud. When the driver does
# not define _<ACTIVE_CLOUD>_max_parallel, prints the fallback "99"
# (no trailing newline); otherwise delegates to the driver function with
# all arguments.
cloud_max_parallel() {
  if ! type "_${ACTIVE_CLOUD}_max_parallel" >/dev/null 2>&1; then
    printf '99'
    return
  fi
  "_${ACTIVE_CLOUD}_max_parallel" "$@"
}
# Prints the install wait for the active cloud. When the driver does not
# define _<ACTIVE_CLOUD>_install_wait, prints the global INSTALL_WAIT
# (no trailing newline); otherwise delegates to the driver function with
# all arguments.
cloud_install_wait() {
  if ! type "_${ACTIVE_CLOUD}_install_wait" >/dev/null 2>&1; then
    printf '%s' "${INSTALL_WAIT}"
    return
  fi
  "_${ACTIVE_CLOUD}_install_wait" "$@"
}
# Refresh auth token if the cloud driver supports it (e.g. Sprite tokens
# expire after ~60 min). Called before each provisioning batch to prevent
# auth expiry failures in long-running E2E suites. See #2934.
cloud_refresh_auth() {
  # The refresh hook is optional: a driver without
  # _<ACTIVE_CLOUD>_refresh_auth makes this a successful no-op.
  type "_${ACTIVE_CLOUD}_refresh_auth" >/dev/null 2>&1 || return 0
  "_${ACTIVE_CLOUD}_refresh_auth" "$@"
}
# ---------------------------------------------------------------------------
# Per-agent provision timeout overrides
#
# Some agents (e.g. junie) have heavier installs that exceed the default
# PROVISION_TIMEOUT on slower clouds. This map lets us set per-agent defaults
# without raising the global timeout for all agents.
#
# Override precedence:
# 1. PROVISION_TIMEOUT_<agent> env var (explicit override)
# 2. Built-in per-agent default (below)
# 3. Global PROVISION_TIMEOUT
# ---------------------------------------------------------------------------
# Built-in per-agent defaults, in seconds. They are looked up by name:
# get_provision_timeout reads _PROVISION_TIMEOUT_<agent> and
# get_agent_timeout reads _AGENT_TIMEOUT_<agent>, so the suffix must be the
# agent name exactly as it appears in ALL_AGENTS.

# junie: longer provision and overall timeouts than the global defaults.
_PROVISION_TIMEOUT_junie=1200
_AGENT_TIMEOUT_junie=2400
# Hermes installs a Python virtualenv which can take 20+ min on slow VMs.
# Provision timeout bumped to match the CLI install timeout (600s).
# Agent timeout bumped to 3600s to give the install enough headroom.
# NOTE(review): the comment above cites 600s, but the value set here is 720,
# which is the same as the global PROVISION_TIMEOUT default — confirm which
# figure is intended.
_PROVISION_TIMEOUT_hermes=720
_AGENT_TIMEOUT_hermes=3600
# get_provision_timeout AGENT
#
# Prints (no trailing newline) the provision timeout in seconds for AGENT.
# Lookup order, first purely numeric value wins:
#   1. PROVISION_TIMEOUT_<agent>   (explicit override)
#   2. _PROVISION_TIMEOUT_<agent>  (built-in per-agent default)
#   3. PROVISION_TIMEOUT           (global)
# <agent> is AGENT with every character outside [A-Za-z0-9_] replaced by "_".
#
# Both per-agent sources are validated as digits-only. A variable named
# _PROVISION_TIMEOUT_<agent> can be inherited from the environment just like
# the explicit override, and this file notes that timeouts are interpolated
# into remote command strings, so a non-numeric value must never be returned.
get_provision_timeout() {
  local agent="$1"
  # Sanitising first guarantees the names built below are valid identifiers.
  local safe_agent="${agent//[^A-Za-z0-9_]/_}"
  local candidate value

  for candidate in "PROVISION_TIMEOUT_${safe_agent}" "_PROVISION_TIMEOUT_${safe_agent}"; do
    # Indirect expansion reads the variable named by $candidate without eval.
    value="${!candidate:-}"
    case "${value}" in
      '' | *[!0-9]*) ;; # unset, empty or non-numeric: try the next source
      *)
        printf '%s' "${value}"
        return 0
        ;;
    esac
  done

  # Nothing agent-specific applies: fall back to the global timeout.
  printf '%s' "${PROVISION_TIMEOUT}"
}
# ---------------------------------------------------------------------------
# get_agent_timeout AGENT
#
# Returns the overall wall-clock timeout (seconds) for a single agent run
# (provision + verify + input test). Same override precedence as above:
# 1. AGENT_TIMEOUT_<agent> env var
# 2. Built-in per-agent default (_AGENT_TIMEOUT_<agent>)
# 3. Global AGENT_TIMEOUT
# ---------------------------------------------------------------------------
get_agent_timeout() {
  # Prints (no trailing newline) the overall timeout in seconds for the
  # agent named in $1. Lookup order, first purely numeric value wins:
  #   1. AGENT_TIMEOUT_<agent>   (explicit override)
  #   2. _AGENT_TIMEOUT_<agent>  (built-in per-agent default)
  #   3. AGENT_TIMEOUT           (global)
  # <agent> is $1 with every character outside [A-Za-z0-9_] replaced by "_".
  #
  # Both per-agent sources are validated as digits-only: a variable named
  # _AGENT_TIMEOUT_<agent> can be inherited from the environment just like
  # the explicit override, so it gets the same check instead of being
  # returned verbatim.
  local agent="$1"
  # Sanitising first guarantees the names built below are valid identifiers.
  local safe_agent="${agent//[^A-Za-z0-9_]/_}"
  local candidate value

  for candidate in "AGENT_TIMEOUT_${safe_agent}" "_AGENT_TIMEOUT_${safe_agent}"; do
    # Indirect expansion reads the variable named by $candidate without eval.
    value="${!candidate:-}"
    case "${value}" in
      '' | *[!0-9]*) ;; # unset, empty or non-numeric: try the next source
      *)
        printf '%s' "${value}"
        return 0
        ;;
    esac
  done

  # Nothing agent-specific applies: fall back to the global timeout.
  printf '%s' "${AGENT_TIMEOUT}"
}
# ---------------------------------------------------------------------------
# require_common_env
#
# Validates tools and env vars common to ALL clouds (bun, jq, OPENROUTER_API_KEY).
# Cloud-specific validation is handled by cloud_validate_env().
# ---------------------------------------------------------------------------
require_common_env() {
  # Checks every prerequisite (jq, bun, OPENROUTER_API_KEY) and logs each
  # failure, rather than stopping at the first one, so the caller sees the
  # complete list in a single run. Returns 1 if anything was missing,
  # 0 otherwise.
  local rc=0

  command -v jq >/dev/null 2>&1 || {
    log_err "jq not found. Install via: brew install jq / apt install jq"
    rc=1
  }

  command -v bun >/dev/null 2>&1 || {
    log_err "bun not found. Install from https://bun.sh"
    rc=1
  }

  [ -n "${OPENROUTER_API_KEY:-}" ] || {
    log_err "OPENROUTER_API_KEY is not set"
    rc=1
  }

  return "${rc}"
}
# ---------------------------------------------------------------------------
# require_env
#
# Validates common env + active cloud-specific env.
# ---------------------------------------------------------------------------
require_env() {
  # Runs the common checks first, then the active cloud's own validation.
  # Stops with status 1 at the first failing stage; logs a PASS line and
  # returns 0 only when both succeed.
  require_common_env || return 1
  cloud_validate_env || return 1
  log_ok "Environment validated"
  return 0
}
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
# make_app_name AGENT
# Prints (no trailing newline) an instance name of the form
#   e2e-<ACTIVE_CLOUD>-<AGENT>-<epoch seconds>
# or, when ACTIVE_CLOUD is empty or unset,
#   e2e-<AGENT>-<epoch seconds>
# The cloud segment avoids name collisions in multi-cloud parallel runs.
make_app_name() {
  local agent="$1"
  local prefix="e2e"
  local stamp
  stamp=$(date +%s)
  if [ -n "${ACTIVE_CLOUD:-}" ]; then
    prefix="e2e-${ACTIVE_CLOUD}"
  fi
  printf '%s-%s-%s' "${prefix}" "${agent}" "${stamp}"
}
# format_duration SECONDS
# Prints SECONDS as "<minutes>m <seconds>s" (no trailing newline). Minutes
# are not rolled over into hours, so 3600 prints as "60m 0s".
format_duration() {
  local seconds="$1"
  printf "%dm %ds" "$((seconds / 60))" "$((seconds % 60))"
}
# track_app APP_NAME
# Appends APP_NAME to the space-separated _TRACKED_APPS list.
track_app() {
  local app_name="$1"
  # ${var:+...} emits the existing list plus a separating space only when
  # the list is non-empty, so the first entry gets no leading space.
  _TRACKED_APPS="${_TRACKED_APPS:+${_TRACKED_APPS} }${app_name}"
}
# untrack_app APP_NAME
# Removes every entry equal to APP_NAME from the space-separated
# _TRACKED_APPS list. The order of the remaining entries is preserved, and
# a name that is not in the list leaves it unchanged.
untrack_app() {
  local app_name="$1"
  # The loop variable is local so that calling this function does not
  # overwrite a variable named "app" in the caller or at global scope.
  local app
  local kept=""
  # Intentionally unquoted: the list is split on whitespace into entries.
  for app in ${_TRACKED_APPS}; do
    if [ "${app}" != "${app_name}" ]; then
      kept="${kept:+${kept} }${app}"
    fi
  done
  _TRACKED_APPS="${kept}"
}