# mirror of https://github.com/OpenRouterTeam/spawn.git
# synced 2026-05-10 04:09:40 +00:00
#
# Upstream commit summary: replaced fly/lib/common.sh (741 lines of bash)
# with a TypeScript implementation on the Bun runtime. The fly/ provider was
# the most complex bash code in the project; recent fixes (#1597, #1599,
# #1600) highlighted the pain of debugging HTTP calls, JSON parsing, and
# multi-step auth flows in shell. New TypeScript modules:
#   - fly/lib/ui.ts     — logging, prompts, validation (zero deps)
#   - fly/lib/fly.ts    — API client (fetch), auth chain, org listing, provisioning
#   - fly/lib/oauth.ts  — OpenRouter OAuth via Bun.serve(), key management
#   - fly/lib/agents.ts — typed agent configs for all 6 agents
#   - fly/main.ts       — orchestrator entry point
# Agent .sh files become thin shims (~30 lines) that install bun if needed,
# download TS sources for curl|bash execution, and delegate to main.ts.
# Test coverage: 44 TypeScript unit tests (bun test), 4 fly failure-mode
# tests (mock.sh), and all existing suites pass (110 run.sh, 76 mock.sh).
# Co-authored-by: lab <6723574+louisgv@users.noreply.github.com>
# Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
#!/bin/bash
#
# Fixture-based mock test suite for cloud provider agent scripts
#
# Uses recorded API responses from test/fixtures/{cloud}/ to test
# every agent script without making real API calls.
#
# Usage:
#   bash test/mock.sh                  # Test all clouds with fixtures
#   bash test/mock.sh hetzner          # Test all agents on one cloud
#   bash test/mock.sh hetzner claude   # Test one agent on one cloud

set -eo pipefail

# Warn (but keep going) on pre-4 bash such as macOS's stock 3.2.
if [[ "${BASH_VERSINFO[0]}" -lt 4 ]]; then
  printf 'WARNING: bash %s detected. Some features may need bash 4+.\n' "${BASH_VERSION}" >&2
fi

# Repo root = parent of the directory containing this script.
REPO_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
FIXTURES_DIR="${REPO_ROOT}/test/fixtures"
# Sandbox for mock binaries; prepended to PATH when running agent scripts.
TEST_DIR=$(mktemp -d)
# Every mock invocation is appended here; assertions grep this file.
MOCK_LOG="${TEST_DIR}/mock_calls.log"

# Colors (respect NO_COLOR standard: https://no-color.org/)
if [[ -n "${NO_COLOR:-}" ]]; then
  RED='' GREEN='' YELLOW='' CYAN='' NC=''
else
  RED='\033[0;31m'
  GREEN='\033[0;32m'
  YELLOW='\033[1;33m'
  CYAN='\033[0;36m'
  NC='\033[0m'
fi

# Counters (mutated by the assertion helpers below)
PASSED=0
FAILED=0
SKIPPED=0

# Cleanup on exit: drop the sandbox and any stray /tmp/spawn_* artifacts
# left behind by agent scripts.
cleanup() {
  rm -rf "${TEST_DIR}"
  rm -f /tmp/spawn_* 2>/dev/null || true
}
trap cleanup EXIT
# ============================================================
# Assertions (same pattern as test/run.sh)
# ============================================================

# Assert that an observed exit code matches the expected one.
# $1 actual, $2 expected, $3 message. Increments PASSED or FAILED.
assert_exit_code() {
  local got="$1" want="$2" label="$3"
  if [[ "${got}" -ne "${want}" ]]; then
    printf '%b\n' "  ${RED}✗${NC} ${label} (got exit code ${got})"
    FAILED=$((FAILED + 1))
    return 0
  fi
  printf '%b\n' "  ${GREEN}✓${NC} ${label}"
  PASSED=$((PASSED + 1))
}
# Assert that MOCK_LOG matches an extended-regex pattern.
# $1 pattern (ERE), $2 message. Increments PASSED or FAILED.
assert_log_contains() {
  local pattern="$1" label="$2"
  if ! grep -qE "${pattern}" "${MOCK_LOG}" 2>/dev/null; then
    printf '%b\n' "  ${RED}✗${NC} ${label}"
    FAILED=$((FAILED + 1))
    return 0
  fi
  printf '%b\n' "  ${GREEN}✓${NC} ${label}"
  PASSED=$((PASSED + 1))
}
# Assert that the mock curl logged a call "<method> <endpoint>".
# $1 HTTP method, $2 endpoint regex, $3 optional message.
assert_api_called() {
  local method="$1" endpoint_pattern="$2"
  local label="${3:-calls ${method} ${endpoint_pattern}}"
  if ! grep -qE "curl ${method} .*${endpoint_pattern}" "${MOCK_LOG}" 2>/dev/null; then
    printf '%b\n' "  ${RED}✗${NC} ${label}"
    FAILED=$((FAILED + 1))
    return 0
  fi
  printf '%b\n' "  ${GREEN}✓${NC} ${label}"
  PASSED=$((PASSED + 1))
}
# Assert that an env var was injected into the remote environment.
# The variable may appear verbatim in ssh/scp arguments (MOCK_LOG) or in the
# script's captured output; a case-insensitive mention of its first word
# ("openrouter" for OPENROUTER_API_KEY, as in "Using OpenRouter API key
# from environment") in either file also counts.
# $1 variable name, $2 optional message.
assert_env_injected() {
  local var_name="$1"
  local label="${2:-injects ${var_name}}"
  local first_word
  first_word=$(printf '%s' "${var_name%%_*}" | tr '[:upper:]' '[:lower:]')

  local found=0
  if grep -qE "${var_name}" "${MOCK_LOG}" 2>/dev/null; then
    found=1
  elif grep -qE "${var_name}" "${TEST_DIR}/output.log" 2>/dev/null; then
    found=1
  elif grep -qi "${first_word}" "${TEST_DIR}/output.log" 2>/dev/null; then
    found=1
  elif grep -qi "${first_word}" "${MOCK_LOG}" 2>/dev/null; then
    found=1
  fi

  if [[ "$found" -eq 1 ]]; then
    printf '%b\n' "  ${GREEN}✓${NC} ${label}"
    PASSED=$((PASSED + 1))
  else
    printf '%b\n' "  ${RED}✗${NC} ${label}"
    FAILED=$((FAILED + 1))
  fi
}
# Assert that a file upload/creation matching the pattern was logged
# (scp/upload/file entries in MOCK_LOG). $1 path regex, $2 optional message.
assert_file_created() {
  local path_pattern="$1"
  local label="${2:-creates file matching ${path_pattern}}"
  if ! grep -qE "(scp|upload|file).*${path_pattern}" "${MOCK_LOG}" 2>/dev/null; then
    printf '%b\n' "  ${RED}✗${NC} ${label}"
    FAILED=$((FAILED + 1))
    return 0
  fi
  printf '%b\n' "  ${GREEN}✓${NC} ${label}"
  PASSED=$((PASSED + 1))
}
# Assert that _validate_body logged no BODY_ERROR entries; on failure,
# print each offending log line indented. $1 optional message.
assert_no_body_errors() {
  local label="${1:-no request body validation errors}"
  if ! grep -qE "BODY_ERROR:" "${MOCK_LOG}" 2>/dev/null; then
    printf '%b\n' "  ${GREEN}✓${NC} ${label}"
    PASSED=$((PASSED + 1))
    return 0
  fi
  printf '%b\n' "  ${RED}✗${NC} ${label}"
  printf '%b\n' "  ${RED}  Errors:${NC}"
  grep "BODY_ERROR:" "${MOCK_LOG}" 2>/dev/null | while IFS= read -r line; do
    printf '    %s\n' "$line"
  done
  FAILED=$((FAILED + 1))
}
# Check server lifecycle tracking in a state file of CREATED:/DELETED: lines.
# Passes when at least one creation was tracked; prints warnings (without
# failing) when the file is missing, empty, or servers outlive the test —
# the latter is expected, since the user takes over the server.
# $1 state file path, $2 optional message.
assert_server_cleaned_up() {
  local state_file="$1"
  local label="${2:-server lifecycle tracked}"

  if [[ ! -f "$state_file" ]]; then
    printf '%b\n' "  ${YELLOW}⚠${NC} ${label} (no state file)"
    return 0
  fi

  local created deleted
  created=$(grep -c "^CREATED:" "$state_file" 2>/dev/null || true)
  deleted=$(grep -c "^DELETED:" "$state_file" 2>/dev/null || true)

  if [[ "$created" -eq 0 ]]; then
    printf '%b\n' "  ${YELLOW}⚠${NC} ${label} (no server creation tracked)"
    return 0
  fi

  printf '%b\n' "  ${GREEN}✓${NC} ${label} (created=${created}, deleted=${deleted})"
  PASSED=$((PASSED + 1))
  if [[ "$deleted" -lt "$created" ]]; then
    printf '%b\n' "  ${YELLOW}⚠${NC} warning: ${created} created but only ${deleted} deleted (expected — user takes over)"
  fi
}
# ============================================================
# Mock setup
# ============================================================

# Install the shared mock curl (test/mock-curl-script.sh, which serves
# recorded fixtures instead of doing real HTTP) into the sandbox as `curl`.
setup_mock_curl() {
  local script_dir
  script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
  cp "${script_dir}/mock-curl-script.sh" "${TEST_DIR}/curl"
  chmod +x "${TEST_DIR}/curl"
}
# Put mock `ssh` and `scp` binaries on the sandbox PATH. Each one logs its
# full argument list to MOCK_LOG and exits 0.
setup_mock_ssh() {
  local tool
  for tool in ssh scp; do
    {
      printf '#!/bin/bash\n'
      printf 'echo "%s $*" >> "${MOCK_LOG}"\n' "$tool"
      printf 'exit 0\n'
    } > "${TEST_DIR}/${tool}"
    chmod +x "${TEST_DIR}/${tool}"
  done
}
# Create a mock that logs its invocation to MOCK_LOG and exits 0.
# Usage: _create_logging_mock NAME [NAME...]
_create_logging_mock() {
  local name
  for name in "$@"; do
    {
      printf '#!/bin/bash\n'
      printf 'echo "%s $*" >> "${MOCK_LOG}"\n' "$name"
      printf 'exit 0\n'
    } > "${TEST_DIR}/${name}"
    chmod +x "${TEST_DIR}/${name}"
  done
}
# Create a mock that silently exits 0 (no logging).
# Usage: _create_silent_mock NAME [NAME...]
_create_silent_mock() {
  local name
  for name in "$@"; do
    printf '#!/bin/bash\nexit 0\n' > "${TEST_DIR}/${name}"
    chmod +x "${TEST_DIR}/${name}"
  done
}
# Create the ssh-keygen mock script.
# Handles the two call shapes agent scripts use:
#   * `ssh-keygen -l ...` — prints a fixed MD5 fingerprint that matches the
#     recorded fixture data, then exits 0.
#   * `ssh-keygen ... -f PATH ...` — simulates key generation: creates an
#     empty private key at PATH and a PATH.pub with a fixed ed25519 key.
# Every invocation is appended to MOCK_LOG.
_create_ssh_keygen_mock() {
  cat > "${TEST_DIR}/ssh-keygen" << 'MOCK'
#!/bin/bash
echo "ssh-keygen $*" >> "${MOCK_LOG}"
# Check for -l flag (fingerprint listing)
for arg in "$@"; do
  case "$arg" in
    -l*) echo "256 MD5:af:0d:c5:57:a8:fd:b2:82:5e:d4:c1:65:f0:0c:8a:9d test@test (ED25519)"; exit 0 ;;
  esac
done
# Parse -f flag for key creation
KEY_PATH=""
prev=""
for arg in "$@"; do
  if [ "$prev" = "-f" ]; then
    KEY_PATH="$arg"
  fi
  prev="$arg"
done
if [ -n "$KEY_PATH" ]; then
  mkdir -p "$(dirname "$KEY_PATH")"
  touch "$KEY_PATH"
  echo "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIHmcVdzydp72a/B69nmENZvCvjuk7xGpKdi5CvhkmNsv test@test" > "${KEY_PATH}.pub"
fi
exit 0
MOCK
  chmod +x "${TEST_DIR}/ssh-keygen"
}
# Install mock agent binaries and supporting tools into the sandbox PATH.
setup_mock_agents() {
  # Agent binaries
  _create_logging_mock claude openclaw zeroclaw codex opencode kilocode q

  # Tools used during agent install and file upload
  _create_logging_mock pip pip3 npm npx node openssl shred cargo go git base64

  # bun: pass `bun -e "..."` (JSON processing) through to the real binary;
  # log all other invocations as no-ops.
  # Fallback chain: real bun → node (with Bun.stdin polyfill) → exit 0
  # CI (GitHub Actions ubuntu-latest) has node but not bun, so the node
  # fallback is essential for _fly_json / _fly_list_orgs / list_servers.
  cat > "${TEST_DIR}/bun" << 'MOCKBUN'
#!/bin/bash
echo "bun $*" >> "${MOCK_LOG}"

# Find the real bun binary (skip our mock directory)
_find_real_bun() {
  local _self_dir
  _self_dir="$(cd "$(dirname "$0")" && pwd)"
  IFS=: read -ra _path_dirs <<< "$PATH"
  for _d in "${_path_dirs[@]}"; do
    if [[ "$_d" != "$_self_dir" && -x "$_d/bun" ]]; then
      echo "$_d/bun"
      return 0
    fi
  done
  return 1
}

# Delegate `bun run <file>` and `bun test <file>` to the real bun.
# fly/ agent shims use `bun run main.ts` — must pass through.
if [[ "$1" == "run" || "$1" == "test" ]]; then
  _real_bun=$(_find_real_bun) || { echo "real bun not found" >&2; exit 1; }
  exec "$_real_bun" "$@"
fi

if [[ "$1" == "-e" ]]; then
  _code="$2"
  shift 2 # remove -e and the code, leaving extra args (e.g. -- field default)
  _real_bun=$(_find_real_bun)
  if [[ -n "$_real_bun" ]]; then
    exec "$_real_bun" -e "$_code" "$@"
  fi
  # No real bun found — try node with a Bun.stdin polyfill
  _self_dir="$(cd "$(dirname "$0")" && pwd)"
  IFS=: read -ra _path_dirs <<< "$PATH"
  _real_node=""
  for _d in "${_path_dirs[@]}"; do
    if [[ "$_d" != "$_self_dir" && -x "$_d/node" ]]; then
      _real_node="$_d/node"
      break
    fi
  done
  if [[ -n "$_real_node" ]]; then
    # Polyfill Bun.stdin.text() for node: read all of stdin as a string.
    # --input-type=module enables top-level await (used by fly/lib scripts).
    _polyfill='globalThis.Bun={stdin:{text:()=>new Promise(r=>{let d="";process.stdin.setEncoding("utf8");process.stdin.on("data",c=>d+=c);process.stdin.on("end",()=>r(d))})}};'
    # Strip TypeScript type annotations for node compatibility.
    _js_code=$(printf '%s' "$_code" | sed -E 's/: (any\[\]|any|string|number|void)//g; s/ as any//g')
    exec "$_real_node" --input-type=module -e "${_polyfill}${_js_code}" "$@"
  fi
fi
exit 0
MOCKBUN
  chmod +x "${TEST_DIR}/bun"

  # Silent mocks (no logging needed)
  _create_silent_mock clear sleep

  # Mock timeout/gtimeout to just run the command (skip the timeout value)
  cat > "${TEST_DIR}/timeout" << 'MOCK'
#!/bin/bash
shift
exec "$@"
MOCK
  chmod +x "${TEST_DIR}/timeout"
  cp "${TEST_DIR}/timeout" "${TEST_DIR}/gtimeout"

  # Mock python3 — delegate to the real python3 for JSON parsing.
  # FIX: search PATH for an interpreter outside our mock directory instead
  # of hardcoding /usr/bin/python3, which does not exist on some systems
  # (e.g. Homebrew-only macOS or pyenv installs). Same technique as
  # _find_real_bun above.
  cat > "${TEST_DIR}/python3" << 'MOCK'
#!/bin/bash
_self_dir="$(cd "$(dirname "$0")" && pwd)"
IFS=: read -ra _path_dirs <<< "$PATH"
for _d in "${_path_dirs[@]}"; do
  if [[ "$_d" != "$_self_dir" && -x "$_d/python3" ]]; then
    exec "$_d/python3" "$@"
  fi
done
echo "mock python3: no real python3 found on PATH" >&2
exit 127
MOCK
  chmod +x "${TEST_DIR}/python3"

  # Mock 'ssh-keygen' — returns MD5 fingerprint matching fixture data
  _create_ssh_keygen_mock

  # Mock fly/flyctl CLI — handles ssh console, auth token, version
  _create_fly_mock
}
# Create the mock `fly` CLI (and an identical `flyctl` copy).
# Every invocation is logged to MOCK_LOG. Honors MOCK_ERROR_SCENARIO to
# inject failures:
#   ssh_tunnel_failure — `fly ssh …` and `fly machine exec …` exit 1 with
#                        tunnel/unreachable errors on stderr
#   ssh_timeout        — ssh/machine never report "ok" (always exit 1),
#                        so _fly_wait_for_ssh-style loops exhaust retries
# Normal mode answers just enough of the CLI surface for the agent scripts:
# `auth token`, `machine exec` (echoes "ok" for readiness probes),
# `machine list`, `ssh console`, and `version`.
_create_fly_mock() {
  cat > "${TEST_DIR}/fly" << 'MOCK'
#!/bin/bash
echo "fly $*" >> "${MOCK_LOG}"

# Simulate fly CLI failures when MOCK_ERROR_SCENARIO is set
case "${MOCK_ERROR_SCENARIO:-}" in
  ssh_tunnel_failure)
    case "$1" in
      ssh)
        echo "Error: failed to connect to tunnel: context deadline exceeded" >&2
        exit 1 ;;
      machine)
        case "${2:-}" in
          exec)
            echo "Error: machine not reachable" >&2
            exit 1 ;;
        esac ;;
    esac ;;
  ssh_timeout)
    case "$1" in
      ssh|machine)
        # Never return "ok" — simulates SSH not becoming ready
        exit 1 ;;
    esac ;;
esac

case "$1" in
  auth)
    case "${2:-}" in
      token) echo "test-token-fly" ;;
    esac ;;
  machine)
    case "${2:-}" in
      exec)
        # fly machine exec MACHINE_ID --app APP -- bash -c CMD
        all_args="$*"
        if [[ "$all_args" == *"echo ok"* ]] || [[ "$all_args" == *'echo\ ok'* ]]; then
          echo "ok"
        fi
        ;;
      list) echo "[]" ;;
    esac ;;
  ssh)
    # fly ssh console -a APP -C "bash -c CMD" --quiet
    # Extract the command and simulate its output
    all_args="$*"
    # Check for "echo ok" (may be escaped as echo\ ok by printf %q)
    if [[ "$all_args" == *"echo ok"* ]] || [[ "$all_args" == *'echo\ ok'* ]]; then
      echo "ok"
    fi
    ;;
  version)
    echo "fly v0.3.50" ;;
esac
exit 0
MOCK
  chmod +x "${TEST_DIR}/fly"
  cp "${TEST_DIR}/fly" "${TEST_DIR}/flyctl"
}
# Build a disposable $HOME under TEST_DIR with the directories and dummy
# SSH key pair agent scripts expect. Prints the fake home path.
setup_fake_home() {
  local fake_home="${TEST_DIR}/fakehome"
  local subdir
  for subdir in .ssh .config/spawn .claude .local/bin; do
    mkdir -p "${fake_home}/${subdir}"
  done
  # Dummy SSH key pair — contents only need to look plausible.
  printf '%s\n' "-----BEGIN OPENSSH PRIVATE KEY-----" > "${fake_home}/.ssh/id_ed25519"
  printf '%s\n' "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIHmcVdzydp72a/B69nmENZvCvjuk7xGpKdi5CvhkmNsv test@test" > "${fake_home}/.ssh/id_ed25519.pub"
  chmod 600 "${fake_home}/.ssh/id_ed25519"
  echo "${fake_home}"
}
# ============================================================
# Cloud API helpers (for use by test infra tests)
# ============================================================

# Strip API base URL to get just the endpoint path.
# Used by test/test-infra-sync.test.ts to validate cloud coverage.
# $1 url, $2 base to remove (interpreted by sed as a BRE).
_strip_simple_base() {
  local url="$1" pattern="$2"
  sed "s|${pattern}||" <<< "$url"
}
# Apply an arbitrary sed substitution to a URL and print the result.
# $1 url, $2 full sed expression (e.g. 's|…||').
_strip_pattern_base() {
  local url="$1" sed_pattern="$2"
  sed "$sed_pattern" <<< "$url"
}
# Reduce a full request URL to its endpoint path: remove a known cloud API
# base prefix (if any), then drop everything from the first '?'.
_strip_api_base() {
  local url="$1"
  local endpoint="$url"
  local base

  for base in \
    "https://api.hetzner.cloud/v1" \
    "https://api.digitalocean.com/v2" \
    "https://api.machines.dev/v1"; do
    if [[ "$url" == "${base}"* ]]; then
      endpoint="${url#"${base}"}"
      break
    fi
  done

  printf '%s\n' "${endpoint%%\?*}"
}
# Get required POST body fields for a cloud endpoint.
# Prints a space-separated field list, or nothing for unknown endpoints.
_get_required_fields() {
  local key="${1}:${2}"
  if [[ "$key" == "hetzner:/servers" ]]; then
    echo "name server_type image location"
  elif [[ "$key" == "digitalocean:/droplets" ]]; then
    echo "name region size image"
  elif [[ "$key" == fly:*/machines ]]; then
    echo "name region config"
  fi
}
# Validate POST request body contains required fields for major clouds.
# Used during mock script execution to catch invalid API requests.
# Logs BODY_ERROR:invalid_json:… (and returns 1) for malformed JSON, and
# BODY_ERROR:missing_field:… (return stays 0) per absent required field.
# Args: cloud method endpoint body
_validate_body() {
  local cloud="$1"
  local method="$2"
  local endpoint="$3"
  local body="$4"

  # Only POST requests carrying a body are validated.
  if [[ "$method" != "POST" || -z "$body" ]]; then
    return 0
  fi

  local required_fields
  required_fields=$(_get_required_fields "$cloud" "$endpoint")
  [[ -n "$required_fields" ]] || return 0

  # Reject bodies that aren't valid JSON at all.
  if ! printf '%s' "$body" | python3 -c "import json,sys; json.loads(sys.stdin.read())" 2>/dev/null; then
    echo "BODY_ERROR:invalid_json:${endpoint}" >> "${MOCK_LOG}"
    return 1
  fi

  # Log (but don't fail on) each missing required top-level field.
  local field
  for field in $required_fields; do
    if ! printf '%s' "$body" | python3 -c "import json,sys; d=json.loads(sys.stdin.read()); assert '$field' in d" 2>/dev/null; then
      echo "BODY_ERROR:missing_field:${field}:${endpoint}" >> "${MOCK_LOG}"
    fi
  done

  return 0
}
# ============================================================
# Cloud-specific env var setup
# ============================================================

# Export the env vars an agent script expects: a dummy OpenRouter key and
# zero poll delay, plus cloud-specific vars sourced from the fixture's
# _env.sh (if present).
setup_env_for_cloud() {
  local cloud="$1"

  # Universal env vars
  export OPENROUTER_API_KEY="sk-or-v1-0000000000000000000000000000000000000000000000000000000000000000"
  export INSTANCE_STATUS_POLL_DELAY=0

  # Cloud-specific env vars from fixture data
  local env_file="${FIXTURES_DIR}/${cloud}/_env.sh"
  # shellcheck disable=SC1090
  [[ ! -f "$env_file" ]] || source "$env_file"
}
# ============================================================
# Discovery
# ============================================================

# Print the name of every fixture directory that contains recorded data
# (marked by a _metadata.json file), one per line.
discover_clouds() {
  local fixture_dir
  for fixture_dir in "${FIXTURES_DIR}"/*/; do
    [[ -f "${fixture_dir}/_metadata.json" ]] || continue
    basename "$fixture_dir"
  done
}
# Print the agent name (basename without .sh) of every script in the
# given cloud's directory, one per line.
discover_agents() {
  local cloud="$1" script
  for script in "${REPO_ROOT}/${cloud}"/*.sh; do
    [[ -f "$script" ]] || continue
    basename "$script" .sh
  done
}
# ============================================================
# Test runner helpers
# ============================================================

# Wait for a process to complete or timeout.
# Stores the exit status into the variable named by $3: 124 on timeout
# (mirroring coreutils `timeout`), the child's status on failure. On
# success the variable is left untouched — callers pre-initialize it to 0.
# Args: pid timeout_seconds exit_code_var
_wait_with_timeout() {
  local pid="$1"
  local timeout="$2"
  local exit_code_var="$3"
  local i=0

  while kill -0 "$pid" 2>/dev/null; do
    if [[ "$i" -ge "$timeout" ]]; then
      # FIX: the child can exit between the kill -0 probe and this kill;
      # without `|| true` the failed kill would abort the whole suite
      # under `set -e`.
      kill -9 "$pid" 2>/dev/null || true
      wait "$pid" 2>/dev/null || true
      eval "${exit_code_var}=124"
      return
    fi
    sleep 1
    i=$((i + 1))
  done
  # $? inside the eval string is expanded before eval runs, so it captures
  # wait's (i.e. the child's) exit status.
  wait "$pid" 2>/dev/null || eval "${exit_code_var}=$?"
}
# Run a script in a sandboxed environment with a 4-second timeout.
# Sets exit_code variable in the caller's scope.
# Sandbox: mock binaries first on PATH, HOME pointed at the fake home,
# stdin closed, stdout+stderr captured to ${TEST_DIR}/output.log, and the
# MOCK_* variables telling the mock curl where fixtures live and which
# optional checks (body validation, state tracking, error injection) apply.
# Args: script_path cloud state_file fake_home
run_script_with_timeout() {
  local script_path="$1"
  local cloud="$2"
  local state_file="$3"
  local fake_home="$4"

  # Deliberately NOT local: read by the caller after we return.
  exit_code=0

  MOCK_LOG="${MOCK_LOG}" \
    MOCK_FIXTURE_DIR="${FIXTURES_DIR}/${cloud}" \
    MOCK_CLOUD="${cloud}" \
    MOCK_REPO_ROOT="${REPO_ROOT}" \
    MOCK_VALIDATE_BODY="${MOCK_VALIDATE_BODY:-}" \
    MOCK_TRACK_STATE="${MOCK_TRACK_STATE:-}" \
    MOCK_STATE_FILE="${state_file}" \
    MOCK_ERROR_SCENARIO="${MOCK_ERROR_SCENARIO:-}" \
    PATH="${TEST_DIR}:${PATH}" \
    HOME="${fake_home}" \
    bash "${script_path}" < /dev/null > "${TEST_DIR}/output.log" 2>&1 &
  local pid=$!
  _wait_with_timeout "$pid" 4 "exit_code"
}
# Print last 20 lines of the captured script output when it failed;
# print nothing on success. Args: exit_code
show_failure_output() {
  local exit_code="$1"
  [[ "${exit_code}" -ne 0 ]] || return 0
  printf '%b\n' "  ${RED}--- output (last 20 lines) ---${NC}"
  tail -20 "${TEST_DIR}/output.log" 2>/dev/null | while IFS= read -r line; do
    printf '    %s\n' "$line"
  done
  printf '%b\n' "  ${RED}--- end output ---${NC}"
}
# Assert that the script failed when an error scenario was injected.
# Returns 0 (with result recorded) if an error scenario is active, 1
# otherwise so the caller falls through to the normal assertions.
# Args: exit_code cloud agent
assert_error_scenario() {
  local exit_code="$1" cloud="$2" agent="$3"

  # Not in error-injection mode: tell the caller to run normal assertions.
  [[ -n "${MOCK_ERROR_SCENARIO:-}" ]] || return 1

  if [[ "${exit_code}" -eq 0 ]]; then
    printf '%b\n' "  ${RED}✗${NC} should fail on ${MOCK_ERROR_SCENARIO} but exited 0"
    FAILED=$((FAILED + 1))
    record_test_result "${cloud}" "${agent}" "fail"
  else
    printf '%b\n' "  ${GREEN}✓${NC} fails on ${MOCK_ERROR_SCENARIO} (exit code ${exit_code})"
    PASSED=$((PASSED + 1))
    record_test_result "${cloud}" "${agent}" "pass"
  fi
  return 0
}
# Assert that the expected cloud-specific API calls were made.
# Reads assertions from test/fixtures/{cloud}/_api_assertions.sh if present,
# otherwise falls back to a generic API call check.
# Args: cloud
assert_cloud_api_calls() {
  local cloud="$1"
  local assertions_file="${FIXTURES_DIR}/${cloud}/_api_assertions.sh"
  if [[ ! -f "$assertions_file" ]]; then
    assert_log_contains "curl (GET|POST) https://" "makes API calls"
    return 0
  fi
  # shellcheck disable=SC1090
  source "$assertions_file"
}
# Write pass/fail result to RESULTS_FILE if set (no-op otherwise).
# Args: cloud agent result [reason]
# Result format: cloud/agent:pass or cloud/agent:fail[:reason]
# Reasons: exit_code, missing_api_call, missing_env, no_fixture
record_test_result() {
  local cloud="$1" agent="$2" result="$3" reason="${4:-}"
  [[ -n "${RESULTS_FILE:-}" ]] || return 0
  local line="${cloud}/${agent}:${result}"
  [[ -z "$reason" ]] || line="${line}:${reason}"
  printf '%s\n' "$line" >> "${RESULTS_FILE}"
}
# ============================================================
# Test runner
# ============================================================

# Run an assertion and store the number of new failures in _ASSERT_DELTA.
# Usage: _tracked_assert <assertion_command> [args...]
# The assertion runs in the current shell so PASSED/FAILED propagate.
_tracked_assert() {
  local _fail_snapshot=$FAILED
  "$@"
  _ASSERT_DELTA=$((FAILED - _fail_snapshot))
}
# Determine the primary failure reason from tracked failure counts,
# checked in priority order: fixture gaps first, then exit code, API,
# env injection, SSH; "unknown" when nothing matched.
# Args: has_no_fixture exit_fails api_fails ssh_fails env_fails
# Prints the reason string to stdout.
_categorize_failure() {
  local has_no_fixture="$1" exit_fails="$2" api_fails="$3" ssh_fails="$4" env_fails="$5"
  if (( has_no_fixture > 0 )); then echo "no_fixture"; return 0; fi
  if (( exit_fails > 0 )); then echo "exit_code"; return 0; fi
  if (( api_fails > 0 )); then echo "missing_api_call"; return 0; fi
  if (( env_fails > 0 )); then echo "missing_env"; return 0; fi
  if (( ssh_fails > 0 )); then echo "missing_ssh"; return 0; fi
  echo "unknown"
}
# Run assertions for a script and track which categories failed.
# Outputs: _exit_failed, _api_failed, _ssh_failed, _env_failed (as 0/1) —
# deliberately NOT local, read later by _record_categorized_result.
# $3 (state file) is only used when MOCK_TRACK_STATE=1.
_run_assertions_and_track() {
  local exit_code="$1" cloud="$2"
  # local here so _tracked_assert (bash dynamic scoping) assigns into
  # this function's frame rather than a global.
  local _ASSERT_DELTA=0

  _tracked_assert assert_exit_code "${exit_code}" 0 "exits successfully"
  _exit_failed=$_ASSERT_DELTA

  _tracked_assert assert_cloud_api_calls "$cloud"
  _api_failed=$_ASSERT_DELTA

  _tracked_assert assert_log_contains "ssh " "uses SSH"
  _ssh_failed=$_ASSERT_DELTA

  _tracked_assert assert_env_injected "OPENROUTER_API_KEY"
  _env_failed=$_ASSERT_DELTA

  # Optional extra checks, enabled via env flags.
  if [[ "${MOCK_VALIDATE_BODY:-}" == "1" ]]; then
    assert_no_body_errors
  fi
  if [[ "${MOCK_TRACK_STATE:-}" == "1" ]]; then
    assert_server_cleaned_up "$3"
  fi
}
# Print 1 if the mock curl logged any NO_FIXTURE: entries, else 0.
_has_missing_fixture() {
  if grep -q "NO_FIXTURE:" "${MOCK_LOG}" 2>/dev/null; then
    echo 1
  else
    echo 0
  fi
}
# Reset per-test state: truncate the mock call log and the server state
# file, then export the env vars for the given cloud.
# Args: cloud state_file
_setup_test_env() {
  local cloud="$1" state_file="$2"
  : > "${MOCK_LOG}"
  : > "${state_file}"
  setup_env_for_cloud "$cloud"
}
# Record test result based on failure categories.
# Compares the global FAILED counter against its value from before the test
# ($3): any growth means this test contributed failures. The failure reason
# is derived from the _*_failed trackers set by _run_assertions_and_track.
# Args: cloud agent pre_failed
_record_categorized_result() {
  local cloud="$1"
  local agent="$2"
  local pre_failed="$3"

  # Number of assertion failures contributed by this test alone.
  local pre_fail=$((FAILED - pre_failed))
  if [[ "$pre_fail" -gt 0 ]]; then
    local _has_no_fixture
    _has_no_fixture=$(_has_missing_fixture)
    local _reason
    _reason=$(_categorize_failure "$_has_no_fixture" "$_exit_failed" "$_api_failed" "$_ssh_failed" "$_env_failed")
    record_test_result "${cloud}" "${agent}" "fail" "${_reason}"
  else
    record_test_result "${cloud}" "${agent}" "pass"
  fi
}
# Run per-agent install assertions.
# Sources the shared assertions file (which must define
# assert_agent_install), lets an optional per-cloud file override it,
# then invokes assert_agent_install. Silently no-ops when the shared
# file is absent.
_run_agent_assertions() {
  local cloud="$1" agent="$2"
  local shared_file="${FIXTURES_DIR}/_shared_agent_assertions.sh"
  local cloud_file="${FIXTURES_DIR}/${cloud}/_agent_assertions.sh"

  [[ -f "$shared_file" ]] || return 0
  # shellcheck disable=SC1090
  source "$shared_file"
  # Apply per-cloud overrides if they exist
  if [[ -f "$cloud_file" ]]; then
    # shellcheck disable=SC1090
    source "$cloud_file"
  fi
  assert_agent_install "$cloud" "$agent"
}
# Run the full mock test for one cloud/agent pair: sandbox the script,
# run it with a timeout, then apply either error-scenario or normal-mode
# assertions and record the categorized result.
# Skips when the script file is missing, or for fly/ in normal mode.
# Args: cloud agent
run_test() {
  local cloud="$1"
  local agent="$2"
  local script_path="${REPO_ROOT}/${cloud}/${agent}.sh"

  if [[ ! -f "$script_path" ]]; then
    printf '%b\n' "  ${YELLOW}skip${NC} ${cloud}/${agent}.sh — file not found"
    SKIPPED=$((SKIPPED + 1))
    return 0
  fi

  # fly/ scripts use TypeScript (bun) with native fetch() for API calls.
  # Fixture-based mock tests (which intercept curl) don't apply.
  # Fly coverage comes from: bun test (44 tests) + fly failure mode tests (4 tests).
  if [[ "$cloud" == "fly" && -z "${MOCK_ERROR_SCENARIO:-}" ]]; then
    printf '%b\n' "  ${YELLOW}skip${NC} ${cloud}/${agent}.sh — TS provider (tested via bun test + failure modes)"
    SKIPPED=$((SKIPPED + 1))
    return 0
  fi

  printf '%b\n' "  ${CYAN}test${NC} ${cloud}/${agent}.sh"

  # Snapshot FAILED so _record_categorized_result can tell whether this
  # particular test added failures.
  local _pre_failed="${FAILED}"
  local fake_home
  fake_home=$(setup_fake_home)
  local state_file="${TEST_DIR}/state_${cloud}_${agent}.log"

  _setup_test_env "$cloud" "$state_file"

  local exit_code
  run_script_with_timeout "${script_path}" "${cloud}" "${state_file}" "${fake_home}"
  show_failure_output "${exit_code}"

  # Error scenario mode: just check that script failed, then return
  if assert_error_scenario "${exit_code}" "${cloud}" "${agent}"; then
    printf '\n'
    return 0
  fi

  # Normal mode: run standard assertions and track failures per category
  _run_assertions_and_track "${exit_code}" "${cloud}" "${state_file}"

  # Per-agent install assertions
  _run_agent_assertions "$cloud" "$agent"

  _record_categorized_result "${cloud}" "${agent}" "$_pre_failed"

  printf '\n'
}
# ============================================================
# Fly.io failure-mode tests (#1579)
# ============================================================

# Run a single Fly.io agent script under a specific error scenario.
# Expects MOCK_ERROR_SCENARIO to trigger error injection in mock curl and/or fly CLI.
# Passes when the script exits non-zero (the injected failure was detected
# and propagated); fails when it exits 0 despite the injected error.
# Args: scenario agent
_run_fly_error_test() {
  local scenario="$1"
  local agent="$2"
  local script_path="${REPO_ROOT}/fly/${agent}.sh"

  [[ -f "$script_path" ]] || return 0

  printf '%b\n' "  ${CYAN}test${NC} fly/${agent}.sh [${scenario}]"

  local fake_home
  fake_home=$(setup_fake_home)
  local state_file="${TEST_DIR}/state_fly_${agent}_${scenario}.log"

  : > "${MOCK_LOG}"
  setup_env_for_cloud "fly"
  : > "${state_file}"

  # Re-create fly mock so it picks up the error scenario
  _create_fly_mock

  local exit_code=0
  MOCK_LOG="${MOCK_LOG}" \
    MOCK_FIXTURE_DIR="${FIXTURES_DIR}/fly" \
    MOCK_CLOUD="fly" \
    MOCK_REPO_ROOT="${REPO_ROOT}" \
    MOCK_ERROR_SCENARIO="${scenario}" \
    MOCK_STATE_FILE="${state_file}" \
    SPAWN_NON_INTERACTIVE=1 \
    PATH="${TEST_DIR}:${PATH}" \
    HOME="${fake_home}" \
    bash "${script_path}" < /dev/null > "${TEST_DIR}/output.log" 2>&1 &
  local pid=$!
  # Larger budget than the normal 4s: fly shims may need to launch bun.
  _wait_with_timeout "$pid" 10 "exit_code"

  if [[ "${exit_code}" -ne 0 ]]; then
    printf '%b\n' "  ${GREEN}✓${NC} fails on ${scenario} (exit code ${exit_code})"
    PASSED=$((PASSED + 1))
  else
    printf '%b\n' "  ${RED}✗${NC} should fail on ${scenario} but exited 0"
    FAILED=$((FAILED + 1))
  fi
  printf '\n'
}
# Run all Fly.io failure-mode tests using a single representative agent.
# Uses claude.sh as the test subject since it exercises the full
# provisioning path. Scenarios:
#   rate_limit         — cloud API returns 429
#   create_failure     — POST to */machines* returns 422
#   ssh_tunnel_failure — fly ssh console / fly machine exec exit non-zero
#   ssh_timeout        — fly CLI never reports "ok"; retries are exhausted
run_fly_failure_tests() {
  printf '%b\n' "${CYAN}━━━ fly failure modes (#1579) ━━━${NC}"

  local test_agent="claude"
  if [[ ! -f "${REPO_ROOT}/fly/${test_agent}.sh" ]]; then
    printf '%b\n' "  ${YELLOW}skip${NC} fly/${test_agent}.sh not found"
    SKIPPED=$((SKIPPED + 1))
    return 0
  fi

  local scenario
  for scenario in rate_limit create_failure ssh_tunnel_failure ssh_timeout; do
    _run_fly_error_test "$scenario" "$test_agent"
  done
}
# ============================================================
# Main
# ============================================================

printf '%b\n' "${CYAN}===============================${NC}"
printf '%b\n' "${CYAN} Spawn Mock Test Suite${NC}"
printf '%b\n' "${CYAN}===============================${NC}"
printf '\n'

# Parse arguments
FILTER_CLOUD="${1:-}"
FILTER_AGENT="${2:-}"

# Set up mocks once
setup_mock_curl
setup_mock_ssh
setup_mock_agents

# Discover what to test
if [[ -n "$FILTER_CLOUD" ]]; then
  CLOUDS="$FILTER_CLOUD"
  if [[ ! -d "${FIXTURES_DIR}/${FILTER_CLOUD}" ]]; then
    printf '%b\n' "${RED}No fixtures for cloud: ${FILTER_CLOUD}${NC}"
    printf "Available: %s\n" "$(discover_clouds | tr '\n' ' ')"
    exit 1
  fi
else
  CLOUDS=$(discover_clouds)
fi

if [[ -z "$CLOUDS" ]]; then
  printf '%b\n' "${YELLOW}No fixture data found in ${FIXTURES_DIR}/${NC}"
  printf "Run test/record.sh first to record API fixtures.\n"
  exit 0
fi

printf "Fixtures dir: %s\n" "${FIXTURES_DIR}"
printf "Clouds: %s\n" "$CLOUDS"
printf '\n'

# --- Run clouds in parallel ---
# Each cloud runs in its own backgrounded subshell with an isolated
# TEST_DIR, mock set and counters; output and counts go to
# CLOUD_RESULTS_DIR and are merged after all subshells finish.
CLOUD_RESULTS_DIR="${TEST_DIR}/cloud_results"
mkdir -p "${CLOUD_RESULTS_DIR}"

CLOUD_PIDS=""
for cloud in $CLOUDS; do
  (
    # Isolated per-cloud state
    CLOUD_TEST_DIR=$(mktemp -d)
    MOCK_LOG="${CLOUD_TEST_DIR}/mock_calls.log"
    CLOUD_PASSED=0
    CLOUD_FAILED=0
    CLOUD_SKIPPED=0

    # Re-create mocks in per-cloud temp dir (curl/ssh/agents need own copies)
    TEST_DIR="${CLOUD_TEST_DIR}"
    setup_mock_curl
    setup_mock_ssh
    setup_mock_agents

    # Override counters used by assertions (they modify PASSED/FAILED/SKIPPED)
    PASSED=0
    FAILED=0
    SKIPPED=0

    printf '%b\n' "${CYAN}━━━ ${cloud} ━━━${NC}"

    if [[ -n "$FILTER_AGENT" ]]; then
      AGENTS="$FILTER_AGENT"
    else
      AGENTS=$(discover_agents "$cloud")
    fi

    if [[ -z "$AGENTS" ]]; then
      printf '%b\n' "  ${YELLOW}skip${NC} no agent scripts found in ${cloud}/"
      SKIPPED=$((SKIPPED + 1))
    else
      for agent in $AGENTS; do
        run_test "$cloud" "$agent"
      done
    fi
    printf '\n'

    # Write counts to results file for aggregation
    printf '%d %d %d\n' "$PASSED" "$FAILED" "$SKIPPED" > "${CLOUD_RESULTS_DIR}/${cloud}.counts"

    rm -rf "${CLOUD_TEST_DIR}"
  ) > "${CLOUD_RESULTS_DIR}/${cloud}.log" 2>&1 &
  CLOUD_PIDS="${CLOUD_PIDS} $!"
done

# Wait for all clouds to finish
for pid in $CLOUD_PIDS; do
  wait "$pid" 2>/dev/null || true
done

# Print output from each cloud (in discovery order for consistent output)
for cloud in $CLOUDS; do
  if [[ -f "${CLOUD_RESULTS_DIR}/${cloud}.log" ]]; then
    cat "${CLOUD_RESULTS_DIR}/${cloud}.log"
  fi
done

# Aggregate results from all clouds
for cloud in $CLOUDS; do
  if [[ -f "${CLOUD_RESULTS_DIR}/${cloud}.counts" ]]; then
    read -r p f s < "${CLOUD_RESULTS_DIR}/${cloud}.counts"
    PASSED=$((PASSED + p))
    FAILED=$((FAILED + f))
    SKIPPED=$((SKIPPED + s))
  fi
done

# --- Fly.io failure-mode tests (#1579) ---
# Run only when fly fixtures exist and no agent filter is active
if [[ -d "${FIXTURES_DIR}/fly" && ( -z "$FILTER_CLOUD" || "$FILTER_CLOUD" == "fly" ) && -z "$FILTER_AGENT" ]]; then
  # Same isolation pattern as the per-cloud subshells above, but run in
  # the foreground since it is the last test group.
  (
    FLY_FAIL_TEST_DIR=$(mktemp -d)
    TEST_DIR="${FLY_FAIL_TEST_DIR}"
    MOCK_LOG="${FLY_FAIL_TEST_DIR}/mock_calls.log"
    PASSED=0
    FAILED=0
    SKIPPED=0

    setup_mock_curl
    setup_mock_ssh
    setup_mock_agents

    run_fly_failure_tests

    printf '%d %d %d\n' "$PASSED" "$FAILED" "$SKIPPED" > "${CLOUD_RESULTS_DIR}/fly_failures.counts"
    rm -rf "${FLY_FAIL_TEST_DIR}"
  ) > "${CLOUD_RESULTS_DIR}/fly_failures.log" 2>&1

  if [[ -f "${CLOUD_RESULTS_DIR}/fly_failures.log" ]]; then
    cat "${CLOUD_RESULTS_DIR}/fly_failures.log"
  fi
  if [[ -f "${CLOUD_RESULTS_DIR}/fly_failures.counts" ]]; then
    read -r p f s < "${CLOUD_RESULTS_DIR}/fly_failures.counts"
    PASSED=$((PASSED + p))
    FAILED=$((FAILED + f))
    SKIPPED=$((SKIPPED + s))
  fi
fi

# --- Summary ---
printf '%b\n' "${CYAN}===============================${NC}"
TOTAL=$((PASSED + FAILED + SKIPPED))
printf '%b\n' "  Results: ${GREEN}${PASSED} passed${NC}, ${RED}${FAILED} failed${NC}, ${YELLOW}${SKIPPED} skipped${NC}, ${TOTAL} total"
printf '%b\n' "${CYAN}===============================${NC}"

# Non-zero exit when any assertion failed (for CI).
if [[ "$FAILED" -gt 0 ]]; then
  exit 1
fi
exit 0