diff --git a/.claude/skills/setup-agent-team/qa-cycle.sh b/.claude/skills/setup-agent-team/qa-cycle.sh index 31807738..f5539544 100644 --- a/.claude/skills/setup-agent-team/qa-cycle.sh +++ b/.claude/skills/setup-agent-team/qa-cycle.sh @@ -875,10 +875,12 @@ if [[ -f "${RESULTS_PHASE4}" ]]; then RETRY_FAIL=$(grep -c ':fail$' "${RESULTS_PHASE4}" || true) log "Phase 4: ${RETRY_PASS} passed, ${RETRY_FAIL} failed" - python3 test/update-readme.py "${RESULTS_PHASE4}" 2>&1 | tee -a "${LOG_FILE}" + # TODO: Rewrite update-readme.py as TypeScript utility + # For now, skip README auto-update (removed test/update-readme.py - security theater cleanup) + log "Phase 4: Skipping README auto-update (needs TypeScript rewrite)" # Commit + push if README changed (using PR workflow to avoid race conditions) - if [[ -n "$(git diff --name-only README.md 2>/dev/null)" ]]; then + if false && [[ -n "$(git diff --name-only README.md 2>/dev/null)" ]]; then # Create feature branch for README update (timestamped to avoid collisions) README_BRANCH="qa/readme-update-$(date +%s)" git checkout -b "${README_BRANCH}" 2>&1 | tee -a "${LOG_FILE}" diff --git a/test/e2e.sh b/test/e2e.sh deleted file mode 100644 index 675b22a4..00000000 --- a/test/e2e.sh +++ /dev/null @@ -1,1268 +0,0 @@ -#!/bin/bash -set -eo pipefail - -# E2E Tests — Real server provisioning, agent install, and verification -# By default runs ONE agent per cloud (smoke test). Use --all for the full matrix. -# -# Usage: -# bash test/e2e.sh # One agent per cloud (smoke test) -# bash test/e2e.sh --all # All agents on all clouds (full matrix) -# bash test/e2e.sh fly # One agent on fly -# bash test/e2e.sh fly openclaw # Single combo -# bash test/e2e.sh fly --all # All agents on fly -# bash test/e2e.sh --cleanup # Destroy stale e2e-* servers -# bash test/e2e.sh --history # Show timing history -# bash test/e2e.sh --compare openclaw # Compare agent across clouds -# -# Environment: -# OPENROUTER_API_KEY — Required for all tests -# E2E_CANARY_AGENT — Agent to use for smoke tests (default: openclaw) -# E2E_AUTO_FIX — Set to "1" to spawn Claude agents for failures (default: 0) -# E2E_OPTIMIZE — Set to "1" to spawn Claude agents for slow-but-passing tests (default: 0) -# E2E_TIMEOUT — Per-combo timeout in seconds (default: 900) -# -# Each agent script runs with SPAWN_NON_INTERACTIVE=1 so safe_read() fails -# immediately instead of hanging on /dev/tty. Cloud-specific env vars -# (HETZNER_LOCATION, FLY_REGION, etc.) are auto-set to sane defaults. - -REPO_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)" -E2E_TIMEOUT="${E2E_TIMEOUT:-900}" -E2E_AUTO_FIX="${E2E_AUTO_FIX:-0}" -E2E_OPTIMIZE="${E2E_OPTIMIZE:-0}" -E2E_ALL=0 -E2E_CANARY_AGENT="${E2E_CANARY_AGENT:-openclaw}" -E2E_RESULTS_DIR="" -E2E_SERVER_PREFIX="e2e" -E2E_PIDS="" -E2E_TIMINGS_FILE="${REPO_ROOT}/.docs/e2e-timings.json" -E2E_SLOW_THRESHOLD=180 # seconds — flag as slow even if passing - -# --- Logging --- - -_e2e_log() { - printf '[%s] [e2e] %s\n' "$(date +'%H:%M:%S')" "$*" -} - -_e2e_pass() { - printf ' \033[32m✓\033[0m %s\n' "$*" -} - -_e2e_fail() { - printf ' \033[31m✗\033[0m %s\n' "$*" -} - -# --- Cloud config lookup (bash 3.2 compatible — no associative arrays) --- - -# Get the env var name used for server/app name -_get_name_env_var() { - case "$1" in - fly) echo "FLY_APP_NAME" ;; - hetzner) echo "HETZNER_SERVER_NAME" ;; - digitalocean) echo "DO_DROPLET_NAME" ;; - aws) echo "LIGHTSAIL_SERVER_NAME" ;; - daytona) echo "DAYTONA_SANDBOX_NAME" ;; - gcp) echo "GCP_INSTANCE_NAME" ;; - - sprite) echo "SPRITE_NAME" ;; - *) echo "" ;; - esac -} - -# Get the env var name used for cloud token -_get_token_env_var() { - case "$1" in - fly) echo "FLY_API_TOKEN" ;; - hetzner) echo "HCLOUD_TOKEN" ;; - digitalocean) echo "DO_API_TOKEN" ;; - daytona) echo "DAYTONA_API_KEY" ;; - *) echo "" ;; - esac -} - -# --- Credential helpers --- - -# Try to load a token from the spawn config file into the env var. -# Returns 0 if token was loaded, 1 if not. -_load_token_from_config() { - local cloud="$1" - local token_var - token_var=$(_get_token_env_var "$cloud") - [[ -z "$token_var" ]] && return 1 - - # Already set — nothing to do - local current="${!token_var:-}" - [[ -n "$current" ]] && return 0 - - local config_file="${HOME}/.config/spawn/${cloud}.json" - [[ -f "$config_file" ]] || return 1 - - local saved - saved=$(python3 -c "import json, sys; data=json.load(open(sys.argv[1])); print(data.get('api_key','') or data.get('token',''))" "$config_file" 2>/dev/null) - if [[ -n "$saved" ]]; then - export "$token_var=$saved" - return 0 - fi - return 1 -} - -# Interactive credential collection — runs BEFORE non-interactive tests. -# For each token-based cloud, ensures the env var is set by: -# 1. Checking the env var -# 2. Loading from ~/.config/spawn/{cloud}.json -# 3. Prompting the user (Enter to skip) -_collect_credentials() { - local clouds="$1" - local collected="" - local skipped="" - - for cloud in $clouds; do - local token_var - token_var=$(_get_token_env_var "$cloud") - - # CLI-auth clouds (aws, gcp, sprite) — no token to collect - [[ -z "$token_var" ]] && continue - - # Already in env? - if [[ -n "${!token_var:-}" ]]; then - collected="${collected} ${cloud}" - continue - fi - - # Try config file - if _load_token_from_config "$cloud"; then - _e2e_log "Loaded ${token_var} from ~/.config/spawn/${cloud}.json" - collected="${collected} ${cloud}" - continue - fi - - # Fly: try CLI auth (fly auth token) - if [[ "$cloud" == "fly" ]] && _try_fly_cli_token; then - _e2e_log "Loaded FLY_API_TOKEN from fly CLI auth" - collected="${collected} ${cloud}" - continue - fi - - # No TTY? Can't prompt — skip - if ! echo -n "" > /dev/tty 2>/dev/null; then - skipped="${skipped} ${cloud}" - continue - fi - - # Interactive prompt - printf ' %s: paste %s (Enter to skip): ' "$cloud" "$token_var" - local token="" - read -r token /dev/null; then - fly_cmd="fly" - elif command -v flyctl &>/dev/null; then - fly_cmd="flyctl" - else - return 1 - fi - local token - token=$("$fly_cmd" auth token 2>/dev/null) || return 1 - if [[ -n "$token" ]]; then - export FLY_API_TOKEN="$token" - return 0 - fi - return 1 -} - -# --- Credential check --- - -# Check if a cloud has credentials available (non-interactive) -_cloud_has_credentials() { - local cloud="$1" - local token_var - token_var=$(_get_token_env_var "$cloud") - - # Clouds that use CLI auth rather than env var tokens - case "$cloud" in - aws) command -v aws &>/dev/null && aws sts get-caller-identity &>/dev/null 2>&1; return $? ;; - gcp) command -v gcloud &>/dev/null && gcloud auth print-access-token &>/dev/null 2>&1; return $? ;; - - sprite) command -v sprite &>/dev/null; return $? ;; - local) return 0 ;; - esac - - # Token-based clouds: check env var, then spawn config file, then CLI - if [[ -n "$token_var" ]]; then - local token_val="${!token_var:-}" - if [[ -n "$token_val" ]]; then - return 0 - fi - # Check spawn config file - local config_file="${HOME}/.config/spawn/${cloud}.json" - if [[ -f "$config_file" ]]; then - return 0 - fi - # Fly: also check CLI auth - if [[ "$cloud" == "fly" ]]; then - _try_fly_cli_token &>/dev/null && return 0 - fi - fi - return 1 -} - -# --- Cleanup --- - -_cleanup_e2e() { - local exit_code=$? - # Kill any remaining background test jobs - if [[ -n "${E2E_PIDS:-}" ]]; then - for pid in ${E2E_PIDS}; do - kill "$pid" 2>/dev/null || true - done - fi - # Clean up results dir - if [[ -n "${E2E_RESULTS_DIR:-}" ]] && [[ -d "${E2E_RESULTS_DIR}" ]]; then - rm -rf "${E2E_RESULTS_DIR}" - fi - exit "$exit_code" -} -trap _cleanup_e2e EXIT SIGTERM SIGINT - -# --- macOS-compatible timeout --- - -_run_with_timeout() { - local secs="$1"; shift - "$@" & - local pid=$! - local elapsed=0 - while kill -0 "$pid" 2>/dev/null; do - if [[ "$elapsed" -ge "$secs" ]]; then - kill "$pid" 2>/dev/null - sleep 1 - kill -9 "$pid" 2>/dev/null || true - wait "$pid" 2>/dev/null || true - return 124 - fi - sleep 1 - elapsed=$((elapsed + 1)) - done - wait "$pid" 2>/dev/null -} - -# --- Stale server cleanup --- - -_cleanup_stale_servers() { - _e2e_log "Skipping bash-based cleanup (clouds use TypeScript)" - return 0 -} - -# Destroy a specific e2e test server by name. -# Clouds that take a name directly are easy; others need a name→ID lookup. -_destroy_e2e_server() { - return 0 -} - -# --- Non-interactive env setup --- - -# Export all env vars needed to run agent scripts without any interactive prompts. -# Called by both preflight and per-combo tests. -_setup_noninteractive_env() { - local cloud="$1" - - export SPAWN_NON_INTERACTIVE=1 - export MODEL_ID="${MODEL_ID:-openrouter/auto}" - export SPAWN_SKIP_GITHUB_AUTH=1 - - case "$cloud" in - hetzner) - export HETZNER_LOCATION="${HETZNER_LOCATION:-fsn1}" - export HETZNER_SERVER_TYPE="${HETZNER_SERVER_TYPE:-cx23}" - ;; - fly) - export FLY_REGION="${FLY_REGION:-iad}" - export FLY_VM_SIZE="${FLY_VM_SIZE:-shared-cpu-1x}" - export FLY_VM_MEMORY="${FLY_VM_MEMORY:-1024}" - ;; - gcp) - export GCP_ZONE="${GCP_ZONE:-us-central1-a}" - export GCP_MACHINE_TYPE="${GCP_MACHINE_TYPE:-e2-micro}" - ;; - esac -} - -# --- Per-cloud preflight --- - -# Run cloud_authenticate() once per cloud BEFORE parallel agent tests. -# This installs CLIs, imports SSH keys, and validates tokens so that -# 15 parallel agent scripts don't race on the same shared resources. -_preflight_cloud() { - local cloud="$1" - local log_file="${E2E_RESULTS_DIR}/preflight_${cloud}.log" - local env_file="${E2E_RESULTS_DIR}/preflight_${cloud}.env" - - _e2e_log "Pre-flight: ${cloud}..." - - # Run cloud_authenticate in a subshell, then dump the validated token - # so the parent can export it for agent scripts. - local token_var - token_var=$(_get_token_env_var "$cloud") - - ( - _setup_noninteractive_env "$cloud" - - # Write token from env to env file for parent to pick up - if [[ -n "$token_var" ]] && [[ -n "${!token_var:-}" ]]; then - printf '%s' "${!token_var}" > "$env_file" - fi - ) > "$log_file" 2>&1 - - local rc=$? - if [[ $rc -ne 0 ]]; then - local last_err - last_err=$(grep -iE "error|fail|cannot|not found|invalid" "$log_file" 2>/dev/null | tail -1 || true) - _e2e_fail "pre-flight ${cloud}: ${last_err:-exit code $rc}" - return 1 - fi - - # Import validated token into parent so agent scripts skip re-validation - if [[ -n "$token_var" ]] && [[ -f "$env_file" ]] && [[ -s "$env_file" ]]; then - local token_val - token_val=$(cat "$env_file") - export "$token_var=$token_val" - rm -f "$env_file" - fi - - _e2e_pass "pre-flight ${cloud}" - return 0 -} - -# --- Per-combo test function --- - -run_e2e_test() { - local cloud="$1" agent="$2" - local server_name="${E2E_SERVER_PREFIX}-${agent}-$(date +%s)-$$" - local log_file="${E2E_RESULTS_DIR}/${cloud}_${agent}.log" - local start_time - start_time=$(date +%s) - - _e2e_log " ▶ ${cloud}/${agent} starting..." - - # Set the cloud-specific server name env var so the script skips interactive prompt - local name_var - name_var=$(_get_name_env_var "$cloud") - if [[ -n "$name_var" ]]; then - export "$name_var"="$server_name" - fi - - _setup_noninteractive_env "$cloud" - - # Run the agent script with stdin from /dev/null (no interactive prompts) - local exit_code=0 - _run_with_timeout "$E2E_TIMEOUT" bash "${REPO_ROOT}/${cloud}/${agent}.sh" \ - < /dev/null > "$log_file" 2>&1 || exit_code=$? - - local elapsed=$(( $(date +%s) - start_time )) - - # Determine result - # The script will always "fail" at the interactive session step (no TTY), - # but "setup completed successfully" printed before that means everything - # up to session launch worked. - local result="fail" - local reason="" - - if [[ "$exit_code" -eq 124 ]]; then - reason="timeout (${E2E_TIMEOUT}s)" - elif grep -q "setup completed successfully" "$log_file" 2>/dev/null; then - result="pass" - reason="setup complete (session expected to fail without TTY)" - else - reason="exit code ${exit_code}" - # Try to extract last meaningful error - local last_error - last_error=$(grep -iE "error|fail|fatal|cannot|not found" "$log_file" 2>/dev/null | tail -3 || true) - if [[ -n "$last_error" ]]; then - reason="${reason}: $(printf '%s' "$last_error" | head -1)" - fi - fi - - # Write results - printf '%s\n' "$result" > "${E2E_RESULTS_DIR}/${cloud}_${agent}.result" - printf '%s\n' "$elapsed" > "${E2E_RESULTS_DIR}/${cloud}_${agent}.timing" - printf '%s\n' "$reason" > "${E2E_RESULTS_DIR}/${cloud}_${agent}.reason" - - # Destroy the test server — don't leak cloud resources - _destroy_e2e_server "$cloud" "$server_name" - - # Progress output - if [[ "$result" == "pass" ]]; then - _e2e_pass "${cloud}/${agent} ${elapsed}s" - else - _e2e_fail "${cloud}/${agent} ${elapsed}s (${reason})" - fi -} - -# --- Auto-fix function --- - -_find_working_reference() { - local agent="$1" exclude_cloud="$2" - for cloud_dir in "${REPO_ROOT}"/*/; do - local cloud_name - cloud_name=$(basename "$cloud_dir") - [[ "$cloud_name" == "$exclude_cloud" ]] && continue - [[ -f "${cloud_dir}${agent}.sh" ]] || continue - printf '%s' "${cloud_dir}${agent}.sh" - return 0 - done - return 1 -} - -# Build the prompt for a single failing combo (used by per-cloud agent) -_build_failure_context() { - local cloud="$1" agent="$2" - local log_file="${E2E_RESULTS_DIR}/${cloud}_${agent}.log" - local script="${REPO_ROOT}/${cloud}/${agent}.sh" - - printf '### %s/%s\n\n' "$cloud" "$agent" - - printf 'Last 50 lines of output:\n```\n' - if [[ -f "$log_file" ]]; then - tail -50 "$log_file" - else - printf '(no log file)\n' - fi - printf '```\n\n' - - printf 'Script (%s/%s.sh):\n```bash\n' "$cloud" "$agent" - if [[ -f "$script" ]]; then - cat "$script" - fi - printf '```\n\n' - - local ref_script="" - ref_script=$(_find_working_reference "$agent" "$cloud" 2>/dev/null) || true - if [[ -n "$ref_script" ]] && [[ -f "$ref_script" ]]; then - printf 'Reference (working on another cloud — %s):\n```bash\n' "$(basename "$(dirname "$ref_script")")" - cat "$ref_script" - printf '```\n\n' - fi -} - -# Spawn one Claude agent to fix a single failing combo -auto_fix_combo() { - local cloud="$1" agent="$2" - - if ! command -v claude &>/dev/null; then - _e2e_log "claude CLI not found — skipping auto-fix for ${cloud}/${agent}" - return 1 - fi - - local prompt - prompt=$(_build_failure_context "$cloud" "$agent") - - local cloud_lib="" - if [[ -f "${REPO_ROOT}/${cloud}/lib/common.sh" ]]; then - cloud_lib=$(cat "${REPO_ROOT}/${cloud}/lib/common.sh") - fi - - _e2e_log "Spawning Claude agent for ${cloud}/${agent}..." - - claude -p "You are fixing an E2E test failure for **${cloud}/${agent}**. - -## Cloud Library (${cloud}/lib/common.sh) -\`\`\`bash -${cloud_lib} -\`\`\` - -## Failure - -${prompt} - -## Instructions - -Fix the failing script: ${cloud}/${agent}.sh - -1. Read the error output to understand what went wrong -2. Compare with the reference script (working on another cloud) if available -3. Fix the issue — common problems: wrong install command, missing PATH, timeout in non-TTY -4. Run \`bash -n\` on every modified file - -Only modify files under ${cloud}/. Do not modify lib/common.sh or shared/." 2>&1 | tee -a "${E2E_RESULTS_DIR}/autofix_${cloud}_${agent}.log" || true -} - -# --- Timing history --- - -# Save a test result to the timings JSON file -# Usage: _save_timing cloud/agent elapsed status -_save_timing() { - local combo="$1" elapsed="$2" status="$3" - local today - today=$(date +%Y-%m-%d) - - mkdir -p "$(dirname "$E2E_TIMINGS_FILE")" - - python3 -c " -import json, sys, os - -combo = sys.argv[1] -elapsed = int(sys.argv[2]) -status = sys.argv[3] -today = sys.argv[4] -path = sys.argv[5] - -data = {} -if os.path.exists(path): - try: - with open(path) as f: - data = json.load(f) - except (json.JSONDecodeError, IOError): - data = {} - -if combo not in data: - data[combo] = {'runs': [], 'best': {}} - -entry = {'date': today, 'total': elapsed, 'status': status} -data[combo]['runs'].insert(0, entry) -# Keep last 10 runs -data[combo]['runs'] = data[combo]['runs'][:10] - -# Update best if this is a pass and faster -if status == 'pass': - best = data[combo].get('best', {}) - if not best.get('total') or elapsed < best['total']: - data[combo]['best'] = {'total': elapsed, 'date': today} - -with open(path, 'w') as f: - json.dump(data, f, indent=2) -" "$combo" "$elapsed" "$status" "$today" "$E2E_TIMINGS_FILE" 2>/dev/null || true -} - -# Show timing history from the JSON file -_show_history() { - if [[ ! -f "$E2E_TIMINGS_FILE" ]]; then - _e2e_log "No timing history found at ${E2E_TIMINGS_FILE}" - return 0 - fi - - python3 -c " -import json, sys - -path = sys.argv[1] -with open(path) as f: - data = json.load(f) - -if not data: - print('No timing data recorded yet.') - sys.exit(0) - -for combo in sorted(data.keys()): - info = data[combo] - best = info.get('best', {}) - best_total = best.get('total', '-') - best_date = best.get('date', '-') - runs = info.get('runs', []) - print(f'\\n━━━ {combo} ━━━') - print(f' Best: {best_total}s ({best_date})') - print(f' Recent runs:') - for r in runs[:5]: - status_icon = '✓' if r['status'] == 'pass' else '✗' - print(f' {status_icon} {r[\"date\"]} {r[\"total\"]}s ({r[\"status\"]})') -" "$E2E_TIMINGS_FILE" -} - -# Compare a single agent across all clouds -_show_compare() { - local agent="$1" - if [[ ! -f "$E2E_TIMINGS_FILE" ]]; then - _e2e_log "No timing history found at ${E2E_TIMINGS_FILE}" - return 0 - fi - - python3 -c " -import json, sys - -agent = sys.argv[1] -path = sys.argv[2] -with open(path) as f: - data = json.load(f) - -matches = {k: v for k, v in data.items() if k.endswith('/' + agent)} -if not matches: - print(f'No timing data for agent: {agent}') - sys.exit(0) - -print(f'\\n━━━ {agent} across clouds ━━━') -print(f'{\"CLOUD\":<15} {\"BEST\":<10} {\"LATEST\":<10} {\"STATUS\":<8}') -print('-' * 45) - -for combo in sorted(matches.keys()): - cloud = combo.split('/')[0] - info = matches[combo] - best = info.get('best', {}).get('total', '-') - runs = info.get('runs', []) - if runs: - latest = runs[0]['total'] - status = runs[0]['status'] - else: - latest = '-' - status = '-' - best_s = f'{best}s' if isinstance(best, int) else best - latest_s = f'{latest}s' if isinstance(latest, int) else latest - print(f'{cloud:<15} {best_s:<10} {latest_s:<10} {status:<8}') -" "$agent" "$E2E_TIMINGS_FILE" -} - -# Check if a passing combo is slow and needs optimization -# Returns 0 (true) if optimization is needed, 1 if not -# Prints the reason to stdout -_check_slow() { - local combo="$1" elapsed="$2" - - python3 -c " -import json, sys, os - -combo = sys.argv[1] -elapsed = int(sys.argv[2]) -threshold = int(sys.argv[3]) -path = sys.argv[4] -agent = combo.split('/')[1] -cloud = combo.split('/')[0] - -reasons = [] - -# Trigger 1: Absolute slow -if elapsed > threshold: - reasons.append(f'absolute_slow: {elapsed}s exceeds {threshold}s threshold') - -# Load history for regression + peer comparison -data = {} -if os.path.exists(path): - try: - with open(path) as f: - data = json.load(f) - except (json.JSONDecodeError, IOError): - pass - -# Trigger 2: Regression vs best -if combo in data: - best = data[combo].get('best', {}).get('total') - if best and elapsed > best * 1.5: - reasons.append(f'regression: {elapsed}s is >50%% slower than best {best}s') - -# Trigger 3: Slow vs peers (same agent on other clouds) -peer_times = [] -for key, val in data.items(): - if key.endswith('/' + agent) and key != combo: - peer_best = val.get('best', {}).get('total') - if peer_best: - peer_times.append((key.split('/')[0], peer_best)) - -if peer_times: - fastest_cloud, fastest_time = min(peer_times, key=lambda x: x[1]) - if elapsed > fastest_time * 2: - reasons.append(f'slow_vs_peers: {elapsed}s is >2x slower than {fastest_cloud} ({fastest_time}s)') - -if reasons: - print('|'.join(reasons)) - sys.exit(0) -else: - sys.exit(1) -" "$combo" "$elapsed" "$E2E_SLOW_THRESHOLD" "$E2E_TIMINGS_FILE" 2>/dev/null -} - -# Build context for optimization agent (peer timings, history) -_build_optimization_context() { - local combo="$1" elapsed="$2" - - python3 -c " -import json, sys, os - -combo = sys.argv[1] -elapsed = int(sys.argv[2]) -path = sys.argv[3] -agent = combo.split('/')[1] -cloud = combo.split('/')[0] - -data = {} -if os.path.exists(path): - try: - with open(path) as f: - data = json.load(f) - except (json.JSONDecodeError, IOError): - pass - -lines = [] - -# Best time -best = '-' -if combo in data: - b = data[combo].get('best', {}).get('total') - if b: - best = f'{b}s' -lines.append(f'- Total time: {elapsed}s (best ever: {best})') - -# Peer timings -lines.append(f'- Same agent on other clouds:') -for key in sorted(data.keys()): - if key.endswith('/' + agent) and key != combo: - peer_cloud = key.split('/')[0] - peer_best = data[key].get('best', {}).get('total', '?') - lines.append(f' - {peer_cloud}: {peer_best}s') - -# History -if combo in data: - runs = data[combo].get('runs', []) - if runs: - lines.append(f'- History:') - for r in runs[:5]: - lines.append(f' - {r[\"date\"]}: {r[\"total\"]}s ({r[\"status\"]})') - -print('\\n'.join(lines)) -" "$combo" "$elapsed" "$E2E_TIMINGS_FILE" 2>/dev/null || true -} - -# Build optimization context for a single slow combo (used by per-cloud agent) -_build_slow_context() { - local cloud="$1" agent="$2" elapsed="$3" reasons="$4" - local script="${REPO_ROOT}/${cloud}/${agent}.sh" - - printf '### %s/%s (%ss)\n\n' "$cloud" "$agent" "$elapsed" - - printf 'Why flagged:\n' - printf '%s\n' "$reasons" | while IFS= read -r r; do - printf '- %s\n' "$r" - done - printf '\n' - - local timing_context - timing_context=$(_build_optimization_context "${cloud}/${agent}" "$elapsed") - printf 'Timings:\n%s\n\n' "$timing_context" - - printf 'Script (%s/%s.sh):\n```bash\n' "$cloud" "$agent" - if [[ -f "$script" ]]; then - cat "$script" - fi - printf '```\n\n' - - local ref_script="" - ref_script=$(_find_working_reference "$agent" "$cloud" 2>/dev/null) || true - if [[ -n "$ref_script" ]] && [[ -f "$ref_script" ]]; then - printf 'Reference (fastest peer — %s):\n```bash\n' "$(basename "$(dirname "$ref_script")")" - cat "$ref_script" - printf '```\n\n' - fi -} - -# Spawn one Claude agent to optimize a single slow combo -optimize_slow_combo() { - local cloud="$1" agent="$2" elapsed="$3" reasons="$4" - - if ! command -v claude &>/dev/null; then - _e2e_log "claude CLI not found — skipping optimization for ${cloud}/${agent}" - return 1 - fi - - local prompt - prompt=$(_build_slow_context "$cloud" "$agent" "$elapsed" "$reasons") - - local cloud_lib="" - if [[ -f "${REPO_ROOT}/${cloud}/lib/common.sh" ]]; then - cloud_lib=$(cat "${REPO_ROOT}/${cloud}/lib/common.sh") - fi - - _e2e_log "Spawning Claude agent for ${cloud}/${agent} (${elapsed}s)..." - - claude -p "You are optimizing a slow E2E test for **${cloud}/${agent}**. -The script PASSES but is too slow. - -## Cloud Library (${cloud}/lib/common.sh) -\`\`\`bash -${cloud_lib} -\`\`\` - -## Slow Script - -${prompt} - -## Instructions - -Optimize the script: ${cloud}/${agent}.sh - -1. Compare timings with the fastest peer cloud for the same agent -2. Identify what makes it slow (heavy installer, compiling native deps, unnecessary steps) -3. Make it faster — use lighter install methods, skip unnecessary setup, parallelize where possible -4. Run \`bash -n\` on every modified file -5. Don't break anything — the script must still pass E2E - -Only modify files under ${cloud}/. Do not modify lib/common.sh or shared/." 2>&1 | tee -a "${E2E_RESULTS_DIR}/optimize_${cloud}_${agent}.log" || true -} - -# --- Main --- - -main() { - local filter_cloud="" filter_agent="" - - # Parse args: strip --all flag, assign positional cloud/agent - for arg in "$@"; do - case "$arg" in - --all) E2E_ALL=1 ;; - *) - if [[ -z "$filter_cloud" ]]; then - filter_cloud="$arg" - else - filter_agent="$arg" - fi - ;; - esac - done - - # Handle --cleanup - if [[ "$filter_cloud" == "--cleanup" ]]; then - _e2e_log "Running stale server cleanup..." - for cloud in fly hetzner digitalocean; do - if _cloud_has_credentials "$cloud"; then - _cleanup_stale_servers "$cloud" - fi - done - _e2e_log "Cleanup complete" - return 0 - fi - - # Handle --history - if [[ "$filter_cloud" == "--history" ]]; then - _show_history - return 0 - fi - - # Handle --compare AGENT - if [[ "$filter_cloud" == "--compare" ]]; then - if [[ -z "$filter_agent" ]]; then - _e2e_log "Usage: bash test/e2e.sh --compare AGENT_NAME" - return 1 - fi - _show_compare "$filter_agent" - return 0 - fi - - # Get OPENROUTER_API_KEY - if [[ -z "${OPENROUTER_API_KEY:-}" ]]; then - # Non-interactive: fail fast with a clear message - if ! echo -n "" > /dev/tty 2>/dev/null; then - _e2e_log "ERROR: OPENROUTER_API_KEY not set and no TTY available" - _e2e_log "Export it before running: export OPENROUTER_API_KEY=sk-or-v1-..." - return 1 - fi - - # Interactive: offer OAuth or paste - source "${REPO_ROOT}/shared/common.sh" 2>/dev/null || true - - _e2e_log "OPENROUTER_API_KEY not set — let's grab one" - echo "" - printf ' 1) Open browser (OAuth) — quickest, logs you in via openrouter.ai\n' - printf ' 2) Paste a key — get one from https://openrouter.ai/settings/keys\n' - printf ' 3) Quit\n' - echo "" - printf ' Pick [1/2/3]: ' - read -r _choice /dev/null || true - done - - # Run all combos in parallel (background subshells) - E2E_PIDS="" - for combo in $combos; do - local cloud="${combo%%/*}" - local agent="${combo##*/}" - ( - run_e2e_test "$cloud" "$agent" - ) & - E2E_PIDS="${E2E_PIDS} $!" - done - - # Wait for all to finish - _e2e_log "Waiting for ${combo_count} test(s) to complete (timeout: ${E2E_TIMEOUT}s each)..." - for pid in ${E2E_PIDS}; do - wait "$pid" 2>/dev/null || true - done - E2E_PIDS="" - - # Collect and report results - echo "" - _e2e_log "━━━ E2E Results ━━━" - echo "" - - local total_pass=0 - local total_fail=0 - local failed_combos="" - - for combo in $combos; do - local cloud="${combo%%/*}" - local agent="${combo##*/}" - local result_file="${E2E_RESULTS_DIR}/${cloud}_${agent}.result" - local timing_file="${E2E_RESULTS_DIR}/${cloud}_${agent}.timing" - local reason_file="${E2E_RESULTS_DIR}/${cloud}_${agent}.reason" - - local result="fail" - local elapsed="?" - local reason="no result file" - - [[ -f "$result_file" ]] && result=$(cat "$result_file") - [[ -f "$timing_file" ]] && elapsed=$(cat "$timing_file") - [[ -f "$reason_file" ]] && reason=$(cat "$reason_file") - - if [[ "$result" == "pass" ]]; then - _e2e_pass "${cloud}/${agent} ${elapsed}s" - total_pass=$((total_pass + 1)) - else - _e2e_fail "${cloud}/${agent} ${elapsed}s (${reason})" - total_fail=$((total_fail + 1)) - failed_combos="${failed_combos} ${combo}" - fi - done - - echo "" - local summary="Total: ${total_pass} passed, ${total_fail} failed out of ${combo_count}" - if [[ -n "${preflight_skipped:-}" ]]; then - summary="${summary} (skipped:${preflight_skipped})" - fi - _e2e_log "$summary" - - # Save timings to history - for combo in $combos; do - local cloud="${combo%%/*}" - local agent="${combo##*/}" - local result_file="${E2E_RESULTS_DIR}/${cloud}_${agent}.result" - local timing_file="${E2E_RESULTS_DIR}/${cloud}_${agent}.timing" - local result="fail" - local elapsed="0" - [[ -f "$result_file" ]] && result=$(cat "$result_file") - [[ -f "$timing_file" ]] && elapsed=$(cat "$timing_file") - _save_timing "$combo" "$elapsed" "$result" - done - - # Optimization phase: check passing combos for slowness - local slow_combos="" - if [[ "$E2E_OPTIMIZE" == "1" ]]; then - for combo in $combos; do - local cloud="${combo%%/*}" - local agent="${combo##*/}" - local result_file="${E2E_RESULTS_DIR}/${cloud}_${agent}.result" - local timing_file="${E2E_RESULTS_DIR}/${cloud}_${agent}.timing" - local result="fail" - local elapsed="0" - [[ -f "$result_file" ]] && result=$(cat "$result_file") - [[ -f "$timing_file" ]] && elapsed=$(cat "$timing_file") - - if [[ "$result" == "pass" ]]; then - local slow_reasons="" - slow_reasons=$(_check_slow "$combo" "$elapsed") || true - if [[ -n "$slow_reasons" ]]; then - slow_combos="${slow_combos} ${combo}:${elapsed}:${slow_reasons}" - fi - fi - done - fi - - if [[ -n "${slow_combos}" ]]; then - echo "" - _e2e_log "━━━ Optimization Phase ━━━" - echo "" - - # Print all slow combos - for entry in $slow_combos; do - local combo="${entry%%:*}" - local rest="${entry#*:}" - local elapsed="${rest%%:*}" - local reasons="${rest#*:}" - printf ' \033[33m⚡\033[0m %s %ss (%s)\n' "$combo" "$elapsed" "$(printf '%s' "$reasons" | tr '|' ', ')" - done - echo "" - - # Spawn one Claude agent per slow combo, all in parallel - local opt_pids="" - for entry in $slow_combos; do - local combo="${entry%%:*}" - local rest="${entry#*:}" - local elapsed="${rest%%:*}" - local reasons - reasons=$(printf '%s' "${rest#*:}" | tr '|' '\n') - local cloud="${combo%%/*}" - local agent="${combo##*/}" - - ( - optimize_slow_combo "$cloud" "$agent" "$elapsed" "$reasons" - ) & - opt_pids="${opt_pids} $!" - done - - # Wait for all optimization agents - for pid in $opt_pids; do - wait "$pid" 2>/dev/null || true - done - - # Re-run optimized combos to verify - echo "" - _e2e_log "━━━ Re-running Optimized Combos ━━━" - echo "" - - for entry in $slow_combos; do - local combo="${entry%%:*}" - local old_elapsed="${entry#*:}" - old_elapsed="${old_elapsed%%:*}" - local cloud="${combo%%/*}" - local agent="${combo##*/}" - - run_e2e_test "$cloud" "$agent" || true - - local result_file="${E2E_RESULTS_DIR}/${cloud}_${agent}.result" - local timing_file="${E2E_RESULTS_DIR}/${cloud}_${agent}.timing" - local result="fail" - local new_elapsed="?" - [[ -f "$result_file" ]] && result=$(cat "$result_file") - [[ -f "$timing_file" ]] && new_elapsed=$(cat "$timing_file") - - if [[ "$result" == "pass" ]]; then - _e2e_pass "${combo} ${new_elapsed}s (was ${old_elapsed}s)" - _save_timing "$combo" "$new_elapsed" "$result" - else - _e2e_fail "${combo} ${new_elapsed}s (optimization broke it — was ${old_elapsed}s)" - fi - done - fi - - # Auto-fix failures — one Claude agent per combo, all in parallel - if [[ "$total_fail" -gt 0 ]] && [[ "$E2E_AUTO_FIX" == "1" ]]; then - echo "" - _e2e_log "━━━ Auto-Fix Phase ━━━" - echo "" - - # Spawn one agent per failing combo in parallel - local fix_pids="" - for combo in $failed_combos; do - local cloud="${combo%%/*}" - local agent="${combo##*/}" - - ( - auto_fix_combo "$cloud" "$agent" - ) & - fix_pids="${fix_pids} $!" - done - - # Wait for all fix agents - for pid in $fix_pids; do - wait "$pid" 2>/dev/null || true - done - - # Re-run fixed combos - echo "" - _e2e_log "━━━ Re-running Fixed Combos ━━━" - echo "" - - local rerun_pass=0 - local rerun_fail=0 - - for combo in $failed_combos; do - local cloud="${combo%%/*}" - local agent="${combo##*/}" - - run_e2e_test "$cloud" "$agent" || true - - local result_file="${E2E_RESULTS_DIR}/${cloud}_${agent}.result" - local timing_file="${E2E_RESULTS_DIR}/${cloud}_${agent}.timing" - local result="fail" - local elapsed="?" - - [[ -f "$result_file" ]] && result=$(cat "$result_file") - [[ -f "$timing_file" ]] && elapsed=$(cat "$timing_file") - - if [[ "$result" == "pass" ]]; then - _e2e_pass "${cloud}/${agent} ${elapsed}s (FIXED)" - rerun_pass=$((rerun_pass + 1)) - else - _e2e_fail "${cloud}/${agent} ${elapsed}s (still failing)" - rerun_fail=$((rerun_fail + 1)) - fi - done - - echo "" - _e2e_log "Auto-fix: ${rerun_pass} fixed, ${rerun_fail} still failing" - fi - - echo "" - _e2e_log "━━━ E2E Complete ━━━" - - # Exit with failure if any tests failed (and weren't fixed) - if [[ "$total_fail" -gt 0 ]]; then - if [[ "$E2E_AUTO_FIX" == "1" ]] && [[ "${rerun_fail:-0}" -eq 0 ]]; then - return 0 - fi - return 1 - fi - return 0 -} - -main "$@" diff --git a/test/mock-curl-script.sh b/test/mock-curl-script.sh deleted file mode 100644 index 64c96e37..00000000 --- a/test/mock-curl-script.sh +++ /dev/null @@ -1,222 +0,0 @@ -#!/bin/bash -# Mock curl — returns fixture data based on URL -# Env vars from parent: MOCK_LOG, MOCK_FIXTURE_DIR, MOCK_CLOUD - -# --- Helper functions --- - -_parse_args() { - METHOD="GET" - URL="" - BODY="" - HAS_WRITE_OUT=false - local prev_flag="" - - for arg in "$@"; do - case "$prev_flag" in - -X) METHOD="$arg"; prev_flag=""; continue ;; - -w) - case "$arg" in - *http_code*) HAS_WRITE_OUT=true ;; - esac - prev_flag=""; continue - ;; - -d) BODY="$arg"; prev_flag=""; continue ;; - -H|-o|-u|-K|--connect-timeout|--max-time|--retry|--retry-delay) prev_flag=""; continue ;; - esac - case "$arg" in - -X|-w|-d|-H|-o|-u|-K|--connect-timeout|--max-time|--retry|--retry-delay) prev_flag="$arg"; continue ;; - -s|-f|-S|-L|-k|-#|-fsSL|-fsS|-sS) continue ;; - http://*|https://*) URL="$arg" ;; - esac - done -} - -_maybe_inject_error() { - [ -n "${MOCK_ERROR_SCENARIO:-}" ] || return 1 - case "$URL" in - *openrouter.ai*|*raw.githubusercontent.com*|*claude.ai/install*|*bun.sh*|*nodesource*|*nodejs.org*|*openclaw.ai*|*opencode*|*zeroclaw*|*pip.pypa.io*|*get.docker.com*|*npmjs.org*|*github.com/*/releases*) - return 1 ;; - esac - case "${MOCK_ERROR_SCENARIO}" in - auth_failure) - printf '{"error":"Unauthorized"}' - if [ "$HAS_WRITE_OUT" = "true" ]; then printf '\n401'; fi - exit 1 ;; - rate_limit) - printf '{"error":"Rate limit exceeded"}' - if [ "$HAS_WRITE_OUT" = "true" ]; then printf '\n429'; fi - exit 1 ;; - server_error) - printf '{"error":"Internal server error"}' - if [ "$HAS_WRITE_OUT" = "true" ]; then printf '\n500'; fi - exit 1 ;; - create_failure) - if [ "$METHOD" = "POST" ]; then - case "$URL" in - *servers*|*droplets*|*instances*|*machines*) - printf '{"error":"Unprocessable entity"}' - if [ "$HAS_WRITE_OUT" = "true" ]; then printf '\n422'; fi - exit 1 ;; - esac - fi ;; - esac - return 1 -} - -_handle_special_urls() { - case "$URL" in - *claude.ai/install*|*bun.sh*|*nodesource*|*nodejs.org*|*openclaw.ai*|*opencode*install*|*zeroclaw*install*|\ - *pip.pypa.io*|*get.docker.com*|*install.python-poetry.org*|\ - *npmjs.org*|*deb.nodesource.com*|*github.com/*/releases*|*cli.github.com*) - printf '#!/bin/bash\nexit 0\n' - exit 0 ;; - *raw.githubusercontent.com/OpenRouterTeam/spawn/*) - local_path="${MOCK_REPO_ROOT}/${URL##*spawn/main/}" - if [ -f "$local_path" ]; then cat "$local_path"; fi - exit 0 ;; - *openrouter.ai*) - printf '{"key":"sk-or-v1-mock"}\n' - if [ "$HAS_WRITE_OUT" = "true" ]; then printf '\n200'; fi - exit 0 ;; - esac -} - -_strip_api_base() { - ENDPOINT="$URL" - case "$URL" in - https://api.hetzner.cloud/v1*) ENDPOINT="${URL#https://api.hetzner.cloud/v1}" ;; - https://api.digitalocean.com/v2*) ENDPOINT="${URL#https://api.digitalocean.com/v2}" ;; - https://api.machines.dev/v1*) ENDPOINT="${URL#https://api.machines.dev/v1}" ;; - esac - EP_CLEAN=$(echo "$ENDPOINT" | sed 's|?.*||') -} - -_check_fields() { - local fields="$1" - for field in $fields; do - if ! printf '%s' "$BODY" | python3 -c "import json,sys; d=json.loads(sys.stdin.read()); assert '$field' in d" 2>/dev/null; then - echo "BODY_ERROR:missing_field:${field}:${URL}" >> "${MOCK_LOG}" - fi - done -} - -_validate_body() { - [ "${MOCK_VALIDATE_BODY:-}" = "1" ] && [ -n "$BODY" ] && [ "$METHOD" = "POST" ] || return 0 - if ! printf '%s' "$BODY" | python3 -c "import json,sys; json.loads(sys.stdin.read())" 2>/dev/null; then - echo "BODY_ERROR:invalid_json:${URL}" >> "${MOCK_LOG}" - return 0 - fi - case "${MOCK_CLOUD}" in - hetzner) case "$EP_CLEAN" in /servers) _check_fields "name server_type image location" ;; esac ;; - digitalocean) case "$EP_CLEAN" in /droplets) _check_fields "name region size image" ;; esac ;; - fly) case "$EP_CLEAN" in */machines) _check_fields "name region config" ;; esac ;; - esac -} - -_try_fixture() { - local f="${MOCK_FIXTURE_DIR}/$1.json" - if [ -f "$f" ]; then cat "$f"; return 0; fi - return 1 -} - -_synthetic_active_response() { - case "$MOCK_CLOUD" in - digitalocean) printf '{"droplet":{"id":12345678,"name":"test-srv","status":"active","networks":{"v4":[{"ip_address":"10.0.0.1","type":"public"}]}}}' ;; - hetzner) printf '{"server":{"id":99999,"name":"test-srv","status":"running","public_net":{"ipv4":{"ip":"10.0.0.1"}}}}' ;; - fly) printf '{"id":"d890e84b0d3089","name":"test-app","state":"started","region":"iad","private_ip":"fdaa:0:0:0:a7b:0:0:2"}' ;; - *) printf '{}' ;; - esac -} - -_respond_get() { - local FIXTURE_NAME - FIXTURE_NAME=$(echo "$EP_CLEAN" | sed 's|^/||; s|/|_|g') - - local LAST_SEG HAS_ID_SUFFIX=false - LAST_SEG=$(echo "$EP_CLEAN" | sed 's|.*/||') - case "$LAST_SEG" in *[0-9]*) HAS_ID_SUFFIX=true ;; esac - - if _try_fixture "$FIXTURE_NAME"; then - : - elif [ "$HAS_ID_SUFFIX" = "false" ]; then - local FIXTURE_NAME_BASE - FIXTURE_NAME_BASE=$(echo "$FIXTURE_NAME" | sed 's|_[0-9a-f-]*$||') - if ! _try_fixture "$FIXTURE_NAME_BASE"; then - echo "NO_FIXTURE:GET:${EP_CLEAN}:${FIXTURE_NAME}" >> "${MOCK_LOG}" - printf '{}' - fi - else - # ID-suffixed GET (e.g., /servers/12345) — use synthetic for status polling - _synthetic_active_response - fi -} - -_respond_post() { - case "$EP_CLEAN" in - /ssh_keys|/ssh-keys|/account/keys|/profile/sshkeys|/sshkeys|*/sshkey) - printf '{"ssh_key":{"id":99999,"name":"test-key","fingerprint":"af:0d:c5:57:a8:fd:b2:82:5e:d4:c1:65:f0:0c:8a:9d"}}' - ;; - /apps) - printf '{"id":"test-app","name":"test-app","status":"deployed","organization":{"slug":"personal"}}' - ;; - *) - if _try_fixture "create_server"; then - : - else - echo "NO_FIXTURE:POST:${EP_CLEAN}:create_server" >> "${MOCK_LOG}" - case "$MOCK_CLOUD" in - hetzner) printf '{"server":{"id":99999,"name":"test-srv","public_net":{"ipv4":{"ip":"10.0.0.1"}}},"action":{"id":1,"status":"running"}}' ;; - digitalocean) printf '{"droplet":{"id":12345678,"name":"test-srv","status":"new","networks":{"v4":[{"ip_address":"10.0.0.1","type":"public"}]}}}' ;; - *) printf '{"id":"test-id","status":"active","ip":"10.0.0.1"}' ;; - esac - fi - ;; - esac -} - -_track_state() { - [ "${MOCK_TRACK_STATE:-}" = "1" ] && [ -n "${MOCK_STATE_FILE:-}" ] || return 0 - local TS - TS=$(date +%s) - case "$METHOD" in - POST) - case "$EP_CLEAN" in - /servers|/droplets|/instances|/instance-operations/launch|*/machines) - echo "CREATED:${MOCK_CLOUD}:${TS}" >> "${MOCK_STATE_FILE}" ;; - esac ;; - DELETE) - echo "DELETED:${MOCK_CLOUD}:${TS}" >> "${MOCK_STATE_FILE}" ;; - esac -} - -# --- Main logic --- - -_parse_args "$@" - -echo "curl ${METHOD} ${URL}" >> "${MOCK_LOG}" -if [ -n "$BODY" ]; then - echo "BODY:${BODY}" >> "${MOCK_LOG}" -fi - -_maybe_inject_error -_handle_special_urls - -if [ -z "$URL" ]; then exit 0; fi - -_strip_api_base -_validate_body - -case "$METHOD" in - GET) _respond_get ;; - POST) _respond_post ;; - DELETE) _try_fixture "delete_server" || printf '{}' ;; - *) printf '{}' ;; -esac - -_track_state - -if [ "$HAS_WRITE_OUT" = "true" ]; then - printf '\n200' -fi - -exit 0 diff --git a/test/mock.sh b/test/mock.sh deleted file mode 100644 index 0977d48c..00000000 --- a/test/mock.sh +++ /dev/null @@ -1,1036 +0,0 @@ -#!/bin/bash -# Fixture-based mock test suite for cloud provider agent scripts -# -# Uses recorded API responses from test/fixtures/{cloud}/ to test -# every agent script without making real API calls. -# -# Usage: -# bash test/mock.sh # Test all clouds with fixtures -# bash test/mock.sh hetzner # Test all agents on one cloud -# bash test/mock.sh hetzner claude # Test one agent on one cloud - -set -eo pipefail - -if [[ "${BASH_VERSINFO[0]}" -lt 4 ]]; then - printf 'WARNING: bash %s detected. Some features may need bash 4+.\n' "${BASH_VERSION}" >&2 -fi - -REPO_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)" -FIXTURES_DIR="${REPO_ROOT}/test/fixtures" -TEST_DIR=$(mktemp -d) -MOCK_LOG="${TEST_DIR}/mock_calls.log" - -# Colors (respect NO_COLOR standard: https://no-color.org/) -if [[ -n "${NO_COLOR:-}" ]]; then - RED='' GREEN='' YELLOW='' CYAN='' NC='' -else - RED='\033[0;31m' - GREEN='\033[0;32m' - YELLOW='\033[1;33m' - CYAN='\033[0;36m' - NC='\033[0m' -fi - -# Counters -PASSED=0 -FAILED=0 -SKIPPED=0 - -# Cleanup on exit -cleanup() { - rm -rf "${TEST_DIR}" - rm -f /tmp/spawn_* 2>/dev/null || true -} -trap cleanup EXIT - -# ============================================================ -# Assertions (same pattern as test/run.sh) -# ============================================================ - -assert_exit_code() { - local actual="$1" - local expected="$2" - local msg="$3" - if [[ "${actual}" -eq "${expected}" ]]; then - printf '%b\n' " ${GREEN}✓${NC} ${msg}" - PASSED=$((PASSED + 1)) - else - printf '%b\n' " ${RED}✗${NC} ${msg} (got exit code ${actual})" - FAILED=$((FAILED + 1)) - fi -} - -assert_log_contains() { - local pattern="$1" - local msg="$2" - if grep -qE "${pattern}" "${MOCK_LOG}" 2>/dev/null; then - printf '%b\n' " ${GREEN}✓${NC} ${msg}" - PASSED=$((PASSED + 1)) - else - printf '%b\n' " ${RED}✗${NC} ${msg}" - FAILED=$((FAILED + 1)) - fi -} - -assert_api_called() { - local method="$1" - local endpoint_pattern="$2" - local msg="${3:-calls ${method} ${endpoint_pattern}}" - if grep -qE "curl ${method} .*${endpoint_pattern}" "${MOCK_LOG}" 2>/dev/null; then - printf '%b\n' " ${GREEN}✓${NC} ${msg}" - PASSED=$((PASSED + 1)) - else - printf '%b\n' " ${RED}✗${NC} ${msg}" - FAILED=$((FAILED + 1)) - fi -} - -assert_env_injected() { - local var_name="$1" - local msg="${2:-injects ${var_name}}" - # Check mock log (ssh/scp commands may reference the var) and output log. - # Also check case-insensitively: OPENROUTER_API_KEY → "openrouter" appears - # in output like "Using OpenRouter API key from environment". - local first_word - first_word=$(printf '%s' "$var_name" | sed 's/_.*//' | tr '[:upper:]' '[:lower:]') - if grep -qE "${var_name}" "${MOCK_LOG}" 2>/dev/null || \ - grep -qE "${var_name}" "${TEST_DIR}/output.log" 2>/dev/null || \ - grep -qi "${first_word}" "${TEST_DIR}/output.log" 2>/dev/null || \ - grep -qi "${first_word}" "${MOCK_LOG}" 2>/dev/null; then - printf '%b\n' " ${GREEN}✓${NC} ${msg}" - PASSED=$((PASSED + 1)) - else - printf '%b\n' " ${RED}✗${NC} ${msg}" - FAILED=$((FAILED + 1)) - fi -} - -assert_file_created() { - local path_pattern="$1" - local msg="${2:-creates file matching ${path_pattern}}" - if grep -qE "(scp|upload|file).*${path_pattern}" "${MOCK_LOG}" 2>/dev/null; then - printf '%b\n' " ${GREEN}✓${NC} ${msg}" - PASSED=$((PASSED + 1)) - else - printf '%b\n' " ${RED}✗${NC} ${msg}" - FAILED=$((FAILED + 1)) - fi -} - -assert_no_body_errors() { - local msg="${1:-no request body validation errors}" - if grep -qE "BODY_ERROR:" "${MOCK_LOG}" 2>/dev/null; then - local errors - errors=$(grep "BODY_ERROR:" "${MOCK_LOG}" 2>/dev/null) - printf '%b\n' " ${RED}✗${NC} ${msg}" - printf '%b\n' " ${RED} Errors:${NC}" - printf '%s\n' "$errors" | while IFS= read -r line; do - printf ' %s\n' "$line" - done - FAILED=$((FAILED + 1)) - else - printf '%b\n' " ${GREEN}✓${NC} ${msg}" - PASSED=$((PASSED + 1)) - fi -} - -assert_server_cleaned_up() { - local state_file="$1" - local msg="${2:-server lifecycle tracked}" - if [[ ! -f "$state_file" ]]; then - printf '%b\n' " ${YELLOW}⚠${NC} ${msg} (no state file)" - return 0 - fi - local created deleted - created=$(grep -c "^CREATED:" "$state_file" 2>/dev/null || true) - deleted=$(grep -c "^DELETED:" "$state_file" 2>/dev/null || true) - if [[ "$created" -gt 0 ]]; then - printf '%b\n' " ${GREEN}✓${NC} ${msg} (created=${created}, deleted=${deleted})" - PASSED=$((PASSED + 1)) - if [[ "$deleted" -lt "$created" ]]; then - printf '%b\n' " ${YELLOW}⚠${NC} warning: ${created} created but only ${deleted} deleted (expected — user takes over)" - fi - else - printf '%b\n' " ${YELLOW}⚠${NC} ${msg} (no server creation tracked)" - fi -} - -# ============================================================ -# Mock setup -# ============================================================ - -setup_mock_curl() { - local SCRIPT_DIR - SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" - cp "${SCRIPT_DIR}/mock-curl-script.sh" "${TEST_DIR}/curl" - chmod +x "${TEST_DIR}/curl" -} - -setup_mock_ssh() { - # Mock ssh — log and succeed - cat > "${TEST_DIR}/ssh" << 'MOCKSSH' -#!/bin/bash -echo "ssh $*" >> "${MOCK_LOG}" -exit 0 -MOCKSSH - chmod +x "${TEST_DIR}/ssh" - - # Mock scp — log and succeed - cat > "${TEST_DIR}/scp" << 'MOCKSCP' -#!/bin/bash -echo "scp $*" >> "${MOCK_LOG}" -exit 0 -MOCKSCP - chmod +x "${TEST_DIR}/scp" -} - -# Create a mock that logs its invocation and exits 0 -# Usage: _create_logging_mock NAME [NAME...] -_create_logging_mock() { - local name - for name in "$@"; do - cat > "${TEST_DIR}/${name}" << MOCK -#!/bin/bash -echo "${name} \$*" >> "\${MOCK_LOG}" -exit 0 -MOCK - chmod +x "${TEST_DIR}/${name}" - done -} - -# Create a mock that silently exits 0 (no logging) -# Usage: _create_silent_mock NAME [NAME...] -_create_silent_mock() { - local name - for name in "$@"; do - cat > "${TEST_DIR}/${name}" << 'MOCK' -#!/bin/bash -exit 0 -MOCK - chmod +x "${TEST_DIR}/${name}" - done -} - -# Create the ssh-keygen mock script -_create_ssh_keygen_mock() { - cat > "${TEST_DIR}/ssh-keygen" << 'MOCK' -#!/bin/bash -echo "ssh-keygen $*" >> "${MOCK_LOG}" -# Check for -l flag (fingerprint listing) -for arg in "$@"; do - case "$arg" in - -l*) echo "256 MD5:af:0d:c5:57:a8:fd:b2:82:5e:d4:c1:65:f0:0c:8a:9d test@test (ED25519)"; exit 0 ;; - esac -done -# Parse -f flag for key creation -KEY_PATH="" -prev="" -for arg in "$@"; do - if [ "$prev" = "-f" ]; then - KEY_PATH="$arg" - fi - prev="$arg" -done -if [ -n "$KEY_PATH" ]; then - mkdir -p "$(dirname "$KEY_PATH")" - touch "$KEY_PATH" - echo "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIHmcVdzydp72a/B69nmENZvCvjuk7xGpKdi5CvhkmNsv test@test" > "${KEY_PATH}.pub" -fi -exit 0 -MOCK - chmod +x "${TEST_DIR}/ssh-keygen" -} - -setup_mock_agents() { - # Agent binaries - _create_logging_mock claude openclaw zeroclaw codex opencode kilocode q - - # Tools used during agent install and file upload - _create_logging_mock pip pip3 npm npx node openssl shred cargo go git base64 - - # bun: pass `bun -e "..."` (JSON processing) through to the real binary; - # log all other invocations as no-ops. - # Fallback chain: real bun → node (with Bun.stdin polyfill) → exit 0 - # CI (GitHub Actions ubuntu-latest) has node but not bun, so the node - # fallback is essential for _fly_json / _fly_list_orgs / list_servers. - cat > "${TEST_DIR}/bun" << 'MOCKBUN' -#!/bin/bash -echo "bun $*" >> "${MOCK_LOG}" - -# Find the real bun binary (skip our mock directory) -_find_real_bun() { - local _self_dir - _self_dir="$(cd "$(dirname "$0")" && pwd)" - IFS=: read -ra _path_dirs <<< "$PATH" - for _d in "${_path_dirs[@]}"; do - if [[ "$_d" != "$_self_dir" && -x "$_d/bun" ]]; then - echo "$_d/bun" - return 0 - fi - done - return 1 -} - -# Delegate `bun run ` and `bun test ` to the real bun. -# fly/ agent shims use `bun run main.ts` — must pass through. -if [[ "$1" == "run" || "$1" == "test" ]]; then - _real_bun=$(_find_real_bun) || { echo "real bun not found" >&2; exit 1; } - exec "$_real_bun" "$@" -fi - -if [[ "$1" == "-e" ]]; then - _code="$2" - shift 2 # remove -e and the code, leaving extra args (e.g. -- field default) - _real_bun=$(_find_real_bun) - if [[ -n "$_real_bun" ]]; then - exec "$_real_bun" -e "$_code" "$@" - fi - # No real bun found — try node with a Bun.stdin polyfill - _self_dir="$(cd "$(dirname "$0")" && pwd)" - IFS=: read -ra _path_dirs <<< "$PATH" - _real_node="" - for _d in "${_path_dirs[@]}"; do - if [[ "$_d" != "$_self_dir" && -x "$_d/node" ]]; then - _real_node="$_d/node" - break - fi - done - if [[ -n "$_real_node" ]]; then - # Polyfill Bun.stdin.text() for node: read all of stdin as a string. - # --input-type=module enables top-level await (used by fly/lib scripts). - _polyfill='globalThis.Bun={stdin:{text:()=>new Promise(r=>{let d="";process.stdin.setEncoding("utf8");process.stdin.on("data",c=>d+=c);process.stdin.on("end",()=>r(d))})}};' - # Strip TypeScript type annotations for node compatibility. - _js_code=$(printf '%s' "$_code" | sed -E 's/: (any\[\]|any|string|number|void)//g; s/ as any//g') - exec "$_real_node" --input-type=module -e "${_polyfill}${_js_code}" "$@" - fi -fi -exit 0 -MOCKBUN - chmod +x "${TEST_DIR}/bun" - - # Silent mocks (no logging needed) - _create_silent_mock clear sleep - - # Mock timeout/gtimeout to just run the command (skip the timeout value) - cat > "${TEST_DIR}/timeout" << 'MOCK' -#!/bin/bash -shift -exec "$@" -MOCK - chmod +x "${TEST_DIR}/timeout" - cp "${TEST_DIR}/timeout" "${TEST_DIR}/gtimeout" - - # Mock python3 — delegate to real python3 for JSON parsing - cat > "${TEST_DIR}/python3" << 'MOCK' -#!/bin/bash -exec /usr/bin/python3 "$@" -MOCK - chmod +x "${TEST_DIR}/python3" - - # Mock 'ssh-keygen' — returns MD5 fingerprint matching fixture data - _create_ssh_keygen_mock - - # Mock fly/flyctl CLI — handles ssh console, auth token, version - _create_fly_mock -} - -_create_fly_mock() { - cat > "${TEST_DIR}/fly" << 'MOCK' -#!/bin/bash -echo "fly $*" >> "${MOCK_LOG}" - -# Simulate fly CLI failures when MOCK_ERROR_SCENARIO is set -case "${MOCK_ERROR_SCENARIO:-}" in - ssh_tunnel_failure) - case "$1" in - ssh) - echo "Error: failed to connect to tunnel: context deadline exceeded" >&2 - exit 1 ;; - machine) - case "${2:-}" in - exec) - echo "Error: machine not reachable" >&2 - exit 1 ;; - esac ;; - esac ;; - ssh_timeout) - case "$1" in - ssh|machine) - # Never return "ok" — simulates SSH not becoming ready - exit 1 ;; - esac ;; -esac - -case "$1" in - auth) - case "${2:-}" in - token) echo "test-token-fly" ;; - esac ;; - machine) - case "${2:-}" in - exec) - # fly machine exec MACHINE_ID --app APP -- bash -c CMD - all_args="$*" - if [[ "$all_args" == *"echo ok"* ]] || [[ "$all_args" == *'echo\ ok'* ]]; then - echo "ok" - fi - ;; - list) echo "[]" ;; - esac ;; - ssh) - # fly ssh console -a APP -C "bash -c CMD" --quiet - # Extract the command and simulate its output - all_args="$*" - # Check for "echo ok" (may be escaped as echo\ ok by printf %q) - if [[ "$all_args" == *"echo ok"* ]] || [[ "$all_args" == *'echo\ ok'* ]]; then - echo "ok" - fi - ;; - version) - echo "fly v0.3.50" ;; -esac -exit 0 -MOCK - chmod +x "${TEST_DIR}/fly" - cp "${TEST_DIR}/fly" "${TEST_DIR}/flyctl" -} - -setup_fake_home() { - local fake_home="${TEST_DIR}/fakehome" - mkdir -p "${fake_home}/.ssh" - mkdir -p "${fake_home}/.config/spawn" - mkdir -p "${fake_home}/.claude" - mkdir -p "${fake_home}/.local/bin" - # Create dummy SSH key pair - echo "-----BEGIN OPENSSH PRIVATE KEY-----" > "${fake_home}/.ssh/id_ed25519" - echo "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIHmcVdzydp72a/B69nmENZvCvjuk7xGpKdi5CvhkmNsv test@test" > "${fake_home}/.ssh/id_ed25519.pub" - chmod 600 "${fake_home}/.ssh/id_ed25519" - echo "${fake_home}" -} - -# ============================================================ -# Cloud API helpers (for use by test infra tests) -# ============================================================ - -# Strip API base URL to get just the endpoint path. -# Used by test/test-infra-sync.test.ts to validate cloud coverage. -_strip_simple_base() { - local url="$1" pattern="$2" - echo "$url" | sed "s|${pattern}||" -} - -_strip_pattern_base() { - local url="$1" sed_pattern="$2" - echo "$url" | sed "$sed_pattern" -} - - -_strip_api_base() { - local url="$1" - local endpoint="$url" - - case "$url" in - https://api.hetzner.cloud/v1*) - endpoint="${url#https://api.hetzner.cloud/v1}" ;; - https://api.digitalocean.com/v2*) - endpoint="${url#https://api.digitalocean.com/v2}" ;; - https://api.machines.dev/v1*) - endpoint="${url#https://api.machines.dev/v1}" ;; - esac - - echo "$endpoint" | sed 's|?.*||' -} - -# Get required POST body fields for a cloud endpoint. -_get_required_fields() { - local cloud="$1" - local endpoint="$2" - - case "${cloud}:${endpoint}" in - hetzner:/servers) echo "name server_type image location" ;; - digitalocean:/droplets) echo "name region size image" ;; - fly:*/machines) echo "name region config" ;; - esac -} - -# Validate POST request body contains required fields for major clouds. -# Used during mock script execution to catch invalid API requests. -# Args: cloud method endpoint body -_validate_body() { - local cloud="$1" - local method="$2" - local endpoint="$3" - local body="$4" - - [[ "$method" != "POST" ]] && return 0 - [[ -z "$body" ]] && return 0 - - local required_fields - required_fields=$(_get_required_fields "$cloud" "$endpoint") - [[ -z "$required_fields" ]] && return 0 - - # Check if body is valid JSON - if ! printf '%s' "$body" | python3 -c "import json,sys; json.loads(sys.stdin.read())" 2>/dev/null; then - echo "BODY_ERROR:invalid_json:${endpoint}" >> "${MOCK_LOG}" - return 1 - fi - - # Check for required fields - for field in $required_fields; do - if ! printf '%s' "$body" | python3 -c "import json,sys; d=json.loads(sys.stdin.read()); assert '$field' in d" 2>/dev/null; then - echo "BODY_ERROR:missing_field:${field}:${endpoint}" >> "${MOCK_LOG}" - fi - done - - return 0 -} - -# ============================================================ -# Cloud-specific env var setup -# ============================================================ - -setup_env_for_cloud() { - local cloud="$1" - - # Universal env vars - export OPENROUTER_API_KEY="sk-or-v1-0000000000000000000000000000000000000000000000000000000000000000" - export INSTANCE_STATUS_POLL_DELAY=0 - - # Cloud-specific env vars from fixture data - local env_file="${FIXTURES_DIR}/${cloud}/_env.sh" - if [[ -f "$env_file" ]]; then - # shellcheck disable=SC1090 - source "$env_file" - fi -} - -# ============================================================ -# Discovery -# ============================================================ - -discover_clouds() { - for fixture_dir in "${FIXTURES_DIR}"/*/; do - local cloud - cloud=$(basename "$fixture_dir") - if [[ -f "${fixture_dir}/_metadata.json" ]]; then - echo "$cloud" - fi - done -} - -discover_agents() { - local cloud="$1" - for script in "${REPO_ROOT}/${cloud}"/*.sh; do - [[ -f "$script" ]] || continue - local agent - agent=$(basename "$script" .sh) - echo "$agent" - done -} - -# ============================================================ -# Test runner helpers -# ============================================================ - -# Wait for a process to complete or timeout -# Args: pid timeout_seconds exit_code_var -_wait_with_timeout() { - local pid="$1" - local timeout="$2" - local exit_code_var="$3" - local i=0 - - while kill -0 "$pid" 2>/dev/null; do - if [[ "$i" -ge "$timeout" ]]; then - kill -9 "$pid" 2>/dev/null - wait "$pid" 2>/dev/null || true - eval "${exit_code_var}=124" - return - fi - sleep 1 - i=$((i + 1)) - done - wait "$pid" 2>/dev/null || eval "${exit_code_var}=$?" -} - -# Run a script in a sandboxed environment with a 4-second timeout. -# Sets exit_code variable in the caller's scope. -# Args: script_path cloud state_file fake_home -run_script_with_timeout() { - local script_path="$1" - local cloud="$2" - local state_file="$3" - local fake_home="$4" - - exit_code=0 - - MOCK_LOG="${MOCK_LOG}" \ - MOCK_FIXTURE_DIR="${FIXTURES_DIR}/${cloud}" \ - MOCK_CLOUD="${cloud}" \ - MOCK_REPO_ROOT="${REPO_ROOT}" \ - MOCK_VALIDATE_BODY="${MOCK_VALIDATE_BODY:-}" \ - MOCK_TRACK_STATE="${MOCK_TRACK_STATE:-}" \ - MOCK_STATE_FILE="${state_file}" \ - MOCK_ERROR_SCENARIO="${MOCK_ERROR_SCENARIO:-}" \ - PATH="${TEST_DIR}:${PATH}" \ - HOME="${fake_home}" \ - bash "${script_path}" < /dev/null > "${TEST_DIR}/output.log" 2>&1 & - local pid=$! - _wait_with_timeout "$pid" 4 "exit_code" -} - -# Print last 20 lines of output on script failure. -# Args: exit_code -show_failure_output() { - local exit_code="$1" - if [[ "${exit_code}" -ne 0 ]]; then - printf '%b\n' " ${RED}--- output (last 20 lines) ---${NC}" - tail -20 "${TEST_DIR}/output.log" 2>/dev/null | while IFS= read -r line; do - printf ' %s\n' "$line" - done - printf '%b\n' " ${RED}--- end output ---${NC}" - fi -} - -# Assert that the script failed when an error scenario was injected. -# Returns 0 (with result recorded) if an error scenario is active, 1 otherwise. -# Args: exit_code cloud agent -assert_error_scenario() { - local exit_code="$1" - local cloud="$2" - local agent="$3" - - [[ -n "${MOCK_ERROR_SCENARIO:-}" ]] || return 1 - - if [[ "${exit_code}" -ne 0 ]]; then - printf '%b\n' " ${GREEN}✓${NC} fails on ${MOCK_ERROR_SCENARIO} (exit code ${exit_code})" - PASSED=$((PASSED + 1)) - record_test_result "${cloud}" "${agent}" "pass" - else - printf '%b\n' " ${RED}✗${NC} should fail on ${MOCK_ERROR_SCENARIO} but exited 0" - FAILED=$((FAILED + 1)) - record_test_result "${cloud}" "${agent}" "fail" - fi - return 0 -} - -# Assert that the expected cloud-specific API calls were made. -# Reads assertions from test/fixtures/{cloud}/_api_assertions.sh if present, -# otherwise falls back to a generic API call check. -# Args: cloud -assert_cloud_api_calls() { - local cloud="$1" - local assertions_file="${FIXTURES_DIR}/${cloud}/_api_assertions.sh" - if [[ -f "$assertions_file" ]]; then - # shellcheck disable=SC1090 - source "$assertions_file" - else - assert_log_contains "curl (GET|POST) https://" "makes API calls" - fi -} - -# Write pass/fail result to RESULTS_FILE if set. -# Args: cloud agent result [reason] -# Result format: cloud/agent:pass or cloud/agent:fail[:reason] -# Reasons: exit_code, missing_api_call, missing_env, no_fixture -record_test_result() { - local cloud="$1" - local agent="$2" - local result="$3" - local reason="${4:-}" - [[ -n "${RESULTS_FILE:-}" ]] || return 0 - if [[ -n "$reason" ]]; then - printf '%s/%s:%s:%s\n' "${cloud}" "${agent}" "${result}" "${reason}" >> "${RESULTS_FILE}" - else - printf '%s/%s:%s\n' "${cloud}" "${agent}" "${result}" >> "${RESULTS_FILE}" - fi -} - -# ============================================================ -# Test runner -# ============================================================ - -# Run an assertion and store the number of new failures in _ASSERT_DELTA. -# Usage: _tracked_assert [args...] -# The assertion runs in the current shell so PASSED/FAILED propagate. -_tracked_assert() { - local _before=$FAILED - "$@" - _ASSERT_DELTA=$(( FAILED - _before )) -} - -# Determine the primary failure reason from tracked failure counts. -# Args: has_no_fixture exit_fails api_fails ssh_fails env_fails -# Prints the reason string to stdout. -_categorize_failure() { - local has_no_fixture="$1" exit_fails="$2" api_fails="$3" ssh_fails="$4" env_fails="$5" - if [[ "$has_no_fixture" -gt 0 ]]; then echo "no_fixture" - elif [[ "$exit_fails" -gt 0 ]]; then echo "exit_code" - elif [[ "$api_fails" -gt 0 ]]; then echo "missing_api_call" - elif [[ "$env_fails" -gt 0 ]]; then echo "missing_env" - elif [[ "$ssh_fails" -gt 0 ]]; then echo "missing_ssh" - else echo "unknown" - fi -} - -# Run assertions for a script and track which categories failed. -# Outputs: _exit_failed, _api_failed, _ssh_failed, _env_failed (as 0/1) -_run_assertions_and_track() { - local exit_code="$1" cloud="$2" - local _ASSERT_DELTA=0 - - _tracked_assert assert_exit_code "${exit_code}" 0 "exits successfully" - _exit_failed=$_ASSERT_DELTA - - _tracked_assert assert_cloud_api_calls "$cloud" - _api_failed=$_ASSERT_DELTA - - _tracked_assert assert_log_contains "ssh " "uses SSH" - _ssh_failed=$_ASSERT_DELTA - - _tracked_assert assert_env_injected "OPENROUTER_API_KEY" - _env_failed=$_ASSERT_DELTA - - if [[ "${MOCK_VALIDATE_BODY:-}" == "1" ]]; then - assert_no_body_errors - fi - if [[ "${MOCK_TRACK_STATE:-}" == "1" ]]; then - assert_server_cleaned_up "$3" - fi -} - -# Check for missing fixtures in the mock log. -_has_missing_fixture() { - grep -q "NO_FIXTURE:" "${MOCK_LOG}" 2>/dev/null && echo 1 || echo 0 -} - -# Setup test environment for a script -# Args: cloud state_file -_setup_test_env() { - local cloud="$1" - local state_file="$2" - : > "${MOCK_LOG}" - setup_env_for_cloud "$cloud" - : > "${state_file}" -} - -# Record test result based on failure categories -# Args: cloud agent pre_failed -_record_categorized_result() { - local cloud="$1" - local agent="$2" - local pre_failed="$3" - - local pre_fail=$((FAILED - pre_failed)) - if [[ "$pre_fail" -gt 0 ]]; then - local _has_no_fixture - _has_no_fixture=$(_has_missing_fixture) - local _reason - _reason=$(_categorize_failure "$_has_no_fixture" "$_exit_failed" "$_api_failed" "$_ssh_failed" "$_env_failed") - record_test_result "${cloud}" "${agent}" "fail" "${_reason}" - else - record_test_result "${cloud}" "${agent}" "pass" - fi -} - -# Run per-agent install assertions. -# Sources shared assertions file and optional per-cloud overrides. -_run_agent_assertions() { - local cloud="$1" - local agent="$2" - local shared_file="${FIXTURES_DIR}/_shared_agent_assertions.sh" - local cloud_file="${FIXTURES_DIR}/${cloud}/_agent_assertions.sh" - - if [[ -f "$shared_file" ]]; then - # shellcheck disable=SC1090 - source "$shared_file" - # Apply per-cloud overrides if they exist - if [[ -f "$cloud_file" ]]; then - # shellcheck disable=SC1090 - source "$cloud_file" - fi - assert_agent_install "$cloud" "$agent" - fi -} - -run_test() { - local cloud="$1" - local agent="$2" - local script_path="${REPO_ROOT}/${cloud}/${agent}.sh" - - if [[ ! -f "$script_path" ]]; then - printf '%b\n' " ${YELLOW}skip${NC} ${cloud}/${agent}.sh — file not found" - SKIPPED=$((SKIPPED + 1)) - return 0 - fi - - # TypeScript-based providers (fly, digitalocean) use bun with native fetch() - # for API calls. Fixture-based mock tests (which intercept curl) don't apply. - # Coverage comes from: bun test + failure mode tests. - if [[ ("$cloud" == "fly" || "$cloud" == "digitalocean") && -z "${MOCK_ERROR_SCENARIO:-}" ]]; then - printf '%b\n' " ${YELLOW}skip${NC} ${cloud}/${agent}.sh — TS provider (tested via bun test + failure modes)" - SKIPPED=$((SKIPPED + 1)) - return 0 - fi - - printf '%b\n' " ${CYAN}test${NC} ${cloud}/${agent}.sh" - - local _pre_failed="${FAILED}" - local fake_home - fake_home=$(setup_fake_home) - local state_file="${TEST_DIR}/state_${cloud}_${agent}.log" - - _setup_test_env "$cloud" "$state_file" - - local exit_code - run_script_with_timeout "${script_path}" "${cloud}" "${state_file}" "${fake_home}" - show_failure_output "${exit_code}" - - # Error scenario mode: just check that script failed, then return - if assert_error_scenario "${exit_code}" "${cloud}" "${agent}"; then - printf '\n' - return 0 - fi - - # Normal mode: run standard assertions and track failures per category - _run_assertions_and_track "${exit_code}" "${cloud}" "${state_file}" - - # Per-agent install assertions - _run_agent_assertions "$cloud" "$agent" - - _record_categorized_result "${cloud}" "${agent}" "$_pre_failed" - - printf '\n' -} - -# ============================================================ -# Fly.io failure-mode tests (#1579) -# ============================================================ - -# Run a single Fly.io agent script under a specific error scenario. -# Expects MOCK_ERROR_SCENARIO to trigger error injection in mock curl and/or fly CLI. -# Args: scenario agent -_run_fly_error_test() { - local scenario="$1" - local agent="$2" - local script_path="${REPO_ROOT}/fly/${agent}.sh" - - [[ -f "$script_path" ]] || return 0 - - printf '%b\n' " ${CYAN}test${NC} fly/${agent}.sh [${scenario}]" - - local fake_home - fake_home=$(setup_fake_home) - local state_file="${TEST_DIR}/state_fly_${agent}_${scenario}.log" - - : > "${MOCK_LOG}" - setup_env_for_cloud "fly" - : > "${state_file}" - - # Re-create fly mock so it picks up the error scenario - _create_fly_mock - - local exit_code=0 - MOCK_LOG="${MOCK_LOG}" \ - MOCK_FIXTURE_DIR="${FIXTURES_DIR}/fly" \ - MOCK_CLOUD="fly" \ - MOCK_REPO_ROOT="${REPO_ROOT}" \ - MOCK_ERROR_SCENARIO="${scenario}" \ - MOCK_STATE_FILE="${state_file}" \ - SPAWN_NON_INTERACTIVE=1 \ - PATH="${TEST_DIR}:${PATH}" \ - HOME="${fake_home}" \ - bash "${script_path}" < /dev/null > "${TEST_DIR}/output.log" 2>&1 & - local pid=$! - _wait_with_timeout "$pid" 10 "exit_code" - - if [[ "${exit_code}" -ne 0 ]]; then - printf '%b\n' " ${GREEN}✓${NC} fails on ${scenario} (exit code ${exit_code})" - PASSED=$((PASSED + 1)) - else - printf '%b\n' " ${RED}✗${NC} should fail on ${scenario} but exited 0" - FAILED=$((FAILED + 1)) - fi - printf '\n' -} - -# Run all Fly.io failure-mode tests using a single representative agent. -# Uses claude.sh as the test subject since it exercises the full provisioning path. -run_fly_failure_tests() { - printf '%b\n' "${CYAN}━━━ fly failure modes (#1579) ━━━${NC}" - - local test_agent="claude" - if [[ ! -f "${REPO_ROOT}/fly/${test_agent}.sh" ]]; then - printf '%b\n' " ${YELLOW}skip${NC} fly/${test_agent}.sh not found" - SKIPPED=$((SKIPPED + 1)) - return 0 - fi - - # 1. API rate limit (429) — mock curl returns 429 for cloud API calls - _run_fly_error_test "rate_limit" "$test_agent" - - # 2. Machine creation failure (422) — mock curl returns 422 for POST to */machines* - _run_fly_error_test "create_failure" "$test_agent" - - # 3. SSH tunnel failure — fly ssh console / fly machine exec exit non-zero - _run_fly_error_test "ssh_tunnel_failure" "$test_agent" - - # 4. SSH timeout — fly CLI never returns "ok", _fly_wait_for_ssh exhausts retries - _run_fly_error_test "ssh_timeout" "$test_agent" -} - -# ============================================================ -# Main -# ============================================================ - -printf '%b\n' "${CYAN}===============================${NC}" -printf '%b\n' "${CYAN} Spawn Mock Test Suite${NC}" -printf '%b\n' "${CYAN}===============================${NC}" -printf '\n' - -# Parse arguments -FILTER_CLOUD="${1:-}" -FILTER_AGENT="${2:-}" - -# Set up mocks once -setup_mock_curl -setup_mock_ssh -setup_mock_agents - -# Discover what to test -if [[ -n "$FILTER_CLOUD" ]]; then - CLOUDS="$FILTER_CLOUD" - if [[ ! -d "${FIXTURES_DIR}/${FILTER_CLOUD}" ]]; then - printf '%b\n' "${RED}No fixtures for cloud: ${FILTER_CLOUD}${NC}" - printf "Available: %s\n" "$(discover_clouds | tr '\n' ' ')" - exit 1 - fi -else - CLOUDS=$(discover_clouds) -fi - -if [[ -z "$CLOUDS" ]]; then - printf '%b\n' "${YELLOW}No fixture data found in ${FIXTURES_DIR}/${NC}" - printf "Run test/record.sh first to record API fixtures.\n" - exit 0 -fi - -printf "Fixtures dir: %s\n" "${FIXTURES_DIR}" -printf "Clouds: %s\n" "$CLOUDS" -printf '\n' - -# --- Run clouds in parallel --- -CLOUD_RESULTS_DIR="${TEST_DIR}/cloud_results" -mkdir -p "${CLOUD_RESULTS_DIR}" - -CLOUD_PIDS="" -for cloud in $CLOUDS; do - ( - # Isolated per-cloud state - CLOUD_TEST_DIR=$(mktemp -d) - MOCK_LOG="${CLOUD_TEST_DIR}/mock_calls.log" - CLOUD_PASSED=0 - CLOUD_FAILED=0 - CLOUD_SKIPPED=0 - - # Re-create mocks in per-cloud temp dir (curl/ssh/agents need own copies) - TEST_DIR="${CLOUD_TEST_DIR}" - setup_mock_curl - setup_mock_ssh - setup_mock_agents - - # Override counters used by assertions (they modify PASSED/FAILED/SKIPPED) - PASSED=0 - FAILED=0 - SKIPPED=0 - - printf '%b\n' "${CYAN}━━━ ${cloud} ━━━${NC}" - - if [[ -n "$FILTER_AGENT" ]]; then - AGENTS="$FILTER_AGENT" - else - AGENTS=$(discover_agents "$cloud") - fi - - if [[ -z "$AGENTS" ]]; then - printf '%b\n' " ${YELLOW}skip${NC} no agent scripts found in ${cloud}/" - SKIPPED=$((SKIPPED + 1)) - else - for agent in $AGENTS; do - run_test "$cloud" "$agent" - done - fi - printf '\n' - - # Write counts to results file for aggregation - printf '%d %d %d\n' "$PASSED" "$FAILED" "$SKIPPED" > "${CLOUD_RESULTS_DIR}/${cloud}.counts" - - rm -rf "${CLOUD_TEST_DIR}" - ) > "${CLOUD_RESULTS_DIR}/${cloud}.log" 2>&1 & - CLOUD_PIDS="${CLOUD_PIDS} $!" -done - -# Wait for all clouds to finish -for pid in $CLOUD_PIDS; do - wait "$pid" 2>/dev/null || true -done - -# Print output from each cloud (in discovery order for consistent output) -for cloud in $CLOUDS; do - if [[ -f "${CLOUD_RESULTS_DIR}/${cloud}.log" ]]; then - cat "${CLOUD_RESULTS_DIR}/${cloud}.log" - fi -done - -# Aggregate results from all clouds -for cloud in $CLOUDS; do - if [[ -f "${CLOUD_RESULTS_DIR}/${cloud}.counts" ]]; then - read -r p f s < "${CLOUD_RESULTS_DIR}/${cloud}.counts" - PASSED=$((PASSED + p)) - FAILED=$((FAILED + f)) - SKIPPED=$((SKIPPED + s)) - fi -done - -# --- Fly.io failure-mode tests (#1579) --- -# Run only when fly fixtures exist and no agent filter is active -if [[ -d "${FIXTURES_DIR}/fly" && ( -z "$FILTER_CLOUD" || "$FILTER_CLOUD" == "fly" ) && -z "$FILTER_AGENT" ]]; then - ( - FLY_FAIL_TEST_DIR=$(mktemp -d) - TEST_DIR="${FLY_FAIL_TEST_DIR}" - MOCK_LOG="${FLY_FAIL_TEST_DIR}/mock_calls.log" - PASSED=0 - FAILED=0 - SKIPPED=0 - - setup_mock_curl - setup_mock_ssh - setup_mock_agents - - run_fly_failure_tests - - printf '%d %d %d\n' "$PASSED" "$FAILED" "$SKIPPED" > "${CLOUD_RESULTS_DIR}/fly_failures.counts" - rm -rf "${FLY_FAIL_TEST_DIR}" - ) > "${CLOUD_RESULTS_DIR}/fly_failures.log" 2>&1 - - if [[ -f "${CLOUD_RESULTS_DIR}/fly_failures.log" ]]; then - cat "${CLOUD_RESULTS_DIR}/fly_failures.log" - fi - if [[ -f "${CLOUD_RESULTS_DIR}/fly_failures.counts" ]]; then - read -r p f s < "${CLOUD_RESULTS_DIR}/fly_failures.counts" - PASSED=$((PASSED + p)) - FAILED=$((FAILED + f)) - SKIPPED=$((SKIPPED + s)) - fi -fi - -# --- Summary --- -printf '%b\n' "${CYAN}===============================${NC}" -TOTAL=$((PASSED + FAILED + SKIPPED)) -printf '%b\n' " Results: ${GREEN}${PASSED} passed${NC}, ${RED}${FAILED} failed${NC}, ${YELLOW}${SKIPPED} skipped${NC}, ${TOTAL} total" -printf '%b\n' "${CYAN}===============================${NC}" - -if [[ "$FAILED" -gt 0 ]]; then - exit 1 -fi -exit 0 diff --git a/test/qa-dry-run.sh b/test/qa-dry-run.sh deleted file mode 100644 index c83e17dc..00000000 --- a/test/qa-dry-run.sh +++ /dev/null @@ -1,671 +0,0 @@ -#!/bin/bash -set -eo pipefail - -# QA Dry Run — Local-only version of qa-cycle.sh -# Does everything qa-cycle.sh does but with NO git/gh commands. -# All output goes to .docs/qa-dry-run-latest/. -# -# Usage: -# bash test/qa-dry-run.sh - -REPO_ROOT="$(git -C "$(dirname "${BASH_SOURCE[0]}")" rev-parse --show-toplevel 2>/dev/null)" -if [[ -z "${REPO_ROOT}" ]]; then - REPO_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)" -fi -cd "${REPO_ROOT}" - -DRY_RUN_DIR="${REPO_ROOT}/.docs/qa-dry-run-latest" -LOG_FILE="${DRY_RUN_DIR}/qa-dry-run.log" -WOULD_COMMIT_LOG="${DRY_RUN_DIR}/would-commit.txt" -CYCLE_TIMEOUT=2700 # 45 min total -AGENT_TIMEOUT=600 # 10 min per agent - -# Results files -RESULTS_PHASE2="${DRY_RUN_DIR}/results-phase2.txt" -RESULTS_PHASE4="${DRY_RUN_DIR}/results-phase4.txt" - -# Clean and create output directory -rm -rf "${DRY_RUN_DIR}" -mkdir -p "${DRY_RUN_DIR}" -: > "${LOG_FILE}" -: > "${WOULD_COMMIT_LOG}" - -log() { - printf '[%s] [qa-dry] %s\n' "$(date +'%Y-%m-%d %H:%M:%S')" "$*" | tee -a "${LOG_FILE}" -} - -cleanup() { - local exit_code=$? - log "=== QA Dry Run Done (exit_code=${exit_code}) ===" -} -trap cleanup EXIT SIGTERM SIGINT - -# macOS-compatible timeout: run command with a time limit -# Usage: run_with_timeout SECONDS COMMAND [ARGS...] -run_with_timeout() { - local secs="$1"; shift - "$@" & - local pid=$! - local elapsed=0 - while kill -0 "$pid" 2>/dev/null; do - if [[ "$elapsed" -ge "$secs" ]]; then - kill "$pid" 2>/dev/null - sleep 1 - kill -9 "$pid" 2>/dev/null || true - wait "$pid" 2>/dev/null || true - return 124 - fi - sleep 1 - elapsed=$((elapsed + 1)) - done - wait "$pid" 2>/dev/null -} - -log "=== Starting QA Dry Run ===" -log "Repo root: ${REPO_ROOT}" -log "Output dir: ${DRY_RUN_DIR}" -log "Timeout: ${CYCLE_TIMEOUT}s" - -# Track start time for total cycle timeout -CYCLE_START=$(date +%s) - -check_timeout() { - local now elapsed - now=$(date +%s) - elapsed=$((now - CYCLE_START)) - if [[ "$elapsed" -ge "$CYCLE_TIMEOUT" ]]; then - log "TIMEOUT: Cycle exceeded ${CYCLE_TIMEOUT}s, stopping" - return 1 - fi - return 0 -} - -would_commit() { - printf '[would-run] %s\n' "$*" >> "${WOULD_COMMIT_LOG}" -} - -# ============================================================ -# Phase 0: Key Preflight -# ============================================================ -log "=== Phase 0: Key Preflight ===" - -if [[ -f "${REPO_ROOT}/shared/key-request.sh" ]]; then - source "${REPO_ROOT}/shared/key-request.sh" - load_cloud_keys_from_config - if [[ -n "${MISSING_KEY_PROVIDERS:-}" ]]; then - log "Phase 0: Missing keys for: ${MISSING_KEY_PROVIDERS}" - if [[ -n "${KEY_SERVER_URL:-}" ]]; then - log "Phase 0: Requesting keys via key-server (will trigger email notification)" - request_missing_cloud_keys - else - log "Phase 0: KEY_SERVER_URL not set — skipping email notification" - log "Phase 0: Set KEY_SERVER_URL and KEY_SERVER_SECRET to enable email flow" - fi - else - log "Phase 0: All cloud keys available" - fi -else - log "Phase 0: shared/key-request.sh not found, skipping key preflight" -fi - -check_timeout || exit 0 - -# ============================================================ -# Phase 0.5: macOS Compatibility Lint -# ============================================================ -log "=== Phase 0.5: macOS Compatibility Lint ===" - -LINT_OUTPUT="${DRY_RUN_DIR}/macos-compat-output.txt" -LINT_ERRORS=0 -LINT_WARNS=0 - -if [[ -f "${REPO_ROOT}/test/macos-compat.sh" ]]; then - LINT_EXIT=0 - bash "${REPO_ROOT}/test/macos-compat.sh" > "${LINT_OUTPUT}" 2>&1 || LINT_EXIT=$? - - if [[ -f "${LINT_OUTPUT}" ]]; then - LINT_ERRORS=$(grep -c "^error " "${LINT_OUTPUT}" 2>/dev/null || true) - LINT_WARNS=$(grep -c "^warn " "${LINT_OUTPUT}" 2>/dev/null || true) - fi - - if [[ "${LINT_EXIT}" -eq 0 ]]; then - log "Phase 0.5: macOS compat lint passed (${LINT_WARNS} warning(s))" - else - log "Phase 0.5: macOS compat lint found ${LINT_ERRORS} error(s), ${LINT_WARNS} warning(s)" - log "Phase 0.5: Continuing (lint is advisory for now)" - fi -else - log "Phase 0.5: test/macos-compat.sh not found, skipping" -fi - -check_timeout || exit 0 - -# ============================================================ -# Phase 1: Record fixtures -# ============================================================ -log "=== Phase 1: Record fixtures ===" - -RECORD_OUTPUT="${DRY_RUN_DIR}/record-output.txt" - -RECORD_EXIT=0 -bash test/record.sh allsaved 2>&1 | tee -a "${LOG_FILE}" | tee "${RECORD_OUTPUT}" || RECORD_EXIT=$? - -if [[ "${RECORD_EXIT}" -eq 0 ]]; then - log "Phase 1: All fixtures recorded successfully" -else - log "Phase 1: Some fixture recordings failed, identifying failed clouds..." - - # Parse which clouds had failures - RECORD_FAILED_CLOUDS="" - current_cloud="" - while IFS= read -r line; do - clean=$(printf '%s' "$line" | sed 's/\x1b\[[0-9;]*m//g') - case "$clean" in - *"Recording "*" ━━━"*) - current_cloud=$(printf '%s' "$clean" | sed 's/.*Recording //; s/ ━━━.*//') - ;; - *"fail "*) - if [[ -n "${current_cloud}" ]]; then - case " ${RECORD_FAILED_CLOUDS} " in - *" ${current_cloud} "*) ;; - *) RECORD_FAILED_CLOUDS="${RECORD_FAILED_CLOUDS} ${current_cloud}" ;; - esac - fi - ;; - esac - done < "${RECORD_OUTPUT}" - RECORD_FAILED_CLOUDS=$(printf '%s' "${RECORD_FAILED_CLOUDS}" | sed 's/^ //') - - if [[ -n "${RECORD_FAILED_CLOUDS}" ]]; then - log "Phase 1: Failed clouds: ${RECORD_FAILED_CLOUDS}" - - # Separate auth failures from code failures - NON_AUTH_FAILED_CLOUDS="" - STALE_KEY_PROVIDERS="" - AUTH_PATTERN="401|403|[Uu]nauthorized|[Ff]orbidden|[Ii]nvalid.*(token|key|api)|[Aa]ccess.denied|[Aa]uthentication.failed" - - for cloud in ${RECORD_FAILED_CLOUDS}; do - error_output=$(sed -n "/Recording ${cloud}/,/Recording \|━━━ \|Results:/p" "${RECORD_OUTPUT}" | head -50 || true) - - if printf '%s' "${error_output}" | grep -iqE "${AUTH_PATTERN}"; then - log "Phase 1: Auth failure for ${cloud} — key is stale" - if type invalidate_cloud_key &>/dev/null; then - invalidate_cloud_key "${cloud}" - while IFS= read -r var_name; do - [[ -n "${var_name}" ]] && unset "${var_name}" 2>/dev/null || true - done <<< "$(get_cloud_env_vars "${cloud}")" - fi - STALE_KEY_PROVIDERS="${STALE_KEY_PROVIDERS} ${cloud}" - else - NON_AUTH_FAILED_CLOUDS="${NON_AUTH_FAILED_CLOUDS} ${cloud}" - fi - done - NON_AUTH_FAILED_CLOUDS=$(printf '%s' "${NON_AUTH_FAILED_CLOUDS}" | sed 's/^ //') - STALE_KEY_PROVIDERS=$(printf '%s' "${STALE_KEY_PROVIDERS}" | sed 's/^ //') - - if [[ -n "${STALE_KEY_PROVIDERS}" ]]; then - log "Phase 1: Stale keys detected: ${STALE_KEY_PROVIDERS}" - fi - - # Spawn all record-fix agents in parallel (one per non-auth failed cloud) - RECORD_FIX_PIDS="" - RECORD_FIX_WORK_DIRS="" - - for cloud in ${NON_AUTH_FAILED_CLOUDS}; do - check_timeout || break - - error_lines=$(sed -n "/Recording ${cloud}/,/Recording \|━━━ \|Results:/p" "${RECORD_OUTPUT}" | head -30 || true) - - log "Phase 1: Spawning agent to debug ${cloud} recording failure (async)" - would_commit "git worktree add ... -b qa/record-fix-${cloud} origin/main" - - WORK_DIR=$(mktemp -d "/tmp/spawn-qa-dry-XXXXXX") - cp -r "${REPO_ROOT}/." "${WORK_DIR}/" 2>/dev/null || true - - ORIG_HEAD=$(cd "${WORK_DIR}" && git rev-parse HEAD 2>/dev/null) || ORIG_HEAD="" - - ( - cd "${WORK_DIR}" - run_with_timeout "${AGENT_TIMEOUT}" claude -p "The API fixture recording for cloud '${cloud}' is failing in test/record.sh. - -Error output: -${error_lines} - -Investigate and fix. Only modify ${cloud}/lib/common.sh and test/record.sh." \ - 2>&1 | tee -a "${DRY_RUN_DIR}/agent-record-fix-${cloud}.log" || true - - # Copy changed files directly back to repo - changed=$(git diff --name-only "${ORIG_HEAD}" 2>/dev/null || true) - if [[ -n "$changed" ]]; then - printf '%s\n' "$changed" | while IFS= read -r f; do - [[ -f "$f" ]] || continue - mkdir -p "${REPO_ROOT}/$(dirname "$f")" - cp "$f" "${REPO_ROOT}/$f" - done - fi - ) & - RECORD_FIX_PIDS="${RECORD_FIX_PIDS} $!" - RECORD_FIX_WORK_DIRS="${RECORD_FIX_WORK_DIRS} ${WORK_DIR}" - done - - # Wait for all record-fix agents - if [[ -n "${RECORD_FIX_PIDS}" ]]; then - log "Phase 1: Waiting for record-fix agents..." - for pid in ${RECORD_FIX_PIDS}; do - wait "$pid" 2>/dev/null || true - done - fi - - # Log what changed and clean up work dirs - for cloud in ${NON_AUTH_FAILED_CLOUDS}; do - would_commit "git add ${cloud}/lib/common.sh test/record.sh && git commit && git push && gh pr create && gh pr merge" - done - for work_dir in ${RECORD_FIX_WORK_DIRS}; do - rm -rf "${work_dir}" - done - - # Re-record after fixes - log "Phase 1: Re-recording after fixes..." - bash test/record.sh allsaved 2>&1 | tee -a "${LOG_FILE}" || { - log "Phase 1: Re-record still has failures — continuing with existing fixtures" - } - fi - - # Request fresh keys for stale providers (triggers email via key-server) - if [[ -n "${STALE_KEY_PROVIDERS:-}" ]] && type request_missing_cloud_keys &>/dev/null; then - MISSING_KEY_PROVIDERS="${STALE_KEY_PROVIDERS}" - log "Phase 1: Requesting fresh keys for stale providers: ${STALE_KEY_PROVIDERS}" - request_missing_cloud_keys - log "Phase 1: Key request sent (email notification will be sent if KEY_SERVER_URL is configured)" - fi -fi - -rm -f "${RECORD_OUTPUT}" -check_timeout || exit 0 - -# ============================================================ -# Phase 2: Run mock tests -# ============================================================ -log "=== Phase 2: Run mock tests ===" - -rm -f "${RESULTS_PHASE2}" -MOCK_EXIT=0 -RESULTS_FILE="${RESULTS_PHASE2}" bash test/mock.sh 2>&1 | tee -a "${LOG_FILE}" || MOCK_EXIT=$? - -PASS_COUNT=0 -FAIL_COUNT=0 -if [[ -f "${RESULTS_PHASE2}" ]]; then - TOTAL_TESTS=$(wc -l < "${RESULTS_PHASE2}" | tr -d ' ') - PASS_COUNT=$(grep -c ':pass$' "${RESULTS_PHASE2}" || true) - FAIL_COUNT=$(grep -c ':fail$' "${RESULTS_PHASE2}" || true) - log "Phase 2: ${PASS_COUNT} passed, ${FAIL_COUNT} failed, ${TOTAL_TESTS} total" -else - log "Phase 2: No results file generated" -fi - -check_timeout || exit 0 - -# ============================================================ -# Phase 3: Fix mock failures -# ============================================================ -log "=== Phase 3: Fix failures ===" - -if [[ "${FAIL_COUNT:-0}" -eq 0 ]]; then - log "Phase 3: No failures to fix" -else - FAILURES="" - FAILED_CLOUDS="" - if [[ -f "${RESULTS_PHASE2}" ]]; then - FAILURES=$(grep ':fail$' "${RESULTS_PHASE2}" | sed 's/:fail$//' || true) - FAILED_CLOUDS=$(grep ':fail$' "${RESULTS_PHASE2}" | sed 's/:fail$//' | cut -d/ -f1 | sort -u || true) - fi - - # Spawn all fix agents in parallel (one per failed cloud) - FIX_PIDS="" - FIX_WORK_DIRS="" - FIX_ORIG_HEADS="" - - for cloud in $FAILED_CLOUDS; do - check_timeout || break - - cloud_failures=$(printf '%s\n' $FAILURES | grep "^${cloud}/" || true) - failing_scripts="" - error_context="" - for combo in $cloud_failures; do - agent=$(printf '%s' "$combo" | cut -d/ -f2) - script_path="${cloud}/${agent}.sh" - failing_scripts="${failing_scripts} ${script_path}" - if [[ -f "${LOG_FILE}" ]]; then - ctx=$(grep -A 10 "test ${script_path}" "${LOG_FILE}" | tail -10 || true) - if [[ -n "$ctx" ]]; then - error_context="${error_context} ---- ${script_path} --- -${ctx} -" - fi - fi - done - failing_scripts=$(printf '%s' "$failing_scripts" | sed 's/^ //') - - fail_count=$(printf '%s\n' $cloud_failures | wc -l | tr -d ' ') - log "Phase 3: Spawning agent to fix ${fail_count} failing script(s) in ${cloud} (async)" - would_commit "git worktree add ... -b qa/fix-${cloud} origin/main" - - WORK_DIR=$(mktemp -d "/tmp/spawn-qa-dry-XXXXXX") - cp -r "${REPO_ROOT}/." "${WORK_DIR}/" 2>/dev/null || true - - ORIG_HEAD=$(cd "${WORK_DIR}" && git rev-parse HEAD 2>/dev/null) || ORIG_HEAD="" - - # Run agent in background subshell — log to per-cloud file to avoid interleaving - ( - cd "${WORK_DIR}" - run_with_timeout 900 claude -p "Fix the failing mock tests for cloud '${cloud}' in the spawn codebase. - -Failing scripts: ${failing_scripts} - -Error context from test run: -${error_context} - -Investigate the root cause and fix. You can modify: scripts in ${cloud}/, test/fixtures/${cloud}/, and test/mock.sh." \ - 2>&1 | tee -a "${DRY_RUN_DIR}/agent-fix-${cloud}.log" || true - - # Copy changed files directly back to repo - changed=$(git diff --name-only "${ORIG_HEAD}" 2>/dev/null || true) - if [[ -n "$changed" ]]; then - printf '%s\n' "$changed" | while IFS= read -r f; do - [[ -f "$f" ]] || continue - mkdir -p "${REPO_ROOT}/$(dirname "$f")" - cp "$f" "${REPO_ROOT}/$f" - done - fi - ) & - FIX_PIDS="${FIX_PIDS} $!" - FIX_WORK_DIRS="${FIX_WORK_DIRS} ${WORK_DIR}" - done - - # Wait for all agents to finish - if [[ -n "${FIX_PIDS}" ]]; then - log "Phase 3: Waiting for ${FAILED_CLOUDS} fix agents..." - for pid in ${FIX_PIDS}; do - wait "$pid" 2>/dev/null || true - done - fi - - # Log and clean up work dirs - for cloud in $FAILED_CLOUDS; do - would_commit "git add ${cloud}/ test/fixtures/${cloud}/ test/mock.sh && git commit && git push && gh pr create && gh pr merge" - done - for work_dir in ${FIX_WORK_DIRS}; do - rm -rf "${work_dir}" - done - - log "Phase 3: Fix agents complete" -fi - -check_timeout || exit 0 - -# ============================================================ -# Phase 4: Re-run mock tests + update README (no commit) -# ============================================================ -log "=== Phase 4: Re-run tests and update README ===" - -rm -f "${RESULTS_PHASE4}" -RESULTS_FILE="${RESULTS_PHASE4}" bash test/mock.sh 2>&1 | tee -a "${LOG_FILE}" || true - -RETRY_PASS=0 -RETRY_FAIL=0 -if [[ -f "${RESULTS_PHASE4}" ]]; then - RETRY_PASS=$(grep -c ':pass$' "${RESULTS_PHASE4}" || true) - RETRY_FAIL=$(grep -c ':fail$' "${RESULTS_PHASE4}" || true) - log "Phase 4: ${RETRY_PASS} passed, ${RETRY_FAIL} failed" - - if [[ -f "test/update-readme.py" ]]; then - python3 test/update-readme.py "${RESULTS_PHASE4}" 2>&1 | tee -a "${LOG_FILE}" || true - - if [[ -n "$(git diff --name-only README.md 2>/dev/null)" ]]; then - would_commit "git checkout -b qa/readme-update-\$(date +%s) && git add README.md && git commit && git push && gh pr create && gh pr merge" - # Show the diff but don't commit - git diff README.md > "${DRY_RUN_DIR}/diff-readme.patch" 2>/dev/null || true - # Revert README changes (dry run) - use git restore to avoid checkout pollution - git restore README.md 2>/dev/null || git checkout -- README.md 2>/dev/null || true - log "Phase 4: README diff saved to diff-readme.patch (not committed)" - else - log "Phase 4: No README changes needed" - fi - fi -else - log "Phase 4: No results file generated" -fi - -# ============================================================ -# Phase 5: E2E Tests (optional — requires cloud credentials) -# ============================================================ -E2E_PASS=0 -E2E_FAIL=0 -E2E_SKIPPED=0 - -if [[ -f "${REPO_ROOT}/test/e2e.sh" ]]; then - # Check if any cloud credentials are available - HAS_CLOUD_CREDS=0 - for _var in FLY_API_TOKEN HCLOUD_TOKEN DO_API_TOKEN DAYTONA_API_KEY OVH_APP_KEY; do - if [[ -n "${!_var:-}" ]]; then - HAS_CLOUD_CREDS=1 - break - fi - done - - if [[ "${HAS_CLOUD_CREDS}" -eq 1 ]] && [[ -n "${OPENROUTER_API_KEY:-}" ]]; then - log "=== Phase 5: E2E Tests ===" - - E2E_OUTPUT="${DRY_RUN_DIR}/e2e-output.txt" - E2E_EXIT=0 - # Stream live so failures are visible immediately, not after the full run - E2E_AUTO_FIX=0 bash "${REPO_ROOT}/test/e2e.sh" \ - 2>&1 | tee "${E2E_OUTPUT}" | tee -a "${LOG_FILE}" || E2E_EXIT=$? - - # Count only cloud/agent lines (contain "/"), not pre-flight checkmarks - if [[ -f "${E2E_OUTPUT}" ]]; then - E2E_PASS=$(grep '✓' "${E2E_OUTPUT}" | grep -c '/' 2>/dev/null || true) - E2E_FAIL=$(grep '✗' "${E2E_OUTPUT}" | grep -c '/' 2>/dev/null || true) - fi - - if [[ "${E2E_EXIT}" -eq 0 ]]; then - log "Phase 5: E2E tests passed (${E2E_PASS} passed)" - else - log "Phase 5: E2E tests had ${E2E_FAIL} failure(s), ${E2E_PASS} passed" - fi - - # --- Phase 5b: Fix E2E failures (dry run — copies, no git/PR) --- - if [[ "${E2E_FAIL}" -gt 0 ]] && [[ -f "${E2E_OUTPUT}" ]]; then - check_timeout || true - - log "=== Phase 5b: Fix E2E failures ===" - - # Parse failing combos — only lines with "/" (skip pre-flight) - E2E_FAILED_COMBOS="" - E2E_FAILED_AGENTS="" - while IFS= read -r line; do - clean=$(printf '%s' "$line" | sed 's/\x1b\[[0-9;]*m//g') - case "$clean" in - *"✗ "*"/"*) - combo=$(printf '%s' "$clean" | sed 's/.*✗ //; s/ .*//') - reason=$(printf '%s' "$clean" | sed 's/.*(\(.*\))/\1/' || true) - cloud="${combo%%/*}" - agent="${combo##*/}" - E2E_FAILED_COMBOS="${E2E_FAILED_COMBOS} ${cloud}/${agent}|${reason}" - case " ${E2E_FAILED_AGENTS} " in - *" ${agent} "*) ;; - *) E2E_FAILED_AGENTS="${E2E_FAILED_AGENTS} ${agent}" ;; - esac - ;; - esac - done < "${E2E_OUTPUT}" - E2E_FAILED_COMBOS=$(printf '%s' "${E2E_FAILED_COMBOS}" | sed 's/^ //') - E2E_FAILED_AGENTS=$(printf '%s' "${E2E_FAILED_AGENTS}" | sed 's/^ //') - - if [[ -n "${E2E_FAILED_AGENTS}" ]]; then - log "Phase 5b: Failing agents: ${E2E_FAILED_AGENTS}" - - E2E_FIX_PIDS="" - E2E_FIX_WORK_DIRS="" - - for agent in ${E2E_FAILED_AGENTS}; do - check_timeout || break - - # Collect failing clouds and reasons - failing_clouds="" - failure_summary="" - for entry in ${E2E_FAILED_COMBOS}; do - entry_combo="${entry%%|*}" - entry_reason="${entry#*|}" - entry_agent="${entry_combo##*/}" - entry_cloud="${entry_combo%%/*}" - if [[ "${entry_agent}" == "${agent}" ]]; then - failing_clouds="${failing_clouds} ${entry_cloud}" - failure_summary="${failure_summary} - ${entry_cloud}/${agent}.sh: ${entry_reason}\n" - fi - done - failing_clouds=$(printf '%s' "${failing_clouds}" | sed 's/^ //') - - # Find ALL clouds with this agent - all_clouds_for_agent="" - other_cloud_scripts="" - for cloud_dir in "${REPO_ROOT}"/*/; do - cname=$(basename "${cloud_dir}") - [[ "${cname}" == "shared" || "${cname}" == "cli" || "${cname}" == "test" || "${cname}" == ".claude" || "${cname}" == ".github" || "${cname}" == ".docs" ]] && continue - if [[ -f "${cloud_dir}${agent}.sh" ]]; then - all_clouds_for_agent="${all_clouds_for_agent} ${cname}" - case " ${failing_clouds} " in - *" ${cname} "*) ;; - *) other_cloud_scripts="${other_cloud_scripts} ${cname}/${agent}.sh" ;; - esac - fi - done - all_clouds_for_agent=$(printf '%s' "${all_clouds_for_agent}" | sed 's/^ //') - other_cloud_scripts=$(printf '%s' "${other_cloud_scripts}" | sed 's/^ //') - - fail_count=0 - for _c in ${failing_clouds}; do fail_count=$((fail_count + 1)); done - - log "Phase 5b: Spawning agent for '${agent}' (${fail_count} failure(s), propagating to: ${other_cloud_scripts:-none})" - would_commit "git worktree add ... -b qa/e2e-fix-${agent} origin/main" - - WORK_DIR=$(mktemp -d "/tmp/spawn-qa-dry-XXXXXX") - cp -r "${REPO_ROOT}/." "${WORK_DIR}/" 2>/dev/null || true - ORIG_HEAD=$(cd "${WORK_DIR}" && git rev-parse HEAD 2>/dev/null) || ORIG_HEAD="" - - modify_files="" - for _c in ${all_clouds_for_agent}; do - modify_files="${modify_files} ${_c}/${agent}.sh ${_c}/lib/common.sh" - done - - ( - cd "${WORK_DIR}" - run_with_timeout "${AGENT_TIMEOUT}" claude -p "Fix E2E test failures for agent **${agent}** and propagate fixes to all clouds. - -## E2E Failure Summary -$(printf '%b' "${failure_summary}") -## All clouds with ${agent} -${all_clouds_for_agent} - -## What happened -These scripts were run with real cloud servers (SPAWN_NON_INTERACTIVE=1, no TTY). -A script passes if it prints 'setup completed successfully' before the session step. -Common E2E failure causes: -- Install command fails (wrong package name, missing repo, network timeout) -- Config file written to wrong path or with wrong permissions -- Env var injection missing (OPENROUTER_API_KEY, ANTHROPIC_BASE_URL, etc.) -- Script hangs on an interactive prompt that wasn't guarded by SPAWN_NON_INTERACTIVE -- SSH wait/connect fails (firewall, wrong port, key not imported) - -## Fix Process - -1. **Read each failing script** and its cloud's lib/common.sh. -2. **Compare with working clouds.** Diff the scripts — look for divergence. -3. **Fix the root cause** in each failing script. -4. **Propagate to other clouds:** ${other_cloud_scripts:-"(no other clouds)"} - Only propagate if the same problematic pattern exists. -5. **Validate:** Run bash -n on every modified .sh file. - -You may modify:${modify_files}" \ - 2>&1 | tee -a "${DRY_RUN_DIR}/agent-e2e-fix-${agent}.log" || true - - # Copy changed files back to repo - changed=$(git diff --name-only "${ORIG_HEAD}" 2>/dev/null || true) - uncommitted=$(git status --porcelain 2>/dev/null | sed 's/^.. //' || true) - for f in ${changed} ${uncommitted}; do - [[ -f "$f" ]] || continue - mkdir -p "${REPO_ROOT}/$(dirname "$f")" - cp "$f" "${REPO_ROOT}/$f" - done - ) & - E2E_FIX_PIDS="${E2E_FIX_PIDS} $!" - E2E_FIX_WORK_DIRS="${E2E_FIX_WORK_DIRS} ${WORK_DIR}" - done - - # Wait for all E2E fix agents - for pid in ${E2E_FIX_PIDS}; do - wait "$pid" 2>/dev/null || true - done - - for agent in ${E2E_FAILED_AGENTS}; do - would_commit "git add */\${agent}.sh && git commit && git push && gh pr create && gh pr merge" - done - for work_dir in ${E2E_FIX_WORK_DIRS}; do - rm -rf "${work_dir}" - done - - log "Phase 5b: E2E fix agents complete" - fi - fi - else - E2E_SKIPPED=1 - log "=== Phase 5: E2E Tests (Skipped — no cloud credentials or OPENROUTER_API_KEY) ===" - fi -else - E2E_SKIPPED=1 - log "=== Phase 5: E2E Tests (Skipped — test/e2e.sh not found) ===" -fi - -check_timeout || exit 0 - -# ============================================================ -# Summary -# ============================================================ -log "" -log "=== QA Dry Run Summary ===" -log "Phase 0.5 (lint): ${LINT_ERRORS:-0} error(s) / ${LINT_WARNS:-0} warning(s)" -log "Phase 2 (initial): ${PASS_COUNT:-0} pass / ${FAIL_COUNT:-0} fail" -log "Phase 4 (after fix): ${RETRY_PASS:-0} pass / ${RETRY_FAIL:-0} fail" -if [[ "${FAIL_COUNT:-0}" -gt 0 ]] && [[ "${RETRY_FAIL:-0}" -lt "${FAIL_COUNT:-0}" ]]; then - FIXED=$(( ${FAIL_COUNT:-0} - ${RETRY_FAIL:-0} )) - log "Fixed ${FIXED} failure(s) this cycle" -fi -if [[ "${E2E_SKIPPED:-0}" -eq 0 ]]; then - log "Phase 5 (e2e): ${E2E_PASS:-0} pass / ${E2E_FAIL:-0} fail" -else - log "Phase 5 (e2e): skipped" -fi -log "" -log "Output files:" -log " ${DRY_RUN_DIR}/qa-dry-run.log — full log" -log " ${DRY_RUN_DIR}/macos-compat-output.txt — macOS compat lint output" -log " ${DRY_RUN_DIR}/results-phase2.txt — mock test results (initial)" -log " ${DRY_RUN_DIR}/results-phase4.txt — mock test results (after fixes)" -log " ${DRY_RUN_DIR}/would-commit.txt — git/gh commands that would have run" - -# List patch files -PATCH_COUNT=0 -for pf in "${DRY_RUN_DIR}"/diff-*.patch; do - [[ -f "$pf" ]] || continue - if [[ -s "$pf" ]]; then - log " $(basename "$pf") — $(wc -l < "$pf" | tr -d ' ') lines" - PATCH_COUNT=$((PATCH_COUNT + 1)) - fi -done -if [[ "$PATCH_COUNT" -eq 0 ]]; then - log " (no patches generated)" -fi - -log "" -log "=== QA Dry Run Complete ===" diff --git a/test/record.sh b/test/record.sh deleted file mode 100644 index 5416cefc..00000000 --- a/test/record.sh +++ /dev/null @@ -1,959 +0,0 @@ -#!/bin/bash -# Record real API responses from cloud providers as test fixtures -# -# Hits safe GET-only endpoints using each cloud's existing API wrapper, -# validates the response, and saves it as pretty-printed JSON. -# -# Usage: -# bash test/record.sh hetzner # Record one cloud -# bash test/record.sh hetzner digitalocean # Record multiple -# bash test/record.sh all # All clouds with available credentials -# bash test/record.sh --list # Show recordable clouds + credential status - -set -eo pipefail - -REPO_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)" -FIXTURES_DIR="${REPO_ROOT}/test/fixtures" - -# Sandbox: Use test-specific config directory if TEST_CONFIG_DIR is set -# This prevents polluting production ~/.config/spawn/ during tests -if [[ -n "${TEST_CONFIG_DIR:-}" ]]; then - export HOME="${TEST_CONFIG_DIR}" -fi - -# Colors -RED='\033[0;31m' -GREEN='\033[0;32m' -YELLOW='\033[1;33m' -CYAN='\033[0;36m' -NC='\033[0m' - -# Counters -RECORDED=0 -SKIPPED=0 -ERRORS=0 - -# Whether to prompt for missing credentials (set by 'all' vs 'allsaved') -PROMPT_FOR_CREDS=true - -# All clouds with REST APIs that we can record from -ALL_RECORDABLE_CLOUDS="hetzner digitalocean fly" - -# --- Endpoint registry --- -# Declare endpoints as string literal for each cloud -# Format: "fixture_name:endpoint" (one per line, indented) -_ENDPOINTS_hetzner=" -server_types:/server_types?per_page=50 -locations:/locations -ssh_keys:/ssh_keys -servers:/servers -" - -_ENDPOINTS_digitalocean=" -account:/account -ssh_keys:/account/keys -droplets:/droplets -sizes:/sizes -regions:/regions -" - - -_ENDPOINTS_fly=" -apps:/apps?org_slug=personal -" - -get_endpoints() { - local cloud="$1" - local var_name="_ENDPOINTS_${cloud}" - if [[ -n "${!var_name:-}" ]]; then - printf '%s\n' "${!var_name}" | grep -v '^$' - fi -} - -# --- Multi-credential cloud specs --- -# Returns "config_key:env_var" pairs (one per line) for multi-credential clouds. -# Single-credential clouds return nothing (handled by get_auth_env_var). -_get_multi_cred_spec() { - local cloud="$1" - case "$cloud" in - esac -} - -# Load multiple fields from a JSON config file and export as env vars. -# Arguments: CONFIG_FILE SPEC... (each spec is "config_key:ENV_VAR") -_load_multi_config_from_file() { - local config_file="$1"; shift - [[ -f "$config_file" ]] || return 1 - - local config_keys=() env_vars=() - local spec - for spec in "$@"; do - config_keys+=("${spec%%:*}") - env_vars+=("${spec#*:}") - done - - local vals - vals=$(python3 -c " -import json, sys -try: - d = json.load(open(sys.argv[1])) - print('\t'.join(d.get(k, '') for k in sys.argv[2:])) -except: pass -" "$config_file" "${config_keys[@]}" 2>/dev/null) || return 1 - - [[ -n "${vals:-}" ]] || return 1 - - local IFS=$'\t' - local fields - read -ra fields <<< "$vals" - - local i - for i in "${!env_vars[@]}"; do - if [[ -n "${fields[$i]:-}" ]]; then - # SECURITY: Validate env var name before export - if [[ ! "${env_vars[$i]}" =~ ^[A-Z_][A-Z0-9_]*$ ]]; then - echo "SECURITY: Invalid env var name rejected: ${env_vars[$i]}" >&2 - return 1 - fi - export "${env_vars[$i]}=${fields[$i]}" - fi - done -} - -# Save multiple env vars to a JSON config file. -# Arguments: CONFIG_FILE SPEC... (each spec is "config_key:ENV_VAR") -_save_multi_config_to_file() { - local config_file="$1"; shift - - local py_args=() - local py_keys="" - local idx=1 - local spec - for spec in "$@"; do - local config_key="${spec%%:*}" - local env_var="${spec#*:}" - local val="${!env_var:-}" - py_args+=("$val") - py_keys="${py_keys}'${config_key}': sys.argv[${idx}], " - idx=$((idx + 1)) - done - - python3 -c " -import json, sys -print(json.dumps({${py_keys}}, indent=2)) -" "${py_args[@]}" > "$config_file" -} - -# --- Auth env var check --- -get_auth_env_var() { - local cloud="$1" - case "$cloud" in - hetzner) printf "HCLOUD_TOKEN" ;; - digitalocean) printf "DO_API_TOKEN" ;; - fly) printf "FLY_API_TOKEN" ;; - esac -} - -# Try loading token from ~/.config/spawn/{cloud}.json (same config the agent scripts use) -# Load a single API token from JSON config and export it. -# Arguments: ENV_VAR CONFIG_FILE -_load_single_token_config() { - local env_var="$1" - local config_file="$2" - - [[ -f "$config_file" ]] || return 0 - - # SECURITY: Validate env var name before export - if [[ ! "${env_var}" =~ ^[A-Z_][A-Z0-9_]*$ ]]; then - echo "SECURITY: Invalid env var name rejected: ${env_var}" >&2 - return 1 - fi - - local token - token=$(python3 -c "import json,sys; d=json.load(open(sys.argv[1])); print(d.get('api_key','') or d.get('token',''))" "$config_file" 2>/dev/null) || true - if [[ -n "${token:-}" ]]; then - export "${env_var}=${token}" - fi -} - -try_load_config() { - local cloud="$1" - local env_var - env_var=$(get_auth_env_var "$cloud") - - # Already set via env var — nothing to do - local current_val="${!env_var:-}" - if [[ -n "$current_val" ]]; then - return 0 - fi - - local config_file="$HOME/.config/spawn/${cloud}.json" - - # Multi-credential clouds (OVH, etc.) - local specs - specs=$(_get_multi_cred_spec "$cloud") - if [[ -n "$specs" ]]; then - local spec_args=() - while IFS= read -r line; do - spec_args+=("$line") - done <<< "$specs" - _load_multi_config_from_file "$config_file" "${spec_args[@]}" || true - return 0 - fi - - # Standard single-token config - _load_single_token_config "$env_var" "$config_file" -} - -has_credentials() { - local cloud="$1" - - # Try loading from config file first - try_load_config "$cloud" - - # Multi-credential clouds: check all env vars from spec - local specs - specs=$(_get_multi_cred_spec "$cloud") - if [[ -n "$specs" ]]; then - local line - while IFS= read -r line; do - local env_var="${line#*:}" - [[ -n "${!env_var:-}" ]] || return 1 - done <<< "$specs" - return 0 - fi - - # Single-credential clouds - local env_var - env_var=$(get_auth_env_var "$cloud") - [[ -n "${!env_var:-}" ]] -} - -# Save credentials to ~/.config/spawn/{cloud}.json for future use -save_config() { - local cloud="$1" - local config_dir="$HOME/.config/spawn" - local config_file="${config_dir}/${cloud}.json" - mkdir -p "$config_dir" - - # Multi-credential clouds - local specs - specs=$(_get_multi_cred_spec "$cloud") - if [[ -n "$specs" ]]; then - local spec_args=() - while IFS= read -r line; do - spec_args+=("$line") - done <<< "$specs" - _save_multi_config_to_file "$config_file" "${spec_args[@]}" - else - # Standard single-token config - local env_var - env_var=$(get_auth_env_var "$cloud") - local val="${!env_var:-}" - python3 -c "import json, sys; print(json.dumps({'api_key': sys.argv[1]}, indent=2))" "$val" > "$config_file" - fi - printf '%b\n' " ${GREEN}saved${NC} → ${config_file}" -} - -# Prompt user for missing credentials, export them, and save to config -prompt_credentials() { - local cloud="$1" - local vars_needed="" - local val="" - - # Multi-credential clouds: extract env var names from spec - local specs - specs=$(_get_multi_cred_spec "$cloud") - if [[ -n "$specs" ]]; then - local line - while IFS= read -r line; do - vars_needed="${vars_needed} ${line#*:}" - done <<< "$specs" - else - vars_needed=$(get_auth_env_var "$cloud") - fi - - for var_name in $vars_needed; do - # SECURITY: Validate env var name before using in indirect expansion or export - if [[ ! "${var_name}" =~ ^[A-Z_][A-Z0-9_]*$ ]]; then - echo "SECURITY: Invalid env var name rejected: ${var_name}" >&2 - return 1 - fi - local current="${!var_name:-}" - if [[ -n "$current" ]]; then - continue - fi - printf " Enter %s (press Enter to skip %s): " "$var_name" "$cloud" >&2 - read -r val - if [[ -z "$val" ]]; then - return 1 - fi - export "${var_name}=${val}" - done - - # Save so they don't have to enter again - save_config "$cloud" - return 0 -} - -# --- API call dispatcher --- -# Each cloud sources its lib and calls its wrapper function -call_api() { - local cloud="$1" - local endpoint="$2" - case "$cloud" in - hetzner) hetzner_api GET "$endpoint" ;; - digitalocean) do_api GET "$endpoint" ;; - fly) curl -fsSL -H "Authorization: ${FLY_API_TOKEN}" "https://api.machines.dev/v1${endpoint}" ;; - esac -} - -# --- Validation --- -is_valid_json() { - python3 -c "import json,sys; json.loads(sys.stdin.read())" 2>/dev/null -} - -has_api_error() { - local cloud="$1" - local response="$2" - - _RESPONSE="$response" _CLOUD="$cloud" python3 << 'VALIDATION_EOF' 2>/dev/null -import json, sys, os -d = json.loads(os.environ['_RESPONSE']) -cloud = os.environ['_CLOUD'] - -# Helper: data keys that indicate success responses (not errors) -success_keys = {'servers','images','ssh_keys','flavors','sizes','regions','count','results','id','name','slug','status','ipv4'} - -error_checks = { - 'hetzner': lambda d: d.get('error') and isinstance(d.get('error'), dict), - 'digitalocean': lambda d: 'id' in d and isinstance(d.get('id'), str) and 'message' in d, - 'fly': lambda d: 'error' in d and isinstance(d.get('error'), str), -} - -if cloud in error_checks: - sys.exit(0 if error_checks[cloud](d) else 1) -else: - sys.exit(1) -VALIDATION_EOF -} - -# --- Pretty print JSON --- -pretty_json() { - python3 -c "import json,sys; print(json.dumps(json.loads(sys.stdin.read()), indent=2, sort_keys=True))" -} - -# --- Live create+delete cycle (captures real POST/DELETE responses) --- -# Creates a server with a timestamped name, records the response, then deletes it. -# These functions access cloud_recorded, cloud_errors, metadata_entries from the -# calling scope (record_cloud) via bash dynamic scoping — no namerefs needed. -_record_live_cycle() { - local cloud="$1" - local fixture_dir="$2" - - # Source cloud lib so API wrappers are available (dynamic scoping - # lets _live_* functions update caller's counters/metadata) - source "${REPO_ROOT}/${cloud}/lib/common.sh" 2>/dev/null || true - - case "$cloud" in - hetzner) _live_hetzner "$fixture_dir" ;; - digitalocean) _live_digitalocean "$fixture_dir" ;; - fly) _live_fly "$fixture_dir" ;; - *) return 0 ;; # No live cycle for this cloud yet - esac -} - -# Validate response is not empty -_validate_response_not_empty() { - local fixture_name="$1" - local response="$2" - if [[ -z "$response" ]]; then - printf '%b\n' " ${RED}fail${NC} ${fixture_name} — empty response" - cloud_errors=$((cloud_errors + 1)) - return 1 - fi - return 0 -} - -# Validate response is valid JSON -_validate_response_json() { - local fixture_name="$1" - local response="$2" - if ! echo "$response" | is_valid_json; then - printf '%b\n' " ${RED}fail${NC} ${fixture_name} — invalid JSON" - cloud_errors=$((cloud_errors + 1)) - return 1 - fi - return 0 -} - -# Validate response is not an API error -_validate_response_no_error() { - local fixture_name="$1" - local response="$2" - if has_api_error "$cloud" "$response"; then - printf '%b\n' " ${RED}fail${NC} ${fixture_name} — API error response" - cloud_errors=$((cloud_errors + 1)) - return 1 - fi - return 0 -} - -# Record fixture metadata entry -_record_fixture_metadata() { - local fixture_name="$1" - local endpoint="$2" - local ts - ts=$(date -u +"%Y-%m-%dT%H:%M:%SZ") - metadata_entries="${metadata_entries} \"${fixture_name}\": {\"endpoint\": \"${endpoint}\", \"type\": \"live\", \"recorded_at\": \"${ts}\"}, -" -} - -# Save a live fixture and update the caller's counters/metadata -_save_live_fixture() { - local fixture_dir="$1" - local fixture_name="$2" - local endpoint="$3" - local response="$4" - - _validate_response_not_empty "$fixture_name" "$response" || return 1 - _validate_response_json "$fixture_name" "$response" || return 1 - _validate_response_no_error "$fixture_name" "$response" || return 1 - - echo "$response" | pretty_json > "${fixture_dir}/${fixture_name}.json" - printf '%b\n' " ${GREEN} ok${NC} ${fixture_name} (live)" - - _record_fixture_metadata "$fixture_name" "$endpoint" - cloud_recorded=$((cloud_recorded + 1)) - return 0 -} - -# Generic live create+delete cycle for any cloud provider. -# Calls a per-cloud builder function that prints the API body to stdout, -# then runs the shared create -> save -> extract-id -> delete -> save flow. -# -# Usage: _live_create_delete_cycle FIXTURE_DIR API_FUNC CREATE_ENDPOINT \ -# DELETE_ENDPOINT_TEMPLATE ID_PY_EXPR BUILDER_FUNC \ -# [DELETE_DELAY] [EMPTY_DELETE_FALLBACK] -# -# Arguments: -# FIXTURE_DIR - Directory for fixture JSON files -# API_FUNC - Cloud API function (e.g., "hetzner_api") -# CREATE_ENDPOINT - POST endpoint (e.g., "/servers") -# DELETE_ENDPOINT_TEMPLATE - DELETE endpoint with {id} placeholder -# ID_PY_EXPR - Python expression to extract ID from response (receives 'd') -# BUILDER_FUNC - Function that prints the JSON create body to stdout -# DELETE_DELAY - Seconds to sleep before delete (default: 3) -# EMPTY_DELETE_FALLBACK - JSON to use when DELETE returns empty body (optional) -# Extract resource ID from API response using Python expression -# Sets global resource_id; returns 0 on success, 1 on failure -_extract_resource_id() { - local response="$1" id_py_expr="$2" - - resource_id=$(echo "$response" | python3 -c "import json,sys; d=json.loads(sys.stdin.read()); print(${id_py_expr})" 2>/dev/null) || true - - if [[ -z "${resource_id:-}" ]]; then - printf '%b\n' " ${RED}fail${NC} Could not extract resource ID from create response" - cloud_errors=$((cloud_errors + 1)) - return 1 - fi - - return 0 -} - -# Handle delete response, using fallback if empty -_handle_delete_response() { - local response="$1" empty_delete_fallback="$2" - - if [[ -z "$response" && -n "$empty_delete_fallback" ]]; then - echo "$empty_delete_fallback" - else - echo "$response" - fi -} - -_live_create_delete_cycle() { - local fixture_dir="$1" - local api_func="$2" - local create_endpoint="$3" - local delete_endpoint_template="$4" - local id_py_expr="$5" - local builder_func="$6" - local delete_delay="${7:-3}" - local empty_delete_fallback="${8:-}" - - local body - body=$("${builder_func}" "${fixture_dir}") || return 0 - - local create_response - create_response=$("${api_func}" POST "${create_endpoint}" "$body") - - _save_live_fixture "$fixture_dir" "create_server" "POST ${create_endpoint}" "$create_response" || { - printf '%b\n' " ${RED}fail${NC} Could not create — skipping delete fixture" - return 0 - } - - local resource_id - _extract_resource_id "$create_response" "$id_py_expr" || return 0 - - printf '%b\n' " ${CYAN}live${NC} Created (ID: ${resource_id}). Deleting..." - sleep "$delete_delay" - - local delete_endpoint="${delete_endpoint_template/\{id\}/${resource_id}}" - local delete_response - delete_response=$("${api_func}" DELETE "${delete_endpoint}") - - delete_response=$(_handle_delete_response "$delete_response" "$empty_delete_fallback") - - _save_live_fixture "$fixture_dir" "delete_server" "DELETE ${delete_endpoint_template}" "$delete_response" - printf '%b\n' " ${CYAN}live${NC} Resource ${resource_id} deleted" -} - -# --- Per-cloud body builders --- -# Each prints the JSON create body to stdout and logs setup info to stderr. - -_live_hetzner_body() { - local fixture_dir="$1" - local name="spawn-record-$(date +%s)" - printf '%b\n' " ${CYAN}live${NC} Creating test server '${name}' (cx23, nbg1)..." >&2 - - local ssh_keys_response - ssh_keys_response=$(hetzner_api GET "/ssh_keys") - local ssh_key_ids - ssh_key_ids=$(echo "$ssh_keys_response" | python3 -c " -import json, sys -d = json.loads(sys.stdin.read()) -print(json.dumps([k['id'] for k in d.get('ssh_keys', [])])) -" 2>/dev/null) || ssh_key_ids="[]" - - python3 -c " -import json, sys -print(json.dumps({ - 'name': sys.argv[1], 'server_type': 'cx23', 'location': 'nbg1', - 'image': 'ubuntu-24.04', 'ssh_keys': json.loads(sys.argv[2]), - 'start_after_create': True -})) -" "$name" "$ssh_key_ids" -} - -_live_hetzner() { - _live_create_delete_cycle "$1" hetzner_api "/servers" "/servers/{id}" \ - "d['server']['id']" _live_hetzner_body 2 -} - -_live_digitalocean_body() { - local fixture_dir="$1" - local name="spawn-record-$(date +%s)" - printf '%b\n' " ${CYAN}live${NC} Creating test droplet '${name}' (s-1vcpu-512mb-10gb, nyc3)..." >&2 - - local ssh_keys_response - ssh_keys_response=$(do_api GET "/account/keys") - local ssh_key_ids - ssh_key_ids=$(echo "$ssh_keys_response" | python3 -c " -import json, sys -d = json.loads(sys.stdin.read()) -print(json.dumps([k['id'] for k in d.get('ssh_keys', [])])) -" 2>/dev/null) || ssh_key_ids="[]" - - python3 -c " -import json, sys -print(json.dumps({ - 'name': sys.argv[1], 'region': 'nyc3', 'size': 's-1vcpu-512mb-10gb', - 'image': 'ubuntu-24-04-x64', 'ssh_keys': json.loads(sys.argv[2]) -})) -" "$name" "$ssh_key_ids" -} - -_live_digitalocean() { - _live_create_delete_cycle "$1" do_api "/droplets" "/droplets/{id}" \ - "d['droplet']['id']" _live_digitalocean_body 3 \ - '{"status":"deleted","http_code":204}' -} - -_live_fly_body() { - local fixture_dir="$1" - local name="spawn-record-$(date +%s)" - printf '%b\n' " ${CYAN}live${NC} Creating test app+machine '${name}' (shared-cpu-1x, iad)..." >&2 - - python3 -c " -import json, sys -print(json.dumps({ - 'name': sys.argv[1], 'region': 'iad', - 'config': { - 'image': 'ubuntu:24.04', 'auto_destroy': True, - 'guest': {'cpu_kind': 'shared', 'cpus': 1, 'memory_mb': 256} - } -})) -" "$name" -} - -_live_fly() { - local fixture_dir="$1" - local name="spawn-record-$(date +%s)" - local fly_api_base="https://api.machines.dev/v1" - local auth_header="Authorization: ${FLY_API_TOKEN}" - - # Detect FlyV1 tokens (dashboard/deploy tokens use FlyV1 scheme, not Bearer) - if [[ "$FLY_API_TOKEN" == FlyV1\ * ]]; then - auth_header="Authorization: ${FLY_API_TOKEN}" - else - auth_header="Authorization: Bearer ${FLY_API_TOKEN}" - fi - - # Create app - printf '%b\n' " ${CYAN}live${NC} Creating Fly.io app '${name}'..." - local app_resp - app_resp=$(curl -fsSL -X POST "${fly_api_base}/apps" \ - -H "${auth_header}" \ - -H "Content-Type: application/json" \ - -d "{\"app_name\":\"${name}\",\"org_slug\":\"personal\"}") || true - - if [[ -n "$app_resp" ]]; then - _save_live_fixture "$fixture_dir" "create_app" "POST /apps" "$app_resp" || { - printf '%b\n' " ${RED}fail${NC} App creation failed — skipping machine" - return 0 - } - fi - - # Create machine - local body - body=$(_live_fly_body "$fixture_dir") - local machine_resp - machine_resp=$(curl -fsSL -X POST "${fly_api_base}/apps/${name}/machines" \ - -H "${auth_header}" \ - -H "Content-Type: application/json" \ - -d "$body") || true - - _save_live_fixture "$fixture_dir" "create_server" "POST /apps/{name}/machines" "$machine_resp" || { - # Cleanup app even if machine failed - curl -fsSL -X DELETE "${fly_api_base}/apps/${name}" -H "${auth_header}" >/dev/null 2>&1 || true - return 0 - } - - local machine_id - machine_id=$(echo "$machine_resp" | python3 -c "import json,sys; print(json.loads(sys.stdin.read())['id'])" 2>/dev/null) || true - - # Cleanup: stop + delete machine, delete app - printf '%b\n' " ${CYAN}live${NC} Cleaning up..." - if [[ -n "$machine_id" ]]; then - curl -fsSL -X POST "${fly_api_base}/apps/${name}/machines/${machine_id}/stop" \ - -H "${auth_header}" >/dev/null 2>&1 || true - sleep 3 - local del_resp - del_resp=$(curl -fsSL -X DELETE "${fly_api_base}/apps/${name}/machines/${machine_id}?force=true" \ - -H "${auth_header}" 2>/dev/null) || true - if [[ -n "$del_resp" ]]; then - _save_live_fixture "$fixture_dir" "delete_server" "DELETE /apps/{name}/machines/{id}" "$del_resp" || true - fi - fi - curl -fsSL -X DELETE "${fly_api_base}/apps/${name}" -H "${auth_header}" >/dev/null 2>&1 || true - printf '%b\n' " ${CYAN}live${NC} Cleanup complete" -} - -# --- Record one cloud --- -# Check credentials and prompt if needed; returns 1 to skip this cloud -_record_ensure_credentials() { - local cloud="$1" - if has_credentials "$cloud"; then - return 0 - fi - - local env_var - env_var=$(get_auth_env_var "$cloud") - if [[ "$PROMPT_FOR_CREDS" == "true" ]]; then - printf '%b\n' "${CYAN}━━━ ${cloud} ━━━${NC}" - printf '%b\n' " ${YELLOW}missing${NC} ${env_var}" - if prompt_credentials "$cloud"; then - return 0 - fi - printf '%b\n' " ${YELLOW}skip${NC} ${cloud}" - else - printf '%b\n' " ${YELLOW}skip${NC} ${cloud} — ${env_var} not set" - fi - SKIPPED=$((SKIPPED + 1)) - return 1 -} - -# Record a single endpoint fixture; increments cloud_recorded/cloud_errors -# Usage: _record_endpoint CLOUD FIXTURE_DIR FIXTURE_NAME ENDPOINT -# Validate API response and report errors -# Returns 0 if valid, 1 if invalid/error -_validate_endpoint_response() { - local cloud="$1" fixture_name="$2" response="$3" - - if [[ -z "$response" ]]; then - printf '%b\n' " ${RED}fail${NC} ${fixture_name} — empty response" - cloud_errors=$((cloud_errors + 1)) - return 1 - fi - - if ! echo "$response" | is_valid_json; then - printf '%b\n' " ${RED}fail${NC} ${fixture_name} — invalid JSON" - cloud_errors=$((cloud_errors + 1)) - return 1 - fi - - if has_api_error "$cloud" "$response"; then - printf '%b\n' " ${RED}fail${NC} ${fixture_name} — API error response" - cloud_errors=$((cloud_errors + 1)) - return 1 - fi - - return 0 -} - -# Record endpoint response to fixture file and update metadata -_save_endpoint_fixture() { - local fixture_dir="$1" fixture_name="$2" endpoint="$3" response="$4" - - echo "$response" | pretty_json > "${fixture_dir}/${fixture_name}.json" - printf '%b\n' " ${GREEN} ok${NC} ${fixture_name} → fixtures/${cloud}/${fixture_name}.json" - cloud_recorded=$((cloud_recorded + 1)) - - local timestamp - timestamp=$(date -u +"%Y-%m-%dT%H:%M:%SZ") - metadata_entries="${metadata_entries} \"${fixture_name}\": {\"endpoint\": \"${endpoint}\", \"recorded_at\": \"${timestamp}\"}, -" -} - -_record_endpoint() { - local cloud="$1" fixture_dir="$2" fixture_name="$3" endpoint="$4" - - # Call API in a subshell that sources the cloud lib - local tmp_response - tmp_response=$(mktemp /tmp/spawn-record-XXXXXX) - - ( - source "${REPO_ROOT}/${cloud}/lib/common.sh" 2>/dev/null - call_api "$cloud" "$endpoint" 2>/dev/null - ) > "$tmp_response" 2>/dev/null || true - - local response - response=$(cat "$tmp_response") - rm -f "$tmp_response" - - _validate_endpoint_response "$cloud" "$fixture_name" "$response" || return 0 - _save_endpoint_fixture "$fixture_dir" "$fixture_name" "$endpoint" "$response" -} - -# Write the _metadata.json file for a cloud's fixtures -_record_write_metadata() { - local cloud="$1" fixture_dir="$2" - - local meta_timestamp - meta_timestamp=$(date -u +"%Y-%m-%dT%H:%M:%SZ") - - # Remove trailing comma and newline from metadata_entries - metadata_entries=$(printf '%s' "$metadata_entries" | sed '$ s/,$//') - - cat > "${fixture_dir}/_metadata.json" << METADATA_EOF -{ - "cloud": "${cloud}", - "recorded_at": "${meta_timestamp}", - "fixtures": { -${metadata_entries} - } -} -METADATA_EOF -} - -record_cloud() { - local cloud="$1" - - _record_ensure_credentials "$cloud" || return 0 - - printf '%b\n' "${CYAN}━━━ Recording ${cloud} ━━━${NC}" - - local fixture_dir="${FIXTURES_DIR}/${cloud}" - mkdir -p "$fixture_dir" - - local endpoints - endpoints=$(get_endpoints "$cloud") - - local cloud_recorded=0 - local cloud_errors=0 - local metadata_entries="" - - while IFS=: read -r fixture_name endpoint; do - [[ -z "$fixture_name" ]] && continue - _record_endpoint "$cloud" "$fixture_dir" "$fixture_name" "$endpoint" - done <<< "$endpoints" - - # Live create+delete cycle for write endpoint fixtures - _record_live_cycle "$cloud" "$fixture_dir" cloud_recorded cloud_errors metadata_entries || true - - _record_write_metadata "$cloud" "$fixture_dir" - - RECORDED=$((RECORDED + cloud_recorded)) - ERRORS=$((ERRORS + cloud_errors)) - - if [[ "$cloud_errors" -eq 0 ]]; then - printf '%b\n' " ${GREEN}done${NC} ${cloud_recorded} fixtures recorded" - else - printf '%b\n' " ${YELLOW}done${NC} ${cloud_recorded} recorded, ${cloud_errors} failed" - fi - printf '\n' -} - -# Format env var name for list display -# Args: cloud -_format_env_var_display() { - local cloud="$1" - local env_var - env_var=$(get_auth_env_var "$cloud") - - # For multi-var clouds, show required env vars from spec - local specs - specs=$(_get_multi_cred_spec "$cloud") - if [[ -n "$specs" ]]; then - local first_var var_count - first_var=$(head -1 <<< "$specs") - first_var="${first_var#*:}" - var_count=$(wc -l <<< "$specs" | tr -d ' ') - if [[ "$var_count" -gt 1 ]]; then - env_var="${first_var} + $((var_count - 1)) more" - else - env_var="$first_var" - fi - fi - printf '%s' "$env_var" -} - -# --- List mode --- -list_clouds() { - printf '%b\n' "${CYAN}Recordable clouds:${NC}" - printf '\n' - printf " %-15s %-30s %s\n" "CLOUD" "AUTH ENV VAR" "STATUS" - printf " %-15s %-30s %s\n" "-----" "------------" "------" - - local ready_count=0 - for cloud in $ALL_RECORDABLE_CLOUDS; do - local env_var - env_var=$(_format_env_var_display "$cloud") - local status - - if has_credentials "$cloud"; then - status=$(printf '%b' "${GREEN}ready${NC}") - ready_count=$((ready_count + 1)) - else - status=$(printf '%b' "${RED}not set${NC}") - fi - - printf " %-15s %-30s %b\n" "$cloud" "$env_var" "$status" - done - - printf '\n' - local total_count - total_count=$(echo "$ALL_RECORDABLE_CLOUDS" | wc -w | tr -d ' ') - printf '%b\n' " ${ready_count}/${total_count} clouds have credentials set" - printf '\n' - printf " CLI-based clouds (not recordable): sprite, gcp, daytona, aws, local\n" -} - -# --- Main --- -printf '%b\n' "${CYAN}===============================${NC}" -printf '%b\n' "${CYAN} Spawn API Response Recorder${NC}" -printf '%b\n' "${CYAN}===============================${NC}" -printf '\n' - -if [[ $# -eq 0 ]]; then - printf "Usage:\n" - printf " bash test/record.sh CLOUD [CLOUD...] Record fixtures for specified clouds\n" - printf " bash test/record.sh all Record all clouds (prompts for missing keys)\n" - printf " bash test/record.sh allsaved Record clouds that already have keys saved\n" - printf " bash test/record.sh --list Show recordable clouds\n" - printf '\n' - exit 0 -fi - -case "$1" in - --list|-l) - list_clouds - exit 0 - ;; - --help|-h) - printf "Usage:\n" - printf " bash test/record.sh CLOUD [CLOUD...] Record fixtures for specified clouds\n" - printf " bash test/record.sh all Record all clouds with credentials\n" - printf " bash test/record.sh --list Show recordable clouds\n" - printf '\n' - exit 0 - ;; -esac - -# Determine which clouds to record -CLOUDS_TO_RECORD="" -if [[ "$1" == "all" ]]; then - CLOUDS_TO_RECORD="$ALL_RECORDABLE_CLOUDS" -elif [[ "$1" == "allsaved" ]]; then - PROMPT_FOR_CREDS=false - CLOUDS_TO_RECORD="$ALL_RECORDABLE_CLOUDS" -else - CLOUDS_TO_RECORD="$*" -fi - -# Validate cloud names -for cloud in $CLOUDS_TO_RECORD; do - if ! echo "$ALL_RECORDABLE_CLOUDS" | grep -qw "$cloud"; then - printf '%b\n' "${RED}Unknown cloud: ${cloud}${NC}" - printf "Recordable clouds: %s\n" "$ALL_RECORDABLE_CLOUDS" - exit 1 - fi -done - -printf "Fixtures dir: %s\n" "$FIXTURES_DIR" -printf "Clouds: %s\n" "$CLOUDS_TO_RECORD" -printf '\n' - -mkdir -p "$FIXTURES_DIR" - -# --- Run clouds in parallel --- -RECORD_RESULTS_DIR=$(mktemp -d) -RECORD_PIDS="" - -for cloud in $CLOUDS_TO_RECORD; do - ( - # Reset counters for this cloud (subshell isolation) - RECORDED=0 - SKIPPED=0 - ERRORS=0 - record_cloud "$cloud" - printf '%d %d %d\n' "$RECORDED" "$SKIPPED" "$ERRORS" > "${RECORD_RESULTS_DIR}/${cloud}.counts" - ) > "${RECORD_RESULTS_DIR}/${cloud}.log" 2>&1 & - RECORD_PIDS="${RECORD_PIDS} $!" -done - -# Wait for all clouds to finish -for pid in $RECORD_PIDS; do - wait "$pid" 2>/dev/null || true -done - -# Print output from each cloud (in order) -for cloud in $CLOUDS_TO_RECORD; do - if [[ -f "${RECORD_RESULTS_DIR}/${cloud}.log" ]]; then - cat "${RECORD_RESULTS_DIR}/${cloud}.log" - fi -done - -# Aggregate results -for cloud in $CLOUDS_TO_RECORD; do - if [[ -f "${RECORD_RESULTS_DIR}/${cloud}.counts" ]]; then - read -r r s e < "${RECORD_RESULTS_DIR}/${cloud}.counts" - RECORDED=$((RECORDED + r)) - SKIPPED=$((SKIPPED + s)) - ERRORS=$((ERRORS + e)) - fi -done - -rm -rf "${RECORD_RESULTS_DIR}" - -# --- Summary --- -printf '%b\n' "${CYAN}===============================${NC}" -TOTAL=$((RECORDED + SKIPPED + ERRORS)) -printf '%b\n' " Results: ${GREEN}${RECORDED} recorded${NC}, ${YELLOW}${SKIPPED} skipped${NC}, ${RED}${ERRORS} failed${NC}" -printf '%b\n' "${CYAN}===============================${NC}" - -if [[ "$ERRORS" -gt 0 ]]; then - exit 1 -fi -exit 0 diff --git a/test/run.sh b/test/run.sh deleted file mode 100644 index a4a173d9..00000000 --- a/test/run.sh +++ /dev/null @@ -1,755 +0,0 @@ -#!/bin/bash -# shellcheck disable=SC2154 -# Test harness for spawn scripts -# -# Tests the shared library and cloud provider scripts: -# 1. shared/common.sh sources correctly (local + remote) -# 2. All shared functions resolve -# 3. Env var flow works (OPENROUTER_API_KEY) -# 4. Temp files are created and cleaned up -# 5. Each script reaches its final launch command -# -# Note: sprite/ cloud provider was converted to TypeScript (PR #1692). -# The sprite/*.sh files are now thin shims that exec bun - shell-level -# integration tests for sprite are covered by bun test instead. -# -# Usage: -# bash test/run.sh # test all scripts -# bash test/run.sh claude # test one script -# bash test/run.sh --remote # test remote source (from GitHub) - -set -eo pipefail - -REPO_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)" -TEST_DIR=$(mktemp -d) -MOCK_LOG="${TEST_DIR}/sprite_calls.log" -PASSED=0 -FAILED=0 -FILTER="${1:-}" -REMOTE=false - -if [[ "${FILTER}" == "--remote" ]]; then - REMOTE=true - FILTER="${2:-}" -fi - -# Colors -RED='\033[0;31m' -GREEN='\033[0;32m' -YELLOW='\033[1;33m' -NC='\033[0m' - -cleanup() { - rm -rf "${TEST_DIR}" - # Clean up any /tmp pollution from mock sprite state files and spawn temp files - rm -f /tmp/sprite_mock_created_* /tmp/sprite_mock_created 2>/dev/null || true - rm -f /tmp/spawn_* 2>/dev/null || true -} -trap 'cleanup' EXIT - -# --- Mock sprite CLI --- -# Records every call to a log, returns success for expected commands -setup_mocks() { - export TEST_DIR - cat > "${TEST_DIR}/sprite" << 'MOCK' -#!/bin/bash -echo "sprite $*" >> "${MOCK_LOG}" - -case "$1" in - org) exit 0 ;; # auth check passes - list) - echo "existing-sprite" - # After create, also return the test sprite name so provisioning poll succeeds - if [[ -f "${TEST_DIR}/sprite_mock_created" ]]; then - echo "${SPRITE_NAME:-}" - fi - exit 0 - ;; - create) - touch "${TEST_DIR}/sprite_mock_created" - exit 0 - ;; - exec) - # If there's a -file flag, just pretend to upload - if [[ "$*" == *"-file"* ]]; then - exit 0 - fi - # If -tty, this is the final interactive launch — signal success and exit - if [[ "$*" == *"-tty"* ]]; then - echo "[MOCK] Would launch interactive session: $*" >> "${MOCK_LOG}" - exit 0 - fi - # Regular exec — just succeed - exit 0 - ;; - login) exit 0 ;; - *) exit 0 ;; -esac -MOCK - chmod +x "${TEST_DIR}/sprite" -} - -# --- Mock other commands that shouldn't run for real --- -setup_extra_mocks() { - # mock claude (for claude.sh install step) - cat > "${TEST_DIR}/claude" << 'MOCK' -#!/bin/bash -echo "claude $*" >> "${MOCK_LOG}" -exit 0 -MOCK - chmod +x "${TEST_DIR}/claude" - - # mock openssl - cat > "${TEST_DIR}/openssl" << 'MOCK' -#!/bin/bash -echo "mock-gateway-token-abc123" -MOCK - chmod +x "${TEST_DIR}/openssl" - - # mock sleep to avoid polling delays - cat > "${TEST_DIR}/sleep" << 'MOCK' -#!/bin/bash -exit 0 -MOCK - chmod +x "${TEST_DIR}/sleep" - - # mock timeout/gtimeout to just run the command - cat > "${TEST_DIR}/timeout" << 'MOCK' -#!/bin/bash -# Skip the timeout value, run the rest -shift -exec "$@" -MOCK - chmod +x "${TEST_DIR}/timeout" - cp "${TEST_DIR}/timeout" "${TEST_DIR}/gtimeout" - - # mock python3 for JSON parsing used by shared/common.sh - cat > "${TEST_DIR}/python3" << 'MOCK' -#!/bin/bash -# Read the python script from -c argument -script="" -for arg in "$@"; do - if [[ "$prev" == "-c" ]]; then - script="$arg" - break - fi - prev="$arg" -done -# Delegate to real python3 for JSON operations -exec /usr/bin/python3 "$@" -MOCK - chmod +x "${TEST_DIR}/python3" -} - -# --- Assertions --- -assert_contains() { - local file="$1" pattern="$2" msg="$3" - if grep -qE "${pattern}" "${file}" 2>/dev/null; then - printf '%b\n' " ${GREEN}✓${NC} ${msg}" - PASSED=$((PASSED + 1)) - else - printf '%b\n' " ${RED}✗${NC} ${msg}" - printf '%b\n' " expected pattern: ${pattern}" - printf '%b\n' " in: ${file}" - FAILED=$((FAILED + 1)) - fi -} - -assert_not_contains() { - local file="$1" pattern="$2" msg="$3" - if ! grep -qE "${pattern}" "${file}" 2>/dev/null; then - printf '%b\n' " ${GREEN}✓${NC} ${msg}" - PASSED=$((PASSED + 1)) - else - printf '%b\n' " ${RED}✗${NC} ${msg}" - FAILED=$((FAILED + 1)) - fi -} - -assert_exit_code() { - local actual="$1" expected="$2" msg="$3" - if [[ "${actual}" -eq "${expected}" ]]; then - printf '%b\n' " ${GREEN}✓${NC} ${msg}" - PASSED=$((PASSED + 1)) - else - printf '%b\n' " ${RED}✗${NC} ${msg} (got exit code ${actual}, expected ${expected})" - FAILED=$((FAILED + 1)) - fi -} - -# Assert that a value equals an expected string -# Usage: assert_equals ACTUAL EXPECTED MSG -assert_equals() { - local actual="$1" expected="$2" msg="$3" - if [[ "${actual}" == "${expected}" ]]; then - printf '%b\n' " ${GREEN}✓${NC} ${msg}" - PASSED=$((PASSED + 1)) - else - printf '%b\n' " ${RED}✗${NC} ${msg} (got '${actual}')" - FAILED=$((FAILED + 1)) - fi -} - -# Assert that a value contains a substring pattern (glob match) -# Usage: assert_match ACTUAL PATTERN MSG -# PATTERN uses glob syntax: *substring* for contains, prefix* for starts-with, etc. -assert_match() { - local actual="$1" pattern="$2" msg="$3" - # Use a case statement for glob matching (compatible with bash 3.x) - case "${actual}" in - ${pattern}) - printf '%b\n' " ${GREEN}✓${NC} ${msg}" - PASSED=$((PASSED + 1)) - ;; - *) - printf '%b\n' " ${RED}✗${NC} ${msg} (got '${actual}')" - FAILED=$((FAILED + 1)) - ;; - esac -} - -# Run a shared/common.sh function and assert it succeeds (exit 0) -assert_common_succeeds() { - local msg="$1" cmd="$2" - local result - result=$(bash -c 'source "'"${REPO_ROOT}"'/shared/common.sh" && '"${cmd}" 2>/dev/null) - if [[ "${result}" == "valid" ]]; then - printf '%b\n' " ${GREEN}✓${NC} ${msg}" - PASSED=$((PASSED + 1)) - else - printf '%b\n' " ${RED}✗${NC} ${msg}" - FAILED=$((FAILED + 1)) - fi -} - -# Run a shared/common.sh function and assert it fails (exit non-zero) -assert_common_fails() { - local msg="$1" cmd="$2" - local rc=0 - bash -c 'source "'"${REPO_ROOT}"'/shared/common.sh" && '"${cmd}" /dev/null 2>&1 || rc=$? - if [[ "${rc}" -ne 0 ]]; then - printf '%b\n' " ${GREEN}✓${NC} ${msg}" - PASSED=$((PASSED + 1)) - else - printf '%b\n' " ${RED}✗${NC} ${msg}" - FAILED=$((FAILED + 1)) - fi -} - -# --- Sprite command assertions --- -# Assert that a sprite script follows the standard command lifecycle: -# auth check -> list -> create -> exec -> env upload -> interactive launch -_assert_sprite_common_commands() { - local script_name="$1" - assert_contains "${MOCK_LOG}" "sprite org list" "Checks sprite authentication" - assert_contains "${MOCK_LOG}" "sprite list" "Checks if sprite exists" - assert_contains "${MOCK_LOG}" "sprite create.*test-sprite-${script_name}" "Creates sprite with correct name" - assert_contains "${MOCK_LOG}" "sprite exec.*test-sprite-${script_name}" "Runs commands on sprite" - assert_contains "${MOCK_LOG}" "sprite exec.*-file.*/tmp/spawn_" "Uploads env config to sprite" - assert_contains "${MOCK_LOG}" "sprite exec.*-tty.*" "Launches interactive session" -} - -# Assert that a sprite script installs agent-specific components -_assert_agent_specific() { - local script_name="$1" - case "${script_name}" in - claude) - assert_contains "${MOCK_LOG}" "sprite exec.*command -v claude" "Checks Claude Code installation" - assert_contains "${MOCK_LOG}" "sprite exec.*-file.*/tmp/.*spawn_config" "Uploads Claude config file" - assert_contains "${MOCK_LOG}" "sprite exec.*mv.*settings.json" "Moves settings.json to final path" - assert_contains "${MOCK_LOG}" "sprite exec.*mv.*\.claude\.json" "Moves .claude.json to final path" - ;; - openclaw) - assert_contains "${MOCK_LOG}" "sprite exec.*bun.*openclaw" "Installs openclaw via bun" - assert_contains "${MOCK_LOG}" "sprite exec.*openclaw gateway" "Starts openclaw gateway" - ;; - esac -} - -# Assert no temp files were leaked during script execution -_assert_no_temp_leaks() { - local leaked_temps - leaked_temps=$(find /tmp -maxdepth 1 -name "tmp.*" -newer "${MOCK_LOG}" 2>/dev/null | wc -l) - if [[ "${leaked_temps}" -eq 0 ]]; then - printf '%b\n' " ${GREEN}✓${NC} No temp files leaked" - PASSED=$((PASSED + 1)) - else - printf '%b\n' " ${RED}✗${NC} Temp files leaked (${leaked_temps} found in /tmp)" - FAILED=$((FAILED + 1)) - fi -} - -# --- Test runner for a single script --- -run_script_test() { - local script_name="$1" - local script_path="${REPO_ROOT}/sprite/${script_name}.sh" - local output_file="${TEST_DIR}/${script_name}_output.log" - - echo "" - printf '%b\n' "${YELLOW}━━━ Testing ${script_name}.sh ━━━${NC}" - - # Reset mock state - : > "${MOCK_LOG}" - rm -f "${TEST_DIR}/sprite_mock_created" 2>/dev/null || true - - # Run the script with mocked PATH and env vars (timeout 30s) - local exit_code=0 - MOCK_LOG="${MOCK_LOG}" \ - TEST_DIR="${TEST_DIR}" \ - SPRITE_NAME="test-sprite-${script_name}" \ - OPENROUTER_API_KEY="sk-or-v1-0000000000000000000000000000000000000000000000000000000000000000" \ - SPAWN_SKIP_API_VALIDATION=1 \ - SPAWN_SKIP_GITHUB_AUTH=1 \ - PATH="${TEST_DIR}:${PATH}" \ - HOME="${TEST_DIR}/fakehome" \ - timeout 30 bash "${script_path}" > "${output_file}" 2>&1 || exit_code=$? - - assert_exit_code "${exit_code}" 0 "Script exits successfully" - _assert_sprite_common_commands "${script_name}" - _assert_agent_specific "${script_name}" - _assert_no_temp_leaks -} - -# --- Test shared/common.sh sourcing --- -# (sprite/lib/common.sh was removed when sprite/ was converted to TypeScript) -_test_shared_functions_and_syntax() { - # Source locally and check all shared functions exist - local output - output=$(bash -c ' - source "'"${REPO_ROOT}"'/shared/common.sh" - for fn in log_info log_warn log_error safe_read \ - get_openrouter_api_key_manual try_oauth_flow \ - get_openrouter_api_key_oauth open_browser \ - json_escape validate_model_id generate_ssh_key_if_missing \ - generic_ssh_wait; do - type "${fn}" &>/dev/null && echo "OK:${fn}" || echo "MISSING:${fn}" - done - ' 2>/dev/null) - - local missing - missing=$(echo "${output}" | grep "^MISSING:" || true) - assert_equals "${missing}" "" "All shared functions defined" - - # Syntax check - local rc=0 - bash -n "${REPO_ROOT}/shared/common.sh" 2>/dev/null || rc=$? - assert_exit_code "${rc}" 0 "shared/common.sh syntax valid" -} - -_test_shared_log_functions() { - # log functions write to stderr, not stdout - local stdout stderr - stdout=$(timeout 5 bash -c 'source "'"${REPO_ROOT}"'/shared/common.sh" && log_info "test"' /dev/null) - stderr=$(timeout 5 bash -c 'source "'"${REPO_ROOT}"'/shared/common.sh" && log_info "test"' &1 >/dev/null) - assert_equals "${stdout}" "" "Log functions write to stderr (no stdout)" - assert_match "${stderr}" "?*" "Log functions produce stderr output" -} - -_test_shared_remote_source() { - if [[ "${REMOTE}" != true ]]; then - return 0 - fi - local remote_fns - remote_fns=$(bash -c ' - eval "$(curl -fsSL https://raw.githubusercontent.com/OpenRouterTeam/spawn/main/shared/common.sh)" - type log_info &>/dev/null && echo "OK" || echo "FAIL" - ' 2>/dev/null) - assert_equals "${remote_fns}" "OK" "Remote source from GitHub works" -} - -test_common_source() { - echo "" - printf '%b\n' "${YELLOW}━━━ Testing shared/common.sh ━━━${NC}" - - _test_shared_functions_and_syntax - _test_shared_log_functions - _test_shared_remote_source -} - -# --- Test shared/common.sh functions --- -# --- shared/common.sh sub-tests (grouped by feature) --- - -_test_model_validation() { - assert_common_succeeds "validate_model_id accepts valid model IDs" \ - 'validate_model_id "anthropic/claude-3.5-sonnet" && echo "valid"' - assert_common_fails "validate_model_id rejects invalid characters" \ - 'validate_model_id "bad;model"' - assert_common_succeeds "validate_model_id accepts empty string" \ - 'validate_model_id "" && echo "valid"' - assert_common_succeeds "validate_model_id accepts openrouter/auto" \ - 'validate_model_id "openrouter/auto" && echo "valid"' - assert_common_succeeds "validate_model_id accepts model IDs with colons" \ - 'validate_model_id "provider/model:version" && echo "valid"' - - # Bulk test: all shell metacharacters must be rejected - # Note: backtick excluded due to shell escaping complexity - local dangerous_chars=('$' '&' '|' '>' '<' '(' ')' '{' '}' ';' '*' '?' '[' ']') - local rejected_count=0 - local rc - for char in "${dangerous_chars[@]}"; do - rc=0 - local test_str - test_str=$(printf 'bad%smodel' "${char}") - bash -c 'source "'"${REPO_ROOT}"'/shared/common.sh" && validate_model_id '"$(printf '%q' "${test_str}")" /dev/null 2>&1 || rc=$? - [[ "${rc}" -ne 0 ]] && rejected_count=$((rejected_count + 1)) - done - assert_equals "${rejected_count}" "${#dangerous_chars[@]}" \ - "validate_model_id rejects shell metacharacters (${rejected_count}/${#dangerous_chars[@]})" -} - -_test_json_escape() { - local result - result=$(bash -c 'source "'"${REPO_ROOT}"'/shared/common.sh" && json_escape "test\"quote"' 2>/dev/null) - # json_escape should produce escaped quotes (\\") in the output - assert_match "${result}" '*\\"*' "json_escape handles special characters" - - # Test the bash fallback path (without python3) escapes control characters - # The fallback must escape newlines, carriage returns, and tabs to produce valid JSON - result=$(bash -c ' - json_escape_fallback() { - local string="${1}" - local escaped="${string//\\/\\\\}" - escaped="${escaped//\"/\\\"}" - escaped="${escaped//$'"'"'\n'"'"'/\\n}" - escaped="${escaped//$'"'"'\r'"'"'/\\r}" - escaped="${escaped//$'"'"'\t'"'"'/\\t}" - echo "\"${escaped}\"" - } - json_escape_fallback "line1 -line2" - ' 2>/dev/null) - assert_match "${result}" '*\\n*' "json_escape fallback escapes newlines" - - result=$(bash -c ' - json_escape_fallback() { - local string="${1}" - local escaped="${string//\\/\\\\}" - escaped="${escaped//\"/\\\"}" - escaped="${escaped//$'"'"'\n'"'"'/\\n}" - escaped="${escaped//$'"'"'\r'"'"'/\\r}" - escaped="${escaped//$'"'"'\t'"'"'/\\t}" - echo "\"${escaped}\"" - } - json_escape_fallback $'"'"'hello\tworld'"'"' - ' 2>/dev/null) - assert_match "${result}" '*\\t*' "json_escape fallback escapes tabs" -} - -_test_ssh_key_utils() { - # generate_ssh_key_if_missing - creates key - local test_key="${TEST_DIR}/test_id_ed25519" - bash -c 'source "'"${REPO_ROOT}"'/shared/common.sh" && generate_ssh_key_if_missing "'"${test_key}"'"' >/dev/null 2>&1 - local key_exists="no" - [[ -f "${test_key}" && -f "${test_key}.pub" ]] && key_exists="yes" - assert_equals "${key_exists}" "yes" "generate_ssh_key_if_missing creates key" - - # generate_ssh_key_if_missing - skips existing - local mtime_before - mtime_before=$(stat -c %Y "${test_key}" 2>/dev/null || stat -f %m "${test_key}" 2>/dev/null) - sleep 1 - bash -c 'source "'"${REPO_ROOT}"'/shared/common.sh" && generate_ssh_key_if_missing "'"${test_key}"'"' >/dev/null 2>&1 - local mtime_after - mtime_after=$(stat -c %Y "${test_key}" 2>/dev/null || stat -f %m "${test_key}" 2>/dev/null) - assert_equals "${mtime_before}" "${mtime_after}" "generate_ssh_key_if_missing skips existing key" - - # get_ssh_fingerprint - local result - result=$(bash -c 'source "'"${REPO_ROOT}"'/shared/common.sh" && get_ssh_fingerprint "'"${test_key}.pub"'"' 2>/dev/null) - assert_match "${result}" "*:*" "get_ssh_fingerprint returns valid fingerprint" - - # extract_ssh_key_ids - local mock_json='{"ssh_keys":[{"id":123},{"id":456}]}' - result=$(bash -c 'source "'"${REPO_ROOT}"'/shared/common.sh" && echo '"'${mock_json}'"' | extract_ssh_key_ids "$(cat)" "ssh_keys"' 2>/dev/null) - assert_match "${result}" "*123*456*" "extract_ssh_key_ids parses JSON correctly" -} - -_test_syntax_and_logging() { - local rc=0 - bash -n "${REPO_ROOT}/shared/common.sh" 2>/dev/null || rc=$? - assert_exit_code "${rc}" 0 "shared/common.sh syntax valid" - - local output missing - output=$(bash -c ' - source "'"${REPO_ROOT}"'/shared/common.sh" - for fn in log_info log_warn log_error; do - type "${fn}" &>/dev/null && echo "OK:${fn}" || echo "MISSING:${fn}" - done - ' 2>/dev/null) - missing=$(echo "${output}" | grep "^MISSING:" || true) - assert_equals "${missing}" "" "All logging functions exist in shared/common.sh" -} - -_test_open_browser() { - # open_browser: termux - local result - result=$(bash -c ' - source "'"${REPO_ROOT}"'/shared/common.sh" - termux-open-url() { echo "termux: $*"; } - export -f termux-open-url - open_browser "https://example.com" - ' 2>/dev/null) - assert_equals "${result}" "termux: https://example.com" "open_browser detects termux-open-url" - - # open_browser: macOS open - result=$(bash -c ' - source "'"${REPO_ROOT}"'/shared/common.sh" - open() { echo "macOS: $*"; } - export -f open - open_browser "https://example.com" - ' 2>/dev/null) - assert_equals "${result}" "macOS: https://example.com" "open_browser detects macOS open" - - # open_browser: fallback message - local stderr_output - stderr_output=$(bash -c ' - PATH="/usr/bin:/bin" - source "'"${REPO_ROOT}"'/shared/common.sh" - command() { - if [[ "$2" == "termux-open-url" || "$2" == "open" || "$2" == "xdg-open" ]]; then - return 1 - fi - builtin command "$@" - } - export -f command - open_browser "https://example.com" - ' 2>&1 >/dev/null) - assert_match "${stderr_output}" "*Please open: https://example.com*" \ - "open_browser shows fallback message when browsers unavailable" -} - -_test_cloud_init() { - # get_cloud_init_userdata - local result - result=$(bash -c 'source "'"${REPO_ROOT}"'/shared/common.sh" && get_cloud_init_userdata' 2>/dev/null) - assert_match "${result}" "*#cloud-config*" "get_cloud_init_userdata returns valid YAML" - assert_match "${result}" "*curl*" "get_cloud_init_userdata includes curl" - assert_match "${result}" "*git*" "get_cloud_init_userdata includes git" - assert_match "${result}" "*zsh*" "get_cloud_init_userdata includes zsh" - assert_match "${result}" "*bun.sh/install*" "get_cloud_init_userdata includes Bun installation" - assert_match "${result}" "*claude.ai/install*" "get_cloud_init_userdata includes Claude installation" - - # check_openrouter_connectivity -- accepts success or graceful failure - if command -v curl &> /dev/null; then - local connectivity_result - connectivity_result=$(bash -c 'source "'"${REPO_ROOT}"'/shared/common.sh" && check_openrouter_connectivity && echo "reachable"' 2>/dev/null) - # Accept both "reachable" and empty (network unavailable) -- just shouldn't crash - assert_match "${connectivity_result:-ok}" "*" "check_openrouter_connectivity handles connectivity check" - else - printf '%b\n' " ${YELLOW}⚠${NC} check_openrouter_connectivity test skipped (curl not available)" - fi -} - -_test_oauth_functions() { - local rc - - # wait_for_oauth_code - success - local code_test_file="${TEST_DIR}/oauth_code_test" - echo "test_code" > "${code_test_file}" - rc=0 - bash -c 'source "'"${REPO_ROOT}"'/shared/common.sh" && wait_for_oauth_code "'"${code_test_file}"'" 1' >/dev/null 2>&1 || rc=$? - assert_exit_code "${rc}" 0 "wait_for_oauth_code returns success when file exists" - - # wait_for_oauth_code - timeout - local missing_file="${TEST_DIR}/missing_oauth_code" - rc=0 - bash -c 'source "'"${REPO_ROOT}"'/shared/common.sh" && wait_for_oauth_code "'"${missing_file}"'" 1' >/dev/null 2>&1 || rc=$? - assert_match "${rc}" "[1-9]*" "wait_for_oauth_code returns failure on timeout" - - # cleanup_oauth_session - local cleanup_test_dir="${TEST_DIR}/oauth_cleanup_test" - mkdir -p "${cleanup_test_dir}" - bash -c 'source "'"${REPO_ROOT}"'/shared/common.sh" && cleanup_oauth_session "" "'"${cleanup_test_dir}"'"' >/dev/null 2>&1 - local dir_removed="yes" - [[ -d "${cleanup_test_dir}" ]] && dir_removed="no" - assert_equals "${dir_removed}" "yes" "cleanup_oauth_session removes directory" -} - -_test_ssh_wait() { - # generic_ssh_wait - success - local result - result=$(bash -c ' - source "'"${REPO_ROOT}"'/shared/common.sh" - ssh() { return 0; } - export -f ssh - generic_ssh_wait "root" "1.2.3.4" "-o Test" "true" "test" 2 1 2>&1 - echo $? - ' 2>/dev/null | tail -1) - assert_equals "${result}" "0" "generic_ssh_wait succeeds when command passes" - - # generic_ssh_wait - failure - result=$(bash -c ' - source "'"${REPO_ROOT}"'/shared/common.sh" - ssh() { return 1; } - export -f ssh - generic_ssh_wait "root" "1.2.3.4" "-o Test" "false" "test" 2 1 2>&1 - echo $? - ' 2>/dev/null | tail -1) - assert_equals "${result}" "1" "generic_ssh_wait fails after max attempts" -} - -_test_input_and_server_validation() { - # safe_read without TTY - assert_common_fails "safe_read fails when no TTY available" \ - 'safe_read "test: " /dev/null || rc=$? - assert_exit_code "${rc}" 0 "${cloud}/${script}.sh syntax valid" - done - done -} - -# --- Static analysis with shellcheck --- - -# Discover all shell scripts in the repo: agent scripts, lib files, shared, and test harness. -# Populates the DISCOVERED_SCRIPTS array. -_discover_shell_scripts() { - DISCOVERED_SCRIPTS=() - local dir - for dir in "${REPO_ROOT}"/*/; do - local cloud - cloud=$(basename "${dir}") - case "${cloud}" in - cli|shared|test|node_modules|.git|.github|.claude|.docs) continue ;; - esac - local f - for f in "${dir}"*.sh; do - [[ -f "${f}" ]] && DISCOVERED_SCRIPTS+=("${f}") - done - [[ -f "${dir}lib/common.sh" ]] && DISCOVERED_SCRIPTS+=("${dir}lib/common.sh") - done - DISCOVERED_SCRIPTS+=("${REPO_ROOT}/shared/common.sh" "${REPO_ROOT}/test/run.sh") -} - -# Run shellcheck on each discovered script and report results. -_run_shellcheck_on_scripts() { - local issue_count=0 - local checked_count=0 - - for script in "${DISCOVERED_SCRIPTS[@]}"; do - [[ -f "${script}" ]] || continue - checked_count=$((checked_count + 1)) - - # SC1090: Can't follow non-constant source - # SC2312: Consider invoking this command separately to avoid masking its return value - local output - output=$(shellcheck --severity=warning --exclude=SC1090,SC2312 "${script}" 2>&1) || true - - if [[ -n "${output}" ]]; then - issue_count=$((issue_count + 1)) - printf '%b\n' " ${YELLOW}⚠${NC} $(basename "${script}"): found issues" - echo "${output}" | sed 's/^/ /' - fi - done - - if [[ "${issue_count}" -eq 0 ]]; then - printf '%b\n' " ${GREEN}✓${NC} No issues found in ${checked_count} scripts" - PASSED=$((PASSED + 1)) - else - printf '%b\n' " ${YELLOW}⚠${NC} Found issues in ${issue_count}/${checked_count} scripts (advisory only)" - fi -} - -run_shellcheck() { - echo "" - printf '%b\n' "${YELLOW}━━━ Running shellcheck (static analysis) ━━━${NC}" - - if ! command -v shellcheck &> /dev/null; then - printf '%b\n' " ${YELLOW}⚠${NC} shellcheck not found (install with: apt install shellcheck / brew install shellcheck)" - printf '%b\n' " ${YELLOW}⚠${NC} Skipping static analysis" - return 0 - fi - - _discover_shell_scripts - _run_shellcheck_on_scripts -} - -# --- Main --- -echo "===============================" -echo " Spawn Script Test Suite" -echo "===============================" -echo "" -echo "Repo: ${REPO_ROOT}" -echo "Temp dir: ${TEST_DIR}" -echo "Filter: ${FILTER:-all}" -echo "Remote: ${REMOTE}" - -setup_mocks -setup_extra_mocks - -# Create fake home for sprite script tests -mkdir -p "${TEST_DIR}/fakehome/.ssh" -mkdir -p "${TEST_DIR}/fakehome/.config/spawn" -mkdir -p "${TEST_DIR}/fakehome/.claude" -mkdir -p "${TEST_DIR}/fakehome/.local/bin" - -run_shellcheck -test_common_source -test_shared_common -test_source_detection - -# Note: sprite/ cloud provider scripts depend on sprite/lib/common.sh which was -# removed when sprite was converted to TypeScript (PR #1692). Integration tests -# for sprite agent scripts are covered by bun test (cli/src/__tests__/) instead. - -# --- Summary --- -echo "" -echo "===============================" -TOTAL=$((PASSED + FAILED)) -printf '%b\n' " Results: ${GREEN}${PASSED} passed${NC}, ${RED}${FAILED} failed${NC}, ${TOTAL} total" -echo "===============================" - -[[ "${FAILED}" -eq 0 ]] && exit 0 || exit 1 diff --git a/test/test-sandbox.sh b/test/test-sandbox.sh deleted file mode 100755 index da2f1a78..00000000 --- a/test/test-sandbox.sh +++ /dev/null @@ -1,200 +0,0 @@ -#!/bin/bash -# Test that all bash test scripts are properly sandboxed -# Verifies no production environment pollution - -set -eo pipefail - -REPO_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)" - -# Colors -RED='\033[0;31m' -GREEN='\033[0;32m' -YELLOW='\033[1;33m' -NC='\033[0m' - -PASSED=0 -FAILED=0 - -# Capture initial state of agent directories before running tests -INITIAL_OPENCLAW_EXISTS=false -INITIAL_SPRITE_EXISTS=false -INITIAL_CLAUDE_DIR_EXISTS=false -INITIAL_CLAUDE_JSON_EXISTS=false -INITIAL_CLAUDE_SETTINGS_EXISTS=false -INITIAL_CLAUDE_JSON_MTIME="" -INITIAL_CLAUDE_SETTINGS_MTIME="" - -[[ -d "$HOME/.openclaw" ]] && INITIAL_OPENCLAW_EXISTS=true -[[ -d "$HOME/.sprite" ]] && INITIAL_SPRITE_EXISTS=true -[[ -d "$HOME/.claude" ]] && INITIAL_CLAUDE_DIR_EXISTS=true - -if [[ -f "$HOME/.claude.json" ]]; then - INITIAL_CLAUDE_JSON_EXISTS=true - INITIAL_CLAUDE_JSON_MTIME=$(stat -c %Y "$HOME/.claude.json" 2>/dev/null || stat -f %m "$HOME/.claude.json" 2>/dev/null) -fi - -if [[ -f "$HOME/.claude/settings.json" ]]; then - INITIAL_CLAUDE_SETTINGS_EXISTS=true - INITIAL_CLAUDE_SETTINGS_MTIME=$(stat -c %Y "$HOME/.claude/settings.json" 2>/dev/null || stat -f %m "$HOME/.claude/settings.json" 2>/dev/null) -fi - -assert_no_file() { - local pattern="$1" - local msg="$2" - if ls ${pattern} 2>/dev/null | grep -q .; then - printf '%b\n' " ${RED}✗${NC} ${msg}" - printf '%b\n' " Found: $(ls ${pattern} 2>/dev/null | head -3)" - FAILED=$((FAILED + 1)) - else - printf '%b\n' " ${GREEN}✓${NC} ${msg}" - PASSED=$((PASSED + 1)) - fi -} - -assert_config_not_modified() { - local config_path="$HOME/.config/spawn" - local msg="$1" - - # If config doesn't exist, that's fine - if [[ ! -d "$config_path" ]]; then - printf '%b\n' " ${GREEN}✓${NC} ${msg} (dir doesn't exist)" - PASSED=$((PASSED + 1)) - return 0 - fi - - # If it exists, check if any files were modified in last 5 minutes - local recent_files - recent_files=$(find "$config_path" -type f -mmin -5 2>/dev/null) - if [[ -n "$recent_files" ]]; then - printf '%b\n' " ${RED}✗${NC} ${msg}" - printf '%b\n' " Modified: $recent_files" - FAILED=$((FAILED + 1)) - else - printf '%b\n' " ${GREEN}✓${NC} ${msg}" - PASSED=$((PASSED + 1)) - fi -} - -assert_no_directory() { - local dir_path="$1" - local msg="$2" - if [[ -d "$dir_path" ]]; then - printf '%b\n' " ${RED}✗${NC} ${msg}" - printf '%b\n' " Found: $dir_path" - FAILED=$((FAILED + 1)) - else - printf '%b\n' " ${GREEN}✓${NC} ${msg}" - PASSED=$((PASSED + 1)) - fi -} - -echo "========================================" -echo " Bash Test Sandboxing Verification" -echo "========================================" -echo "" - -# Test 1: Run test/run.sh and verify no /tmp pollution -echo "${YELLOW}Test 1: test/run.sh sandboxing${NC}" -cd "${REPO_ROOT}" -timeout 60 bash test/run.sh >/dev/null 2>&1 || true -assert_no_file "/tmp/sprite_mock_created*" "No sprite mock files in /tmp after test/run.sh" -assert_config_not_modified "Production config not modified by test/run.sh" - -# Test 2: Verify test/record.sh respects TEST_CONFIG_DIR -echo "" -echo "${YELLOW}Test 2: test/record.sh sandboxing${NC}" -TEST_CONFIG_DIR=$(mktemp -d) -export TEST_CONFIG_DIR -timeout 10 bash test/record.sh --list >/dev/null 2>&1 || true -assert_no_file "$HOME/.config/spawn/*.json.test-*" "No test files in production config" -rm -rf "${TEST_CONFIG_DIR}" -unset TEST_CONFIG_DIR - -# Test 3: Verify mock.sh uses isolated temp directories -echo "" -echo "${YELLOW}Test 3: test/mock.sh sandboxing${NC}" -# Mock test runs in parallel with isolated TEST_DIR per cloud -# Just verify it doesn't leave artifacts in /tmp or production dirs -timeout 10 bash test/mock.sh hetzner claude 2>/dev/null || true -assert_config_not_modified "Production config not modified by test/mock.sh" - -# Test 4: Verify no agent-specific directories created in HOME -echo "" -echo "${YELLOW}Test 4: Agent directory residue check${NC}" - -# Check if .openclaw was created by tests -if [[ "$INITIAL_OPENCLAW_EXISTS" == "false" ]]; then - assert_no_directory "$HOME/.openclaw" "No ~/.openclaw directory created" -else - printf '%b\n' " ${YELLOW}⊘${NC} Skipped ~/.openclaw check (existed before tests)" -fi - -# Check if .sprite was created by tests -if [[ "$INITIAL_SPRITE_EXISTS" == "false" ]]; then - assert_no_directory "$HOME/.sprite" "No ~/.sprite directory created" -else - printf '%b\n' " ${YELLOW}⊘${NC} Skipped ~/.sprite check (existed before tests)" -fi - -# Check if .claude was created by tests -if [[ "$INITIAL_CLAUDE_DIR_EXISTS" == "false" ]]; then - assert_no_directory "$HOME/.claude" "No ~/.claude directory created" -else - printf '%b\n' " ${YELLOW}⊘${NC} Skipped ~/.claude check (existed before tests)" -fi - -# Test 5: Verify Claude settings not mutated in production config -echo "" -echo "${YELLOW}Test 5: Claude settings integrity${NC}" - -# Check .claude.json mutation only if it existed before tests -if [[ "$INITIAL_CLAUDE_JSON_EXISTS" == "true" ]]; then - # Compare modification time before and after tests - CURRENT_MTIME=$(stat -c %Y "$HOME/.claude.json" 2>/dev/null || stat -f %m "$HOME/.claude.json" 2>/dev/null) - if [[ "$CURRENT_MTIME" != "$INITIAL_CLAUDE_JSON_MTIME" ]]; then - printf '%b\n' " ${RED}✗${NC} Production ~/.claude.json was modified by tests" - printf '%b\n' " File: $HOME/.claude.json" - FAILED=$((FAILED + 1)) - else - printf '%b\n' " ${GREEN}✓${NC} Production ~/.claude.json not modified by tests" - PASSED=$((PASSED + 1)) - fi -elif [[ -f "$HOME/.claude.json" ]]; then - # File was created by tests - printf '%b\n' " ${RED}✗${NC} ~/.claude.json should not be created by tests" - printf '%b\n' " Created: $HOME/.claude.json" - FAILED=$((FAILED + 1)) -else - printf '%b\n' " ${GREEN}✓${NC} ~/.claude.json not created by tests" - PASSED=$((PASSED + 1)) -fi - -# Check settings.json mutation only if it existed before tests -if [[ "$INITIAL_CLAUDE_SETTINGS_EXISTS" == "true" ]]; then - # Compare modification time before and after tests - CURRENT_MTIME=$(stat -c %Y "$HOME/.claude/settings.json" 2>/dev/null || stat -f %m "$HOME/.claude/settings.json" 2>/dev/null) - if [[ "$CURRENT_MTIME" != "$INITIAL_CLAUDE_SETTINGS_MTIME" ]]; then - printf '%b\n' " ${RED}✗${NC} Production ~/.claude/settings.json was modified by tests" - printf '%b\n' " File: $HOME/.claude/settings.json" - FAILED=$((FAILED + 1)) - else - printf '%b\n' " ${GREEN}✓${NC} Production ~/.claude/settings.json not modified by tests" - PASSED=$((PASSED + 1)) - fi -elif [[ -f "$HOME/.claude/settings.json" ]]; then - # File was created by tests - printf '%b\n' " ${RED}✗${NC} ~/.claude/settings.json should not be created by tests" - printf '%b\n' " Created: $HOME/.claude/settings.json" - FAILED=$((FAILED + 1)) -else - printf '%b\n' " ${GREEN}✓${NC} ~/.claude/settings.json not created by tests" - PASSED=$((PASSED + 1)) -fi - -echo "" -echo "========================================" -TOTAL=$((PASSED + FAILED)) -printf '%b\n' " Results: ${GREEN}${PASSED} passed${NC}, ${RED}${FAILED} failed${NC}, ${TOTAL} total" -echo "========================================" - -[[ "${FAILED}" -eq 0 ]] && exit 0 || exit 1 diff --git a/test/update-readme.py b/test/update-readme.py deleted file mode 100644 index b0fbc3ad..00000000 --- a/test/update-readme.py +++ /dev/null @@ -1,135 +0,0 @@ -#!/usr/bin/env python3 -"""Update README.md matrix cells based on test results. - -Usage: - python3 test/update-readme.py results.txt - -Results file format (one per line): - cloud/agent:pass - cloud/agent:fail - -Only touches cells that have test results; untested combinations stay unchanged. -""" -import json -import re -import sys -import os - -def main(): - if len(sys.argv) < 2: - print("Usage: python3 test/update-readme.py RESULTS_FILE", file=sys.stderr) - sys.exit(1) - - results_file = sys.argv[1] - repo_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) - readme_path = os.path.join(repo_root, "README.md") - manifest_path = os.path.join(repo_root, "manifest.json") - - # Parse results - results = {} - with open(results_file) as f: - for line in f: - line = line.strip() - if not line or ":" not in line: - continue - combo, status = line.rsplit(":", 1) - results[combo] = status # cloud/agent -> pass|fail - - if not results: - print("No results to apply.") - return - - # Load manifest to map agent keys to display names - with open(manifest_path) as f: - manifest = json.load(f) - - # Build agent key -> name mapping for row matching - agent_names = {} - for key, info in manifest["agents"].items(): - agent_names[info["name"]] = key # "Claude Code" -> "claude" - - # Read README - with open(readme_path) as f: - lines = f.readlines() - - # Find the matrix table: header row starts with "| |" - header_idx = None - for i, line in enumerate(lines): - if line.startswith("| |") or line.startswith("| | "): - header_idx = i - break - - if header_idx is None: - print("Could not find matrix table header in README.md", file=sys.stderr) - sys.exit(1) - - # Parse cloud columns from header - # Header: | | [Sprite](sprite/) | [Hetzner Cloud](hetzner/) | ... - header = lines[header_idx] - header_cells = [c.strip() for c in header.split("|")] - # header_cells[0] = "", header_cells[1] = "" (row label), header_cells[2:] = cloud cells - - cloud_columns = {} # cloud_dir -> column index (0-based within cells) - for col_idx, cell in enumerate(header_cells): - # Extract dir from [Name](dir/) - m = re.search(r'\[.*?\]\(([^/)]+)/?[^)]*\)', cell) - if m: - cloud_columns[m.group(1)] = col_idx - - # Process data rows (skip header and separator) - changed = False - for i in range(header_idx + 2, len(lines)): - line = lines[i] - if not line.startswith("|"): - break - - cells = line.split("|") - if len(cells) < 3: - continue - - # Extract agent key from first data cell - # e.g. " [**Claude Code**](https://claude.ai) " -> "Claude Code" - row_label = cells[1].strip() - name_match = re.search(r'\[\*\*(.*?)\*\*\]', row_label) - if not name_match: - continue - display_name = name_match.group(1) - agent_key = agent_names.get(display_name) - if not agent_key: - continue - - row_changed = False - for cloud_dir, col_idx in cloud_columns.items(): - combo = f"{cloud_dir}/{agent_key}" - if combo not in results: - continue - if col_idx >= len(cells): - continue - - status = results[combo] - old_cell = cells[col_idx] - # Preserve whitespace padding - stripped = old_cell.strip() - if status == "pass" and stripped != "\u2713": - cells[col_idx] = old_cell.replace(stripped, "\u2713") if stripped else " \u2713 " - row_changed = True - elif status == "fail" and stripped != "\u2717": - cells[col_idx] = old_cell.replace(stripped, "\u2717") if stripped else " \u2717 " - row_changed = True - - if row_changed: - lines[i] = "|".join(cells) - if not lines[i].endswith("\n"): - lines[i] += "\n" - changed = True - - if changed: - with open(readme_path, "w") as f: - f.writelines(lines) - print(f"README.md updated with {len(results)} test results.") - else: - print("No changes needed in README.md.") - - -if __name__ == "__main__": - main()