fix(e2e): improve openclaw reliability on AWS and other clouds (#2123)

* fix(e2e): improve openclaw reliability on AWS and other clouds Three changes to make openclaw e2e tests more robust: 1. Increase PROVISION_TIMEOUT from 480s to 720s — AWS cloud-init for "full" tier (Node.js + Bun + build-essential) can exceed 480s, causing the CLI to be killed before .spawnrc is written. 2. Add .spawnrc manual fallback in provision.sh — if the CLI is killed before writing .spawnrc, construct it via SSH using OPENROUTER_API_KEY with agent-specific env vars (openclaw, zeroclaw). 3. Add retry logic to openclaw gateway input test — the gateway can crash with 1006 websocket closure on resource-constrained instances. Now retries once after killing and restarting the gateway process. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> * fix(security): fix command injection in e2e provision scripts - Use printf %q and temp file for api_key handling in provision.sh to prevent shell metachar injection (single quotes, backticks, $) - Double-quote env_b64 interpolation in cloud_exec call to prevent word splitting - Replace echo with printf in bashrc append to avoid portability issues - Replace overbroad pkill -f 'openclaw gateway' in verify.sh with PID-targeted kill via lsof/fuser on port 18789 Agent: pr-maintainer Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com> --------- Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com> Co-authored-by: B <6723574+louisgv@users.noreply.github.com>
2026-05-20 01:11:18 +00:00 · 2026-03-02 20:19:34 -08:00 · 2026-03-02 20:19:34 -08:00 · 4a90abdaa2
commit 4a90abdaa2
parent 7b650a0103
3 changed files with 114 additions and 25 deletions
--- a/sh/e2e/lib/common.sh
+++ b/sh/e2e/lib/common.sh
@ -6,7 +6,7 @@ set -eo pipefail
 # Constants
 # ---------------------------------------------------------------------------
 ALL_AGENTS="claude openclaw zeroclaw codex opencode kilocode hermes"
-PROVISION_TIMEOUT="${PROVISION_TIMEOUT:-480}"
+PROVISION_TIMEOUT="${PROVISION_TIMEOUT:-720}"
 INSTALL_WAIT="${INSTALL_WAIT:-600}"
 INPUT_TEST_TIMEOUT="${INPUT_TEST_TIMEOUT:-120}"

--- a/sh/e2e/lib/provision.sh
+++ b/sh/e2e/lib/provision.sh
@ -127,8 +127,57 @@ CLOUD_ENV
    sleep 5
    log_ok "Install completed (.spawnrc found)"
    return 0
-  else
-    log_warn ".spawnrc not found after ${effective_install_wait}s — install may still be running"
-    return 0  # Continue to verification; it will catch real failures
  fi
+
+  # Fallback: CLI was killed before writing .spawnrc (provision timeout race).
+  # Construct .spawnrc manually via SSH using available env vars.
+  log_warn ".spawnrc not found after ${effective_install_wait}s — attempting manual creation"
+  local api_key="${OPENROUTER_API_KEY:-}"
+  if [ -z "${api_key}" ]; then
+    log_err "Cannot create .spawnrc fallback — OPENROUTER_API_KEY not set"
+    return 0
+  fi
+
+  # Build env lines in a temp file to avoid interpolating api_key into shell
+  # strings directly (prevents command injection if the key contains shell
+  # metacharacters like single quotes, backticks, or dollar signs).
+  local env_tmp
+  env_tmp=$(mktemp)
+  {
+    printf '%s\n' "# [spawn:env]"
+    printf 'export IS_SANDBOX=%q\n' "1"
+    printf 'export OPENROUTER_API_KEY=%q\n' "${api_key}"
+  } > "${env_tmp}"
+
+  # Add agent-specific env vars
+  case "${agent}" in
+    openclaw)
+      {
+        printf 'export ANTHROPIC_API_KEY=%q\n' "${api_key}"
+        printf 'export ANTHROPIC_BASE_URL=%q\n' "https://openrouter.ai/api"
+      } >> "${env_tmp}"
+      ;;
+    zeroclaw)
+      {
+        printf 'export ZEROCLAW_PROVIDER=%q\n' "openrouter"
+        printf 'export OPENAI_API_KEY=%q\n' "${api_key}"
+        printf 'export OPENAI_BASE_URL=%q\n' "https://openrouter.ai/api/v1"
+      } >> "${env_tmp}"
+      ;;
+  esac
+
+  local env_b64
+  env_b64=$(base64 < "${env_tmp}" | tr -d '\n')
+  rm -f "${env_tmp}"
+
+  # Use double-quoting around env_b64 in the remote command to prevent word
+  # splitting. Base64 output is shell-safe ([A-Za-z0-9+/=]), but quoting is
+  # defensive best practice against any upstream corruption.
+  if cloud_exec "${app_name}" "printf '%s' \"${env_b64}\" | base64 -d > ~/.spawnrc && chmod 600 ~/.spawnrc && \
+    grep -q 'source ~/.spawnrc' ~/.bashrc 2>/dev/null || printf '%s\n' '[ -f ~/.spawnrc ] && source ~/.spawnrc' >> ~/.bashrc" >/dev/null 2>&1; then
+    log_ok "Manual .spawnrc created successfully"
+  else
+    log_err "Failed to create manual .spawnrc"
+  fi
+  return 0
 }
--- a/sh/e2e/lib/verify.sh
+++ b/sh/e2e/lib/verify.sh
@ -74,12 +74,8 @@ input_test_codex() {
  fi
 }

-input_test_openclaw() {
+_openclaw_ensure_gateway() {
  local app="$1"
-
-  log_step "Running input test for openclaw..."
-
-  # Ensure the gateway is running (it may have died after provisioning)
  log_step "Ensuring openclaw gateway is running on :18789..."
  cloud_exec "${app}" "source ~/.spawnrc 2>/dev/null; \
    export PATH=\$HOME/.npm-global/bin:\$HOME/.bun/bin:\$HOME/.local/bin:\$PATH; \
@ -99,27 +95,71 @@ input_test_openclaw() {
    log_err "OpenClaw gateway failed to start"
    return 1
  fi
+}
+
+# Restart the openclaw gateway on the remote instance.
+#
+# Kills every process that lsof (or, failing that, fuser) reports on TCP port
+# 18789, relaunches `openclaw gateway` detached from the SSH session (setsid
+# when available, nohup otherwise), then polls the port for up to 30 seconds.
+#
+# Arguments: $1 - app/instance name handed to cloud_exec
+# Outputs:   progress via log_step; a warning via log_warn on failure
+# Returns:   always 0 (best-effort) so the caller's retry proceeds regardless
+_openclaw_restart_gateway() {
+  local app="$1"
+  log_step "Restarting openclaw gateway..."
+  # Two details in the remote script matter:
+  #  - lsof/fuser may report several PIDs; iterate over them unquoted so each
+  #    one is signalled individually instead of passing one multi-PID string
+  #    to kill (which rejects it and leaves the old gateway running).
+  #  - A while loop exits 0 whether it broke out early or ran to the limit, so
+  #    the trailing `[ $elapsed -lt 30 ]` converts a timeout into a non-zero
+  #    status. This relies on cloud_exec propagating the remote exit status,
+  #    which the `|| log_warn` below already presumes.
+  cloud_exec "${app}" "source ~/.spawnrc 2>/dev/null; \
+    export PATH=\$HOME/.npm-global/bin:\$HOME/.bun/bin:\$HOME/.local/bin:\$PATH; \
+    _gw_pids=\$(lsof -ti tcp:18789 2>/dev/null || fuser 18789/tcp 2>/dev/null); \
+    for _gw_pid in \$_gw_pids; do kill \"\$_gw_pid\" 2>/dev/null; done; sleep 2; \
+    _oc_bin=\$(command -v openclaw) || exit 1; \
+    if command -v setsid >/dev/null 2>&1; then setsid \"\$_oc_bin\" gateway > /tmp/openclaw-gateway.log 2>&1 < /dev/null & \
+    else nohup \"\$_oc_bin\" gateway > /tmp/openclaw-gateway.log 2>&1 < /dev/null & fi; \
+    elapsed=0; while [ \$elapsed -lt 30 ]; do \
+      if (echo >/dev/tcp/127.0.0.1/18789) 2>/dev/null || nc -z 127.0.0.1 18789 2>/dev/null; then echo 'Gateway restarted'; break; fi; \
+      sleep 1; elapsed=\$((elapsed + 1)); \
+    done; \
+    [ \$elapsed -lt 30 ]" >/dev/null 2>&1 || log_warn "Failed to restart openclaw gateway"
+}
+
+input_test_openclaw() {
+  local app="$1"
+  local max_attempts=2
+  local attempt=0
+
+  log_step "Running input test for openclaw..."

  local encoded_prompt
  encoded_prompt=$(printf '%s' "${INPUT_TEST_PROMPT}" | base64 -w 0 2>/dev/null || printf '%s' "${INPUT_TEST_PROMPT}" | base64)
-  local remote_cmd
-  remote_cmd="source ~/.spawnrc 2>/dev/null; \
-    export PATH=\$HOME/.npm-global/bin:\$HOME/.bun/bin:\$HOME/.local/bin:\$PATH; \
-    rm -rf /tmp/e2e-test && mkdir -p /tmp/e2e-test && cd /tmp/e2e-test && git init -q; \
-    PROMPT=\$(printf '%s' '${encoded_prompt}' | base64 -d); openclaw agent --message \"\$PROMPT\" --session-id e2e-test --json"

-  local output
-  output=$(cloud_exec_long "${app}" "${remote_cmd}" "${INPUT_TEST_TIMEOUT}" 2>&1) || true
+  while [ "${attempt}" -lt "${max_attempts}" ]; do
+    attempt=$((attempt + 1))

-  if printf '%s' "${output}" | grep -q "${INPUT_TEST_MARKER}"; then
-    log_ok "openclaw input test — marker found in response"
-    return 0
-  else
-    log_err "openclaw input test — marker '${INPUT_TEST_MARKER}' not found in response"
-    log_err "Response (last 5 lines):"
-    printf '%s\n' "${output}" | tail -5 >&2
-    return 1
-  fi
+    # Ensure/restart gateway
+    if [ "${attempt}" -eq 1 ]; then
+      _openclaw_ensure_gateway "${app}"
+    else
+      log_warn "Retrying openclaw input test (attempt ${attempt}/${max_attempts})..."
+      _openclaw_restart_gateway "${app}"
+    fi
+
+    local remote_cmd
+    remote_cmd="source ~/.spawnrc 2>/dev/null; \
+      export PATH=\$HOME/.npm-global/bin:\$HOME/.bun/bin:\$HOME/.local/bin:\$PATH; \
+      rm -rf /tmp/e2e-test && mkdir -p /tmp/e2e-test && cd /tmp/e2e-test && git init -q; \
+      PROMPT=\$(printf '%s' '${encoded_prompt}' | base64 -d); openclaw agent --message \"\$PROMPT\" --session-id e2e-test-${attempt} --json --timeout 60"
+
+    local output
+    output=$(cloud_exec_long "${app}" "${remote_cmd}" "${INPUT_TEST_TIMEOUT}" 2>&1) || true
+
+    if printf '%s' "${output}" | grep -q "${INPUT_TEST_MARKER}"; then
+      log_ok "openclaw input test — marker found in response"
+      return 0
+    fi
+
+    if [ "${attempt}" -lt "${max_attempts}" ]; then
+      log_warn "openclaw input test attempt ${attempt} failed — will retry"
+      log_warn "Response (last 3 lines):"
+      printf '%s\n' "${output}" | tail -3 >&2
+    else
+      log_err "openclaw input test — marker '${INPUT_TEST_MARKER}' not found in response"
+      log_err "Response (last 5 lines):"
+      printf '%s\n' "${output}" | tail -5 >&2
+    fi
+  done
+
+  return 1
 }

 input_test_zeroclaw() {