spawn/sh/e2e/lib/soak.sh
A 6081c0a17f
feat(qa): telegram soak test on digitalocean + fix bun -e (#2547)
- soak.sh: SOAK_CLOUD env var makes cloud configurable (default: sprite)
- qa.sh: load TELEGRAM_BOT_TOKEN, TELEGRAM_TEST_CHAT_ID, SOAK_CLOUD from
  /etc/spawn-qa-auth.env in soak mode
- qa.yml: add weekly Monday 3am UTC scheduled soak trigger
- fix: bun eval → bun -e across soak.sh, key-request.sh, github-auth.sh
  (bun eval is not a valid subcommand in bun 1.3.9)
- fix: export _TOKEN via env prefix so process.env._TOKEN works in bun -e
- docs: update shell-scripts.md rule to say bun -e (not bun eval)

Verified: 3/4 Telegram tests pass in smoke test on DigitalOcean (120s wait)
getMe ✓ sendMessage ✓ getWebhookInfo ✓; cron test needs full 55-min window.

Co-authored-by: spawn-qa-bot <qa@openrouter.ai>
Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-12 19:45:18 -04:00

571 lines
21 KiB
Bash

#!/bin/bash
# e2e/lib/soak.sh — Telegram soak test for OpenClaw
#
# Provisions OpenClaw on the cloud selected by SOAK_CLOUD (default: sprite),
# injects a Telegram bot token, schedules a one-shot OpenClaw cron reminder,
# waits out the soak period, and then runs integration tests against the
# Telegram Bot API — including verifying the cron fired.
#
# Required env vars:
# TELEGRAM_BOT_TOKEN — Bot token from @BotFather
# TELEGRAM_TEST_CHAT_ID — Chat ID to send test messages to
#
# Optional env vars:
# SOAK_WAIT_SECONDS — Override the default 1-hour soak wait (default: 3600)
# SOAK_CRON_DELAY_SECONDS — Delay before cron fires (default: 3300 = 55 min)
# SOAK_CLOUD — Cloud driver to provision on (default: sprite)
# Abort on unhandled command failures, and treat a pipeline as failed when
# any stage of it fails.
set -e -o pipefail

# ---------------------------------------------------------------------------
# Constants
# ---------------------------------------------------------------------------

# Caller-overridable settings: a non-empty value already present in the
# environment wins, otherwise the default on the right is assigned.
: "${SOAK_WAIT_SECONDS:=3600}"          # total soak duration, seconds
: "${SOAK_CRON_DELAY_SECONDS:=3300}"    # delay before the cron reminder fires, seconds
: "${SOAK_CLOUD:=sprite}"               # cloud driver name

# Fixed settings: always assigned here, never taken from the environment.
SOAK_HEARTBEAT_INTERVAL=300                 # 5 minutes
SOAK_GATEWAY_PORT=18789                     # gateway port reported by the heartbeat
TELEGRAM_API_BASE="https://api.telegram.org"
SOAK_CRON_JOB_NAME="spawn-soak-reminder"    # OpenClaw cron job name
# ---------------------------------------------------------------------------
# validate_positive_int VAR_NAME VALUE
#
# Validates that VALUE is a canonical positive decimal integer within the
# safe range 1-86400. VAR_NAME is only used in the error messages.
#
# Rejected (message via log_err, return 1):
#   - the empty string and anything containing a non-digit (sign, space,
#     newline, ...); the whole string is matched, not individual lines
#   - leading zeros: bash arithmetic would read "0100" as octal 64 and
#     abort on "08"
#   - values outside 1-86400, including digit strings too long for the
#     shell's integer comparison
# Returns 0 when VALUE is acceptable.
# ---------------------------------------------------------------------------
validate_positive_int() {
  local var_name="$1"
  local var_value="$2"

  case "${var_value}" in
    '' | *[!0-9]* | 0*)
      log_err "${var_name} must be a positive integer, got: ${var_value}"
      return 1
      ;;
  esac

  # Check the length before comparing numerically: `[ ... -gt ... ]` reports
  # an error (rather than true/false) for numbers beyond the shell's integer
  # range, which would otherwise let an absurdly long value slip through.
  if [ "${#var_value}" -gt 5 ] || [ "${var_value}" -gt 86400 ]; then
    log_err "${var_name} out of range (1-86400), got: ${var_value}"
    return 1
  fi
  return 0
}
# Fail fast at load time: both durations are later used in shell arithmetic
# and interpolated into remote command strings, so anything that is not a
# plain in-range integer terminates the script before other code runs.
validate_positive_int "SOAK_WAIT_SECONDS" "${SOAK_WAIT_SECONDS}" || exit 1
validate_positive_int "SOAK_CRON_DELAY_SECONDS" "${SOAK_CRON_DELAY_SECONDS}" || exit 1
# ---------------------------------------------------------------------------
# _encode_b64 VALUE
#
# Base64-encodes VALUE (fed to `base64` on stdin) as a single line and prints
# the result without a trailing newline.
#
# The result is validated as a WHOLE string: it must be non-empty and consist
# solely of [A-Za-z0-9+/=]. Anything else — including embedded newlines — is
# rejected with "Invalid base64 encoding" and return code 1.
# Defense-in-depth: callers paste the result inside single quotes in remote
# command strings, so a corrupted encoding must not be able to break out of
# that quoting.
# ---------------------------------------------------------------------------
_encode_b64() {
  local raw="$1"
  local encoded

  # Try `base64 -w 0` (no line wrapping) first; if the local base64 rejects
  # that option, fall back to plain `base64` and strip the newlines ourselves.
  encoded=$(printf '%s' "${raw}" | base64 -w 0 2>/dev/null || printf '%s' "${raw}" | base64 | tr -d '\n')

  # A `case` pattern matches the entire string, so a multi-line value with
  # one clean line cannot pass (a line-oriented grep would accept it).
  case "${encoded}" in
    '' | *[!A-Za-z0-9+/=]*)
      log_err "Invalid base64 encoding"
      return 1
      ;;
  esac
  printf '%s' "${encoded}"
}
# ---------------------------------------------------------------------------
# soak_validate_telegram_env
#
# Checks that TELEGRAM_BOT_TOKEN is set and that TELEGRAM_TEST_CHAT_ID is set
# and is an integer (optional leading "-", then digits only).
#
# The chat ID is matched as a whole string, so embedded newlines or quotes
# are rejected; it is later interpolated inside single quotes in remote
# command strings. Every problem found is logged via log_err.
# Returns 0 when both values are acceptable, 1 otherwise.
# ---------------------------------------------------------------------------
soak_validate_telegram_env() {
  local missing=0
  local chat_digits

  if [ -z "${TELEGRAM_BOT_TOKEN:-}" ]; then
    log_err "TELEGRAM_BOT_TOKEN is not set"
    missing=1
  fi

  if [ -z "${TELEGRAM_TEST_CHAT_ID:-}" ]; then
    log_err "TELEGRAM_TEST_CHAT_ID is not set"
    missing=1
  else
    # Drop one optional leading minus; what remains must be digits only.
    chat_digits="${TELEGRAM_TEST_CHAT_ID#-}"
    case "${chat_digits}" in
      '' | *[!0-9]*)
        log_err "TELEGRAM_TEST_CHAT_ID must be numeric (chat IDs are integers), got: ${TELEGRAM_TEST_CHAT_ID}"
        missing=1
        ;;
    esac
  fi

  if [ "${missing}" -eq 1 ]; then
    return 1
  fi
  log_ok "Telegram env validated (token + chat ID present)"
  return 0
}
# ---------------------------------------------------------------------------
# soak_wait APP_NAME
#
# Sleeps for SOAK_WAIT_SECONDS in slices of at most SOAK_HEARTBEAT_INTERVAL
# seconds. After each slice it runs a port probe on APP_NAME via cloud_exec
# and logs whether the gateway port (SOAK_GATEWAY_PORT, default 18789) is
# reachable. A failed heartbeat is only logged as a warning; the wait always
# runs to completion.
# ---------------------------------------------------------------------------
soak_wait() {
  local app="$1"
  local port="${SOAK_GATEWAY_PORT:-18789}"
  local elapsed=0
  local remaining
  local sleep_time

  # Remote probe, tried in order: listening-socket table (ss), bash's
  # /dev/tcp pseudo-device, then nc. Built from the same port the log lines
  # report so the two cannot drift apart.
  local port_check="ss -tln 2>/dev/null | grep -q \":${port} \" || (echo >/dev/tcp/127.0.0.1/${port}) 2>/dev/null || nc -z 127.0.0.1 ${port} 2>/dev/null"

  log_header "Soak wait: ${SOAK_WAIT_SECONDS}s (heartbeat every ${SOAK_HEARTBEAT_INTERVAL}s)"
  while [ "${elapsed}" -lt "${SOAK_WAIT_SECONDS}" ]; do
    # The final slice is shortened so the total never exceeds the soak time.
    remaining=$((SOAK_WAIT_SECONDS - elapsed))
    sleep_time="${SOAK_HEARTBEAT_INTERVAL}"
    if [ "${remaining}" -lt "${sleep_time}" ]; then
      sleep_time="${remaining}"
    fi
    sleep "${sleep_time}"
    elapsed=$((elapsed + sleep_time))

    # Heartbeat: check gateway is alive
    if cloud_exec "${app}" "${port_check}" >/dev/null 2>&1; then
      log_info "Heartbeat ${elapsed}/${SOAK_WAIT_SECONDS}s — gateway alive on :${port}"
    else
      log_warn "Heartbeat ${elapsed}/${SOAK_WAIT_SECONDS}s — gateway NOT responding on :${port}"
    fi
  done
  log_ok "Soak wait complete (${SOAK_WAIT_SECONDS}s)"
}
# ---------------------------------------------------------------------------
# soak_inject_telegram_config APP_NAME
#
# Writes TELEGRAM_BOT_TOKEN into channels.telegram.botToken of
# ~/.openclaw/openclaw.json on the remote VM (creating the file and its
# directory if needed), then restarts the gateway via
# _openclaw_restart_gateway so it picks up the new config.
#
# Returns 1 if the token cannot be encoded or the remote patch fails;
# otherwise returns the status of _openclaw_restart_gateway.
# ---------------------------------------------------------------------------
soak_inject_telegram_config() {
  local app="$1"
  log_header "Injecting Telegram config"

  # Base64-encode the token to avoid shell metacharacter issues
  local encoded_token
  encoded_token=$(_encode_b64 "${TELEGRAM_BOT_TOKEN}") || return 1

  log_step "Patching ~/.openclaw/openclaw.json with Telegram bot token..."
  # Use bun -e on the remote to JSON-patch the config file.
  # _TOKEN is passed via env var prefix so process.env._TOKEN is available in bun.
  # The command is tested directly with `if !`: a separate `$?` check after
  # it would be unreachable under `set -e` whenever this function is not
  # itself being called from a conditional.
  if ! cloud_exec "${app}" "source ~/.spawnrc 2>/dev/null; \
    export PATH=\$HOME/.npm-global/bin:\$HOME/.bun/bin:\$HOME/.local/bin:\$PATH; \
    _TOKEN=\$(printf '%s' '${encoded_token}' | base64 -d); \
    _TOKEN=\${_TOKEN} bun -e ' \
      import { mkdirSync, readFileSync, writeFileSync } from \"node:fs\"; \
      import { dirname } from \"node:path\"; \
      const configPath = (process.env.HOME ?? \"\") + \"/.openclaw/openclaw.json\"; \
      let config = {}; \
      try { config = JSON.parse(readFileSync(configPath, \"utf-8\")); } catch {} \
      if (!config.channels) config.channels = {}; \
      if (!config.channels.telegram) config.channels.telegram = {}; \
      config.channels.telegram.botToken = process.env._TOKEN; \
      mkdirSync(dirname(configPath), { recursive: true }); \
      writeFileSync(configPath, JSON.stringify(config, null, 2)); \
      console.log(\"Telegram config injected\"); \
    '" 2>&1; then
    log_err "Failed to inject Telegram config"
    return 1
  fi
  log_ok "Telegram bot token injected into openclaw.json"

  # Restart gateway to pick up new config
  _openclaw_restart_gateway "${app}"
}
# ---------------------------------------------------------------------------
# soak_test_telegram_getme APP_NAME
#
# Calls the Telegram getMe API from the remote VM to verify the bot token is
# valid. The API origin comes from TELEGRAM_API_BASE (falling back to
# https://api.telegram.org when unset).
#
# Returns 0 when the response contains "ok":true, 1 otherwise (the raw
# response is logged) or when the token cannot be encoded.
# ---------------------------------------------------------------------------
soak_test_telegram_getme() {
  local app="$1"
  local api_base="${TELEGRAM_API_BASE:-https://api.telegram.org}"
  log_step "Testing Telegram getMe API..."

  local encoded_token
  encoded_token=$(_encode_b64 "${TELEGRAM_BOT_TOKEN}") || return 1

  # The token travels base64-encoded and is decoded on the remote side.
  # The exit status of cloud_exec is ignored on purpose; the response body
  # decides the outcome.
  local output
  output=$(cloud_exec "${app}" "_TOKEN=\$(printf '%s' '${encoded_token}' | base64 -d); \
    curl -sS \"${api_base}/bot\${_TOKEN}/getMe\"" 2>&1) || true

  if printf '%s' "${output}" | grep -q '"ok":true'; then
    log_ok "Telegram getMe — bot token is valid"
    return 0
  fi
  log_err "Telegram getMe — unexpected response"
  log_err "Response: ${output}"
  return 1
}
# ---------------------------------------------------------------------------
# soak_test_telegram_send APP_NAME
#
# Sends a timestamped marker message (SPAWN_SOAK_TEST_<epoch seconds>) to
# TELEGRAM_TEST_CHAT_ID through the Telegram sendMessage API, invoked from
# the remote VM. The API origin comes from TELEGRAM_API_BASE (falling back
# to https://api.telegram.org when unset).
#
# Returns 0 when the response contains "ok":true, 1 otherwise (the raw
# response is logged) or when the token cannot be encoded.
# ---------------------------------------------------------------------------
soak_test_telegram_send() {
  local app="$1"
  local api_base="${TELEGRAM_API_BASE:-https://api.telegram.org}"
  log_step "Testing Telegram sendMessage API..."

  local encoded_token
  encoded_token=$(_encode_b64 "${TELEGRAM_BOT_TOKEN}") || return 1

  local marker
  marker="SPAWN_SOAK_TEST_$(date +%s)"

  # The exit status of cloud_exec is ignored on purpose; the response body
  # decides the outcome.
  local output
  output=$(cloud_exec "${app}" "_TOKEN=\$(printf '%s' '${encoded_token}' | base64 -d); \
    curl -sS \"${api_base}/bot\${_TOKEN}/sendMessage\" \
    -d chat_id='${TELEGRAM_TEST_CHAT_ID}' \
    -d text='${marker}'" 2>&1) || true

  if printf '%s' "${output}" | grep -q '"ok":true'; then
    log_ok "Telegram sendMessage — message sent (marker: ${marker})"
    return 0
  fi
  log_err "Telegram sendMessage — failed to send message"
  log_err "Response: ${output}"
  return 1
}
# ---------------------------------------------------------------------------
# soak_test_telegram_webhook APP_NAME
#
# Calls the Telegram getWebhookInfo API from the remote VM. The test passes
# when the response contains "ok":true; the webhook URL is informational
# only. The API origin comes from TELEGRAM_API_BASE (falling back to
# https://api.telegram.org when unset).
#
# A response whose "url" field is the empty string is reported as "likely in
# polling mode" — only a non-empty URL is logged as webhook info.
#
# Returns 0 on an OK response, 1 otherwise (the raw response is logged) or
# when the token cannot be encoded.
# ---------------------------------------------------------------------------
soak_test_telegram_webhook() {
  local app="$1"
  local api_base="${TELEGRAM_API_BASE:-https://api.telegram.org}"
  log_step "Testing Telegram getWebhookInfo API..."

  local encoded_token
  encoded_token=$(_encode_b64 "${TELEGRAM_BOT_TOKEN}") || return 1

  # The exit status of cloud_exec is ignored on purpose; the response body
  # decides the outcome.
  local output
  output=$(cloud_exec "${app}" "_TOKEN=\$(printf '%s' '${encoded_token}' | base64 -d); \
    curl -sS \"${api_base}/bot\${_TOKEN}/getWebhookInfo\"" 2>&1) || true

  if ! printf '%s' "${output}" | grep -q '"ok":true'; then
    log_err "Telegram getWebhookInfo — unexpected response"
    log_err "Response: ${output}"
    return 1
  fi
  log_ok "Telegram getWebhookInfo — responded OK"

  # Log webhook URL if set (informational — polling mode has empty url).
  # The pattern demands at least one character between the quotes, so an
  # empty "url":"" field does not count as a configured webhook.
  local webhook_url
  webhook_url=$(printf '%s' "${output}" | grep -o '"url":"[^"][^"]*"' | head -1) || true
  if [ -n "${webhook_url}" ]; then
    log_info "Webhook info: ${webhook_url}"
  else
    log_info "No webhook URL set — bot is likely in polling mode"
  fi
  return 0
}
# ---------------------------------------------------------------------------
# soak_install_openclaw_cron APP_NAME
#
# Uses OpenClaw's built-in cron scheduler to create a one-shot reminder that
# sends a Telegram message SOAK_CRON_DELAY_SECONDS from now (~55 min by
# default).
#
# This tests that OpenClaw's gateway stays alive and its cron system can
# execute scheduled tasks and deliver messages to Telegram.
#
# Steps:
#   1. Compute the fire time on the remote VM. An ISO 8601 UTC timestamp
#      must be found in the remote output; anything else is treated as a
#      failure.
#   2. openclaw cron add --at <ISO8601> --channel telegram --announce
#   3. Touch /tmp/.spawn-cron-scheduled-APP_NAME on the VM as a time marker.
#   4. Confirm the job via `openclaw cron list` (a miss only warns).
#
# Returns 1 if no fire time could be obtained or the `cron add` output looks
# like an error; 0 otherwise.
# ---------------------------------------------------------------------------
soak_install_openclaw_cron() {
  local app="$1"
  log_header "Scheduling OpenClaw cron reminder"
  log_info "Job name: ${SOAK_CRON_JOB_NAME}"
  log_info "Delay: ${SOAK_CRON_DELAY_SECONDS}s (~$((SOAK_CRON_DELAY_SECONDS / 60)) min)"

  # Compute the ISO 8601 fire time on the remote VM (uses its clock, not ours).
  # Two `date` spellings are tried: `-d '+N seconds'` first, `-v+NS` second.
  local date_output
  date_output=$(cloud_exec "${app}" "date -u -d '+${SOAK_CRON_DELAY_SECONDS} seconds' '+%Y-%m-%dT%H:%M:%SZ' 2>/dev/null || \
    date -u -v+${SOAK_CRON_DELAY_SECONDS}S '+%Y-%m-%dT%H:%M:%SZ'" 2>&1) || true

  # stderr is merged into the capture, so it may hold error text or transport
  # noise. Keep only a well-formed timestamp (the last one found): the value
  # is pasted inside single quotes into the next remote command and must not
  # contain anything else.
  local fire_at
  fire_at=$(printf '%s\n' "${date_output}" \
    | grep -oE '[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z' \
    | tail -1) || true
  if [ -z "${fire_at}" ]; then
    log_err "Failed to compute fire time on remote VM"
    if [ -n "${date_output}" ]; then
      log_err "Output: ${date_output}"
    fi
    return 1
  fi
  log_info "Fire at: ${fire_at} (UTC)"

  # Create the cron job via OpenClaw's CLI
  # --at: one-shot at a specific time
  # --session isolated: runs in its own session (doesn't block main conversation)
  # --channel telegram: deliver via Telegram
  # --to: target the test chat
  # --announce: post the message to the channel
  # --delete-after-run: clean up after firing (one-shot)
  # NOTE(review): the message is single-quoted on the remote side, so the
  # text "$(date -u)" reaches openclaw literally — confirm that is intended.
  local output
  output=$(cloud_exec "${app}" "source ~/.spawnrc 2>/dev/null; \
    export PATH=\$HOME/.npm-global/bin:\$HOME/.bun/bin:\$HOME/.local/bin:\$PATH; \
    openclaw cron add \
    --name '${SOAK_CRON_JOB_NAME}' \
    --at '${fire_at}' \
    --session isolated \
    --message 'Spawn soak test: scheduled reminder fired successfully at \$(date -u)' \
    --announce \
    --channel telegram \
    --to 'chat:${TELEGRAM_TEST_CHAT_ID}' \
    --delete-after-run" 2>&1) || true

  # Success is judged from the output text, not the exit status. Extended
  # regex is used because `\|` alternation is not part of POSIX basic regex.
  if printf '%s' "${output}" | grep -qiE 'error|fail|not found|unknown'; then
    log_err "Failed to create OpenClaw cron job"
    log_err "Output: ${output}"
    return 1
  fi
  log_ok "OpenClaw cron job scheduled (fires at ${fire_at})"

  # Drop a timestamp marker so the verify step can find cron artifacts created after this point
  cloud_exec "${app}" "touch /tmp/.spawn-cron-scheduled-${app}" 2>/dev/null || true

  # Verify the job exists via openclaw cron list
  local list_output
  list_output=$(cloud_exec "${app}" "source ~/.spawnrc 2>/dev/null; \
    export PATH=\$HOME/.npm-global/bin:\$HOME/.bun/bin:\$HOME/.local/bin:\$PATH; \
    openclaw cron list" 2>&1) || true
  if printf '%s' "${list_output}" | grep -q "${SOAK_CRON_JOB_NAME}"; then
    log_ok "Cron job '${SOAK_CRON_JOB_NAME}' confirmed in openclaw cron list"
  else
    log_warn "Cron job not visible in openclaw cron list — may still work"
    log_info "List output: ${list_output}"
  fi
  return 0
}
# ---------------------------------------------------------------------------
# _soak_extract_message_id TEXT [first|last]
#
# Prints the numeric value of the first (default) or last "message_id" field
# found in TEXT, or nothing when there is none. Whitespace around the colon
# is tolerated, so both compact and pretty-printed JSON are handled.
# ---------------------------------------------------------------------------
_soak_extract_message_id() {
  local text="$1"
  local pick="${2:-first}"
  local matches
  matches=$(printf '%s\n' "${text}" \
    | grep -oE '"message_id"[[:space:]]*:[[:space:]]*[0-9]+' \
    | grep -oE '[0-9]+$') || true
  if [ "${pick}" = "last" ]; then
    printf '%s\n' "${matches}" | tail -1
  else
    printf '%s\n' "${matches}" | head -1
  fi
}

# ---------------------------------------------------------------------------
# soak_test_openclaw_cron_fired APP_NAME
#
# Verifies that the OpenClaw cron job actually delivered a message to
# Telegram by:
# 1. Looking for a "message_id" in, in order: the output of
#    `openclaw cron runs <job>`, JSON files under ~/.openclaw/cron/ newer
#    than the /tmp/.spawn-cron-scheduled-APP_NAME marker, and finally any
#    file under ~/.openclaw/cron/
# 2. Calling Telegram's forwardMessage API with that message_id
#
# If Telegram can forward the message, it EXISTS in the chat — this is
# proof from Telegram itself, not from OpenClaw's self-reporting.
#
# Returns 0 when the forward succeeds; 1 when no message_id is found (a
# diagnostic dump is logged), the forward fails, or the token cannot be
# encoded.
# ---------------------------------------------------------------------------
soak_test_openclaw_cron_fired() {
  local app="$1"
  local api_base="${TELEGRAM_API_BASE:-https://api.telegram.org}"
  log_step "Testing OpenClaw cron-triggered Telegram reminder..."

  local encoded_token
  encoded_token=$(_encode_b64 "${TELEGRAM_BOT_TOKEN}") || return 1

  # Step 1: Get the message_id from OpenClaw's cron execution data.
  log_info "Step 1: Extracting message_id from OpenClaw cron logs..."
  local message_id=""

  # Try openclaw cron runs first — it may include the delivery response
  local runs_output
  runs_output=$(cloud_exec "${app}" "source ~/.spawnrc 2>/dev/null; \
    export PATH=\$HOME/.npm-global/bin:\$HOME/.bun/bin:\$HOME/.local/bin:\$PATH; \
    openclaw cron runs '${SOAK_CRON_JOB_NAME}' 2>/dev/null || true" 2>&1) || true
  if [ -n "${runs_output}" ]; then
    log_info "Cron runs output: ${runs_output}"
    message_id=$(_soak_extract_message_id "${runs_output}" first) || true
  fi

  # Fallback: search OpenClaw's cron data directory for the Telegram response
  if [ -z "${message_id}" ]; then
    log_info "Searching ~/.openclaw/cron/ for Telegram API response..."
    local cron_data
    cron_data=$(cloud_exec "${app}" "find ~/.openclaw/cron/ -type f -name '*.json' -newer /tmp/.spawn-cron-scheduled-${app} 2>/dev/null | \
      xargs grep -l 'message_id' 2>/dev/null | head -1 | xargs cat 2>/dev/null || true" 2>&1) || true
    if [ -n "${cron_data}" ]; then
      message_id=$(_soak_extract_message_id "${cron_data}" first) || true
    fi
  fi

  # Fallback: scan the entire cron directory for any message_id
  if [ -z "${message_id}" ]; then
    local all_cron_data
    all_cron_data=$(cloud_exec "${app}" "grep -rh 'message_id' ~/.openclaw/cron/ 2>/dev/null || true" 2>&1) || true
    if [ -n "${all_cron_data}" ]; then
      # Take the last match in grep's output order (assumed to be the most
      # recent; grep itself does not guarantee chronological order).
      message_id=$(_soak_extract_message_id "${all_cron_data}" last) || true
    fi
  fi

  if [ -z "${message_id}" ]; then
    log_err "OpenClaw cron — could not find message_id in cron execution data"
    log_err "The cron job may not have fired, or delivery failed before reaching Telegram"
    # Log diagnostic info
    local job_status
    job_status=$(cloud_exec "${app}" "source ~/.spawnrc 2>/dev/null; \
      export PATH=\$HOME/.npm-global/bin:\$HOME/.bun/bin:\$HOME/.local/bin:\$PATH; \
      openclaw cron status '${SOAK_CRON_JOB_NAME}' 2>/dev/null; \
      echo '---'; \
      openclaw cron list 2>/dev/null; \
      echo '---'; \
      ls -la ~/.openclaw/cron/ 2>/dev/null || echo 'no cron dir'" 2>&1) || true
    log_info "Diagnostic: ${job_status}"
    return 1
  fi
  log_info "Step 2: Found message_id=${message_id} — verifying on Telegram..."

  # Step 2: Verify the message exists in the Telegram chat by forwarding it.
  # If Telegram can forward message_id from chat to itself, the message is real.
  # This is proof from Telegram's API, not OpenClaw's self-reporting.
  local verify_output
  verify_output=$(cloud_exec "${app}" "_TOKEN=\$(printf '%s' '${encoded_token}' | base64 -d); \
    curl -sS \"${api_base}/bot\${_TOKEN}/forwardMessage\" \
    -d chat_id='${TELEGRAM_TEST_CHAT_ID}' \
    -d from_chat_id='${TELEGRAM_TEST_CHAT_ID}' \
    -d message_id='${message_id}'" 2>&1) || true

  if printf '%s' "${verify_output}" | grep -q '"ok":true'; then
    log_ok "OpenClaw cron — message ${message_id} verified in Telegram chat (forwarded successfully)"
    return 0
  fi
  log_err "OpenClaw cron — Telegram could not forward message_id=${message_id}"
  log_err "This means the message does NOT exist in the chat"
  log_err "Response: ${verify_output}"
  return 1
}
# ---------------------------------------------------------------------------
# soak_run_telegram_tests APP_NAME
#
# Runs the four Telegram checks in a fixed order (getMe, sendMessage,
# getWebhookInfo, cron-fired), passing APP_NAME to each. Every check runs
# even if an earlier one failed. Logs a pass/fail summary and returns the
# number of failed checks (0 means all passed).
# ---------------------------------------------------------------------------
soak_run_telegram_tests() {
  local app="$1"
  local total=4
  local failures=0
  local _soak_case

  log_header "Telegram Integration Tests (${total} tests)"

  for _soak_case in \
    soak_test_telegram_getme \
    soak_test_telegram_send \
    soak_test_telegram_webhook \
    soak_test_openclaw_cron_fired; do
    if ! "${_soak_case}" "${app}"; then
      failures=$((failures + 1))
    fi
  done

  if [ "${failures}" -ne 0 ]; then
    log_err "${failures}/${total} Telegram test(s) failed"
  else
    log_ok "All ${total} Telegram tests passed"
  fi
  return "${failures}"
}
# ---------------------------------------------------------------------------
# _soak_abort_provisioned APP_NAME REASON
#
# Shared abort path for failures after an app name has been allocated: logs
# "Soak test aborted — REASON" and tears the agent down (a failed teardown
# is only logged as a warning). The caller is responsible for returning.
# ---------------------------------------------------------------------------
_soak_abort_provisioned() {
  local app_name="$1"
  local reason="$2"
  log_err "Soak test aborted — ${reason}"
  teardown_agent "${app_name}" || log_warn "Teardown failed for ${app_name}"
}

# ---------------------------------------------------------------------------
# run_soak_test [LOG_DIR]
#
# Orchestrator: validate env → load cloud driver (SOAK_CLOUD) → provision openclaw →
# verify → inject telegram config → schedule openclaw cron reminder →
# soak wait → run tests (including openclaw cron verification) → teardown.
#
# LOG_DIR defaults to $LOG_DIR, or a fresh mktemp directory when neither is
# given. A failed cron install does not abort the run (the cron test is then
# expected to fail). A warning is logged up front when SOAK_CRON_DELAY_SECONDS
# is not shorter than SOAK_WAIT_SECONDS, because the reminder is then
# unlikely to have fired by the time the tests run.
#
# Returns 1 when the run is aborted before the tests; otherwise the number
# of failed Telegram tests (0 = success).
# ---------------------------------------------------------------------------
run_soak_test() {
  local log_dir="${1:-${LOG_DIR:-}}"
  if [ -z "${log_dir}" ]; then
    log_dir=$(mktemp -d "${TMPDIR:-/tmp}/spawn-soak.XXXXXX")
  fi

  log_header "Spawn Soak Test: OpenClaw + Telegram (with cron reminder)"
  log_info "Cloud: ${SOAK_CLOUD}"
  log_info "Soak wait: ${SOAK_WAIT_SECONDS}s"
  log_info "Cron delay: ${SOAK_CRON_DELAY_SECONDS}s"

  # Surface a doomed cron test now instead of after the whole soak wait.
  if [ "${SOAK_CRON_DELAY_SECONDS}" -ge "${SOAK_WAIT_SECONDS}" ]; then
    log_warn "Cron delay (${SOAK_CRON_DELAY_SECONDS}s) is not shorter than the soak wait (${SOAK_WAIT_SECONDS}s) — the cron reminder will likely not have fired when the tests run"
  fi

  # Validate Telegram secrets
  if ! soak_validate_telegram_env; then
    log_err "Soak test aborted — missing Telegram env vars"
    return 1
  fi

  # Load cloud driver (configurable via SOAK_CLOUD, default: sprite)
  load_cloud_driver "${SOAK_CLOUD}"

  # Validate cloud environment
  if ! require_env; then
    log_err "Soak test aborted — cloud env validation failed"
    return 1
  fi

  # Provision OpenClaw
  local app_name
  app_name=$(make_app_name "openclaw")
  track_app "${app_name}"
  local soak_start
  soak_start=$(date +%s)
  if ! provision_agent "openclaw" "${app_name}" "${log_dir}"; then
    _soak_abort_provisioned "${app_name}" "provisioning failed"
    return 1
  fi

  # Standard verification
  if ! verify_agent "openclaw" "${app_name}"; then
    _soak_abort_provisioned "${app_name}" "verification failed"
    return 1
  fi

  # Inject Telegram config BEFORE soak wait so cron can use the bot token
  if ! soak_inject_telegram_config "${app_name}"; then
    _soak_abort_provisioned "${app_name}" "Telegram config injection failed"
    return 1
  fi

  # Schedule OpenClaw cron reminder — fires in ~55 min during the 1h soak wait
  if ! soak_install_openclaw_cron "${app_name}"; then
    log_warn "OpenClaw cron install failed — cron test will fail but continuing"
  fi

  # Soak wait — gateway heartbeat + cron fires during this window
  soak_wait "${app_name}"

  # Run Telegram tests (including cron verification)
  local test_failures=0
  soak_run_telegram_tests "${app_name}" || test_failures=$?

  # Teardown
  teardown_agent "${app_name}" || log_warn "Teardown failed for ${app_name}"

  # Summary
  local soak_end
  soak_end=$(date +%s)
  local soak_duration=$((soak_end - soak_start))
  local duration_str
  duration_str=$(format_duration "${soak_duration}")
  printf "\n"
  log_header "Soak Test Summary"
  if [ "${test_failures}" -eq 0 ]; then
    log_ok "All Telegram tests passed (${duration_str})"
  else
    log_err "${test_failures} Telegram test(s) failed (${duration_str})"
  fi
  return "${test_failures}"
}