security: validate base64 output in cloud_exec and soak.sh (defense-in-depth) (#2532)

Add base64 character validation ([A-Za-z0-9+/=]) before use in SSH command strings for the gcp.sh, aws.sh, and hetzner.sh cloud_exec functions — matching the existing fix in digitalocean.sh (#2528). Also add a validated _encode_b64 helper to soak.sh and use it for all Telegram bot token encoding, preventing corrupted base64 from breaking out of single-quoted SSH command strings.

Closes #2527
Agent: security-auditor
Co-authored-by: B <6723574+louisgv@users.noreply.github.com>
Co-authored-by: Claude Sonnet 4.5 <noreply@anthropic.com>
2026-05-19 08:01:17 +00:00 · 2026-03-12 06:32:48 -07:00 · 2026-03-12 06:32:48 -07:00 · 6fda75ccc8
commit 6fda75ccc8
parent 76399eafd9
4 changed files with 48 additions and 5 deletions
--- a/sh/e2e/lib/clouds/aws.sh
+++ b/sh/e2e/lib/clouds/aws.sh
@ -152,6 +152,14 @@ _aws_exec() {
  local encoded_cmd
  encoded_cmd=$(printf '%s' "${cmd}" | base64 | tr -d '\n')

+  # Validate base64 output contains only safe characters (defense-in-depth).
+  # Standard base64 only produces [A-Za-z0-9+/=]. This rejects any corruption
+  # and ensures the value cannot break out of single quotes in the SSH command.
+  if ! printf '%s' "${encoded_cmd}" | grep -qE '^[A-Za-z0-9+/=]+$'; then
+    log_err "Invalid base64 encoding of command for SSH exec"
+    return 1
+  fi
+
  ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
      -o ConnectTimeout=10 -o LogLevel=ERROR -o BatchMode=yes \
      "ubuntu@${_AWS_INSTANCE_IP}" "printf '%s' '${encoded_cmd}' | base64 -d | bash"
--- a/sh/e2e/lib/clouds/gcp.sh
+++ b/sh/e2e/lib/clouds/gcp.sh
@ -165,6 +165,14 @@ _gcp_exec() {
  local encoded_cmd
  encoded_cmd=$(printf '%s' "${cmd}" | base64 | tr -d '\n')

+  # Validate base64 output contains only safe characters (defense-in-depth).
+  # Standard base64 only produces [A-Za-z0-9+/=]. This rejects any corruption
+  # and ensures the value cannot break out of single quotes in the SSH command.
+  if ! printf '%s' "${encoded_cmd}" | grep -qE '^[A-Za-z0-9+/=]+$'; then
+    log_err "Invalid base64 encoding of command for SSH exec"
+    return 1
+  fi
+
  ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
      -o ConnectTimeout=10 -o LogLevel=ERROR -o BatchMode=yes \
      "${ssh_user}@${_GCP_INSTANCE_IP}" "printf '%s' '${encoded_cmd}' | base64 -d | bash"
--- a/sh/e2e/lib/clouds/hetzner.sh
+++ b/sh/e2e/lib/clouds/hetzner.sh
@ -158,6 +158,14 @@ _hetzner_exec() {
  local encoded_cmd
  encoded_cmd=$(printf '%s' "${cmd}" | base64 | tr -d '\n')

+  # Validate base64 output contains only safe characters (defense-in-depth).
+  # Standard base64 only produces [A-Za-z0-9+/=]. This rejects any corruption
+  # and ensures the value cannot break out of single quotes in the SSH command.
+  if ! printf '%s' "${encoded_cmd}" | grep -qE '^[A-Za-z0-9+/=]+$'; then
+    log_err "Invalid base64 encoding of command for SSH exec"
+    return 1
+  fi
+
  ssh -o StrictHostKeyChecking=no \
      -o UserKnownHostsFile=/dev/null \
      -o LogLevel=ERROR \
--- a/sh/e2e/lib/soak.sh
+++ b/sh/e2e/lib/soak.sh
@ -47,6 +47,25 @@ validate_positive_int() {
 if ! validate_positive_int "SOAK_WAIT_SECONDS" "${SOAK_WAIT_SECONDS}"; then exit 1; fi
 if ! validate_positive_int "SOAK_CRON_DELAY_SECONDS" "${SOAK_CRON_DELAY_SECONDS}"; then exit 1; fi

+# ---------------------------------------------------------------------------
+# _encode_b64 VALUE
+#
+# Base64-encodes VALUE (passed as the first argument) into a single line and
+# prints it to stdout without a trailing newline.
+#
+# The result is checked as a whole string: it must be non-empty and consist
+# only of [A-Za-z0-9+/=]. An empty VALUE therefore fails. On failure nothing
+# is printed, an error is logged via log_err, and the function returns 1.
+#
+# Defense-in-depth: callers interpolate the result into single-quoted SSH
+# command strings, so anything outside the base64 alphabet (a quote, or an
+# embedded newline) must be rejected.
+# ---------------------------------------------------------------------------
+_encode_b64() {
+  local raw="$1"
+  local encoded
+
+  # Prefer `base64 -w 0` (no line wrapping). If that invocation fails (e.g.
+  # the option is unsupported), discard whatever it printed and fall back to
+  # plain `base64` with the newlines stripped. Keeping the two attempts in
+  # separate substitutions prevents partial output of a failed first attempt
+  # from being glued in front of the fallback's output.
+  if ! encoded=$(printf '%s' "${raw}" | base64 -w 0 2>/dev/null); then
+    encoded=$(printf '%s' "${raw}" | base64 | tr -d '\n') || encoded=""
+  fi
+
+  # Validate with a shell pattern rather than grep: grep matches line by line,
+  # so a value with an embedded newline would pass as soon as any single line
+  # looked like base64. `case` matches against the entire string.
+  case "${encoded}" in
+    '' | *[!A-Za-z0-9+/=]*)
+      log_err "Invalid base64 encoding"
+      return 1
+      ;;
+  esac
+
+  printf '%s' "${encoded}"
+}
+
 # ---------------------------------------------------------------------------
 # soak_validate_telegram_env
 #
@ -123,7 +142,7 @@ soak_inject_telegram_config() {

  # Base64-encode the token to avoid shell metacharacter issues
  local encoded_token
-  encoded_token=$(printf '%s' "${TELEGRAM_BOT_TOKEN}" | base64 -w 0 2>/dev/null || printf '%s' "${TELEGRAM_BOT_TOKEN}" | base64 | tr -d '\n')
+  encoded_token=$(_encode_b64 "${TELEGRAM_BOT_TOKEN}") || return 1

  log_step "Patching ~/.openclaw/openclaw.json with Telegram bot token..."

@ -166,7 +185,7 @@ soak_test_telegram_getme() {
  log_step "Testing Telegram getMe API..."

  local encoded_token
-  encoded_token=$(printf '%s' "${TELEGRAM_BOT_TOKEN}" | base64 -w 0 2>/dev/null || printf '%s' "${TELEGRAM_BOT_TOKEN}" | base64 | tr -d '\n')
+  encoded_token=$(_encode_b64 "${TELEGRAM_BOT_TOKEN}") || return 1

  local output
  output=$(cloud_exec "${app}" "_TOKEN=\$(printf '%s' '${encoded_token}' | base64 -d); \
@ -193,7 +212,7 @@ soak_test_telegram_send() {
  log_step "Testing Telegram sendMessage API..."

  local encoded_token
-  encoded_token=$(printf '%s' "${TELEGRAM_BOT_TOKEN}" | base64 -w 0 2>/dev/null || printf '%s' "${TELEGRAM_BOT_TOKEN}" | base64 | tr -d '\n')
+  encoded_token=$(_encode_b64 "${TELEGRAM_BOT_TOKEN}") || return 1

  local marker
  marker="SPAWN_SOAK_TEST_$(date +%s)"
@ -225,7 +244,7 @@ soak_test_telegram_webhook() {
  log_step "Testing Telegram getWebhookInfo API..."

  local encoded_token
-  encoded_token=$(printf '%s' "${TELEGRAM_BOT_TOKEN}" | base64 -w 0 2>/dev/null || printf '%s' "${TELEGRAM_BOT_TOKEN}" | base64 | tr -d '\n')
+  encoded_token=$(_encode_b64 "${TELEGRAM_BOT_TOKEN}") || return 1

  local output
  output=$(cloud_exec "${app}" "_TOKEN=\$(printf '%s' '${encoded_token}' | base64 -d); \
@ -344,7 +363,7 @@ soak_test_openclaw_cron_fired() {
  log_step "Testing OpenClaw cron-triggered Telegram reminder..."

  local encoded_token
-  encoded_token=$(printf '%s' "${TELEGRAM_BOT_TOKEN}" | base64 -w 0 2>/dev/null || printf '%s' "${TELEGRAM_BOT_TOKEN}" | base64 | tr -d '\n')
+  encoded_token=$(_encode_b64 "${TELEGRAM_BOT_TOKEN}") || return 1

  # Step 1: Get the message_id from OpenClaw's cron execution data.
  # OpenClaw stores cron job data in ~/.openclaw/cron/. We look for: