spawn/sh/e2e/lib/clouds/aws.sh
A 6fda75ccc8
security: validate base64 output in cloud_exec and soak.sh (defense-in-depth) (#2532)
Add base64 character validation ([A-Za-z0-9+/=]) before use in SSH
command strings for gcp.sh, aws.sh, and hetzner.sh cloud_exec
functions -- matching the existing fix in digitalocean.sh (#2528).

Also add a validated _encode_b64 helper to soak.sh and use it for
all Telegram bot token encoding, preventing corrupted base64 from
breaking out of single-quoted SSH command strings.

Closes #2527

Agent: security-auditor

Co-authored-by: B <6723574+louisgv@users.noreply.github.com>
Co-authored-by: Claude Sonnet 4.5 <noreply@anthropic.com>
2026-03-12 09:32:48 -04:00

272 lines
9 KiB
Bash

#!/bin/bash
# e2e/lib/clouds/aws.sh — AWS Lightsail cloud driver for multi-cloud E2E
#
# Implements the standard cloud driver interface (_aws_* prefixed functions).
# Sourced by common.sh's load_cloud_driver() which wires these to generic names.
#
# Depends on: log_step, log_ok, log_err, log_warn, log_info, format_duration,
# untrack_app (provided by common.sh)
set -eo pipefail
# ---------------------------------------------------------------------------
# Instance IP cache (avoid repeated API calls within a single run)
# ---------------------------------------------------------------------------
_AWS_INSTANCE_IP=""
_AWS_INSTANCE_APP=""
# ---------------------------------------------------------------------------
# _aws_validate_env
#
# Check that the aws CLI is installed and credentials are valid.
# Returns 0 on success, 1 on failure.
# ---------------------------------------------------------------------------
_aws_validate_env() {
local missing=0
if ! command -v aws >/dev/null 2>&1; then
log_err "aws CLI not found. Install from https://docs.aws.amazon.com/cli/latest/userguide/getting-started-install.html"
missing=1
fi
if [ "${missing}" -eq 1 ]; then
return 1
fi
if ! aws sts get-caller-identity --region "${AWS_REGION:-us-east-1}" >/dev/null 2>&1; then
log_err "AWS credentials are not valid. Run: aws configure"
return 1
fi
log_ok "AWS credentials validated (region: ${AWS_REGION:-us-east-1})"
return 0
}
# ---------------------------------------------------------------------------
# _aws_headless_env APP AGENT
#
# Print export lines to stdout for headless provisioning.
# These are eval'd by the provisioning harness before invoking the CLI.
# ---------------------------------------------------------------------------
_aws_headless_env() {
local app="$1"
local agent="${2:-}"
local bundle="${AWS_BUNDLE:-medium_3_0}"
printf 'export LIGHTSAIL_SERVER_NAME="%s"\n' "${app}"
printf 'export AWS_DEFAULT_REGION="%s"\n' "${AWS_REGION:-us-east-1}"
printf 'export LIGHTSAIL_BUNDLE="%s"\n' "${bundle}"
}
# ---------------------------------------------------------------------------
# _aws_provision_verify APP LOG_DIR
#
# Verify instance exists after provisioning, resolve public IP, and write
# metadata files for downstream steps (verify, teardown).
#
# Writes:
# $LOG_DIR/$APP.ip — public IPv4 address (plain text)
# $LOG_DIR/$APP.meta — instance metadata (JSON)
# ---------------------------------------------------------------------------
_aws_provision_verify() {
local app="$1"
local log_dir="$2"
local region="${AWS_REGION:-us-east-1}"
# Check instance exists
if ! aws lightsail get-instance \
--instance-name "${app}" \
--region "${region}" >/dev/null 2>&1; then
log_err "Instance ${app} does not exist after provisioning"
return 1
fi
log_ok "Instance ${app} exists"
# Resolve public IP
local instance_ip
instance_ip=$(aws lightsail get-instance \
--instance-name "${app}" \
--region "${region}" \
--query 'instance.publicIpAddress' \
--output text 2>/dev/null || true)
if [ -z "${instance_ip}" ] || [ "${instance_ip}" = "None" ]; then
log_err "Could not resolve public IP for ${app}"
return 1
fi
log_ok "Instance IP: ${instance_ip}"
# Write IP file for downstream steps
printf '%s' "${instance_ip}" > "${log_dir}/${app}.ip"
# Write metadata file
printf '{"name":"%s","region":"%s","ip":"%s"}\n' \
"${app}" "${region}" "${instance_ip}" \
> "${log_dir}/${app}.meta"
return 0
}
# ---------------------------------------------------------------------------
# _aws_exec APP CMD
#
# Resolve instance IP (cached), then run CMD via SSH.
# Returns the exit code of the remote command.
# ---------------------------------------------------------------------------
_aws_exec() {
local app="$1"
local cmd="$2"
# Resolve instance IP (cached per app)
if [ "${_AWS_INSTANCE_APP}" != "${app}" ] || [ -z "${_AWS_INSTANCE_IP}" ]; then
# Try reading from the IP file first (written by _aws_provision_verify)
if [ -n "${LOG_DIR:-}" ] && [ -f "${LOG_DIR}/${app}.ip" ]; then
_AWS_INSTANCE_IP=$(cat "${LOG_DIR}/${app}.ip")
else
_AWS_INSTANCE_IP=$(aws lightsail get-instance \
--instance-name "${app}" \
--region "${AWS_REGION:-us-east-1}" \
--query 'instance.publicIpAddress' \
--output text 2>/dev/null || true)
fi
_AWS_INSTANCE_APP="${app}"
if [ -z "${_AWS_INSTANCE_IP}" ] || [ "${_AWS_INSTANCE_IP}" = "None" ]; then
log_err "Could not resolve IP for instance ${app}"
return 1
fi
# Validate IP looks like an IPv4 address (defense-in-depth against API/file tampering)
if ! printf '%s' "${_AWS_INSTANCE_IP}" | grep -qE '^[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+$'; then
log_err "Invalid IP address for instance ${app}: ${_AWS_INSTANCE_IP}"
_AWS_INSTANCE_IP=""
_AWS_INSTANCE_APP=""
return 1
fi
fi
# Base64-encode the command to prevent shell injection when passed as an
# SSH argument. The encoded string contains only [A-Za-z0-9+/=] characters,
# making it safe to embed in single quotes. Stdin is preserved for callers
# that pipe data into cloud_exec.
local encoded_cmd
encoded_cmd=$(printf '%s' "${cmd}" | base64 | tr -d '\n')
# Validate base64 output contains only safe characters (defense-in-depth).
# Standard base64 only produces [A-Za-z0-9+/=]. This rejects any corruption
# and ensures the value cannot break out of single quotes in the SSH command.
if ! printf '%s' "${encoded_cmd}" | grep -qE '^[A-Za-z0-9+/=]+$'; then
log_err "Invalid base64 encoding of command for SSH exec"
return 1
fi
ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
-o ConnectTimeout=10 -o LogLevel=ERROR -o BatchMode=yes \
"ubuntu@${_AWS_INSTANCE_IP}" "printf '%s' '${encoded_cmd}' | base64 -d | bash"
}
# ---------------------------------------------------------------------------
# _aws_teardown APP
#
# Delete the Lightsail instance, verify deletion, and untrack it.
# ---------------------------------------------------------------------------
_aws_teardown() {
local app="$1"
local region="${AWS_REGION:-us-east-1}"
log_step "Tearing down ${app}..."
# Delete the instance
aws lightsail delete-instance \
--instance-name "${app}" \
--region "${region}" \
--force-delete-add-ons \
>/dev/null 2>&1 || true
# Brief wait for deletion to propagate
sleep 2
# Verify deletion
if aws lightsail get-instance --instance-name "${app}" --region "${region}" >/dev/null 2>&1; then
log_warn "Instance ${app} may still exist (AWS still reports it)"
else
log_ok "Instance ${app} torn down"
fi
# Clear IP cache if this was the cached instance
if [ "${_AWS_INSTANCE_APP}" = "${app}" ]; then
_AWS_INSTANCE_IP=""
_AWS_INSTANCE_APP=""
fi
untrack_app "${app}"
}
# ---------------------------------------------------------------------------
# _aws_cleanup_stale
#
# List all Lightsail instances, filter for e2e-* names, and destroy any
# older than 30 minutes (based on the unix timestamp embedded in the name).
# ---------------------------------------------------------------------------
_aws_cleanup_stale() {
local region="${AWS_REGION:-us-east-1}"
local now
now=$(date +%s)
local max_age=1800 # 30 minutes in seconds
# List all instances
local instances_json
instances_json=$(aws lightsail get-instances \
--region "${region}" \
--query 'instances[].name' \
--output json 2>/dev/null || true)
if [ -z "${instances_json}" ] || [ "${instances_json}" = "null" ] || [ "${instances_json}" = "[]" ]; then
log_info "Could not list instances or no instances found — skipping cleanup"
return 0
fi
# Extract instance names matching e2e-* pattern
local instance_names
instance_names=$(printf '%s' "${instances_json}" | jq -r '.[]? // empty' 2>/dev/null | grep '^e2e-' || true)
if [ -z "${instance_names}" ]; then
log_ok "No stale e2e instances found"
return 0
fi
local cleaned=0
local skipped=0
for instance_name in ${instance_names}; do
# Extract timestamp from name: e2e-AGENT-TIMESTAMP
# The timestamp is the last dash-separated segment
local ts
ts=$(printf '%s' "${instance_name}" | sed 's/.*-//')
# Validate it looks like a unix timestamp (all digits, 10 chars)
if ! printf '%s' "${ts}" | grep -qE '^[0-9]{10}$'; then
log_warn "Skipping ${instance_name} — cannot parse timestamp"
skipped=$((skipped + 1))
continue
fi
local age=$((now - ts))
if [ "${age}" -gt "${max_age}" ]; then
local age_str
age_str=$(format_duration "${age}")
log_step "Destroying stale instance ${instance_name} (age: ${age_str})"
_aws_teardown "${instance_name}" || log_warn "Failed to tear down ${instance_name}"
cleaned=$((cleaned + 1))
else
skipped=$((skipped + 1))
fi
done
if [ "${cleaned}" -gt 0 ]; then
log_ok "Cleaned ${cleaned} stale instance(s)"
fi
if [ "${skipped}" -gt 0 ]; then
log_info "Skipped ${skipped} recent instance(s)"
fi
}