spawn/gcp/lib/common.sh
A fd80f1992c
fix: improve error messages for GCP, AWS Lightsail, Cherry, and Oracle (#957)
- GCP: capture gcloud stderr on failure, add common issues guidance,
  use _log_diagnostic for ensure_gcloud errors
- AWS Lightsail: add common issues for create_server failure,
  use _log_diagnostic for ensure_aws_cli errors,
  improve instance timeout message with actionable steps
- Cherry Servers: use extract_api_error_message instead of raw response
  dump, add common issues for server creation failure
- Oracle Cloud: capture OCI CLI stderr on instance launch failure,
  add common issues for VCN, subnet, and instance creation errors

Agent: ux-engineer

Co-authored-by: A <6723574+louisgv@users.noreply.github.com>
Co-authored-by: Claude Sonnet 4.5 <noreply@anthropic.com>
2026-02-13 09:00:10 -08:00

205 lines
7.5 KiB
Bash

#!/bin/bash
# Common bash functions for GCP Compute Engine spawn scripts
# Uses gcloud CLI — requires Google Cloud SDK installed and configured

# Bash safety flags
set -eo pipefail

# ============================================================
# Provider-agnostic functions
# ============================================================

# Source shared provider-agnostic functions (local or remote fallback).
# The `|| SCRIPT_DIR=""` guard matters: under `set -e` a failing command
# substitution in a plain assignment aborts the script, which would make the
# remote fallback below unreachable when the directory cannot be resolved.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" 2>/dev/null && pwd)" || SCRIPT_DIR=""
if [[ -n "${SCRIPT_DIR}" && -f "${SCRIPT_DIR}/../../shared/common.sh" ]]; then
  source "${SCRIPT_DIR}/../../shared/common.sh"
else
  # NOTE(review): this evaluates code downloaded at runtime; the URL is fixed,
  # but the content is only as trustworthy as the upstream `main` branch.
  # Download into a variable first: `eval "$(curl ...)"` would evaluate an
  # empty string (status 0) when the download fails and carry on without any
  # of the shared helpers defined.
  if ! _spawn_shared_src="$(curl -fsSL https://raw.githubusercontent.com/OpenRouterTeam/spawn/main/shared/common.sh)"; then
    printf 'ERROR: failed to download shared/common.sh (check network connectivity)\n' >&2
    # `return` works when this file is sourced; fall back to `exit` otherwise.
    return 1 2>/dev/null || exit 1
  fi
  eval "${_spawn_shared_src}"
  unset _spawn_shared_src
fi
# Note: Provider-agnostic functions (logging, OAuth, browser, nc_listen) are now in shared/common.sh

# ============================================================
# GCP Compute Engine specific functions
# ============================================================

# Cache username to avoid repeated subprocess calls
GCP_USERNAME=$(whoami)
# SSH_OPTS is now defined in shared/common.sh
# Verify that the gcloud CLI can be used before touching any instance.
# Checks, in order: gcloud is on PATH, an active account is listed, and a
# project is available from GCP_PROJECT or the gcloud config.
# Globals:  GCP_PROJECT (read; exported with the resolved project on success)
# Returns:  0 when every check passes, 1 after logging a diagnostic otherwise
ensure_gcloud() {
  # 1. The CLI itself must be installed
  command -v gcloud &>/dev/null || {
    _log_diagnostic \
      "Google Cloud SDK (gcloud) is required but not installed" \
      "gcloud CLI has not been installed on this machine" \
      --- \
      "Install the Google Cloud SDK: https://cloud.google.com/sdk/docs/install" \
      "Or on macOS: brew install google-cloud-sdk"
    return 1
  }

  # 2. The first active account line must contain an '@'
  gcloud auth list --filter=status:ACTIVE --format="value(account)" 2>/dev/null \
    | head -1 \
    | grep -q '@' || {
    _log_diagnostic \
      "gcloud is not authenticated" \
      "No active Google Cloud account found" \
      "Previous authentication may have expired" \
      --- \
      "Run: gcloud auth login" \
      "Or set credentials via: export GOOGLE_APPLICATION_CREDENTIALS=/path/to/key.json"
    return 1
  }

  # 3. Resolve the project: the environment wins, gcloud config is the fallback.
  #    A failing lookup is tolerated here and handled by the emptiness check.
  local resolved="${GCP_PROJECT:-}"
  if [[ -z "${resolved}" ]]; then
    resolved="$(gcloud config get-value project 2>/dev/null)" || true
  fi

  case "${resolved}" in
    "" | "(unset)")
      _log_diagnostic \
        "No GCP project configured" \
        "No project is set in gcloud config or GCP_PROJECT env var" \
        --- \
        "Set via environment: export GCP_PROJECT=your-project-id" \
        "Or via gcloud: gcloud config set project YOUR_PROJECT" \
        "List your projects: gcloud projects list"
      return 1
      ;;
  esac

  export GCP_PROJECT="${resolved}"
  log_info "Using GCP project: ${resolved}"
}
# Make sure the local ed25519 key pair exists.
# Nothing is uploaded here: GCP receives the public key through instance
# metadata when the instance is created.
ensure_ssh_key() {
  generate_ssh_key_if_missing "${HOME}/.ssh/id_ed25519"
  log_info "SSH key ready"
}
# Resolve the instance name by delegating to get_resource_name with the
# GCP_INSTANCE_NAME variable name and the prompt text.
get_server_name() {
  local env_var="GCP_INSTANCE_NAME"
  local prompt="Enter instance name: "
  get_resource_name "${env_var}" "${prompt}"
}
# Print the instance startup script to stdout.
# create_server captures this output and sends it as the `startup-script`
# metadata value. The script installs base packages, attempts the Bun and
# Claude Code installers (failures are tolerated via `|| true`), writes a PATH
# snippet to /etc/profile.d/spawn.sh, and finally creates
# /tmp/.cloud-init-complete, the marker wait_for_cloud_init polls for.
# NOTE(review): the installers run as the account reported by `logname`, with
# `whoami` as fallback; which account that is at boot is not visible from this
# file — confirm it matches the GCP_USERNAME account used for SSH.
get_cloud_init_userdata() {
# Quoted delimiter: the text is emitted literally, nothing is expanded locally.
cat << 'CLOUD_INIT_EOF'
#!/bin/bash
apt-get update -y
apt-get install -y curl unzip git zsh
# Install Bun
su - $(logname 2>/dev/null || echo "$(whoami)") -c 'curl -fsSL https://bun.sh/install | bash' || true
# Install Claude Code
su - $(logname 2>/dev/null || echo "$(whoami)") -c 'curl -fsSL https://claude.ai/install.sh | bash' || true
# Configure PATH for all users
echo 'export PATH="${HOME}/.claude/local/bin:${HOME}/.bun/bin:${PATH}"' >> /etc/profile.d/spawn.sh
chmod +x /etc/profile.d/spawn.sh
touch /tmp/.cloud-init-complete
CLOUD_INIT_EOF
}
# Create a GCP Compute Engine instance and export its connection details.
# Globals:
#   GCP_MACHINE_TYPE (read, default e2-medium)
#   GCP_ZONE         (read, default us-central1-a; exported on success)
#   GCP_PROJECT, GCP_USERNAME, HOME (read)
#   GCP_INSTANCE_NAME_ACTUAL, GCP_SERVER_IP (exported)
# Arguments:
#   $1 - instance name
# Returns:
#   0 on success; 1 when input validation fails, the SSH public key is
#   missing, gcloud cannot create the instance, or no external IP is reported.
create_server() {
  local name="${1}"
  local machine_type="${GCP_MACHINE_TYPE:-e2-medium}"
  local zone="${GCP_ZONE:-us-central1-a}"
  local image_family="ubuntu-2404-lts-amd64"
  local image_project="ubuntu-os-cloud"

  # Validate env var inputs to prevent command injection
  validate_resource_name "${machine_type}" || { log_error "Invalid GCP_MACHINE_TYPE"; return 1; }
  validate_region_name "${zone}" || { log_error "Invalid GCP_ZONE"; return 1; }

  log_step "Creating GCP instance '${name}' (type: ${machine_type}, zone: ${zone})..."

  # Fail with a clear message instead of a bare `cat` error (or an empty key).
  local pub_key_file="${HOME}/.ssh/id_ed25519.pub"
  if [[ ! -r "${pub_key_file}" ]]; then
    log_error "SSH public key not found: ${pub_key_file}"
    return 1
  fi
  local pub_key
  pub_key=$(cat "${pub_key_file}")

  # The startup script goes through --metadata-from-file rather than being
  # spliced into --metadata: that flag takes a comma-separated KEY=VALUE list,
  # so any comma in the script would corrupt the metadata.
  local startup_script
  startup_script=$(mktemp)
  track_temp_file "${startup_script}"
  get_cloud_init_userdata >"${startup_script}"

  local gcloud_err
  gcloud_err=$(mktemp)
  track_temp_file "${gcloud_err}"

  if ! gcloud compute instances create "${name}" \
    --zone="${zone}" \
    --machine-type="${machine_type}" \
    --image-family="${image_family}" \
    --image-project="${image_project}" \
    --metadata="ssh-keys=${GCP_USERNAME}:${pub_key}" \
    --metadata-from-file="startup-script=${startup_script}" \
    --project="${GCP_PROJECT}" \
    --quiet \
    >/dev/null 2>"${gcloud_err}"; then
    log_error "Failed to create GCP instance"
    local err_output
    err_output=$(cat "${gcloud_err}" 2>/dev/null)
    if [[ -n "${err_output}" ]]; then
      log_error "gcloud error: ${err_output}"
    fi
    log_warn "Common issues:"
    log_warn " - Billing not enabled for the project (enable at https://console.cloud.google.com/billing)"
    log_warn " - Compute Engine API not enabled (enable at https://console.cloud.google.com/apis)"
    log_warn " - Instance quota exceeded in zone (try different GCP_ZONE)"
    log_warn " - Machine type unavailable in zone (try different GCP_MACHINE_TYPE or GCP_ZONE)"
    return 1
  fi

  # Export instance metadata for use by calling script
  # shellcheck disable=SC2034 # Variables exported for use by sourcing scripts
  export GCP_INSTANCE_NAME_ACTUAL="${name}"
  export GCP_ZONE="${zone}"

  # Get external IP. A failed lookup is folded into the empty-IP case so it is
  # reported below instead of silently aborting under `set -e`.
  local server_ip
  server_ip=$(gcloud compute instances describe "${name}" \
    --zone="${zone}" \
    --project="${GCP_PROJECT}" \
    --format='get(networkInterfaces[0].accessConfigs[0].natIP)' 2>/dev/null) || server_ip=""
  if [[ -z "${server_ip}" ]]; then
    log_error "Instance '${name}' was created but no external IP could be determined"
    log_warn "Inspect it with: gcloud compute instances describe ${name} --zone=${zone} --project=${GCP_PROJECT}"
    return 1
  fi

  export GCP_SERVER_IP="${server_ip}"
  log_info "Instance created: IP=${GCP_SERVER_IP}"
}
# Wait until the instance answers over SSH.
# Arguments:
#   $1 - instance IP address
#   $2 - maximum number of attempts (default 30)
# Returns: the status of the shared generic_ssh_wait helper
verify_server_connectivity() {
  local ip="${1}"
  local max_attempts="${2:-30}"

  # SSH_OPTS comes from shared/common.sh and is passed on as a single argument
  # shellcheck disable=SC2086,SC2154
  generic_ssh_wait \
    "${GCP_USERNAME}" "${ip}" "${SSH_OPTS}" \
    "echo ok" "SSH connectivity" "${max_attempts}"
}
# Wait for the instance startup script to finish.
# First waits for SSH to come up, then polls for the /tmp/.cloud-init-complete
# marker that the startup script creates as its last step.
# Arguments:
#   $1 - instance IP address
#   $2 - maximum attempts for the marker check (default 60)
# Returns:
#   0 once the marker exists; otherwise the non-zero status of whichever
#   generic_ssh_wait call failed.
wait_for_cloud_init() {
  local ip="${1}"
  local max_attempts="${2:-60}"

  # First establish SSH connectivity using generic_ssh_wait.
  # Return explicitly on failure: `set -e` is inactive whenever this function
  # is called from a condition (`if`, `||`, `&&`), and without the guard the
  # marker wait below would run anyway and could mask the failure.
  generic_ssh_wait "${GCP_USERNAME}" "${ip}" "${SSH_OPTS}" "echo ok" "SSH connectivity" 30 5 || return

  # Then wait for cloud-init completion marker
  generic_ssh_wait "${GCP_USERNAME}" "${ip}" "${SSH_OPTS}" "test -f /tmp/.cloud-init-complete" "startup script completion" "${max_attempts}" 5
}
# GCP uses current username
# Run a command on the instance over SSH as GCP_USERNAME.
# Arguments:
#   $1 - instance IP address
#   $2 - command line to execute remotely
run_server() {
  local ip="${1}"
  local cmd="${2}"
  local target="${GCP_USERNAME}@${ip}"

  # SSH_OPTS is left unquoted so it splits into individual ssh options
  # shellcheck disable=SC2086
  ssh ${SSH_OPTS} "${target}" "${cmd}"
}
# Copy a local file to the instance with scp as GCP_USERNAME.
# Arguments:
#   $1 - instance IP address
#   $2 - local source path
#   $3 - destination path on the instance
upload_file() {
  local ip="${1}"
  local local_path="${2}"
  local remote_path="${3}"
  local destination="${GCP_USERNAME}@${ip}:${remote_path}"

  # SSH_OPTS is left unquoted so it splits into individual scp options
  # shellcheck disable=SC2086
  scp ${SSH_OPTS} "${local_path}" "${destination}"
}
# Open an SSH session with a TTY (`-t`) on the instance and run a command in it.
# Arguments:
#   $1 - instance IP address
#   $2 - command line to run in the session
interactive_session() {
  local ip="${1}"
  local cmd="${2}"
  local target="${GCP_USERNAME}@${ip}"

  # SSH_OPTS is left unquoted so it splits into individual ssh options
  # shellcheck disable=SC2086
  ssh -t ${SSH_OPTS} "${target}" "${cmd}"
}
# Delete a GCP Compute Engine instance.
# Globals:
#   GCP_ZONE (read, default us-central1-a), GCP_PROJECT (read)
# Arguments:
#   $1 - instance name
# Returns:
#   0 when gcloud reports success; 1 (after logging gcloud's error output)
#   when the delete fails.
destroy_server() {
  local name="${1}"
  local zone="${GCP_ZONE:-us-central1-a}"

  log_step "Destroying GCP instance ${name}..."

  # Keep stderr for diagnostics while still discarding stdout. Checking the
  # status explicitly avoids logging "destroyed" after a failed delete.
  local delete_err
  if ! delete_err=$(gcloud compute instances delete "${name}" \
    --zone="${zone}" \
    --project="${GCP_PROJECT}" \
    --quiet 2>&1 >/dev/null); then
    log_error "Failed to destroy GCP instance ${name}"
    if [[ -n "${delete_err}" ]]; then
      log_error "gcloud error: ${delete_err}"
    fi
    return 1
  fi

  log_info "Instance ${name} destroyed"
}
# Print a table of the instances in GCP_PROJECT: name, zone, status,
# external IP and machine type.
list_servers() {
  local columns='name,zone,status,networkInterfaces[0].accessConfigs[0].natIP:label=EXTERNAL_IP,machineType.basename()'

  gcloud compute instances list \
    --project="${GCP_PROJECT}" \
    --format="table(${columns})"
}