diff --git a/kamatera/lib/common.sh b/kamatera/lib/common.sh index 9025f74c..f88a9e79 100644 --- a/kamatera/lib/common.sh +++ b/kamatera/lib/common.sh @@ -243,6 +243,76 @@ generate_server_password() { printf '%s' "$password" } +# Parse command IDs from a Kamatera API response +# Kamatera returns an array of command IDs, a single number, or a string +# Usage: parse_command_ids JSON_RESPONSE +parse_command_ids() { + local response="$1" + python3 -c " +import json, sys +data = json.loads(sys.stdin.read()) +if isinstance(data, list): + print(','.join(str(x) for x in data)) +elif isinstance(data, (int, float)): + print(int(data)) +else: + print(data) +" <<< "$response" 2>/dev/null +} + +# Poll Kamatera server info until a WAN IP address is available +# Sets: KAMATERA_SERVER_IP +# Usage: get_kamatera_server_ip SERVER_NAME [MAX_ATTEMPTS] +get_kamatera_server_ip() { + local name="$1" + local max_attempts=${2:-30} + local attempt=1 + + log_warn "Retrieving server IP address..." + while [[ "$attempt" -le "$max_attempts" ]]; do + local info_response + info_response=$(kamatera_api POST "/service/server/info" "{\"name\":\"$name\"}") + + KAMATERA_SERVER_IP=$(python3 -c " +import json, sys +data = json.loads(sys.stdin.read()) +if isinstance(data, list) and len(data) > 0: + server = data[0] +else: + server = data +networks = server.get('networks', []) +for net in networks: + net_name = net.get('network', '') + if net_name.startswith('wan'): + ips = net.get('ips', []) + if ips: + print(ips[0]) + sys.exit(0) +# Fallback: try power_on field or any IP +power = server.get('power', '') +if power == 'on': + for net in networks: + ips = net.get('ips', []) + if ips: + print(ips[0]) + sys.exit(0) +" <<< "$info_response" 2>/dev/null) + + if [[ -n "$KAMATERA_SERVER_IP" ]]; then + export KAMATERA_SERVER_IP + log_info "Server active: IP=$KAMATERA_SERVER_IP" + return 0 + fi + + log_warn "Waiting for server IP... (attempt $attempt/$max_attempts)" + sleep "$INSTANCE_STATUS_POLL_DELAY" + attempt=$((attempt + 1)) + done + + log_error "Failed to retrieve server IP address" + return 1 +} + create_server() { local name="$1" local datacenter="${KAMATERA_DATACENTER:-EU}" @@ -326,18 +396,8 @@ print(json.dumps(body)) local response response=$(kamatera_api POST "/service/server" "$body") - # Parse command ID from response (Kamatera returns array of command IDs) local command_ids - command_ids=$(python3 -c " -import json, sys -data = json.loads(sys.stdin.read()) -if isinstance(data, list): - print(','.join(str(x) for x in data)) -elif isinstance(data, (int, float)): - print(int(data)) -else: - print(data) -" <<< "$response" 2>/dev/null) + command_ids=$(parse_command_ids "$response") if [[ -z "$command_ids" ]]; then log_error "Failed to create Kamatera server" @@ -352,60 +412,13 @@ else: log_info "Server creation command submitted: $command_ids" - # Wait for the command to complete local queue_result queue_result=$(wait_for_command "$command_ids" 600) || return 1 - # Extract server name from the completed command KAMATERA_SERVER_NAME_ACTUAL="$name" export KAMATERA_SERVER_NAME_ACTUAL - # Get server info to retrieve IP address - log_warn "Retrieving server IP address..." - local max_info_attempts=30 - local info_attempt=1 - while [[ "$info_attempt" -le "$max_info_attempts" ]]; do - local info_response - info_response=$(kamatera_api POST "/service/server/info" "{\"name\":\"$name\"}") - - KAMATERA_SERVER_IP=$(python3 -c " -import json, sys -data = json.loads(sys.stdin.read()) -if isinstance(data, list) and len(data) > 0: - server = data[0] -else: - server = data -networks = server.get('networks', []) -for net in networks: - net_name = net.get('network', '') - if net_name.startswith('wan'): - ips = net.get('ips', []) - if ips: - print(ips[0]) - sys.exit(0) -# Fallback: try power_on field or any IP -power = server.get('power', '') -if power == 'on': - for net in networks: - ips = net.get('ips', []) - if ips: - print(ips[0]) - sys.exit(0) -" <<< "$info_response" 2>/dev/null) - - if [[ -n "$KAMATERA_SERVER_IP" ]]; then - export KAMATERA_SERVER_IP - log_info "Server active: IP=$KAMATERA_SERVER_IP" - return 0 - fi - - log_warn "Waiting for server IP... (attempt $info_attempt/$max_info_attempts)" - sleep "$INSTANCE_STATUS_POLL_DELAY" - info_attempt=$((info_attempt + 1)) - done - - log_error "Failed to retrieve server IP address" - return 1 + get_kamatera_server_ip "$name" } verify_server_connectivity() { @@ -438,18 +451,8 @@ destroy_server() { local response response=$(kamatera_api POST "/service/server/terminate" "{\"name\":\"$server_name\",\"force\":true}") - # Parse command ID and wait for completion local command_ids - command_ids=$(python3 -c " -import json, sys -data = json.loads(sys.stdin.read()) -if isinstance(data, list): - print(','.join(str(x) for x in data)) -elif isinstance(data, (int, float)): - print(int(data)) -else: - print(data) -" <<< "$response" 2>/dev/null) + command_ids=$(parse_command_ids "$response") if [[ -n "$command_ids" ]]; then wait_for_command "$command_ids" 120 || true diff --git a/latitude/lib/common.sh b/latitude/lib/common.sh index 6d90e651..cd8c720f 100644 --- a/latitude/lib/common.sh +++ b/latitude/lib/common.sh @@ -263,29 +263,12 @@ except: print(sys.stdin.read()) log_warn "Waiting for server provisioning (this may take a few minutes for bare metal)..." } -# Wait for server to become active and get its IP address -wait_for_server_ready() { - local server_id="$1" - local max_attempts=${2:-60} - local attempt=1 - - log_warn "Waiting for server $server_id to become active..." - while [[ "$attempt" -le "$max_attempts" ]]; do - local response - response=$(latitude_api GET "/servers/$server_id") - - local status - status=$(echo "$response" | python3 -c " -import json, sys -data = json.loads(sys.stdin.read()) -server = data.get('data', {}) -attrs = server.get('attributes', {}) -print(attrs.get('status', 'unknown')) -" 2>/dev/null || echo "unknown") - - if [[ "$status" == "on" ]] || [[ "$status" == "active" ]]; then - # Extract IP address - LATITUDE_SERVER_IP=$(echo "$response" | python3 -c " +# Extract the IPv4 address from a Latitude.sh server API response +# Checks network.ip, ip_addresses[], and primary_ipv4 fields +# Usage: extract_latitude_server_ip JSON_RESPONSE +extract_latitude_server_ip() { + local response="$1" + echo "$response" | python3 -c " import json, sys data = json.loads(sys.stdin.read()) server = data.get('data', {}) @@ -315,15 +298,36 @@ if primary: print(primary) sys.exit(0) sys.exit(1) -" 2>/dev/null) +" 2>/dev/null +} +# Wait for server to become active and get its IP address +wait_for_server_ready() { + local server_id="$1" + local max_attempts=${2:-60} + local attempt=1 + + log_warn "Waiting for server $server_id to become active..." + while [[ "$attempt" -le "$max_attempts" ]]; do + local response + response=$(latitude_api GET "/servers/$server_id") + + local status + status=$(echo "$response" | python3 -c " +import json, sys +data = json.loads(sys.stdin.read()) +server = data.get('data', {}) +attrs = server.get('attributes', {}) +print(attrs.get('status', 'unknown')) +" 2>/dev/null || echo "unknown") + + if [[ "$status" == "on" ]] || [[ "$status" == "active" ]]; then + LATITUDE_SERVER_IP=$(extract_latitude_server_ip "$response") if [[ -n "$LATITUDE_SERVER_IP" ]]; then export LATITUDE_SERVER_IP log_info "Server active: IP=$LATITUDE_SERVER_IP" return 0 fi - - # IP might not be assigned yet, keep waiting log_warn "Server active but IP not yet assigned... (attempt $attempt/$max_attempts)" else log_warn "Server status: $status (attempt $attempt/$max_attempts)" diff --git a/runpod/lib/common.sh b/runpod/lib/common.sh index dd7901cd..5f0e8053 100644 --- a/runpod/lib/common.sh +++ b/runpod/lib/common.sh @@ -102,6 +102,65 @@ get_server_name() { echo "${server_name}" } +# Wait for a RunPod pod to become ready and set SSH connection vars +# Sets: RUNPOD_SSH_HOST, RUNPOD_SSH_PORT, RUNPOD_SSH_USER +# Usage: wait_for_pod_ready POD_ID [MAX_ATTEMPTS] +wait_for_pod_ready() { + local pod_id="${1}" + local max_attempts=${2:-60} + local attempt=1 + + log_warn "Waiting for pod to become ready..." + while [[ "${attempt}" -le "${max_attempts}" ]]; do + local status_query='query { pod(input: { podId: "'"${pod_id}"'" }) { id name desiredStatus runtime { uptimeInSeconds ports { ip isIpPublic privatePort publicPort type } } } }' + local status_response + status_response=$(runpod_api "${status_query}") + + local runtime + runtime=$(echo "${status_response}" | python3 -c "import json,sys; r=json.loads(sys.stdin.read())['data']['pod']['runtime']; print('running' if r else 'pending')" 2>/dev/null || echo "pending") + + if [[ "${runtime}" == "running" ]]; then + # Extract SSH connection info from ports + local ssh_info + ssh_info=$(echo "${status_response}" | python3 -c " +import json, sys +data = json.loads(sys.stdin.read()) +ports = data['data']['pod']['runtime']['ports'] +for p in (ports or []): + if p['privatePort'] == 22 and p['type'] == 'tcp': + print(p['ip'] + ':' + str(p['publicPort'])) + sys.exit(0) +# No direct TCP port found, fall back to proxy SSH +print('proxy') +" 2>/dev/null || echo "proxy") + + if [[ "${ssh_info}" == "proxy" ]]; then + RUNPOD_SSH_HOST="ssh.runpod.io" + RUNPOD_SSH_PORT="22" + RUNPOD_SSH_USER="${pod_id}" + export RUNPOD_SSH_HOST RUNPOD_SSH_PORT RUNPOD_SSH_USER + log_info "Pod ready (using SSH proxy: ${RUNPOD_SSH_USER}@${RUNPOD_SSH_HOST})" + else + RUNPOD_SSH_HOST=$(echo "${ssh_info}" | cut -d: -f1) + RUNPOD_SSH_PORT=$(echo "${ssh_info}" | cut -d: -f2) + RUNPOD_SSH_USER="root" + export RUNPOD_SSH_HOST RUNPOD_SSH_PORT RUNPOD_SSH_USER + log_info "Pod ready: SSH at ${RUNPOD_SSH_HOST}:${RUNPOD_SSH_PORT}" + fi + return 0 + fi + + local desired_status + desired_status=$(echo "${status_response}" | python3 -c "import json,sys; print(json.loads(sys.stdin.read())['data']['pod']['desiredStatus'])" 2>/dev/null || echo "UNKNOWN") + log_warn "Pod status: ${desired_status}/${runtime} (${attempt}/${max_attempts})" + sleep "${INSTANCE_STATUS_POLL_DELAY}" + attempt=$((attempt + 1)) + done + + log_error "Pod did not become ready in time" + return 1 +} + create_server() { local name="${1}" local gpu_type="${RUNPOD_GPU_TYPE:-NVIDIA RTX A4000}" @@ -144,59 +203,7 @@ create_server() { export RUNPOD_POD_ID log_info "Pod created: ID=${RUNPOD_POD_ID}" - # Wait for pod to become ready and get SSH connection info - log_warn "Waiting for pod to become ready..." - local max_attempts=60 - local attempt=1 - while [[ "${attempt}" -le "${max_attempts}" ]]; do - local status_query='query { pod(input: { podId: "'"${RUNPOD_POD_ID}"'" }) { id name desiredStatus runtime { uptimeInSeconds ports { ip isIpPublic privatePort publicPort type } } } }' - local status_response - status_response=$(runpod_api "${status_query}") - - local runtime - runtime=$(echo "${status_response}" | python3 -c "import json,sys; r=json.loads(sys.stdin.read())['data']['pod']['runtime']; print('running' if r else 'pending')" 2>/dev/null || echo "pending") - - if [[ "${runtime}" == "running" ]]; then - # Extract SSH connection info from ports - local ssh_info - ssh_info=$(echo "${status_response}" | python3 -c " -import json, sys -data = json.loads(sys.stdin.read()) -ports = data['data']['pod']['runtime']['ports'] -for p in (ports or []): - if p['privatePort'] == 22 and p['type'] == 'tcp': - print(p['ip'] + ':' + str(p['publicPort'])) - sys.exit(0) -# No direct TCP port found, fall back to proxy SSH -print('proxy') -" 2>/dev/null || echo "proxy") - - if [[ "${ssh_info}" == "proxy" ]]; then - # Use RunPod SSH proxy - RUNPOD_SSH_HOST="ssh.runpod.io" - RUNPOD_SSH_PORT="22" - RUNPOD_SSH_USER="${RUNPOD_POD_ID}" - export RUNPOD_SSH_HOST RUNPOD_SSH_PORT RUNPOD_SSH_USER - log_info "Pod ready (using SSH proxy: ${RUNPOD_SSH_USER}@${RUNPOD_SSH_HOST})" - else - RUNPOD_SSH_HOST=$(echo "${ssh_info}" | cut -d: -f1) - RUNPOD_SSH_PORT=$(echo "${ssh_info}" | cut -d: -f2) - RUNPOD_SSH_USER="root" - export RUNPOD_SSH_HOST RUNPOD_SSH_PORT RUNPOD_SSH_USER - log_info "Pod ready: SSH at ${RUNPOD_SSH_HOST}:${RUNPOD_SSH_PORT}" - fi - return 0 - fi - - local desired_status - desired_status=$(echo "${status_response}" | python3 -c "import json,sys; print(json.loads(sys.stdin.read())['data']['pod']['desiredStatus'])" 2>/dev/null || echo "UNKNOWN") - log_warn "Pod status: ${desired_status}/${runtime} (${attempt}/${max_attempts})" - sleep "${INSTANCE_STATUS_POLL_DELAY}" - attempt=$((attempt + 1)) - done - - log_error "Pod did not become ready in time" - return 1 + wait_for_pod_ready "${RUNPOD_POD_ID}" } # Build SSH options string for RunPod (may use non-standard port)