diff --git a/cli/package.json b/cli/package.json index 929a42c0..631e267d 100644 --- a/cli/package.json +++ b/cli/package.json @@ -1,6 +1,6 @@ { "name": "@openrouter/spawn", - "version": "0.7.0", + "version": "0.7.1", "type": "module", "bin": { "spawn": "cli.js" diff --git a/cli/src/aws/aws.ts b/cli/src/aws/aws.ts index a064ae46..27c0c676 100644 --- a/cli/src/aws/aws.ts +++ b/cli/src/aws/aws.ts @@ -17,6 +17,7 @@ import { } from "../shared/ui"; import type { CloudInitTier } from "../shared/agents"; import { getPackagesForTier, needsNode, needsBun, NODE_INSTALL_CMD } from "../shared/cloud-init"; +import { SSH_BASE_OPTS, sleep, waitForSsh as sharedWaitForSsh } from "../shared/ssh"; import * as v from "valibot"; import { parseJsonWith } from "../shared/parse"; @@ -116,28 +117,7 @@ export function getState() { const SSH_USER = "ubuntu"; const SSH_KEY_PATH = `${process.env.HOME}/.ssh/id_ed25519`; -const SSH_OPTS = [ - "-o", - "StrictHostKeyChecking=no", - "-o", - "UserKnownHostsFile=/dev/null", - "-o", - "LogLevel=ERROR", - "-o", - "ConnectTimeout=10", - "-o", - "ServerAliveInterval=15", - "-o", - "ServerAliveCountMax=3", - "-i", - SSH_KEY_PATH, -]; - -// ─── Helpers ──────────────────────────────────────────────────────────────── - -function sleep(ms: number): Promise { - return new Promise((r) => setTimeout(r, ms)); -} +const SSH_OPTS = [...SSH_BASE_OPTS, "-i", SSH_KEY_PATH]; // ─── Valibot Schemas for AWS API Responses ────────────────────────────────── @@ -915,40 +895,13 @@ export function saveLaunchCmd(launchCmd: string): void { // ─── SSH Execution ────────────────────────────────────────────────────────── -export async function waitForSsh(maxAttempts = 30): Promise { - logStep("Waiting for SSH connectivity..."); - - for (let attempt = 1; attempt <= maxAttempts; attempt++) { - try { - const proc = Bun.spawn( - [ - "ssh", - ...SSH_OPTS, - `${SSH_USER}@${instanceIp}`, - "echo ok", - ], - { - stdio: [ - "ignore", - "pipe", - "ignore", - ], - }, - ); - const stdout = 
await new Response(proc.stdout).text(); - const exitCode = await proc.exited; - if (exitCode === 0 && stdout.includes("ok")) { - logInfo("SSH is ready"); - return; - } - } catch { - // ignore - } - logStep(`SSH not ready yet (${attempt}/${maxAttempts})`); - await sleep(5000); - } - - throw new Error("SSH connectivity timeout"); +export async function waitForSsh(maxAttempts = 36): Promise { + await sharedWaitForSsh({ + host: instanceIp, + user: SSH_USER, + maxAttempts, + sshKeyPath: SSH_KEY_PATH, + }); } export async function waitForCloudInit(maxAttempts = 60): Promise { diff --git a/cli/src/digitalocean/digitalocean.ts b/cli/src/digitalocean/digitalocean.ts index 3e31e260..d6a4e4a3 100644 --- a/cli/src/digitalocean/digitalocean.ts +++ b/cli/src/digitalocean/digitalocean.ts @@ -18,6 +18,7 @@ import { import type { CloudInitTier } from "../shared/agents"; import { getPackagesForTier, needsNode, needsBun, NODE_INSTALL_CMD } from "../shared/cloud-init"; import { parseJsonWith } from "../shared/parse"; +import { SSH_BASE_OPTS, sleep, waitForSsh as sharedWaitForSsh } from "../shared/ssh"; const DO_API_BASE = "https://api.digitalocean.com/v2"; const DO_DASHBOARD_URL = "https://cloud.digitalocean.com/droplets"; @@ -116,10 +117,6 @@ async function doApi( // ─── Helpers ───────────────────────────────────────────────────────────────── -function sleep(ms: number): Promise { - return new Promise((r) => setTimeout(r, ms)); -} - const LooseObject = v.record(v.string(), v.unknown()); function parseJson(text: string): Record | null { @@ -842,58 +839,15 @@ async function waitForDropletActive(dropletId: string, maxAttempts = 60): Promis // ─── SSH Execution ─────────────────────────────────────────────────────────── -const SSH_OPTS = [ - "-o", - "StrictHostKeyChecking=no", - "-o", - "UserKnownHostsFile=/dev/null", - "-o", - "LogLevel=ERROR", - "-o", - "ConnectTimeout=10", - "-o", - "ServerAliveInterval=15", - "-o", - "ServerAliveCountMax=3", -]; +const SSH_OPTS = SSH_BASE_OPTS; 
-export async function waitForCloudInit(ip?: string, maxAttempts = 60): Promise { +export async function waitForCloudInit(ip?: string, _maxAttempts = 60): Promise { const serverIp = ip || doServerIp; - logStep("Waiting for SSH connectivity..."); - - for (let attempt = 1; attempt <= maxAttempts; attempt++) { - try { - const proc = Bun.spawn( - [ - "ssh", - ...SSH_OPTS, - `root@${serverIp}`, - "echo ok", - ], - { - stdio: [ - "ignore", - "pipe", - "pipe", - ], - }, - ); - const stdout = await new Response(proc.stdout).text(); - const exitCode = await proc.exited; - if (exitCode === 0 && stdout.includes("ok")) { - logInfo("SSH is ready"); - break; - } - } catch { - // ignore - } - if (attempt >= maxAttempts) { - logError("SSH connectivity failed"); - throw new Error("SSH wait timeout"); - } - logStep(`SSH not ready yet (${attempt}/${maxAttempts})`); - await sleep(5000); - } + await sharedWaitForSsh({ + host: serverIp, + user: "root", + maxAttempts: 36, + }); // Stream cloud-init output so the user sees progress in real time logStep("Streaming cloud-init output (timeout: 5min)..."); diff --git a/cli/src/gcp/gcp.ts b/cli/src/gcp/gcp.ts index b3bcadd8..a2228428 100644 --- a/cli/src/gcp/gcp.ts +++ b/cli/src/gcp/gcp.ts @@ -16,6 +16,7 @@ import { } from "../shared/ui"; import type { CloudInitTier } from "../shared/agents"; import { getPackagesForTier, needsNode, needsBun, NODE_INSTALL_CMD } from "../shared/cloud-init"; +import { SSH_BASE_OPTS, sleep, waitForSsh as sharedWaitForSsh } from "../shared/ssh"; const DASHBOARD_URL = "https://console.cloud.google.com/compute/instances"; @@ -141,12 +142,6 @@ export function getState() { }; } -// ─── Helpers ──────────────────────────────────────────────────────────────── - -function sleep(ms: number): Promise { - return new Promise((r) => setTimeout(r, ms)); -} - // ─── gcloud CLI Wrapper ───────────────────────────────────────────────────── function getGcloudCmd(): string | null { @@ -773,45 +768,15 @@ export async function 
createInstance( // ─── SSH Operations ───────────────────────────────────────────────────────── -const SSH_OPTS = "-o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o LogLevel=ERROR -o ServerAliveInterval=15 -o ServerAliveCountMax=3"; +const SSH_OPTS = SSH_BASE_OPTS; -export async function waitForSsh(maxAttempts = 30): Promise { - logStep("Waiting for SSH connectivity..."); +export async function waitForSsh(maxAttempts = 36): Promise { const username = resolveUsername(); - - for (let attempt = 1; attempt <= maxAttempts; attempt++) { - try { - const proc = Bun.spawn( - [ - "ssh", - ...SSH_OPTS.split(" "), - "-o", - "ConnectTimeout=5", - `${username}@${gcpServerIp}`, - "echo ok", - ], - { - stdio: [ - "ignore", - "pipe", - "pipe", - ], - }, - ); - const stdout = await new Response(proc.stdout).text(); - const exitCode = await proc.exited; - if (exitCode === 0 && stdout.includes("ok")) { - logInfo("SSH is ready"); - return; - } - } catch { - // ignore - } - logStep(`SSH not ready yet (${attempt}/${maxAttempts})`); - await sleep(5000); - } - logError(`SSH connectivity failed after ${maxAttempts} attempts`); - throw new Error("SSH wait timeout"); + await sharedWaitForSsh({ + host: gcpServerIp, + user: username, + maxAttempts, + }); } export async function waitForCloudInit(maxAttempts = 60): Promise { @@ -825,9 +790,7 @@ export async function waitForCloudInit(maxAttempts = 60): Promise { const proc = Bun.spawn( [ "ssh", - ...SSH_OPTS.split(" "), - "-o", - "ConnectTimeout=5", + ...SSH_OPTS, `${username}@${gcpServerIp}`, "test -f /tmp/.cloud-init-complete", ], @@ -859,7 +822,7 @@ export async function runServer(cmd: string, timeoutSecs?: number): Promise { const proc = Bun.spawn( [ "ssh", - ...SSH_OPTS.split(" "), + ...SSH_OPTS, "-t", `${username}@${gcpServerIp}`, `bash -c ${shellQuote(fullCmd)}`, diff --git a/cli/src/hetzner/hetzner.ts b/cli/src/hetzner/hetzner.ts index 0b9d734a..b7dc53af 100644 --- a/cli/src/hetzner/hetzner.ts +++ b/cli/src/hetzner/hetzner.ts 
@@ -16,6 +16,7 @@ import { } from "../shared/ui"; import type { CloudInitTier } from "../shared/agents"; import { getPackagesForTier, needsNode, needsBun, NODE_INSTALL_CMD } from "../shared/cloud-init"; +import { SSH_BASE_OPTS, sleep, waitForSsh as sharedWaitForSsh } from "../shared/ssh"; import * as v from "valibot"; import { parseJsonWith } from "../shared/parse"; @@ -78,10 +79,6 @@ async function hetznerApi(method: string, endpoint: string, body?: string, maxRe // ─── Helpers ───────────────────────────────────────────────────────────────── -function sleep(ms: number): Promise { - return new Promise((r) => setTimeout(r, ms)); -} - const LooseObject = v.record(v.string(), v.unknown()); function parseJson(text: string): Record | null { @@ -478,58 +475,15 @@ export async function createServer( // ─── SSH Execution ─────────────────────────────────────────────────────────── -const SSH_OPTS = [ - "-o", - "StrictHostKeyChecking=no", - "-o", - "UserKnownHostsFile=/dev/null", - "-o", - "LogLevel=ERROR", - "-o", - "ConnectTimeout=10", - "-o", - "ServerAliveInterval=15", - "-o", - "ServerAliveCountMax=3", -]; +const SSH_OPTS = SSH_BASE_OPTS; -export async function waitForCloudInit(ip?: string, maxAttempts = 60): Promise { +export async function waitForCloudInit(ip?: string, _maxAttempts = 60): Promise { const serverIp = ip || hetznerServerIp; - logStep("Waiting for SSH connectivity..."); - - for (let attempt = 1; attempt <= maxAttempts; attempt++) { - try { - const proc = Bun.spawn( - [ - "ssh", - ...SSH_OPTS, - `root@${serverIp}`, - "echo ok", - ], - { - stdio: [ - "ignore", - "pipe", - "pipe", - ], - }, - ); - const stdout = await new Response(proc.stdout).text(); - const exitCode = await proc.exited; - if (exitCode === 0 && stdout.includes("ok")) { - logInfo("SSH is ready"); - break; - } - } catch { - // ignore - } - if (attempt >= maxAttempts) { - logError("SSH connectivity failed"); - throw new Error("SSH wait timeout"); - } - logStep(`SSH not ready yet 
// shared/ssh.ts — Shared SSH wait utility with TCP pre-check and stderr capture
//
// Module imports (unchanged): `logInfo`, `logStep`, `logError` from "./ui"
// and `connect` from "node:net".

// ─── Shared SSH Options ──────────────────────────────────────────────────────

/**
 * Base SSH options shared across all clouds (array form for Bun.spawn).
 *
 * Callers that need extra flags should spread this into a new array
 * (`[...SSH_BASE_OPTS, "-i", key]`) rather than mutate it, because several
 * providers alias this exact array instance.
 */
export const SSH_BASE_OPTS: string[] = [
  "-o", "StrictHostKeyChecking=no",
  "-o", "UserKnownHostsFile=/dev/null",
  "-o", "LogLevel=ERROR",
  "-o", "ConnectTimeout=10",
  "-o", "ServerAliveInterval=15",
  "-o", "ServerAliveCountMax=3",
  "-o", "BatchMode=yes",
];

/** Port probed and connected to when the caller does not specify one. */
const DEFAULT_SSH_PORT = 22;
/** Default total attempt budget for {@link waitForSsh}. */
const DEFAULT_MAX_ATTEMPTS = 36;
/** Per-probe timeout and pause between probes in the TCP phase. */
const TCP_PROBE_TIMEOUT_MS = 2000;
const TCP_PROBE_INTERVAL_MS = 2000;
/** Pause between full SSH handshake attempts. */
const HANDSHAKE_INTERVAL_MS = 3000;
/** Handshake attempts granted even when the TCP phase used up the budget. */
const MIN_HANDSHAKE_ATTEMPTS = 5;

// ─── Helpers ─────────────────────────────────────────────────────────────────

/** Async sleep — shared across all cloud providers. */
export function sleep(ms: number): Promise<void> {
  return new Promise<void>((resolve) => setTimeout(resolve, ms));
}

// ─── TCP Pre-Check ───────────────────────────────────────────────────────────

/**
 * Probe whether a TCP port is open using node:net.
 *
 * @param host      Hostname or IP address to probe.
 * @param port      TCP port to probe.
 * @param timeoutMs How long to wait for the connection before giving up.
 * @returns `true` if the connection succeeds within `timeoutMs`, `false` on
 *          timeout, socket error, or an argument `connect()` refuses
 *          (e.g. an out-of-range port). Never rejects.
 */
export function tcpCheck(host: string, port: number, timeoutMs = 2000): Promise<boolean> {
  return new Promise<boolean>((resolve) => {
    let socket: ReturnType<typeof connect>;
    try {
      socket = connect({ host, port });
    } catch {
      // connect() throws synchronously on invalid arguments; the documented
      // contract is "false otherwise", so report the port as not open.
      resolve(false);
      return;
    }

    let settled = false;
    const timer = setTimeout(() => finish(false), timeoutMs);

    // Single exit point: guarantees the timer is cleared and the socket is
    // destroyed exactly once, whichever event wins the race.
    function finish(open: boolean): void {
      if (settled) {
        return;
      }
      settled = true;
      clearTimeout(timer);
      socket.destroy();
      resolve(open);
    }

    socket.once("connect", () => finish(true));
    // Kept as a persistent listener so a late "error" event can never surface
    // as an unhandled EventEmitter error.
    socket.on("error", () => finish(false));
  });
}

// ─── SSH Wait ────────────────────────────────────────────────────────────────

export interface WaitForSshOpts {
  host: string;
  user: string;
  /** Attempt budget shared by both phases. Default: 36. */
  maxAttempts?: number;
  /** Path to SSH identity file (e.g. ~/.ssh/id_ed25519). */
  sshKeyPath?: string;
  /** Extra SSH options appended after SSH_BASE_OPTS. */
  extraSshOpts?: string[];
  /** TCP port sshd listens on. Default: 22. */
  port?: number;
}

/**
 * Two-phase SSH wait.
 *
 * **Phase 1 (TCP probe):** probe the SSH port with cheap TCP connects, 2s
 * apart, until it accepts a connection. This avoids paying the 10s SSH
 * `ConnectTimeout` while sshd is not listening yet.
 *
 * **Phase 2 (SSH handshake):** once the port is open, run `ssh … echo ok`,
 * 3s apart, and surface ssh's stderr so the user sees why an attempt failed.
 *
 * Phase 2 gets whatever is left of `maxAttempts` after phase 1, but never
 * fewer than 5 attempts, so the combined total can exceed `maxAttempts`.
 *
 * @throws Error if the port never opens or no handshake succeeds.
 */
export async function waitForSsh(opts: WaitForSshOpts): Promise<void> {
  const { host, user, sshKeyPath, extraSshOpts } = opts;
  const maxAttempts = opts.maxAttempts ?? DEFAULT_MAX_ATTEMPTS;
  const port = opts.port ?? DEFAULT_SSH_PORT;

  // Build SSH args
  const sshArgs: string[] = [...SSH_BASE_OPTS];
  if (port !== DEFAULT_SSH_PORT) {
    sshArgs.push("-p", String(port));
  }
  if (sshKeyPath) {
    sshArgs.push("-i", sshKeyPath);
  }
  if (extraSshOpts) {
    sshArgs.push(...extraSshOpts);
  }

  // ── Phase 1: TCP probe ────────────────────────────────────────────────────
  logStep("Waiting for SSH port to open...");
  let attempt = 0;
  // Success is tracked explicitly: inferring failure from the attempt counter
  // misreports a port that opens on the very last probe as a timeout.
  let portOpen = false;
  while (attempt < maxAttempts) {
    attempt += 1;
    if (await tcpCheck(host, port, TCP_PROBE_TIMEOUT_MS)) {
      portOpen = true;
      logInfo(`SSH port ${port} is open`);
      break;
    }
    logStep(`SSH port closed (${attempt}/${maxAttempts})`);
    // No point sleeping after the final probe — fail right away.
    if (attempt < maxAttempts) {
      await sleep(TCP_PROBE_INTERVAL_MS);
    }
  }

  if (!portOpen) {
    logError(`SSH port ${port} never opened after ${maxAttempts} attempts`);
    throw new Error(`SSH connectivity timeout — port ${port} never opened`);
  }

  // ── Phase 2: SSH handshake ────────────────────────────────────────────────
  logStep("Waiting for SSH handshake...");
  const remaining = maxAttempts - attempt;
  // At least 5 handshake attempts even if TCP phase used most of the budget
  const handshakeAttempts = Math.max(remaining, MIN_HANDSHAKE_ATTEMPTS);

  for (let i = 1; i <= handshakeAttempts; i++) {
    try {
      const proc = Bun.spawn(
        ["ssh", ...sshArgs, `${user}@${host}`, "echo ok"],
        { stdio: ["ignore", "pipe", "pipe"] },
      );
      // Drain both pipes concurrently so a chatty stderr cannot block ssh.
      const [stdout, stderr] = await Promise.all([
        new Response(proc.stdout).text(),
        new Response(proc.stderr).text(),
      ]);
      const exitCode = await proc.exited;

      if (exitCode === 0 && stdout.includes("ok")) {
        logInfo("SSH is ready");
        return;
      }

      // Show the actual SSH error reason so users can debug
      const reason = stderr.trim();
      if (reason) {
        logStep(`SSH handshake failed (${i}/${handshakeAttempts}): ${reason}`);
      } else {
        logStep(`SSH handshake failed (${i}/${handshakeAttempts})`);
      }
    } catch (err: unknown) {
      // Typically a spawn failure (e.g. `ssh` not on PATH); include the cause.
      const detail = err instanceof Error ? err.message : String(err);
      logStep(`SSH handshake error (${i}/${handshakeAttempts}): ${detail}`);
    }
    if (i < handshakeAttempts) {
      await sleep(HANDSHAKE_INTERVAL_MS);
    }
  }

  logError(`SSH handshake failed after ${handshakeAttempts} attempts`);
  throw new Error("SSH connectivity timeout — handshake never succeeded");
}