feat: shared SSH wait utility with TCP pre-check and stderr capture (#1779)

Replace 5 duplicated SSH wait implementations (AWS, DO, Hetzner, GCP,
Sprite) with a shared two-phase utility in cli/src/shared/ssh.ts:

- Phase 1: cheap TCP probe (2s intervals) until port 22 opens
- Phase 2: full SSH handshake (3s intervals) with stderr capture
- Adds BatchMode=yes to prevent interactive prompt hangs
- Removes ~220 lines of duplicated sleep/SSH_OPTS/waitForSsh code

Daytona (token auth) and Fly (WireGuard) left unchanged — too different.

Co-authored-by: Claude <claude@anthropic.com>
Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
A 2026-02-22 19:17:09 -08:00 committed by GitHub
parent b62dc1af33
commit 0843c5e708
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
7 changed files with 189 additions and 219 deletions

View file

@ -1,6 +1,6 @@
{
"name": "@openrouter/spawn",
"version": "0.7.0",
"version": "0.7.1",
"type": "module",
"bin": {
"spawn": "cli.js"

View file

@ -17,6 +17,7 @@ import {
} from "../shared/ui";
import type { CloudInitTier } from "../shared/agents";
import { getPackagesForTier, needsNode, needsBun, NODE_INSTALL_CMD } from "../shared/cloud-init";
import { SSH_BASE_OPTS, sleep, waitForSsh as sharedWaitForSsh } from "../shared/ssh";
import * as v from "valibot";
import { parseJsonWith } from "../shared/parse";
@ -116,28 +117,7 @@ export function getState() {
const SSH_USER = "ubuntu";
const SSH_KEY_PATH = `${process.env.HOME}/.ssh/id_ed25519`;
const SSH_OPTS = [
"-o",
"StrictHostKeyChecking=no",
"-o",
"UserKnownHostsFile=/dev/null",
"-o",
"LogLevel=ERROR",
"-o",
"ConnectTimeout=10",
"-o",
"ServerAliveInterval=15",
"-o",
"ServerAliveCountMax=3",
"-i",
SSH_KEY_PATH,
];
// ─── Helpers ────────────────────────────────────────────────────────────────
function sleep(ms: number): Promise<void> {
return new Promise((r) => setTimeout(r, ms));
}
const SSH_OPTS = [...SSH_BASE_OPTS, "-i", SSH_KEY_PATH];
// ─── Valibot Schemas for AWS API Responses ──────────────────────────────────
@ -915,40 +895,13 @@ export function saveLaunchCmd(launchCmd: string): void {
// ─── SSH Execution ──────────────────────────────────────────────────────────
export async function waitForSsh(maxAttempts = 30): Promise<void> {
logStep("Waiting for SSH connectivity...");
for (let attempt = 1; attempt <= maxAttempts; attempt++) {
try {
const proc = Bun.spawn(
[
"ssh",
...SSH_OPTS,
`${SSH_USER}@${instanceIp}`,
"echo ok",
],
{
stdio: [
"ignore",
"pipe",
"ignore",
],
},
);
const stdout = await new Response(proc.stdout).text();
const exitCode = await proc.exited;
if (exitCode === 0 && stdout.includes("ok")) {
logInfo("SSH is ready");
return;
}
} catch {
// ignore
}
logStep(`SSH not ready yet (${attempt}/${maxAttempts})`);
await sleep(5000);
}
throw new Error("SSH connectivity timeout");
export async function waitForSsh(maxAttempts = 36): Promise<void> {
await sharedWaitForSsh({
host: instanceIp,
user: SSH_USER,
maxAttempts,
sshKeyPath: SSH_KEY_PATH,
});
}
export async function waitForCloudInit(maxAttempts = 60): Promise<void> {

View file

@ -18,6 +18,7 @@ import {
import type { CloudInitTier } from "../shared/agents";
import { getPackagesForTier, needsNode, needsBun, NODE_INSTALL_CMD } from "../shared/cloud-init";
import { parseJsonWith } from "../shared/parse";
import { SSH_BASE_OPTS, sleep, waitForSsh as sharedWaitForSsh } from "../shared/ssh";
const DO_API_BASE = "https://api.digitalocean.com/v2";
const DO_DASHBOARD_URL = "https://cloud.digitalocean.com/droplets";
@ -116,10 +117,6 @@ async function doApi(
// ─── Helpers ─────────────────────────────────────────────────────────────────
function sleep(ms: number): Promise<void> {
return new Promise((r) => setTimeout(r, ms));
}
const LooseObject = v.record(v.string(), v.unknown());
function parseJson(text: string): Record<string, unknown> | null {
@ -842,58 +839,15 @@ async function waitForDropletActive(dropletId: string, maxAttempts = 60): Promis
// ─── SSH Execution ───────────────────────────────────────────────────────────
const SSH_OPTS = [
"-o",
"StrictHostKeyChecking=no",
"-o",
"UserKnownHostsFile=/dev/null",
"-o",
"LogLevel=ERROR",
"-o",
"ConnectTimeout=10",
"-o",
"ServerAliveInterval=15",
"-o",
"ServerAliveCountMax=3",
];
const SSH_OPTS = SSH_BASE_OPTS;
export async function waitForCloudInit(ip?: string, maxAttempts = 60): Promise<void> {
export async function waitForCloudInit(ip?: string, _maxAttempts = 60): Promise<void> {
const serverIp = ip || doServerIp;
logStep("Waiting for SSH connectivity...");
for (let attempt = 1; attempt <= maxAttempts; attempt++) {
try {
const proc = Bun.spawn(
[
"ssh",
...SSH_OPTS,
`root@${serverIp}`,
"echo ok",
],
{
stdio: [
"ignore",
"pipe",
"pipe",
],
},
);
const stdout = await new Response(proc.stdout).text();
const exitCode = await proc.exited;
if (exitCode === 0 && stdout.includes("ok")) {
logInfo("SSH is ready");
break;
}
} catch {
// ignore
}
if (attempt >= maxAttempts) {
logError("SSH connectivity failed");
throw new Error("SSH wait timeout");
}
logStep(`SSH not ready yet (${attempt}/${maxAttempts})`);
await sleep(5000);
}
await sharedWaitForSsh({
host: serverIp,
user: "root",
maxAttempts: 36,
});
// Stream cloud-init output so the user sees progress in real time
logStep("Streaming cloud-init output (timeout: 5min)...");

View file

@ -16,6 +16,7 @@ import {
} from "../shared/ui";
import type { CloudInitTier } from "../shared/agents";
import { getPackagesForTier, needsNode, needsBun, NODE_INSTALL_CMD } from "../shared/cloud-init";
import { SSH_BASE_OPTS, sleep, waitForSsh as sharedWaitForSsh } from "../shared/ssh";
const DASHBOARD_URL = "https://console.cloud.google.com/compute/instances";
@ -141,12 +142,6 @@ export function getState() {
};
}
// ─── Helpers ────────────────────────────────────────────────────────────────
function sleep(ms: number): Promise<void> {
return new Promise((r) => setTimeout(r, ms));
}
// ─── gcloud CLI Wrapper ─────────────────────────────────────────────────────
function getGcloudCmd(): string | null {
@ -773,45 +768,15 @@ export async function createInstance(
// ─── SSH Operations ─────────────────────────────────────────────────────────
const SSH_OPTS = "-o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o LogLevel=ERROR -o ServerAliveInterval=15 -o ServerAliveCountMax=3";
const SSH_OPTS = SSH_BASE_OPTS;
export async function waitForSsh(maxAttempts = 30): Promise<void> {
logStep("Waiting for SSH connectivity...");
export async function waitForSsh(maxAttempts = 36): Promise<void> {
const username = resolveUsername();
for (let attempt = 1; attempt <= maxAttempts; attempt++) {
try {
const proc = Bun.spawn(
[
"ssh",
...SSH_OPTS.split(" "),
"-o",
"ConnectTimeout=5",
`${username}@${gcpServerIp}`,
"echo ok",
],
{
stdio: [
"ignore",
"pipe",
"pipe",
],
},
);
const stdout = await new Response(proc.stdout).text();
const exitCode = await proc.exited;
if (exitCode === 0 && stdout.includes("ok")) {
logInfo("SSH is ready");
return;
}
} catch {
// ignore
}
logStep(`SSH not ready yet (${attempt}/${maxAttempts})`);
await sleep(5000);
}
logError(`SSH connectivity failed after ${maxAttempts} attempts`);
throw new Error("SSH wait timeout");
await sharedWaitForSsh({
host: gcpServerIp,
user: username,
maxAttempts,
});
}
export async function waitForCloudInit(maxAttempts = 60): Promise<void> {
@ -825,9 +790,7 @@ export async function waitForCloudInit(maxAttempts = 60): Promise<void> {
const proc = Bun.spawn(
[
"ssh",
...SSH_OPTS.split(" "),
"-o",
"ConnectTimeout=5",
...SSH_OPTS,
`${username}@${gcpServerIp}`,
"test -f /tmp/.cloud-init-complete",
],
@ -859,7 +822,7 @@ export async function runServer(cmd: string, timeoutSecs?: number): Promise<void
const proc = Bun.spawn(
[
"ssh",
...SSH_OPTS.split(" "),
...SSH_OPTS,
`${username}@${gcpServerIp}`,
`bash -c ${shellQuote(fullCmd)}`,
],
@ -892,7 +855,7 @@ export async function runServerCapture(cmd: string, timeoutSecs?: number): Promi
const proc = Bun.spawn(
[
"ssh",
...SSH_OPTS.split(" "),
...SSH_OPTS,
`${username}@${gcpServerIp}`,
`bash -c ${shellQuote(fullCmd)}`,
],
@ -932,7 +895,7 @@ export async function uploadFile(localPath: string, remotePath: string): Promise
const proc = Bun.spawn(
[
"scp",
...SSH_OPTS.split(" "),
...SSH_OPTS,
localPath,
`${username}@${gcpServerIp}:${expandedPath}`,
],
@ -959,7 +922,7 @@ export async function interactiveSession(cmd: string): Promise<number> {
const proc = Bun.spawn(
[
"ssh",
...SSH_OPTS.split(" "),
...SSH_OPTS,
"-t",
`${username}@${gcpServerIp}`,
`bash -c ${shellQuote(fullCmd)}`,

View file

@ -16,6 +16,7 @@ import {
} from "../shared/ui";
import type { CloudInitTier } from "../shared/agents";
import { getPackagesForTier, needsNode, needsBun, NODE_INSTALL_CMD } from "../shared/cloud-init";
import { SSH_BASE_OPTS, sleep, waitForSsh as sharedWaitForSsh } from "../shared/ssh";
import * as v from "valibot";
import { parseJsonWith } from "../shared/parse";
@ -78,10 +79,6 @@ async function hetznerApi(method: string, endpoint: string, body?: string, maxRe
// ─── Helpers ─────────────────────────────────────────────────────────────────
function sleep(ms: number): Promise<void> {
return new Promise((r) => setTimeout(r, ms));
}
const LooseObject = v.record(v.string(), v.unknown());
function parseJson(text: string): Record<string, unknown> | null {
@ -478,58 +475,15 @@ export async function createServer(
// ─── SSH Execution ───────────────────────────────────────────────────────────
const SSH_OPTS = [
"-o",
"StrictHostKeyChecking=no",
"-o",
"UserKnownHostsFile=/dev/null",
"-o",
"LogLevel=ERROR",
"-o",
"ConnectTimeout=10",
"-o",
"ServerAliveInterval=15",
"-o",
"ServerAliveCountMax=3",
];
const SSH_OPTS = SSH_BASE_OPTS;
export async function waitForCloudInit(ip?: string, maxAttempts = 60): Promise<void> {
export async function waitForCloudInit(ip?: string, _maxAttempts = 60): Promise<void> {
const serverIp = ip || hetznerServerIp;
logStep("Waiting for SSH connectivity...");
for (let attempt = 1; attempt <= maxAttempts; attempt++) {
try {
const proc = Bun.spawn(
[
"ssh",
...SSH_OPTS,
`root@${serverIp}`,
"echo ok",
],
{
stdio: [
"ignore",
"pipe",
"pipe",
],
},
);
const stdout = await new Response(proc.stdout).text();
const exitCode = await proc.exited;
if (exitCode === 0 && stdout.includes("ok")) {
logInfo("SSH is ready");
break;
}
} catch {
// ignore
}
if (attempt >= maxAttempts) {
logError("SSH connectivity failed");
throw new Error("SSH wait timeout");
}
logStep(`SSH not ready yet (${attempt}/${maxAttempts})`);
await sleep(5000);
}
await sharedWaitForSsh({
host: serverIp,
user: "root",
maxAttempts: 36,
});
logStep("Waiting for cloud-init to complete...");
for (let attempt = 1; attempt <= 60; attempt++) {

149
cli/src/shared/ssh.ts Normal file
View file

@ -0,0 +1,149 @@
// shared/ssh.ts — Shared SSH wait utility with TCP pre-check and stderr capture
import { logInfo, logStep, logError } from "./ui";
import { connect } from "node:net";
// ─── Shared SSH Options ──────────────────────────────────────────────────────
/**
 * Base SSH options shared across all clouds, in the flat argv form expected
 * by `Bun.spawn` (each setting is preceded by its own `-o` flag).
 *
 * `BatchMode=yes` keeps ssh from blocking on an interactive prompt.
 */
export const SSH_BASE_OPTS: string[] = [
  "StrictHostKeyChecking=no",
  "UserKnownHostsFile=/dev/null",
  "LogLevel=ERROR",
  "ConnectTimeout=10",
  "ServerAliveInterval=15",
  "ServerAliveCountMax=3",
  "BatchMode=yes",
].flatMap((setting) => ["-o", setting]);
// ─── Helpers ─────────────────────────────────────────────────────────────────
/**
 * Pause the calling async function for `ms` milliseconds.
 *
 * @param ms Delay in milliseconds before the returned promise settles.
 * @returns A promise that resolves (with no value) once the timer fires.
 */
export function sleep(ms: number): Promise<void> {
  return new Promise<void>((wake) => {
    setTimeout(wake, ms);
  });
}
// ─── TCP Pre-Check ───────────────────────────────────────────────────────────
/**
 * Check whether `host:port` accepts a TCP connection.
 *
 * Opens a socket via `node:net` and reports the outcome as a boolean; the
 * socket is always destroyed before the promise settles. This is far cheaper
 * than attempting a complete SSH handshake.
 *
 * @param host Hostname or IP address to probe.
 * @param port TCP port to probe.
 * @param timeoutMs How long to wait for the connection before giving up.
 * @returns `true` if the connection was established within `timeoutMs`;
 *   `false` on a socket error or on timeout.
 */
export function tcpCheck(host: string, port: number, timeoutMs = 2000): Promise<boolean> {
  return new Promise((resolve) => {
    const probe = connect({ host, port });

    // Single exit path: stop the watchdog, release the socket, report result.
    // Clearing an already-fired timer is a no-op, so this is safe on timeout.
    const settle = (reachable: boolean): void => {
      clearTimeout(watchdog);
      probe.destroy();
      resolve(reachable);
    };

    const watchdog = setTimeout(() => settle(false), timeoutMs);

    probe.on("connect", () => settle(true));
    probe.on("error", () => settle(false));
  });
}
// ─── SSH Wait ────────────────────────────────────────────────────────────────
/** Options accepted by `waitForSsh`. */
export interface WaitForSshOpts {
/** Host to wait for: probed on TCP port 22, then used as the ssh target. */
host: string;
/** Login user for the handshake check (ssh connects as `user@host`). */
user: string;
/**
 * Attempt budget shared by both phases. Default: 36 (~3 min).
 * TCP probes consume it first; the handshake phase gets whatever is left,
 * but never fewer than 5 attempts.
 */
maxAttempts?: number;
/** Path to SSH identity file (e.g. ~/.ssh/id_ed25519); passed to ssh as `-i` when set. */
sshKeyPath?: string;
/** Extra SSH options appended after SSH_BASE_OPTS (and after `-i`, if a key path is given). */
extraSshOpts?: string[];
}
/**
 * Two-phase wait until a host accepts SSH logins.
 *
 * **Phase 1 (TCP probe):** poll port 22 with cheap TCP probes (2s timeout,
 * 2s pause between probes) until it opens. This avoids paying the 10s
 * `ConnectTimeout` of a full ssh attempt while sshd is not yet listening.
 *
 * **Phase 2 (SSH handshake):** once the port is open, run `ssh ... "echo ok"`
 * with 3s pauses between attempts. stderr is captured and logged so the user
 * sees the actual failure reason.
 *
 * Budget: phase 1 may use up to `maxAttempts` probes; phase 2 gets the
 * remainder, but always at least 5 attempts.
 *
 * @param opts Target host/user plus optional key path, extra ssh options and
 *   attempt budget (`maxAttempts`, default 36).
 * @returns Resolves once `echo ok` succeeds over SSH.
 * @throws Error if port 22 does not open within the probe budget, or if no
 *   handshake attempt succeeds.
 */
export async function waitForSsh(opts: WaitForSshOpts): Promise<void> {
  const { host, user, sshKeyPath, extraSshOpts } = opts;
  const maxAttempts = opts.maxAttempts ?? 36;

  // Build SSH args: shared base options, then identity file, then extras.
  const sshArgs: string[] = [...SSH_BASE_OPTS];
  if (sshKeyPath) {
    sshArgs.push("-i", sshKeyPath);
  }
  if (extraSshOpts) {
    sshArgs.push(...extraSshOpts);
  }

  // ── Phase 1: TCP probe ────────────────────────────────────────────────────
  logStep("Waiting for SSH port to open...");
  let attempt = 0;
  // Tracked explicitly: comparing `attempt` with `maxAttempts` after the loop
  // cannot tell "opened on the final probe" apart from "never opened".
  let portOpen = false;
  while (attempt < maxAttempts) {
    attempt += 1;
    if (await tcpCheck(host, 22, 2000)) {
      portOpen = true;
      logInfo("SSH port 22 is open");
      break;
    }
    logStep(`SSH port closed (${attempt}/${maxAttempts})`);
    // No point pausing after the final probe; fail right away instead.
    if (attempt < maxAttempts) {
      await sleep(2000);
    }
  }

  if (!portOpen) {
    logError(`SSH port 22 never opened after ${maxAttempts} attempts`);
    throw new Error("SSH connectivity timeout — port 22 never opened");
  }

  // ── Phase 2: SSH handshake ────────────────────────────────────────────────
  logStep("Waiting for SSH handshake...");
  const remaining = maxAttempts - attempt;
  // At least 5 handshake attempts even if TCP phase used most of the budget
  const handshakeAttempts = Math.max(remaining, 5);

  for (let i = 1; i <= handshakeAttempts; i++) {
    try {
      const proc = Bun.spawn(
        ["ssh", ...sshArgs, `${user}@${host}`, "echo ok"],
        { stdio: ["ignore", "pipe", "pipe"] },
      );
      // Drain both pipes concurrently so neither can fill up and stall ssh.
      const [stdout, stderr] = await Promise.all([
        new Response(proc.stdout).text(),
        new Response(proc.stderr).text(),
      ]);
      const exitCode = await proc.exited;

      if (exitCode === 0 && stdout.includes("ok")) {
        logInfo("SSH is ready");
        return;
      }

      // Show the actual SSH error reason so users can debug
      const reason = stderr.trim();
      if (reason) {
        logStep(`SSH handshake failed (${i}/${handshakeAttempts}): ${reason}`);
      } else {
        logStep(`SSH handshake failed (${i}/${handshakeAttempts})`);
      }
    } catch (err: unknown) {
      // Spawning or reading from ssh itself failed (e.g. binary missing);
      // surface the cause rather than discarding it.
      const detail = err instanceof Error ? err.message : String(err);
      logStep(`SSH handshake error (${i}/${handshakeAttempts}): ${detail}`);
    }
    // Skip the pause after the last attempt; we are about to give up.
    if (i < handshakeAttempts) {
      await sleep(3000);
    }
  }

  logError(`SSH handshake failed after ${handshakeAttempts} attempts`);
  throw new Error("SSH connectivity timeout — handshake never succeeded");
}

View file

@ -11,6 +11,7 @@ import {
toKebabCase,
defaultSpawnName,
} from "../shared/ui";
import { sleep } from "../shared/ssh";
// ─── Configurable Constants ──────────────────────────────────────────────────
@ -30,10 +31,6 @@ export function getState() {
// ─── Helpers ─────────────────────────────────────────────────────────────────
function sleep(ms: number): Promise<void> {
return new Promise((r) => setTimeout(r, ms));
}
/** Run a command locally and return { exitCode, stdout, stderr }. */
function spawnSync(args: string[]): {
exitCode: number;