feat(status): add agent alive probe via SSH (#3109)

`spawn status` now probes running servers by SSHing in and running
`{agent} --version` to verify the agent binary is installed and
executable. Results show in a new "Probe" column (live/down/—) and
as `agent_alive` in JSON output. Only "running" servers are probed;
gone/stopped/unknown servers are skipped.

The probe function is injectable via opts for testability.

Co-authored-by: spawn-bot <spawn-bot@openrouter.ai>
Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
A 2026-03-29 22:44:46 -07:00 committed by GitHub
parent ddce16a438
commit 749f79a9c2
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 329 additions and 8 deletions

View file

@ -1,6 +1,6 @@
{
"name": "@openrouter/spawn",
"version": "0.28.2",
"version": "0.29.0",
"type": "module",
"bin": {
"spawn": "cli.js"

View file

@ -151,6 +151,7 @@ describe("cmdStatus", () => {
await cmdStatus({
json: true,
probe: async () => true,
});
expect(fetchedUrls.some((u) => u.includes("hetzner.cloud/v1/servers/12345"))).toBe(true);
});
@ -193,6 +194,7 @@ describe("cmdStatus", () => {
await cmdStatus({
json: true,
probe: async () => true,
});
expect(fetchedUrls.some((u) => u.includes("digitalocean.com/v2/droplets/99999"))).toBe(true);
});
@ -415,10 +417,189 @@ describe("cmdStatus", () => {
return new Response(JSON.stringify(mockManifest));
});
await cmdStatus();
await cmdStatus({
probe: async () => true,
});
const infoCalls = clack.logInfo.mock.calls.map((c: unknown[]) => String(c[0]));
// Should mention running servers and spawn list
expect(infoCalls.some((msg: string) => msg.includes("running"))).toBe(true);
});
// ── Agent probe tests ───────────────────────────────────────────────────
// A server the (mocked) Hetzner API reports as "running" is probed, and a
// probe result of `true` must surface as `agent_alive: true` in JSON output.
it("probes running server and reports agent_alive true in JSON", async () => {
// Seed history with a single Hetzner-backed spawn record.
writeHistory(testDir, [
{
id: "probe-live",
agent: "claude",
cloud: "hetzner",
timestamp: new Date().toISOString(),
connection: {
ip: "1.2.3.4",
user: "root",
cloud: "hetzner",
server_id: "12345",
},
},
]);
writeCloudConfig("hetzner", {
api_key: "test-token",
});
_resetCacheForTesting();
// Hetzner URLs answer with a running server; every other request receives the mock manifest.
global.fetch = mock(async (url: string | URL | Request) => {
const u = isString(url) ? url : url instanceof URL ? url.toString() : url.url;
if (u.includes("hetzner.cloud")) {
return new Response(
JSON.stringify({
server: {
status: "running",
},
}),
);
}
return new Response(JSON.stringify(mockManifest));
});
// Inject a probe stub that always reports the agent as alive.
await cmdStatus({
json: true,
probe: async () => true,
});
// Join everything written through consoleSpy and parse it as the JSON result array.
const output = consoleSpy.mock.calls.map((c: unknown[]) => String(c[0])).join("");
const parsed = JSON.parse(output);
expect(parsed[0].agent_alive).toBe(true);
});
// Same setup as the "alive" case, but the injected probe reports failure:
// the JSON entry must carry `agent_alive: false` (not null).
it("probes running server and reports agent_alive false in JSON", async () => {
// Seed history with a single Hetzner-backed spawn record.
writeHistory(testDir, [
{
id: "probe-down",
agent: "claude",
cloud: "hetzner",
timestamp: new Date().toISOString(),
connection: {
ip: "1.2.3.4",
user: "root",
cloud: "hetzner",
server_id: "12345",
},
},
]);
writeCloudConfig("hetzner", {
api_key: "test-token",
});
_resetCacheForTesting();
// Hetzner URLs answer with a running server; every other request receives the mock manifest.
global.fetch = mock(async (url: string | URL | Request) => {
const u = isString(url) ? url : url instanceof URL ? url.toString() : url.url;
if (u.includes("hetzner.cloud")) {
return new Response(
JSON.stringify({
server: {
status: "running",
},
}),
);
}
return new Response(JSON.stringify(mockManifest));
});
// Inject a probe stub that always reports the agent as not alive.
await cmdStatus({
json: true,
probe: async () => false,
});
// Join everything written through consoleSpy and parse it as the JSON result array.
const output = consoleSpy.mock.calls.map((c: unknown[]) => String(c[0])).join("");
const parsed = JSON.parse(output);
expect(parsed[0].agent_alive).toBe(false);
});
// When the cloud API answers 404 for the server, the probe must not be
// invoked at all and the JSON entry must report `agent_alive: null`.
it("does not probe gone servers — agent_alive is null", async () => {
// Seed history with a single Hetzner-backed spawn record.
writeHistory(testDir, [
{
id: "probe-gone",
agent: "claude",
cloud: "hetzner",
timestamp: new Date().toISOString(),
connection: {
ip: "1.2.3.4",
user: "root",
cloud: "hetzner",
server_id: "12345",
},
},
]);
writeCloudConfig("hetzner", {
api_key: "test-token",
});
// Flipped by the probe stub below so the test can detect an unwanted probe call.
let probeCalled = false;
_resetCacheForTesting();
// Hetzner URLs answer 404; every other request receives the mock manifest.
global.fetch = mock(async (url: string | URL | Request) => {
const u = isString(url) ? url : url instanceof URL ? url.toString() : url.url;
if (u.includes("hetzner.cloud")) {
return new Response("Not Found", {
status: 404,
});
}
return new Response(JSON.stringify(mockManifest));
});
await cmdStatus({
json: true,
probe: async () => {
probeCalled = true;
return true;
},
});
// The stub returns true, so a null agent_alive can only come from the probe being skipped.
expect(probeCalled).toBe(false);
const output = consoleSpy.mock.calls.map((c: unknown[]) => String(c[0])).join("");
const parsed = JSON.parse(output);
expect(parsed[0].agent_alive).toBeNull();
});
// In table (non-JSON) mode, a running server whose probe fails must produce
// an info-level message containing the word "unreachable".
it("shows unreachable warning when probe fails in table mode", async () => {
// Seed history with a single Hetzner-backed spawn record.
writeHistory(testDir, [
{
id: "probe-warn",
agent: "claude",
cloud: "hetzner",
timestamp: new Date().toISOString(),
connection: {
ip: "1.2.3.4",
user: "root",
cloud: "hetzner",
server_id: "12345",
},
},
]);
writeCloudConfig("hetzner", {
api_key: "test-token",
});
_resetCacheForTesting();
// Hetzner URLs answer with a running server; every other request receives the mock manifest.
global.fetch = mock(async (url: string | URL | Request) => {
const u = isString(url) ? url : url instanceof URL ? url.toString() : url.url;
if (u.includes("hetzner.cloud")) {
return new Response(
JSON.stringify({
server: {
status: "running",
},
}),
);
}
return new Response(JSON.stringify(mockManifest));
});
// No `json` flag: exercise the table path with a probe stub that always fails.
await cmdStatus({
probe: async () => false,
});
// Collect the first argument of every logInfo call on the clack mock.
const infoCalls = clack.logInfo.mock.calls.map((c: unknown[]) => String(c[0]));
expect(infoCalls.some((msg: string) => msg.includes("unreachable"))).toBe(true);
});
});

View file

@ -8,7 +8,8 @@ import { filterHistory, markRecordDeleted } from "../history.js";
import { loadManifest } from "../manifest.js";
import { validateServerIdentifier } from "../security.js";
import { parseJsonObj } from "../shared/parse.js";
import { asyncTryCatchIf, isNetworkError, tryCatch, unwrapOr } from "../shared/result.js";
import { asyncTryCatch, asyncTryCatchIf, isNetworkError, tryCatch, unwrapOr } from "../shared/result.js";
import { SSH_BASE_OPTS } from "../shared/ssh.js";
import { loadApiToken } from "../shared/ui.js";
import { formatRelativeTime } from "./list.js";
import { resolveDisplayName } from "./shared.js";
@ -20,6 +21,7 @@ type LiveState = "running" | "stopped" | "gone" | "unknown";
/** Outcome of checking one spawn record: its live state plus the agent probe result. */
interface ServerStatusResult {
/** The history record that was checked. */
record: SpawnRecord;
/** State resolved by checkServerStatus for this record. */
liveState: LiveState;
/** Agent probe result; null when the server was not probed (cmdStatus probes only "running" servers). */
agentAlive: boolean | null;
}
interface JsonStatusEntry {
@ -29,6 +31,7 @@ interface JsonStatusEntry {
ip: string;
name: string;
state: LiveState;
agent_alive: boolean | null;
spawned_at: string;
server_id: string;
}
@ -148,6 +151,107 @@ async function checkServerStatus(record: SpawnRecord): Promise<LiveState> {
}
}
// ── Agent alive probe ───────────────────────────────────────────────────────
/**
 * Resolve the agent binary name from the manifest or the stored launch command.
 *
 * The manifest's `launch` string for `record.agent` takes precedence. When it is
 * absent, the last non-empty `;`-separated segment of
 * `record.connection.launch_cmd` is used instead (i.e. the command that follows
 * any source/export prefixes). In both cases the result is the first
 * whitespace-delimited word (e.g. "openclaw tui" → "openclaw").
 *
 * @param record - Spawn history record whose agent binary should be resolved.
 * @param manifest - Loaded manifest, or null when none is available.
 * @returns The binary name, or null when neither source yields one.
 */
function resolveAgentBinary(record: SpawnRecord, manifest: Manifest | null): string | null {
  // Trim first: a leading space would otherwise make split() yield "" as the first token.
  const firstWord = (command: string): string | null => command.trim().split(/\s+/)[0] || null;

  const fromManifest = manifest?.agents[record.agent]?.launch;
  if (fromManifest) {
    return firstWord(fromManifest);
  }

  // Fallback: extract the last command from launch_cmd (after all source/export prefixes).
  const launchCmd = record.connection?.launch_cmd;
  if (launchCmd) {
    // Drop empty segments so a trailing ";" does not hide the real last command.
    const segments = launchCmd
      .split(";")
      .map((segment) => segment.trim())
      .filter((segment) => segment.length > 0);
    return firstWord(segments[segments.length - 1] ?? "");
  }

  return null;
}
/**
 * Probe a running server by executing `{binary} --version` on it.
 *
 * Transport depends on `record.connection.cloud`:
 * - "local": no command is run; the agent is reported as alive.
 * - "sprite": the command runs through `sprite exec -s <server_name>`.
 * - anything else: the command runs over `ssh <user>@<ip>` with a 5 s connect timeout.
 *
 * The whole probe is capped at 10 s. On timeout the child process is killed and
 * the probe counts as failed. Any error raised while spawning or waiting is
 * also reported as `false` rather than thrown.
 *
 * @param record - Spawn history record describing how to reach the server.
 * @param manifest - Loaded manifest (used to resolve the agent binary), or null.
 * @returns true if the version command exited with code 0, false otherwise.
 */
async function probeAgentAlive(record: SpawnRecord, manifest: Manifest | null): Promise<boolean> {
  const conn = record.connection;
  if (!conn) {
    return false;
  }
  if (conn.cloud === "local") {
    return true;
  }

  const binary = resolveAgentBinary(record, manifest);
  if (!binary) {
    return false;
  }

  // Load the spawn environment and extend PATH with common install locations before invoking the binary.
  const versionCmd = `source ~/.spawnrc 2>/dev/null; export PATH="$HOME/.local/bin:$HOME/.claude/local/bin:$HOME/.npm-global/bin:$HOME/.bun/bin:$HOME/.n/bin:$PATH"; ${binary} --version`;

  const result = await asyncTryCatch(async () => {
    let proc: {
      exited: Promise<number>;
      kill(): void;
    };

    if (conn.cloud === "sprite") {
      const name = conn.server_name || "";
      if (!name) {
        return false;
      }
      proc = Bun.spawn(
        [
          "sprite",
          "exec",
          "-s",
          name,
          "--",
          "bash",
          "-c",
          versionCmd,
        ],
        {
          stdout: "ignore",
          stderr: "ignore",
        },
      );
    } else {
      const user = conn.user || "root";
      const ip = conn.ip || "";
      // "sprite-console" is a placeholder value, not an address ssh can reach.
      if (!ip || ip === "sprite-console") {
        return false;
      }
      proc = Bun.spawn(
        [
          "ssh",
          ...SSH_BASE_OPTS,
          "-o",
          "ConnectTimeout=5",
          `${user}@${ip}`,
          versionCmd,
        ],
        {
          stdout: "ignore",
          stderr: "ignore",
        },
      );
    }

    let timer: ReturnType<typeof setTimeout> | undefined;
    let timedOut = false;
    const timeout = new Promise<never>((_, reject) => {
      timer = setTimeout(() => {
        timedOut = true;
        reject(new Error("probe timeout"));
      }, 10_000);
    });

    try {
      const exitCode = await Promise.race([proc.exited, timeout]);
      return exitCode === 0;
    } finally {
      // Always clear the timer so a fast probe does not keep the event loop alive for 10 s.
      clearTimeout(timer);
      // A timed-out child would otherwise linger in the background.
      if (timedOut) {
        proc.kill();
      }
    }
  });

  return result.ok ? result.data : false;
}
// ── Formatting ───────────────────────────────────────────────────────────────
function fmtState(state: LiveState): string {
@ -163,6 +267,13 @@ function fmtState(state: LiveState): string {
}
}
/**
 * Render the agent probe result for the status table.
 *
 * @param alive - true/false for a probed server, null when no probe was run.
 * @returns A green "live", a red "down", or a dim "—" placeholder for null.
 */
function fmtProbe(alive: boolean | null): string {
  if (alive !== null) {
    if (alive) {
      return pc.green("live");
    }
    return pc.red("down");
  }
  return pc.dim("—");
}
function fmtIp(conn: SpawnRecord["connection"]): string {
if (!conn) {
return "—";
@ -190,6 +301,7 @@ function renderStatusTable(results: ServerStatusResult[], manifest: Manifest | n
const COL_CLOUD = 14;
const COL_IP = 16;
const COL_STATE = 12;
const COL_PROBE = 10;
const COL_SINCE = 12;
const header = [
@ -198,6 +310,7 @@ function renderStatusTable(results: ServerStatusResult[], manifest: Manifest | n
col(pc.dim("Cloud"), COL_CLOUD),
col(pc.dim("IP"), COL_IP),
col(pc.dim("State"), COL_STATE),
col(pc.dim("Probe"), COL_PROBE),
pc.dim("Since"),
].join(" ");
@ -208,6 +321,7 @@ function renderStatusTable(results: ServerStatusResult[], manifest: Manifest | n
"-".repeat(COL_CLOUD),
"-".repeat(COL_IP),
"-".repeat(COL_STATE),
"-".repeat(COL_PROBE),
"-".repeat(COL_SINCE),
].join("-"),
);
@ -216,13 +330,14 @@ function renderStatusTable(results: ServerStatusResult[], manifest: Manifest | n
console.log(header);
console.log(divider);
for (const { record, liveState } of results) {
for (const { record, liveState, agentAlive } of results) {
const conn = record.connection;
const shortId = record.id ? record.id.slice(0, 6) : "??????";
const agentDisplay = resolveDisplayName(manifest, record.agent, "agent");
const cloudDisplay = resolveDisplayName(manifest, record.cloud, "cloud");
const ip = fmtIp(conn);
const state = fmtState(liveState);
const probe = fmtProbe(agentAlive);
const since = formatRelativeTime(record.timestamp);
const row = [
@ -231,6 +346,7 @@ function renderStatusTable(results: ServerStatusResult[], manifest: Manifest | n
col(cloudDisplay, COL_CLOUD),
col(ip, COL_IP),
col(state, COL_STATE),
col(probe, COL_PROBE),
pc.dim(since),
].join(" ");
@ -243,13 +359,14 @@ function renderStatusTable(results: ServerStatusResult[], manifest: Manifest | n
// ── JSON output ──────────────────────────────────────────────────────────────
function renderStatusJson(results: ServerStatusResult[]): void {
const entries: JsonStatusEntry[] = results.map(({ record, liveState }) => ({
const entries: JsonStatusEntry[] = results.map(({ record, liveState, agentAlive }) => ({
id: record.id || "",
agent: record.agent,
cloud: record.cloud,
ip: fmtIp(record.connection),
name: record.name || record.connection?.server_name || "",
state: liveState,
agent_alive: agentAlive,
spawned_at: record.timestamp,
server_id: record.connection?.server_id || record.connection?.server_name || "",
}));
@ -258,9 +375,16 @@ function renderStatusJson(results: ServerStatusResult[]): void {
// ── Main command ─────────────────────────────────────────────────────────────
export async function cmdStatus(
opts: { prune?: boolean; json?: boolean; agentFilter?: string; cloudFilter?: string } = {},
): Promise<void> {
/** Options accepted by cmdStatus. Every field is optional. */
export interface StatusOpts {
/** NOTE(review): presumably removes history entries for servers that no longer exist — confirm against the cmdStatus body. */
prune?: boolean;
/** Request JSON output instead of the table (the JSON entries include `agent_alive`). */
json?: boolean;
/** Agent filter forwarded to filterHistory when selecting records. */
agentFilter?: string;
/** Cloud filter forwarded to filterHistory when selecting records. */
cloudFilter?: string;
/** Override the agent probe for testing. Called only for "running" servers. */
probe?: (record: SpawnRecord, manifest: Manifest | null) => Promise<boolean>;
}
export async function cmdStatus(opts: StatusOpts = {}): Promise<void> {
const records = filterHistory(opts.agentFilter, opts.cloudFilter);
const candidates = records.filter(
@ -284,12 +408,19 @@ export async function cmdStatus(
p.log.step(`Checking status of ${candidates.length} server${candidates.length !== 1 ? "s" : ""}...`);
}
const probeFn = opts.probe ?? probeAgentAlive;
const results: ServerStatusResult[] = await Promise.all(
candidates.map(async (record) => {
const liveState = await checkServerStatus(record);
let agentAlive: boolean | null = null;
if (liveState === "running") {
agentAlive = await probeFn(record, manifest);
}
return {
record,
liveState,
agentAlive,
};
}),
);
@ -332,6 +463,15 @@ export async function cmdStatus(
);
}
const unreachable = results.filter((r) => r.agentAlive === false);
if (unreachable.length > 0) {
p.log.info(
pc.dim(
`${unreachable.length} server${unreachable.length !== 1 ? "s" : ""} running but agent unreachable. The agent may have crashed or still be starting.`,
),
);
}
const running = results.filter((r) => r.liveState === "running").length;
if (running > 0) {
p.log.info(