mirror of
https://github.com/OpenRouterTeam/spawn.git
synced 2026-05-16 20:01:08 +00:00
feat(status): add agent alive probe via SSH (#3109)
`spawn status` now probes running servers by SSHing in and running
`{agent} --version` to verify the agent binary is installed and
executable. Results show in a new "Probe" column (live/down/—) and
as `agent_alive` in JSON output. Only "running" servers are probed;
gone/stopped/unknown servers are skipped.
The probe function is injectable via opts for testability.
Co-authored-by: spawn-bot <spawn-bot@openrouter.ai>
Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
ddce16a438
commit
749f79a9c2
3 changed files with 329 additions and 8 deletions
|
|
@ -1,6 +1,6 @@
|
|||
{
|
||||
"name": "@openrouter/spawn",
|
||||
"version": "0.28.2",
|
||||
"version": "0.29.0",
|
||||
"type": "module",
|
||||
"bin": {
|
||||
"spawn": "cli.js"
|
||||
|
|
|
|||
|
|
@ -151,6 +151,7 @@ describe("cmdStatus", () => {
|
|||
|
||||
await cmdStatus({
|
||||
json: true,
|
||||
probe: async () => true,
|
||||
});
|
||||
expect(fetchedUrls.some((u) => u.includes("hetzner.cloud/v1/servers/12345"))).toBe(true);
|
||||
});
|
||||
|
|
@ -193,6 +194,7 @@ describe("cmdStatus", () => {
|
|||
|
||||
await cmdStatus({
|
||||
json: true,
|
||||
probe: async () => true,
|
||||
});
|
||||
expect(fetchedUrls.some((u) => u.includes("digitalocean.com/v2/droplets/99999"))).toBe(true);
|
||||
});
|
||||
|
|
@ -415,10 +417,189 @@ describe("cmdStatus", () => {
|
|||
return new Response(JSON.stringify(mockManifest));
|
||||
});
|
||||
|
||||
await cmdStatus();
|
||||
await cmdStatus({
|
||||
probe: async () => true,
|
||||
});
|
||||
|
||||
const infoCalls = clack.logInfo.mock.calls.map((c: unknown[]) => String(c[0]));
|
||||
// Should mention running servers and spawn list
|
||||
expect(infoCalls.some((msg: string) => msg.includes("running"))).toBe(true);
|
||||
});
|
||||
|
||||
// ── Agent probe tests ───────────────────────────────────────────────────
|
||||
|
||||
it("probes running server and reports agent_alive true in JSON", async () => {
  // A single Hetzner record; the mocked cloud API below reports it as running.
  const liveRecord = {
    id: "probe-live",
    agent: "claude",
    cloud: "hetzner",
    timestamp: new Date().toISOString(),
    connection: {
      ip: "1.2.3.4",
      user: "root",
      cloud: "hetzner",
      server_id: "12345",
    },
  };
  writeHistory(testDir, [liveRecord]);
  writeCloudConfig("hetzner", { api_key: "test-token" });

  _resetCacheForTesting();
  global.fetch = mock(async (url: string | URL | Request) => {
    let href: string;
    if (isString(url)) {
      href = url;
    } else if (url instanceof URL) {
      href = url.toString();
    } else {
      href = url.url;
    }
    // Hetzner lookups get a "running" server; every other request gets the manifest.
    const payload = href.includes("hetzner.cloud") ? { server: { status: "running" } } : mockManifest;
    return new Response(JSON.stringify(payload));
  });

  // Injected probe stands in for the real SSH check and always succeeds.
  await cmdStatus({ json: true, probe: async () => true });

  const printed = consoleSpy.mock.calls.map((args: unknown[]) => String(args[0]));
  const parsed = JSON.parse(printed.join(""));
  expect(parsed[0].agent_alive).toBe(true);
});
|
||||
|
||||
it("probes running server and reports agent_alive false in JSON", async () => {
  // A single Hetzner record; the mocked cloud API below reports it as running.
  const downRecord = {
    id: "probe-down",
    agent: "claude",
    cloud: "hetzner",
    timestamp: new Date().toISOString(),
    connection: {
      ip: "1.2.3.4",
      user: "root",
      cloud: "hetzner",
      server_id: "12345",
    },
  };
  writeHistory(testDir, [downRecord]);
  writeCloudConfig("hetzner", { api_key: "test-token" });

  _resetCacheForTesting();
  global.fetch = mock(async (url: string | URL | Request) => {
    let href: string;
    if (isString(url)) {
      href = url;
    } else if (url instanceof URL) {
      href = url.toString();
    } else {
      href = url.url;
    }
    // Hetzner lookups get a "running" server; every other request gets the manifest.
    const payload = href.includes("hetzner.cloud") ? { server: { status: "running" } } : mockManifest;
    return new Response(JSON.stringify(payload));
  });

  // Injected probe stands in for the real SSH check and always fails.
  await cmdStatus({ json: true, probe: async () => false });

  const printed = consoleSpy.mock.calls.map((args: unknown[]) => String(args[0]));
  const parsed = JSON.parse(printed.join(""));
  expect(parsed[0].agent_alive).toBe(false);
});
|
||||
|
||||
it("does not probe gone servers — agent_alive is null", async () => {
  const goneRecord = {
    id: "probe-gone",
    agent: "claude",
    cloud: "hetzner",
    timestamp: new Date().toISOString(),
    connection: {
      ip: "1.2.3.4",
      user: "root",
      cloud: "hetzner",
      server_id: "12345",
    },
  };
  writeHistory(testDir, [goneRecord]);
  writeCloudConfig("hetzner", { api_key: "test-token" });

  // Flipped by the injected probe; must still be false at the end.
  let probeCalled = false;
  _resetCacheForTesting();
  global.fetch = mock(async (url: string | URL | Request) => {
    let href: string;
    if (isString(url)) {
      href = url;
    } else if (url instanceof URL) {
      href = url.toString();
    } else {
      href = url.url;
    }
    // The Hetzner lookup answers 404; every other request gets the manifest.
    if (href.includes("hetzner.cloud")) {
      return new Response("Not Found", { status: 404 });
    }
    return new Response(JSON.stringify(mockManifest));
  });

  const recordingProbe = async (): Promise<boolean> => {
    probeCalled = true;
    return true;
  };
  await cmdStatus({ json: true, probe: recordingProbe });

  expect(probeCalled).toBe(false);
  const printed = consoleSpy.mock.calls.map((args: unknown[]) => String(args[0]));
  const parsed = JSON.parse(printed.join(""));
  expect(parsed[0].agent_alive).toBeNull();
});
|
||||
|
||||
it("shows unreachable warning when probe fails in table mode", async () => {
  const warnRecord = {
    id: "probe-warn",
    agent: "claude",
    cloud: "hetzner",
    timestamp: new Date().toISOString(),
    connection: {
      ip: "1.2.3.4",
      user: "root",
      cloud: "hetzner",
      server_id: "12345",
    },
  };
  writeHistory(testDir, [warnRecord]);
  writeCloudConfig("hetzner", { api_key: "test-token" });

  _resetCacheForTesting();
  global.fetch = mock(async (url: string | URL | Request) => {
    let href: string;
    if (isString(url)) {
      href = url;
    } else if (url instanceof URL) {
      href = url.toString();
    } else {
      href = url.url;
    }
    // Hetzner lookups get a "running" server; every other request gets the manifest.
    const payload = href.includes("hetzner.cloud") ? { server: { status: "running" } } : mockManifest;
    return new Response(JSON.stringify(payload));
  });

  // No `json` flag, so the table renderer runs; the probe always fails.
  await cmdStatus({ probe: async () => false });

  const infoMessages = clack.logInfo.mock.calls.map((args: unknown[]) => String(args[0]));
  const mentionsUnreachable = infoMessages.some((msg: string) => msg.includes("unreachable"));
  expect(mentionsUnreachable).toBe(true);
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -8,7 +8,8 @@ import { filterHistory, markRecordDeleted } from "../history.js";
|
|||
import { loadManifest } from "../manifest.js";
|
||||
import { validateServerIdentifier } from "../security.js";
|
||||
import { parseJsonObj } from "../shared/parse.js";
|
||||
import { asyncTryCatchIf, isNetworkError, tryCatch, unwrapOr } from "../shared/result.js";
|
||||
import { asyncTryCatch, asyncTryCatchIf, isNetworkError, tryCatch, unwrapOr } from "../shared/result.js";
|
||||
import { SSH_BASE_OPTS } from "../shared/ssh.js";
|
||||
import { loadApiToken } from "../shared/ui.js";
|
||||
import { formatRelativeTime } from "./list.js";
|
||||
import { resolveDisplayName } from "./shared.js";
|
||||
|
|
@ -20,6 +21,7 @@ type LiveState = "running" | "stopped" | "gone" | "unknown";
|
|||
interface ServerStatusResult {
|
||||
record: SpawnRecord;
|
||||
liveState: LiveState;
|
||||
agentAlive: boolean | null;
|
||||
}
|
||||
|
||||
interface JsonStatusEntry {
|
||||
|
|
@ -29,6 +31,7 @@ interface JsonStatusEntry {
|
|||
ip: string;
|
||||
name: string;
|
||||
state: LiveState;
|
||||
agent_alive: boolean | null;
|
||||
spawned_at: string;
|
||||
server_id: string;
|
||||
}
|
||||
|
|
@ -148,6 +151,107 @@ async function checkServerStatus(record: SpawnRecord): Promise<LiveState> {
|
|||
}
|
||||
}
|
||||
|
||||
// ── Agent alive probe ───────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Resolve the agent's executable name so it can be probed with `--version`.
 *
 * Lookup order:
 *  1. The manifest's `launch` string for `record.agent`; its first word is the
 *     binary (e.g. "openclaw tui" → "openclaw").
 *  2. The `launch_cmd` stored on the record's connection: the last non-empty
 *     `;`-separated segment (i.e. after any source/export prefixes) is taken
 *     and its first word is the binary.
 *
 * Leading whitespace and trailing `;` separators are tolerated, and a manifest
 * entry that yields no usable word falls through to `launch_cmd`.
 *
 * @param record   Spawn record whose agent binary should be resolved.
 * @param manifest Loaded manifest, or null when none is available.
 * @returns The binary name, or null when neither source yields one.
 */
function resolveAgentBinary(record: SpawnRecord, manifest: Manifest | null): string | null {
  // Trim before splitting: otherwise leading whitespace makes the first token "".
  const firstWord = (command: string): string | null => command.trim().split(/\s+/)[0] || null;

  const fromManifest = manifest?.agents[record.agent]?.launch;
  if (fromManifest) {
    const binary = firstWord(fromManifest);
    if (binary) {
      return binary;
    }
  }

  // Fallback: extract the last command from launch_cmd (after all source/export prefixes).
  const launchCmd = record.connection?.launch_cmd;
  if (launchCmd) {
    // Drop empty segments so a trailing ";" does not hide the real last command.
    const segments = launchCmd
      .split(";")
      .map((segment) => segment.trim())
      .filter((segment) => segment.length > 0);
    const last = segments[segments.length - 1] ?? "";
    return firstWord(last);
  }

  return null;
}
|
||||
|
||||
/**
 * Probe a running server by executing `{binary} --version` on it.
 *
 * Connections whose cloud is "sprite" run the command through `sprite exec`;
 * every other cloud goes over `ssh` with a 5-second connect timeout. The whole
 * probe is abandoned after 10 seconds, at which point the child process is
 * killed so no hung `ssh`/`sprite` process is left behind.
 *
 * @param record   Spawn record whose connection details are used.
 * @param manifest Manifest used to resolve the agent binary, or null.
 * @returns true when the version command exits with code 0, or when the
 *          connection's cloud is "local" (no probe is run in that case).
 *          false when connection details are missing, the binary cannot be
 *          resolved or is not a plain command name, the command exits
 *          non-zero, spawning fails, or the probe times out.
 */
async function probeAgentAlive(record: SpawnRecord, manifest: Manifest | null): Promise<boolean> {
  const PROBE_TIMEOUT_MS = 10_000;
  // Characters allowed in a binary name/path. The name is interpolated into a
  // remote shell command, so anything that could act as shell syntax
  // (quotes, `$`, backticks, `&`, `|`, parentheses, …) is rejected.
  const SAFE_BINARY = /^[\w@%+:,.\/~-]+$/;

  const conn = record.connection;
  if (!conn) {
    return false;
  }
  if (conn.cloud === "local") {
    return true;
  }

  const binary = resolveAgentBinary(record, manifest);
  if (!binary || !SAFE_BINARY.test(binary)) {
    return false;
  }

  const versionCmd = `source ~/.spawnrc 2>/dev/null; export PATH="$HOME/.local/bin:$HOME/.claude/local/bin:$HOME/.npm-global/bin:$HOME/.bun/bin:$HOME/.n/bin:$PATH"; ${binary} --version`;

  const result = await asyncTryCatch(async () => {
    let argv: string[];

    if (conn.cloud === "sprite") {
      const name = conn.server_name || "";
      if (!name) {
        return false;
      }
      argv = ["sprite", "exec", "-s", name, "--", "bash", "-c", versionCmd];
    } else {
      const user = conn.user || "root";
      const ip = conn.ip || "";
      if (!ip || ip === "sprite-console") {
        return false;
      }
      argv = ["ssh", ...SSH_BASE_OPTS, "-o", "ConnectTimeout=5", `${user}@${ip}`, versionCmd];
    }

    const proc: {
      exited: Promise<number>;
      kill: () => void;
    } = Bun.spawn(argv, {
      stdout: "ignore",
      stderr: "ignore",
    });

    let timer: ReturnType<typeof setTimeout> | undefined;
    const timeout = new Promise<number>((_, reject) => {
      timer = setTimeout(() => {
        // Terminate the hung child before giving up on it.
        proc.kill();
        reject(new Error("probe timeout"));
      }, PROBE_TIMEOUT_MS);
    });

    try {
      const exitCode = await Promise.race([proc.exited, timeout]);
      return exitCode === 0;
    } finally {
      // Always clear the timer so a fast probe does not leave a pending
      // 10-second timeout behind.
      clearTimeout(timer);
    }
  });

  return result.ok ? result.data : false;
}
|
||||
|
||||
// ── Formatting ───────────────────────────────────────────────────────────────
|
||||
|
||||
function fmtState(state: LiveState): string {
|
||||
|
|
@ -163,6 +267,13 @@ function fmtState(state: LiveState): string {
|
|||
}
|
||||
}
|
||||
|
||||
/**
 * Format the "Probe" table cell for a server.
 *
 * @param alive null when the server was not probed, otherwise the probe result.
 * @returns A dim "—" for null, green "live" for true, red "down" for false.
 */
function fmtProbe(alive: boolean | null): string {
  switch (alive) {
    case null:
      return pc.dim("—");
    case true:
      return pc.green("live");
    default:
      return pc.red("down");
  }
}
|
||||
|
||||
function fmtIp(conn: SpawnRecord["connection"]): string {
|
||||
if (!conn) {
|
||||
return "—";
|
||||
|
|
@ -190,6 +301,7 @@ function renderStatusTable(results: ServerStatusResult[], manifest: Manifest | n
|
|||
const COL_CLOUD = 14;
|
||||
const COL_IP = 16;
|
||||
const COL_STATE = 12;
|
||||
const COL_PROBE = 10;
|
||||
const COL_SINCE = 12;
|
||||
|
||||
const header = [
|
||||
|
|
@ -198,6 +310,7 @@ function renderStatusTable(results: ServerStatusResult[], manifest: Manifest | n
|
|||
col(pc.dim("Cloud"), COL_CLOUD),
|
||||
col(pc.dim("IP"), COL_IP),
|
||||
col(pc.dim("State"), COL_STATE),
|
||||
col(pc.dim("Probe"), COL_PROBE),
|
||||
pc.dim("Since"),
|
||||
].join(" ");
|
||||
|
||||
|
|
@ -208,6 +321,7 @@ function renderStatusTable(results: ServerStatusResult[], manifest: Manifest | n
|
|||
"-".repeat(COL_CLOUD),
|
||||
"-".repeat(COL_IP),
|
||||
"-".repeat(COL_STATE),
|
||||
"-".repeat(COL_PROBE),
|
||||
"-".repeat(COL_SINCE),
|
||||
].join("-"),
|
||||
);
|
||||
|
|
@ -216,13 +330,14 @@ function renderStatusTable(results: ServerStatusResult[], manifest: Manifest | n
|
|||
console.log(header);
|
||||
console.log(divider);
|
||||
|
||||
for (const { record, liveState } of results) {
|
||||
for (const { record, liveState, agentAlive } of results) {
|
||||
const conn = record.connection;
|
||||
const shortId = record.id ? record.id.slice(0, 6) : "??????";
|
||||
const agentDisplay = resolveDisplayName(manifest, record.agent, "agent");
|
||||
const cloudDisplay = resolveDisplayName(manifest, record.cloud, "cloud");
|
||||
const ip = fmtIp(conn);
|
||||
const state = fmtState(liveState);
|
||||
const probe = fmtProbe(agentAlive);
|
||||
const since = formatRelativeTime(record.timestamp);
|
||||
|
||||
const row = [
|
||||
|
|
@ -231,6 +346,7 @@ function renderStatusTable(results: ServerStatusResult[], manifest: Manifest | n
|
|||
col(cloudDisplay, COL_CLOUD),
|
||||
col(ip, COL_IP),
|
||||
col(state, COL_STATE),
|
||||
col(probe, COL_PROBE),
|
||||
pc.dim(since),
|
||||
].join(" ");
|
||||
|
||||
|
|
@ -243,13 +359,14 @@ function renderStatusTable(results: ServerStatusResult[], manifest: Manifest | n
|
|||
// ── JSON output ──────────────────────────────────────────────────────────────
|
||||
|
||||
function renderStatusJson(results: ServerStatusResult[]): void {
|
||||
const entries: JsonStatusEntry[] = results.map(({ record, liveState }) => ({
|
||||
const entries: JsonStatusEntry[] = results.map(({ record, liveState, agentAlive }) => ({
|
||||
id: record.id || "",
|
||||
agent: record.agent,
|
||||
cloud: record.cloud,
|
||||
ip: fmtIp(record.connection),
|
||||
name: record.name || record.connection?.server_name || "",
|
||||
state: liveState,
|
||||
agent_alive: agentAlive,
|
||||
spawned_at: record.timestamp,
|
||||
server_id: record.connection?.server_id || record.connection?.server_name || "",
|
||||
}));
|
||||
|
|
@ -258,9 +375,16 @@ function renderStatusJson(results: ServerStatusResult[]): void {
|
|||
|
||||
// ── Main command ─────────────────────────────────────────────────────────────
|
||||
|
||||
export async function cmdStatus(
|
||||
opts: { prune?: boolean; json?: boolean; agentFilter?: string; cloudFilter?: string } = {},
|
||||
): Promise<void> {
|
||||
/** Options accepted by `cmdStatus`; every field is optional. */
export interface StatusOpts {
  /** Agent filter forwarded to `filterHistory` when selecting records. */
  agentFilter?: string;
  /** Cloud filter forwarded to `filterHistory` when selecting records. */
  cloudFilter?: string;
  /** Request JSON output instead of the table. NOTE(review): the branch that reads this flag is outside this excerpt — confirm. */
  json?: boolean;
  /** NOTE(review): handling is outside this excerpt — presumably prunes records of servers that are gone; confirm. */
  prune?: boolean;
  /**
   * Override the agent probe for testing. Called only for "running" servers.
   * When omitted, `cmdStatus` falls back to `probeAgentAlive`.
   */
  probe?: (record: SpawnRecord, manifest: Manifest | null) => Promise<boolean>;
}
|
||||
|
||||
export async function cmdStatus(opts: StatusOpts = {}): Promise<void> {
|
||||
const records = filterHistory(opts.agentFilter, opts.cloudFilter);
|
||||
|
||||
const candidates = records.filter(
|
||||
|
|
@ -284,12 +408,19 @@ export async function cmdStatus(
|
|||
p.log.step(`Checking status of ${candidates.length} server${candidates.length !== 1 ? "s" : ""}...`);
|
||||
}
|
||||
|
||||
const probeFn = opts.probe ?? probeAgentAlive;
|
||||
|
||||
const results: ServerStatusResult[] = await Promise.all(
|
||||
candidates.map(async (record) => {
|
||||
const liveState = await checkServerStatus(record);
|
||||
let agentAlive: boolean | null = null;
|
||||
if (liveState === "running") {
|
||||
agentAlive = await probeFn(record, manifest);
|
||||
}
|
||||
return {
|
||||
record,
|
||||
liveState,
|
||||
agentAlive,
|
||||
};
|
||||
}),
|
||||
);
|
||||
|
|
@ -332,6 +463,15 @@ export async function cmdStatus(
|
|||
);
|
||||
}
|
||||
|
||||
const unreachable = results.filter((r) => r.agentAlive === false);
|
||||
if (unreachable.length > 0) {
|
||||
p.log.info(
|
||||
pc.dim(
|
||||
`${unreachable.length} server${unreachable.length !== 1 ? "s" : ""} running but agent unreachable. The agent may have crashed or still be starting.`,
|
||||
),
|
||||
);
|
||||
}
|
||||
|
||||
const running = results.filter((r) => r.liveState === "running").length;
|
||||
if (running > 0) {
|
||||
p.log.info(
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue