mirror of
https://github.com/OpenRouterTeam/spawn.git
synced 2026-04-26 11:00:38 +00:00
* fix(zeroclaw): remove broken zeroclaw agent (repo 404) The zeroclaw-labs/zeroclaw GitHub repository returns 404 — all installs fail. Remove zeroclaw entirely from the matrix: agent definition, setup code, shell scripts, e2e tests, packer config, skill files, and documentation. Fixes #3102 Agent: code-health Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com> * fix(zeroclaw): remove stale zeroclaw reference from discovery.md ARM agents list Addresses security review on PR #3107 — the last remaining zeroclaw reference in .claude/rules/discovery.md is now removed. Agent: issue-fixer Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com> * fix(zeroclaw): remove remaining stale zeroclaw references from CI/packer Remove zeroclaw from: - .github/workflows/agent-tarballs.yml ARM build matrix - .github/workflows/docker.yml agent matrix - packer/digitalocean.pkr.hcl comment - sh/e2e/e2e.sh comment Addresses all 5 stale references flagged in security review of PR #3107. Agent: issue-fixer Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com> --------- Co-authored-by: B <6723574+louisgv@users.noreply.github.com> Co-authored-by: Claude Sonnet 4.5 <noreply@anthropic.com>
866 lines
30 KiB
TypeScript
866 lines
30 KiB
TypeScript
// shared/orchestrate.ts — Shared orchestration pipeline for deploying agents
|
|
// Each cloud implements CloudOrchestrator and calls runOrchestration().
|
|
|
|
import type { SpawnRecord, VMConnection } from "../history.js";
|
|
import type { CloudRunner } from "./agent-setup.js";
|
|
import type { AgentConfig } from "./agents.js";
|
|
import type { SshTunnelHandle } from "./ssh.js";
|
|
|
|
import { existsSync, readFileSync, unlinkSync } from "node:fs";
|
|
import { getErrorMessage } from "@openrouter/spawn-shared";
|
|
import * as v from "valibot";
|
|
import {
|
|
generateSpawnId,
|
|
mergeChildHistory,
|
|
SpawnRecordSchema,
|
|
saveLaunchCmd,
|
|
saveMetadata,
|
|
saveSpawnRecord,
|
|
} from "../history.js";
|
|
import { offerGithubAuth, setupAutoUpdate, wrapSshCall } from "./agent-setup.js";
|
|
import { tryTarballInstall } from "./agent-tarball.js";
|
|
import { generateEnvConfig } from "./agents.js";
|
|
import { getOrPromptApiKey } from "./oauth.js";
|
|
import { parseJsonWith } from "./parse.js";
|
|
import { getSpawnCloudConfigPath, getSpawnPreferencesPath, getTmpDir } from "./paths.js";
|
|
import { asyncTryCatch, asyncTryCatchIf, isOperationalError, tryCatch } from "./result.js";
|
|
import { isWindows } from "./shell.js";
|
|
import { injectSpawnSkill } from "./spawn-skill.js";
|
|
import { sleep, startSshTunnel } from "./ssh.js";
|
|
import { ensureSshKeys, getSshKeyOpts } from "./ssh-keys.js";
|
|
import {
|
|
logDebug,
|
|
logError,
|
|
logInfo,
|
|
logStep,
|
|
logWarn,
|
|
openBrowser,
|
|
prepareStdinForHandoff,
|
|
prompt,
|
|
retryOrQuit,
|
|
shellQuote,
|
|
validateModelId,
|
|
withRetry,
|
|
} from "./ui.js";
|
|
|
|
/**
 * Docker container name used by --beta docker deployments.
 * The Docker helpers in this file (`docker exec` / `docker cp`) target this container.
 */
export const DOCKER_CONTAINER_NAME = "spawn-agent";

/** Docker registry hosting spawn agent images. */
export const DOCKER_REGISTRY = "ghcr.io/openrouterteam";
|
|
|
|
/**
 * Build the host-side command that runs `cmd` inside the agent container.
 *
 * @param cmd - Shell command to execute in the container; must be non-empty.
 * @returns A `docker exec <container> bash -c <quoted cmd>` command string.
 * @throws Error when `cmd` is empty.
 */
function makeDockerExec(cmd: string): string {
  if (cmd) {
    const quotedCmd = shellQuote(cmd);
    return `docker exec ${DOCKER_CONTAINER_NAME} bash -c ${quotedCmd}`;
  }
  throw new Error("makeDockerExec: command must be non-empty");
}
|
|
|
|
/**
 * Adapt a host-level CloudRunner so that commands and uploads land inside the
 * Docker container instead of on the host.
 *
 * @param hostRunner - Runner that talks to the host machine.
 * @returns A runner whose `runServer` goes through `docker exec` and whose
 *   `uploadFile` stages the file on the host, then `docker cp`s it into the
 *   container at the same path.
 */
export function makeDockerRunner(hostRunner: CloudRunner): CloudRunner {
  const runInContainer = (cmd: string, timeoutSecs?: number) =>
    hostRunner.runServer(makeDockerExec(cmd), timeoutSecs);

  const uploadIntoContainer = async (localPath: string, remotePath: string) => {
    // Stage on the host first; the container is only reachable through the host.
    await hostRunner.uploadFile(localPath, remotePath);
    const quotedPath = shellQuote(remotePath);
    await hostRunner.runServer(`docker cp ${quotedPath} ${DOCKER_CONTAINER_NAME}:${quotedPath}`);
  };

  return {
    runServer: runInContainer,
    uploadFile: uploadIntoContainer,
    // NOTE(review): this is the host runner's downloadFile, passed through unchanged —
    // it is not container-aware. Confirm that is intended.
    downloadFile: hostRunner.downloadFile,
  };
}
|
|
|
|
/**
 * Contract each cloud implements so runOrchestration() can drive it.
 * The orchestration pipeline calls authenticate → promptSize → getServerName →
 * createServer → waitForReady, and finally interactiveSession.
 */
export interface CloudOrchestrator {
  /** Cloud identifier. The values "local" and "sprite" are special-cased by the pipeline. */
  cloudName: string;
  /** Human-readable cloud name used in log messages. */
  cloudLabel: string;
  /** Runner used for setup commands and file transfers against the target machine. */
  runner: CloudRunner;
  /** When true, skip tarball + agent install (e.g. booting from a pre-baked snapshot). */
  skipAgentInstall?: boolean;
  /** When true, skip cloud-init wait — just wait for SSH (e.g. minimal-tier agent with tarball). */
  skipCloudInit?: boolean;
  /** Authenticate with the cloud provider; the first step of the pipeline. */
  authenticate(): Promise<void>;
  /** Optional pre-flight account check; the pipeline does not abort when it fails. */
  checkAccountReady?(): Promise<void>;
  /** Size/bundle selection; called before createServer. */
  promptSize(): Promise<void>;
  /** Provision the server and return the connection that is saved in the spawn record. */
  createServer(name: string): Promise<VMConnection>;
  /** Produce the name that is passed to createServer. */
  getServerName(): Promise<string>;
  /** Resolve once the provisioned server is ready for setup commands. */
  waitForReady(): Promise<void>;
  /**
   * Run `cmd` as an interactive session and resolve with its exit code.
   * The pipeline treats 255 (and 1 on "sprite") as a dropped connection.
   */
  interactiveSession(cmd: string): Promise<number>;
  /** Return SSH connection info for tunnel support. Omit for non-SSH clouds. */
  getConnectionInfo?(): {
    host: string;
    user: string;
  };
}
|
|
|
|
/**
 * Wrap a launch command in a restart loop for cloud VMs.
 *
 * The generated script runs `cmd` and re-runs it after a non-zero exit
 * (crash, SIGTERM, OOM) until it has failed `_spawn_max` (10) times in total.
 * A clean exit (code 0) leaves the loop immediately. The script exits with the
 * status of the last run.
 *
 * Once the failure budget is exhausted the loop stops right away: no
 * "Restarting in 5s" message is printed and no 5s sleep happens for a restart
 * that will never take place — only the "Giving up" message is shown.
 *
 * Skipped for local execution where the user controls the process directly.
 *
 * @param cmd - Shell command that launches the agent.
 * @returns A multi-line shell script (bash 3.x compatible).
 */
function wrapWithRestartLoop(cmd: string): string {
  // Shell restart loop — bash 3.x compatible (no ((var++)), no set -u)
  return [
    "_spawn_restarts=0",
    "_spawn_max=10",
    'while [ "$_spawn_restarts" -lt "$_spawn_max" ]; do',
    ` ${cmd}`,
    " _spawn_exit=$?",
    ' if [ "$_spawn_exit" -eq 0 ]; then break; fi',
    " _spawn_restarts=$((_spawn_restarts + 1))",
    // Budget exhausted: skip the misleading restart notice and the pointless sleep.
    ' if [ "$_spawn_restarts" -ge "$_spawn_max" ]; then break; fi',
    ' printf "\\n[spawn] Agent exited with code %d. Restarting in 5s (%d/%d)...\\n" "$_spawn_exit" "$_spawn_restarts" "$_spawn_max" >&2',
    " sleep 5",
    "done",
    'if [ "$_spawn_restarts" -ge "$_spawn_max" ]; then',
    ' printf "\\n[spawn] Agent crashed %d times. Giving up.\\n" "$_spawn_max" >&2',
    "fi",
    'exit "${_spawn_exit:-0}"',
  ].join("\n");
}
|
|
|
|
// ── Recursive spawn helpers ──────────────────────────────────────────────────
|
|
|
|
/**
 * Install the spawn CLI on a remote VM (best-effort).
 *
 * The install command is retried via withRetry; if it still fails, a warning is
 * logged and the function returns normally.
 *
 * @param runner - Runner used to execute the install command on the VM.
 */
export async function installSpawnCli(runner: CloudRunner): Promise<void> {
  logStep("Installing spawn CLI on VM...");

  // PATH is built explicitly — non-interactive bash skips .bashrc (PS1 guard),
  // and some platforms (Sprite) have a broken bun shim that `command -v` finds
  // but that doesn't actually work. All known bun locations are prepended so
  // the real binary wins, and `bun --version` (not mere existence) decides
  // whether bun must be installed fresh.
  const exportBunInstall = 'export BUN_INSTALL="${BUN_INSTALL:-$HOME/.bun}"';
  const exportPath =
    'export PATH="$BUN_INSTALL/bin:$HOME/.local/bin:$HOME/.npm-global/bin:/.sprite/languages/bun/bin:/usr/local/bin:$PATH"';
  const ensureBun =
    'if ! bun --version >/dev/null 2>&1; then curl -fsSL https://bun.sh/install | bash && export PATH="$HOME/.bun/bin:$PATH"; fi';
  const installCli = "curl -fsSL https://openrouter.ai/labs/spawn/cli/install.sh | bash";
  const installCmd = `${exportBunInstall}; ${exportPath}; ${ensureBun}; ${installCli}`;

  const outcome = await asyncTryCatch(() =>
    withRetry("spawn CLI install", () => wrapSshCall(runner.runServer(installCmd)), 2, 5),
  );

  if (outcome.ok) {
    logInfo("Spawn CLI installed on VM");
    return;
  }
  logWarn("Spawn CLI install failed — recursive spawning will not be available on this VM");
}
|
|
|
|
/**
 * Copy local cloud credentials to the remote VM for recursive spawning (best-effort).
 *
 * Every locally present spawn config (all clouds plus OpenRouter) is written to
 * `~/.config/spawn/<name>.json` on the VM with mode 600. Files that cannot be
 * read, are empty, or fail to upload are skipped with a warning; success is
 * only reported when at least one file actually reached the VM.
 *
 * @param runner - Runner used to execute commands on the VM.
 * @throws Error if the base64 payload contains unexpected characters (guards
 *   the single-quoted shell interpolation; not expected to happen).
 */
export async function delegateCloudCredentials(runner: CloudRunner): Promise<void> {
  logStep("Delegating cloud credentials to VM...");

  // Delegate ALL cloud credentials so the child VM can spawn on any cloud,
  // not just the one the parent is running on. OpenRouter credentials are
  // always needed for child spawns.
  const configNames = [
    "hetzner",
    "digitalocean",
    "aws",
    "gcp",
    "sprite",
    "openrouter",
  ];
  const filesToDelegate = configNames
    .map((name) => ({
      localPath: getSpawnCloudConfigPath(name),
      remotePath: `~/.config/spawn/${name}.json`,
    }))
    .filter((file) => existsSync(file.localPath));

  if (filesToDelegate.length === 0) {
    logWarn("No credentials to delegate — child spawns may require manual auth");
    return;
  }

  // Ensure config dir exists on VM
  const mkdirResult = await asyncTryCatch(() =>
    runner.runServer("mkdir -p ~/.config/spawn && chmod 700 ~/.config/spawn"),
  );
  if (!mkdirResult.ok) {
    logWarn("Could not create config directory on VM");
    return;
  }

  let delegatedCount = 0;
  for (const file of filesToDelegate) {
    // The file can vanish or be unreadable between existsSync and the read.
    const readResult = tryCatch(() => readFileSync(file.localPath, "utf-8"));
    if (!readResult.ok) {
      logWarn(`Could not delegate ${file.remotePath}`);
      logDebug(getErrorMessage(readResult.error));
      continue;
    }

    const b64 = Buffer.from(readResult.data).toString("base64");
    if (b64.length === 0) {
      // An empty credentials file is useless to the child; skip it instead of aborting.
      logWarn(`Could not delegate ${file.remotePath} (local file is empty)`);
      continue;
    }
    if (!/^[A-Za-z0-9+/=]+$/.test(b64)) {
      throw new Error("Unexpected characters in base64 output");
    }

    const writeResult = await asyncTryCatch(() =>
      runner.runServer(`printf '%s' '${b64}' | base64 -d > ${file.remotePath} && chmod 600 ${file.remotePath}`),
    );
    if (writeResult.ok) {
      delegatedCount += 1;
    } else {
      logWarn(`Could not delegate ${file.remotePath}`);
    }
  }

  if (delegatedCount === 0) {
    logWarn("No credentials could be delegated — child spawns may require manual auth");
    return;
  }
  logInfo("Cloud credentials delegated to VM");
}
|
|
|
|
/**
 * Build the parent_id / depth fields for a spawn record from the environment
 * (set when running inside a child VM).
 *
 * - SPAWN_PARENT_ID set → `{ parent_id, depth }`
 * - only SPAWN_DEPTH > 0 → `{ depth }`
 * - otherwise → `{}`
 *
 * A missing or non-numeric SPAWN_DEPTH counts as 0.
 */
function getParentFields(): {
  parent_id?: string;
  depth?: number;
} {
  const depth = Number(process.env.SPAWN_DEPTH) || 0;
  const parentId = process.env.SPAWN_PARENT_ID;

  if (parentId) {
    return { parent_id: parentId, depth };
  }
  if (depth > 0) {
    return { depth };
  }
  return {};
}
|
|
|
|
/**
 * Append the recursive-spawn env vars to `envPairs` (used when --beta recursive is active).
 *
 * Adds, in order: SPAWN_PARENT_ID (this spawn's id), SPAWN_DEPTH (current depth + 1,
 * where a missing or non-numeric SPAWN_DEPTH counts as 0) and SPAWN_BETA=recursive.
 *
 * @param envPairs - `KEY=value` list, mutated in place.
 * @param spawnId - ID of the spawn that becomes the child's parent.
 */
export function appendRecursiveEnvVars(envPairs: string[], spawnId: string): void {
  const childDepth = (Number(process.env.SPAWN_DEPTH) || 0) + 1;
  envPairs.push(`SPAWN_PARENT_ID=${spawnId}`, `SPAWN_DEPTH=${childDepth}`, "SPAWN_BETA=recursive");
}
|
|
|
|
/** Options for runOrchestration (used in tests to inject mock dependencies). */
export interface OrchestrationOptions {
  /** Replacement for tryTarballInstall; resolves true when the agent was installed from a tarball. */
  tryTarball?: (runner: CloudRunner, agentName: string) => Promise<boolean>;
  /** Replacement for getOrPromptApiKey; resolves with the API key to use. */
  getApiKey?: (agentSlug?: string, cloudSlug?: string) => Promise<string>;
}
|
|
|
|
/**
 * Shape of the spawn preferences file.
 * Format: { "models": { "codex": "openai/gpt-5.3-codex", "openclaw": "anthropic/claude-sonnet-4.6" } }
 */
const PreferencesSchema = v.object({
  models: v.optional(v.record(v.string(), v.string())),
  starPromptShownAt: v.optional(v.string()),
});

/**
 * Load a preferred model from ~/.config/spawn/preferences.json.
 *
 * @param agentName - Key to look up under `models`.
 * @returns The configured model ID, or null if no preference is set, the file
 *   doesn't exist, or its content is not valid preferences JSON.
 */
function loadPreferredModel(agentName: string): string | null {
  const lookup = tryCatch(() => {
    const text = readFileSync(getSpawnPreferencesPath(), "utf-8");
    const validated = v.safeParse(PreferencesSchema, JSON.parse(text));
    if (validated.success) {
      return validated.output.models?.[agentName] ?? null;
    }
    return null;
  });

  // Read and JSON errors are treated the same as "no preference".
  if (!lookup.ok) {
    return null;
  }
  return lookup.data;
}
|
|
|
|
/** Values produced by provisioning that the post-install phase needs. */
interface ProvisionOutcome {
  apiKey: string;
  modelId: string | undefined;
  envContent: string;
}

/** Inputs shared by the provisioning helpers, bundled to keep signatures short. */
interface ProvisionContext {
  cloud: CloudOrchestrator;
  agent: AgentConfig;
  agentName: string;
  spawnId: string;
  serverName: string;
  betaFeatures: Set<string>;
  useTarball: boolean;
  options: OrchestrationOptions | undefined;
}

/** Run `attempt` until it succeeds; after each failure log it and ask the user (retryOrQuit). */
async function retryUntilOk<T>(attempt: () => Promise<T>, retryPrompt: string): Promise<T> {
  for (;;) {
    const r = await asyncTryCatch(() => attempt());
    if (r.ok) {
      return r.data;
    }
    logError(getErrorMessage(r.error));
    await retryOrQuit(retryPrompt);
  }
}

/** Pick the model ID (MODEL_ID env → saved preference → agent default); invalid IDs are ignored with a warning. */
function resolveModelId(agent: AgentConfig, agentName: string): string | undefined {
  const rawModelId = process.env.MODEL_ID || loadPreferredModel(agentName) || agent.modelDefault;
  const modelId = rawModelId && validateModelId(rawModelId) ? rawModelId : undefined;
  if (rawModelId && !modelId) {
    logWarn(`Ignoring invalid MODEL_ID: ${rawModelId}`);
  }
  return modelId;
}

/** Build the env config content for the agent (computed locally, no SSH needed). */
function buildEnvContent(ctx: ProvisionContext, apiKey: string, modelId: string | undefined): string {
  const envPairs = ctx.agent.envVars(apiKey);
  if (modelId && ctx.agent.modelEnvVar) {
    envPairs.push(`${ctx.agent.modelEnvVar}=${modelId}`);
  }
  if (ctx.betaFeatures.has("recursive")) {
    appendRecursiveEnvVars(envPairs, ctx.spawnId);
  }
  return generateEnvConfig(envPairs);
}

/** Persist the spawn record for a freshly created server. */
function recordSpawn(ctx: ProvisionContext, connection: VMConnection): void {
  const spawnName = process.env.SPAWN_NAME_KEBAB || process.env.SPAWN_NAME || undefined;
  saveSpawnRecord({
    id: ctx.spawnId,
    agent: ctx.agentName,
    cloud: ctx.cloud.cloudName,
    timestamp: new Date().toISOString(),
    ...(spawnName
      ? {
          name: spawnName,
        }
      : {}),
    ...getParentFields(),
    connection,
  });
}

/** Install the agent: skipped on snapshot boots, tarball first when enabled, live install (with retry) otherwise. */
async function installAgent(ctx: ProvisionContext): Promise<void> {
  const { cloud, agent, agentName, options } = ctx;
  if (cloud.skipAgentInstall) {
    logInfo("Snapshot boot — skipping agent install");
    return;
  }

  let installedFromTarball = false;
  if (cloud.cloudName !== "local" && !agent.skipTarball && ctx.useTarball) {
    const tarball = options?.tryTarball ?? tryTarballInstall;
    installedFromTarball = await tarball(cloud.runner, agentName);
  }
  if (!installedFromTarball) {
    await retryUntilOk(() => agent.install(), "Retry agent install?");
  }
}

/**
 * Fast mode: server boot runs concurrently with the API key prompt, the
 * account check and the pre-provision hook.
 */
async function provisionFast(ctx: ProvisionContext): Promise<ProvisionOutcome> {
  const { cloud, agent, agentName, serverName, options } = ctx;

  // Keep a dummy timer on the event loop so Bun doesn't exit prematurely.
  // When all concurrent promises settle (especially after Bun.serve.stop()
  // in the OAuth flow removes its handle), the event loop can appear empty
  // before the continuation starts new I/O — causing a silent exit(0).
  const keepAlive = setInterval(() => {}, 60_000);
  try {
    // Tracks whether the failure (if any) happened before or after the server existed,
    // so recovery never provisions a second server for a readiness timeout.
    const boot = { serverCreated: false };
    const serverBootPromise = (async () => {
      const conn = await cloud.createServer(serverName);
      boot.serverCreated = true;
      recordSpawn(ctx, conn);
      await cloud.waitForReady();
    })();

    const resolveApiKey = options?.getApiKey ?? getOrPromptApiKey;

    // These all run concurrently with server boot
    const [bootResult, apiKeyResult, accountResult, preProvisionResult] = await Promise.allSettled([
      serverBootPromise,
      resolveApiKey(agentName, cloud.cloudName),
      asyncTryCatch(async () => {
        await cloud.checkAccountReady?.();
      }),
      asyncTryCatch(async () => {
        await agent.preProvision?.();
      }),
    ]);

    // Hook failures are non-fatal, but report them like the sequential flow does.
    // Logged only after everything settled so they cannot interleave with the API key prompt.
    if (accountResult.status === "fulfilled" && !accountResult.value.ok) {
      logWarn("Account readiness check failed — proceeding anyway");
      logDebug(getErrorMessage(accountResult.value.error));
    }
    if (preProvisionResult.status === "fulfilled" && !preProvisionResult.value.ok) {
      logWarn("Pre-provision hook failed — continuing");
      logDebug(getErrorMessage(preProvisionResult.value.error));
    }

    // Server boot must succeed — recover with the same retry loops as the sequential flow.
    // (Re-running the concurrent path would re-prompt for API key, etc.)
    if (bootResult.status === "rejected") {
      logError(getErrorMessage(bootResult.reason));
      if (boot.serverCreated) {
        await retryOrQuit("Server may still be starting. Keep waiting?");
      } else {
        await retryOrQuit("Retry server creation?");
        const connection = await retryUntilOk(() => cloud.createServer(serverName), "Retry server creation?");
        recordSpawn(ctx, connection);
      }
      await retryUntilOk(() => cloud.waitForReady(), "Server may still be starting. Keep waiting?");
    }

    // API key must succeed
    if (apiKeyResult.status === "rejected") {
      throw apiKeyResult.reason;
    }
    const apiKey = apiKeyResult.value;

    const modelId = resolveModelId(agent, agentName);
    const envContent = buildEnvContent(ctx, apiKey, modelId);

    // Install agent — remote tarball, fallback to live install
    await installAgent(ctx);

    return { apiKey, modelId, envContent };
  } finally {
    // Always release the timer, including on errors, so a failed run cannot hang the process.
    clearInterval(keepAlive);
  }
}

/** Standard flow: every step runs one after another, each with its own retry handling. */
async function provisionSequential(ctx: ProvisionContext): Promise<ProvisionOutcome> {
  const { cloud, agent, agentName, serverName, options } = ctx;

  // 1b. Pre-flight account readiness check
  if (cloud.checkAccountReady) {
    const r = await asyncTryCatch(async () => {
      await cloud.checkAccountReady?.();
    });
    if (!r.ok) {
      logWarn("Account readiness check failed — proceeding anyway");
      logDebug(getErrorMessage(r.error));
    }
  }

  // 2. Get API key
  const resolveApiKey = options?.getApiKey ?? getOrPromptApiKey;
  const apiKey = await resolveApiKey(agentName, cloud.cloudName);

  // 3. Pre-provision hooks
  if (agent.preProvision) {
    const r = await asyncTryCatch(async () => {
      await agent.preProvision?.();
    });
    if (!r.ok) {
      logWarn("Pre-provision hook failed — continuing");
      logDebug(getErrorMessage(r.error));
    }
  }

  // 4. Model ID
  const modelId = resolveModelId(agent, agentName);

  // 5. Provision server (retry loop)
  const connection = await retryUntilOk(() => cloud.createServer(serverName), "Retry server creation?");
  recordSpawn(ctx, connection);

  // 6. Wait for readiness (retry loop)
  await retryUntilOk(() => cloud.waitForReady(), "Server may still be starting. Keep waiting?");

  // 7. Env config
  const envContent = buildEnvContent(ctx, apiKey, modelId);

  // 8. Install agent
  await installAgent(ctx);

  return { apiKey, modelId, envContent };
}

/**
 * Shared orchestration pipeline: authenticate, provision a server, install the
 * agent, inject its environment, then hand over to the post-install flow
 * (which ends in the interactive session).
 *
 * Fast mode (SPAWN_FAST=1 or the "parallel" beta feature) on non-local clouds
 * overlaps server boot with the API key prompt and pre-flight hooks; otherwise
 * the steps run sequentially.
 *
 * @param cloud - Cloud implementation to drive. `skipCloudInit` may be set on it.
 * @param agent - Agent configuration (install, env vars, hooks).
 * @param agentName - Agent slug used for records, preferences and tarballs.
 * @param options - Dependency overrides, used in tests.
 * @throws Whatever API key resolution or a non-retried step throws.
 */
export async function runOrchestration(
  cloud: CloudOrchestrator,
  agent: AgentConfig,
  agentName: string,
  options?: OrchestrationOptions,
): Promise<void> {
  logInfo(`${agent.name} on ${cloud.cloudLabel}`);
  process.stderr.write("\n");

  // 1. Authenticate with cloud provider
  await cloud.authenticate();

  const betaFeatures = new Set((process.env.SPAWN_BETA ?? "").split(",").filter(Boolean));
  const fastMode = process.env.SPAWN_FAST === "1" || betaFeatures.has("parallel");
  const useTarball = fastMode || betaFeatures.has("tarball");

  // Skip cloud-init for minimal-tier agents when using tarballs or snapshots.
  // Ubuntu 24.04 base images already have curl + git, so minimal agents (claude,
  // opencode, hermes) don't need the cloud-init package install step.
  // This saves ~30-60s by just waiting for SSH instead of polling for cloud-init completion.
  if (
    cloud.cloudName !== "local" &&
    (useTarball || cloud.skipAgentInstall) &&
    (agent.cloudInitTier === "minimal" || !agent.cloudInitTier)
  ) {
    cloud.skipCloudInit = true;
  }

  // 1b. Size/bundle selection (must happen before createServer)
  await cloud.promptSize();

  // 2. Provision server
  const spawnId = generateSpawnId();
  const serverName = await cloud.getServerName();

  const ctx: ProvisionContext = {
    cloud,
    agent,
    agentName,
    spawnId,
    serverName,
    betaFeatures,
    useTarball,
    options,
  };
  const outcome =
    fastMode && cloud.cloudName !== "local" ? await provisionFast(ctx) : await provisionSequential(ctx);

  // Inject env + continue with shared post-install flow
  await injectEnvVars(cloud, outcome.envContent);
  await postInstall(cloud, agent, agentName, outcome.apiKey, outcome.modelId, spawnId, options);
}
|
|
|
|
/**
 * Write the agent's env config to `~/.spawnrc` on the target (best-effort).
 *
 * The content travels base64-encoded. On POSIX targets the file is made
 * mode 600 and a `source ~/.spawnrc` hook is appended to the common shell rc
 * files that do not already contain one; on local Windows the file is written
 * via PowerShell. A failed setup only produces a warning.
 *
 * @param cloud - Cloud whose runner executes the setup command.
 * @param envContent - Env config text to install.
 * @throws Error if the base64 payload contains unexpected characters.
 */
async function injectEnvVars(cloud: CloudOrchestrator, envContent: string): Promise<void> {
  logStep("Setting up environment variables...");

  const encoded = Buffer.from(envContent).toString("base64");
  // The payload is interpolated inside single quotes below; refuse anything unexpected.
  const isSafeBase64 = /^[A-Za-z0-9+/=]+$/.test(encoded);
  if (!isSafeBase64) {
    throw new Error("Unexpected characters in base64 output");
  }

  let envSetupCmd: string;
  if (cloud.cloudName === "local" && isWindows()) {
    envSetupCmd = [
      `$bytes = [Convert]::FromBase64String('${encoded}'); `,
      '[IO.File]::WriteAllBytes("$HOME/.spawnrc", $bytes)',
    ].join("");
  } else {
    envSetupCmd = [
      `printf '%s' '${encoded}' | base64 -d > ~/.spawnrc && chmod 600 ~/.spawnrc; `,
      "for _rc in ~/.bashrc ~/.profile ~/.bash_profile ~/.zshrc; do ",
      `grep -q 'source ~/.spawnrc' "$_rc" 2>/dev/null || echo '[ -f ~/.spawnrc ] && source ~/.spawnrc' >> "$_rc"; `,
      "done",
    ].join("");
  }

  const setupOutcome = await asyncTryCatch(() =>
    withRetry("env setup", () => wrapSshCall(cloud.runner.runServer(envSetupCmd)), 2, 5),
  );
  if (setupOutcome.ok) {
    return;
  }
  logWarn("Environment setup had errors");
}
|
|
|
|
/**
 * Parse SPAWN_ENABLED_STEPS into the set of enabled setup steps.
 * Returns undefined when the variable is unset (no explicit selection) and an
 * empty set when it is set but lists nothing; unknown names are dropped with a warning.
 */
async function resolveEnabledSteps(agentName: string): Promise<Set<string> | undefined> {
  const stepsEnv = process.env.SPAWN_ENABLED_STEPS;
  if (stepsEnv === undefined) {
    return undefined;
  }
  const stepNames = stepsEnv.split(",").filter(Boolean);
  if (stepNames.length === 0) {
    return new Set<string>();
  }
  const { validateStepNames } = await import("./agents.js");
  const { valid, invalid } = validateStepNames(agentName, stepNames);
  if (invalid.length > 0) {
    logWarn(`Unknown setup steps ignored: ${invalid.join(", ")}`);
  }
  return new Set(valid);
}

/**
 * Open the web-dashboard tunnel (or, on local, just the browser) and save the
 * tunnel metadata for the spawn. Returns the tunnel handle when one was started.
 */
async function setupDashboardTunnel(
  cloud: CloudOrchestrator,
  tunnelCfg: NonNullable<AgentConfig["tunnel"]>,
  spawnId: string,
): Promise<SshTunnelHandle | undefined> {
  let handle: SshTunnelHandle | undefined;

  if (cloud.getConnectionInfo) {
    const getConnInfo = cloud.getConnectionInfo; // capture for closure
    const tunnelResult = await asyncTryCatchIf(isOperationalError, async () => {
      const conn = getConnInfo();
      const keys = await ensureSshKeys();
      const started = await startSshTunnel({
        host: conn.host,
        user: conn.user,
        remotePort: tunnelCfg.remotePort,
        sshKeyOpts: getSshKeyOpts(keys),
      });
      // Publish the handle before opening the browser so it is kept even if that step fails.
      handle = started;
      if (tunnelCfg.browserUrl) {
        const url = tunnelCfg.browserUrl(started.localPort);
        if (url) {
          openBrowser(url);
        }
      }
    });
    if (!tunnelResult.ok) {
      logWarn("Web dashboard tunnel failed — use the TUI instead");
    }
  } else if (cloud.cloudName === "local" && tunnelCfg.browserUrl) {
    // No tunnel needed locally: the dashboard is reachable on the remote port itself.
    const url = tunnelCfg.browserUrl(tunnelCfg.remotePort);
    if (url) {
      openBrowser(url);
    }
  }

  // Metadata is saved regardless of whether the tunnel came up.
  const tunnelMeta: Record<string, string> = {
    tunnel_remote_port: String(tunnelCfg.remotePort),
  };
  if (tunnelCfg.browserUrl) {
    const templateUrl = tunnelCfg.browserUrl(0);
    if (templateUrl) {
      tunnelMeta.tunnel_browser_url_template = templateUrl.replace("localhost:0", "localhost:__PORT__");
    }
  }
  saveMetadata(tunnelMeta, spawnId);

  return handle;
}

/** Walk the user through Telegram pairing and approve the entered code on the target. */
async function pairTelegram(runner: CloudRunner): Promise<void> {
  const ocPath = "export PATH=$HOME/.npm-global/bin:$HOME/.bun/bin:$HOME/.local/bin:$PATH";

  logStep("Telegram pairing...");
  logInfo("To pair your Telegram account:");
  logInfo(" 1. Open Telegram on your phone");
  logInfo(" 2. Search for the bot you created with @BotFather");
  logInfo(' 3. Send it any message (e.g. "hello")');
  logInfo(" 4. The bot will reply with a pairing code");
  logInfo(" 5. Enter the code below");
  process.stderr.write("\n");

  const pairingCode = (await prompt("Telegram pairing code: ")).trim();
  if (!pairingCode) {
    logInfo("No code entered — pair later via: openclaw pairing approve telegram <CODE>");
    return;
  }

  const escaped = shellQuote(pairingCode);
  const result = await asyncTryCatchIf(isOperationalError, () =>
    runner.runServer(`source ~/.spawnrc 2>/dev/null; ${ocPath}; openclaw pairing approve telegram ${escaped}`),
  );
  if (result.ok) {
    logInfo("Telegram paired successfully");
  } else {
    logWarn("Pairing failed — you can pair later via: openclaw pairing approve telegram <CODE>");
  }
}

/**
 * Shared post-install flow: agent configuration, optional setup steps, tunnel,
 * then the interactive agent session. Always ends by calling process.exit —
 * with 0 in headless mode, otherwise with the session's exit code.
 *
 * @param cloud - Cloud the agent was provisioned on.
 * @param agent - Agent configuration (configure/preLaunch hooks, launch command, tunnel).
 * @param agentName - Agent slug.
 * @param apiKey - API key passed to the agent's configure hook.
 * @param modelId - Validated model ID, if any.
 * @param spawnId - ID of this spawn, used for saved metadata and child history.
 * @param _options - Unused; kept for signature parity with runOrchestration.
 */
async function postInstall(
  cloud: CloudOrchestrator,
  agent: AgentConfig,
  agentName: string,
  apiKey: string,
  modelId: string | undefined,
  spawnId: string,
  _options?: OrchestrationOptions,
): Promise<void> {
  // Parse enabled setup steps
  const enabledSteps = await resolveEnabledSteps(agentName);
  // With no explicit selection every default step runs.
  const stepSelected = (step: string): boolean => !enabledSteps || enabledSteps.has(step);
  const isRemote = cloud.cloudName !== "local";

  // Agent-specific configuration
  if (agent.configure) {
    const configResult = await asyncTryCatch(() =>
      withRetry("agent config", () => wrapSshCall(agent.configure!(apiKey, modelId, enabledSteps)), 2, 5),
    );
    if (!configResult.ok) {
      logWarn("Agent configuration failed (continuing with defaults)");
    }
  }

  // GitHub CLI setup
  if (stepSelected("github")) {
    await offerGithubAuth(cloud.runner, enabledSteps?.has("github"));
  }

  // Auto-update service
  if (isRemote && agent.updateCmd && stepSelected("auto-update")) {
    await setupAutoUpdate(cloud.runner, agentName, agent.updateCmd);
  }

  // Spawn CLI + skill injection (recursive spawn)
  // The "spawn" step is defaultOn when --beta recursive is active, so it should
  // run when no explicit steps are selected AND the beta flag is set.
  const betaFeaturesPost = new Set((process.env.SPAWN_BETA ?? "").split(",").filter(Boolean));
  if (isRemote && betaFeaturesPost.has("recursive") && stepSelected("spawn")) {
    await installSpawnCli(cloud.runner);
    await delegateCloudCredentials(cloud.runner);
    await injectSpawnSkill(cloud.runner, agentName);
  }

  // Pre-launch hooks (retry loop)
  if (agent.preLaunch) {
    let preLaunchDone = false;
    while (!preLaunchDone) {
      const r = await asyncTryCatch(() => agent.preLaunch!());
      if (r.ok) {
        preLaunchDone = true;
      } else {
        logError(getErrorMessage(r.error));
        await retryOrQuit("Retry pre-launch setup?");
      }
    }
  }

  // SSH tunnel for web dashboard
  const tunnelHandle = agent.tunnel ? await setupDashboardTunnel(cloud, agent.tunnel, spawnId) : undefined;

  // Channel setup
  if (enabledSteps?.has("telegram")) {
    await pairTelegram(cloud.runner);
  }

  if (agent.preLaunchMsg) {
    process.stderr.write("\n");
    logInfo(`Tip: ${agent.preLaunchMsg}`);
  }

  // Launch agent
  logInfo(`Agent setup complete — ${agent.name} is ready on ${cloud.cloudLabel}`);
  process.stderr.write("\n");

  const launchCmd = agent.launchCmd();
  saveLaunchCmd(launchCmd, spawnId);

  // In headless mode, provisioning is done — skip the interactive session.
  // The VM is healthy and the agent is installed; callers can SSH in or use `spawn connect`.
  if (process.env.SPAWN_HEADLESS === "1") {
    logInfo("Headless mode — provisioning complete. Skipping interactive session.");
    tunnelHandle?.stop();
    if (isRemote) {
      await pullChildHistory(cloud.runner, spawnId);
    }
    process.exit(0);
  }

  logStep("Provisioning complete. Connecting to agent session...");

  // Reset terminal state before handing off to the interactive SSH session.
  // @clack/prompts may have left the cursor hidden or set ANSI attributes
  // (e.g. color, bold) that would corrupt the remote agent's TUI rendering.
  if (process.stderr.isTTY) {
    process.stderr.write("\x1b[?25h\x1b[0m");
  }

  prepareStdinForHandoff();

  const sessionCmd = isRemote ? wrapWithRestartLoop(launchCmd) : launchCmd;

  // Auto-reconnect on connection drops. Ctrl+C (exit 0 or 130) exits immediately.
  // Only applies to remote clouds — local sessions don't have connection drops.
  // SSH exits 255 on connection loss; Sprite CLI exits 1 on "connection closed".
  const maxReconnects = isRemote ? 5 : 0;
  const isConnectionDrop = (code: number): boolean => code === 255 || (cloud.cloudName === "sprite" && code === 1);

  let exitCode = await cloud.interactiveSession(sessionCmd);
  let attempt = 0;
  while (isConnectionDrop(exitCode) && attempt < maxReconnects) {
    attempt += 1;
    process.stderr.write("\n");
    logWarn(`Connection lost. Reconnecting... (${attempt}/${maxReconnects})`);
    await sleep(3000);
    prepareStdinForHandoff();
    exitCode = await cloud.interactiveSession(sessionCmd);
  }

  if (isConnectionDrop(exitCode)) {
    process.stderr.write("\n");
    logWarn("Could not reconnect. Server is still running.");
    logInfo("Reconnect manually: spawn connect");
  }

  tunnelHandle?.stop();

  // Pull child's spawn history back to the parent for `spawn tree`
  if (isRemote) {
    await pullChildHistory(cloud.runner, spawnId);
  }

  process.exit(exitCode);
}
|
|
|
|
/**
 * Pull spawn history from a child VM and merge it into local history (best-effort).
 *
 * First tells the child to recursively pull from ITS children via
 * `spawn pull-history`, then downloads the child's history.json.
 * This enables `spawn tree` to show the full recursive hierarchy.
 *
 * Any failure is logged at debug level and otherwise ignored. The local temp
 * file is removed on every path once a download has been attempted.
 *
 * @param runner - Runner connected to the child VM.
 * @param parentSpawnId - ID of the local spawn the child records are merged under.
 */
async function pullChildHistory(runner: CloudRunner, parentSpawnId: string): Promise<void> {
  const result = await asyncTryCatch(async () => {
    const tmpPath = `${getTmpDir()}/child-history-${parentSpawnId}.json`;

    // Recursive pull: tell the child to pull from ALL its children first.
    const recursePull = await asyncTryCatch(() =>
      runner.runServer(
        'export PATH="$HOME/.local/bin:$HOME/.bun/bin:$PATH"; spawn pull-history 2>/dev/null || true',
        120,
      ),
    );
    if (!recursePull.ok) {
      logDebug("Recursive history pull skipped");
    }

    // Copy the child's history to a temp location then download
    const copyResult = await asyncTryCatch(() =>
      runner.runServer(
        "cp ~/.spawn/history.json /tmp/_spawn_history.json 2>/dev/null || cp ~/.config/spawn/history.json /tmp/_spawn_history.json 2>/dev/null || echo '{}' > /tmp/_spawn_history.json",
      ),
    );
    if (!copyResult.ok) {
      return;
    }

    try {
      await runner.downloadFile("/tmp/_spawn_history.json", tmpPath);

      const json = readFileSync(tmpPath, "utf-8");
      const ChildHistorySchema = v.object({
        version: v.optional(v.number()),
        records: v.array(SpawnRecordSchema),
      });
      const parsed = parseJsonWith(json, ChildHistorySchema);
      if (!parsed || parsed.records.length === 0) {
        return;
      }

      // Keep only records with an id, copying just the fields that exist on SpawnRecord.
      const validRecords: SpawnRecord[] = [];
      for (const r of parsed.records) {
        if (!r.id) {
          continue;
        }
        validRecords.push({
          id: r.id,
          agent: r.agent,
          cloud: r.cloud,
          timestamp: r.timestamp,
          ...(r.name
            ? {
                name: r.name,
              }
            : {}),
          ...(r.parent_id
            ? {
                parent_id: r.parent_id,
              }
            : {}),
          ...(r.depth !== undefined
            ? {
                depth: r.depth,
              }
            : {}),
          ...(r.connection
            ? {
                connection: r.connection,
              }
            : {}),
        });
      }

      if (validRecords.length > 0) {
        mergeChildHistory(parentSpawnId, validRecords);
        logInfo(`Pulled ${validRecords.length} spawn record(s) from child VM`);
      }
    } finally {
      // Runs on early returns and on errors too, so the temp file never lingers.
      tryCatch(() => unlinkSync(tmpPath));
    }
  });

  if (!result.ok) {
    logDebug(`Could not pull child history: ${getErrorMessage(result.error)}`);
  }
}
|