spawn/packages/cli/src/shared/orchestrate.ts
Ahmed Abushagur d2d71b17ef
feat: add --model flag and preferences file for LLM model override (#2543)
Adds --model / -m CLI flag to override the agent's default LLM model:
  spawn codex gcp --model openai/gpt-5.3-codex

Also supports persistent per-agent model preferences via config file at
~/.config/spawn/preferences.json:
  { "models": { "codex": "openai/gpt-5.3-codex" } }

Priority: --model flag > preferences file > agent default.

This enables a future web UI to pass model selection via CLI args when
invoking spawn programmatically to provision machines.

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
Co-authored-by: L <6723574+louisgv@users.noreply.github.com>
2026-03-12 18:47:09 -04:00

305 lines
10 KiB
TypeScript

// shared/orchestrate.ts — Shared orchestration pipeline for deploying agents
// Each cloud implements CloudOrchestrator and calls runOrchestration().
import type { VMConnection } from "../history.js";
import type { CloudRunner } from "./agent-setup";
import type { AgentConfig } from "./agents";
import type { SshTunnelHandle } from "./ssh";
import { readFileSync } from "node:fs";
import * as v from "valibot";
import { generateSpawnId, saveLaunchCmd, saveSpawnRecord } from "../history.js";
import { offerGithubAuth, wrapSshCall } from "./agent-setup";
import { tryTarballInstall } from "./agent-tarball";
import { generateEnvConfig } from "./agents";
import { getOrPromptApiKey } from "./oauth";
import { getSpawnPreferencesPath } from "./paths";
import { asyncTryCatch, asyncTryCatchIf, isFileError, isOperationalError, tryCatchIf } from "./result.js";
import { startSshTunnel } from "./ssh";
import { ensureSshKeys, getSshKeyOpts } from "./ssh-keys";
import { getErrorMessage } from "./type-guards";
import {
logDebug,
logInfo,
logStep,
logWarn,
openBrowser,
prepareStdinForHandoff,
validateModelId,
withRetry,
} from "./ui";
/**
 * Contract a cloud provider implements so runOrchestration() can drive a deployment.
 * Member notes below describe how runOrchestration() in this file uses each member.
 */
export interface CloudOrchestrator {
/**
 * Cloud identifier: stored in the spawn record and passed to the API-key resolver.
 * The value "local" makes runOrchestration() skip the tarball install and the restart loop.
 */
cloudName: string;
/** Display name used in log messages. */
cloudLabel: string;
/** Runner handed to the tarball installer and GitHub auth helper, and used to run the env-setup command. */
runner: CloudRunner;
/** When true, skip tarball + agent install (e.g. booting from a pre-baked snapshot). */
skipAgentInstall?: boolean;
/** Awaited first, before any other orchestration step; a rejection is not caught by runOrchestration(). */
authenticate(): Promise<void>;
/** Optional pre-flight check; if it rejects, runOrchestration() logs a warning and continues. */
checkAccountReady?(): Promise<void>;
/** Size/bundle selection; awaited after the model ID is resolved and before the server is created. */
promptSize(): Promise<void>;
/** Create the server with the given name; the resolved connection is saved in the spawn record. */
createServer(name: string): Promise<VMConnection>;
/** Resolve the name that is then passed to createServer(). */
getServerName(): Promise<string>;
/** Awaited after createServer() and before the agent is installed. */
waitForReady(): Promise<void>;
/** Run `cmd` as the interactive session; the resolved number is used as the process exit code. */
interactiveSession(cmd: string): Promise<number>;
/** Return SSH connection info for tunnel support. Omit for non-SSH clouds. */
getConnectionInfo?(): {
host: string;
user: string;
};
}
/**
 * Build a shell script that re-runs `cmd` until it exits cleanly.
 *
 * The generated script runs `cmd`, and on a non-zero exit status waits five
 * seconds and runs it again, giving up after ten restarts. An exit status of 0
 * leaves the loop immediately. The script finishes by exiting with the last
 * recorded status of `cmd`.
 *
 * Only the cloud path uses this; local launches run the command unwrapped.
 *
 * @param cmd Shell command to supervise; inserted verbatim as the loop body.
 * @returns The newline-joined shell script.
 */
function wrapWithRestartLoop(cmd: string): string {
  // The script sticks to constructs bash 3.x accepts (no ((var++)), no set -u).
  const restartNotice =
    ' printf "\\n[spawn] Agent exited with code %d. Restarting in 5s (%d/%d)...\\n" "$_spawn_exit" "$_spawn_restarts" "$_spawn_max" >&2';
  const giveUpNotice = ' printf "\\n[spawn] Agent crashed %d times. Giving up.\\n" "$_spawn_max" >&2';

  // Counter and limit used by the loop.
  const setup = ["_spawn_restarts=0", "_spawn_max=10"];

  // Run the command; stop on success, otherwise count the failure and retry after a pause.
  const loop = [
    'while [ "$_spawn_restarts" -lt "$_spawn_max" ]; do',
    " " + cmd,
    " _spawn_exit=$?",
    ' if [ "$_spawn_exit" -eq 0 ]; then break; fi',
    " _spawn_restarts=$((_spawn_restarts + 1))",
    restartNotice,
    " sleep 5",
    "done",
  ];

  // Report exhaustion of the restart budget and propagate the last exit status.
  const teardown = [
    'if [ "$_spawn_restarts" -ge "$_spawn_max" ]; then',
    giveUpNotice,
    "fi",
    'exit "${_spawn_exit:-0}"',
  ];

  return setup.concat(loop, teardown).join("\n");
}
/** Options for runOrchestration (used in tests to inject mock dependencies). */
export interface OrchestrationOptions {
/**
 * Replacement for tryTarballInstall. runOrchestration() calls it with the cloud's runner and
 * the agent name; a resolved `true` means agent.install() is not called.
 */
tryTarball?: (runner: CloudRunner, agentName: string) => Promise<boolean>;
/**
 * Replacement for getOrPromptApiKey. runOrchestration() calls it with the agent name and the
 * cloud's cloudName and uses the resolved string as the API key.
 */
getApiKey?: (agentSlug?: string, cloudSlug?: string) => Promise<string>;
}
/**
 * Validation schema for the parts of ~/.config/spawn/preferences.json read here.
 * Format: { "models": { "codex": "openai/gpt-5.3-codex", "openclaw": "anthropic/claude-sonnet-4.6" } }
 * The "models" key is optional; when present it maps agent names to model IDs.
 */
const PreferencesSchema = v.object({
  models: v.optional(v.record(v.string(), v.string())),
});

/**
 * Look up the preferred model for an agent in the spawn preferences file.
 *
 * @param agentName Key to look up under "models".
 * @returns The configured model ID, or null when the file content does not match
 *   PreferencesSchema, when there is no entry for `agentName`, or when reading
 *   fails with an error accepted by isFileError.
 *
 * NOTE(review): errors that isFileError does not accept are presumably re-thrown
 * by tryCatchIf — that may include the SyntaxError JSON.parse raises on a
 * malformed file. Confirm against the helpers in ./result.js.
 */
function loadPreferredModel(agentName: string): string | null {
  const lookup = tryCatchIf(isFileError, () => {
    const fileText = readFileSync(getSpawnPreferencesPath(), "utf-8");
    const validated = v.safeParse(PreferencesSchema, JSON.parse(fileText));
    return validated.success ? (validated.output.models?.[agentName] ?? null) : null;
  });
  if (!lookup.ok) {
    return null;
  }
  return lookup.data;
}
/**
 * Run the shared deployment pipeline for one agent on one cloud.
 *
 * Steps, in order: cloud authentication, optional account readiness check,
 * API key resolution, optional pre-provision hook, model ID resolution,
 * size selection, server creation (recorded via saveSpawnRecord), readiness
 * wait, agent install, environment injection into ~/.spawnrc, optional agent
 * configuration, optional GitHub auth, optional pre-launch hook, optional web
 * dashboard tunnel, and finally the interactive session.
 *
 * On completion of the interactive session this function calls
 * process.exit() with the session's exit code, so the returned promise does
 * not resolve in normal operation.
 *
 * Failures of the account readiness check, pre-provision hook, environment
 * setup, agent configuration and tunnel setup are logged as warnings and do
 * not abort the run. Errors from the other awaited steps are not caught here
 * and reject the returned promise.
 *
 * @param cloud Cloud-specific operations used to provision and connect.
 * @param agent Agent definition (install, configure, launch command, hooks).
 * @param agentName Agent key used for history records, preferences lookup and API key resolution.
 * @param options Optional overrides for the tarball installer and API key resolver (used in tests).
 */
export async function runOrchestration(
  cloud: CloudOrchestrator,
  agent: AgentConfig,
  agentName: string,
  options?: OrchestrationOptions,
): Promise<void> {
  logInfo(`${agent.name} on ${cloud.cloudLabel}`);
  process.stderr.write("\n");

  // 1. Authenticate with cloud provider
  await cloud.authenticate();

  // 1b. Pre-flight account readiness check (billing, email verification, etc.)
  // Uses try/catch (not guarded) because hooks can throw ANY provider-specific error.
  if (cloud.checkAccountReady) {
    const r = await asyncTryCatch(() => cloud.checkAccountReady!());
    if (!r.ok) {
      logWarn("Account readiness check failed — proceeding anyway");
      logDebug(getErrorMessage(r.error));
    }
  }

  // 2. Get API key (immediately after cloud auth — before any other prompts
  // so the "opening browser" message leads directly to OpenRouter OAuth)
  const resolveApiKey = options?.getApiKey ?? getOrPromptApiKey;
  const apiKey = await resolveApiKey(agentName, cloud.cloudName);

  // 3. Pre-provision hooks (e.g., GitHub auth prompt — non-fatal)
  // Uses try/catch (not guarded) because hooks can throw ANY provider-specific error.
  if (agent.preProvision) {
    const r = await asyncTryCatch(() => agent.preProvision!());
    if (!r.ok) {
      logWarn("Pre-provision hook failed — continuing");
      logDebug(getErrorMessage(r.error));
    }
  }

  // 4. Model ID — priority: --model flag (MODEL_ID env) > preferences file > agent default
  // The first non-empty candidate wins; if it fails validation no model ID is passed on.
  const rawModelId = process.env.MODEL_ID || loadPreferredModel(agentName) || agent.modelDefault;
  const modelId = rawModelId && validateModelId(rawModelId) ? rawModelId : undefined;
  if (rawModelId && !modelId) {
    logWarn(`Ignoring invalid MODEL_ID: ${rawModelId}`);
  }

  // 5. Size/bundle selection
  await cloud.promptSize();

  // 6. Provision server
  const spawnId = generateSpawnId();
  const serverName = await cloud.getServerName();
  const connection = await cloud.createServer(serverName);

  // 6b. Record the spawn atomically with connection data
  const spawnName = process.env.SPAWN_NAME_KEBAB || process.env.SPAWN_NAME || undefined;
  saveSpawnRecord({
    id: spawnId,
    agent: agentName,
    cloud: cloud.cloudName,
    timestamp: new Date().toISOString(),
    ...(spawnName
      ? {
          name: spawnName,
        }
      : {}),
    connection,
  });

  // 7. Wait for readiness
  await cloud.waitForReady();
  const envContent = generateEnvConfig(agent.envVars(apiKey));

  // 8. Install agent (skip entirely for snapshot boots, try tarball first on cloud VMs)
  if (cloud.skipAgentInstall) {
    logInfo("Snapshot boot — skipping agent install");
  } else {
    let installedFromTarball = false;
    // Tarball install is opt-in: SPAWN_BETA is a comma-separated feature list.
    const betaFeatures = new Set((process.env.SPAWN_BETA ?? "").split(",").filter(Boolean));
    if (cloud.cloudName !== "local" && !agent.skipTarball && betaFeatures.has("tarball")) {
      const tarball = options?.tryTarball ?? tryTarballInstall;
      installedFromTarball = await tarball(cloud.runner, agentName);
    }
    if (!installedFromTarball) {
      await agent.install();
    }
  }

  // 9. Inject environment variables via .spawnrc
  // The env file is sent base64-encoded, decoded into ~/.spawnrc, and a guarded
  // `source` line is appended to each shell rc file that does not already have one.
  logStep("Setting up environment variables...");
  const envB64 = Buffer.from(envContent).toString("base64");
  const envResult = await asyncTryCatch(() =>
    withRetry(
      "env setup",
      () =>
        wrapSshCall(
          cloud.runner.runServer(
            `printf '%s' '${envB64}' | base64 -d > ~/.spawnrc && chmod 600 ~/.spawnrc; ` +
              "for _rc in ~/.bashrc ~/.profile ~/.bash_profile ~/.zshrc; do " +
              `grep -q 'source ~/.spawnrc' "$_rc" 2>/dev/null || echo '[ -f ~/.spawnrc ] && source ~/.spawnrc' >> "$_rc"; ` +
              "done",
          ),
        ),
      2,
      5,
    ),
  );
  if (!envResult.ok) {
    logWarn("Environment setup had errors");
  }

  // 10. Parse enabled setup steps from env (set by interactive/run prompts)
  // An unset variable leaves enabledSteps undefined; a set-but-empty one yields an empty set.
  let enabledSteps: Set<string> | undefined;
  const stepsEnv = process.env.SPAWN_ENABLED_STEPS;
  if (stepsEnv !== undefined) {
    enabledSteps = new Set(stepsEnv.split(",").filter(Boolean));
  }

  // 10b. Agent-specific configuration
  if (agent.configure) {
    const configResult = await asyncTryCatch(() =>
      withRetry("agent config", () => wrapSshCall(agent.configure!(apiKey, modelId, enabledSteps)), 2, 5),
    );
    if (!configResult.ok) {
      logWarn("Agent configuration failed (continuing with defaults)");
    }
  }

  // GitHub CLI setup (skip if user unchecked in setup options)
  if (!enabledSteps || enabledSteps.has("github")) {
    await offerGithubAuth(cloud.runner);
  }

  // 11. Pre-launch hooks (e.g. OpenClaw gateway)
  if (agent.preLaunch) {
    await agent.preLaunch();
  }

  // 11b. SSH tunnel for web dashboard
  let tunnelHandle: SshTunnelHandle | undefined;
  if (agent.tunnel) {
    if (cloud.getConnectionInfo) {
      // SSH-based cloud: tunnel the remote port to localhost
      const tunnelResult = await asyncTryCatchIf(isOperationalError, async () => {
        const conn = cloud.getConnectionInfo();
        const keys = await ensureSshKeys();
        tunnelHandle = await startSshTunnel({
          host: conn.host,
          user: conn.user,
          remotePort: agent.tunnel.remotePort,
          sshKeyOpts: getSshKeyOpts(keys),
        });
        if (agent.tunnel.browserUrl) {
          const url = agent.tunnel.browserUrl(tunnelHandle.localPort);
          if (url) {
            openBrowser(url);
          }
        }
      });
      if (!tunnelResult.ok) {
        logWarn("Web dashboard tunnel failed — use the TUI instead");
      }
    } else if (cloud.cloudName === "local") {
      // Local: no tunnel needed, open browser directly
      if (agent.tunnel.browserUrl) {
        const url = agent.tunnel.browserUrl(agent.tunnel.remotePort);
        if (url) {
          openBrowser(url);
        }
      }
    }
  }

  // 11c. Agent-specific pre-launch tip (e.g. channel setup ordering hint)
  if (agent.preLaunchMsg) {
    process.stderr.write("\n");
    logInfo(`Tip: ${agent.preLaunchMsg}`);
  }

  // 12. Launch interactive session
  logInfo(`${agent.name} is ready`);
  process.stderr.write("\n");
  logInfo(`${cloud.cloudLabel} setup completed successfully!`);
  process.stderr.write("\n");
  logStep("Starting agent...");

  // Clean up stdin state accumulated during provisioning (readline, @clack/prompts
  // raw mode, keypress listeners) so Bun.spawn gets a pristine FD handoff
  prepareStdinForHandoff();
  const launchCmd = agent.launchCmd();
  saveLaunchCmd(launchCmd, spawnId);

  // Wrap in restart loop for cloud VMs — not for local execution
  const sessionCmd = cloud.cloudName === "local" ? launchCmd : wrapWithRestartLoop(launchCmd);
  let exitCode: number;
  try {
    exitCode = await cloud.interactiveSession(sessionCmd);
  } finally {
    // Stop the tunnel whether the session resolved or rejected, so a failed
    // session does not skip the tunnel shutdown.
    if (tunnelHandle) {
      tunnelHandle.stop();
    }
  }
  process.exit(exitCode);
}