test(cli): subprocess integration test harness + regression suite for opencode run (#28230)

2026-05-20 18:01:41 +00:00 · 2026-05-18 18:32:20 -04:00 · 2026-05-18 18:32:20 -04:00 · 0f3d168fdd
commit 0f3d168fdd
parent ce09fc8356
4 changed files with 317 additions and 35 deletions
--- a/packages/opencode/test/cli/run/run-process.test.ts
+++ b/packages/opencode/test/cli/run/run-process.test.ts
@ -0,0 +1,84 @@
+// Subprocess integration tests for `opencode run` (non-interactive mode).
+// These exercise the real CLI entry point in a child process, talking to a
+// TestLLMServer that runs inside the test process (not inside the CLI
+// subprocess). See `test/lib/run-process.ts` for the harness — each test uses
+// `opencode.run(message, opts?)` to spawn `bun run src/index.ts run ...` with
+// `OPENCODE_CONFIG_CONTENT` providing the test provider config inline.
+import { describe, expect } from "bun:test"
+import { Effect } from "effect"
+import { runIt } from "../../lib/run-process"
+
+describe("opencode run (non-interactive subprocess)", () => {
+  // Happy path: the prompt completes, the response reaches stdout, and the
+  // process exits 0. If this fails, the others likely will too — debug here first.
+  runIt.live(
+    "exits 0 and writes the response to stdout on a successful prompt",
+    ({ llm, opencode }) =>
+      Effect.gen(function* () {
+        yield* llm.text("hello from the test llm")
+        const result = yield* opencode.run("say hi")
+        opencode.expectExit(result, 0)
+        expect(result.stdout).toContain("hello from the test llm")
+      }),
+    60_000,
+  )
+
+  // Regression for #27371: an unknown model used to hang the process forever
+  // waiting on a session.status === idle event that never arrived. The fix
+  // makes the SDK call surface an error promptly so the process exits nonzero.
+  // We assert nonzero exit AND wall-clock under the harness timeout — a hang
+  // would run into the timeout and fail the duration assertion.
+  runIt.live(
+    "exits nonzero promptly when the model is unknown (regression for #27371)",
+    ({ opencode }) =>
+      Effect.gen(function* () {
+        // One constant feeds both the spawn timeout and the duration
+        // assertion so the two cannot drift apart when someone tunes it.
+        const timeoutMs = 15_000
+        const result = yield* opencode.run("say hi", {
+          model: "test/nonexistent-model",
+          timeoutMs,
+        })
+        expect(result.exitCode).not.toBe(0)
+        expect(result.durationMs).toBeLessThan(timeoutMs)
+      }),
+    30_000,
+  )
+
+  // Locks in the current behavior: when the LLM stream errors mid-response
+  // (the prompt was accepted, then the upstream provider failed), opencode
+  // emits a session.error event and the process exits 0 today.
+  //
+  // This is debatable — a future cleanup might flip it to exit 1. If you're
+  // changing this expectation, do it deliberately and say so in the PR.
+  runIt.live(
+    "mid-stream LLM error still exits 0 today (contract lock-in)",
+    ({ llm, opencode }) =>
+      Effect.gen(function* () {
+        yield* llm.fail("upstream provider exploded mid-stream")
+        const result = yield* opencode.run("trigger midstream error", { timeoutMs: 30_000 })
+        // Same helper as the other tests: on a mismatch it dumps the captured
+        // stderr/stdout, which a bare `expect(exitCode)` would not.
+        opencode.expectExit(result, 0)
+      }),
+    60_000,
+  )
+
+  // --format json puts one JSON object per line on stdout for each emitted
+  // event. Consumers (CI scripts, tooling) parse this stream. Asserts the
+  // shape so a future event-emit change has to update this expectation.
+  runIt.live(
+    "--format json emits parseable line-delimited JSON to stdout",
+    ({ llm, opencode }) =>
+      Effect.gen(function* () {
+        yield* llm.text("structured output")
+        const result = yield* opencode.run("say hi", { format: "json" })
+        opencode.expectExit(result, 0)
+
+        const events = opencode.parseJsonEvents(result.stdout)
+        expect(events.length).toBeGreaterThan(0)
+        // Every event must carry the two envelope fields consumers key on.
+        for (const evt of events) {
+          expect(typeof evt.type).toBe("string")
+          expect(typeof evt.sessionID).toBe("string")
+        }
+        // At least one `text` event should appear (its payload is not
+        // inspected here — only its presence).
+        const text = events.find((e) => e.type === "text")
+        expect(text).toBeDefined()
+      }),
+    60_000,
+  )
+})
--- a/packages/opencode/test/lib/run-process.ts
+++ b/packages/opencode/test/lib/run-process.ts
@ -0,0 +1,193 @@
+// Subprocess test harness for the `opencode run` CLI.
+//
+// This is the missing test tier: every other `cli/run/*.test.ts` is a unit
+// test of an extracted helper. Nothing actually exercises the `RunCommand`
+// handler end-to-end. Bugs that span argv parsing → server boot → SDK call →
+// event consumption → exit code (like the original /event race or the
+// non-interactive hang #27371) are invisible to in-process tests.
+//
+// The harness uses opencode's built-in test affordances to spawn the real CLI
+// hermetically:
+//   - OPENCODE_CONFIG_CONTENT  : provider config inline, no files to find
+//   - OPENCODE_TEST_HOME       : pins os.homedir() → tmpdir
+//   - OPENCODE_DISABLE_PROJECT_CONFIG : skip walking up for opencode.json
+//   - OPENCODE_PURE            : skip external plugin discovery + install
+//   - OPENCODE_DISABLE_AUTOUPDATE / AUTOCOMPACT / MODELS_FETCH : no background work
+//   - OPENCODE_AUTH_CONTENT    : set to "{}" (presumably an empty inline auth
+//                                store, so no real credentials are read — confirm)
+//
+// Plus HOME / XDG_* pointing at the tmpdir for belt-and-suspenders isolation.
+//
+// The custom `test` provider points at a TestLLMServer that runs inside the
+// test process (not the CLI subprocess) and is reached via `llm.url`. The CLI
+// subprocess talks to it over real HTTP.
+import type { TestOptions } from "bun:test"
+import * as Scope from "effect/Scope"
+import { Effect } from "effect"
+import path from "node:path"
+import fs from "node:fs/promises"
+import os from "node:os"
+import { Process } from "@/util/process"
+import { TestLLMServer } from "./llm-server"
+import { testProviderConfig } from "./test-provider"
+import { it } from "./effect"
+
+// Package root (two directories above this file's directory) and the CLI
+// entry point beneath it; the entry is what every spawned subprocess runs.
+const opencodeRoot = path.resolve(import.meta.dir, "..", "..")
+const cliEntry = path.join(opencodeRoot, "src", "index.ts")
+
+// Model passed via `--model` whenever a test does not supply its own.
+export const testModelID = "test/test-model"
+
+// Builds the environment overrides for one CLI subprocess: every home/XDG
+// location is rooted at `home`, the provider config is supplied inline via
+// `configJson`, and the opencode feature switches are all set to "1".
+function isolatedEnv(home: string, configJson: string): Record<string, string> {
+  const under = (relative: string) => path.join(home, relative)
+
+  // Home directory, as seen by opencode and by anything reading HOME.
+  const homeVars = {
+    OPENCODE_TEST_HOME: home,
+    HOME: home,
+  }
+
+  // XDG base directories, all inside the same home.
+  const xdgVars = {
+    XDG_CONFIG_HOME: under(".config"),
+    XDG_DATA_HOME: under(".local/share"),
+    XDG_STATE_HOME: under(".local/state"),
+    XDG_CACHE_HOME: under(".cache"),
+  }
+
+  // opencode-specific switches plus the inline config/auth payloads.
+  const opencodeVars = {
+    OPENCODE_CONFIG_CONTENT: configJson,
+    OPENCODE_DISABLE_PROJECT_CONFIG: "1",
+    OPENCODE_PURE: "1",
+    OPENCODE_DISABLE_AUTOUPDATE: "1",
+    OPENCODE_DISABLE_AUTOCOMPACT: "1",
+    OPENCODE_DISABLE_MODELS_FETCH: "1",
+    OPENCODE_AUTH_CONTENT: "{}",
+  }
+
+  return { ...homeVars, ...xdgVars, ...opencodeVars }
+}
+
+// Outcome of one CLI subprocess invocation.
+export type RunResult = {
+  readonly exitCode: number
+  readonly stdout: string
+  readonly stderr: string
+  // Wall-clock time measured around the spawn, in milliseconds.
+  readonly durationMs: number
+}
+
+// Per-invocation knobs shared by `spawn` and `run`.
+type SpawnOpts = {
+  // Timeout handed to the process runner, in milliseconds.
+  readonly timeoutMs?: number
+  // Extra variables layered on top of the isolated environment.
+  readonly env?: Record<string, string>
+}
+
+// Typed stand-in for hand-built `opencode run` argv. Add new flags here so
+// tests stay grep-able and refactor-safe.
+export type RunOpts = SpawnOpts & {
+  // `--model`
+  readonly model?: string
+  // `--agent`
+  readonly agent?: string
+  // `--format`
+  readonly format?: "default" | "json"
+  // `--command`
+  readonly command?: string
+  // `--print-logs`
+  readonly printLogs?: boolean
+  // Raw arguments appended after the typed flags, before the message.
+  readonly extraArgs?: Array<string>
+}
+
+export type OpencodeCli = {
+  // High-level entry: `opencode run <message>` with flags derived from `opts`.
+  readonly run: (message: string, opts?: RunOpts) => Effect.Effect<RunResult>
+  // Escape hatch: arbitrary argv, for invocations `run` cannot express.
+  readonly spawn: (args: Array<string>, opts?: SpawnOpts) => Effect.Effect<RunResult>
+  // Exit-code assertion that prints the captured stderr/stdout on a mismatch,
+  // so CI failures can be diagnosed without a local re-run.
+  readonly expectExit: (result: RunResult, expected: number, label?: string) => void
+  // Turns `--format json` stdout into one event object per non-empty line.
+  // The CLI writes `JSON.stringify({ type, sessionID, ... }) + EOL` for each
+  // event (see src/cli/cmd/run.ts `emit`). A malformed line throws, so tests
+  // fail loudly instead of silently dropping data.
+  readonly parseJsonEvents: (stdout: string) => Record<string, unknown>[]
+}
+
+// Everything a test body receives from the fixture.
+export type RunFixture = {
+  readonly llm: TestLLMServer["Service"]
+  readonly home: string
+  readonly opencode: OpencodeCli
+}
+
+// `withRunFixture(fn)` provisions a TestLLMServer, a fresh temporary home
+// directory and the `opencode` spawn helpers, then invokes `fn` with them.
+// The temporary directory is removed (best effort) when the surrounding Scope
+// closes.
+//
+// Note on the R channel: TestLLMServer.layer is provided internally so the
+// caller doesn't need to wire it up. The fixture's lifetime is tied to the
+// surrounding Scope.
+export function withRunFixture<A, E>(
+  fn: (input: RunFixture) => Effect.Effect<A, E>,
+): Effect.Effect<A, E | unknown, Scope.Scope> {
+  return Effect.gen(function* () {
+    const llm = yield* TestLLMServer
+
+    // mkdtemp picks a unique suffix and creates the directory in one step, so
+    // two fixtures can never end up sharing a home (a Math.random() name with
+    // `mkdir({ recursive: true })` would silently reuse an existing one).
+    const home = yield* Effect.promise(() => fs.mkdtemp(path.join(os.tmpdir(), "oc-run-")))
+    // Cleanup is deliberately best-effort: a failed rm must not fail the test.
+    yield* Effect.addFinalizer(() =>
+      Effect.promise(() => fs.rm(home, { recursive: true, force: true }).catch(() => undefined)),
+    )
+
+    const configJson = JSON.stringify(testProviderConfig(llm.url))
+    const env = isolatedEnv(home, configJson)
+
+    // Low-level spawn: runs the CLI entry with arbitrary argv and reports the
+    // exit code, captured output and wall-clock duration.
+    const spawn = (
+      args: string[],
+      opts?: SpawnOpts,
+    ): Effect.Effect<RunResult> =>
+      Effect.promise(async () => {
+        const start = Date.now()
+        // Process.run pipes stdout/stderr by default and returns them as Buffers.
+        const result = await Process.run(["bun", "run", "--conditions=browser", cliEntry, ...args], {
+          cwd: home,
+          timeout: opts?.timeoutMs ?? 30_000,
+          // Later spreads win: isolation overrides the ambient environment,
+          // and per-call overrides beat both.
+          env: { ...process.env, ...env, ...opts?.env },
+          nothrow: true,
+        })
+        return {
+          exitCode: result.code,
+          stdout: result.stdout.toString(),
+          stderr: result.stderr.toString(),
+          durationMs: Date.now() - start,
+        }
+      })
+
+    // High-level helper: translates RunOpts into `opencode run` argv. The
+    // message always goes last, after any extraArgs.
+    const run = (message: string, opts?: RunOpts): Effect.Effect<RunResult> => {
+      const argv: string[] = ["run"]
+      if (opts?.printLogs) argv.push("--print-logs")
+      argv.push("--model", opts?.model ?? testModelID)
+      if (opts?.agent) argv.push("--agent", opts.agent)
+      if (opts?.format) argv.push("--format", opts.format)
+      if (opts?.command) argv.push("--command", opts.command)
+      if (opts?.extraArgs) argv.push(...opts.extraArgs)
+      argv.push(message)
+      return spawn(argv, opts)
+    }
+
+    const opencode: OpencodeCli = { run, spawn, expectExit, parseJsonEvents }
+
+    return yield* fn({ llm, home, opencode })
+  }).pipe(Effect.provide(TestLLMServer.layer))
+}
+
+// Parses `--format json` stdout: one JSON object per non-empty line.
+// Surrounding whitespace (including a trailing "\r") is trimmed from each
+// line and blank lines are skipped.
+//
+// Throws a SyntaxError naming the 1-based stdout line when a line is not
+// valid JSON, and a TypeError when a line parses to something other than a
+// plain object (number, string, null, array) — such a value is not an event
+// and would make the declared return type a lie.
+function parseJsonEvents(stdout: string): Array<Record<string, unknown>> {
+  const events: Array<Record<string, unknown>> = []
+  const lines = stdout.split("\n")
+  for (let index = 0; index < lines.length; index++) {
+    const line = lines[index].trim()
+    if (line.length === 0) continue
+
+    let parsed: unknown
+    try {
+      parsed = JSON.parse(line)
+    } catch (err) {
+      const reason = err instanceof Error ? err.message : String(err)
+      throw new SyntaxError(`parseJsonEvents: stdout line ${index + 1} is not valid JSON (${reason}): ${line}`)
+    }
+
+    if (typeof parsed !== "object" || parsed === null || Array.isArray(parsed)) {
+      throw new TypeError(`parseJsonEvents: stdout line ${index + 1} is not a JSON object: ${line}`)
+    }
+    events.push(parsed as Record<string, unknown>)
+  }
+  return events
+}
+
+// Asserts on a subprocess exit code. When the code differs from `expected`,
+// the duration plus the tails of stderr and stdout are written to the console
+// before throwing, so a CI failure carries its own context.
+function expectExit(result: RunResult, expected: number, label = "opencode") {
+  if (result.exitCode !== expected) {
+    // Keep only the trailing `limit` characters, marking the cut with "...".
+    const lastChars = (text: string, limit: number) => (text.length <= limit ? text : "..." + text.slice(-limit))
+    const report = [
+      `[${label}] expected exit ${expected}, got ${result.exitCode} after ${result.durationMs}ms`,
+      `[${label}] stderr (last 2000):\n${lastChars(result.stderr, 2000)}`,
+      `[${label}] stdout (last 500):\n${lastChars(result.stdout, 500)}`,
+    ]
+    for (const entry of report) {
+      // eslint-disable-next-line no-console
+      console.error(entry)
+    }
+    throw new Error(`${label}: expected exit ${expected}, got ${result.exitCode}`)
+  }
+}
+
+// `runIt.live(name, fixture => effect)` is the same as
+// `it.live(name, () => withRunFixture(fixture))` — one fewer nesting level at
+// every call site. Use this for any test that needs the opencode CLI fixture.
+//
+// Only `.live` is exposed because subprocess tests must run against the real
+// clock — a TestClock-paused environment can't drive a child process. If you
+// need `.only` or `.skip`, fall back to `it.live` + `withRunFixture` directly.
+export const runIt = {
+  // Registers a live-clock test whose body receives the run fixture; `opts`
+  // is forwarded untouched to `it.live` (a timeout in ms or bun TestOptions).
+  live<A, E>(name: string, body: (input: RunFixture) => Effect.Effect<A, E>, opts?: number | TestOptions) {
+    const scoped = () => withRunFixture(body)
+    return it.live(name, scoped, opts)
+  },
+}
--- a/packages/opencode/test/lib/test-provider.ts
+++ b/packages/opencode/test/lib/test-provider.ts
@ -0,0 +1,37 @@
+// Shared provider config for tests that need opencode to talk to a fake LLM
+// over a real HTTP endpoint. Registers a single provider `test` with a single
+// model `test-model` (i.e. `--model test/test-model`), pointed at the URL the
+// caller supplies (typically a TestLLMServer instance).
+//
+// Used by:
+//   - test/lib/run-process.ts          (subprocess CLI tests)
+//   - test/server/httpapi-sdk.test.ts  (in-process SDK tests)
+export function testProviderConfig(llmUrl: string) {
+  // The single model exposed by the provider; addressed as `test/test-model`.
+  const testModel = {
+    id: "test-model",
+    name: "Test Model",
+    attachment: false,
+    reasoning: false,
+    temperature: false,
+    tool_call: true,
+    release_date: "2025-01-01",
+    limit: { context: 100_000, output: 10_000 },
+    cost: { input: 0, output: 0 },
+    options: {},
+  }
+
+  // OpenAI-compatible provider whose base URL is the caller-supplied endpoint.
+  const testProvider = {
+    name: "Test",
+    id: "test",
+    env: [],
+    npm: "@ai-sdk/openai-compatible",
+    models: { "test-model": testModel },
+    options: { apiKey: "test-key", baseURL: llmUrl },
+  }
+
+  // Formatter and LSP are switched off in the returned config.
+  return {
+    formatter: false,
+    lsp: false,
+    provider: { test: testProvider },
+  }
+}
--- a/packages/opencode/test/server/httpapi-sdk.test.ts
+++ b/packages/opencode/test/server/httpapi-sdk.test.ts
@ -23,6 +23,7 @@ import path from "path"
 import { resetDatabase } from "../fixture/db"
 import { disposeAllInstances, TestInstance, tmpdirScoped } from "../fixture/fixture"
 import { awaitWithTimeout, testEffect } from "../lib/effect"
+import { testProviderConfig } from "../lib/test-provider"

 const noopBootstrap = Layer.succeed(InstanceBootstrap.Service, InstanceBootstrap.Service.of({ run: Effect.void }))
 const it = testEffect(
@ -99,39 +100,6 @@ function authorization(username: string, password: string) {
  return `Basic ${Buffer.from(`${username}:${password}`).toString("base64")}`
 }

-function providerConfig(url: string) {
-  return {
-    formatter: false,
-    lsp: false,
-    provider: {
-      test: {
-        name: "Test",
-        id: "test",
-        env: [],
-        npm: "@ai-sdk/openai-compatible",
-        models: {
-          "test-model": {
-            id: "test-model",
-            name: "Test Model",
-            attachment: false,
-            reasoning: false,
-            temperature: false,
-            tool_call: true,
-            release_date: "2025-01-01",
-            limit: { context: 100000, output: 10000 },
-            cost: { input: 0, output: 0 },
-            options: {},
-          },
-        },
-        options: {
-          apiKey: "test-key",
-          baseURL: url,
-        },
-      },
-    },
-  }
-}
-
 function call<T>(request: () => Promise<T>) {
  return Effect.promise(request)
 }
@ -283,7 +251,7 @@ function withStandardProject<A, E>(
 function withFakeLlm<A, E>(serverPath: ServerPath, run: (input: LlmProjectFixture) => Effect.Effect<A, E, TestScope>) {
  return Effect.gen(function* () {
    const llm = yield* TestLLMServer
-    return yield* withProject(serverPath, { config: providerConfig(llm.url) }, (input) => run({ ...input, llm }))
+    return yield* withProject(serverPath, { config: testProviderConfig(llm.url) }, (input) => run({ ...input, llm }))
  }).pipe(Effect.provide(TestLLMServer.layer))
 }

@ -297,7 +265,7 @@ function withFakeLlmProject<A, E>(
    return yield* withProject(
      serverPath,
      {
-        config: providerConfig(llm.url),
+        config: testProviderConfig(llm.url),
        setup: options.setup,
      },
      (input) => run({ ...input, llm }),