test(cli): subprocess integration test harness + regression suite for opencode run (#28230)

This commit is contained in:
Kit Langton 2026-05-18 18:32:20 -04:00 committed by GitHub
parent ce09fc8356
commit 0f3d168fdd
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 317 additions and 35 deletions

View file

@ -0,0 +1,84 @@
// Subprocess integration tests for `opencode run` (non-interactive mode).
// These exercise the real CLI binary against a TestLLMServer running in the
// same process. See `test/lib/run-process.ts` for the harness — each test uses
// `opencode.run(message, opts?)` to spawn `bun run --conditions=browser src/index.ts run ...` with
// `OPENCODE_CONFIG_CONTENT` providing the test provider config inline.
import { describe, expect } from "bun:test"
import { Effect } from "effect"
import { runIt } from "../../lib/run-process"
describe("opencode run (non-interactive subprocess)", () => {
  // Happy path: prompt completes, output reaches stdout, process exits 0.
  // If this fails, all the others likely will too — debug here first.
  runIt.live(
    "exits 0 and writes the response to stdout on a successful prompt",
    ({ llm, opencode }) =>
      Effect.gen(function* () {
        yield* llm.text("hello from the test llm")
        const result = yield* opencode.run("say hi")
        opencode.expectExit(result, 0)
        expect(result.stdout).toContain("hello from the test llm")
      }),
    60_000,
  )

  // Regression for #27371: an unknown model used to hang the process forever
  // waiting on a session.status === idle event that never arrived. The fix
  // makes the SDK call surface an error promptly so the process exits nonzero.
  // We assert nonzero exit AND wall-clock under the harness timeout — a hang
  // would expire the timeout and produce a different (signal-killed) failure.
  runIt.live(
    "exits nonzero promptly when the model is unknown (regression for #27371)",
    ({ opencode }) =>
      Effect.gen(function* () {
        const result = yield* opencode.run("say hi", {
          model: "test/nonexistent-model",
          timeoutMs: 15_000,
        })
        expect(result.exitCode).not.toBe(0)
        expect(result.durationMs).toBeLessThan(15_000)
      }),
    30_000,
  )

  // Locks in the current behavior: when the LLM stream errors mid-response
  // (the prompt was accepted, then the upstream provider failed), opencode
  // emits a session.error event and the process exits 0 today.
  //
  // This is debatable — a future cleanup might flip it to exit 1. If you're
  // changing this expectation, do it deliberately and say so in the PR.
  runIt.live(
    "mid-stream LLM error still exits 0 today (contract lock-in)",
    ({ llm, opencode }) =>
      Effect.gen(function* () {
        yield* llm.fail("upstream provider exploded mid-stream")
        const result = yield* opencode.run("trigger midstream error", { timeoutMs: 30_000 })
        // Use the harness assertion (as the sibling tests do) so a mismatch
        // dumps the captured stderr/stdout instead of only the exit code.
        opencode.expectExit(result, 0, "mid-stream error")
      }),
    60_000,
  )

  // --format json puts one JSON object per line on stdout for each emitted
  // event. Consumers (CI scripts, tooling) parse this stream. Asserts the
  // shape so a future event-emit change has to update this expectation.
  runIt.live(
    "--format json emits parseable line-delimited JSON to stdout",
    ({ llm, opencode }) =>
      Effect.gen(function* () {
        yield* llm.text("structured output")
        const result = yield* opencode.run("say hi", { format: "json" })
        opencode.expectExit(result, 0)
        const events = opencode.parseJsonEvents(result.stdout)
        expect(events.length).toBeGreaterThan(0)
        // Every event must carry a string `type` and a string `sessionID`.
        for (const evt of events) {
          expect(typeof evt.type).toBe("string")
          expect(typeof evt.sessionID).toBe("string")
        }
        // At least one `text` event should appear. Only its presence is
        // asserted here; the payload shape is not checked.
        const text = events.find((e) => e.type === "text")
        expect(text).toBeDefined()
      }),
    60_000,
  )
})

View file

@ -0,0 +1,193 @@
// Subprocess test harness for the `opencode run` CLI.
//
// This is the missing test tier: every other `cli/run/*.test.ts` is a unit
// test of an extracted helper. Nothing actually exercises the `RunCommand`
// handler end-to-end. Bugs that span argv parsing → server boot → SDK call →
// event consumption → exit code (like the original /event race or the
// non-interactive hang #27371) are invisible to in-process tests.
//
// The harness uses opencode's built-in test affordances to spawn the real CLI
// hermetically:
// - OPENCODE_CONFIG_CONTENT : provider config inline, no files to find
// - OPENCODE_TEST_HOME : pins os.homedir() → tmpdir
// - OPENCODE_DISABLE_PROJECT_CONFIG : skip walking up for opencode.json
// - OPENCODE_PURE : skip external plugin discovery + install
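// - OPENCODE_DISABLE_AUTOUPDATE / AUTOCOMPACT / MODELS_FETCH : no background work
// - OPENCODE_AUTH_CONTENT : set to `{}` (presumably empty inline auth, so no stored credentials are read — confirm)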
//
// Plus HOME / XDG_* pointing at the tmpdir for belt-and-suspenders isolation.
//
// The custom `test` provider points at a TestLLMServer running in the same
// process at a random port. The CLI subprocess talks to it over real HTTP.
import type { TestOptions } from "bun:test"
import * as Scope from "effect/Scope"
import { Effect } from "effect"
import path from "node:path"
import fs from "node:fs/promises"
import os from "node:os"
import { Process } from "@/util/process"
import { TestLLMServer } from "./llm-server"
import { testProviderConfig } from "./test-provider"
import { it } from "./effect"
// Directory two levels above this module's directory (`import.meta.dir`).
const opencodeRoot = path.resolve(import.meta.dir, "../../")
// Source entry point passed to `bun run` for every subprocess spawn.
const cliEntry = path.join(opencodeRoot, "src/index.ts")
// Model id passed to `--model` by `run` when a test does not override it.
export const testModelID = "test/test-model"
/**
 * Builds the environment variables that sandbox a CLI subprocess.
 *
 * @param home - Directory used for `OPENCODE_TEST_HOME`, `HOME`, and as the
 *   parent of every `XDG_*` directory.
 * @param configJson - Serialized config placed verbatim in
 *   `OPENCODE_CONFIG_CONTENT`.
 * @returns A flat string-to-string map; callers merge it over their own env.
 */
function isolatedEnv(home: string, configJson: string): Record<string, string> {
  // Resolve a location relative to the sandbox home.
  const underHome = (relative: string): string => path.join(home, relative)
  const enabled = "1"

  const sandbox: Record<string, string> = {
    OPENCODE_TEST_HOME: home,
    HOME: home,
    XDG_CONFIG_HOME: underHome(".config"),
    XDG_DATA_HOME: underHome(".local/share"),
    XDG_STATE_HOME: underHome(".local/state"),
    XDG_CACHE_HOME: underHome(".cache"),
    OPENCODE_CONFIG_CONTENT: configJson,
    OPENCODE_DISABLE_PROJECT_CONFIG: enabled,
    OPENCODE_PURE: enabled,
    OPENCODE_DISABLE_AUTOUPDATE: enabled,
    OPENCODE_DISABLE_AUTOCOMPACT: enabled,
    OPENCODE_DISABLE_MODELS_FETCH: enabled,
    OPENCODE_AUTH_CONTENT: "{}",
  }
  return sandbox
}
// Captured outcome of one CLI subprocess invocation, as assembled by the
// harness's `spawn` helper.
export type RunResult = {
// Exit status taken from the `code` field of the `Process.run` result.
readonly exitCode: number
// Captured stdout, decoded with `toString()`.
readonly stdout: string
// Captured stderr, decoded with `toString()`.
readonly stderr: string
// Wall-clock milliseconds measured with `Date.now()` around the `Process.run` call.
readonly durationMs: number
}
// Options shared by every subprocess spawn. `timeoutMs` is forwarded to
// `Process.run` as `timeout` (30_000 when omitted). `env` entries are spread
// last, so they override both the inherited `process.env` and the isolation
// variables.
type SpawnOpts = { readonly timeoutMs?: number; readonly env?: Record<string, string> }
// A `RunOpts` is the typed equivalent of constructing argv for `opencode run`.
// New flags should land here so tests stay grep-able and refactor-safe.
export type RunOpts = SpawnOpts & {
// Value for `--model`; the harness falls back to `testModelID` when omitted.
readonly model?: string
// Value for `--agent`; the flag is omitted when unset.
readonly agent?: string
// Value for `--format`; the flag is omitted when unset.
readonly format?: "default" | "json"
// Value for `--command`; the flag is omitted when unset.
readonly command?: string
// When true, `--print-logs` is added right after the `run` subcommand.
readonly printLogs?: boolean
// Raw arguments appended after the typed flags and before the message.
readonly extraArgs?: string[]
}
// Helpers exposed to each test for driving and inspecting the CLI subprocess.
export type OpencodeCli = {
// High-level: run a single prompt against the test model.
// Builds the `run ...` argv from `opts` and delegates to `spawn`.
readonly run: (message: string, opts?: RunOpts) => Effect.Effect<RunResult>
// Escape hatch: any CLI invocation with full control over argv.
readonly spawn: (args: string[], opts?: SpawnOpts) => Effect.Effect<RunResult>
// Convenience assertion. Dumps captured stderr/stdout on mismatch so CI
// failures are debuggable without re-running locally.
// Throws an `Error` when `result.exitCode` differs from `expected`.
readonly expectExit: (result: RunResult, expected: number, label?: string) => void
// Parse `--format json` stdout into one event object per non-empty line.
// The CLI writes `JSON.stringify({ type, sessionID, ... }) + EOL` for each
// event (see src/cli/cmd/run.ts `emit`). Throws if any line is malformed
// so tests fail loudly rather than silently skipping data.
readonly parseJsonEvents: (stdout: string) => Array<Record<string, unknown>>
}
// Everything `withRunFixture` hands to a test body.
export type RunFixture = {
// The TestLLMServer service; its `url` is written into the provider config.
readonly llm: TestLLMServer["Service"]
// Temporary directory used as the subprocess cwd and as its HOME.
readonly home: string
// CLI helpers bound to this fixture's environment.
readonly opencode: OpencodeCli
}
// `withRunFixture(fn)` provisions a TestLLMServer + tmpdir + spawn helper and
// invokes fn. Cleans up the tmpdir on scope exit.
//
// Note on the R channel: TestLLMServer.layer is provided internally so the
// caller doesn't need to wire it up. The fixture's lifetime is tied to the
// surrounding Scope.
//
// The fixture passed to `fn` contains:
//   - llm      : the TestLLMServer service the subprocess talks to
//   - home     : a fresh temporary directory (subprocess cwd and HOME)
//   - opencode : `run` / `spawn` / `expectExit` / `parseJsonEvents` helpers
export function withRunFixture<A, E>(
  fn: (input: RunFixture) => Effect.Effect<A, E>,
): Effect.Effect<A, E | unknown, Scope.Scope> {
  return Effect.gen(function* () {
    const llm = yield* TestLLMServer
    // mkdtemp creates the directory atomically under a unique name, so two
    // fixtures can never end up sharing a directory (a random suffix plus
    // `mkdir({ recursive: true })` would silently reuse an existing one).
    const home = yield* Effect.promise(() => fs.mkdtemp(path.join(os.tmpdir(), "oc-run-")))
    // Best-effort cleanup: a failed removal must not fail the test.
    yield* Effect.addFinalizer(() =>
      Effect.promise(() => fs.rm(home, { recursive: true, force: true }).catch(() => undefined)),
    )
    const configJson = JSON.stringify(testProviderConfig(llm.url))
    const env = isolatedEnv(home, configJson)

    // Runs the CLI entry point with the given argv and captures its outcome.
    const spawn = (args: string[], opts?: SpawnOpts): Effect.Effect<RunResult> =>
      Effect.promise(async () => {
        const start = Date.now()
        // Process.run pipes stdout/stderr by default and returns them as Buffers.
        const result = await Process.run(["bun", "run", "--conditions=browser", cliEntry, ...args], {
          cwd: home,
          timeout: opts?.timeoutMs ?? 30_000,
          // Later spreads win: per-call env overrides the isolation
          // variables, which override the inherited environment.
          env: { ...process.env, ...env, ...opts?.env },
          nothrow: true,
        })
        return {
          exitCode: result.code,
          stdout: result.stdout.toString(),
          stderr: result.stderr.toString(),
          durationMs: Date.now() - start,
        }
      })

    // Translates typed options into `opencode run` argv; the message goes last.
    const run = (message: string, opts?: RunOpts): Effect.Effect<RunResult> => {
      const argv: string[] = ["run"]
      if (opts?.printLogs) argv.push("--print-logs")
      argv.push("--model", opts?.model ?? testModelID)
      if (opts?.agent) argv.push("--agent", opts.agent)
      if (opts?.format) argv.push("--format", opts.format)
      if (opts?.command) argv.push("--command", opts.command)
      if (opts?.extraArgs) argv.push(...opts.extraArgs)
      argv.push(message)
      return spawn(argv, opts)
    }

    const opencode: OpencodeCli = { run, spawn, expectExit, parseJsonEvents }
    return yield* fn({ llm, home, opencode })
  }).pipe(Effect.provide(TestLLMServer.layer))
}
/**
 * Parses `--format json` stdout into one event object per non-empty line.
 *
 * Lines are split on `\n` and trimmed, so CRLF output and blank lines are
 * tolerated.
 *
 * @param stdout - Raw stdout captured from the CLI.
 * @returns The parsed events, in output order. Empty input yields `[]`.
 * @throws SyntaxError when a non-empty line is not valid JSON, or parses to
 *   something other than a plain object. The message names the 1-based line
 *   number and shows a preview of the offending line.
 */
function parseJsonEvents(stdout: string): Array<Record<string, unknown>> {
  // Keep error messages readable when a line is very long.
  const preview = (text: string): string => (text.length > 200 ? text.slice(0, 200) + "..." : text)
  const events: Array<Record<string, unknown>> = []

  for (const [index, raw] of stdout.split("\n").entries()) {
    const line = raw.trim()
    if (line.length === 0) continue

    let parsed: unknown
    try {
      parsed = JSON.parse(line)
    } catch (err) {
      const reason = err instanceof Error ? err.message : String(err)
      throw new SyntaxError(
        `parseJsonEvents: line ${index + 1} is not valid JSON (${reason}): ${preview(line)}`,
      )
    }

    // Scalars, null and arrays are valid JSON but not event objects; reject
    // them here rather than letting a later property access fail obscurely.
    if (typeof parsed !== "object" || parsed === null || Array.isArray(parsed)) {
      throw new SyntaxError(`parseJsonEvents: line ${index + 1} is not a JSON object: ${preview(line)}`)
    }
    events.push(parsed as Record<string, unknown>)
  }

  return events
}
// Exit-code assertion for subprocess results. Returns silently on a match.
// On a mismatch it logs the exit code, duration, and the tail of
// stderr/stdout to the console, then throws, so CI failures can be diagnosed
// from the log alone.
function expectExit(result: RunResult, expected: number, label = "opencode") {
  const actual = result.exitCode
  if (actual !== expected) {
    // Keep only the last `limit` characters, marking the truncation.
    const lastChars = (text: string, limit: number) => (text.length <= limit ? text : "..." + text.slice(-limit))
    const report = [
      `[${label}] expected exit ${expected}, got ${actual} after ${result.durationMs}ms`,
      `[${label}] stderr (last 2000):\n${lastChars(result.stderr, 2000)}`,
      `[${label}] stdout (last 500):\n${lastChars(result.stdout, 500)}`,
    ]
    for (const entry of report) {
      // eslint-disable-next-line no-console
      console.error(entry)
    }
    throw new Error(`${label}: expected exit ${expected}, got ${actual}`)
  }
}
// `runIt.live(name, fixture => effect)` registers a live test whose body
// receives the opencode CLI fixture. It is shorthand for
// `it.live(name, () => withRunFixture(fixture))`, saving one nesting level at
// each call site.
//
// Only `.live` is provided: subprocess tests must run against the real clock,
// because a TestClock-paused environment can't drive a child process. For
// `.only` or `.skip`, use `it.live` + `withRunFixture` directly.
export const runIt = {
  live: <A, E>(
    name: string,
    body: (input: RunFixture) => Effect.Effect<A, E>,
    opts?: number | TestOptions,
  ) => {
    // Defer fixture construction until the test actually runs.
    const scenario = () => withRunFixture(body)
    return it.live(name, scenario, opts)
  },
}

View file

@ -0,0 +1,37 @@
// Shared opencode config for tests that need the CLI/server to talk to a fake
// LLM over a real HTTP endpoint. It registers exactly one provider, `test`,
// with exactly one model, `test-model` (i.e. `--model test/test-model`), and
// points the provider at the URL the caller supplies (typically a
// TestLLMServer instance).
//
// Used by:
// - test/lib/run-process.ts (subprocess CLI tests)
// - test/server/httpapi-sdk.test.ts (in-process SDK tests)
export function testProviderConfig(llmUrl: string) {
  // The single model the `test` provider advertises.
  const testModel = {
    id: "test-model",
    name: "Test Model",
    attachment: false,
    reasoning: false,
    temperature: false,
    tool_call: true,
    release_date: "2025-01-01",
    limit: { context: 100_000, output: 10_000 },
    cost: { input: 0, output: 0 },
    options: {},
  }

  // OpenAI-compatible provider whose base URL is the fake LLM endpoint.
  const testProvider = {
    name: "Test",
    id: "test",
    env: [],
    npm: "@ai-sdk/openai-compatible",
    models: { "test-model": testModel },
    options: { apiKey: "test-key", baseURL: llmUrl },
  }

  return {
    formatter: false,
    lsp: false,
    provider: { test: testProvider },
  }
}

View file

@ -23,6 +23,7 @@ import path from "path"
import { resetDatabase } from "../fixture/db"
import { disposeAllInstances, TestInstance, tmpdirScoped } from "../fixture/fixture"
import { awaitWithTimeout, testEffect } from "../lib/effect"
import { testProviderConfig } from "../lib/test-provider"
// Layer that supplies InstanceBootstrap.Service with a `run` of `Effect.void`, i.e. a bootstrap that does nothing.
const noopBootstrap = Layer.succeed(InstanceBootstrap.Service, InstanceBootstrap.Service.of({ run: Effect.void }))
const it = testEffect(
@ -99,39 +100,6 @@ function authorization(username: string, password: string) {
return `Basic ${Buffer.from(`${username}:${password}`).toString("base64")}`
}
// Builds an opencode config with a single provider `test` exposing a single
// model `test-model`, pointed at `url`. The shape matches the shared
// `testProviderConfig` helper imported from ../lib/test-provider.
function providerConfig(url: string) {
  const model = {
    id: "test-model",
    name: "Test Model",
    attachment: false,
    reasoning: false,
    temperature: false,
    tool_call: true,
    release_date: "2025-01-01",
    limit: { context: 100000, output: 10000 },
    cost: { input: 0, output: 0 },
    options: {},
  }
  const connection = {
    apiKey: "test-key",
    baseURL: url,
  }
  const test = {
    name: "Test",
    id: "test",
    env: [],
    npm: "@ai-sdk/openai-compatible",
    models: { "test-model": model },
    options: connection,
  }
  return { formatter: false, lsp: false, provider: { test } }
}
// Lifts a promise-returning thunk into an Effect by handing it to `Effect.promise`.
// NOTE(review): `Effect.promise` is documented as treating a rejection as a
// defect rather than a typed failure — confirm that is intended for callers.
function call<T>(request: () => Promise<T>) {
return Effect.promise(request)
}
@ -283,7 +251,7 @@ function withStandardProject<A, E>(
// Runs `run` inside a project whose config points the `test` provider at a
// TestLLMServer. The server's layer is provided here, so callers don't need
// to wire it up; the fixture passed to `run` is the project fixture plus `llm`.
function withFakeLlm<A, E>(serverPath: ServerPath, run: (input: LlmProjectFixture) => Effect.Effect<A, E, TestScope>) {
  return Effect.gen(function* () {
    const llm = yield* TestLLMServer
    // Single return using the shared provider config helper. There were two
    // consecutive returns here, which left the second one unreachable.
    return yield* withProject(serverPath, { config: testProviderConfig(llm.url) }, (input) => run({ ...input, llm }))
  }).pipe(Effect.provide(TestLLMServer.layer))
}
@ -297,7 +265,7 @@ function withFakeLlmProject<A, E>(
return yield* withProject(
serverPath,
{
config: providerConfig(llm.url),
config: testProviderConfig(llm.url),
setup: options.setup,
},
(input) => run({ ...input, llm }),