Refactor LLM route-first provider API (#28523)

2026-05-30 20:44:31 +00:00 · 2026-05-20 20:15:52 -04:00 · 2026-05-20 20:15:52 -04:00 · 41f6daf96a
commit 41f6daf96a
parent 5381795844
87 changed files with 2450 additions and 1520 deletions
--- a/packages/opencode/src/session/llm.ts
+++ b/packages/opencode/src/session/llm.ts
@ -4,7 +4,7 @@ import { Context, Effect, Layer, Record } from "effect"
 import * as Stream from "effect/Stream"
 import { streamText, wrapLanguageModel, type ModelMessage, type Tool, tool as aiTool, jsonSchema } from "ai"
 import type { LLMEvent } from "@opencode-ai/llm"
-import { LLMClient, RequestExecutor } from "@opencode-ai/llm/route"
+import { LLMClient, RequestExecutor, WebSocketExecutor } from "@opencode-ai/llm/route"
 import type { LLMClientService } from "@opencode-ai/llm/route"
 import { mergeDeep } from "remeda"
 import { GitLabWorkflowLanguageModel } from "gitlab-ai-provider"
@ -349,6 +349,8 @@ const live: Layer.Layer<
        ...headers,
      }

+      // Runtime seam: native is an opt-in adapter over @opencode-ai/llm. It
+      // either returns a ready LLMEvent stream or a concrete fallback reason.
      if (flags.experimentalNativeLlm) {
        const native = LLMNativeRuntime.stream({
          model: input.model,
@ -399,6 +401,8 @@ const live: Layer.Layer<
          "llm.model": input.model.id,
        }),
      )
+      // Default runtime path: AI SDK owns provider execution and tool dispatch;
+      // LLMAISDK.toLLMEvents below normalizes fullStream parts for the processor.
      return {
        type: "ai-sdk" as const,
        result: streamText({
@ -481,6 +485,8 @@ const live: Layer.Layer<

            if (result.type === "native") return result.stream

+            // Adapter seam: both runtimes expose the same LLMEvent stream. Native
+            // already returns one; AI SDK streams are converted here.
            const state = LLMAISDK.adapterState()
            return Stream.fromAsyncIterable(result.result.fullStream, (e) =>
              e instanceof Error ? e : new Error(String(e)),
@ -504,7 +510,9 @@ export const defaultLayer = Layer.suspend(() =>
    Layer.provide(Config.defaultLayer),
    Layer.provide(Provider.defaultLayer),
    Layer.provide(Plugin.defaultLayer),
-    Layer.provide(LLMClient.layer.pipe(Layer.provide(RequestExecutor.defaultLayer))),
+    Layer.provide(
+      LLMClient.layer.pipe(Layer.provide(Layer.mergeAll(RequestExecutor.defaultLayer, WebSocketExecutor.layer))),
+    ),
    Layer.provide(RuntimeFlags.defaultLayer),
  ),
 )
--- a/packages/opencode/src/session/llm/AGENTS.md
+++ b/packages/opencode/src/session/llm/AGENTS.md
@ -1,6 +1,6 @@
 # Session LLM Runtime Boundaries

-`../llm.ts` is the opencode session LLM service. It owns opencode concerns: auth, config, model/provider resolution, plugins, permissions, telemetry headers, and runtime selection.
+`../llm.ts` is the opencode session LLM service. It owns opencode concerns: auth, config, model/provider resolution, plugins, permissions, telemetry headers, and runtime selection. It is the only file in this area that should know about the full session request shape.

 This folder contains adapters behind that service boundary:

@ -8,6 +8,29 @@ This folder contains adapters behind that service boundary:
 - `native-request.ts` converts opencode's normalized session input into a native `@opencode-ai/llm` `LLMRequest`. It does not execute requests.
 - `native-runtime.ts` is the opt-in native runtime adapter. It decides whether a selected model is supported, builds the native request, bridges opencode tools into native executable tools, and delegates transport to `LLMClient` / `RequestExecutor`.

+## File Structure
+
+```txt
+src/session/
+  llm.ts                    session-owned orchestration and runtime selection
+  llm/
+    AGENTS.md               boundary notes for the adapter layer
+    ai-sdk.ts               AI SDK fullStream -> @opencode-ai/llm LLMEvent adapter
+    native-request.ts       opencode/AI SDK-shaped input -> @opencode-ai/llm LLMRequest
+    native-runtime.ts       native runtime gate, tool bridge, and LLMClient handoff
+```
+
+Integration points:
+
+- `../llm.ts` imports `LLMClient` from `@opencode-ai/llm/route`; native execution is the only path that calls it directly.
+- `../llm.ts` imports `LLMAISDK` from `./llm/ai-sdk`; the AI SDK path still calls `streamText(...)` locally, then adapts `result.fullStream` into shared `LLMEvent`s.
+- `../llm.ts` imports `LLMNativeRuntime` from `./llm/native-runtime`; this is the runtime-selection seam. Unsupported native requests return a reason and fall back to AI SDK.
+- `native-runtime.ts` imports `LLMNative` from `./native-request`; this keeps request lowering separate from transport and tool execution.
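+- `native-request.ts` is the only adapter file that should construct `LLM.request(...)`, model handles (`LLM.model(...)` or provider-configured ones such as `OpenAI.configure(...).responses(...)` / `Anthropic.configure(...).model(...)` from `@opencode-ai/llm/providers`), `Message.*`, `SystemPart`, `ToolCallPart`, `ToolResultPart`, or `ToolDefinition` values from `@opencode-ai/llm`.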
+- `ai-sdk.ts` and `native-runtime.ts` both emit `@opencode-ai/llm` `LLMEvent`s so downstream session processing does not care which runtime handled the request.
+
+Keep new integration code on one of these seams. Avoid importing session services into `native-request.ts`; pass normalized data through `RequestInput` instead.
+
 ## Runtime selection

 Both runtimes converge on the same `LLMEvent` stream consumed by the session processor. The gate is per-request: a single session can route some calls through native and fall back for others.
@ -63,5 +86,5 @@ Safety boundary:

 - AI SDK remains the default.
 - `OPENCODE_EXPERIMENTAL_NATIVE_LLM=true` or the umbrella `OPENCODE_EXPERIMENTAL=true` opts in. Native is not a global replacement.
- Native execution currently runs only for OpenAI-compatible Responses models exposed through `@ai-sdk/openai`: direct `openai` API-key auth and console-managed `opencode`/Zen API-key config.
+- Native execution currently runs only with API-key auth on the `openai`, `anthropic`, and `opencode*` providers, and only when the model's catalog entry is backed by `@ai-sdk/openai`, `@ai-sdk/openai-compatible`, or `@ai-sdk/anthropic`.
 - Unsupported providers, OpenAI OAuth, and missing API-key cases fall back to AI SDK.
--- a/packages/opencode/src/session/llm/native-request.ts
+++ b/packages/opencode/src/session/llm/native-request.ts
@ -1,6 +1,14 @@
 import type { JsonSchema, LLMRequest, ProviderMetadata } from "@opencode-ai/llm"
 import { LLM, Message, SystemPart, ToolCallPart, ToolDefinition, ToolResultPart } from "@opencode-ai/llm"
-import "@opencode-ai/llm/providers"
+import {
+  AmazonBedrock,
+  Anthropic,
+  Azure,
+  Google,
+  OpenAI,
+  OpenAICompatible,
+  OpenRouter,
+} from "@opencode-ai/llm/providers"
 import type { ModelMessage } from "ai"
 import type { Provider } from "@/provider/provider"
 import { isRecord } from "@/util/record"
@ -26,24 +34,6 @@ export type RequestInput = {
  readonly headers?: Record<string, string>
 }

-const DEFAULT_BASE_URL: Record<string, string> = {
-  "@ai-sdk/openai": "https://api.openai.com/v1",
-  "@ai-sdk/anthropic": "https://api.anthropic.com/v1",
-  "@ai-sdk/google": "https://generativelanguage.googleapis.com/v1beta",
-  "@ai-sdk/amazon-bedrock": "https://bedrock-runtime.us-east-1.amazonaws.com",
-  "@openrouter/ai-sdk-provider": "https://openrouter.ai/api/v1",
-}
-
-const ROUTE: Record<string, string> = {
-  "@ai-sdk/openai": "openai-responses",
-  "@ai-sdk/azure": "azure-openai-responses",
-  "@ai-sdk/anthropic": "anthropic-messages",
-  "@ai-sdk/google": "gemini",
-  "@ai-sdk/amazon-bedrock": "bedrock-converse",
-  "@ai-sdk/openai-compatible": "openai-compatible-chat",
-  "@openrouter/ai-sdk-provider": "openrouter",
-}
-
 const providerMetadata = (value: unknown): ProviderMetadata | undefined => {
  if (!isRecord(value)) return undefined
  const result = Object.fromEntries(
@ -147,33 +137,46 @@ const generation = (input: RequestInput) => {
  return Object.values(result).some((value) => value !== undefined) ? result : undefined
 }

-const baseURL = (model: Provider.Model) => {
-  if (model.api.url) return model.api.url
-  const fallback = DEFAULT_BASE_URL[model.api.npm]
-  if (fallback) return fallback
+// Resolves the endpoint override for a native model: the request-level
+// `baseURL` wins, then the catalog `api.url`. Empty strings count as "not
+// configured" at both levels, so an empty request override cannot mask the
+// catalog URL. Returning `undefined` leaves the provider's default endpoint.
+const baseURL = (input: Provider.Model | RequestInput) =>
+  "model" in input ? input.baseURL || input.model.api.url || undefined : input.api.url || undefined
+
+const requireBaseURL = (model: Provider.Model, url: string | undefined) => {
+  if (url) return url
  throw new Error(`Native LLM request adapter requires a base URL for ${model.providerID}/${model.id}`)
 }

 export const model = (input: Provider.Model | RequestInput, headers?: Record<string, string>) => {
  const model = "model" in input ? input.model : input
-  const route = ROUTE[model.api.npm]
-  if (!route) throw new Error(`Native LLM request adapter does not support provider package ${model.api.npm}`)
-  return LLM.model({
-    id: model.api.id,
-    provider: model.providerID,
-    route,
-    baseURL: "model" in input && input.baseURL ? input.baseURL : baseURL(model),
-    apiKey: "model" in input ? input.apiKey : undefined,
+  const url = baseURL(input)
+  const options = {
+    ...("model" in input && input.apiKey ? { apiKey: input.apiKey } : {}),
+    ...(url ? { baseURL: url } : {}),
    headers: Object.keys({ ...model.headers, ...headers }).length === 0 ? undefined : { ...model.headers, ...headers },
    limits: {
      context: model.limit.context,
      output: model.limit.output,
    },
-  })
+  }
+  if (model.api.npm === "@ai-sdk/openai") return OpenAI.configure(options).responses(model.api.id)
+  if (model.api.npm === "@ai-sdk/azure")
+    return Azure.configure({ ...options, baseURL: requireBaseURL(model, url) }).responses(model.api.id)
+  if (model.api.npm === "@ai-sdk/anthropic") return Anthropic.configure(options).model(model.api.id)
+  if (model.api.npm === "@ai-sdk/google") return Google.configure(options).model(model.api.id)
+  if (model.api.npm === "@ai-sdk/amazon-bedrock") return AmazonBedrock.configure(options).model(model.api.id)
+  if (model.api.npm === "@ai-sdk/openai-compatible")
+    return OpenAICompatible.configure({
+      ...options,
+      provider: String(model.providerID),
+      baseURL: requireBaseURL(model, url),
+    }).model(model.api.id)
+  if (model.api.npm === "@openrouter/ai-sdk-provider") return OpenRouter.configure(options).model(model.api.id)
+  throw new Error(`Native LLM request adapter does not support provider package ${model.api.npm}`)
 }

 export const request = (input: RequestInput) => {
  const converted = messages(input.messages)
+  // This is the only native adapter boundary that should construct canonical
+  // @opencode-ai/llm request objects from opencode's session/AI SDK-shaped data.
  return LLM.request({
    model: model(input, input.headers),
    system: [...(input.system ?? []).map(SystemPart.make), ...converted.system],
--- a/packages/opencode/src/session/llm/native-runtime.ts
+++ b/packages/opencode/src/session/llm/native-runtime.ts
@ -41,8 +41,8 @@ export function status(input: Pick<StreamInput, "model" | "provider" | "auth">):
  if (providerID !== "openai" && providerID !== "anthropic" && !providerID.startsWith("opencode"))
    return { type: "unsupported", reason: "provider is not openai, opencode, or anthropic" }
  const npm = input.model.api.npm
-  if (npm !== "@ai-sdk/openai" && npm !== "@ai-sdk/anthropic")
-    return { type: "unsupported", reason: "provider package is not OpenAI or Anthropic" }
+  if (npm !== "@ai-sdk/openai" && npm !== "@ai-sdk/openai-compatible" && npm !== "@ai-sdk/anthropic")
+    return { type: "unsupported", reason: "provider package is not OpenAI, OpenAI-compatible, or Anthropic" }
  if (input.auth?.type === "oauth") return { type: "unsupported", reason: "OAuth auth is not supported" }

  const apiKey = typeof input.provider.options.apiKey === "string" ? input.provider.options.apiKey : input.provider.key
@ -59,6 +59,8 @@ export function stream(input: StreamInput): StreamResult {
  const current = status(input)
  if (current.type === "unsupported") return current

+  // Integration point with @opencode-ai/llm: native-request lowers session data
+  // into an LLMRequest, then LLMClient handles route selection and transport.
  return {
    ...current,
    stream: input.llmClient.stream({
@ -99,6 +101,8 @@ export function nativeTools(tools: Record<string, Tool>, input: Pick<StreamInput
  return Object.fromEntries(
    Object.entries(tools).map(([name, item]) => [
      name,
+      // Tool execution remains opencode-owned. The native runtime only adapts
+      // the @opencode-ai/llm tool call back into the AI SDK Tool.execute shape.
      nativeTool({
        description: item.description ?? "",
        jsonSchema: nativeSchema(item.inputSchema),
--- a/packages/opencode/src/session/processor.ts
+++ b/packages/opencode/src/session/processor.ts
@ -278,9 +278,11 @@ export const layer = Layer.effect(
        return { call: ctx.toolcalls[input.id], part }
      })

-      const isFilePart = Schema.is(MessageV2.FilePart)
+      const isFilePart = (value: unknown): value is MessageV2.FilePart => Schema.is(MessageV2.FilePart)(value)

-      const toolResultOutput = (value: Extract<StreamEvent, { type: "tool-result" }>) => {
+      const toolResultOutput = (
+        value: Extract<StreamEvent, { type: "tool-result" }>,
+      ): { title: string; metadata: Record<string, any>; output: string; attachments?: MessageV2.FilePart[] } => {
        if (isRecord(value.result.value) && typeof value.result.value.output === "string") {
          return {
            title: typeof value.result.value.title === "string" ? value.result.value.title : value.name,
--- a/packages/opencode/test/server/httpapi-event-diagnostics.test.ts
+++ b/packages/opencode/test/server/httpapi-event-diagnostics.test.ts
@ -56,11 +56,11 @@ afterEach(async () => {
 })

 const inApp = <A, E>(eff: Effect.Effect<A, E, AppServices>) =>
-  Effect.flatMap(InstanceRef, (ctx) =>
-    ctx
-      ? Effect.promise(() => AppRuntime.runPromise(eff.pipe(Effect.provideService(InstanceRef, ctx))))
-      : Effect.die("InstanceRef not provided in test scope"),
-  )
+  Effect.gen(function* () {
+    const ctx = yield* InstanceRef
+    if (!ctx) return yield* Effect.die("InstanceRef not provided in test scope")
+    return yield* Effect.promise(() => AppRuntime.runPromise(eff.pipe(Effect.provideService(InstanceRef, ctx))))
+  })

 const publishConnected = inApp(Bus.Service.use((svc) => svc.publish(ServerEvent.Connected, {})))

@ -112,7 +112,7 @@ const readNextEvent = (reader: ReadableStreamDefaultReader<Uint8Array>) =>
      if (result.done || !result.value) return Effect.fail(new Error("event stream closed"))
      const frames = decodeFrame(result.value)
      if (frames.length === 0) return Effect.fail(new Error("empty SSE frame"))
-      return Effect.succeed(frames[0]!)
+      return Effect.succeed(frames[0])
    }),
  )

@ -186,8 +186,7 @@ describe("/event SSE delivery diagnostics", () => {

        const collected = yield* collectUntilEvent(reader, isPartUpdated)
        const updated = collected.find(isPartUpdated)
-        expect(updated).toBeDefined()
-        expect((updated as SseEvent).properties.part.id).toBe(partID)
+        expect(updated?.properties.part.id).toBe(partID)
      }),
    { git: true, config: { formatter: false, lsp: false } },
  )
@ -217,7 +216,7 @@ describe("/event SSE delivery diagnostics", () => {
          }),
        )
        expect(event.type).toBe(MessageV2.Event.PartUpdated.type)
-        expect((event.properties as { part: { id: string } }).part.id).toBe(partID)
+        expect(event.properties).toMatchObject({ part: { id: partID } })
      }),
    { git: true, config: { formatter: false, lsp: false } },
  )
--- a/packages/opencode/test/session/llm-native-recorded.test.ts
+++ b/packages/opencode/test/session/llm-native-recorded.test.ts
@ -13,7 +13,7 @@ import { Provider } from "@/provider/provider"
 import { ModelID, ProviderID } from "@/provider/schema"
 import { Filesystem } from "@/util/filesystem"
 import { LLMEvent, LLMResponse } from "@opencode-ai/llm"
-import { LLMClient, RequestExecutor } from "@opencode-ai/llm/route"
+import { LLMClient, RequestExecutor, WebSocketExecutor } from "@opencode-ai/llm/route"
 import { RuntimeFlags } from "@/effect/runtime-flags"
 import type { Agent } from "../../src/agent/agent"
 import { LLM } from "../../src/session/llm"
@ -137,7 +137,7 @@ async function loadFixture(providerID: string, modelID: string) {
 function recordedNativeLLMLayer(spec: ProviderSpec) {
  // Only the HTTP client is recorded; RequestExecutor and the opencode LLM stack remain real.
  const recordedClient = LLMClient.layer.pipe(
-    Layer.provide(RequestExecutor.layer),
+    Layer.provide(Layer.mergeAll(RequestExecutor.layer, WebSocketExecutor.layer)),
    Layer.provide(
      HttpRecorder.recordingLayer(spec.cassette, {
        mode: shouldRecord ? "record" : "replay",
--- a/packages/opencode/test/session/llm-native.test.ts
+++ b/packages/opencode/test/session/llm-native.test.ts
@ -1,8 +1,8 @@
 import { describe, expect, test } from "bun:test"
 import { ToolFailure } from "@opencode-ai/llm"
-import { LLMClient, RequestExecutor } from "@opencode-ai/llm/route"
+import { LLMClient, RequestExecutor, WebSocketExecutor } from "@opencode-ai/llm/route"
 import { jsonSchema, tool, type ModelMessage } from "ai"
-import { Effect } from "effect"
+import { Effect, Layer } from "effect"
 import { LLMNative } from "@/session/llm/native-request"
 import { LLMNativeRuntime } from "@/session/llm/native-runtime"
 import type { Provider } from "@/provider/provider"
@ -138,16 +138,16 @@ describe("session.llm-native.request", () => {
    expect(request.model).toMatchObject({
      id: "gpt-5-mini",
      provider: "openai",
-      route: "openai-responses",
-      baseURL: "https://api.openai.com/v1",
-      headers: {
-        "x-model": "model-header",
-        "x-request": "request-header",
-      },
-      limits: {
-        context: 128_000,
-        output: 32_000,
-      },
+      route: { id: "openai-responses" },
+    })
+    expect(request.model.route.endpoint.baseURL).toBe("https://api.openai.com/v1")
+    expect(request.model.route.defaults.headers).toEqual({
+      "x-model": "model-header",
+      "x-request": "request-header",
+    })
+    expect(request.model.route.defaults.limits).toMatchObject({
+      context: 128_000,
+      output: 32_000,
    })
    expect(request.system).toEqual([
      { type: "text", text: "agent system" },
@ -211,29 +211,50 @@ describe("session.llm-native.request", () => {
    ])
  })

-  test("selects native routes from existing provider packages", () => {
-    expect(
-      LLMNative.model({ ...baseModel, api: { ...baseModel.api, url: "", npm: "@ai-sdk/anthropic" } }),
-    ).toMatchObject({
-      route: "anthropic-messages",
-      baseURL: "https://api.anthropic.com/v1",
+  test("selects native request routes for provider packages", () => {
+    const openai = LLMNative.model({
+      model: { ...baseModel, api: { ...baseModel.api, url: "", npm: "@ai-sdk/openai" } },
+      apiKey: "test-key",
+      messages: [],
    })
-    expect(LLMNative.model({ ...baseModel, api: { ...baseModel.api, url: "", npm: "@ai-sdk/google" } })).toMatchObject({
-      route: "gemini",
-      baseURL: "https://generativelanguage.googleapis.com/v1beta",
+    expect(openai.route.id).toBe("openai-responses")
+    expect(openai.route.endpoint.baseURL).toBe("https://api.openai.com/v1")
+
+    const anthropic = LLMNative.model({
+      model: { ...baseModel, api: { ...baseModel.api, url: "", npm: "@ai-sdk/anthropic" } },
+      apiKey: "test-key",
+      messages: [],
    })
-    expect(
-      LLMNative.model({ ...baseModel, api: { ...baseModel.api, npm: "@ai-sdk/openai-compatible" } }),
-    ).toMatchObject({
-      route: "openai-compatible-chat",
-      baseURL: "https://api.openai.com/v1",
+    expect(anthropic.route.id).toBe("anthropic-messages")
+    expect(anthropic.route.endpoint.baseURL).toBe("https://api.anthropic.com/v1")
+
+    const google = LLMNative.model({
+      model: { ...baseModel, api: { ...baseModel.api, url: "", npm: "@ai-sdk/google" } },
+      apiKey: "test-key",
+      messages: [],
    })
-    expect(
-      LLMNative.model({ ...baseModel, api: { ...baseModel.api, url: "", npm: "@openrouter/ai-sdk-provider" } }),
-    ).toMatchObject({
-      route: "openrouter",
-      baseURL: "https://openrouter.ai/api/v1",
+    expect(google.route.id).toBe("gemini")
+    expect(google.route.endpoint.baseURL).toBe("https://generativelanguage.googleapis.com/v1beta")
+
+    const compatible = LLMNative.model({
+      model: {
+        ...baseModel,
+        providerID: ProviderID.make("opencode"),
+        api: { ...baseModel.api, url: "https://ai.example.test/v1", npm: "@ai-sdk/openai-compatible" },
+      },
+      apiKey: "test-key",
+      messages: [],
    })
+    expect(compatible.route.id).toBe("openai-compatible-chat")
+    expect(compatible.route.endpoint.baseURL).toBe("https://ai.example.test/v1")
+
+    const openrouter = LLMNative.model({
+      model: { ...baseModel, api: { ...baseModel.api, url: "", npm: "@openrouter/ai-sdk-provider" } },
+      apiKey: "test-key",
+      messages: [],
+    })
+    expect(openrouter.route.id).toBe("openrouter")
+    expect(openrouter.route.endpoint.baseURL).toBe("https://openrouter.ai/api/v1")
  })

  test("fails fast for unsupported provider packages", () => {
@ -260,6 +281,20 @@ describe("session.llm-native.request", () => {
      type: "supported",
      apiKey: "test-openai-key",
    })
+    expect(
+      LLMNativeRuntime.status({
+        model: {
+          ...baseModel,
+          providerID: ProviderID.make("opencode"),
+          api: { ...baseModel.api, npm: "@ai-sdk/openai-compatible" },
+        },
+        provider: { ...providerInfo, id: ProviderID.make("opencode") },
+        auth: undefined,
+      }),
+    ).toMatchObject({
+      type: "supported",
+      apiKey: "test-openai-key",
+    })
    expect(
      LLMNativeRuntime.status({
        model: { ...baseModel, providerID: ProviderID.make("google") },
@ -281,7 +316,7 @@ describe("session.llm-native.request", () => {
        provider: providerInfo,
        auth: undefined,
      }),
-    ).toEqual({ type: "unsupported", reason: "provider package is not OpenAI or Anthropic" })
+    ).toEqual({ type: "unsupported", reason: "provider package is not OpenAI, OpenAI-compatible, or Anthropic" })

    expect(
      LLMNativeRuntime.status({
@ -382,12 +417,16 @@ describe("session.llm-native.request", () => {
      LLMClient.prepare(
        LLMNative.request({
          model: baseModel,
+          apiKey: "test-openai-key",
          messages: [{ role: "user", content: "hello" }],
          providerOptions: { openai: { store: false } },
          maxOutputTokens: 512,
          headers: { "x-request": "request-header" },
        }),
-      ).pipe(Effect.provide(LLMClient.layer), Effect.provide(RequestExecutor.defaultLayer)),
+      ).pipe(
+        Effect.provide(LLMClient.layer),
+        Effect.provide(Layer.mergeAll(RequestExecutor.defaultLayer, WebSocketExecutor.layer)),
+      ),
    )

    expect(prepared).toMatchObject({
--- a/packages/opencode/test/session/llm.test.ts
+++ b/packages/opencode/test/session/llm.test.ts
@ -8,7 +8,7 @@ import { makeRuntime } from "../../src/effect/run-service"
 import { InstanceRef } from "../../src/effect/instance-ref"
 import { LLM } from "../../src/session/llm"
 import type { InstanceContext } from "../../src/project/instance-context"
-import { LLMClient, RequestExecutor } from "@opencode-ai/llm/route"
+import { LLMClient, RequestExecutor, WebSocketExecutor } from "@opencode-ai/llm/route"
 import { Auth } from "@/auth"
 import { Config } from "@/config/config"
 import { Provider } from "@/provider/provider"
@ -82,7 +82,7 @@ function llmLayerWithExecutor(executor: Layer.Layer<RequestExecutor.Service>, fl
    Layer.provide(Config.defaultLayer),
    Layer.provide(Provider.defaultLayer),
    Layer.provide(Plugin.defaultLayer),
-    Layer.provide(LLMClient.layer.pipe(Layer.provide(executor))),
+    Layer.provide(LLMClient.layer.pipe(Layer.provide(Layer.mergeAll(executor, WebSocketExecutor.layer)))),
    Layer.provide(RuntimeFlags.layer(flags)),
  )
 }
@ -1975,54 +1975,45 @@ describe("session.llm.stream", () => {
        const body = capture.body

        expect(capture.url.pathname.endsWith("/messages")).toBe(true)
-        expect(body.messages).toStrictEqual([
+        const messages = body.messages as Array<{ role: string; content: Array<Record<string, unknown>> }>
+        expect(messages[0]?.role).toBe("user")
+        expect(messages[0]?.content[0]).toMatchObject({
+          type: "text",
+          text: "Can you check whether there are any PDF files in my home directory?",
+        })
+        expect(messages.some((message) => message.content.some((part) => "cache_control" in part))).toBe(true)
+        const toolUseIndex = messages.findIndex((message) => message.content.some((part) => part.type === "tool_use"))
+        expect(toolUseIndex).toBeGreaterThan(0)
+        expect(messages[toolUseIndex].role).toBe("assistant")
+        expect(messages[toolUseIndex].content.filter((part) => part.type === "tool_use")).toMatchObject([
          {
-            role: "user",
-            content: [{ type: "text", text: "Can you check whether there are any PDF files in my home directory?" }],
+            type: "tool_use",
+            id: "toolu_01N8mDEzG8DSTs7UPHFtmgCT",
+            name: "read",
+            input: { filePath: "/root" },
          },
          {
-            role: "assistant",
-            content: [
-              {
-                type: "text",
-                text: "I checked your home directory and looked for PDF files.",
-              },
-              {
-                type: "tool_use",
-                id: "toolu_01N8mDEzG8DSTs7UPHFtmgCT",
-                name: "read",
-                input: { filePath: "/root" },
-              },
-              {
-                type: "tool_use",
-                id: "toolu_01APxrADs7VozN8uWzw9WwHr",
-                name: "glob",
-                input: { pattern: "**/*.pdf", path: "/root" },
-                cache_control: {
-                  type: "ephemeral",
-                },
-              },
-            ],
-          },
-          {
-            role: "user",
-            content: [
-              {
-                type: "tool_result",
-                tool_use_id: "toolu_01N8mDEzG8DSTs7UPHFtmgCT",
-                content: "<path>/root</path>",
-              },
-              {
-                type: "tool_result",
-                tool_use_id: "toolu_01APxrADs7VozN8uWzw9WwHr",
-                content: "No files found",
-                cache_control: {
-                  type: "ephemeral",
-                },
-              },
-            ],
+            type: "tool_use",
+            id: "toolu_01APxrADs7VozN8uWzw9WwHr",
+            name: "glob",
+            input: { pattern: "**/*.pdf", path: "/root" },
          },
        ])
+        expect(messages[toolUseIndex + 1]).toMatchObject({
+          role: "user",
+          content: [
+            {
+              type: "tool_result",
+              tool_use_id: "toolu_01N8mDEzG8DSTs7UPHFtmgCT",
+              content: "<path>/root</path>",
+            },
+            {
+              type: "tool_result",
+              tool_use_id: "toolu_01APxrADs7VozN8uWzw9WwHr",
+              content: "No files found",
+            },
+          ],
+        })
      },
    })
  })