opencode/packages/llm/test/schema.test.ts
Kit Langton d4ff331052 refactor(llm): inclusive total + non-overlapping breakdown for Usage
Final shape after considering ecosystem conventions:

  inputTokens             — inclusive total (matches AI SDK / OpenAI / LangChain)
  outputTokens            — inclusive total (includes reasoning)
  nonCachedInputTokens    — breakdown: fresh prompt
  cacheReadInputTokens    — breakdown: cache hit
  cacheWriteInputTokens   — breakdown: cache write
  reasoningTokens         — subset of outputTokens

Invariants:
  nonCached + cacheRead + cacheWrite = inputTokens
  reasoningTokens <= outputTokens

Why this shape:

- `inputTokens` keeps its AI-SDK / OpenAI semantics, so a reader from any
  major ecosystem sees the number they expect.
- The non-overlapping breakdown fields are populated alongside the
  inclusive totals — consumers read whichever they need without
  subtracting. This eliminates the underflow bug class (opencode#26620)
  structurally without diverging on naming.
- Aligns with the AI SDK v3 spec proposal (vercel/ai#9921), which adds
  exactly this kind of non-overlapping breakdown to address the active
  ecosystem bugs around cache token double-counting and underflow
  (pydantic-ai#4364, langfuse#12306/#11979, vercel/ai#8349,
  langchain#32818, langchainjs#10249).

Mappers:

- OpenAI Chat / Responses / Bedrock: provider reports inclusive totals
  natively; mapper derives `nonCachedInputTokens` via
  `ProviderShared.subtractTokens`.
- Gemini: `promptTokenCount` is inclusive; `candidatesTokenCount` is
  *exclusive* of `thoughtsTokenCount`, so mapper sums those to produce
  the inclusive `outputTokens`. Only computes the total when the visible
  component is reported (avoids fabricating an inclusive number from a
  partial breakdown).
- Anthropic: `input_tokens` is *non-cached* natively; mapper sums it with
  cache reads/writes to produce the inclusive `inputTokens`.
  `output_tokens` is inclusive (Anthropic doesn't break thinking out, so
  `reasoningTokens` stays undefined).

Added a `visibleOutputTokens` getter (`outputTokens - reasoningTokens`, clamped at zero)
as the one safe escape hatch for consumers wanting the non-reasoning view.

Added `ProviderShared.sumTokens` to derive an inclusive total from a
non-overlapping breakdown, returning `undefined` when every input is
undefined (so we don't fabricate a 0).
2026-05-10 20:39:22 -04:00

78 lines
2.9 KiB
TypeScript

import { describe, expect, test } from "bun:test"
import { Schema } from "effect"
import { ContentPart, LLMEvent, LLMRequest, ModelID, ModelLimits, ModelRef, ProviderID, Usage } from "../src/schema"
import { ProviderShared } from "../src/protocols/shared"
/**
 * Shared model reference fixture for the request-decoding tests below.
 * Every value is a placeholder; nothing here points at a real provider.
 */
const model = new ModelRef({
  id: ModelID.make("fake-model"),
  provider: ProviderID.make("fake-provider"),
  route: "openai-chat",
  baseURL: "https://fake.local",
  limits: new ModelLimits({}),
})
/** Decoding and type-guard checks for the request, event and content-part schemas. */
describe("llm schema", () => {
  test("decodes a minimal request", () => {
    // Kept as `unknown` so the schema decoder does the validating.
    const payload: unknown = {
      id: "req_1",
      model,
      system: [{ type: "text", text: "You are terse." }],
      messages: [{ role: "user", content: [{ type: "text", text: "hi" }] }],
      tools: [],
      generation: {},
    }

    const decodeRequest = Schema.decodeUnknownSync(LLMRequest)
    const request = decodeRequest(payload)

    expect(request.id).toBe("req_1")

    const firstPart = request.messages[0]?.content[0]
    expect(firstPart?.type).toBe("text")
  })

  test("accepts custom route ids", () => {
    // Same fixture, but with a route string other than the one the fixture uses.
    const customModel = { ...model, route: "custom-route" }

    const request = Schema.decodeUnknownSync(LLMRequest)({
      model: customModel,
      system: [],
      messages: [],
      tools: [],
      generation: {},
    })

    expect(request.model.route).toBe("custom-route")
  })

  test("rejects invalid event type", () => {
    const decodeBogusEvent = () => Schema.decodeUnknownSync(LLMEvent)({ type: "bogus" })

    expect(decodeBogusEvent).toThrow()
  })

  test("content part tagged union exposes guards", () => {
    const { guards } = ContentPart

    // A text part satisfies the text guard and fails the media guard.
    const matchesText = guards.text({ type: "text", text: "hi" })
    expect(matchesText).toBe(true)

    const matchesMedia = guards.media({ type: "text", text: "hi" })
    expect(matchesMedia).toBe(false)
  })
})
/** Token-arithmetic helpers and the derived `visibleOutputTokens` getter on `Usage`. */
describe("LLM.Usage", () => {
  test("subtractTokens clamps non-sensical breakdowns to zero", () => {
    // Guards against a provider reporting cached_tokens > prompt_tokens or
    // reasoning_tokens > completion_tokens: an unclamped negative would
    // travel through the pipeline and crash strict downstream schemas.
    // Each row is [total, part, expected result].
    const cases: ReadonlyArray<readonly [number | undefined, number | undefined, number | undefined]> = [
      [5, 3, 2],
      [5, 10, 0],
      [5, undefined, 5],
      [undefined, 3, undefined],
      [undefined, undefined, undefined],
    ]

    for (const [total, part, expected] of cases) {
      expect(ProviderShared.subtractTokens(total, part)).toBe(expected)
    }
  })

  test("sumTokens returns undefined only when every input is undefined", () => {
    const allPresent = ProviderShared.sumTokens(1, 2, 3)
    expect(allPresent).toBe(6)

    // A missing component is skipped rather than poisoning the sum.
    const oneMissing = ProviderShared.sumTokens(1, undefined, 3)
    expect(oneMissing).toBe(4)

    const allMissing = ProviderShared.sumTokens(undefined, undefined, undefined)
    expect(allMissing).toBeUndefined()

    const noArguments = ProviderShared.sumTokens()
    expect(noArguments).toBeUndefined()
  })

  test("visibleOutputTokens clamps reasoning > output to zero", () => {
    const withReasoning = new Usage({ outputTokens: 10, reasoningTokens: 4 })
    expect(withReasoning.visibleOutputTokens).toBe(6)

    const withoutReasoning = new Usage({ outputTokens: 10 })
    expect(withoutReasoning.visibleOutputTokens).toBe(10)

    // Reasoning larger than the output total must not yield a negative.
    const reasoningExceedsOutput = new Usage({ outputTokens: 4, reasoningTokens: 10 })
    expect(reasoningExceedsOutput.visibleOutputTokens).toBe(0)

    const emptyUsage = new Usage({})
    expect(emptyUsage.visibleOutputTokens).toBe(0)
  })
})