mirror of
https://github.com/anomalyco/opencode.git
synced 2026-05-25 14:55:28 +00:00
Final shape after considering ecosystem conventions:

  inputTokens            — inclusive total (matches AI SDK / OpenAI / LangChain)
  outputTokens           — inclusive total (includes reasoning)
  nonCachedInputTokens   — breakdown: fresh prompt
  cacheReadInputTokens   — breakdown: cache hit
  cacheWriteInputTokens  — breakdown: cache write
  reasoningTokens        — subset of outputTokens

Invariant:
  nonCached + cacheRead + cacheWrite = inputTokens
  reasoningTokens <= outputTokens

Why this shape:
- `inputTokens` keeps its AI-SDK / OpenAI semantics, so a reader from any major ecosystem sees the number they expect.
- The non-overlapping breakdown fields are populated alongside the inclusive totals — consumers read whichever they need without subtracting. This eliminates the underflow bug class (opencode#26620) structurally without diverging on naming.
- Aligns with the AI SDK v3 spec proposal (vercel/ai#9921), which adds exactly this kind of non-overlapping breakdown to address the active ecosystem bugs around cache token double-counting and underflow (pydantic-ai#4364, langfuse#12306/#11979, vercel/ai#8349, langchain#32818, langchainjs#10249).

Mappers:
- OpenAI Chat / Responses / Bedrock: provider reports inclusive totals natively; mapper derives `nonCachedInputTokens` via `ProviderShared.subtractTokens`.
- Gemini: `promptTokenCount` is inclusive; `candidatesTokenCount` is *exclusive* of `thoughtsTokenCount`, so mapper sums those to produce the inclusive `outputTokens`. Only computes the total when the visible component is reported (avoids fabricating an inclusive number from a partial breakdown).
- Anthropic: `input_tokens` is *non-cached* natively; mapper sums it with cache reads/writes to produce the inclusive `inputTokens`. `output_tokens` is inclusive (Anthropic doesn't break thinking out, so `reasoningTokens` stays undefined).

Added a `visibleOutputTokens` getter (clamped `outputTokens - reasoningTokens`) as the one safe escape hatch for consumers wanting the non-reasoning view.
Added `ProviderShared.sumTokens` to derive an inclusive total from a non-overlapping breakdown, returning `undefined` when every input is undefined (so we don't fabricate a 0).
78 lines
2.9 KiB
TypeScript
78 lines
2.9 KiB
TypeScript
import { describe, expect, test } from "bun:test"
|
|
import { Schema } from "effect"
|
|
import { ContentPart, LLMEvent, LLMRequest, ModelID, ModelLimits, ModelRef, ProviderID, Usage } from "../src/schema"
|
|
import { ProviderShared } from "../src/protocols/shared"
|
|
|
|
// Shared fixture: a ModelRef built from placeholder values ("fake-model",
// "fake-provider", "https://fake.local") that the request-decoding tests
// embed in their payloads.
const fakeModelId = ModelID.make("fake-model")
const fakeProviderId = ProviderID.make("fake-provider")
const emptyLimits = new ModelLimits({})

const model = new ModelRef({
  id: fakeModelId,
  provider: fakeProviderId,
  route: "openai-chat",
  baseURL: "https://fake.local",
  limits: emptyLimits,
})
|
|
|
|
// Decoding and guard checks for the request, event and content-part schemas.
describe("llm schema", () => {
  // Built lazily on every call so any failure surfaces inside the calling test.
  const decodeRequest = (raw: unknown) => Schema.decodeUnknownSync(LLMRequest)(raw)

  test("decodes a minimal request", () => {
    const payload: unknown = {
      id: "req_1",
      model,
      system: [{ type: "text", text: "You are terse." }],
      messages: [{ role: "user", content: [{ type: "text", text: "hi" }] }],
      tools: [],
      generation: {},
    }

    const request = decodeRequest(payload)
    const firstPart = request.messages[0]?.content[0]

    expect(request.id).toBe("req_1")
    expect(firstPart?.type).toBe("text")
  })

  test("accepts custom route ids", () => {
    // Same fixture model, but with a route value other than the fixture's.
    const customModel = { ...model, route: "custom-route" }

    const request = decodeRequest({
      model: customModel,
      system: [],
      messages: [],
      tools: [],
      generation: {},
    })

    expect(request.model.route).toBe("custom-route")
  })

  test("rejects invalid event type", () => {
    const decodeBogusEvent = () => Schema.decodeUnknownSync(LLMEvent)({ type: "bogus" })

    expect(decodeBogusEvent).toThrow()
  })

  test("content part tagged union exposes guards", () => {
    const guards = ContentPart.guards

    // A text part satisfies the text guard and fails the media guard.
    const matchesText = guards.text({ type: "text", text: "hi" })
    expect(matchesText).toBe(true)

    const matchesMedia = guards.media({ type: "text", text: "hi" })
    expect(matchesMedia).toBe(false)
  })
})
|
|
|
|
// Token arithmetic helpers on ProviderShared and the derived getter on Usage.
describe("LLM.Usage", () => {
  test("subtractTokens clamps non-sensical breakdowns to zero", () => {
    // Guards against a provider reporting cached_tokens > prompt_tokens or
    // reasoning_tokens > completion_tokens: without the clamp the negative
    // value would travel through the pipeline and crash strict downstream
    // schemas.
    const cases: ReadonlyArray<
      readonly [total: number | undefined, part: number | undefined, expected: number | undefined]
    > = [
      [5, 3, 2],
      [5, 10, 0],
      [5, undefined, 5],
      [undefined, 3, undefined],
      [undefined, undefined, undefined],
    ]

    for (const [total, part, expected] of cases) {
      expect(ProviderShared.subtractTokens(total, part)).toBe(expected)
    }
  })

  test("sumTokens returns undefined only when every input is undefined", () => {
    const allDefined = ProviderShared.sumTokens(1, 2, 3)
    expect(allDefined).toBe(6)

    const oneMissing = ProviderShared.sumTokens(1, undefined, 3)
    expect(oneMissing).toBe(4)

    const allMissing = ProviderShared.sumTokens(undefined, undefined, undefined)
    expect(allMissing).toBeUndefined()

    const noArguments = ProviderShared.sumTokens()
    expect(noArguments).toBeUndefined()
  })

  test("visibleOutputTokens clamps reasoning > output to zero", () => {
    const reasoningWithinOutput = new Usage({ outputTokens: 10, reasoningTokens: 4 })
    expect(reasoningWithinOutput.visibleOutputTokens).toBe(6)

    const noReasoning = new Usage({ outputTokens: 10 })
    expect(noReasoning.visibleOutputTokens).toBe(10)

    // Reasoning larger than output must not produce a negative count.
    const reasoningExceedsOutput = new Usage({ outputTokens: 4, reasoningTokens: 10 })
    expect(reasoningExceedsOutput.visibleOutputTokens).toBe(0)

    const emptyUsage = new Usage({})
    expect(emptyUsage.visibleOutputTokens).toBe(0)
  })
})
|