mirror of
https://github.com/anomalyco/opencode.git
synced 2026-05-25 23:15:18 +00:00
refactor(llm): inclusive total + non-overlapping breakdown for Usage
Final shape after considering ecosystem conventions:

  inputTokens — inclusive total (matches AI SDK / OpenAI / LangChain)
  outputTokens — inclusive total (includes reasoning)
  nonCachedInputTokens — breakdown: fresh prompt
  cacheReadInputTokens — breakdown: cache hit
  cacheWriteInputTokens — breakdown: cache write
  reasoningTokens — subset of outputTokens

Invariant:
  nonCached + cacheRead + cacheWrite = inputTokens
  reasoningTokens <= outputTokens

Why this shape:
- `inputTokens` keeps its AI-SDK / OpenAI semantics, so a reader from any major ecosystem sees the number they expect.
- The non-overlapping breakdown fields are populated alongside the inclusive totals — consumers read whichever they need without subtracting. This eliminates the underflow bug class (opencode#26620) structurally without diverging on naming.
- Aligns with the AI SDK v3 spec proposal (vercel/ai#9921), which adds exactly this kind of non-overlapping breakdown to address the active ecosystem bugs around cache token double-counting and underflow (pydantic-ai#4364, langfuse#12306/#11979, vercel/ai#8349, langchain#32818, langchainjs#10249).

Mappers:
- OpenAI Chat / Responses / Bedrock: provider reports inclusive totals natively; mapper derives `nonCachedInputTokens` via `ProviderShared.subtractTokens`.
- Gemini: `promptTokenCount` is inclusive; `candidatesTokenCount` is *exclusive* of `thoughtsTokenCount`, so mapper sums those to produce the inclusive `outputTokens`. Only computes the total when the visible component is reported (avoids fabricating an inclusive number from a partial breakdown).
- Anthropic: `input_tokens` is *non-cached* natively; mapper sums it with cache reads/writes to produce the inclusive `inputTokens`. `output_tokens` is inclusive (Anthropic doesn't break thinking out, so `reasoningTokens` stays undefined).

Added a `visibleOutputTokens` getter (clamped `outputTokens - reasoningTokens`) as the one safe escape hatch for consumers wanting the non-reasoning view.
Added `ProviderShared.sumTokens` to derive an inclusive total from a non-overlapping breakdown, returning `undefined` when every input is undefined (so we don't fabricate a 0).
This commit is contained in:
parent
f5d199db62
commit
d4ff331052
12 changed files with 173 additions and 115 deletions
|
|
@ -364,40 +364,49 @@ const mapFinishReason = (reason: string | null | undefined): FinishReason => {
|
|||
return "unknown"
|
||||
}
|
||||
|
||||
// Anthropic already reports input/cache-read/cache-write as separate
|
||||
// non-overlapping categories per the Messages API docs, so the additive
|
||||
// `LLM.Usage` contract is satisfied by direct pass-through. Extended
|
||||
// Anthropic reports the non-overlapping breakdown natively — its
|
||||
// `input_tokens` is the *non-cached* count per the Messages API docs, with
|
||||
// cache reads and writes as separate fields. We sum them to derive the
|
||||
// inclusive `inputTokens` the rest of the contract expects. Extended
|
||||
// thinking tokens are *not* broken out by Anthropic — they're billed as
|
||||
// part of `output_tokens`, so `outputTokens` here may include reasoning
|
||||
// the same way OpenAI's `output_tokens` does pre-normalization. This is
|
||||
// a documented limitation of the Anthropic API surface, not a contract
|
||||
// violation.
|
||||
// part of `output_tokens`, so `reasoningTokens` stays `undefined` and
|
||||
// `outputTokens` carries the combined total.
|
||||
const mapUsage = (usage: AnthropicUsage | undefined): Usage | undefined => {
|
||||
if (!usage) return undefined
|
||||
const nonCached = usage.input_tokens
|
||||
const cacheRead = usage.cache_read_input_tokens ?? undefined
|
||||
const cacheWrite = usage.cache_creation_input_tokens ?? undefined
|
||||
const inputTokens = ProviderShared.sumTokens(nonCached, cacheRead, cacheWrite)
|
||||
return new Usage({
|
||||
inputTokens: usage.input_tokens,
|
||||
inputTokens,
|
||||
outputTokens: usage.output_tokens,
|
||||
cacheReadInputTokens: usage.cache_read_input_tokens ?? undefined,
|
||||
cacheWriteInputTokens: usage.cache_creation_input_tokens ?? undefined,
|
||||
totalTokens: ProviderShared.totalTokens(usage.input_tokens, usage.output_tokens, undefined),
|
||||
nonCachedInputTokens: nonCached,
|
||||
cacheReadInputTokens: cacheRead,
|
||||
cacheWriteInputTokens: cacheWrite,
|
||||
totalTokens: ProviderShared.totalTokens(inputTokens, usage.output_tokens, undefined),
|
||||
native: usage,
|
||||
})
|
||||
}
|
||||
|
||||
// Anthropic emits usage on `message_start` and again on `message_delta` — the
|
||||
// final delta carries the authoritative totals. Right-biased merge: each
|
||||
// field prefers `right` when defined, falls back to `left`. `totalTokens` is
|
||||
// recomputed from the merged input/output to stay consistent.
|
||||
// field prefers `right` when defined, falls back to `left`. `inputTokens` is
|
||||
// recomputed from the merged breakdown so the inclusive total stays
|
||||
// consistent with `nonCached + cacheRead + cacheWrite`.
|
||||
const mergeUsage = (left: Usage | undefined, right: Usage | undefined) => {
|
||||
if (!left) return right
|
||||
if (!right) return left
|
||||
const inputTokens = right.inputTokens ?? left.inputTokens
|
||||
const nonCachedInputTokens = right.nonCachedInputTokens ?? left.nonCachedInputTokens
|
||||
const cacheReadInputTokens = right.cacheReadInputTokens ?? left.cacheReadInputTokens
|
||||
const cacheWriteInputTokens = right.cacheWriteInputTokens ?? left.cacheWriteInputTokens
|
||||
const inputTokens = ProviderShared.sumTokens(nonCachedInputTokens, cacheReadInputTokens, cacheWriteInputTokens)
|
||||
const outputTokens = right.outputTokens ?? left.outputTokens
|
||||
return new Usage({
|
||||
inputTokens,
|
||||
outputTokens,
|
||||
cacheReadInputTokens: right.cacheReadInputTokens ?? left.cacheReadInputTokens,
|
||||
cacheWriteInputTokens: right.cacheWriteInputTokens ?? left.cacheWriteInputTokens,
|
||||
nonCachedInputTokens,
|
||||
cacheReadInputTokens,
|
||||
cacheWriteInputTokens,
|
||||
totalTokens: ProviderShared.totalTokens(inputTokens, outputTokens, undefined),
|
||||
native: { ...left.native, ...right.native },
|
||||
})
|
||||
|
|
|
|||
|
|
@ -363,21 +363,21 @@ const mapFinishReason = (reason: string): FinishReason => {
|
|||
return "unknown"
|
||||
}
|
||||
|
||||
// AWS Bedrock Converse reports `inputTokens` as the total prompt with
|
||||
// cached and cache-write tokens included (per the Bedrock prompt-caching
|
||||
// docs). Pull each subtotal out at the boundary so the additive
|
||||
// `LLM.Usage` contract holds. Bedrock does not separately report
|
||||
// reasoning tokens for any current model.
|
||||
// AWS Bedrock Converse reports `inputTokens` (inclusive total) with
|
||||
// `cacheReadInputTokens` and `cacheWriteInputTokens` as subsets. Pass
|
||||
// the total through and derive the non-cached breakdown. Bedrock does
|
||||
// not break reasoning out of `outputTokens` for any current model.
|
||||
const mapUsage = (usage: BedrockUsageSchema | undefined): Usage | undefined => {
|
||||
if (!usage) return undefined
|
||||
const cacheTotal = (usage.cacheReadInputTokens ?? 0) + (usage.cacheWriteInputTokens ?? 0)
|
||||
const inputTokens = ProviderShared.subtractTokens(usage.inputTokens, cacheTotal)
|
||||
const nonCached = ProviderShared.subtractTokens(usage.inputTokens, cacheTotal)
|
||||
return new Usage({
|
||||
inputTokens,
|
||||
inputTokens: usage.inputTokens,
|
||||
outputTokens: usage.outputTokens,
|
||||
totalTokens: ProviderShared.totalTokens(inputTokens, usage.outputTokens, usage.totalTokens),
|
||||
nonCachedInputTokens: nonCached,
|
||||
cacheReadInputTokens: usage.cacheReadInputTokens,
|
||||
cacheWriteInputTokens: usage.cacheWriteInputTokens,
|
||||
totalTokens: ProviderShared.totalTokens(usage.inputTokens, usage.outputTokens, usage.totalTokens),
|
||||
native: usage,
|
||||
})
|
||||
}
|
||||
|
|
|
|||
|
|
@ -281,21 +281,29 @@ const fromRequest = Effect.fn("Gemini.fromRequest")(function* (request: LLMReque
|
|||
// =============================================================================
|
||||
// Stream Parsing
|
||||
// =============================================================================
|
||||
// Gemini reports `promptTokenCount` as the total prompt with cached
|
||||
// content included, but `candidatesTokenCount` already excludes
|
||||
// `thoughtsTokenCount` (visible vs reasoning are separate). Pull the
|
||||
// cached portion out at the boundary so the additive `LLM.Usage` contract
|
||||
// holds across providers.
|
||||
// Gemini reports `promptTokenCount` (inclusive total) with a
|
||||
// `cachedContentTokenCount` subset. `candidatesTokenCount` is *exclusive*
|
||||
// of `thoughtsTokenCount` — visible-only, not a total — so we sum the two
|
||||
// to produce the inclusive `outputTokens` the rest of the contract expects.
|
||||
const mapUsage = (usage: GeminiUsage | undefined) => {
|
||||
if (!usage) return undefined
|
||||
const cached = usage.cachedContentTokenCount
|
||||
const inputTokens = ProviderShared.subtractTokens(usage.promptTokenCount, cached)
|
||||
const nonCached = ProviderShared.subtractTokens(usage.promptTokenCount, cached)
|
||||
// `candidatesTokenCount` is visible-only; sum with thoughts to produce the
|
||||
// inclusive `outputTokens` the contract expects. Only compute the total
|
||||
// when the visible component is reported — otherwise we'd fabricate an
|
||||
// inclusive number from a partial breakdown.
|
||||
const outputTokens =
|
||||
usage.candidatesTokenCount !== undefined
|
||||
? usage.candidatesTokenCount + (usage.thoughtsTokenCount ?? 0)
|
||||
: undefined
|
||||
return new Usage({
|
||||
inputTokens,
|
||||
outputTokens: usage.candidatesTokenCount,
|
||||
reasoningTokens: usage.thoughtsTokenCount,
|
||||
inputTokens: usage.promptTokenCount,
|
||||
outputTokens,
|
||||
nonCachedInputTokens: nonCached,
|
||||
cacheReadInputTokens: cached,
|
||||
totalTokens: ProviderShared.totalTokens(inputTokens, usage.candidatesTokenCount, usage.totalTokenCount),
|
||||
reasoningTokens: usage.thoughtsTokenCount,
|
||||
totalTokens: ProviderShared.totalTokens(usage.promptTokenCount, outputTokens, usage.totalTokenCount),
|
||||
native: usage,
|
||||
})
|
||||
}
|
||||
|
|
|
|||
|
|
@ -290,22 +290,23 @@ const mapFinishReason = (reason: string | null | undefined): FinishReason => {
|
|||
return "unknown"
|
||||
}
|
||||
|
||||
// OpenAI Chat reports `prompt_tokens` as the total prompt (cached tokens
|
||||
// included) and `completion_tokens` as the total output (reasoning tokens
|
||||
// included). Pull each subtotal out at the boundary so the additive
|
||||
// `LLM.Usage` contract holds and consumers never subtract.
|
||||
// OpenAI Chat reports `prompt_tokens` (inclusive total) with a
|
||||
// `cached_tokens` subset, and `completion_tokens` (inclusive total) with
|
||||
// a `reasoning_tokens` subset. We pass the inclusive totals through and
|
||||
// derive the non-cached breakdown so the `LLM.Usage` contract is
|
||||
// satisfied on both sides.
|
||||
const mapUsage = (usage: OpenAIChatEvent["usage"]): Usage | undefined => {
|
||||
if (!usage) return undefined
|
||||
const cached = usage.prompt_tokens_details?.cached_tokens
|
||||
const reasoning = usage.completion_tokens_details?.reasoning_tokens
|
||||
const inputTokens = ProviderShared.subtractTokens(usage.prompt_tokens, cached)
|
||||
const outputTokens = ProviderShared.subtractTokens(usage.completion_tokens, reasoning)
|
||||
const nonCached = ProviderShared.subtractTokens(usage.prompt_tokens, cached)
|
||||
return new Usage({
|
||||
inputTokens,
|
||||
outputTokens,
|
||||
reasoningTokens: reasoning,
|
||||
inputTokens: usage.prompt_tokens,
|
||||
outputTokens: usage.completion_tokens,
|
||||
nonCachedInputTokens: nonCached,
|
||||
cacheReadInputTokens: cached,
|
||||
totalTokens: ProviderShared.totalTokens(inputTokens, outputTokens, usage.total_tokens),
|
||||
reasoningTokens: reasoning,
|
||||
totalTokens: ProviderShared.totalTokens(usage.prompt_tokens, usage.completion_tokens, usage.total_tokens),
|
||||
native: usage,
|
||||
})
|
||||
}
|
||||
|
|
|
|||
|
|
@ -276,22 +276,22 @@ const fromRequest = Effect.fn("OpenAIResponses.fromRequest")(function* (request:
|
|||
// =============================================================================
|
||||
// Stream Parsing
|
||||
// =============================================================================
|
||||
// OpenAI Responses reports `input_tokens` as the total prompt (cached
|
||||
// included) and `output_tokens` as the total output (reasoning included).
|
||||
// The additive `LLM.Usage` contract pulls each subtotal out at the boundary
|
||||
// so consumers never subtract.
|
||||
// OpenAI Responses reports `input_tokens` (inclusive total) with a
|
||||
// `cached_tokens` subset, and `output_tokens` (inclusive total) with a
|
||||
// `reasoning_tokens` subset. Pass the totals through and derive the
|
||||
// non-cached breakdown.
|
||||
const mapUsage = (usage: OpenAIResponsesUsage | null | undefined) => {
|
||||
if (!usage) return undefined
|
||||
const cached = usage.input_tokens_details?.cached_tokens
|
||||
const reasoning = usage.output_tokens_details?.reasoning_tokens
|
||||
const inputTokens = ProviderShared.subtractTokens(usage.input_tokens, cached)
|
||||
const outputTokens = ProviderShared.subtractTokens(usage.output_tokens, reasoning)
|
||||
const nonCached = ProviderShared.subtractTokens(usage.input_tokens, cached)
|
||||
return new Usage({
|
||||
inputTokens,
|
||||
outputTokens,
|
||||
reasoningTokens: reasoning,
|
||||
inputTokens: usage.input_tokens,
|
||||
outputTokens: usage.output_tokens,
|
||||
nonCachedInputTokens: nonCached,
|
||||
cacheReadInputTokens: cached,
|
||||
totalTokens: ProviderShared.totalTokens(inputTokens, outputTokens, usage.total_tokens),
|
||||
reasoningTokens: reasoning,
|
||||
totalTokens: ProviderShared.totalTokens(usage.input_tokens, usage.output_tokens, usage.total_tokens),
|
||||
native: usage,
|
||||
})
|
||||
}
|
||||
|
|
|
|||
|
|
@ -63,11 +63,9 @@ export const totalTokens = (
|
|||
/**
|
||||
* Subtract `subtrahend` from `total`, clamping to zero if the provider
|
||||
* reports a non-sensical breakdown (e.g. `cached_tokens > prompt_tokens`).
|
||||
* Used by protocol mappers to enforce the additive `LLM.Usage` contract:
|
||||
* each provider's "inclusive" subtotals (cached, reasoning) are pulled out
|
||||
* of the parent count at the boundary so downstream consumers never have to
|
||||
* subtract — eliminating the underflow class of bug where a clamped
|
||||
* difference would silently store the wrong value.
|
||||
* Used by protocol mappers when deriving a non-overlapping breakdown field
|
||||
* from a provider's inclusive total — `nonCachedInputTokens` from
|
||||
* `inputTokens - cacheReadInputTokens - cacheWriteInputTokens`.
|
||||
*
|
||||
* If `total` is `undefined`, returns `undefined` (we don't fabricate
|
||||
* counts). If `subtrahend` is `undefined`, returns `total` unchanged. The
|
||||
|
|
@ -82,6 +80,18 @@ export const subtractTokens = (
|
|||
return Math.max(0, total - subtrahend)
|
||||
}
|
||||
|
||||
/**
|
||||
* Sum a list of optional token counts, returning `undefined` only when
|
||||
* every value is `undefined` (so we don't fabricate a `0`). Used by
|
||||
* protocol mappers to derive the inclusive `inputTokens` total from a
|
||||
* provider that natively reports a non-overlapping breakdown
|
||||
* (e.g. Anthropic, whose `input_tokens` is already non-cached only).
|
||||
*/
|
||||
export const sumTokens = (...values: ReadonlyArray<number | undefined>): number | undefined => {
|
||||
if (values.every((value) => value === undefined)) return undefined
|
||||
return values.reduce<number>((acc, value) => acc + (value ?? 0), 0)
|
||||
}
|
||||
|
||||
export const eventError = (route: string, message: string, raw?: string) =>
|
||||
new LLMError({
|
||||
module: "ProviderShared",
|
||||
|
|
|
|||
|
|
@ -4,54 +4,64 @@ import { ModelRef } from "./options"
|
|||
import { ToolResultValue } from "./messages"
|
||||
|
||||
/**
|
||||
* Token usage reported by an LLM provider, normalized to a fully-additive
|
||||
* contract so consumers never have to subtract.
|
||||
* Token usage reported by an LLM provider.
|
||||
*
|
||||
* **Field semantics** (each non-negative; missing means "not reported"):
|
||||
* **Inclusive totals** (match AI SDK / OpenAI / LangChain convention — a
|
||||
* reader from any of those ecosystems sees the number they expect):
|
||||
*
|
||||
* - `inputTokens` — non-cached input tokens (the "fresh" prompt portion).
|
||||
* - `inputTokens` — total prompt tokens, *including* cached reads/writes.
|
||||
* - `outputTokens` — total output tokens, *including* reasoning.
|
||||
* - `totalTokens` — provider-supplied total, or `inputTokens + outputTokens`.
|
||||
*
|
||||
* **Non-overlapping breakdown** (every field is independently meaningful;
|
||||
* consumers never have to subtract):
|
||||
*
|
||||
* - `nonCachedInputTokens` — the "fresh" portion of the prompt.
|
||||
* - `cacheReadInputTokens` — input tokens served from cache.
|
||||
* - `cacheWriteInputTokens` — input tokens written to cache.
|
||||
* - `outputTokens` — visible output tokens (text + tool calls).
|
||||
* - `reasoningTokens` — hidden reasoning / thinking tokens.
|
||||
* - `totalTokens` — provider-supplied total, or sum of input + output as a
|
||||
* fallback (see `ProviderShared.totalTokens`).
|
||||
* - `native` — the provider's raw usage payload, preserved for debugging.
|
||||
* - `reasoningTokens` — subset of `outputTokens` spent on hidden reasoning.
|
||||
*
|
||||
* **Invariant**: every aggregate of interest is a *sum*, never a difference.
|
||||
* Total billable input = `inputTokens + cacheReadInputTokens +
|
||||
* cacheWriteInputTokens`. Total billable output = `outputTokens +
|
||||
* reasoningTokens`. Adding two non-negatives cannot underflow, so consumers
|
||||
* cannot reproduce the underflow-then-clamp bug class where a stored
|
||||
* negative gets rejected by a strict schema later.
|
||||
* **Invariant**: `nonCachedInputTokens + cacheReadInputTokens +
|
||||
* cacheWriteInputTokens = inputTokens`, and `reasoningTokens ≤ outputTokens`.
|
||||
* Each protocol mapper computes whichever side it doesn't get natively,
|
||||
* with `Math.max(0, …)` clamping for defense against provider bugs. Because
|
||||
* every breakdown field is stored independently, downstream consumers can
|
||||
* read whatever they need (cost-by-category, context-pressure, AI-SDK-style
|
||||
* inclusive total) without ever subtracting — eliminating the underflow
|
||||
* class of bug where a clamped difference would silently store the wrong
|
||||
* value.
|
||||
*
|
||||
* Each protocol mapper enforces this contract at the provider boundary.
|
||||
* Providers that report cache or reasoning as subsets of input/output
|
||||
* (OpenAI Chat/Responses, Gemini, Bedrock) have those subsets pulled out
|
||||
* once via `ProviderShared.subtractTokens`, with `Math.max(0, …)` clamping
|
||||
* for defense against provider bugs. Providers that already report
|
||||
* separately (Anthropic) pass through. Where a provider doesn't surface a
|
||||
* category at all (e.g. Anthropic does not break out extended-thinking
|
||||
* tokens), the corresponding field is `undefined` and the parent count
|
||||
* carries the combined total — a documented limitation of that API.
|
||||
* **Semantics by provider**:
|
||||
*
|
||||
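* - OpenAI Chat / Responses / Bedrock: provider reports inclusive
|
||||
* `inputTokens` and an inclusive `outputTokens`; mapper subtracts to
|
||||
* derive the breakdown.
|
||||
* - Gemini: provider reports an inclusive `inputTokens` (mapper subtracts
|
||||
* the cached subset) but a visible-only output count that excludes
|
||||
* reasoning; mapper sums the two to derive the inclusive `outputTokens`.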
|
||||
* - Anthropic: provider reports the breakdown natively (`input_tokens` is
|
||||
* non-cached only); mapper sums to derive the inclusive `inputTokens`.
|
||||
* Anthropic does *not* break extended-thinking out of `output_tokens`, so
|
||||
* `reasoningTokens` is `undefined` and `outputTokens` carries the
|
||||
* combined total — a documented limitation of the Anthropic API.
|
||||
*
|
||||
* `native` always carries the provider's raw usage payload for debugging.
|
||||
*/
|
||||
export class Usage extends Schema.Class<Usage>("LLM.Usage")({
|
||||
inputTokens: Schema.optional(Schema.Number),
|
||||
outputTokens: Schema.optional(Schema.Number),
|
||||
reasoningTokens: Schema.optional(Schema.Number),
|
||||
nonCachedInputTokens: Schema.optional(Schema.Number),
|
||||
cacheReadInputTokens: Schema.optional(Schema.Number),
|
||||
cacheWriteInputTokens: Schema.optional(Schema.Number),
|
||||
reasoningTokens: Schema.optional(Schema.Number),
|
||||
totalTokens: Schema.optional(Schema.Number),
|
||||
native: Schema.optional(Schema.Record(Schema.String, Schema.Unknown)),
|
||||
}) {
|
||||
/** Sum of every input-side category. Monotonic under the additive contract. */
|
||||
get totalInputTokens() {
|
||||
return (this.inputTokens ?? 0) + (this.cacheReadInputTokens ?? 0) + (this.cacheWriteInputTokens ?? 0)
|
||||
}
|
||||
|
||||
/** Sum of every output-side category. Monotonic under the additive contract. */
|
||||
get totalOutputTokens() {
|
||||
return (this.outputTokens ?? 0) + (this.reasoningTokens ?? 0)
|
||||
/**
|
||||
* Visible output tokens — `outputTokens` minus `reasoningTokens`, clamped
|
||||
* to zero. The only subtraction exposed to consumers of this contract
|
||||
* (mappers subtract separately at the provider boundary); the clamp means
|
||||
* a provider reporting `reasoningTokens > outputTokens` produces a
|
||||
* harmless zero rather than a negative that crashes downstream schemas.
|
||||
*/
|
||||
get visibleOutputTokens() {
|
||||
return Math.max(0, (this.outputTokens ?? 0) - (this.reasoningTokens ?? 0))
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -110,10 +110,11 @@ describe("Anthropic Messages route", () => {
|
|||
expect(response.text).toBe("Hello!")
|
||||
expect(response.reasoning).toBe("thinking")
|
||||
expect(response.usage).toMatchObject({
|
||||
inputTokens: 5,
|
||||
inputTokens: 6,
|
||||
outputTokens: 2,
|
||||
nonCachedInputTokens: 5,
|
||||
cacheReadInputTokens: 1,
|
||||
totalTokens: 7,
|
||||
totalTokens: 8,
|
||||
})
|
||||
expect(response.events.find((event) => event.type === "reasoning-end")).toMatchObject({
|
||||
providerMetadata: { anthropic: { signature: "sig_1" } },
|
||||
|
|
@ -152,7 +153,13 @@ describe("Anthropic Messages route", () => {
|
|||
{
|
||||
type: "request-finish",
|
||||
reason: "tool-calls",
|
||||
usage: new Usage({ inputTokens: 5, outputTokens: 1, totalTokens: 6, native: { input_tokens: 5, output_tokens: 1 } }),
|
||||
usage: new Usage({
|
||||
inputTokens: 5,
|
||||
outputTokens: 1,
|
||||
nonCachedInputTokens: 5,
|
||||
totalTokens: 6,
|
||||
native: { input_tokens: 5, output_tokens: 1 },
|
||||
}),
|
||||
},
|
||||
])
|
||||
}),
|
||||
|
|
|
|||
|
|
@ -197,10 +197,11 @@ describe("Gemini route", () => {
|
|||
expect(response.text).toBe("Hello!")
|
||||
expect(response.reasoning).toBe("thinking")
|
||||
expect(response.usage).toMatchObject({
|
||||
inputTokens: 4,
|
||||
outputTokens: 2,
|
||||
reasoningTokens: 1,
|
||||
inputTokens: 5,
|
||||
outputTokens: 3,
|
||||
nonCachedInputTokens: 4,
|
||||
cacheReadInputTokens: 1,
|
||||
reasoningTokens: 1,
|
||||
totalTokens: 7,
|
||||
})
|
||||
expect(response.events).toEqual([
|
||||
|
|
@ -211,10 +212,11 @@ describe("Gemini route", () => {
|
|||
type: "request-finish",
|
||||
reason: "stop",
|
||||
usage: new Usage({
|
||||
inputTokens: 4,
|
||||
outputTokens: 2,
|
||||
reasoningTokens: 1,
|
||||
inputTokens: 5,
|
||||
outputTokens: 3,
|
||||
nonCachedInputTokens: 4,
|
||||
cacheReadInputTokens: 1,
|
||||
reasoningTokens: 1,
|
||||
totalTokens: 7,
|
||||
native: {
|
||||
promptTokenCount: 5,
|
||||
|
|
@ -260,6 +262,7 @@ describe("Gemini route", () => {
|
|||
usage: new Usage({
|
||||
inputTokens: 5,
|
||||
outputTokens: 1,
|
||||
nonCachedInputTokens: 5,
|
||||
totalTokens: 6,
|
||||
native: { promptTokenCount: 5, candidatesTokenCount: 1 },
|
||||
}),
|
||||
|
|
|
|||
|
|
@ -231,10 +231,11 @@ describe("OpenAI Chat route", () => {
|
|||
type: "request-finish",
|
||||
reason: "stop",
|
||||
usage: new Usage({
|
||||
inputTokens: 4,
|
||||
inputTokens: 5,
|
||||
outputTokens: 2,
|
||||
reasoningTokens: 0,
|
||||
nonCachedInputTokens: 4,
|
||||
cacheReadInputTokens: 1,
|
||||
reasoningTokens: 0,
|
||||
totalTokens: 7,
|
||||
native: {
|
||||
prompt_tokens: 5,
|
||||
|
|
|
|||
|
|
@ -343,10 +343,11 @@ describe("OpenAI Responses route", () => {
|
|||
reason: "stop",
|
||||
providerMetadata: { openai: { responseId: "resp_1", serviceTier: "default" } },
|
||||
usage: new Usage({
|
||||
inputTokens: 4,
|
||||
inputTokens: 5,
|
||||
outputTokens: 2,
|
||||
reasoningTokens: 0,
|
||||
nonCachedInputTokens: 4,
|
||||
cacheReadInputTokens: 1,
|
||||
reasoningTokens: 0,
|
||||
totalTokens: 7,
|
||||
native: {
|
||||
input_tokens: 5,
|
||||
|
|
@ -411,7 +412,13 @@ describe("OpenAI Responses route", () => {
|
|||
{
|
||||
type: "request-finish",
|
||||
reason: "tool-calls",
|
||||
usage: new Usage({ inputTokens: 5, outputTokens: 1, totalTokens: 6, native: { input_tokens: 5, output_tokens: 1 } }),
|
||||
usage: new Usage({
|
||||
inputTokens: 5,
|
||||
outputTokens: 1,
|
||||
nonCachedInputTokens: 5,
|
||||
totalTokens: 6,
|
||||
native: { input_tokens: 5, output_tokens: 1 },
|
||||
}),
|
||||
},
|
||||
])
|
||||
}),
|
||||
|
|
|
|||
|
|
@ -50,7 +50,7 @@ describe("llm schema", () => {
|
|||
})
|
||||
})
|
||||
|
||||
describe("LLM.Usage additive contract", () => {
|
||||
describe("LLM.Usage", () => {
|
||||
test("subtractTokens clamps non-sensical breakdowns to zero", () => {
|
||||
// Defense against a provider reporting cached_tokens > prompt_tokens or
|
||||
// reasoning_tokens > completion_tokens — the negative would otherwise
|
||||
|
|
@ -62,15 +62,17 @@ describe("LLM.Usage additive contract", () => {
|
|||
expect(ProviderShared.subtractTokens(undefined, undefined)).toBeUndefined()
|
||||
})
|
||||
|
||||
test("totalInputTokens sums every input-side category", () => {
|
||||
expect(new Usage({ inputTokens: 10, cacheReadInputTokens: 3, cacheWriteInputTokens: 2 }).totalInputTokens).toBe(15)
|
||||
expect(new Usage({ inputTokens: 10 }).totalInputTokens).toBe(10)
|
||||
expect(new Usage({}).totalInputTokens).toBe(0)
|
||||
test("sumTokens returns undefined only when every input is undefined", () => {
|
||||
expect(ProviderShared.sumTokens(1, 2, 3)).toBe(6)
|
||||
expect(ProviderShared.sumTokens(1, undefined, 3)).toBe(4)
|
||||
expect(ProviderShared.sumTokens(undefined, undefined, undefined)).toBeUndefined()
|
||||
expect(ProviderShared.sumTokens()).toBeUndefined()
|
||||
})
|
||||
|
||||
test("totalOutputTokens sums every output-side category", () => {
|
||||
expect(new Usage({ outputTokens: 7, reasoningTokens: 4 }).totalOutputTokens).toBe(11)
|
||||
expect(new Usage({ outputTokens: 7 }).totalOutputTokens).toBe(7)
|
||||
expect(new Usage({}).totalOutputTokens).toBe(0)
|
||||
test("visibleOutputTokens clamps reasoning > output to zero", () => {
|
||||
expect(new Usage({ outputTokens: 10, reasoningTokens: 4 }).visibleOutputTokens).toBe(6)
|
||||
expect(new Usage({ outputTokens: 10 }).visibleOutputTokens).toBe(10)
|
||||
expect(new Usage({ outputTokens: 4, reasoningTokens: 10 }).visibleOutputTokens).toBe(0)
|
||||
expect(new Usage({}).visibleOutputTokens).toBe(0)
|
||||
})
|
||||
})
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue