diff --git a/packages/llm/src/protocols/anthropic-messages.ts b/packages/llm/src/protocols/anthropic-messages.ts
index afef20f1fb..f9b7ef523a 100644
--- a/packages/llm/src/protocols/anthropic-messages.ts
+++ b/packages/llm/src/protocols/anthropic-messages.ts
@@ -364,40 +364,49 @@ const mapFinishReason = (reason: string | null | undefined): FinishReason => {
   return "unknown"
 }
 
-// Anthropic already reports input/cache-read/cache-write as separate
-// non-overlapping categories per the Messages API docs, so the additive
-// `LLM.Usage` contract is satisfied by direct pass-through. Extended
+// Anthropic reports the non-overlapping breakdown natively — its
+// `input_tokens` is the *non-cached* count per the Messages API docs, with
+// cache reads and writes as separate fields. We sum them to derive the
+// inclusive `inputTokens` the rest of the contract expects. Extended
 // thinking tokens are *not* broken out by Anthropic — they're billed as
-// part of `output_tokens`, so `outputTokens` here may include reasoning
-// the same way OpenAI's `output_tokens` does pre-normalization. This is
-// a documented limitation of the Anthropic API surface, not a contract
-// violation.
+// part of `output_tokens`, so `reasoningTokens` stays `undefined` and
+// `outputTokens` carries the combined total.
 const mapUsage = (usage: AnthropicUsage | undefined): Usage | undefined => {
   if (!usage) return undefined
+  const nonCached = usage.input_tokens
+  const cacheRead = usage.cache_read_input_tokens ?? undefined
+  const cacheWrite = usage.cache_creation_input_tokens ?? undefined
+  const inputTokens = ProviderShared.sumTokens(nonCached, cacheRead, cacheWrite)
   return new Usage({
-    inputTokens: usage.input_tokens,
+    inputTokens,
     outputTokens: usage.output_tokens,
-    cacheReadInputTokens: usage.cache_read_input_tokens ?? undefined,
-    cacheWriteInputTokens: usage.cache_creation_input_tokens ?? undefined,
-    totalTokens: ProviderShared.totalTokens(usage.input_tokens, usage.output_tokens, undefined),
+    nonCachedInputTokens: nonCached,
+    cacheReadInputTokens: cacheRead,
+    cacheWriteInputTokens: cacheWrite,
+    totalTokens: ProviderShared.totalTokens(inputTokens, usage.output_tokens, undefined),
     native: usage,
   })
 }
 
 // Anthropic emits usage on `message_start` and again on `message_delta` — the
 // final delta carries the authoritative totals. Right-biased merge: each
-// field prefers `right` when defined, falls back to `left`. `totalTokens` is
-// recomputed from the merged input/output to stay consistent.
+// field prefers `right` when defined, falls back to `left`. `inputTokens` is
+// recomputed from the merged breakdown (`nonCached + cacheRead + cacheWrite`)
+// and `totalTokens` from the merged input/output, so both stay consistent.
 const mergeUsage = (left: Usage | undefined, right: Usage | undefined) => {
   if (!left) return right
   if (!right) return left
-  const inputTokens = right.inputTokens ?? left.inputTokens
+  const nonCachedInputTokens = right.nonCachedInputTokens ?? left.nonCachedInputTokens
+  const cacheReadInputTokens = right.cacheReadInputTokens ?? left.cacheReadInputTokens
+  const cacheWriteInputTokens = right.cacheWriteInputTokens ?? left.cacheWriteInputTokens
+  const inputTokens = ProviderShared.sumTokens(nonCachedInputTokens, cacheReadInputTokens, cacheWriteInputTokens)
   const outputTokens = right.outputTokens ?? left.outputTokens
   return new Usage({
     inputTokens,
     outputTokens,
-    cacheReadInputTokens: right.cacheReadInputTokens ?? left.cacheReadInputTokens,
-    cacheWriteInputTokens: right.cacheWriteInputTokens ?? left.cacheWriteInputTokens,
+    nonCachedInputTokens,
+    cacheReadInputTokens,
+    cacheWriteInputTokens,
     totalTokens: ProviderShared.totalTokens(inputTokens, outputTokens, undefined),
     native: { ...left.native, ...right.native },
   })
diff --git a/packages/llm/src/protocols/bedrock-converse.ts b/packages/llm/src/protocols/bedrock-converse.ts
index 42e149f03a..8385c7fe51 100644
--- a/packages/llm/src/protocols/bedrock-converse.ts
+++ b/packages/llm/src/protocols/bedrock-converse.ts
@@ -363,21 +363,21 @@ const mapFinishReason = (reason: string): FinishReason => {
   return "unknown"
 }
 
-// AWS Bedrock Converse reports `inputTokens` as the total prompt with
-// cached and cache-write tokens included (per the Bedrock prompt-caching
-// docs). Pull each subtotal out at the boundary so the additive
-// `LLM.Usage` contract holds. Bedrock does not separately report
-// reasoning tokens for any current model.
+// AWS Bedrock Converse reports `inputTokens` (inclusive total) with
+// `cacheReadInputTokens` and `cacheWriteInputTokens` as subsets. Pass
+// the total through and derive the non-cached breakdown. Bedrock does
+// not break reasoning out of `outputTokens` for any current model.
 const mapUsage = (usage: BedrockUsageSchema | undefined): Usage | undefined => {
   if (!usage) return undefined
   const cacheTotal = (usage.cacheReadInputTokens ?? 0) + (usage.cacheWriteInputTokens ?? 0)
-  const inputTokens = ProviderShared.subtractTokens(usage.inputTokens, cacheTotal)
+  const nonCached = ProviderShared.subtractTokens(usage.inputTokens, cacheTotal)
   return new Usage({
-    inputTokens,
+    inputTokens: usage.inputTokens,
     outputTokens: usage.outputTokens,
-    totalTokens: ProviderShared.totalTokens(inputTokens, usage.outputTokens, usage.totalTokens),
+    nonCachedInputTokens: nonCached,
     cacheReadInputTokens: usage.cacheReadInputTokens,
     cacheWriteInputTokens: usage.cacheWriteInputTokens,
+    totalTokens: ProviderShared.totalTokens(usage.inputTokens, usage.outputTokens, usage.totalTokens),
     native: usage,
   })
 }
diff --git a/packages/llm/src/protocols/gemini.ts b/packages/llm/src/protocols/gemini.ts
index fbb03d1fd8..f78a6c9e87 100644
--- a/packages/llm/src/protocols/gemini.ts
+++ b/packages/llm/src/protocols/gemini.ts
@@ -281,21 +281,29 @@ const fromRequest = Effect.fn("Gemini.fromRequest")(function* (request: LLMReque
 // =============================================================================
 // Stream Parsing
 // =============================================================================
-// Gemini reports `promptTokenCount` as the total prompt with cached
-// content included, but `candidatesTokenCount` already excludes
-// `thoughtsTokenCount` (visible vs reasoning are separate). Pull the
-// cached portion out at the boundary so the additive `LLM.Usage` contract
-// holds across providers.
+// Gemini reports `promptTokenCount` (inclusive total) with a
+// `cachedContentTokenCount` subset. `candidatesTokenCount` is *exclusive*
+// of `thoughtsTokenCount` — visible-only, not a total — so we sum the two
+// to produce the inclusive `outputTokens` the rest of the contract expects.
 const mapUsage = (usage: GeminiUsage | undefined) => {
   if (!usage) return undefined
   const cached = usage.cachedContentTokenCount
-  const inputTokens = ProviderShared.subtractTokens(usage.promptTokenCount, cached)
+  const nonCached = ProviderShared.subtractTokens(usage.promptTokenCount, cached)
+  // `candidatesTokenCount` is visible-only; sum with thoughts to produce the
+  // inclusive `outputTokens` the contract expects. Only compute the total
+  // when the visible component is reported — otherwise we'd fabricate an
+  // inclusive number from a partial breakdown.
+  const outputTokens =
+    usage.candidatesTokenCount !== undefined
+      ? usage.candidatesTokenCount + (usage.thoughtsTokenCount ?? 0)
+      : undefined
   return new Usage({
-    inputTokens,
-    outputTokens: usage.candidatesTokenCount,
-    reasoningTokens: usage.thoughtsTokenCount,
+    inputTokens: usage.promptTokenCount,
+    outputTokens,
+    nonCachedInputTokens: nonCached,
     cacheReadInputTokens: cached,
-    totalTokens: ProviderShared.totalTokens(inputTokens, usage.candidatesTokenCount, usage.totalTokenCount),
+    reasoningTokens: usage.thoughtsTokenCount,
+    totalTokens: ProviderShared.totalTokens(usage.promptTokenCount, outputTokens, usage.totalTokenCount),
     native: usage,
   })
 }
diff --git a/packages/llm/src/protocols/openai-chat.ts b/packages/llm/src/protocols/openai-chat.ts
index e7613903aa..6633f1bfed 100644
--- a/packages/llm/src/protocols/openai-chat.ts
+++ b/packages/llm/src/protocols/openai-chat.ts
@@ -290,22 +290,23 @@ const mapFinishReason = (reason: string | null | undefined): FinishReason => {
   return "unknown"
 }
 
-// OpenAI Chat reports `prompt_tokens` as the total prompt (cached tokens
-// included) and `completion_tokens` as the total output (reasoning tokens
-// included). Pull each subtotal out at the boundary so the additive
-// `LLM.Usage` contract holds and consumers never subtract.
+// OpenAI Chat reports `prompt_tokens` (inclusive total) with a
+// `cached_tokens` subset, and `completion_tokens` (inclusive total) with
+// a `reasoning_tokens` subset. We pass the inclusive totals through and
+// derive the non-cached breakdown so the `LLM.Usage` contract is
+// satisfied on both sides.
 const mapUsage = (usage: OpenAIChatEvent["usage"]): Usage | undefined => {
   if (!usage) return undefined
   const cached = usage.prompt_tokens_details?.cached_tokens
   const reasoning = usage.completion_tokens_details?.reasoning_tokens
-  const inputTokens = ProviderShared.subtractTokens(usage.prompt_tokens, cached)
-  const outputTokens = ProviderShared.subtractTokens(usage.completion_tokens, reasoning)
+  const nonCached = ProviderShared.subtractTokens(usage.prompt_tokens, cached)
   return new Usage({
-    inputTokens,
-    outputTokens,
-    reasoningTokens: reasoning,
+    inputTokens: usage.prompt_tokens,
+    outputTokens: usage.completion_tokens,
+    nonCachedInputTokens: nonCached,
     cacheReadInputTokens: cached,
-    totalTokens: ProviderShared.totalTokens(inputTokens, outputTokens, usage.total_tokens),
+    reasoningTokens: reasoning,
+    totalTokens: ProviderShared.totalTokens(usage.prompt_tokens, usage.completion_tokens, usage.total_tokens),
     native: usage,
   })
 }
diff --git a/packages/llm/src/protocols/openai-responses.ts b/packages/llm/src/protocols/openai-responses.ts
index 6a0918efb8..a90a5d32c7 100644
--- a/packages/llm/src/protocols/openai-responses.ts
+++ b/packages/llm/src/protocols/openai-responses.ts
@@ -276,22 +276,22 @@ const fromRequest = Effect.fn("OpenAIResponses.fromRequest")(function* (request:
 // =============================================================================
 // Stream Parsing
 // =============================================================================
-// OpenAI Responses reports `input_tokens` as the total prompt (cached
-// included) and `output_tokens` as the total output (reasoning included).
-// The additive `LLM.Usage` contract pulls each subtotal out at the boundary
-// so consumers never subtract.
+// OpenAI Responses reports `input_tokens` (inclusive total) with a
+// `cached_tokens` subset, and `output_tokens` (inclusive total) with a
+// `reasoning_tokens` subset. Pass the totals through and derive the
+// non-cached breakdown.
 const mapUsage = (usage: OpenAIResponsesUsage | null | undefined) => {
   if (!usage) return undefined
   const cached = usage.input_tokens_details?.cached_tokens
   const reasoning = usage.output_tokens_details?.reasoning_tokens
-  const inputTokens = ProviderShared.subtractTokens(usage.input_tokens, cached)
-  const outputTokens = ProviderShared.subtractTokens(usage.output_tokens, reasoning)
+  const nonCached = ProviderShared.subtractTokens(usage.input_tokens, cached)
   return new Usage({
-    inputTokens,
-    outputTokens,
-    reasoningTokens: reasoning,
+    inputTokens: usage.input_tokens,
+    outputTokens: usage.output_tokens,
+    nonCachedInputTokens: nonCached,
     cacheReadInputTokens: cached,
-    totalTokens: ProviderShared.totalTokens(inputTokens, outputTokens, usage.total_tokens),
+    reasoningTokens: reasoning,
+    totalTokens: ProviderShared.totalTokens(usage.input_tokens, usage.output_tokens, usage.total_tokens),
     native: usage,
   })
 }
diff --git a/packages/llm/src/protocols/shared.ts b/packages/llm/src/protocols/shared.ts
index 79e019097e..3b9886553a 100644
--- a/packages/llm/src/protocols/shared.ts
+++ b/packages/llm/src/protocols/shared.ts
@@ -63,11 +63,9 @@ export const totalTokens = (
 /**
  * Subtract `subtrahend` from `total`, clamping to zero if the provider
  * reports a non-sensical breakdown (e.g. `cached_tokens > prompt_tokens`).
- * Used by protocol mappers to enforce the additive `LLM.Usage` contract:
- * each provider's "inclusive" subtotals (cached, reasoning) are pulled out
- * of the parent count at the boundary so downstream consumers never have to
- * subtract — eliminating the underflow class of bug where a clamped
- * difference would silently store the wrong value.
+ * Used by protocol mappers when deriving a non-overlapping breakdown field
+ * from a provider's inclusive total — `nonCachedInputTokens` from
+ * `inputTokens - cacheReadInputTokens - cacheWriteInputTokens`.
  *
  * If `total` is `undefined`, returns `undefined` (we don't fabricate
  * counts). If `subtrahend` is `undefined`, returns `total` unchanged. The
@@ -82,6 +80,18 @@ export const subtractTokens = (
   return Math.max(0, total - subtrahend)
 }
 
+/**
+ * Sum optional token counts, treating `undefined` entries as 0. Returns
+ * `undefined` (not a fabricated `0`) when no value is defined at all. Used by
+ * protocol mappers to derive the inclusive `inputTokens` total from a
+ * provider that natively reports a non-overlapping breakdown
+ * (e.g. Anthropic, whose `input_tokens` is already non-cached only).
+ */
+export const sumTokens = (...values: ReadonlyArray<number | undefined>): number | undefined => {
+  if (values.every((value) => value === undefined)) return undefined
+  return values.reduce<number>((acc, value) => acc + (value ?? 0), 0)
+}
+
 export const eventError = (route: string, message: string, raw?: string) =>
   new LLMError({
     module: "ProviderShared",
diff --git a/packages/llm/src/schema/events.ts b/packages/llm/src/schema/events.ts
index ee755e93e3..5c34a01b5c 100644
--- a/packages/llm/src/schema/events.ts
+++ b/packages/llm/src/schema/events.ts
@@ -4,54 +4,64 @@ import { ModelRef } from "./options"
 import { ToolResultValue } from "./messages"
 
 /**
- * Token usage reported by an LLM provider, normalized to a fully-additive
- * contract so consumers never have to subtract.
+ * Token usage reported by an LLM provider.
  *
- * **Field semantics** (each non-negative; missing means "not reported"):
+ * **Inclusive totals** (match AI SDK / OpenAI / LangChain convention — a
+ * reader from any of those ecosystems sees the number they expect):
  *
- * - `inputTokens` — non-cached input tokens (the "fresh" prompt portion).
+ * - `inputTokens` — total prompt tokens, *including* cached reads/writes.
+ * - `outputTokens` — total output tokens, *including* reasoning.
+ * - `totalTokens` — provider-supplied total, or `inputTokens + outputTokens`.
+ *
+ * **Non-overlapping breakdown** (every field is independently meaningful;
+ * consumers never have to subtract):
+ *
+ * - `nonCachedInputTokens` — the "fresh" portion of the prompt.
  * - `cacheReadInputTokens` — input tokens served from cache.
  * - `cacheWriteInputTokens` — input tokens written to cache.
- * - `outputTokens` — visible output tokens (text + tool calls).
- * - `reasoningTokens` — hidden reasoning / thinking tokens.
- * - `totalTokens` — provider-supplied total, or sum of input + output as a
- *   fallback (see `ProviderShared.totalTokens`).
- * - `native` — the provider's raw usage payload, preserved for debugging.
+ * - `reasoningTokens` — subset of `outputTokens` spent on hidden reasoning.
  *
- * **Invariant**: every aggregate of interest is a *sum*, never a difference.
- * Total billable input = `inputTokens + cacheReadInputTokens +
- * cacheWriteInputTokens`. Total billable output = `outputTokens +
- * reasoningTokens`. Adding two non-negatives cannot underflow, so consumers
- * cannot reproduce the underflow-then-clamp bug class where a stored
- * negative gets rejected by a strict schema later.
+ * **Invariant**: `nonCachedInputTokens + cacheReadInputTokens +
+ * cacheWriteInputTokens = inputTokens`, and `reasoningTokens ≤ outputTokens`.
+ * Each protocol mapper computes whichever side it doesn't get natively,
+ * with `Math.max(0, …)` clamping for defense against provider bugs. Because
+ * every breakdown field is stored independently, downstream consumers can
+ * read whatever they need (cost-by-category, context-pressure, AI-SDK-style
+ * inclusive total) without ever subtracting — eliminating the underflow
+ * class of bug where a clamped difference would silently store the wrong
+ * value.
  *
- * Each protocol mapper enforces this contract at the provider boundary.
- * Providers that report cache or reasoning as subsets of input/output
- * (OpenAI Chat/Responses, Gemini, Bedrock) have those subsets pulled out
- * once via `ProviderShared.subtractTokens`, with `Math.max(0, …)` clamping
- * for defense against provider bugs. Providers that already report
- * separately (Anthropic) pass through. Where a provider doesn't surface a
- * category at all (e.g. Anthropic does not break out extended-thinking
- * tokens), the corresponding field is `undefined` and the parent count
- * carries the combined total — a documented limitation of that API.
+ * **Semantics by provider**:
+ *
+ * - OpenAI Chat / Responses / Bedrock: provider reports inclusive input and
+ *   output totals; mapper subtracts to derive the breakdown. Gemini: input
+ *   is inclusive too, but output is visible-only, so thoughts are added in.
+ * - Anthropic: provider reports the breakdown natively (`input_tokens` is
+ *   non-cached only); mapper sums to derive the inclusive `inputTokens`.
+ *   Anthropic does *not* break extended-thinking out of `output_tokens`, so
+ *   `reasoningTokens` is `undefined` and `outputTokens` carries the
+ *   combined total — a documented limitation of the Anthropic API.
+ *
+ * `native` always carries the provider's raw usage payload for debugging.
  */
 export class Usage extends Schema.Class<Usage>("LLM.Usage")({
   inputTokens: Schema.optional(Schema.Number),
   outputTokens: Schema.optional(Schema.Number),
-  reasoningTokens: Schema.optional(Schema.Number),
+  nonCachedInputTokens: Schema.optional(Schema.Number),
   cacheReadInputTokens: Schema.optional(Schema.Number),
   cacheWriteInputTokens: Schema.optional(Schema.Number),
+  reasoningTokens: Schema.optional(Schema.Number),
   totalTokens: Schema.optional(Schema.Number),
   native: Schema.optional(Schema.Record(Schema.String, Schema.Unknown)),
 }) {
-  /** Sum of every input-side category. Monotonic under the additive contract. */
-  get totalInputTokens() {
-    return (this.inputTokens ?? 0) + (this.cacheReadInputTokens ?? 0) + (this.cacheWriteInputTokens ?? 0)
-  }
-
-  /** Sum of every output-side category. Monotonic under the additive contract. */
-  get totalOutputTokens() {
-    return (this.outputTokens ?? 0) + (this.reasoningTokens ?? 0)
+  /**
+   * Visible output tokens — `outputTokens` minus `reasoningTokens`, clamped
+   * to zero. The one place subtraction happens in this contract; the clamp
+   * means a provider reporting `reasoningTokens > outputTokens` produces a
+   * harmless zero rather than a negative that crashes downstream schemas.
+   */
+  get visibleOutputTokens() {
+    return Math.max(0, (this.outputTokens ?? 0) - (this.reasoningTokens ?? 0))
   }
 }
 
diff --git a/packages/llm/test/provider/anthropic-messages.test.ts b/packages/llm/test/provider/anthropic-messages.test.ts
index 6541454cb5..eb867530c3 100644
--- a/packages/llm/test/provider/anthropic-messages.test.ts
+++ b/packages/llm/test/provider/anthropic-messages.test.ts
@@ -110,10 +110,11 @@ describe("Anthropic Messages route", () => {
       expect(response.text).toBe("Hello!")
       expect(response.reasoning).toBe("thinking")
       expect(response.usage).toMatchObject({
-        inputTokens: 5,
+        inputTokens: 6,
         outputTokens: 2,
+        nonCachedInputTokens: 5,
         cacheReadInputTokens: 1,
-        totalTokens: 7,
+        totalTokens: 8,
       })
       expect(response.events.find((event) => event.type === "reasoning-end")).toMatchObject({
         providerMetadata: { anthropic: { signature: "sig_1" } },
@@ -152,7 +153,13 @@ describe("Anthropic Messages route", () => {
         {
           type: "request-finish",
           reason: "tool-calls",
-          usage: new Usage({ inputTokens: 5, outputTokens: 1, totalTokens: 6, native: { input_tokens: 5, output_tokens: 1 } }),
+          usage: new Usage({
+            inputTokens: 5,
+            outputTokens: 1,
+            nonCachedInputTokens: 5,
+            totalTokens: 6,
+            native: { input_tokens: 5, output_tokens: 1 },
+          }),
         },
       ])
     }),
diff --git a/packages/llm/test/provider/gemini.test.ts b/packages/llm/test/provider/gemini.test.ts
index cd34360cce..50f597b429 100644
--- a/packages/llm/test/provider/gemini.test.ts
+++ b/packages/llm/test/provider/gemini.test.ts
@@ -197,10 +197,11 @@ describe("Gemini route", () => {
       expect(response.text).toBe("Hello!")
       expect(response.reasoning).toBe("thinking")
       expect(response.usage).toMatchObject({
-        inputTokens: 4,
-        outputTokens: 2,
-        reasoningTokens: 1,
+        inputTokens: 5,
+        outputTokens: 3,
+        nonCachedInputTokens: 4,
         cacheReadInputTokens: 1,
+        reasoningTokens: 1,
         totalTokens: 7,
       })
       expect(response.events).toEqual([
@@ -211,10 +212,11 @@ describe("Gemini route", () => {
           type: "request-finish",
           reason: "stop",
           usage: new Usage({
-            inputTokens: 4,
-            outputTokens: 2,
-            reasoningTokens: 1,
+            inputTokens: 5,
+            outputTokens: 3,
+            nonCachedInputTokens: 4,
             cacheReadInputTokens: 1,
+            reasoningTokens: 1,
             totalTokens: 7,
             native: {
               promptTokenCount: 5,
@@ -260,6 +262,7 @@ describe("Gemini route", () => {
           usage: new Usage({
             inputTokens: 5,
             outputTokens: 1,
+            nonCachedInputTokens: 5,
             totalTokens: 6,
             native: { promptTokenCount: 5, candidatesTokenCount: 1 },
           }),
diff --git a/packages/llm/test/provider/openai-chat.test.ts b/packages/llm/test/provider/openai-chat.test.ts
index ecb1a81141..1bac72ba64 100644
--- a/packages/llm/test/provider/openai-chat.test.ts
+++ b/packages/llm/test/provider/openai-chat.test.ts
@@ -231,10 +231,11 @@ describe("OpenAI Chat route", () => {
           type: "request-finish",
           reason: "stop",
           usage: new Usage({
-            inputTokens: 4,
+            inputTokens: 5,
             outputTokens: 2,
-            reasoningTokens: 0,
+            nonCachedInputTokens: 4,
             cacheReadInputTokens: 1,
+            reasoningTokens: 0,
             totalTokens: 7,
             native: {
               prompt_tokens: 5,
diff --git a/packages/llm/test/provider/openai-responses.test.ts b/packages/llm/test/provider/openai-responses.test.ts
index 0723ddf816..3cdb3e070b 100644
--- a/packages/llm/test/provider/openai-responses.test.ts
+++ b/packages/llm/test/provider/openai-responses.test.ts
@@ -343,10 +343,11 @@ describe("OpenAI Responses route", () => {
           reason: "stop",
           providerMetadata: { openai: { responseId: "resp_1", serviceTier: "default" } },
           usage: new Usage({
-            inputTokens: 4,
+            inputTokens: 5,
             outputTokens: 2,
-            reasoningTokens: 0,
+            nonCachedInputTokens: 4,
             cacheReadInputTokens: 1,
+            reasoningTokens: 0,
             totalTokens: 7,
             native: {
               input_tokens: 5,
@@ -411,7 +412,13 @@ describe("OpenAI Responses route", () => {
         {
           type: "request-finish",
           reason: "tool-calls",
-          usage: new Usage({ inputTokens: 5, outputTokens: 1, totalTokens: 6, native: { input_tokens: 5, output_tokens: 1 } }),
+          usage: new Usage({
+            inputTokens: 5,
+            outputTokens: 1,
+            nonCachedInputTokens: 5,
+            totalTokens: 6,
+            native: { input_tokens: 5, output_tokens: 1 },
+          }),
         },
       ])
     }),
diff --git a/packages/llm/test/schema.test.ts b/packages/llm/test/schema.test.ts
index 9ddfe9e597..23bd9fd9bb 100644
--- a/packages/llm/test/schema.test.ts
+++ b/packages/llm/test/schema.test.ts
@@ -50,7 +50,7 @@ describe("llm schema", () => {
   })
 })
 
-describe("LLM.Usage additive contract", () => {
+describe("LLM.Usage", () => {
   test("subtractTokens clamps non-sensical breakdowns to zero", () => {
     // Defense against a provider reporting cached_tokens > prompt_tokens or
     // reasoning_tokens > completion_tokens — the negative would otherwise
@@ -62,15 +62,17 @@ describe("LLM.Usage additive contract", () => {
     expect(ProviderShared.subtractTokens(undefined, undefined)).toBeUndefined()
   })
 
-  test("totalInputTokens sums every input-side category", () => {
-    expect(new Usage({ inputTokens: 10, cacheReadInputTokens: 3, cacheWriteInputTokens: 2 }).totalInputTokens).toBe(15)
-    expect(new Usage({ inputTokens: 10 }).totalInputTokens).toBe(10)
-    expect(new Usage({}).totalInputTokens).toBe(0)
+  test("sumTokens returns undefined only when every input is undefined", () => {
+    expect(ProviderShared.sumTokens(1, 2, 3)).toBe(6)
+    expect(ProviderShared.sumTokens(1, undefined, 3)).toBe(4)
+    expect(ProviderShared.sumTokens(undefined, undefined, undefined)).toBeUndefined()
+    expect(ProviderShared.sumTokens()).toBeUndefined()
   })
 
-  test("totalOutputTokens sums every output-side category", () => {
-    expect(new Usage({ outputTokens: 7, reasoningTokens: 4 }).totalOutputTokens).toBe(11)
-    expect(new Usage({ outputTokens: 7 }).totalOutputTokens).toBe(7)
-    expect(new Usage({}).totalOutputTokens).toBe(0)
+  test("visibleOutputTokens clamps reasoning > output to zero", () => {
+    expect(new Usage({ outputTokens: 10, reasoningTokens: 4 }).visibleOutputTokens).toBe(6)
+    expect(new Usage({ outputTokens: 10 }).visibleOutputTokens).toBe(10)
+    expect(new Usage({ outputTokens: 4, reasoningTokens: 10 }).visibleOutputTokens).toBe(0)
+    expect(new Usage({}).visibleOutputTokens).toBe(0)
   })
 })