diff --git a/packages/llm/src/protocols/anthropic-messages.ts b/packages/llm/src/protocols/anthropic-messages.ts
index fba785373d..afef20f1fb 100644
--- a/packages/llm/src/protocols/anthropic-messages.ts
+++ b/packages/llm/src/protocols/anthropic-messages.ts
@@ -364,6 +364,14 @@ const mapFinishReason = (reason: string | null | undefined): FinishReason => {
   return "unknown"
 }
 
+// Anthropic already reports input/cache-read/cache-write as separate
+// non-overlapping categories per the Messages API docs, so the additive
+// `LLM.Usage` contract is satisfied by direct pass-through. Extended
+// thinking tokens are *not* broken out by Anthropic — they're billed as
+// part of `output_tokens`, so `outputTokens` here may include reasoning
+// the same way OpenAI's `output_tokens` does pre-normalization. This is
+// a documented limitation of the Anthropic API surface, not a contract
+// violation.
 const mapUsage = (usage: AnthropicUsage | undefined): Usage | undefined => {
   if (!usage) return undefined
   return new Usage({
diff --git a/packages/llm/src/protocols/bedrock-converse.ts b/packages/llm/src/protocols/bedrock-converse.ts
index 260ee612cd..80620d3463 100644
--- a/packages/llm/src/protocols/bedrock-converse.ts
+++ b/packages/llm/src/protocols/bedrock-converse.ts
@@ -363,12 +363,21 @@ const mapFinishReason = (reason: string): FinishReason => {
   return "unknown"
 }
 
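+// AWS Bedrock Converse reports `inputTokens` as the total prompt with cached and cache-write
+// tokens included (per the Bedrock prompt-caching docs). Pull each subtotal out at the boundary
+// so the additive `LLM.Usage` contract holds. Bedrock does not separately report reasoning tokens
+// for any current model. NOTE(review): confirm against the Converse `TokenUsage` reference that
+// `inputTokens` is cache-inclusive; if it already excludes cache tokens, this under-reports input.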
 const mapUsage = (usage: BedrockUsageSchema | undefined): Usage | undefined => {
   if (!usage) return undefined
+  const inputTokens = ProviderShared.subtractTokens(
+    ProviderShared.subtractTokens(usage.inputTokens, usage.cacheReadInputTokens),
+    usage.cacheWriteInputTokens,
+  )
   return new Usage({
-    inputTokens: usage.inputTokens,
+    inputTokens,
     outputTokens: usage.outputTokens,
-    totalTokens: ProviderShared.totalTokens(usage.inputTokens, usage.outputTokens, usage.totalTokens),
+    totalTokens: ProviderShared.totalTokens(inputTokens, usage.outputTokens, usage.totalTokens),
     cacheReadInputTokens: usage.cacheReadInputTokens,
     cacheWriteInputTokens: usage.cacheWriteInputTokens,
     native: usage,
diff --git a/packages/llm/src/protocols/gemini.ts b/packages/llm/src/protocols/gemini.ts
index 140da521a5..fbb03d1fd8 100644
--- a/packages/llm/src/protocols/gemini.ts
+++ b/packages/llm/src/protocols/gemini.ts
@@ -281,14 +281,21 @@ const fromRequest = Effect.fn("Gemini.fromRequest")(function* (request: LLMReque
 // =============================================================================
 // Stream Parsing
 // =============================================================================
+// Gemini reports `promptTokenCount` as the total prompt with cached
+// content included, but `candidatesTokenCount` already excludes
+// `thoughtsTokenCount` (visible vs reasoning are separate). Pull the
+// cached portion out at the boundary so the additive `LLM.Usage` contract
+// holds across providers.
 const mapUsage = (usage: GeminiUsage | undefined) => {
   if (!usage) return undefined
+  const cached = usage.cachedContentTokenCount
+  const inputTokens = ProviderShared.subtractTokens(usage.promptTokenCount, cached)
   return new Usage({
-    inputTokens: usage.promptTokenCount,
+    inputTokens,
     outputTokens: usage.candidatesTokenCount,
     reasoningTokens: usage.thoughtsTokenCount,
-    cacheReadInputTokens: usage.cachedContentTokenCount,
-    totalTokens: ProviderShared.totalTokens(usage.promptTokenCount, usage.candidatesTokenCount, usage.totalTokenCount),
+    cacheReadInputTokens: cached,
+    totalTokens: ProviderShared.totalTokens(inputTokens, usage.candidatesTokenCount, usage.totalTokenCount),
     native: usage,
   })
 }
diff --git a/packages/llm/src/protocols/openai-chat.ts b/packages/llm/src/protocols/openai-chat.ts
index 5d42c0a4e9..09165d502d 100644
--- a/packages/llm/src/protocols/openai-chat.ts
+++ b/packages/llm/src/protocols/openai-chat.ts
@@ -290,14 +290,23 @@ const mapFinishReason = (reason: string | null | undefined): FinishReason => {
   return "unknown"
 }
 
+// OpenAI Chat reports `prompt_tokens` as the total prompt (cached tokens
+// included) and `completion_tokens` as the total output (reasoning tokens
+// included). The additive `LLM.Usage` contract pulls each subtotal out at
+// the boundary so consumers never subtract — eliminating the underflow
+// class addressed by opencode#26620.
 const mapUsage = (usage: OpenAIChatEvent["usage"]): Usage | undefined => {
   if (!usage) return undefined
+  const cached = usage.prompt_tokens_details?.cached_tokens
+  const reasoning = usage.completion_tokens_details?.reasoning_tokens
+  const inputTokens = ProviderShared.subtractTokens(usage.prompt_tokens, cached)
+  const outputTokens = ProviderShared.subtractTokens(usage.completion_tokens, reasoning)
   return new Usage({
-    inputTokens: usage.prompt_tokens,
-    outputTokens: usage.completion_tokens,
-    reasoningTokens: usage.completion_tokens_details?.reasoning_tokens,
-    cacheReadInputTokens: usage.prompt_tokens_details?.cached_tokens,
-    totalTokens: ProviderShared.totalTokens(usage.prompt_tokens, usage.completion_tokens, usage.total_tokens),
+    inputTokens,
+    outputTokens,
+    reasoningTokens: reasoning,
+    cacheReadInputTokens: cached,
+    totalTokens: ProviderShared.totalTokens(inputTokens, outputTokens, usage.total_tokens),
     native: usage,
   })
 }
diff --git a/packages/llm/src/protocols/openai-responses.ts b/packages/llm/src/protocols/openai-responses.ts
index 14dc32130c..6a0918efb8 100644
--- a/packages/llm/src/protocols/openai-responses.ts
+++ b/packages/llm/src/protocols/openai-responses.ts
@@ -276,14 +276,22 @@ const fromRequest = Effect.fn("OpenAIResponses.fromRequest")(function* (request:
 // =============================================================================
 // Stream Parsing
 // =============================================================================
+// OpenAI Responses reports `input_tokens` as the total prompt (cached
+// included) and `output_tokens` as the total output (reasoning included).
+// The additive `LLM.Usage` contract pulls each subtotal out at the boundary
+// so consumers never subtract.
 const mapUsage = (usage: OpenAIResponsesUsage | null | undefined) => {
   if (!usage) return undefined
+  const cached = usage.input_tokens_details?.cached_tokens
+  const reasoning = usage.output_tokens_details?.reasoning_tokens
+  const inputTokens = ProviderShared.subtractTokens(usage.input_tokens, cached)
+  const outputTokens = ProviderShared.subtractTokens(usage.output_tokens, reasoning)
   return new Usage({
-    inputTokens: usage.input_tokens,
-    outputTokens: usage.output_tokens,
-    reasoningTokens: usage.output_tokens_details?.reasoning_tokens,
-    cacheReadInputTokens: usage.input_tokens_details?.cached_tokens,
-    totalTokens: ProviderShared.totalTokens(usage.input_tokens, usage.output_tokens, usage.total_tokens),
+    inputTokens,
+    outputTokens,
+    reasoningTokens: reasoning,
+    cacheReadInputTokens: cached,
+    totalTokens: ProviderShared.totalTokens(inputTokens, outputTokens, usage.total_tokens),
     native: usage,
   })
 }
diff --git a/packages/llm/src/protocols/shared.ts b/packages/llm/src/protocols/shared.ts
index c931353998..79e019097e 100644
--- a/packages/llm/src/protocols/shared.ts
+++ b/packages/llm/src/protocols/shared.ts
@@ -42,6 +42,13 @@ export interface ToolAccumulator {
  * supplied total; otherwise falls back to `inputTokens + outputTokens` only
  * when at least one is defined. Returns `undefined` when neither input nor
  * output is known so routes don't publish a misleading `0`.
+ *
+ * Under the additive `LLM.Usage` contract, `inputTokens` and `outputTokens`
+ * are the non-cached input and visible output only. The provider-supplied
+ * `total` is the source of truth when present; the computed fallback
+ * under-counts cache and reasoning by design and exists mainly so
+ * Anthropic-style providers (which don't surface a total) still get a
+ * sensible aggregate on the input + output axes.
  */
 export const totalTokens = (
   inputTokens: number | undefined,
@@ -53,6 +60,28 @@ export const totalTokens = (
   return (inputTokens ?? 0) + (outputTokens ?? 0)
 }
 
+/**
+ * Subtract `subtrahend` from `total`, clamping to zero if the provider
+ * reports a nonsensical breakdown (e.g. `cached_tokens > prompt_tokens`).
+ * Used by protocol mappers to enforce the additive `LLM.Usage` contract:
+ * each provider's "inclusive" subtotals (cached, reasoning) are pulled out
+ * of the parent count at the boundary so downstream consumers never have to
+ * subtract — eliminating the underflow class of bug where a clamped
+ * difference would silently store the wrong value.
+ *
+ * If `total` is `undefined`, returns `undefined` (we don't fabricate
+ * counts). If `subtrahend` is `undefined`, returns `total` unchanged. The
+ * provider-native breakdown stays available on `Usage.native` for debugging.
+ */
+export const subtractTokens = (
+  total: number | undefined,
+  subtrahend: number | undefined,
+): number | undefined => {
+  if (total === undefined) return undefined
+  if (subtrahend === undefined) return total
+  return Math.max(0, total - subtrahend)
+}
+
 export const eventError = (route: string, message: string, raw?: string) =>
   new LLMError({
     module: "ProviderShared",
diff --git a/packages/llm/src/schema/events.ts b/packages/llm/src/schema/events.ts
index d0befe246e..6c7d91fe43 100644
--- a/packages/llm/src/schema/events.ts
+++ b/packages/llm/src/schema/events.ts
@@ -3,6 +3,38 @@ import { ContentBlockID, FinishReason, ProtocolID, ProviderMetadata, ResponseID,
 import { ModelRef } from "./options"
 import { ToolResultValue } from "./messages"
 
+/**
+ * Token usage reported by an LLM provider, normalized to a fully-additive
+ * contract so consumers never have to subtract.
+ *
+ * **Field semantics** (each non-negative; missing means "not reported"):
+ *
+ * - `inputTokens` — non-cached input tokens (the "fresh" prompt portion).
+ * - `cacheReadInputTokens` — input tokens served from cache.
+ * - `cacheWriteInputTokens` — input tokens written to cache.
+ * - `outputTokens` — visible output tokens (text + tool calls).
+ * - `reasoningTokens` — hidden reasoning / thinking tokens.
+ * - `totalTokens` — provider-supplied total, or `inputTokens + outputTokens` as a
+ *   fallback that omits cache and reasoning (see `ProviderShared.totalTokens`).
+ * - `native` — the provider's raw usage payload, preserved for debugging.
+ *
+ * **Invariant**: every aggregate of interest is a *sum*, never a difference.
+ * Total billable input = `inputTokens + cacheReadInputTokens +
+ * cacheWriteInputTokens`. Total billable output = `outputTokens +
+ * reasoningTokens`. Adding two non-negatives cannot underflow, so consumers
+ * cannot reproduce the underflow-then-clamp bug class where a stored
+ * negative gets rejected by a strict schema later.
+ *
+ * Each protocol mapper enforces this contract at the provider boundary.
+ * Providers that report cache or reasoning as subsets of input/output
+ * (OpenAI Chat/Responses, Gemini, Bedrock) have those subsets pulled out
+ * once via `ProviderShared.subtractTokens`, with `Math.max(0, …)` clamping
+ * for defense against provider bugs. Providers that already report
+ * separately (Anthropic) pass through. Where a provider doesn't surface a
+ * category at all (e.g. Anthropic does not break out extended-thinking
+ * tokens), the corresponding field is `undefined` and the parent count
+ * carries the combined total — a documented limitation of that API.
+ */
 export class Usage extends Schema.Class<Usage>("LLM.Usage")({
   inputTokens: Schema.optional(Schema.Number),
   outputTokens: Schema.optional(Schema.Number),
@@ -13,6 +45,24 @@ export class Usage extends Schema.Class<Usage>("LLM.Usage")({
   native: Schema.optional(Schema.Record(Schema.String, Schema.Unknown)),
 }) {}
 
+export namespace Usage {
+  type InputFields = Pick<Usage, "inputTokens" | "cacheReadInputTokens" | "cacheWriteInputTokens">
+  type OutputFields = Pick<Usage, "outputTokens" | "reasoningTokens">
+
+  /**
+   * Sum of every input-side category: non-cached input + cache reads +
+   * cache writes. Unreported fields count as 0; cannot underflow under the additive contract.
+   */
+  export const totalInput = (usage: InputFields) =>
+    (usage.inputTokens ?? 0) + (usage.cacheReadInputTokens ?? 0) + (usage.cacheWriteInputTokens ?? 0)
+
+  /**
+   * Sum of every output-side category: visible output + reasoning.
+   * Unreported fields count as 0; cannot underflow under the additive contract.
+   */
+  export const totalOutput = (usage: OutputFields) => (usage.outputTokens ?? 0) + (usage.reasoningTokens ?? 0)
+}
+
 export const RequestStart = Schema.Struct({
   type: Schema.tag("request-start"),
   id: ResponseID,
diff --git a/packages/llm/test/provider/gemini.test.ts b/packages/llm/test/provider/gemini.test.ts
index 9de4e0dc25..55d77a4e85 100644
--- a/packages/llm/test/provider/gemini.test.ts
+++ b/packages/llm/test/provider/gemini.test.ts
@@ -197,7 +197,10 @@ describe("Gemini route", () => {
       expect(response.text).toBe("Hello!")
       expect(response.reasoning).toBe("thinking")
       expect(response.usage).toMatchObject({
-        inputTokens: 5,
+        // Additive contract: promptTokenCount=5 includes 1 cached, so
+        // inputTokens=4 + cacheReadInputTokens=1. Gemini already splits
+        // candidates from thoughts, so outputTokens=2 + reasoningTokens=1.
+        inputTokens: 4,
         outputTokens: 2,
         reasoningTokens: 1,
         cacheReadInputTokens: 1,
@@ -211,7 +214,7 @@ describe("Gemini route", () => {
           type: "request-finish",
           reason: "stop",
           usage: {
-            inputTokens: 5,
+            inputTokens: 4,
             outputTokens: 2,
             reasoningTokens: 1,
             cacheReadInputTokens: 1,
diff --git a/packages/llm/test/provider/openai-chat.test.ts b/packages/llm/test/provider/openai-chat.test.ts
index 8b0dfc2894..1938580f3b 100644
--- a/packages/llm/test/provider/openai-chat.test.ts
+++ b/packages/llm/test/provider/openai-chat.test.ts
@@ -231,7 +231,10 @@ describe("OpenAI Chat route", () => {
           type: "request-finish",
           reason: "stop",
           usage: {
-            inputTokens: 5,
+            // Additive contract: prompt_tokens=5 includes 1 cached, so
+            // inputTokens=4 (non-cached) + cacheReadInputTokens=1.
+            // completion_tokens=2 includes 0 reasoning, so outputTokens=2.
+            inputTokens: 4,
             outputTokens: 2,
             reasoningTokens: 0,
             cacheReadInputTokens: 1,
diff --git a/packages/llm/test/provider/openai-responses.test.ts b/packages/llm/test/provider/openai-responses.test.ts
index 5141b44cc2..8f232854a9 100644
--- a/packages/llm/test/provider/openai-responses.test.ts
+++ b/packages/llm/test/provider/openai-responses.test.ts
@@ -343,7 +343,10 @@ describe("OpenAI Responses route", () => {
           reason: "stop",
           providerMetadata: { openai: { responseId: "resp_1", serviceTier: "default" } },
           usage: {
-            inputTokens: 5,
+            // Additive contract: input_tokens=5 includes 1 cached, so
+            // inputTokens=4 + cacheReadInputTokens=1.
+            // output_tokens=2 includes 0 reasoning, so outputTokens=2.
+            inputTokens: 4,
             outputTokens: 2,
             reasoningTokens: 0,
             cacheReadInputTokens: 1,
diff --git a/packages/llm/test/schema.test.ts b/packages/llm/test/schema.test.ts
index 46eb85b075..7ef3247f8b 100644
--- a/packages/llm/test/schema.test.ts
+++ b/packages/llm/test/schema.test.ts
@@ -1,6 +1,7 @@
 import { describe, expect, test } from "bun:test"
 import { Schema } from "effect"
-import { ContentPart, LLMEvent, LLMRequest, ModelID, ModelLimits, ModelRef, ProviderID } from "../src/schema"
+import { ContentPart, LLMEvent, LLMRequest, ModelID, ModelLimits, ModelRef, ProviderID, Usage } from "../src/schema"
+import { ProviderShared } from "../src/protocols/shared"
 
 const model = new ModelRef({
   id: ModelID.make("fake-model"),
@@ -48,3 +49,28 @@ describe("llm schema", () => {
     expect(ContentPart.guards.media({ type: "text", text: "hi" })).toBe(false)
   })
 })
+
+describe("LLM.Usage additive contract", () => {
+  test("subtractTokens clamps non-sensical breakdowns to zero", () => {
+    // Defense against a provider reporting cached_tokens > prompt_tokens or
+    // reasoning_tokens > completion_tokens. The clamp prevents the negative
+    // values that triggered opencode#26620 from ever entering the pipeline.
+    expect(ProviderShared.subtractTokens(5, 3)).toBe(2)
+    expect(ProviderShared.subtractTokens(5, 10)).toBe(0)
+    expect(ProviderShared.subtractTokens(5, undefined)).toBe(5)
+    expect(ProviderShared.subtractTokens(undefined, 3)).toBeUndefined()
+    expect(ProviderShared.subtractTokens(undefined, undefined)).toBeUndefined()
+  })
+
+  test("totalInput sums every input-side category", () => {
+    expect(Usage.totalInput(new Usage({ inputTokens: 10, cacheReadInputTokens: 3, cacheWriteInputTokens: 2 }))).toBe(15)
+    expect(Usage.totalInput(new Usage({ inputTokens: 10 }))).toBe(10)
+    expect(Usage.totalInput(new Usage({}))).toBe(0)
+  })
+
+  test("totalOutput sums every output-side category", () => {
+    expect(Usage.totalOutput(new Usage({ outputTokens: 7, reasoningTokens: 4 }))).toBe(11)
+    expect(Usage.totalOutput(new Usage({ outputTokens: 7 }))).toBe(7)
+    expect(Usage.totalOutput(new Usage({}))).toBe(0)
+  })
+})