From 3bfe34a1dc949c2b3cfba4e372522b435a67a086 Mon Sep 17 00:00:00 2001
From: pomelo-nwu
Date: Wed, 18 Mar 2026 17:51:50 +0800
Subject: [PATCH] telemetry: track cached content tokens for accurate context calculation

- Add cachedContentTokenCount tracking in uiTelemetry service
- Collect cached_content_token_count from streaming usage metadata
- Use cached tokens instead of estimated overhead when available
- Fix messages token calculation to avoid 'messages = 0' issue

This improves context window display accuracy when using providers that
support prefix caching (e.g., DashScope).

Co-authored-by: Qwen-Coder
---
 packages/cli/src/ui/commands/contextCommand.ts | 15 ++++++++++++++-
 packages/core/src/core/geminiChat.ts           |  9 ++++++++-
 packages/core/src/telemetry/uiTelemetry.ts     | 10 ++++++++++
 3 files changed, 32 insertions(+), 2 deletions(-)

diff --git a/packages/cli/src/ui/commands/contextCommand.ts b/packages/cli/src/ui/commands/contextCommand.ts
index b4b7f4f04..c693606a9 100644
--- a/packages/cli/src/ui/commands/contextCommand.ts
+++ b/packages/cli/src/ui/commands/contextCommand.ts
@@ -120,6 +120,10 @@ export const contextCommand: SlashCommand = {
 
     // Total prompt token count from API (most accurate)
     const apiTotalTokens = uiTelemetryService.getLastPromptTokenCount();
+    // Cached content token count — when available (e.g. DashScope prefix caching),
+    // represents the cached overhead (system prompt + tools). Using this gives a much
+    // more accurate "Messages" count: promptTokens - cachedTokens = actual history tokens.
+    const apiCachedTokens = uiTelemetryService.getLastCachedContentTokenCount();
 
     // 1. System prompt tokens (without memory, as memory is counted separately)
     const systemPromptText = getCoreSystemPrompt(undefined, modelName);
@@ -302,7 +306,16 @@
           scaledAllTools +
           displayMemoryFiles +
           Math.round(loadedBodiesTokens * overheadScale);
-        messagesTokens = Math.max(0, totalTokens - scaledOverhead);
+
+        // When the API reports cached content tokens (e.g. DashScope prefix caching),
+        // use them as the actual overhead indicator for a more accurate messages count.
+        // cachedTokens ≈ system prompt + tools tokens actually served from cache.
+        // This avoids the "messages = 0" problem caused by estimation overshoot.
+        if (apiCachedTokens > 0) {
+          messagesTokens = Math.max(0, totalTokens - apiCachedTokens);
+        } else {
+          messagesTokens = Math.max(0, totalTokens - scaledOverhead);
+        }
 
         freeSpace = Math.max(
           0,
diff --git a/packages/core/src/core/geminiChat.ts b/packages/core/src/core/geminiChat.ts
index 03b78f06c..1d1cb064f 100644
--- a/packages/core/src/core/geminiChat.ts
+++ b/packages/core/src/core/geminiChat.ts
@@ -649,11 +649,18 @@ export class GeminiChat {
       // Collect token usage for consolidated recording
       if (chunk.usageMetadata) {
         usageMetadata = chunk.usageMetadata;
+        // Use || instead of ?? so that totalTokenCount=0 falls back to promptTokenCount.
+        // Some providers omit total_tokens or return 0 in streaming usage chunks.
         const lastPromptTokenCount =
-          usageMetadata.totalTokenCount ?? usageMetadata.promptTokenCount;
+          usageMetadata.totalTokenCount || usageMetadata.promptTokenCount;
         if (lastPromptTokenCount) {
           uiTelemetryService.setLastPromptTokenCount(lastPromptTokenCount);
         }
+        if (usageMetadata.cachedContentTokenCount) {
+          uiTelemetryService.setLastCachedContentTokenCount(
+            usageMetadata.cachedContentTokenCount,
+          );
+        }
       }
 
       yield chunk; // Yield every chunk to the UI immediately.
diff --git a/packages/core/src/telemetry/uiTelemetry.ts b/packages/core/src/telemetry/uiTelemetry.ts
index 0f8f2146c..a7361f038 100644
--- a/packages/core/src/telemetry/uiTelemetry.ts
+++ b/packages/core/src/telemetry/uiTelemetry.ts
@@ -119,6 +119,7 @@ const createInitialMetrics = (): SessionMetrics => ({
 export class UiTelemetryService extends EventEmitter {
   #metrics: SessionMetrics = createInitialMetrics();
   #lastPromptTokenCount = 0;
+  #lastCachedContentTokenCount = 0;
 
   addEvent(event: UiEvent) {
     switch (event['event.name']) {
@@ -158,12 +159,21 @@
     });
   }
 
+  getLastCachedContentTokenCount(): number {
+    return this.#lastCachedContentTokenCount;
+  }
+
+  setLastCachedContentTokenCount(count: number): void {
+    this.#lastCachedContentTokenCount = count;
+  }
+
   /**
    * Resets metrics to the initial state (used when resuming a session).
    */
   reset(): void {
     this.#metrics = createInitialMetrics();
     this.#lastPromptTokenCount = 0;
+    this.#lastCachedContentTokenCount = 0;
     this.emit('update', {
       metrics: this.#metrics,
       lastPromptTokenCount: this.#lastPromptTokenCount,
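
For reference, a minimal self-contained sketch of the messages-token arithmetic the patch implements in contextCommand.ts. It is not code from the repository: computeMessagesTokens and its parameter names are hypothetical and the example numbers are invented; only the rule it encodes (prefer the provider-reported cached token count over the local overhead estimate, and clamp at zero) mirrors the change above.

/**
 * Hypothetical helper mirroring the patched logic in contextCommand.ts:
 * subtract the overhead (cached tokens when the provider reports them,
 * otherwise the local estimate) from the total prompt tokens to get the
 * tokens attributable to conversation history.
 */
function computeMessagesTokens(
  totalTokens: number,             // last prompt token count reported by the API
  cachedContentTokens: number,     // cached_content_token_count, 0 if unsupported
  estimatedOverheadTokens: number, // locally estimated system prompt + tools size
): number {
  const overhead =
    cachedContentTokens > 0 ? cachedContentTokens : estimatedOverheadTokens;
  // Clamp at zero so estimation overshoot never yields a negative count.
  return Math.max(0, totalTokens - overhead);
}

// With prefix caching (e.g. DashScope): 12000 total, 9500 cached -> 2500 history tokens.
console.log(computeMessagesTokens(12000, 9500, 11000)); // 2500
// Without cached token reporting, fall back to the local estimate: 12000 - 11000 -> 1000.
console.log(computeMessagesTokens(12000, 0, 11000)); // 1000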