From 3bfe34a1dc949c2b3cfba4e372522b435a67a086 Mon Sep 17 00:00:00 2001
From: pomelo-nwu
Date: Wed, 18 Mar 2026 17:51:50 +0800
Subject: [PATCH] telemetry: track cached content tokens for accurate context calculation

- Add cachedContentTokenCount tracking in uiTelemetry service
- Collect cached_content_token_count from streaming usage metadata
- Use cached tokens instead of estimated overhead when available
- Fix messages token calculation to avoid 'messages = 0' issue

This improves context window display accuracy when using providers that
support prefix caching (e.g., DashScope).

Co-authored-by: Qwen-Coder
---
 packages/cli/src/ui/commands/contextCommand.ts | 15 ++++++++++++++-
 packages/core/src/core/geminiChat.ts           |  9 ++++++++-
 packages/core/src/telemetry/uiTelemetry.ts     | 10 ++++++++++
 3 files changed, 32 insertions(+), 2 deletions(-)

diff --git a/packages/cli/src/ui/commands/contextCommand.ts b/packages/cli/src/ui/commands/contextCommand.ts
index b4b7f4f04..c693606a9 100644
--- a/packages/cli/src/ui/commands/contextCommand.ts
+++ b/packages/cli/src/ui/commands/contextCommand.ts
@@ -120,6 +120,10 @@ export const contextCommand: SlashCommand = {
 
     // Total prompt token count from API (most accurate)
     const apiTotalTokens = uiTelemetryService.getLastPromptTokenCount();
+    // Cached content token count — when available (e.g. DashScope prefix caching),
+    // represents the cached overhead (system prompt + tools). Using this gives a much
+    // more accurate "Messages" count: promptTokens - cachedTokens = actual history tokens.
+    const apiCachedTokens = uiTelemetryService.getLastCachedContentTokenCount();
 
     // 1. System prompt tokens (without memory, as memory is counted separately)
     const systemPromptText = getCoreSystemPrompt(undefined, modelName);
@@ -302,7 +306,16 @@
           scaledAllTools +
           displayMemoryFiles +
           Math.round(loadedBodiesTokens * overheadScale);
-        messagesTokens = Math.max(0, totalTokens - scaledOverhead);
+
+        // When the API reports cached content tokens (e.g. DashScope prefix caching),
+        // use them as the actual overhead indicator for a more accurate messages count.
+        // cachedTokens ≈ system prompt + tools tokens actually served from cache.
+        // This avoids the "messages = 0" problem caused by estimation overshoot.
+        if (apiCachedTokens > 0) {
+          messagesTokens = Math.max(0, totalTokens - apiCachedTokens);
+        } else {
+          messagesTokens = Math.max(0, totalTokens - scaledOverhead);
+        }
 
         freeSpace = Math.max(
           0,
diff --git a/packages/core/src/core/geminiChat.ts b/packages/core/src/core/geminiChat.ts
index 03b78f06c..1d1cb064f 100644
--- a/packages/core/src/core/geminiChat.ts
+++ b/packages/core/src/core/geminiChat.ts
@@ -649,11 +649,18 @@ export class GeminiChat {
       // Collect token usage for consolidated recording
       if (chunk.usageMetadata) {
         usageMetadata = chunk.usageMetadata;
+        // Use || instead of ?? so that totalTokenCount=0 falls back to promptTokenCount.
+        // Some providers omit total_tokens or return 0 in streaming usage chunks.
         const lastPromptTokenCount =
-          usageMetadata.totalTokenCount ?? usageMetadata.promptTokenCount;
+          usageMetadata.totalTokenCount || usageMetadata.promptTokenCount;
         if (lastPromptTokenCount) {
           uiTelemetryService.setLastPromptTokenCount(lastPromptTokenCount);
         }
+        if (usageMetadata.cachedContentTokenCount) {
+          uiTelemetryService.setLastCachedContentTokenCount(
+            usageMetadata.cachedContentTokenCount,
+          );
+        }
       }
 
       yield chunk; // Yield every chunk to the UI immediately.
diff --git a/packages/core/src/telemetry/uiTelemetry.ts b/packages/core/src/telemetry/uiTelemetry.ts
index 0f8f2146c..a7361f038 100644
--- a/packages/core/src/telemetry/uiTelemetry.ts
+++ b/packages/core/src/telemetry/uiTelemetry.ts
@@ -119,6 +119,7 @@ const createInitialMetrics = (): SessionMetrics => ({
 export class UiTelemetryService extends EventEmitter {
   #metrics: SessionMetrics = createInitialMetrics();
   #lastPromptTokenCount = 0;
+  #lastCachedContentTokenCount = 0;
 
   addEvent(event: UiEvent) {
     switch (event['event.name']) {
@@ -158,12 +159,21 @@
     });
   }
 
+  getLastCachedContentTokenCount(): number {
+    return this.#lastCachedContentTokenCount;
+  }
+
+  setLastCachedContentTokenCount(count: number): void {
+    this.#lastCachedContentTokenCount = count;
+  }
+
   /**
    * Resets metrics to the initial state (used when resuming a session).
    */
   reset(): void {
     this.#metrics = createInitialMetrics();
     this.#lastPromptTokenCount = 0;
+    this.#lastCachedContentTokenCount = 0;
     this.emit('update', {
       metrics: this.#metrics,
       lastPromptTokenCount: this.#lastPromptTokenCount,
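
For reference, a minimal self-contained sketch of the messages-token arithmetic the patch implements in contextCommand.ts. It is not code from the repository: computeMessagesTokens and its parameter names are hypothetical and the example numbers are invented; only the rule it encodes (prefer the provider-reported cached token count over the local overhead estimate, and clamp at zero) mirrors the change above.

/**
 * Hypothetical helper mirroring the patched logic in contextCommand.ts:
 * subtract the overhead (cached tokens when the provider reports them,
 * otherwise the local estimate) from the total prompt tokens to get the
 * tokens attributable to conversation history.
 */
function computeMessagesTokens(
  totalTokens: number,             // last prompt token count reported by the API
  cachedContentTokens: number,     // cached_content_token_count, 0 if unsupported
  estimatedOverheadTokens: number, // locally estimated system prompt + tools size
): number {
  const overhead =
    cachedContentTokens > 0 ? cachedContentTokens : estimatedOverheadTokens;
  // Clamp at zero so estimation overshoot never yields a negative count.
  return Math.max(0, totalTokens - overhead);
}

// With prefix caching (e.g. DashScope): 12000 total, 9500 cached -> 2500 history tokens.
console.log(computeMessagesTokens(12000, 9500, 11000)); // 2500
// Without cached token reporting, fall back to the local estimate: 12000 - 11000 -> 1000.
console.log(computeMessagesTokens(12000, 0, 11000)); // 1000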