diff --git a/packages/cli/src/ui/AppContainer.tsx b/packages/cli/src/ui/AppContainer.tsx index 96408de55..bd5a4a3dd 100644 --- a/packages/cli/src/ui/AppContainer.tsx +++ b/packages/cli/src/ui/AppContainer.tsx @@ -761,6 +761,8 @@ export const AppContainer = (props: AppContainerProps) => { activePtyId, loopDetectionConfirmationRequest, pendingToolCalls, + streamingResponseLengthRef, + isReceivingContent, } = useGeminiStream( config.getGeminiClient(), historyManager.history, @@ -2117,6 +2119,9 @@ export const AppContainer = (props: AppContainerProps) => { isFeedbackDialogOpen, // Per-task token tracking taskStartTokens, + // Real-time token display + streamingResponseLengthRef, + isReceivingContent, // Prompt suggestion promptSuggestion, dismissPromptSuggestion, @@ -2225,6 +2230,9 @@ export const AppContainer = (props: AppContainerProps) => { isFeedbackDialogOpen, // Per-task token tracking taskStartTokens, + // Real-time token display + streamingResponseLengthRef, + isReceivingContent, // Prompt suggestion promptSuggestion, dismissPromptSuggestion, diff --git a/packages/cli/src/ui/components/Composer.test.tsx b/packages/cli/src/ui/components/Composer.test.tsx index 5d969de5c..449d042f8 100644 --- a/packages/cli/src/ui/components/Composer.test.tsx +++ b/packages/cli/src/ui/components/Composer.test.tsx @@ -112,6 +112,9 @@ const createMockUIState = (overrides: Partial = {}): UIState => nightly: false, isTrustedFolder: true, taskStartTokens: 0, + streamingResponseLengthRef: { current: 0 }, + isReceivingContent: false, + pendingGeminiHistoryItems: [], ...overrides, }) as UIState; diff --git a/packages/cli/src/ui/components/Composer.tsx b/packages/cli/src/ui/components/Composer.tsx index 4dca07f0b..263988686 100644 --- a/packages/cli/src/ui/components/Composer.tsx +++ b/packages/cli/src/ui/components/Composer.tsx @@ -15,7 +15,7 @@ import { useUIState } from '../contexts/UIStateContext.js'; import { useUIActions } from '../contexts/UIActionsContext.js'; import { useVimMode 
} from '../contexts/VimModeContext.js'; import { useConfig } from '../contexts/ConfigContext.js'; -import { StreamingState } from '../types.js'; +import { StreamingState, type HistoryItemToolGroup } from '../types.js'; import { ConfigInitDisplay } from '../components/ConfigInitDisplay.js'; import { FeedbackDialog } from '../FeedbackDialog.js'; import { t } from '../../i18n/index.js'; @@ -27,17 +27,40 @@ export const Composer = () => { const uiActions = useUIActions(); const { vimEnabled } = useVimMode(); - const { showAutoAcceptIndicator, sessionStats, taskStartTokens } = uiState; + const { + showAutoAcceptIndicator, + streamingResponseLengthRef, + isReceivingContent, + } = uiState; - const tokens = Object.values(sessionStats.metrics?.models ?? {}).reduce( - (acc, model) => ({ - prompt: acc.prompt + (model.tokens?.prompt ?? 0), - candidates: acc.candidates + (model.tokens?.candidates ?? 0), - }), - { prompt: 0, candidates: 0 }, - ); + // Real-time token animation is performed inside LoadingIndicator itself, so + // the 100ms polling only re-renders that one component — keeping InputPrompt + // and Footer static avoids terminal flicker during streaming. + const isStreaming = + uiState.streamingState === StreamingState.Responding || + uiState.streamingState === StreamingState.WaitingForConfirmation; - const taskTokens = tokens.candidates - taskStartTokens; + // Aggregate agent tool tokens from executing tool calls. Only changes when + // a subagent reports progress, so it doesn't drive the animation loop. + let agentTokens = 0; + for (const item of uiState.pendingGeminiHistoryItems ?? 
[]) { + if (item.type === 'tool_group') { + const toolGroup = item as HistoryItemToolGroup; + for (const tool of toolGroup.tools) { + const display = tool.resultDisplay; + if ( + typeof display === 'object' && + display !== null && + 'type' in display && + display.type === 'task_execution' && + 'tokenCount' in display && + typeof display.tokenCount === 'number' + ) { + agentTokens += display.tokenCount; + } + } + } + } // State for keyboard shortcuts display toggle const [showShortcuts, setShowShortcuts] = useState(false); @@ -74,7 +97,10 @@ export const Composer = () => { : uiState.currentLoadingPhrase } elapsedTime={uiState.elapsedTime} - candidatesTokens={taskTokens} + candidatesTokens={agentTokens} + streamingCharsRef={streamingResponseLengthRef} + isStreaming={isStreaming} + isReceivingContent={isReceivingContent} /> )} diff --git a/packages/cli/src/ui/components/LoadingIndicator.test.tsx b/packages/cli/src/ui/components/LoadingIndicator.test.tsx index c608f4a4e..66bd0792b 100644 --- a/packages/cli/src/ui/components/LoadingIndicator.test.tsx +++ b/packages/cli/src/ui/components/LoadingIndicator.test.tsx @@ -374,5 +374,29 @@ describe('', () => { const output = lastFrame(); expect(output).toContain('(5s · ↓ 5.4k tokens · esc to cancel)'); }); + + it('should show ↑ arrow when waiting for API response', () => { + const { lastFrame } = renderWithContext( + , + StreamingState.Responding, + ); + const output = lastFrame(); + expect(output).toContain('↑ 500 tokens'); + expect(output).not.toContain('↓'); + }); + + it('should show ↓ arrow when receiving content (default)', () => { + const { lastFrame } = renderWithContext( + , + StreamingState.Responding, + ); + const output = lastFrame(); + expect(output).toContain('↓ 500 tokens'); + expect(output).not.toContain('↑'); + }); }); }); diff --git a/packages/cli/src/ui/components/LoadingIndicator.tsx b/packages/cli/src/ui/components/LoadingIndicator.tsx index 7b6f2f06f..617f879e5 100644 --- 
a/packages/cli/src/ui/components/LoadingIndicator.tsx +++ b/packages/cli/src/ui/components/LoadingIndicator.tsx @@ -6,6 +6,7 @@ import type { ThoughtSummary } from '@qwen-code/qwen-code-core'; import type React from 'react'; +import { useRef } from 'react'; import { Box, Text } from 'ink'; import { theme } from '../semantic-colors.js'; import { useStreamingContext } from '../contexts/StreamingContext.js'; @@ -13,6 +14,7 @@ import { StreamingState } from '../types.js'; import { GeminiRespondingSpinner } from './GeminiRespondingSpinner.js'; import { formatDuration, formatTokenCount } from '../utils/formatters.js'; import { useTerminalSize } from '../hooks/useTerminalSize.js'; +import { useAnimationFrame } from '../hooks/useAnimationFrame.js'; import { isNarrowWidth } from '../utils/isNarrowWidth.js'; import { t } from '../../i18n/index.js'; @@ -22,6 +24,21 @@ interface LoadingIndicatorProps { rightContent?: React.ReactNode; thought?: ThoughtSummary | null; candidatesTokens?: number; + /** + * Live-updating character counter for the streaming response. When provided + * together with `isStreaming`, the indicator animates a token estimate + * (chars / 4) internally, so the animation never re-renders `Composer` or + * the input prompt. + */ + streamingCharsRef?: React.RefObject; + /** Whether to poll `streamingCharsRef` (true during Responding/WaitingForConfirmation). */ + isStreaming?: boolean; + /** + * True when receiving content (shows ↓ arrow), false when waiting for API + * response (shows ↑ arrow). 
+ * @default true + */ + isReceivingContent?: boolean; } export const LoadingIndicator: React.FC = ({ @@ -30,25 +47,40 @@ export const LoadingIndicator: React.FC = ({ rightContent, thought, candidatesTokens, + streamingCharsRef, + isStreaming, + isReceivingContent = true, }) => { const streamingState = useStreamingContext(); const { columns: terminalWidth } = useTerminalSize(); const isNarrow = isNarrowWidth(terminalWidth); + // Animate the streaming-chars counter locally so only this component + // re-renders on each animation frame (100ms ≈ spinner cadence). Siblings + // like InputPrompt / Footer stay static, which eliminates terminal flicker + // during streaming output. + const fallbackRef = useRef(0); + const animatedChars = useAnimationFrame( + streamingCharsRef ?? fallbackRef, + streamingCharsRef && isStreaming ? 100 : null, + ); + if (streamingState === StreamingState.Idle) { return null; } const primaryText = thought?.subject || currentLoadingPhrase; - const outputTokens = candidatesTokens ?? 0; + const streamingTokens = streamingCharsRef ? Math.round(animatedChars / 4) : 0; + const outputTokens = (candidatesTokens ?? 0) + streamingTokens; const showTokens = !isNarrow && outputTokens > 0; + const tokenArrow = isReceivingContent ? '↓' : '↑'; const timeStr = elapsedTime < 60 ? `${elapsedTime}s` : formatDuration(elapsedTime * 1000); const tokenStr = showTokens - ? ` · ↓ ${formatTokenCount(outputTokens)} tokens` + ? 
/**
 * Polls a numeric ref on a fixed interval and returns a value that eases
 * toward the ref's current value instead of jumping to it. This keeps a
 * counter readable when the underlying number grows in large bursts.
 *
 * Per-tick step, based on the gap between the real and displayed value:
 * - gap < 70:   advance by 3
 * - gap 70–200: advance by ~20% of the gap (never less than 3)
 * - gap > 200:  advance by 50
 * The step is always capped so the displayed value never overshoots the
 * real one.
 *
 * Decreases are never animated: when the ref drops below the displayed
 * value (e.g. it was reset to 0), the displayed value snaps down at once.
 *
 * @param watchRef - Ref holding the real value; read on every tick and on
 *   every render.
 * @param intervalMs - Polling period in milliseconds, or `null` to pause
 *   polling entirely. Defaults to 50.
 * @returns The animated value, always less than or equal to the value
 *   currently held by `watchRef`.
 */
export function useAnimationFrame(
  watchRef: React.RefObject<number>,
  intervalMs: number | null = 50,
): number {
  const [displayValue, setDisplayValue] = useState(() => watchRef.current);
  // `displayRef` is the authoritative animated value; `displayValue` mirrors
  // it in state only so that interval ticks trigger a re-render.
  const displayRef = useRef(watchRef.current);
  const targetRef = useRef(watchRef.current);

  // Snap down synchronously during render when the watched ref has dropped
  // below the last displayed value, so a stale higher count is never shown
  // while waiting for the next tick. Repeating this on a double render is
  // harmless because the assignment is idempotent.
  const currentTarget = watchRef.current;
  if (currentTarget < displayRef.current) {
    displayRef.current = currentTarget;
    targetRef.current = currentTarget;
  }

  useEffect(() => {
    if (intervalMs === null) return;

    // Re-sync when polling (re)starts: the ref may have changed while the
    // interval was paused.
    const current = watchRef.current;
    if (current !== targetRef.current) {
      targetRef.current = current;
      // Decreases snap immediately; increases are left to the interval.
      if (current < displayRef.current) {
        displayRef.current = current;
        setDisplayValue(current);
      }
    }

    const id = setInterval(() => {
      const realValue = watchRef.current;
      targetRef.current = realValue;

      // Snap down immediately on reset.
      if (realValue < displayRef.current) {
        displayRef.current = realValue;
        setDisplayValue(realValue);
        return;
      }

      const gap = realValue - displayRef.current;
      if (gap <= 0) return;

      // Small gaps crawl, medium gaps move proportionally, large gaps leap.
      let increment: number;
      if (gap < 70) {
        increment = 3;
      } else if (gap <= 200) {
        increment = Math.max(3, Math.round(gap * 0.2));
      } else {
        increment = 50;
      }

      const next = Math.min(displayRef.current + increment, realValue);
      displayRef.current = next;
      setDisplayValue(next);
    }, intervalMs);

    return () => clearInterval(id);
  }, [watchRef, intervalMs]);

  // Clamp the state value by `displayRef`, not by the raw target. After a
  // render-time snap-down the state still holds the old, higher value until
  // a tick with a positive gap overwrites it. Clamping against the raw
  // target would let any re-render in that window show the un-animated real
  // value and then fall back on the next tick. `displayRef` is already
  // <= `currentTarget` here because of the snap block above.
  return Math.min(displayValue, displayRef.current);
}
+ try { + const argsJson = JSON.stringify(event.value.args); + streamingResponseLengthRef.current += argsJson.length; + } catch { + // Best-effort — don't block on serialization errors + } break; case ServerGeminiEventType.UserCancelled: handleUserCancelledEvent(userMessageTimestamp); @@ -1386,6 +1403,13 @@ export const useGeminiStream = ( setIsResponding(true); setInitError(null); + // Entering "requesting" phase — no content yet for this API call. + setIsReceivingContent(false); + // Reset char counter only on new user queries; tool-result continuations + // keep accumulating so the token count only goes up within a turn. + if (submitType !== SendMessageType.ToolResult) { + streamingResponseLengthRef.current = 0; + } try { // Emit user message to dual output sidecar (if enabled). @@ -1977,5 +2001,7 @@ export const useGeminiStream = ( handleApprovalModeChange, activePtyId, loopDetectionConfirmationRequest, + streamingResponseLengthRef, + isReceivingContent, }; }; diff --git a/packages/core/src/tools/agent/agent.ts b/packages/core/src/tools/agent/agent.ts index b85694df6..aef153ada 100644 --- a/packages/core/src/tools/agent/agent.ts +++ b/packages/core/src/tools/agent/agent.ts @@ -49,6 +49,7 @@ import type { AgentFinishEvent, AgentErrorEvent, AgentApprovalRequestEvent, + AgentUsageEvent, } from '../../agents/runtime/agent-events.js'; import { BuiltinAgentRegistry } from '../../subagents/builtin-agents.js'; import { createDebugLogger } from '../../utils/debugLogger.js'; @@ -514,6 +515,26 @@ class AgentToolInvocation extends BaseToolInvocation { ); }); + // Track real-time token consumption from subagent API calls. + // Each USAGE_METADATA event carries per-round usage, so we accumulate + // output tokens across rounds. We use candidatesTokenCount (output-only) + // to stay consistent with the main stream's chars/4 output-token estimate. 
+ let accumulatedOutputTokens = 0; + this.eventEmitter.on( + AgentEventType.USAGE_METADATA, + (...args: unknown[]) => { + const event = args[0] as AgentUsageEvent; + const outputTokens = event.usage?.candidatesTokenCount ?? 0; + if (outputTokens > 0) { + accumulatedOutputTokens += outputTokens; + this.updateDisplay( + { tokenCount: accumulatedOutputTokens }, + updateOutput, + ); + } + }, + ); + // Indicate when a tool call is waiting for approval this.eventEmitter.on( AgentEventType.TOOL_WAITING_APPROVAL, diff --git a/packages/core/src/tools/tools.ts b/packages/core/src/tools/tools.ts index dd8d68e9b..c807cf05c 100644 --- a/packages/core/src/tools/tools.ts +++ b/packages/core/src/tools/tools.ts @@ -499,6 +499,8 @@ export interface AgentResultDisplay { terminateReason?: string; result?: string; executionSummary?: AgentStatsSummary; + /** Real-time output-token count during execution, accumulated across subagent rounds. */ + tokenCount?: number; // If the subagent is awaiting approval for a tool call, // this contains the confirmation details for inline UI rendering.