Split token display for OpenAI (#46829)

This feature cost $15. Up arrow -> tokens we're sending to the model; down arrow -> tokens we've received from the model. <img width="377" height="69" alt="Screenshot 2026-01-14 at 12 31 01 PM" src="https://github.com/user-attachments/assets/fc15824f-de5d-466b-8cc1-329f3c1940bb" /> Release Notes: - Changed the display of tokens for OpenAI models to reflect the input/output limits. --------- Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
2026-05-23 21:05:08 +00:00 · 2026-01-14 14:29:56 -08:00 · 2026-01-14 14:29:56 -08:00 · 9c5fc6ecbd
commit 9c5fc6ecbd
parent b5b13d1f4d
8 changed files with 137 additions and 37 deletions
--- a/crates/acp_thread/src/acp_thread.rs
+++ b/crates/acp_thread/src/acp_thread.rs
@ -904,6 +904,7 @@ impl PlanEntry {
 pub struct TokenUsage {
    pub max_tokens: u64,
    pub used_tokens: u64,
+    pub input_tokens: u64,
    pub output_tokens: u64,
 }

--- a/crates/agent/src/tests/mod.rs
+++ b/crates/agent/src/tests/mod.rs
@ -2516,6 +2516,7 @@ async fn test_truncate_first_message(cx: &mut TestAppContext) {
            Some(acp_thread::TokenUsage {
                used_tokens: 32_000 + 16_000,
                max_tokens: 1_000_000,
+                input_tokens: 32_000,
                output_tokens: 16_000,
            })
        );
@ -2576,6 +2577,7 @@ async fn test_truncate_first_message(cx: &mut TestAppContext) {
            Some(acp_thread::TokenUsage {
                used_tokens: 40_000 + 20_000,
                max_tokens: 1_000_000,
+                input_tokens: 40_000,
                output_tokens: 20_000,
            })
        );
@ -2625,6 +2627,7 @@ async fn test_truncate_second_message(cx: &mut TestAppContext) {
                Some(acp_thread::TokenUsage {
                    used_tokens: 32_000 + 16_000,
                    max_tokens: 1_000_000,
+                    input_tokens: 32_000,
                    output_tokens: 16_000,
                })
            );
@ -2680,6 +2683,7 @@ async fn test_truncate_second_message(cx: &mut TestAppContext) {
            Some(acp_thread::TokenUsage {
                used_tokens: 40_000 + 20_000,
                max_tokens: 1_000_000,
+                input_tokens: 40_000,
                output_tokens: 20_000,
            })
        );
--- a/crates/agent/src/thread.rs
+++ b/crates/agent/src/thread.rs
@ -1291,6 +1291,7 @@ impl Thread {
        Some(acp_thread::TokenUsage {
            max_tokens: model.max_token_count_for_mode(self.completion_mode.into()),
            used_tokens: usage.total_tokens(),
+            input_tokens: usage.input_tokens,
            output_tokens: usage.output_tokens,
        })
    }
--- a/crates/agent_ui/src/acp/thread_view.rs
+++ b/crates/agent_ui/src/acp/thread_view.rs
@ -79,10 +79,9 @@ use crate::{
    ToggleBurnMode, ToggleProfileSelector,
 };

-/// Maximum number of lines to show for a collapsed terminal command preview.
 const MAX_COLLAPSED_LINES: usize = 3;
-const STOPWATCH_THRESHOLD: Duration = Duration::from_secs(1);
-const TOKEN_THRESHOLD: u64 = 1;
+const STOPWATCH_THRESHOLD: Duration = Duration::from_secs(30);
+const TOKEN_THRESHOLD: u64 = 250;

 #[derive(Copy, Clone, Debug, PartialEq, Eq)]
 enum ThreadFeedback {
@ -6058,46 +6057,118 @@ impl AcpThreadView {
            .is_some_and(|model| model.provider_id() == language_model::ZED_CLOUD_PROVIDER_ID)
    }

+    fn supports_split_token_display(&self, cx: &App) -> bool {
+        self.as_native_thread(cx)
+            .and_then(|thread| thread.read(cx).model())
+            .is_some_and(|model| model.supports_split_token_display())
+    }
+
    fn render_token_usage(&self, cx: &mut Context<Self>) -> Option<Div> {
        let thread = self.thread()?.read(cx);
        let usage = thread.token_usage()?;
        let is_generating = thread.status() != ThreadStatus::Idle;
+        let show_split = self.supports_split_token_display(cx);

-        let used = crate::text_thread_editor::humanize_token_count(usage.used_tokens);
-        let max = crate::text_thread_editor::humanize_token_count(usage.max_tokens);
+        let separator_color = Color::Custom(cx.theme().colors().text_muted.opacity(0.5));
+        let token_label = |text: String, animation_id: &'static str| {
+            Label::new(text)
+                .size(LabelSize::Small)
+                .color(Color::Muted)
+                .map(|label| {
+                    if is_generating {
+                        label
+                            .with_animation(
+                                animation_id,
+                                Animation::new(Duration::from_secs(2))
+                                    .repeat()
+                                    .with_easing(pulsating_between(0.3, 0.8)),
+                                |label, delta| label.alpha(delta),
+                            )
+                            .into_any()
+                    } else {
+                        label.into_any_element()
+                    }
+                })
+        };

-        Some(
-            h_flex()
-                .flex_shrink_0()
-                .gap_0p5()
-                .mr_1p5()
-                .child(
-                    Label::new(used)
-                        .size(LabelSize::Small)
-                        .color(Color::Muted)
-                        .map(|label| {
-                            if is_generating {
-                                label
-                                    .with_animation(
-                                        "used-tokens-label",
-                                        Animation::new(Duration::from_secs(2))
-                                            .repeat()
-                                            .with_easing(pulsating_between(0.3, 0.8)),
-                                        |label, delta| label.alpha(delta),
-                                    )
-                                    .into_any()
-                            } else {
-                                label.into_any_element()
-                            }
-                        }),
-                )
-                .child(
-                    Label::new("/")
-                        .size(LabelSize::Small)
-                        .color(Color::Custom(cx.theme().colors().text_muted.opacity(0.5))),
-                )
-                .child(Label::new(max).size(LabelSize::Small).color(Color::Muted)),
-        )
+        if show_split {
+            let max_output_tokens = self
+                .as_native_thread(cx)
+                .and_then(|thread| thread.read(cx).model())
+                .and_then(|model| model.max_output_tokens())
+                .unwrap_or(0);
+
+            let input = crate::text_thread_editor::humanize_token_count(usage.input_tokens);
+            let input_max = crate::text_thread_editor::humanize_token_count(
+                usage.max_tokens.saturating_sub(max_output_tokens),
+            );
+            let output = crate::text_thread_editor::humanize_token_count(usage.output_tokens);
+            let output_max = crate::text_thread_editor::humanize_token_count(max_output_tokens);
+
+            Some(
+                h_flex()
+                    .flex_shrink_0()
+                    .gap_1()
+                    .mr_1p5()
+                    .child(
+                        h_flex()
+                            .gap_0p5()
+                            .child(
+                                Icon::new(IconName::ArrowUp)
+                                    .size(IconSize::XSmall)
+                                    .color(Color::Muted),
+                            )
+                            .child(token_label(input, "input-tokens-label"))
+                            .child(
+                                Label::new("/")
+                                    .size(LabelSize::Small)
+                                    .color(separator_color),
+                            )
+                            .child(
+                                Label::new(input_max)
+                                    .size(LabelSize::Small)
+                                    .color(Color::Muted),
+                            ),
+                    )
+                    .child(
+                        h_flex()
+                            .gap_0p5()
+                            .child(
+                                Icon::new(IconName::ArrowDown)
+                                    .size(IconSize::XSmall)
+                                    .color(Color::Muted),
+                            )
+                            .child(token_label(output, "output-tokens-label"))
+                            .child(
+                                Label::new("/")
+                                    .size(LabelSize::Small)
+                                    .color(separator_color),
+                            )
+                            .child(
+                                Label::new(output_max)
+                                    .size(LabelSize::Small)
+                                    .color(Color::Muted),
+                            ),
+                    ),
+            )
+        } else {
+            let used = crate::text_thread_editor::humanize_token_count(usage.used_tokens);
+            let max = crate::text_thread_editor::humanize_token_count(usage.max_tokens);
+
+            Some(
+                h_flex()
+                    .flex_shrink_0()
+                    .gap_0p5()
+                    .mr_1p5()
+                    .child(token_label(used, "used-tokens-label"))
+                    .child(
+                        Label::new("/")
+                            .size(LabelSize::Small)
+                            .color(separator_color),
+                    )
+                    .child(Label::new(max).size(LabelSize::Small).color(Color::Muted)),
+            )
+        }
    }

    fn toggle_burn_mode(
--- a/crates/language_model/src/language_model.rs
+++ b/crates/language_model/src/language_model.rs
@ -617,6 +617,12 @@ pub trait LanguageModel: Send + Sync {
        false
    }

+    /// Returns whether this model/provider reports accurate split input/output token counts.
+    /// When true, the UI may show separate input/output token indicators.
+    fn supports_split_token_display(&self) -> bool {
+        false
+    }
+
    fn tool_input_format(&self) -> LanguageModelToolSchemaFormat {
        LanguageModelToolSchemaFormat::JsonSchema
    }
--- a/crates/language_models/src/provider/cloud.rs
+++ b/crates/language_models/src/provider/cloud.rs
@ -624,6 +624,11 @@ impl LanguageModel for CloudLanguageModel {
        self.model.supports_max_mode
    }

+    fn supports_split_token_display(&self) -> bool {
+        use cloud_llm_client::LanguageModelProvider::*;
+        matches!(self.model.provider, OpenAi)
+    }
+
    fn telemetry_id(&self) -> String {
        format!("zed.dev/{}", self.model.id)
    }
@ -652,6 +657,10 @@ impl LanguageModel for CloudLanguageModel {
            .map(|max_token_count| max_token_count as u64)
    }

+    fn max_output_tokens(&self) -> Option<u64> {
+        Some(self.model.max_output_tokens as u64)
+    }
+
    fn cache_configuration(&self) -> Option<LanguageModelCacheConfiguration> {
        match &self.model.provider {
            cloud_llm_client::LanguageModelProvider::Anthropic => {
--- a/crates/language_models/src/provider/open_ai.rs
+++ b/crates/language_models/src/provider/open_ai.rs
@ -329,6 +329,10 @@ impl LanguageModel for OpenAiLanguageModel {
        }
    }

+    fn supports_split_token_display(&self) -> bool {
+        true
+    }
+
    fn telemetry_id(&self) -> String {
        format!("openai/{}", self.model.id())
    }
--- a/crates/language_models/src/provider/open_ai_compatible.rs
+++ b/crates/language_models/src/provider/open_ai_compatible.rs
@ -319,6 +319,10 @@ impl LanguageModel for OpenAiCompatibleLanguageModel {
        }
    }

+    fn supports_split_token_display(&self) -> bool {
+        true
+    }
+
    fn telemetry_id(&self) -> String {
        format!("openai/{}", self.model.name)
    }