Split token display for OpenAI (#46829)

This feature cost $15.

Up arrow -> Tokens we're sending to the model.
Down arrow -> Tokens we've received from the model.

<img width="377" height="69" alt="Screenshot 2026-01-14 at 12 31 01 PM"
src="https://github.com/user-attachments/assets/fc15824f-de5d-466b-8cc1-329f3c1940bb"
/>



Release Notes:

- Changed the display of tokens for OpenAI models to reflect the
input/output limits.

---------

Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Mikayla Maki 2026-01-14 14:29:56 -08:00 committed by GitHub
parent b5b13d1f4d
commit 9c5fc6ecbd
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
8 changed files with 137 additions and 37 deletions

View file

@ -904,6 +904,7 @@ impl PlanEntry {
pub struct TokenUsage {
/// Limit the usage is measured against; the native thread fills this from the
/// model's maximum token count for the current completion mode.
pub max_tokens: u64,
/// Total tokens used; the native thread fills this from the usage's total token count.
pub used_tokens: u64,
/// Input-side token count (shown in the UI next to the up arrow).
pub input_tokens: u64,
/// Output-side token count (shown in the UI next to the down arrow).
pub output_tokens: u64,
}

View file

@ -2516,6 +2516,7 @@ async fn test_truncate_first_message(cx: &mut TestAppContext) {
Some(acp_thread::TokenUsage {
used_tokens: 32_000 + 16_000,
max_tokens: 1_000_000,
input_tokens: 32_000,
output_tokens: 16_000,
})
);
@ -2576,6 +2577,7 @@ async fn test_truncate_first_message(cx: &mut TestAppContext) {
Some(acp_thread::TokenUsage {
used_tokens: 40_000 + 20_000,
max_tokens: 1_000_000,
input_tokens: 40_000,
output_tokens: 20_000,
})
);
@ -2625,6 +2627,7 @@ async fn test_truncate_second_message(cx: &mut TestAppContext) {
Some(acp_thread::TokenUsage {
used_tokens: 32_000 + 16_000,
max_tokens: 1_000_000,
input_tokens: 32_000,
output_tokens: 16_000,
})
);
@ -2680,6 +2683,7 @@ async fn test_truncate_second_message(cx: &mut TestAppContext) {
Some(acp_thread::TokenUsage {
used_tokens: 40_000 + 20_000,
max_tokens: 1_000_000,
input_tokens: 40_000,
output_tokens: 20_000,
})
);

View file

@ -1291,6 +1291,7 @@ impl Thread {
Some(acp_thread::TokenUsage {
max_tokens: model.max_token_count_for_mode(self.completion_mode.into()),
used_tokens: usage.total_tokens(),
input_tokens: usage.input_tokens,
output_tokens: usage.output_tokens,
})
}

View file

@ -79,10 +79,9 @@ use crate::{
ToggleBurnMode, ToggleProfileSelector,
};
/// Maximum number of lines to show for a collapsed terminal command preview.
const MAX_COLLAPSED_LINES: usize = 3;
const STOPWATCH_THRESHOLD: Duration = Duration::from_secs(1);
const TOKEN_THRESHOLD: u64 = 1;
const STOPWATCH_THRESHOLD: Duration = Duration::from_secs(30);
const TOKEN_THRESHOLD: u64 = 250;
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
enum ThreadFeedback {
@ -6058,46 +6057,118 @@ impl AcpThreadView {
.is_some_and(|model| model.provider_id() == language_model::ZED_CLOUD_PROVIDER_ID)
}
/// Reports whether the model of the current native thread asks for input and output
/// token counts to be displayed separately.
///
/// Returns `false` when there is no native thread or the thread has no model.
fn supports_split_token_display(&self, cx: &App) -> bool {
    let Some(native_thread) = self.as_native_thread(cx) else {
        return false;
    };
    match native_thread.read(cx).model() {
        Some(model) => model.supports_split_token_display(),
        None => false,
    }
}
/// Renders the token-usage indicator for the current thread.
///
/// Returns `None` when there is no thread or the thread has no token usage to report.
/// When the model supports split token display, input and output counts are rendered
/// separately (up-arrow / down-arrow icons), each against its own limit; otherwise a
/// single `used / max` pair is rendered.
fn render_token_usage(&self, cx: &mut Context<Self>) -> Option<Div> {
let thread = self.thread()?.read(cx);
let usage = thread.token_usage()?;
// Any status other than `Idle` counts as "generating" and makes the live counts pulse.
let is_generating = thread.status() != ThreadStatus::Idle;
let show_split = self.supports_split_token_display(cx);
let used = crate::text_thread_editor::humanize_token_count(usage.used_tokens);
let max = crate::text_thread_editor::humanize_token_count(usage.max_tokens);
// Muted text colour at half opacity, used for the "/" separators.
let separator_color = Color::Custom(cx.theme().colors().text_muted.opacity(0.5));
// Builds a small muted label. While generating, the label's alpha is driven by a
// repeating 2-second animation eased with `pulsating_between(0.3, 0.8)`; the
// `animation_id` keeps the animations of different labels apart.
let token_label = |text: String, animation_id: &'static str| {
Label::new(text)
.size(LabelSize::Small)
.color(Color::Muted)
.map(|label| {
if is_generating {
label
.with_animation(
animation_id,
Animation::new(Duration::from_secs(2))
.repeat()
.with_easing(pulsating_between(0.3, 0.8)),
|label, delta| label.alpha(delta),
)
.into_any()
} else {
label.into_any_element()
}
})
};
// NOTE(review): this unconditional `Some(...)` builds the same used/max row as the
// `else` branch further down, with the pulsing label written inline instead of via
// `token_label` — it looks like the superseded implementation; confirm whether it
// should still be present.
Some(
h_flex()
.flex_shrink_0()
.gap_0p5()
.mr_1p5()
.child(
Label::new(used)
.size(LabelSize::Small)
.color(Color::Muted)
.map(|label| {
if is_generating {
label
.with_animation(
"used-tokens-label",
Animation::new(Duration::from_secs(2))
.repeat()
.with_easing(pulsating_between(0.3, 0.8)),
|label, delta| label.alpha(delta),
)
.into_any()
} else {
label.into_any_element()
}
}),
)
.child(
Label::new("/")
.size(LabelSize::Small)
.color(Color::Custom(cx.theme().colors().text_muted.opacity(0.5))),
)
.child(Label::new(max).size(LabelSize::Small).color(Color::Muted)),
)
if show_split {
// NOTE(review): falls back to 0 when there is no native thread/model or the model
// reports no output limit; the output limit then renders as the humanized 0 and the
// input limit equals `max_tokens` — confirm this is the intended display.
let max_output_tokens = self
.as_native_thread(cx)
.and_then(|thread| thread.read(cx).model())
.and_then(|model| model.max_output_tokens())
.unwrap_or(0);
let input = crate::text_thread_editor::humanize_token_count(usage.input_tokens);
// Input limit = overall limit minus the output limit; `saturating_sub` clamps at 0.
let input_max = crate::text_thread_editor::humanize_token_count(
usage.max_tokens.saturating_sub(max_output_tokens),
);
let output = crate::text_thread_editor::humanize_token_count(usage.output_tokens);
let output_max = crate::text_thread_editor::humanize_token_count(max_output_tokens);
Some(
h_flex()
.flex_shrink_0()
.gap_1()
.mr_1p5()
// Input group: up arrow, current input tokens, "/", input limit.
.child(
h_flex()
.gap_0p5()
.child(
Icon::new(IconName::ArrowUp)
.size(IconSize::XSmall)
.color(Color::Muted),
)
.child(token_label(input, "input-tokens-label"))
.child(
Label::new("/")
.size(LabelSize::Small)
.color(separator_color),
)
.child(
Label::new(input_max)
.size(LabelSize::Small)
.color(Color::Muted),
),
)
// Output group: down arrow, current output tokens, "/", output limit.
.child(
h_flex()
.gap_0p5()
.child(
Icon::new(IconName::ArrowDown)
.size(IconSize::XSmall)
.color(Color::Muted),
)
.child(token_label(output, "output-tokens-label"))
.child(
Label::new("/")
.size(LabelSize::Small)
.color(separator_color),
)
.child(
Label::new(output_max)
.size(LabelSize::Small)
.color(Color::Muted),
),
),
)
} else {
// Combined display: total used tokens against the overall limit.
let used = crate::text_thread_editor::humanize_token_count(usage.used_tokens);
let max = crate::text_thread_editor::humanize_token_count(usage.max_tokens);
Some(
h_flex()
.flex_shrink_0()
.gap_0p5()
.mr_1p5()
.child(token_label(used, "used-tokens-label"))
.child(
Label::new("/")
.size(LabelSize::Small)
.color(separator_color),
)
.child(Label::new(max).size(LabelSize::Small).color(Color::Muted)),
)
}
}
fn toggle_burn_mode(

View file

@ -617,6 +617,12 @@ pub trait LanguageModel: Send + Sync {
false
}
/// Returns whether this model/provider reports accurate split input/output token counts.
/// When true, the UI may show separate input/output token indicators.
///
/// The default implementation returns `false`; an implementation opts in by
/// overriding this method.
fn supports_split_token_display(&self) -> bool {
false
}
/// Returns the `LanguageModelToolSchemaFormat` for this model.
///
/// The default implementation returns `LanguageModelToolSchemaFormat::JsonSchema`.
fn tool_input_format(&self) -> LanguageModelToolSchemaFormat {
LanguageModelToolSchemaFormat::JsonSchema
}

View file

@ -624,6 +624,11 @@ impl LanguageModel for CloudLanguageModel {
self.model.supports_max_mode
}
/// Reports whether the UI may show separate input/output token indicators for
/// this cloud model.
///
/// Returns `true` only when the model's provider is `OpenAi`.
fn supports_split_token_display(&self) -> bool {
    // Name the variant by its full path instead of glob-importing the enum's
    // variants: under a glob import, a renamed or removed variant would silently
    // turn a bare `OpenAi` into a catch-all binding pattern that matches every
    // provider.
    matches!(
        self.model.provider,
        cloud_llm_client::LanguageModelProvider::OpenAi
    )
}
/// Builds the telemetry identifier string: the `zed.dev/` prefix followed by the
/// model's id.
fn telemetry_id(&self) -> String {
    let model_id = &self.model.id;
    format!("zed.dev/{model_id}")
}
@ -652,6 +657,10 @@ impl LanguageModel for CloudLanguageModel {
.map(|max_token_count| max_token_count as u64)
}
/// Returns the model's configured output-token limit, converted to `u64`.
/// Always `Some` for cloud models.
fn max_output_tokens(&self) -> Option<u64> {
    let limit = self.model.max_output_tokens as u64;
    Some(limit)
}
fn cache_configuration(&self) -> Option<LanguageModelCacheConfiguration> {
match &self.model.provider {
cloud_llm_client::LanguageModelProvider::Anthropic => {

View file

@ -329,6 +329,10 @@ impl LanguageModel for OpenAiLanguageModel {
}
}
/// Always `true`: this provider opts in to separate input/output token indicators.
fn supports_split_token_display(&self) -> bool {
true
}
/// Builds the telemetry identifier string: the `openai/` prefix followed by the
/// model's id.
fn telemetry_id(&self) -> String {
    let model_id = self.model.id();
    format!("openai/{model_id}")
}

View file

@ -319,6 +319,10 @@ impl LanguageModel for OpenAiCompatibleLanguageModel {
}
}
/// Always `true`: this provider opts in to separate input/output token indicators.
fn supports_split_token_display(&self) -> bool {
true
}
/// Builds the telemetry identifier string: the `openai/` prefix followed by the
/// model's name.
fn telemetry_id(&self) -> String {
    let model_name = &self.model.name;
    format!("openai/{model_name}")
}