feat(providers): strip chain-of-thought markers from custom provider output (#8635)
Some checks are pending
Canary / Prepare Version (push) Waiting to run
Canary / build-cli (push) Blocked by required conditions
Canary / Upload Install Script (push) Blocked by required conditions
Canary / bundle-desktop (push) Blocked by required conditions
Canary / bundle-desktop-intel (push) Blocked by required conditions
Canary / bundle-desktop-linux (push) Blocked by required conditions
Canary / bundle-desktop-windows (push) Blocked by required conditions
Canary / bundle-desktop-windows-cuda (push) Blocked by required conditions
Canary / Release (push) Blocked by required conditions
Unused Dependencies / machete (push) Waiting to run
CI / changes (push) Waiting to run
CI / Check Rust Code Format (push) Blocked by required conditions
CI / Build and Test Rust Project (push) Blocked by required conditions
CI / Build Rust Project on Windows (push) Waiting to run
CI / Check MSRV (push) Blocked by required conditions
CI / Lint Rust Code (push) Blocked by required conditions
CI / Check Generated Schemas are Up-to-Date (push) Blocked by required conditions
CI / Test and Lint Electron Desktop App (push) Blocked by required conditions
Live Provider Tests / check-fork (push) Waiting to run
Live Provider Tests / changes (push) Blocked by required conditions
Live Provider Tests / Build Binary (push) Blocked by required conditions
Live Provider Tests / Smoke Tests (push) Blocked by required conditions
Live Provider Tests / Smoke Tests (Code Execution) (push) Blocked by required conditions
Live Provider Tests / Compaction Tests (push) Blocked by required conditions
Live Provider Tests / goose server HTTP integration tests (push) Blocked by required conditions
Publish Docker Image / docker (push) Waiting to run
Scorecard supply-chain security / Scorecard analysis (push) Waiting to run

Co-authored-by: Matt Van Horn <455140+mvanhorn@users.noreply.github.com>
This commit is contained in:
Matt Van Horn 2026-05-13 01:35:23 -07:00 committed by GitHub
parent f59c45b7f8
commit eeeee3ff10
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 762 additions and 4 deletions

View file

@ -30,6 +30,276 @@ use std::pin::Pin;
use std::sync::LazyLock;
use std::sync::Mutex;
#[derive(Debug, Default, PartialEq, Eq)]
pub struct FilterOut {
pub content: String,
pub thinking: String,
}
pub struct ThinkFilter {
buffer: String,
inside_think: bool,
think_depth: usize,
}
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum ThinkTag {
Open,
Close,
// `<think/>` is XML-legal but carries no reasoning payload. Treat it as a
// no-op so we don't flip `inside_think` forever and swallow the rest of
// the stream into the thinking bucket.
SelfClosing,
}
enum BufferEvent {
Tag {
pos: usize,
end: usize,
kind: ThinkTag,
},
Partial(usize),
}
impl ThinkFilter {
pub fn new() -> Self {
Self {
buffer: String::new(),
inside_think: false,
think_depth: 0,
}
}
pub fn push(&mut self, chunk: &str) -> FilterOut {
self.buffer.push_str(chunk);
self.process_buffer()
}
pub fn finish(mut self) -> FilterOut {
let mut out = self.process_buffer();
if !self.buffer.is_empty() {
if self.inside_think {
out.thinking.push_str(&self.buffer);
} else {
out.content.push_str(&self.buffer);
}
self.buffer.clear();
}
out
}
fn process_buffer(&mut self) -> FilterOut {
let mut out = FilterOut::default();
loop {
match next_buffer_event(&self.buffer, self.inside_think) {
Some(BufferEvent::Tag { pos, end, kind }) => {
if pos > 0 {
let prefix = self.buffer.get(..pos).unwrap_or_default().to_string();
if self.inside_think {
out.thinking.push_str(&prefix);
} else {
out.content.push_str(&prefix);
}
}
self.buffer.drain(..end);
match kind {
ThinkTag::Open => {
self.think_depth += 1;
self.inside_think = true;
}
ThinkTag::Close => {
self.think_depth = self.think_depth.saturating_sub(1);
self.inside_think = self.think_depth > 0;
}
ThinkTag::SelfClosing => {}
}
}
Some(BufferEvent::Partial(pos)) => {
if pos > 0 {
let prefix = self.buffer.get(..pos).unwrap_or_default().to_string();
if self.inside_think {
out.thinking.push_str(&prefix);
} else {
out.content.push_str(&prefix);
}
self.buffer.drain(..pos);
}
break;
}
None => {
if !self.buffer.is_empty() {
if self.inside_think {
out.thinking.push_str(&self.buffer);
} else {
out.content.push_str(&self.buffer);
}
self.buffer.clear();
}
break;
}
}
}
out
}
}
impl Default for ThinkFilter {
fn default() -> Self {
Self::new()
}
}
pub fn split_think_blocks(text: &str) -> (String, String) {
let mut filter = ThinkFilter::new();
let mut out = filter.push(text);
let final_out = filter.finish();
out.content.push_str(&final_out.content);
out.thinking.push_str(&final_out.thinking);
(out.content, out.thinking)
}
fn next_buffer_event(buffer: &str, inside_think: bool) -> Option<BufferEvent> {
let mut search_from = 0;
while let Some(rel_pos) = buffer.get(search_from..).and_then(|rest| rest.find('<')) {
let pos = search_from + rel_pos;
let suffix = buffer.get(pos..).unwrap_or_default();
if let Some((kind, end)) = parse_think_tag(buffer, pos) {
if inside_think || matches!(kind, ThinkTag::Open | ThinkTag::SelfClosing) {
return Some(BufferEvent::Tag { pos, end, kind });
}
} else if !contains_unquoted_gt(suffix) && is_possible_partial_think_tag(suffix) {
return Some(BufferEvent::Partial(pos));
}
search_from = pos + 1;
}
None
}
fn parse_think_tag(buffer: &str, start: usize) -> Option<(ThinkTag, usize)> {
let bytes = buffer.as_bytes();
if bytes.get(start) != Some(&b'<') {
return None;
}
let mut idx = start + 1;
let is_close = if bytes.get(idx) == Some(&b'/') {
idx += 1;
true
} else {
false
};
let name_start = idx;
while bytes.get(idx).is_some_and(u8::is_ascii_alphabetic) {
idx += 1;
}
if idx == name_start {
return None;
}
let name = buffer.get(name_start..idx).unwrap_or_default();
let is_think = name.eq_ignore_ascii_case("think") || name.eq_ignore_ascii_case("thinking");
if !is_think {
return None;
}
if is_close {
while bytes.get(idx).is_some_and(u8::is_ascii_whitespace) {
idx += 1;
}
if bytes.get(idx) == Some(&b'>') {
return Some((ThinkTag::Close, idx + 1));
}
return None;
}
// Require a real tag boundary immediately after the name (>, /, or whitespace).
// Without this, `<thinking-mode>` or `<thinking123>` would be classified as a
// think tag and stripped from normal content.
let valid_open_boundary = match bytes.get(idx) {
Some(&b) => b == b'>' || b == b'/' || b.is_ascii_whitespace(),
None => false,
};
if !valid_open_boundary {
return None;
}
let mut quote: Option<u8> = None;
let mut last_non_ws: Option<u8> = None;
while let Some(&byte) = bytes.get(idx) {
match quote {
Some(quote_byte) => {
if byte == quote_byte {
quote = None;
}
}
None if matches!(byte, b'"' | b'\'') => {
quote = Some(byte);
last_non_ws = Some(byte);
}
None if byte == b'>' => {
let kind = if last_non_ws == Some(b'/') {
ThinkTag::SelfClosing
} else {
ThinkTag::Open
};
return Some((kind, idx + 1));
}
None if !byte.is_ascii_whitespace() => {
last_non_ws = Some(byte);
}
None => {}
}
idx += 1;
}
None
}
fn is_possible_partial_think_tag(suffix: &str) -> bool {
if contains_unquoted_gt(suffix) {
return false;
}
// Allow a trailing `/` so a chunk boundary that lands between `<think` and
// `>` in a self-closing `<think/>` (or `<thinking/>`) is still recognised
// as a partial tag and buffered until the `>` arrives in the next chunk.
static OPEN_RE: LazyLock<Regex> = LazyLock::new(|| {
Regex::new(r"(?is)^<(?:t(?:h(?:i(?:n(?:k(?:i(?:n(?:g)?)?)?)?)?)?)?)(?:\s.*|/)?$").unwrap()
});
static CLOSE_RE: LazyLock<Regex> = LazyLock::new(|| {
Regex::new(r"(?is)^</(?:t(?:h(?:i(?:n(?:k(?:i(?:n(?:g)?)?)?)?)?)?)?)(?:\s*)?$").unwrap()
});
OPEN_RE.is_match(suffix) || CLOSE_RE.is_match(suffix)
}
fn contains_unquoted_gt(text: &str) -> bool {
let mut quote: Option<u8> = None;
for &byte in text.as_bytes() {
match quote {
Some(quote_byte) => {
if byte == quote_byte {
quote = None;
}
}
None if matches!(byte, b'"' | b'\'') => quote = Some(byte),
None if byte == b'>' => return true,
None => {}
}
}
false
}
fn strip_xml_tags(text: &str) -> String {
static BLOCK_RE: LazyLock<Regex> = LazyLock::new(|| {
Regex::new(r"(?s)<([a-zA-Z][a-zA-Z0-9_]*)[^>]*>.*?</[a-zA-Z][a-zA-Z0-9_]*>").unwrap()
@ -968,6 +1238,265 @@ mod tests {
);
}
#[test]
fn test_split_think_blocks_extracts_inline_reasoning() {
assert_eq!(
split_think_blocks("<think>x</think>y"),
("y".to_string(), "x".to_string())
);
}
#[test]
fn test_split_think_blocks_is_case_insensitive() {
assert_eq!(
split_think_blocks("<THINK>x</think>y"),
("y".to_string(), "x".to_string())
);
}
#[test]
fn test_split_think_blocks_handles_multiple_blocks() {
assert_eq!(
split_think_blocks("<think>a</think>b<think>c</think>d"),
("bd".to_string(), "ac".to_string())
);
}
#[test]
fn test_split_think_blocks_without_tags() {
assert_eq!(
split_think_blocks("plain content"),
("plain content".to_string(), String::new())
);
}
#[test]
fn test_split_think_blocks_handles_attributes() {
assert_eq!(
split_think_blocks(r#"<think class="x">a</think>b"#),
("b".to_string(), "a".to_string())
);
}
#[test]
fn test_split_think_blocks_handles_quoted_gt_in_self_closing_attributes() {
for input in [
r#"<think data="a>b"/>Visible"#,
"<think data='a>b'/>Visible",
] {
assert_eq!(
split_think_blocks(input),
("Visible".to_string(), String::new()),
"mismatch for {input:?}"
);
}
}
#[test]
fn test_split_think_blocks_handles_quoted_gt_in_open_attributes() {
assert_eq!(
split_think_blocks(r#"<think data="a>b">Hidden</think>Visible"#),
("Visible".to_string(), "Hidden".to_string())
);
}
#[test]
fn test_split_think_blocks_handles_thinking_variant() {
assert_eq!(
split_think_blocks("<thinking>a</thinking>b"),
("b".to_string(), "a".to_string())
);
}
#[test]
fn test_think_filter_streaming_across_partial_tags() {
let mut filter = ThinkFilter::new();
let mut out = FilterOut::default();
for chunk in ["<thi", "nk>x</thi", "nk>y"] {
let partial = filter.push(chunk);
out.content.push_str(&partial.content);
out.thinking.push_str(&partial.thinking);
}
let final_out = filter.finish();
out.content.push_str(&final_out.content);
out.thinking.push_str(&final_out.thinking);
assert_eq!(out.content, "y");
assert_eq!(out.thinking, "x");
}
#[test]
fn test_think_filter_preserves_non_think_tags() {
let mut filter = ThinkFilter::new();
let mut out = filter.push("<table>");
let final_out = filter.finish();
out.content.push_str(&final_out.content);
out.thinking.push_str(&final_out.thinking);
assert_eq!(out.content, "<table>");
assert!(out.thinking.is_empty());
}
#[test]
fn test_think_filter_finish_treats_unterminated_think_as_thinking() {
let mut filter = ThinkFilter::new();
let mut out = filter.push("<think>unfinished");
let final_out = filter.finish();
out.content.push_str(&final_out.content);
out.thinking.push_str(&final_out.thinking);
assert!(out.content.is_empty());
assert_eq!(out.thinking, "unfinished");
}
#[test]
fn test_think_filter_preserves_tags_with_think_prefix() {
for input in [
"<thinking-mode>hello</thinking-mode>",
"<thinking123>payload</thinking123>",
"<thinker>note</thinker>",
] {
let mut filter = ThinkFilter::new();
let mut out = filter.push(input);
let final_out = filter.finish();
out.content.push_str(&final_out.content);
out.thinking.push_str(&final_out.thinking);
assert_eq!(out.content, input, "content mismatch for {input:?}");
assert!(
out.thinking.is_empty(),
"unexpected thinking for {input:?}: {:?}",
out.thinking
);
}
}
#[test]
fn test_think_filter_accepts_think_with_attributes() {
let mut filter = ThinkFilter::new();
let mut out = filter.push("<think data-source=\"x\">hidden</think>visible");
let final_out = filter.finish();
out.content.push_str(&final_out.content);
out.thinking.push_str(&final_out.thinking);
assert_eq!(out.content, "visible");
assert_eq!(out.thinking, "hidden");
}
#[test]
fn test_think_filter_treats_self_closing_as_noop() {
// `<think/>` carries no reasoning payload. It must not flip the filter
// into "inside_think" mode, and the tag itself must not leak into
// visible content.
for input in [
"before <think/> after",
"before <think /> after",
"before <thinking/> after",
"before <think data-source=\"x\"/> after",
] {
let mut filter = ThinkFilter::new();
let mut out = filter.push(input);
let final_out = filter.finish();
out.content.push_str(&final_out.content);
out.thinking.push_str(&final_out.thinking);
assert_eq!(
out.content, "before after",
"content mismatch for {input:?}"
);
assert!(
out.thinking.is_empty(),
"unexpected thinking for {input:?}: {:?}",
out.thinking
);
}
}
#[test]
fn test_think_filter_self_closing_does_not_swallow_following_content() {
// Regression: a self-closing `<think/>` used to be classified as an
// Open tag, which incremented think_depth and routed everything after
// it into the thinking bucket for the rest of the stream.
let mut filter = ThinkFilter::new();
let mut out = filter.push("<think/>visible chunk 1");
let final_out = filter.push("visible chunk 2");
let tail_out = filter.finish();
out.content.push_str(&final_out.content);
out.thinking.push_str(&final_out.thinking);
out.content.push_str(&tail_out.content);
out.thinking.push_str(&tail_out.thinking);
assert_eq!(out.content, "visible chunk 1visible chunk 2");
assert!(out.thinking.is_empty());
}
#[test]
fn test_think_filter_streaming_across_self_closing_boundary() {
// Regression: a chunk boundary between `<think` and `>` in a
// self-closing `<think/>` used to fall out of the partial-tag regex
// (which only allowed `<think<ws>...`), so the `<think/` prefix leaked
// into visible content before the `>` arrived.
for (a, b) in [
("before <think/", "> after"),
("before <thinking/", "> after"),
("head <think ", "/> tail"),
] {
let mut filter = ThinkFilter::new();
let mut out = filter.push(a);
let second = filter.push(b);
let final_out = filter.finish();
out.content.push_str(&second.content);
out.content.push_str(&final_out.content);
out.thinking.push_str(&second.thinking);
out.thinking.push_str(&final_out.thinking);
assert!(
!out.content.contains('<'),
"partial tag leaked into content for ({a:?}, {b:?}): {:?}",
out.content
);
assert!(
out.thinking.is_empty(),
"unexpected thinking for ({a:?}, {b:?}): {:?}",
out.thinking
);
}
}
#[test]
fn test_think_filter_streaming_across_quoted_attribute_boundary() {
let mut filter = ThinkFilter::new();
let mut out = filter.push(r#"<think data="a>b"#);
assert!(out.content.is_empty());
assert!(out.thinking.is_empty());
let second = filter.push(r#""/>Visible"#);
let final_out = filter.finish();
out.content.push_str(&second.content);
out.content.push_str(&final_out.content);
out.thinking.push_str(&second.thinking);
out.thinking.push_str(&final_out.thinking);
assert_eq!(out.content, "Visible");
assert!(out.thinking.is_empty());
}
#[test]
fn test_think_filter_self_closing_inside_open_block_closes_nothing() {
// `<think/>` inside an open `<think>` block is still a no-op: depth
// should stay at 1 until the real `</think>` arrives.
let mut filter = ThinkFilter::new();
let mut out = filter.push("before <think>hidden1 <think/> hidden2</think>visible");
let final_out = filter.finish();
out.content.push_str(&final_out.content);
out.thinking.push_str(&final_out.thinking);
assert_eq!(out.content, "before visible");
assert_eq!(out.thinking, "hidden1 hidden2");
}
#[test]
fn test_extract_short_title() {
assert_eq!(extract_short_title("List files"), "List files");

View file

@ -1,7 +1,7 @@
use crate::conversation::message::{Message, MessageContent, ProviderMetadata};
use crate::mcp_utils::extract_text_from_resource;
use crate::model::ModelConfig;
use crate::providers::base::{ProviderUsage, Usage};
use crate::providers::base::{split_think_blocks, ProviderUsage, ThinkFilter, Usage};
use crate::providers::errors::ProviderError;
use crate::providers::utils::{
convert_image, detect_image_path, extract_reasoning_effort, is_openai_responses_model,
@ -518,9 +518,11 @@ pub fn response_to_message(response: &Value) -> anyhow::Result<Message> {
let reasoning_value = original
.get("reasoning_content")
.or_else(|| original.get("reasoning"));
let mut has_structured_thinking = false;
if let Some(reasoning_content) = reasoning_value {
if let Some(reasoning_str) = reasoning_content.as_str() {
if !reasoning_str.is_empty() {
has_structured_thinking = true;
content.push(MessageContent::thinking(reasoning_str, ""));
}
}
@ -528,7 +530,15 @@ pub fn response_to_message(response: &Value) -> anyhow::Result<Message> {
if let Some(text) = original.get("content") {
if let Some(text_str) = text.as_str() {
content.push(MessageContent::text(text_str));
let (cleaned, inline_thinking) = split_think_blocks(text_str);
if !has_structured_thinking && !inline_thinking.is_empty() {
content.push(MessageContent::thinking(inline_thinking, ""));
}
if !cleaned.is_empty() {
content.push(MessageContent::text(cleaned));
}
}
}
@ -822,7 +832,13 @@ where
let mut accumulated_reasoning: Vec<Value> = Vec::new();
let mut accumulated_reasoning_content = String::new();
let mut think_filter = ThinkFilter::new();
let mut saw_structured_reasoning = false;
let mut last_signature: Option<String> = None;
// Buffer inline <think>...</think> content until we know whether structured
// reasoning will arrive. Emitting it immediately and then receiving
// reasoning_content in a later chunk would produce duplicated reasoning.
let mut pending_inline_thinking = String::new();
'outer: while let Some(response) = stream.next().await {
let response_str = response?;
@ -846,6 +862,10 @@ where
}
if let Some(rc) = chunk.choices[0].delta.reasoning_text() {
accumulated_reasoning_content.push_str(rc);
if !rc.is_empty() {
saw_structured_reasoning = true;
pending_inline_thinking.clear();
}
}
}
@ -888,6 +908,10 @@ where
}
if let Some(rc) = tool_chunk.choices[0].delta.reasoning_text() {
accumulated_reasoning_content.push_str(rc);
if !rc.is_empty() {
saw_structured_reasoning = true;
pending_inline_thinking.clear();
}
}
if let Some(delta_tool_calls) = &tool_chunk.choices[0].delta.tool_calls {
for delta_call in delta_tool_calls {
@ -928,6 +952,37 @@ where
None
};
let filtered = think_filter.push("");
let mut flush_thinking = String::new();
if !saw_structured_reasoning {
flush_thinking.push_str(&pending_inline_thinking);
flush_thinking.push_str(&filtered.thinking);
}
pending_inline_thinking.clear();
if !filtered.content.is_empty() || !flush_thinking.is_empty() {
let mut filtered_contents = Vec::new();
if !filtered.content.is_empty() {
filtered_contents.push(MessageContent::text(filtered.content));
}
if !flush_thinking.is_empty() {
filtered_contents.push(MessageContent::thinking(flush_thinking, ""));
}
if !filtered_contents.is_empty() {
let mut msg = Message::new(
Role::Assistant,
chrono::Utc::now().timestamp(),
filtered_contents,
);
if let Some(id) = chunk.id.clone() {
msg = msg.with_id(id);
}
yield (Some(msg), None);
}
}
let mut contents = Vec::new();
if !accumulated_reasoning_content.is_empty() {
contents.push(MessageContent::thinking(&accumulated_reasoning_content, ""));
@ -1009,8 +1064,14 @@ where
}
if let Some(text) = text_content {
if !text.is_empty() {
content.push(MessageContent::text(&text));
let filtered = think_filter.push(&text);
if !saw_structured_reasoning && !filtered.thinking.is_empty() {
pending_inline_thinking.push_str(&filtered.thinking);
}
if !filtered.content.is_empty() {
content.push(MessageContent::text(filtered.content));
}
}
@ -1040,6 +1101,35 @@ where
yield (None, usage)
}
}
let filtered = think_filter.finish();
let mut trailing_thinking = String::new();
if !saw_structured_reasoning {
trailing_thinking.push_str(&pending_inline_thinking);
trailing_thinking.push_str(&filtered.thinking);
}
pending_inline_thinking.clear();
if !filtered.content.is_empty() || !trailing_thinking.is_empty() {
let mut content = Vec::new();
if !filtered.content.is_empty() {
content.push(MessageContent::text(filtered.content));
}
if !trailing_thinking.is_empty() {
content.push(MessageContent::thinking(trailing_thinking, ""));
}
yield (
Some(Message::new(
Role::Assistant,
chrono::Utc::now().timestamp(),
content,
)),
None,
)
}
}
}
@ -2329,6 +2419,85 @@ data: [DONE]"#;
panic!("Expected tool call message with nested extra_content metadata");
}
#[tokio::test]
async fn test_streaming_response_extracts_inline_think_blocks() -> anyhow::Result<()> {
let response_lines = concat!(
"data: {\"id\":\"chunk-1\",\"choices\":[{\"delta\":{\"content\":\"<thi\"},\"index\":0,\"finish_reason\":null}]}\n",
"data: {\"id\":\"chunk-1\",\"choices\":[{\"delta\":{\"content\":\"nk>x</thi\"},\"index\":0,\"finish_reason\":null}]}\n",
"data: {\"id\":\"chunk-1\",\"choices\":[{\"delta\":{\"content\":\"nk>y\"},\"index\":0,\"finish_reason\":\"stop\"}]}\n",
"data: [DONE]\n"
);
let response_stream =
tokio_stream::iter(response_lines.lines().map(|line| Ok(line.to_string())));
let mut messages = std::pin::pin!(response_to_streaming_message(response_stream));
let mut text = String::new();
let mut thinking = String::new();
while let Some(result) = messages.next().await {
let (message, _) = result?;
if let Some(message) = message {
for item in message.content {
match item {
MessageContent::Text(text_content) => text.push_str(&text_content.text),
MessageContent::Thinking(thinking_content) => {
thinking.push_str(&thinking_content.thinking)
}
_ => {}
}
}
}
}
assert_eq!(text, "y");
assert_eq!(thinking, "x");
Ok(())
}
#[tokio::test]
async fn test_streaming_suppresses_inline_think_when_structured_reasoning_follows(
) -> anyhow::Result<()> {
// Inline <think>...</think> arrives in an early content chunk, then
// reasoning_content arrives in a later chunk. The inline thinking
// should be discarded in favor of the structured reasoning so users
// do not get duplicated reasoning output.
let response_lines = concat!(
"data: {\"id\":\"chunk-1\",\"choices\":[{\"delta\":{\"content\":\"<think>inline reasoning</think>Hi\"},\"index\":0,\"finish_reason\":null}]}\n",
"data: {\"id\":\"chunk-2\",\"choices\":[{\"delta\":{\"reasoning_content\":\"structured reasoning\"},\"index\":0,\"finish_reason\":null}]}\n",
"data: {\"id\":\"chunk-3\",\"choices\":[{\"delta\":{\"content\":\" there\"},\"index\":0,\"finish_reason\":\"stop\"}]}\n",
"data: [DONE]\n"
);
let response_stream =
tokio_stream::iter(response_lines.lines().map(|line| Ok(line.to_string())));
let mut messages = std::pin::pin!(response_to_streaming_message(response_stream));
let mut text = String::new();
let mut thinking = String::new();
while let Some(result) = messages.next().await {
let (message, _) = result?;
if let Some(message) = message {
for item in message.content {
match item {
MessageContent::Text(text_content) => text.push_str(&text_content.text),
MessageContent::Thinking(thinking_content) => {
thinking.push_str(&thinking_content.thinking)
}
_ => {}
}
}
}
}
assert_eq!(text, "Hi there");
assert_eq!(thinking, "structured reasoning");
Ok(())
}
#[test]
fn test_response_to_message_with_reasoning_content() -> anyhow::Result<()> {
// Test capturing reasoning_content from DeepSeek reasoning models
@ -2367,6 +2536,66 @@ data: [DONE]"#;
Ok(())
}
#[test]
fn test_response_to_message_extracts_inline_think_blocks() -> anyhow::Result<()> {
let response = json!({
"choices": [{
"role": "assistant",
"message": {
"content": "<think>internal reasoning</think>Visible answer"
}
}]
});
let message = response_to_message(&response)?;
assert_eq!(message.content.len(), 2);
if let MessageContent::Thinking(thinking) = &message.content[0] {
assert_eq!(thinking.thinking, "internal reasoning");
} else {
panic!("Expected Thinking content, got {:?}", message.content[0]);
}
if let MessageContent::Text(text) = &message.content[1] {
assert_eq!(text.text, "Visible answer");
} else {
panic!("Expected Text content");
}
Ok(())
}
#[test]
fn test_response_to_message_prefers_structured_reasoning_over_inline_think(
) -> anyhow::Result<()> {
let response = json!({
"choices": [{
"role": "assistant",
"message": {
"reasoning_content": "structured reasoning",
"content": "<think>inline reasoning</think>Visible answer"
}
}]
});
let message = response_to_message(&response)?;
assert_eq!(message.content.len(), 2);
if let MessageContent::Thinking(thinking) = &message.content[0] {
assert_eq!(thinking.thinking, "structured reasoning");
} else {
panic!("Expected Thinking content");
}
if let MessageContent::Text(text) = &message.content[1] {
assert_eq!(text.text, "Visible answer");
} else {
panic!("Expected Text content");
}
Ok(())
}
#[test]
fn test_format_messages_with_reasoning_content() -> anyhow::Result<()> {
// Test that reasoning_content is properly included in formatted messages