feat(providers): strip chain-of-thought markers from custom provider output (#8635)

Co-authored-by: Matt Van Horn <455140+mvanhorn@users.noreply.github.com>
2026-05-19 07:54:19 +00:00 · 2026-05-13 01:35:23 -07:00 · 2026-05-13 01:35:23 -07:00 · eeeee3ff10
commit eeeee3ff10
parent f59c45b7f8
2 changed files with 762 additions and 4 deletions
--- a/crates/goose/src/providers/base.rs
+++ b/crates/goose/src/providers/base.rs
@ -30,6 +30,276 @@ use std::pin::Pin;
 use std::sync::LazyLock;
 use std::sync::Mutex;

+#[derive(Debug, Default, PartialEq, Eq)]
+pub struct FilterOut {
+    pub content: String,
+    pub thinking: String,
+}
+
+pub struct ThinkFilter {
+    buffer: String,
+    inside_think: bool,
+    think_depth: usize,
+}
+
+#[derive(Clone, Copy, Debug, PartialEq, Eq)]
+enum ThinkTag {
+    Open,
+    Close,
+    // `<think/>` is XML-legal but carries no reasoning payload. Treat it as a
+    // no-op so we don't flip `inside_think` forever and swallow the rest of
+    // the stream into the thinking bucket.
+    SelfClosing,
+}
+
+enum BufferEvent {
+    Tag {
+        pos: usize,
+        end: usize,
+        kind: ThinkTag,
+    },
+    Partial(usize),
+}
+
+impl ThinkFilter {
+    pub fn new() -> Self {
+        Self {
+            buffer: String::new(),
+            inside_think: false,
+            think_depth: 0,
+        }
+    }
+
+    pub fn push(&mut self, chunk: &str) -> FilterOut {
+        self.buffer.push_str(chunk);
+        self.process_buffer()
+    }
+
+    pub fn finish(mut self) -> FilterOut {
+        let mut out = self.process_buffer();
+        if !self.buffer.is_empty() {
+            if self.inside_think {
+                out.thinking.push_str(&self.buffer);
+            } else {
+                out.content.push_str(&self.buffer);
+            }
+            self.buffer.clear();
+        }
+        out
+    }
+
+    fn process_buffer(&mut self) -> FilterOut {
+        let mut out = FilterOut::default();
+
+        loop {
+            match next_buffer_event(&self.buffer, self.inside_think) {
+                Some(BufferEvent::Tag { pos, end, kind }) => {
+                    if pos > 0 {
+                        let prefix = self.buffer.get(..pos).unwrap_or_default().to_string();
+                        if self.inside_think {
+                            out.thinking.push_str(&prefix);
+                        } else {
+                            out.content.push_str(&prefix);
+                        }
+                    }
+
+                    self.buffer.drain(..end);
+
+                    match kind {
+                        ThinkTag::Open => {
+                            self.think_depth += 1;
+                            self.inside_think = true;
+                        }
+                        ThinkTag::Close => {
+                            self.think_depth = self.think_depth.saturating_sub(1);
+                            self.inside_think = self.think_depth > 0;
+                        }
+                        ThinkTag::SelfClosing => {}
+                    }
+                }
+                Some(BufferEvent::Partial(pos)) => {
+                    if pos > 0 {
+                        let prefix = self.buffer.get(..pos).unwrap_or_default().to_string();
+                        if self.inside_think {
+                            out.thinking.push_str(&prefix);
+                        } else {
+                            out.content.push_str(&prefix);
+                        }
+                        self.buffer.drain(..pos);
+                    }
+                    break;
+                }
+                None => {
+                    if !self.buffer.is_empty() {
+                        if self.inside_think {
+                            out.thinking.push_str(&self.buffer);
+                        } else {
+                            out.content.push_str(&self.buffer);
+                        }
+                        self.buffer.clear();
+                    }
+                    break;
+                }
+            }
+        }
+
+        out
+    }
+}
+
+impl Default for ThinkFilter {
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
+pub fn split_think_blocks(text: &str) -> (String, String) {
+    let mut filter = ThinkFilter::new();
+    let mut out = filter.push(text);
+    let final_out = filter.finish();
+    out.content.push_str(&final_out.content);
+    out.thinking.push_str(&final_out.thinking);
+    (out.content, out.thinking)
+}
+
+fn next_buffer_event(buffer: &str, inside_think: bool) -> Option<BufferEvent> {
+    let mut search_from = 0;
+
+    while let Some(rel_pos) = buffer.get(search_from..).and_then(|rest| rest.find('<')) {
+        let pos = search_from + rel_pos;
+        let suffix = buffer.get(pos..).unwrap_or_default();
+
+        if let Some((kind, end)) = parse_think_tag(buffer, pos) {
+            if inside_think || matches!(kind, ThinkTag::Open | ThinkTag::SelfClosing) {
+                return Some(BufferEvent::Tag { pos, end, kind });
+            }
+        } else if !contains_unquoted_gt(suffix) && is_possible_partial_think_tag(suffix) {
+            return Some(BufferEvent::Partial(pos));
+        }
+
+        search_from = pos + 1;
+    }
+
+    None
+}
+
+fn parse_think_tag(buffer: &str, start: usize) -> Option<(ThinkTag, usize)> {
+    let bytes = buffer.as_bytes();
+    if bytes.get(start) != Some(&b'<') {
+        return None;
+    }
+
+    let mut idx = start + 1;
+    let is_close = if bytes.get(idx) == Some(&b'/') {
+        idx += 1;
+        true
+    } else {
+        false
+    };
+
+    let name_start = idx;
+    while bytes.get(idx).is_some_and(u8::is_ascii_alphabetic) {
+        idx += 1;
+    }
+
+    if idx == name_start {
+        return None;
+    }
+
+    let name = buffer.get(name_start..idx).unwrap_or_default();
+    let is_think = name.eq_ignore_ascii_case("think") || name.eq_ignore_ascii_case("thinking");
+    if !is_think {
+        return None;
+    }
+
+    if is_close {
+        while bytes.get(idx).is_some_and(u8::is_ascii_whitespace) {
+            idx += 1;
+        }
+        if bytes.get(idx) == Some(&b'>') {
+            return Some((ThinkTag::Close, idx + 1));
+        }
+        return None;
+    }
+
+    // Require a real tag boundary immediately after the name (>, /, or whitespace).
+    // Without this, `<thinking-mode>` or `<thinking123>` would be classified as a
+    // think tag and stripped from normal content.
+    let valid_open_boundary = match bytes.get(idx) {
+        Some(&b) => b == b'>' || b == b'/' || b.is_ascii_whitespace(),
+        None => false,
+    };
+    if !valid_open_boundary {
+        return None;
+    }
+
+    let mut quote: Option<u8> = None;
+    let mut last_non_ws: Option<u8> = None;
+    while let Some(&byte) = bytes.get(idx) {
+        match quote {
+            Some(quote_byte) => {
+                if byte == quote_byte {
+                    quote = None;
+                }
+            }
+            None if matches!(byte, b'"' | b'\'') => {
+                quote = Some(byte);
+                last_non_ws = Some(byte);
+            }
+            None if byte == b'>' => {
+                let kind = if last_non_ws == Some(b'/') {
+                    ThinkTag::SelfClosing
+                } else {
+                    ThinkTag::Open
+                };
+                return Some((kind, idx + 1));
+            }
+            None if !byte.is_ascii_whitespace() => {
+                last_non_ws = Some(byte);
+            }
+            None => {}
+        }
+        idx += 1;
+    }
+
+    None
+}
+
+fn is_possible_partial_think_tag(suffix: &str) -> bool {
+    if contains_unquoted_gt(suffix) {
+        return false;
+    }
+
+    // Allow a trailing `/` so a chunk boundary that lands between `<think` and
+    // `>` in a self-closing `<think/>` (or `<thinking/>`) is still recognised
+    // as a partial tag and buffered until the `>` arrives in the next chunk.
+    static OPEN_RE: LazyLock<Regex> = LazyLock::new(|| {
+        Regex::new(r"(?is)^<(?:t(?:h(?:i(?:n(?:k(?:i(?:n(?:g)?)?)?)?)?)?)?)(?:\s.*|/)?$").unwrap()
+    });
+    static CLOSE_RE: LazyLock<Regex> = LazyLock::new(|| {
+        Regex::new(r"(?is)^</(?:t(?:h(?:i(?:n(?:k(?:i(?:n(?:g)?)?)?)?)?)?)?)(?:\s*)?$").unwrap()
+    });
+
+    OPEN_RE.is_match(suffix) || CLOSE_RE.is_match(suffix)
+}
+
+fn contains_unquoted_gt(text: &str) -> bool {
+    let mut quote: Option<u8> = None;
+    for &byte in text.as_bytes() {
+        match quote {
+            Some(quote_byte) => {
+                if byte == quote_byte {
+                    quote = None;
+                }
+            }
+            None if matches!(byte, b'"' | b'\'') => quote = Some(byte),
+            None if byte == b'>' => return true,
+            None => {}
+        }
+    }
+    false
+}
+
 fn strip_xml_tags(text: &str) -> String {
    static BLOCK_RE: LazyLock<Regex> = LazyLock::new(|| {
        Regex::new(r"(?s)<([a-zA-Z][a-zA-Z0-9_]*)[^>]*>.*?</[a-zA-Z][a-zA-Z0-9_]*>").unwrap()
@ -968,6 +1238,265 @@ mod tests {
        );
    }

+    #[test]
+    fn test_split_think_blocks_extracts_inline_reasoning() {
+        assert_eq!(
+            split_think_blocks("<think>x</think>y"),
+            ("y".to_string(), "x".to_string())
+        );
+    }
+
+    #[test]
+    fn test_split_think_blocks_is_case_insensitive() {
+        assert_eq!(
+            split_think_blocks("<THINK>x</think>y"),
+            ("y".to_string(), "x".to_string())
+        );
+    }
+
+    #[test]
+    fn test_split_think_blocks_handles_multiple_blocks() {
+        assert_eq!(
+            split_think_blocks("<think>a</think>b<think>c</think>d"),
+            ("bd".to_string(), "ac".to_string())
+        );
+    }
+
+    #[test]
+    fn test_split_think_blocks_without_tags() {
+        assert_eq!(
+            split_think_blocks("plain content"),
+            ("plain content".to_string(), String::new())
+        );
+    }
+
+    #[test]
+    fn test_split_think_blocks_handles_attributes() {
+        assert_eq!(
+            split_think_blocks(r#"<think class="x">a</think>b"#),
+            ("b".to_string(), "a".to_string())
+        );
+    }
+
+    #[test]
+    fn test_split_think_blocks_handles_quoted_gt_in_self_closing_attributes() {
+        for input in [
+            r#"<think data="a>b"/>Visible"#,
+            "<think data='a>b'/>Visible",
+        ] {
+            assert_eq!(
+                split_think_blocks(input),
+                ("Visible".to_string(), String::new()),
+                "mismatch for {input:?}"
+            );
+        }
+    }
+
+    #[test]
+    fn test_split_think_blocks_handles_quoted_gt_in_open_attributes() {
+        assert_eq!(
+            split_think_blocks(r#"<think data="a>b">Hidden</think>Visible"#),
+            ("Visible".to_string(), "Hidden".to_string())
+        );
+    }
+
+    #[test]
+    fn test_split_think_blocks_handles_thinking_variant() {
+        assert_eq!(
+            split_think_blocks("<thinking>a</thinking>b"),
+            ("b".to_string(), "a".to_string())
+        );
+    }
+
+    #[test]
+    fn test_think_filter_streaming_across_partial_tags() {
+        let mut filter = ThinkFilter::new();
+        let mut out = FilterOut::default();
+
+        for chunk in ["<thi", "nk>x</thi", "nk>y"] {
+            let partial = filter.push(chunk);
+            out.content.push_str(&partial.content);
+            out.thinking.push_str(&partial.thinking);
+        }
+
+        let final_out = filter.finish();
+        out.content.push_str(&final_out.content);
+        out.thinking.push_str(&final_out.thinking);
+
+        assert_eq!(out.content, "y");
+        assert_eq!(out.thinking, "x");
+    }
+
+    #[test]
+    fn test_think_filter_preserves_non_think_tags() {
+        let mut filter = ThinkFilter::new();
+        let mut out = filter.push("<table>");
+        let final_out = filter.finish();
+        out.content.push_str(&final_out.content);
+        out.thinking.push_str(&final_out.thinking);
+
+        assert_eq!(out.content, "<table>");
+        assert!(out.thinking.is_empty());
+    }
+
+    #[test]
+    fn test_think_filter_finish_treats_unterminated_think_as_thinking() {
+        let mut filter = ThinkFilter::new();
+        let mut out = filter.push("<think>unfinished");
+        let final_out = filter.finish();
+        out.content.push_str(&final_out.content);
+        out.thinking.push_str(&final_out.thinking);
+
+        assert!(out.content.is_empty());
+        assert_eq!(out.thinking, "unfinished");
+    }
+
+    #[test]
+    fn test_think_filter_preserves_tags_with_think_prefix() {
+        for input in [
+            "<thinking-mode>hello</thinking-mode>",
+            "<thinking123>payload</thinking123>",
+            "<thinker>note</thinker>",
+        ] {
+            let mut filter = ThinkFilter::new();
+            let mut out = filter.push(input);
+            let final_out = filter.finish();
+            out.content.push_str(&final_out.content);
+            out.thinking.push_str(&final_out.thinking);
+
+            assert_eq!(out.content, input, "content mismatch for {input:?}");
+            assert!(
+                out.thinking.is_empty(),
+                "unexpected thinking for {input:?}: {:?}",
+                out.thinking
+            );
+        }
+    }
+
+    #[test]
+    fn test_think_filter_accepts_think_with_attributes() {
+        let mut filter = ThinkFilter::new();
+        let mut out = filter.push("<think data-source=\"x\">hidden</think>visible");
+        let final_out = filter.finish();
+        out.content.push_str(&final_out.content);
+        out.thinking.push_str(&final_out.thinking);
+
+        assert_eq!(out.content, "visible");
+        assert_eq!(out.thinking, "hidden");
+    }
+
+    #[test]
+    fn test_think_filter_treats_self_closing_as_noop() {
+        // `<think/>` carries no reasoning payload. It must not flip the filter
+        // into "inside_think" mode, and the tag itself must not leak into
+        // visible content.
+        for input in [
+            "before <think/> after",
+            "before <think /> after",
+            "before <thinking/> after",
+            "before <think data-source=\"x\"/> after",
+        ] {
+            let mut filter = ThinkFilter::new();
+            let mut out = filter.push(input);
+            let final_out = filter.finish();
+            out.content.push_str(&final_out.content);
+            out.thinking.push_str(&final_out.thinking);
+
+            assert_eq!(
+                out.content, "before  after",
+                "content mismatch for {input:?}"
+            );
+            assert!(
+                out.thinking.is_empty(),
+                "unexpected thinking for {input:?}: {:?}",
+                out.thinking
+            );
+        }
+    }
+
+    #[test]
+    fn test_think_filter_self_closing_does_not_swallow_following_content() {
+        // Regression: a self-closing `<think/>` used to be classified as an
+        // Open tag, which incremented think_depth and routed everything after
+        // it into the thinking bucket for the rest of the stream.
+        let mut filter = ThinkFilter::new();
+        let mut out = filter.push("<think/>visible chunk 1");
+        let final_out = filter.push("visible chunk 2");
+        let tail_out = filter.finish();
+        out.content.push_str(&final_out.content);
+        out.thinking.push_str(&final_out.thinking);
+        out.content.push_str(&tail_out.content);
+        out.thinking.push_str(&tail_out.thinking);
+
+        assert_eq!(out.content, "visible chunk 1visible chunk 2");
+        assert!(out.thinking.is_empty());
+    }
+
+    #[test]
+    fn test_think_filter_streaming_across_self_closing_boundary() {
+        // Regression: a chunk boundary between `<think` and `>` in a
+        // self-closing `<think/>` used to fall out of the partial-tag regex
+        // (which only allowed `<think<ws>...`), so the `<think/` prefix leaked
+        // into visible content before the `>` arrived.
+        for (a, b) in [
+            ("before <think/", "> after"),
+            ("before <thinking/", "> after"),
+            ("head <think ", "/> tail"),
+        ] {
+            let mut filter = ThinkFilter::new();
+            let mut out = filter.push(a);
+            let second = filter.push(b);
+            let final_out = filter.finish();
+            out.content.push_str(&second.content);
+            out.content.push_str(&final_out.content);
+            out.thinking.push_str(&second.thinking);
+            out.thinking.push_str(&final_out.thinking);
+
+            assert!(
+                !out.content.contains('<'),
+                "partial tag leaked into content for ({a:?}, {b:?}): {:?}",
+                out.content
+            );
+            assert!(
+                out.thinking.is_empty(),
+                "unexpected thinking for ({a:?}, {b:?}): {:?}",
+                out.thinking
+            );
+        }
+    }
+
+    #[test]
+    fn test_think_filter_streaming_across_quoted_attribute_boundary() {
+        let mut filter = ThinkFilter::new();
+        let mut out = filter.push(r#"<think data="a>b"#);
+        assert!(out.content.is_empty());
+        assert!(out.thinking.is_empty());
+
+        let second = filter.push(r#""/>Visible"#);
+        let final_out = filter.finish();
+        out.content.push_str(&second.content);
+        out.content.push_str(&final_out.content);
+        out.thinking.push_str(&second.thinking);
+        out.thinking.push_str(&final_out.thinking);
+
+        assert_eq!(out.content, "Visible");
+        assert!(out.thinking.is_empty());
+    }
+
+    #[test]
+    fn test_think_filter_self_closing_inside_open_block_closes_nothing() {
+        // `<think/>` inside an open `<think>` block is still a no-op: depth
+        // should stay at 1 until the real `</think>` arrives.
+        let mut filter = ThinkFilter::new();
+        let mut out = filter.push("before <think>hidden1 <think/> hidden2</think>visible");
+        let final_out = filter.finish();
+        out.content.push_str(&final_out.content);
+        out.thinking.push_str(&final_out.thinking);
+
+        assert_eq!(out.content, "before visible");
+        assert_eq!(out.thinking, "hidden1  hidden2");
+    }
+
    #[test]
    fn test_extract_short_title() {
        assert_eq!(extract_short_title("List files"), "List files");
--- a/crates/goose/src/providers/formats/openai.rs
+++ b/crates/goose/src/providers/formats/openai.rs
@ -1,7 +1,7 @@
 use crate::conversation::message::{Message, MessageContent, ProviderMetadata};
 use crate::mcp_utils::extract_text_from_resource;
 use crate::model::ModelConfig;
-use crate::providers::base::{ProviderUsage, Usage};
+use crate::providers::base::{split_think_blocks, ProviderUsage, ThinkFilter, Usage};
 use crate::providers::errors::ProviderError;
 use crate::providers::utils::{
    convert_image, detect_image_path, extract_reasoning_effort, is_openai_responses_model,
@ -518,9 +518,11 @@ pub fn response_to_message(response: &Value) -> anyhow::Result<Message> {
    let reasoning_value = original
        .get("reasoning_content")
        .or_else(|| original.get("reasoning"));
+    let mut has_structured_thinking = false;
    if let Some(reasoning_content) = reasoning_value {
        if let Some(reasoning_str) = reasoning_content.as_str() {
            if !reasoning_str.is_empty() {
+                has_structured_thinking = true;
                content.push(MessageContent::thinking(reasoning_str, ""));
            }
        }
@ -528,7 +530,15 @@ pub fn response_to_message(response: &Value) -> anyhow::Result<Message> {

    if let Some(text) = original.get("content") {
        if let Some(text_str) = text.as_str() {
-            content.push(MessageContent::text(text_str));
+            let (cleaned, inline_thinking) = split_think_blocks(text_str);
+
+            if !has_structured_thinking && !inline_thinking.is_empty() {
+                content.push(MessageContent::thinking(inline_thinking, ""));
+            }
+
+            if !cleaned.is_empty() {
+                content.push(MessageContent::text(cleaned));
+            }
        }
    }

@ -822,7 +832,13 @@ where

        let mut accumulated_reasoning: Vec<Value> = Vec::new();
        let mut accumulated_reasoning_content = String::new();
+        let mut think_filter = ThinkFilter::new();
+        let mut saw_structured_reasoning = false;
        let mut last_signature: Option<String> = None;
+        // Buffer inline <think>...</think> content until we know whether structured
+        // reasoning will arrive. Emitting it immediately and then receiving
+        // reasoning_content in a later chunk would produce duplicated reasoning.
+        let mut pending_inline_thinking = String::new();

        'outer: while let Some(response) = stream.next().await {
            let response_str = response?;
@ -846,6 +862,10 @@ where
                }
                if let Some(rc) = chunk.choices[0].delta.reasoning_text() {
                    accumulated_reasoning_content.push_str(rc);
+                    if !rc.is_empty() {
+                        saw_structured_reasoning = true;
+                        pending_inline_thinking.clear();
+                    }
                }
            }

@ -888,6 +908,10 @@ where
                                    }
                                    if let Some(rc) = tool_chunk.choices[0].delta.reasoning_text() {
                                        accumulated_reasoning_content.push_str(rc);
+                                        if !rc.is_empty() {
+                                            saw_structured_reasoning = true;
+                                            pending_inline_thinking.clear();
+                                        }
                                    }
                                    if let Some(delta_tool_calls) = &tool_chunk.choices[0].delta.tool_calls {
                                        for delta_call in delta_tool_calls {
@ -928,6 +952,37 @@ where
                    None
                };

+                let filtered = think_filter.push("");
+                let mut flush_thinking = String::new();
+                if !saw_structured_reasoning {
+                    flush_thinking.push_str(&pending_inline_thinking);
+                    flush_thinking.push_str(&filtered.thinking);
+                }
+                pending_inline_thinking.clear();
+                if !filtered.content.is_empty() || !flush_thinking.is_empty() {
+                    let mut filtered_contents = Vec::new();
+                    if !filtered.content.is_empty() {
+                        filtered_contents.push(MessageContent::text(filtered.content));
+                    }
+                    if !flush_thinking.is_empty() {
+                        filtered_contents.push(MessageContent::thinking(flush_thinking, ""));
+                    }
+
+                    if !filtered_contents.is_empty() {
+                        let mut msg = Message::new(
+                            Role::Assistant,
+                            chrono::Utc::now().timestamp(),
+                            filtered_contents,
+                        );
+
+                        if let Some(id) = chunk.id.clone() {
+                            msg = msg.with_id(id);
+                        }
+
+                        yield (Some(msg), None);
+                    }
+                }
+
                let mut contents = Vec::new();
                if !accumulated_reasoning_content.is_empty() {
                    contents.push(MessageContent::thinking(&accumulated_reasoning_content, ""));
@ -1009,8 +1064,14 @@ where
                }

                if let Some(text) = text_content {
-                    if !text.is_empty() {
-                        content.push(MessageContent::text(&text));
+                    let filtered = think_filter.push(&text);
+
+                    if !saw_structured_reasoning && !filtered.thinking.is_empty() {
+                        pending_inline_thinking.push_str(&filtered.thinking);
+                    }
+
+                    if !filtered.content.is_empty() {
+                        content.push(MessageContent::text(filtered.content));
                    }
                }

@ -1040,6 +1101,35 @@ where
                yield (None, usage)
            }
        }
+
+        let filtered = think_filter.finish();
+        let mut trailing_thinking = String::new();
+        if !saw_structured_reasoning {
+            trailing_thinking.push_str(&pending_inline_thinking);
+            trailing_thinking.push_str(&filtered.thinking);
+        }
+        pending_inline_thinking.clear();
+
+        if !filtered.content.is_empty() || !trailing_thinking.is_empty() {
+            let mut content = Vec::new();
+
+            if !filtered.content.is_empty() {
+                content.push(MessageContent::text(filtered.content));
+            }
+
+            if !trailing_thinking.is_empty() {
+                content.push(MessageContent::thinking(trailing_thinking, ""));
+            }
+
+            yield (
+                Some(Message::new(
+                    Role::Assistant,
+                    chrono::Utc::now().timestamp(),
+                    content,
+                )),
+                None,
+            )
+        }
    }
 }

@ -2329,6 +2419,85 @@ data: [DONE]"#;
        panic!("Expected tool call message with nested extra_content metadata");
    }

+    #[tokio::test]
+    async fn test_streaming_response_extracts_inline_think_blocks() -> anyhow::Result<()> {
+        let response_lines = concat!(
+            "data: {\"id\":\"chunk-1\",\"choices\":[{\"delta\":{\"content\":\"<thi\"},\"index\":0,\"finish_reason\":null}]}\n",
+            "data: {\"id\":\"chunk-1\",\"choices\":[{\"delta\":{\"content\":\"nk>x</thi\"},\"index\":0,\"finish_reason\":null}]}\n",
+            "data: {\"id\":\"chunk-1\",\"choices\":[{\"delta\":{\"content\":\"nk>y\"},\"index\":0,\"finish_reason\":\"stop\"}]}\n",
+            "data: [DONE]\n"
+        );
+
+        let response_stream =
+            tokio_stream::iter(response_lines.lines().map(|line| Ok(line.to_string())));
+        let mut messages = std::pin::pin!(response_to_streaming_message(response_stream));
+
+        let mut text = String::new();
+        let mut thinking = String::new();
+
+        while let Some(result) = messages.next().await {
+            let (message, _) = result?;
+            if let Some(message) = message {
+                for item in message.content {
+                    match item {
+                        MessageContent::Text(text_content) => text.push_str(&text_content.text),
+                        MessageContent::Thinking(thinking_content) => {
+                            thinking.push_str(&thinking_content.thinking)
+                        }
+                        _ => {}
+                    }
+                }
+            }
+        }
+
+        assert_eq!(text, "y");
+        assert_eq!(thinking, "x");
+
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn test_streaming_suppresses_inline_think_when_structured_reasoning_follows(
+    ) -> anyhow::Result<()> {
+        // Inline <think>...</think> arrives in an early content chunk, then
+        // reasoning_content arrives in a later chunk. The inline thinking
+        // should be discarded in favor of the structured reasoning so users
+        // do not get duplicated reasoning output.
+        let response_lines = concat!(
+            "data: {\"id\":\"chunk-1\",\"choices\":[{\"delta\":{\"content\":\"<think>inline reasoning</think>Hi\"},\"index\":0,\"finish_reason\":null}]}\n",
+            "data: {\"id\":\"chunk-2\",\"choices\":[{\"delta\":{\"reasoning_content\":\"structured reasoning\"},\"index\":0,\"finish_reason\":null}]}\n",
+            "data: {\"id\":\"chunk-3\",\"choices\":[{\"delta\":{\"content\":\" there\"},\"index\":0,\"finish_reason\":\"stop\"}]}\n",
+            "data: [DONE]\n"
+        );
+
+        let response_stream =
+            tokio_stream::iter(response_lines.lines().map(|line| Ok(line.to_string())));
+        let mut messages = std::pin::pin!(response_to_streaming_message(response_stream));
+
+        let mut text = String::new();
+        let mut thinking = String::new();
+
+        while let Some(result) = messages.next().await {
+            let (message, _) = result?;
+            if let Some(message) = message {
+                for item in message.content {
+                    match item {
+                        MessageContent::Text(text_content) => text.push_str(&text_content.text),
+                        MessageContent::Thinking(thinking_content) => {
+                            thinking.push_str(&thinking_content.thinking)
+                        }
+                        _ => {}
+                    }
+                }
+            }
+        }
+
+        assert_eq!(text, "Hi there");
+        assert_eq!(thinking, "structured reasoning");
+
+        Ok(())
+    }
+
    #[test]
    fn test_response_to_message_with_reasoning_content() -> anyhow::Result<()> {
        // Test capturing reasoning_content from DeepSeek reasoning models
@ -2367,6 +2536,66 @@ data: [DONE]"#;
        Ok(())
    }

+    #[test]
+    fn test_response_to_message_extracts_inline_think_blocks() -> anyhow::Result<()> {
+        let response = json!({
+            "choices": [{
+                "role": "assistant",
+                "message": {
+                    "content": "<think>internal reasoning</think>Visible answer"
+                }
+            }]
+        });
+
+        let message = response_to_message(&response)?;
+        assert_eq!(message.content.len(), 2);
+
+        if let MessageContent::Thinking(thinking) = &message.content[0] {
+            assert_eq!(thinking.thinking, "internal reasoning");
+        } else {
+            panic!("Expected Thinking content, got {:?}", message.content[0]);
+        }
+
+        if let MessageContent::Text(text) = &message.content[1] {
+            assert_eq!(text.text, "Visible answer");
+        } else {
+            panic!("Expected Text content");
+        }
+
+        Ok(())
+    }
+
+    #[test]
+    fn test_response_to_message_prefers_structured_reasoning_over_inline_think(
+    ) -> anyhow::Result<()> {
+        let response = json!({
+            "choices": [{
+                "role": "assistant",
+                "message": {
+                    "reasoning_content": "structured reasoning",
+                    "content": "<think>inline reasoning</think>Visible answer"
+                }
+            }]
+        });
+
+        let message = response_to_message(&response)?;
+        assert_eq!(message.content.len(), 2);
+
+        if let MessageContent::Thinking(thinking) = &message.content[0] {
+            assert_eq!(thinking.thinking, "structured reasoning");
+        } else {
+            panic!("Expected Thinking content");
+        }
+
+        if let MessageContent::Text(text) = &message.content[1] {
+            assert_eq!(text.text, "Visible answer");
+        } else {
+            panic!("Expected Text content");
+        }
+
+        Ok(())
+    }
+
    #[test]
    fn test_format_messages_with_reasoning_content() -> anyhow::Result<()> {
        // Test that reasoning_content is properly included in formatted messages