mirror of
https://github.com/Alishahryar1/free-claude-code.git
synced 2026-05-01 21:00:44 +00:00
192 lines
6.1 KiB
Python
192 lines
6.1 KiB
Python
"""Think tag parser for extracting reasoning content from responses."""
|
|
|
|
import re
|
|
from dataclasses import dataclass
|
|
from typing import Optional, Tuple, Iterator, Any
|
|
from enum import Enum
|
|
|
|
|
|
class ContentType(Enum):
    """Category assigned to each chunk emitted by the parser."""

    # Content found outside <think>...</think> tags.
    TEXT = "text"
    # Content found between <think> and </think>.
    THINKING = "thinking"
|
|
|
|
|
|
@dataclass
class ContentChunk:
    """One piece of parsed output: a content category plus its text."""

    # Whether the text is regular output or reasoning ("thinking") content.
    type: ContentType
    # The text carried by this chunk, with the think tags already removed.
    content: str
|
|
|
|
|
|
class ThinkTagParser:
    """
    Streaming parser for <think>...</think> tags.

    Text is supplied incrementally through ``feed``. Content outside the tags
    is emitted as ``ContentType.TEXT`` chunks and content between them as
    ``ContentType.THINKING`` chunks; the tags themselves are never emitted.

    Handles partial tags at chunk boundaries by buffering: a trailing
    fragment that could still grow into the tag being searched for is held
    back until more input arrives or ``flush`` is called.
    """

    OPEN_TAG = "<think>"
    CLOSE_TAG = "</think>"
    # Derived from the tag strings so the lengths can never drift out of sync.
    OPEN_TAG_LEN = len(OPEN_TAG)
    CLOSE_TAG_LEN = len(CLOSE_TAG)

    def __init__(self) -> None:
        # Unprocessed input; between calls it holds at most a partial tag.
        self._buffer: str = ""
        # True while the parser is positioned between <think> and </think>.
        self._in_think_tag: bool = False

    @property
    def in_think_mode(self) -> bool:
        """Whether currently inside a think tag."""
        return self._in_think_tag

    def feed(self, content: str) -> Iterator[ContentChunk]:
        """
        Feed content and yield parsed chunks.

        Handles partial tags by buffering content near potential tag
        boundaries. This is a generator: the input is only appended to the
        buffer and parsed once the returned iterator is consumed.
        """
        self._buffer += content

        while self._buffer:
            if self._in_think_tag:
                chunk = self._parse_inside_think()
            else:
                chunk = self._parse_outside_think()
            if chunk is None:
                # Nothing more can be emitted until additional input arrives.
                break
            yield chunk

    def _parse_outside_think(self) -> Optional[ContentChunk]:
        """Parse the buffer starting from the position outside think tags."""
        return self._next_chunk(inside=False)

    def _parse_inside_think(self) -> Optional[ContentChunk]:
        """Parse the buffer starting from the position inside think tags."""
        return self._next_chunk(inside=True)

    def _next_chunk(self, inside: bool) -> Optional[ContentChunk]:
        """
        Return the next non-empty chunk, or None if more input is required.

        Tags with nothing in front of them are consumed in a loop rather than
        by mutual recursion, so a long run of empty ``<think></think>`` pairs
        cannot exhaust the interpreter's recursion limit.
        """
        while True:
            if inside:
                tag, kind = self.CLOSE_TAG, ContentType.THINKING
            else:
                tag, kind = self.OPEN_TAG, ContentType.TEXT

            tag_pos = self._buffer.find(tag)
            if tag_pos == -1:
                return self._emit_before_partial(tag, kind)

            # Complete tag found: drop it and switch mode.
            preceding = self._buffer[:tag_pos]
            self._buffer = self._buffer[tag_pos + len(tag):]
            inside = not inside
            self._in_think_tag = inside
            if preceding:
                return ContentChunk(kind, preceding)
            # Nothing preceded the tag; keep scanning in the new mode.

    def _emit_before_partial(
        self, tag: str, kind: ContentType
    ) -> Optional[ContentChunk]:
        """
        Emit buffered content when no complete ``tag`` is present.

        A trailing fragment starting at the last '<' is kept in the buffer
        when it is a proper prefix of ``tag``; everything else is emitted.
        Returns None when there is nothing to emit.
        """
        last_bracket = self._buffer.rfind("<")
        if (
            last_bracket != -1
            and len(self._buffer) - last_bracket < len(tag)
            and tag.startswith(self._buffer[last_bracket:])
        ):
            # Possible start of the tag: hold it back for the next feed().
            emit = self._buffer[:last_bracket]
            self._buffer = self._buffer[last_bracket:]
        else:
            # No partial tag found or it's irrelevant.
            emit = self._buffer
            self._buffer = ""

        return ContentChunk(kind, emit) if emit else None

    def flush(self) -> Optional[ContentChunk]:
        """
        Flush any remaining buffered content.

        The leftover text (for example a held-back partial tag) is returned
        as a single chunk typed according to the current mode, or None when
        the buffer is empty. The think-mode flag is left unchanged.
        """
        if self._buffer:
            chunk_type = (
                ContentType.THINKING if self._in_think_tag else ContentType.TEXT
            )
            content = self._buffer
            self._buffer = ""
            return ContentChunk(chunk_type, content)
        return None

    def reset(self) -> None:
        """Reset parser state: discard buffered input and leave think mode."""
        self._buffer = ""
        self._in_think_tag = False
|
|
|
|
|
|
def extract_think_content(text: str) -> Tuple[Optional[str], str]:
    """
    Split complete <think>...</think> sections out of a full response.

    Non-streaming counterpart of the incremental parser.

    Returns: (thinking_content, remaining_text). When at least one complete
    section exists, the section bodies are joined with newlines and the
    remaining text has the sections removed and surrounding whitespace
    stripped. Otherwise the result is (None, text) with the text untouched.
    """
    pattern = re.compile(r"<think>(.*?)</think>", re.DOTALL)
    sections = [found.group(1) for found in pattern.finditer(text)]

    if not sections:
        return None, text

    without_sections = pattern.sub("", text)
    return "\n".join(sections), without_sections.strip()
|
|
|
|
|
|
def extract_reasoning_from_delta(delta: Any) -> Optional[str]:
    """
    Extract reasoning content from an OpenAI-style delta given as a dict.

    Checks both 'reasoning_content' and 'reasoning_details' fields:

    * a truthy 'reasoning_content' value is returned as-is;
    * otherwise, if 'reasoning_details' is a non-empty list, the string
      'text' values of its dict items are concatenated and returned.

    Returns None when ``delta`` is not a dict or neither field is usable.
    """
    if not isinstance(delta, dict):
        return None

    reasoning = delta.get("reasoning_content")
    if reasoning:
        return reasoning

    reasoning_details = delta.get("reasoning_details")
    if reasoning_details and isinstance(reasoning_details, list):
        texts = (
            item.get("text")
            for item in reasoning_details
            if isinstance(item, dict)
        )
        # Keep only real strings: an item with a missing, null or otherwise
        # non-string 'text' would make str.join raise TypeError.
        return "".join(text for text in texts if isinstance(text, str))

    return None
|