fix: strip <think> tags from chat responses

Add thinking content cleaning to notebook and source chat graphs.
Previously, models that output <think>...</think> tags (like DeepSeek)
or malformed variants without opening tags (like Nemotron) would leak
reasoning content into user-visible responses.

Changes:
- chat.py: Clean AI response content before returning messages
- source_chat.py: Same fix for source-specific chat
- text_utils.py: Handle malformed output where opening <think> tag
  is missing but </think> is present

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Justin Florentine 2025-12-18 16:31:23 -05:00
parent 65166d4d2a
commit 869664a10b
No known key found for this signature in database
3 changed files with 45 additions and 15 deletions

View file

@@ -3,8 +3,10 @@ import sqlite3
from typing import Annotated, Dict, List, Optional
from ai_prompter import Prompter
-from langchain_core.messages import SystemMessage
+from langchain_core.messages import AIMessage, SystemMessage
from langchain_core.runnables import RunnableConfig
from open_notebook.utils import clean_thinking_content
from langgraph.checkpoint.sqlite import SqliteSaver
from langgraph.graph import END, START, StateGraph
from langgraph.graph.message import add_messages
@@ -154,9 +156,14 @@ def call_model_with_source_context(
ai_message = model.invoke(payload)
# Clean thinking content from AI response (e.g., <think>...</think> tags)
content = ai_message.content if isinstance(ai_message.content, str) else str(ai_message.content)
cleaned_content = clean_thinking_content(content)
cleaned_message = AIMessage(content=cleaned_content)
# Update state with context information
return {
"messages": ai_message,
"messages": cleaned_message,
"source": source,
"insights": insights,
"context": formatted_context,