SurfSense/surfsense_backend/app/tasks/stream_connector_search_results.py
Utkarsh-Patel-13 92781e726c Updated Streaming Service to efficiently stream content
- Earlier for each chunk, whole message (with all annotations included)
  were streamed. Leading to extremely large data length.
- Fixed to only stream new chunk.
- Updated ANSWER part to be streamed as message content (following
  Vercel's Stream Protocol)\
- Fixed yield typo
2025-07-18 17:43:07 -07:00

90 lines
2.8 KiB
Python

from typing import Any, AsyncGenerator, List, Union
from uuid import UUID
from app.agents.researcher.graph import graph as researcher_graph
from app.agents.researcher.state import State
from app.services.streaming_service import StreamingService
from sqlalchemy.ext.asyncio import AsyncSession
from app.agents.researcher.configuration import SearchMode
async def stream_connector_search_results(
    user_query: str,
    user_id: Union[str, UUID],
    search_space_id: int,
    session: AsyncSession,
    research_mode: str,
    selected_connectors: List[str],
    langchain_chat_history: List[Any],
    search_mode_str: str,
    document_ids_to_add_in_context: List[int]
) -> AsyncGenerator[str, None]:
    """
    Stream connector search results to the client.

    Runs the researcher graph in custom stream mode and yields each
    formatted chunk, followed by a final completion marker.

    Args:
        user_query: The user's query
        user_id: The user's ID (can be a UUID object or a string)
        search_space_id: The search space ID
        session: The database session
        research_mode: The research mode ("REPORT_GENERAL", "REPORT_DEEP",
            or "REPORT_DEEPER"; anything else falls back to 1 section)
        selected_connectors: List of selected connectors
        langchain_chat_history: Prior chat messages passed to the graph state
        search_mode_str: "CHUNKS" or "DOCUMENTS"; anything else falls back
            to CHUNKS
        document_ids_to_add_in_context: Document IDs to force into context

    Yields:
        str: Formatted response strings
    """
    streaming_service = StreamingService()

    # Map research mode to the number of report sections to generate.
    if research_mode == "REPORT_GENERAL":
        NUM_SECTIONS = 1
    elif research_mode == "REPORT_DEEP":
        NUM_SECTIONS = 3
    elif research_mode == "REPORT_DEEPER":
        NUM_SECTIONS = 6
    else:
        # Default fallback
        NUM_SECTIONS = 1

    # Convert UUID to string if needed
    user_id_str = str(user_id) if isinstance(user_id, UUID) else user_id

    if search_mode_str == "CHUNKS":
        search_mode = SearchMode.CHUNKS
    elif search_mode_str == "DOCUMENTS":
        search_mode = SearchMode.DOCUMENTS
    else:
        # Default fallback — without this, an unexpected search_mode_str
        # left `search_mode` unbound and raised UnboundLocalError below.
        search_mode = SearchMode.CHUNKS

    # Configuration consumed by the researcher graph.
    config = {
        "configurable": {
            "user_query": user_query,
            "num_sections": NUM_SECTIONS,
            "connectors_to_search": selected_connectors,
            "user_id": user_id_str,
            "search_space_id": search_space_id,
            "search_mode": search_mode,
            "research_mode": research_mode,
            "document_ids_to_add_in_context": document_ids_to_add_in_context
        }
    }

    # Initialize state with database session and streaming service
    initial_state = State(
        db_session=session,
        streaming_service=streaming_service,
        chat_history=langchain_chat_history
    )

    # Run the graph directly
    print("\nRunning the complete researcher workflow...")

    # Use streaming with config parameter; only new chunks are yielded
    # (each chunk dict carries its payload under "yield_value").
    async for chunk in researcher_graph.astream(
        initial_state,
        config=config,
        stream_mode="custom",
    ):
        if isinstance(chunk, dict):
            if "yield_value" in chunk:
                yield chunk["yield_value"]

    yield streaming_service.format_completion()