Fixed current agent citation issues and added sub_section_writer agent for upcoming SurfSense research agent

This commit is contained in:
DESKTOP-RTLN3BA\$punk 2025-04-13 20:47:23 -07:00
parent fa5dbb786f
commit 0b93c9dfef
13 changed files with 565 additions and 81 deletions

@ -0,0 +1,8 @@
"""New LangGraph Agent.
This module defines a custom graph.
"""
from .graph import graph
__all__ = ["graph"]

@ -0,0 +1,31 @@
"""Define the configurable parameters for the agent."""
from __future__ import annotations
from dataclasses import dataclass, fields
from typing import Optional, List
from langchain_core.runnables import RunnableConfig
@dataclass(kw_only=True)
class Configuration:
"""The configuration for the agent."""
# Input parameters provided at invocation
sub_section_title: str
sub_questions: List[str]
connectors_to_search: List[str]
user_id: str
search_space_id: int
top_k: int = 20 # Default top_k value
@classmethod
def from_runnable_config(
cls, config: Optional[RunnableConfig] = None
) -> Configuration:
"""Create a Configuration instance from a RunnableConfig object."""
configurable = (config.get("configurable") or {}) if config else {}
_fields = {f.name for f in fields(cls) if f.init}
return cls(**{k: v for k, v in configurable.items() if k in _fields})
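For reference, a minimal sketch (not part of this commit, values purely illustrative) of how these fields might be supplied through the "configurable" key of a RunnableConfig at invocation time:

example_config = {
    "configurable": {
        "sub_section_title": "Background on hybrid search",  # hypothetical title
        "sub_questions": ["What is hybrid search?", "How does reranking improve results?"],
        "connectors_to_search": ["CRAWLED_URL", "FILE"],
        "user_id": "user-123",  # hypothetical user ID
        "search_space_id": 1,
        "top_k": 20,
    }
}
configuration = Configuration.from_runnable_config(example_config)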

@ -0,0 +1,22 @@
from langgraph.graph import StateGraph
from .state import State
from .nodes import fetch_relevant_documents, write_sub_section
from .configuration import Configuration
# Define a new graph
workflow = StateGraph(State, config_schema=Configuration)
# Add the nodes to the graph
workflow.add_node("fetch_relevant_documents", fetch_relevant_documents)
workflow.add_node("write_sub_section", write_sub_section)
# Entry point
workflow.add_edge("__start__", "fetch_relevant_documents")
# Connect fetch_relevant_documents to write_sub_section
workflow.add_edge("fetch_relevant_documents", "write_sub_section")
# Exit point
workflow.add_edge("write_sub_section", "__end__")
# Compile the workflow into an executable graph
graph = workflow.compile()
graph.name = "Sub Section Writer" # This defines the custom name in LangSmith
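A hedged usage sketch, assuming an async SQLAlchemy session named session is in scope and the call happens inside an async function; the configuration values below are illustrative and not part of this commit:

# Illustrative invocation of the compiled sub-section writer graph
result = await graph.ainvoke(
    {"db_session": session},  # populates State.db_session
    config={
        "configurable": {
            "sub_section_title": "Background on hybrid search",
            "sub_questions": ["What is hybrid search?"],
            "connectors_to_search": ["FILE", "CRAWLED_URL"],
            "user_id": "user-123",  # hypothetical user ID
            "search_space_id": 1,
        }
    },
)
print(result["final_answer"])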

@ -0,0 +1,244 @@
from .configuration import Configuration
from langchain_core.runnables import RunnableConfig
from .state import State
from typing import Any, Dict
from app.utils.connector_service import ConnectorService
from app.utils.reranker_service import RerankerService
from app.config import config as app_config
from .prompts import citation_system_prompt
from langchain_core.messages import HumanMessage, SystemMessage
async def fetch_relevant_documents(state: State, config: RunnableConfig) -> Dict[str, Any]:
"""
Fetch relevant documents for the sub-section using specified connectors.
This node retrieves documents from various data sources based on the sub-questions
derived from the sub-section title. It searches across all selected connectors
(YouTube, Extension, Crawled URLs, Files, Tavily API, Slack, Notion) and reranks
the results to provide the most relevant information for the agent workflow.
Returns:
Dict containing the reranked documents in the "relevant_documents_fetched" key.
"""
# Get configuration
configuration = Configuration.from_runnable_config(config)
# Extract state parameters
db_session = state.db_session
# Extract config parameters
user_id = configuration.user_id
search_space_id = configuration.search_space_id
TOP_K = configuration.top_k
# Initialize services
connector_service = ConnectorService(db_session)
reranker_service = RerankerService.get_reranker_instance(app_config)
all_raw_documents = [] # Store all raw documents before reranking
for user_query in configuration.sub_questions:
# Reformulate query (optional, consider if needed for each sub-question)
# reformulated_query = await QueryService.reformulate_query(user_query)
reformulated_query = user_query # Using original sub-question for now
# Process each selected connector
for connector in configuration.connectors_to_search:
if connector == "YOUTUBE_VIDEO":
_, youtube_chunks = await connector_service.search_youtube(
user_query=reformulated_query,
user_id=user_id,
search_space_id=search_space_id,
top_k=TOP_K
)
all_raw_documents.extend(youtube_chunks)
elif connector == "EXTENSION":
_, extension_chunks = await connector_service.search_extension(
user_query=reformulated_query,
user_id=user_id,
search_space_id=search_space_id,
top_k=TOP_K
)
all_raw_documents.extend(extension_chunks)
elif connector == "CRAWLED_URL":
_, crawled_urls_chunks = await connector_service.search_crawled_urls(
user_query=reformulated_query,
user_id=user_id,
search_space_id=search_space_id,
top_k=TOP_K
)
all_raw_documents.extend(crawled_urls_chunks)
elif connector == "FILE":
_, files_chunks = await connector_service.search_files(
user_query=reformulated_query,
user_id=user_id,
search_space_id=search_space_id,
top_k=TOP_K
)
all_raw_documents.extend(files_chunks)
elif connector == "TAVILY_API":
_, tavily_chunks = await connector_service.search_tavily(
user_query=reformulated_query,
user_id=user_id,
top_k=TOP_K
)
all_raw_documents.extend(tavily_chunks)
elif connector == "SLACK_CONNECTOR":
_, slack_chunks = await connector_service.search_slack(
user_query=reformulated_query,
user_id=user_id,
search_space_id=search_space_id,
top_k=TOP_K
)
all_raw_documents.extend(slack_chunks)
elif connector == "NOTION_CONNECTOR":
_, notion_chunks = await connector_service.search_notion(
user_query=reformulated_query,
user_id=user_id,
search_space_id=search_space_id,
top_k=TOP_K
)
all_raw_documents.extend(notion_chunks)
# Deduplicate documents based on chunk_id or content to avoid processing duplicates,
# then rerank them below if a reranker service is available
seen_chunk_ids = set()
seen_content_hashes = set()
deduplicated_docs = []
for doc in all_raw_documents:
chunk_id = doc.get("chunk_id")
content = doc.get("content", "")
content_hash = hash(content)
# Skip if we've seen this chunk_id or content before
if (chunk_id and chunk_id in seen_chunk_ids) or content_hash in seen_content_hashes:
continue
# Add to our tracking sets and keep this document
if chunk_id:
seen_chunk_ids.add(chunk_id)
seen_content_hashes.add(content_hash)
deduplicated_docs.append(doc)
# Use deduplicated documents for reranking
reranked_docs = deduplicated_docs
if deduplicated_docs and reranker_service:
# Use the main sub_section_title for reranking context
rerank_query = configuration.sub_section_title
# Convert documents to format expected by reranker
reranker_input_docs = [
{
"chunk_id": doc.get("chunk_id", f"chunk_{i}"),
"content": doc.get("content", ""),
"score": doc.get("score", 0.0),
"document": {
"id": doc.get("document", {}).get("id", ""),
"title": doc.get("document", {}).get("title", ""),
"document_type": doc.get("document", {}).get("document_type", ""),
"metadata": doc.get("document", {}).get("metadata", {})
}
} for i, doc in enumerate(deduplicated_docs)
]
# Rerank documents using the main title query
reranked_docs = reranker_service.rerank_documents(rerank_query, reranker_input_docs)
# Sort by score in descending order
reranked_docs.sort(key=lambda x: x.get("score", 0), reverse=True)
# Update state with fetched documents
return {
"relevant_documents_fetched": reranked_docs
}
async def write_sub_section(state: State, config: RunnableConfig) -> Dict[str, Any]:
"""
Write the sub-section using the fetched documents.
This node takes the relevant documents fetched in the previous node and uses
an LLM to generate a comprehensive answer to the sub-section questions with
proper citations. The citations follow IEEE format using source IDs from the
documents.
Returns:
Dict containing the final answer in the "final_answer" key.
"""
# Get configuration and relevant documents
configuration = Configuration.from_runnable_config(config)
documents = state.relevant_documents_fetched
# Initialize LLM
llm = app_config.fast_llm_instance
# If no documents were found, return a message indicating this
if not documents:
return {
"final_answer": "No relevant documents were found to answer this question. Please try refining your search or providing more specific questions."
}
# Prepare documents for citation formatting
formatted_documents = []
for i, doc in enumerate(documents):
# Extract content and metadata
content = doc.get("content", "")
doc_info = doc.get("document", {})
document_id = doc_info.get("id", f"{i+1}") # Use document ID or index+1 as source_id
# Format document according to the citation system prompt's expected format
formatted_doc = f"""
<document>
<metadata>
<source_id>{document_id}</source_id>
</metadata>
<content>
{content}
</content>
</document>
"""
formatted_documents.append(formatted_doc)
# Create the query that combines the section title and questions
section_title = configuration.sub_section_title
questions = "\n".join([f"- {q}" for q in configuration.sub_questions])
documents_text = "\n".join(formatted_documents)
# Construct a clear, structured query for the LLM
human_message_content = f"""
Please write a comprehensive answer for the sub-section title:
<section_title>
{section_title}
</section_title>
Address the following questions:
<questions>
{questions}
</questions>
Use the provided documents as your source material and cite them properly using the IEEE citation format [X] where X is the source_id.
<documents>
{documents_text}
</documents>
"""
# Create messages for the LLM
messages = [
SystemMessage(content=citation_system_prompt),
HumanMessage(content=human_message_content)
]
# Call the LLM and get the response
response = await llm.ainvoke(messages)
final_answer = response.content
return {
"final_answer": final_answer
}
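For orientation, a hedged sketch of the chunk dictionary shape these nodes assume; the field names are inferred from the .get() calls above and the concrete values are hypothetical:

example_chunk = {
    "chunk_id": "chunk-42",  # hypothetical identifier
    "content": "Text of the retrieved chunk...",
    "score": 0.73,
    "document": {
        "id": 7,
        "title": "Some crawled page",
        "document_type": "CRAWLED_URL",
        "metadata": {},
    },
}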

@ -0,0 +1,82 @@
citation_system_prompt = """
You are a research assistant tasked with analyzing documents and providing comprehensive answers with proper citations in IEEE format.
<instructions>
1. Carefully analyze all provided documents in the <document> sections.
2. Extract relevant information that addresses the user's query.
3. Synthesize a comprehensive, well-structured answer using information from these documents.
4. For EVERY piece of information you include from the documents, add an IEEE-style citation in square brackets [X] where X is the source_id from the document's metadata.
5. Make sure ALL factual statements from the documents have proper citations.
6. If multiple documents support the same point, include all relevant citations [X], [Y].
7. Present information in a logical, coherent flow.
8. Use your own words to connect ideas, but cite ALL information from the documents.
9. If documents contain conflicting information, acknowledge this and present both perspectives with appropriate citations.
10. Do not make up or include information not found in the provided documents.
11. CRITICAL: You MUST use the exact source_id value from each document's metadata for citations. Do not create your own citation numbers.
12. CRITICAL: Every citation MUST be in the IEEE format [X] where X is the exact source_id value.
13. CRITICAL: Never renumber or reorder citations - always use the original source_id values.
14. CRITICAL: Do not return citations as clickable links.
15. CRITICAL: Never format citations as markdown links like "([1](https://example.com))". Always use plain square brackets only.
16. CRITICAL: Citations must ONLY appear as [X] or [X], [Y], [Z] format - never with parentheses, hyperlinks, or other formatting.
17. CRITICAL: Never make up citation numbers. Only use source_id values that are explicitly provided in the document metadata.
18. CRITICAL: If you are unsure about a source_id, omit the citation rather than guessing or making one up.
</instructions>
<format>
- Write in clear, professional language suitable for academic or technical audiences
- Organize your response with appropriate paragraphs, headings, and structure
- Every fact from the documents must have an IEEE-style citation in square brackets [X] where X is the EXACT source_id from the document's metadata
- Citations should appear at the end of the sentence containing the information they support
- Multiple citations should be separated by commas: [X], [Y], [Z]
- Do not include a references section; return only the inline citation numbers in the answer.
- NEVER create your own citation numbering system - use the exact source_id values from the documents.
- NEVER format citations as clickable links or as markdown links like "([1](https://example.com))". Always use plain square brackets only.
- NEVER make up citation numbers if you are unsure about the source_id. It is better to omit the citation than to guess.
</format>
<input_example>
<document>
<metadata>
<source_id>1</source_id>
</metadata>
<content>
The Great Barrier Reef is the world's largest coral reef system, stretching over 2,300 kilometers along the coast of Queensland, Australia. It comprises over 2,900 individual reefs and 900 islands.
</content>
</document>
<document>
<metadata>
<source_id>13</source_id>
</metadata>
<content>
Climate change poses a significant threat to coral reefs worldwide. Rising ocean temperatures have led to mass coral bleaching events in the Great Barrier Reef in 2016, 2017, and 2020.
</content>
</document>
<document>
<metadata>
<source_id>21</source_id>
</metadata>
<content>
The Great Barrier Reef was designated a UNESCO World Heritage Site in 1981 due to its outstanding universal value and biological diversity. It is home to over 1,500 species of fish and 400 types of coral.
</content>
</document>
</input_example>
<output_example>
The Great Barrier Reef is the world's largest coral reef system, stretching over 2,300 kilometers along the coast of Queensland, Australia [1]. It was designated a UNESCO World Heritage Site in 1981 due to its outstanding universal value and biological diversity [21]. The reef is home to over 1,500 species of fish and 400 types of coral [21]. Unfortunately, climate change poses a significant threat to coral reefs worldwide, with rising ocean temperatures leading to mass coral bleaching events in the Great Barrier Reef in 2016, 2017, and 2020 [13]. The reef system comprises over 2,900 individual reefs and 900 islands [1], making it an ecological treasure that requires protection from multiple threats [1], [13].
</output_example>
<incorrect_citation_formats>
DO NOT use any of these incorrect citation formats:
- Using parentheses and markdown links: ([1](https://github.com/MODSetter/SurfSense))
- Using parentheses around brackets: ([1])
- Using hyperlinked text: [link to source 1](https://example.com)
- Using footnote style: ... reef system¹
- Making up citation numbers when source_id is unknown
ONLY use plain square brackets [1] or multiple citations [1], [2], [3]
</incorrect_citation_formats>
Note that the citation numbers match exactly with the source_id values (1, 13, and 21) and are not renumbered sequentially. Citations follow IEEE style with square brackets and appear at the end of sentences.
"""

@ -0,0 +1,23 @@
"""Define the state structures for the agent."""
from __future__ import annotations
from dataclasses import dataclass
from typing import List, Optional, Any
from sqlalchemy.ext.asyncio import AsyncSession
@dataclass
class State:
"""Defines the dynamic state for the agent during execution.
This state tracks the database session and the outputs generated by the agent's nodes.
See: https://langchain-ai.github.io/langgraph/concepts/low_level/#state
for more information.
"""
# Runtime context
db_session: AsyncSession
# OUTPUT: Populated by agent nodes
relevant_documents_fetched: Optional[List[Any]] = None
final_answer: Optional[str] = None

@ -46,7 +46,7 @@ async def handle_chat_data(
response = StreamingResponse(stream_connector_search_results(
user_query,
user.id,
search_space_id,
int(search_space_id),
session,
research_mode,
selected_connectors

@ -14,7 +14,7 @@ from app.utils.document_converters import convert_chunks_to_langchain_documents
async def stream_connector_search_results(
user_query: str,
user_id: int,
user_id: str,
search_space_id: int,
session: AsyncSession,
research_mode: str,

@ -13,7 +13,7 @@ class ConnectorService:
self.retriever = ChucksHybridSearchRetriever(session)
self.source_id_counter = 1
async def search_crawled_urls(self, user_query: str, user_id: int, search_space_id: int, top_k: int = 20) -> tuple:
async def search_crawled_urls(self, user_query: str, user_id: str, search_space_id: int, top_k: int = 20) -> tuple:
"""
Search for crawled URLs and return both the source information and langchain documents
@ -28,16 +28,16 @@ class ConnectorService:
document_type="CRAWLED_URL"
)
# Map crawled_urls_chunks to the required format
mapped_sources = {}
# Process each chunk and create sources directly without deduplication
sources_list = []
for i, chunk in enumerate(crawled_urls_chunks):
#Fix for UI
# Fix for UI
crawled_urls_chunks[i]['document']['id'] = self.source_id_counter
# Extract document metadata
document = chunk.get('document', {})
metadata = document.get('metadata', {})
# Create a mapped source entry
# Create a source entry
source = {
"id": self.source_id_counter,
"title": document.get('title', 'Untitled Document'),
@ -46,14 +46,7 @@ class ConnectorService:
}
self.source_id_counter += 1
# Use a unique identifier for tracking unique sources
source_key = source.get("url") or source.get("title")
if source_key and source_key not in mapped_sources:
mapped_sources[source_key] = source
# Convert to list of sources
sources_list = list(mapped_sources.values())
sources_list.append(source)
# Create result object
result_object = {
@ -63,10 +56,9 @@ class ConnectorService:
"sources": sources_list,
}
return result_object, crawled_urls_chunks
async def search_files(self, user_query: str, user_id: int, search_space_id: int, top_k: int = 20) -> tuple:
async def search_files(self, user_query: str, user_id: str, search_space_id: int, top_k: int = 20) -> tuple:
"""
Search for files and return both the source information and langchain documents
@ -81,16 +73,16 @@ class ConnectorService:
document_type="FILE"
)
# Map crawled_urls_chunks to the required format
mapped_sources = {}
# Process each chunk and create sources directly without deduplication
sources_list = []
for i, chunk in enumerate(files_chunks):
#Fix for UI
# Fix for UI
files_chunks[i]['document']['id'] = self.source_id_counter
# Extract document metadata
document = chunk.get('document', {})
metadata = document.get('metadata', {})
# Create a mapped source entry
# Create a source entry
source = {
"id": self.source_id_counter,
"title": document.get('title', 'Untitled Document'),
@ -99,14 +91,7 @@ class ConnectorService:
}
self.source_id_counter += 1
# Use a unique identifier for tracking unique sources
source_key = source.get("url") or source.get("title")
if source_key and source_key not in mapped_sources:
mapped_sources[source_key] = source
# Convert to list of sources
sources_list = list(mapped_sources.values())
sources_list.append(source)
# Create result object
result_object = {
@ -118,7 +103,7 @@ class ConnectorService:
return result_object, files_chunks
async def get_connector_by_type(self, user_id: int, connector_type: SearchSourceConnectorType) -> Optional[SearchSourceConnector]:
async def get_connector_by_type(self, user_id: str, connector_type: SearchSourceConnectorType) -> Optional[SearchSourceConnector]:
"""
Get a connector by type for a specific user
@ -138,7 +123,7 @@ class ConnectorService:
)
return result.scalars().first()
async def search_tavily(self, user_query: str, user_id: int, top_k: int = 20) -> tuple:
async def search_tavily(self, user_query: str, user_id: str, top_k: int = 20) -> tuple:
"""
Search using Tavily API and return both the source information and documents
@ -177,13 +162,10 @@ class ConnectorService:
# Extract results from Tavily response
tavily_results = response.get("results", [])
# Map Tavily results to the required format
# Process each result and create sources directly without deduplication
sources_list = []
documents = []
# Start IDs from 1000 to avoid conflicts with other connectors
base_id = 100
for i, result in enumerate(tavily_results):
# Create a source entry
@ -234,7 +216,7 @@ class ConnectorService:
"sources": [],
}, []
async def search_slack(self, user_query: str, user_id: int, search_space_id: int, top_k: int = 20) -> tuple:
async def search_slack(self, user_query: str, user_id: str, search_space_id: int, top_k: int = 20) -> tuple:
"""
Search for slack and return both the source information and langchain documents
@ -249,10 +231,10 @@ class ConnectorService:
document_type="SLACK_CONNECTOR"
)
# Map slack_chunks to the required format
mapped_sources = {}
# Process each chunk and create sources directly without deduplication
sources_list = []
for i, chunk in enumerate(slack_chunks):
#Fix for UI
# Fix for UI
slack_chunks[i]['document']['id'] = self.source_id_counter
# Extract document metadata
document = chunk.get('document', {})
@ -286,14 +268,7 @@ class ConnectorService:
}
self.source_id_counter += 1
# Use channel_id and content as a unique identifier for tracking unique sources
source_key = f"{channel_id}_{chunk.get('chunk_id', i)}"
if source_key and source_key not in mapped_sources:
mapped_sources[source_key] = source
# Convert to list of sources
sources_list = list(mapped_sources.values())
sources_list.append(source)
# Create result object
result_object = {
@ -305,7 +280,7 @@ class ConnectorService:
return result_object, slack_chunks
async def search_notion(self, user_query: str, user_id: int, search_space_id: int, top_k: int = 20) -> tuple:
async def search_notion(self, user_query: str, user_id: str, search_space_id: int, top_k: int = 20) -> tuple:
"""
Search for Notion pages and return both the source information and langchain documents
@ -326,8 +301,8 @@ class ConnectorService:
document_type="NOTION_CONNECTOR"
)
# Map notion_chunks to the required format
mapped_sources = {}
# Process each chunk and create sources directly without deduplication
sources_list = []
for i, chunk in enumerate(notion_chunks):
# Fix for UI
notion_chunks[i]['document']['id'] = self.source_id_counter
@ -365,14 +340,7 @@ class ConnectorService:
}
self.source_id_counter += 1
# Use page_id and content as a unique identifier for tracking unique sources
source_key = f"{page_id}_{chunk.get('chunk_id', i)}"
if source_key and source_key not in mapped_sources:
mapped_sources[source_key] = source
# Convert to list of sources
sources_list = list(mapped_sources.values())
sources_list.append(source)
# Create result object
result_object = {
@ -384,7 +352,7 @@ class ConnectorService:
return result_object, notion_chunks
async def search_extension(self, user_query: str, user_id: int, search_space_id: int, top_k: int = 20) -> tuple:
async def search_extension(self, user_query: str, user_id: str, search_space_id: int, top_k: int = 20) -> tuple:
"""
Search for extension data and return both the source information and langchain documents
@ -405,8 +373,8 @@ class ConnectorService:
document_type="EXTENSION"
)
# Map extension_chunks to the required format
mapped_sources = {}
# Process each chunk and create sources directly without deduplication
sources_list = []
for i, chunk in enumerate(extension_chunks):
# Fix for UI
extension_chunks[i]['document']['id'] = self.source_id_counter
@ -462,14 +430,7 @@ class ConnectorService:
}
self.source_id_counter += 1
# Use URL and timestamp as a unique identifier for tracking unique sources
source_key = f"{webpage_url}_{visit_date}"
if source_key and source_key not in mapped_sources:
mapped_sources[source_key] = source
# Convert to list of sources
sources_list = list(mapped_sources.values())
sources_list.append(source)
# Create result object
result_object = {
@ -481,7 +442,7 @@ class ConnectorService:
return result_object, extension_chunks
async def search_youtube(self, user_query: str, user_id: int, search_space_id: int, top_k: int = 20) -> tuple:
async def search_youtube(self, user_query: str, user_id: str, search_space_id: int, top_k: int = 20) -> tuple:
"""
Search for YouTube videos and return both the source information and langchain documents
@ -502,8 +463,8 @@ class ConnectorService:
document_type="YOUTUBE_VIDEO"
)
# Map youtube_chunks to the required format
mapped_sources = {}
# Process each chunk and create sources directly without deduplication
sources_list = []
for i, chunk in enumerate(youtube_chunks):
# Fix for UI
youtube_chunks[i]['document']['id'] = self.source_id_counter
@ -541,18 +502,11 @@ class ConnectorService:
}
self.source_id_counter += 1
# Use video_id as a unique identifier for tracking unique sources
source_key = video_id or f"youtube_{i}"
if source_key and source_key not in mapped_sources:
mapped_sources[source_key] = source
# Convert to list of sources
sources_list = list(mapped_sources.values())
sources_list.append(source)
# Create result object
result_object = {
"id": 6, # Assign a unique ID for the YouTube connector
"id": 7, # Assign a unique ID for the YouTube connector
"name": "YouTube Videos",
"type": "YOUTUBE_VIDEO",
"sources": sources_list,

@ -13,6 +13,7 @@ dependencies = [
"gpt-researcher>=0.12.12",
"langchain-community>=0.3.17",
"langchain-unstructured>=0.1.6",
"langgraph>=0.3.29",
"litellm>=1.61.4",
"markdownify>=0.14.1",
"notion-client>=2.3.0",

@ -1499,6 +1499,61 @@ dependencies = [
]
sdist = { url = "https://files.pythonhosted.org/packages/0e/72/a3add0e4eec4eb9e2569554f7c70f4a3c27712f40e3284d483e88094cc0e/langdetect-1.0.9.tar.gz", hash = "sha256:cbc1fef89f8d062739774bd51eda3da3274006b3661d199c2655f6b3f6d605a0", size = 981474 }
[[package]]
name = "langgraph"
version = "0.3.29"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "langchain-core" },
{ name = "langgraph-checkpoint" },
{ name = "langgraph-prebuilt" },
{ name = "langgraph-sdk" },
{ name = "xxhash" },
]
sdist = { url = "https://files.pythonhosted.org/packages/26/00/6a38988d472835845ee6837402dc6050e012117b84ef2b838b7abd3268f1/langgraph-0.3.29.tar.gz", hash = "sha256:2bfa6d6b04541ddfcb03b56efd1fca6294a1700ff61a52c1582a8bb4f2d55a94", size = 119970 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/66/b4/89d81ed78efeec5b3d554a9244cdc6aa6cbf544da9c53738d7c2c6d4be57/langgraph-0.3.29-py3-none-any.whl", hash = "sha256:6045fbbe9ccc5af3fd7295a86f88e0d2b111243a36290e41248af379009e4cc1", size = 144692 },
]
[[package]]
name = "langgraph-checkpoint"
version = "2.0.24"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "langchain-core" },
{ name = "ormsgpack" },
]
sdist = { url = "https://files.pythonhosted.org/packages/0d/df/bacef68562ba4c391ded751eecda8e579ec78a581506064cf625e0ebd93a/langgraph_checkpoint-2.0.24.tar.gz", hash = "sha256:9596dad332344e7e871257be464df8a07c2e9bac66143081b11b9422b0167e5b", size = 37328 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/bc/60/30397e8fd2b7dead3754aa79d708caff9dbb371f30b4cd21802c60f6b921/langgraph_checkpoint-2.0.24-py3-none-any.whl", hash = "sha256:3836e2909ef2387d1fa8d04ee3e2a353f980d519fd6c649af352676dc73d66b8", size = 42028 },
]
[[package]]
name = "langgraph-prebuilt"
version = "0.1.8"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "langchain-core" },
{ name = "langgraph-checkpoint" },
]
sdist = { url = "https://files.pythonhosted.org/packages/57/30/f31f0e076c37d097b53e4cff5d479a3686e1991f6c86a1a4727d5d1f5489/langgraph_prebuilt-0.1.8.tar.gz", hash = "sha256:4de7659151829b2b955b6798df6800e580e617782c15c2c5b29b139697491831", size = 24543 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/36/72/9e092665502f8f52f2708065ed14fbbba3f95d1a1b65d62049b0c5fcdf00/langgraph_prebuilt-0.1.8-py3-none-any.whl", hash = "sha256:ae97b828ae00be2cefec503423aa782e1bff165e9b94592e224da132f2526968", size = 25903 },
]
[[package]]
name = "langgraph-sdk"
version = "0.1.61"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "httpx" },
{ name = "orjson" },
]
sdist = { url = "https://files.pythonhosted.org/packages/f0/c6/a11de2c770e1ac2774e2f19fdbd982b8df079e4206376456e14af395a3f0/langgraph_sdk-0.1.61.tar.gz", hash = "sha256:87dd1f07ab82da8875ac343268ece8bf5414632017ebc9d1cef4b523962fd601", size = 44136 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/fb/2b/85e796d8b4aad892c5d2bccc0def124fcdc2c9852dfa121adadfc41085b2/langgraph_sdk-0.1.61-py3-none-any.whl", hash = "sha256:f2d774b12497c428862993090622d51e0dbc3f53e0cee3d74a13c7495d835cc6", size = 47249 },
]
[[package]]
name = "langsmith"
version = "0.3.8"
@ -2169,6 +2224,30 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/27/f1/1d7ec15b20f8ce9300bc850de1e059132b88990e46cd0ccac29cbf11e4f9/orjson-3.10.15-cp313-cp313-win_amd64.whl", hash = "sha256:fd56a26a04f6ba5fb2045b0acc487a63162a958ed837648c5781e1fe3316cfbf", size = 133444 },
]
[[package]]
name = "ormsgpack"
version = "1.9.1"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/25/a7/462cf8ff5e29241868b82d3a5ec124d690eb6a6a5c6fa5bb1367b839e027/ormsgpack-1.9.1.tar.gz", hash = "sha256:3da6e63d82565e590b98178545e64f0f8506137b92bd31a2d04fd7c82baf5794", size = 56887 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/dd/f1/155a598cc8030526ccaaf91ba4d61530f87900645559487edba58b0a90a2/ormsgpack-1.9.1-cp312-cp312-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:1ede445fc3fdba219bb0e0d1f289df26a9c7602016b7daac6fafe8fe4e91548f", size = 383225 },
{ url = "https://files.pythonhosted.org/packages/23/1c/ef3097ba550fad55c79525f461febdd4e0d9cc18d065248044536f09488e/ormsgpack-1.9.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:db50b9f918e25b289114312ed775794d0978b469831b992bdc65bfe20b91fe30", size = 214056 },
{ url = "https://files.pythonhosted.org/packages/27/77/64d0da25896b2cbb99505ca518c109d7dd1964d7fde14c10943731738b60/ormsgpack-1.9.1-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:8c7d8fc58e4333308f58ec720b1ee6b12b2b3fe2d2d8f0766ab751cb351e8757", size = 217339 },
{ url = "https://files.pythonhosted.org/packages/6c/10/c3a7fd0a0068b0bb52cccbfeb5656db895d69e895a3abbc210c4b3f98ff8/ormsgpack-1.9.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:aeee6d08c040db265cb8563444aba343ecb32cbdbe2414a489dcead9f70c6765", size = 223816 },
{ url = "https://files.pythonhosted.org/packages/43/e7/aee1238dba652f2116c2523d36fd1c5f9775436032be5c233108fd2a1415/ormsgpack-1.9.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:2fbb8181c198bdc413a4e889e5200f010724eea4b6d5a9a7eee2df039ac04aca", size = 394287 },
{ url = "https://files.pythonhosted.org/packages/c7/09/1b452a92376f29d7a2da7c18fb01cf09978197a8eccbb8b204e72fd5a970/ormsgpack-1.9.1-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:16488f094ac0e2250cceea6caf72962614aa432ee11dd57ef45e1ad25ece3eff", size = 480709 },
{ url = "https://files.pythonhosted.org/packages/de/13/7fa9fee5a73af8a73a42bf8c2e69489605714f65f5a41454400a05e84a3b/ormsgpack-1.9.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:422d960bfd6ad88be20794f50ec7953d8f7a0f2df60e19d0e8feb994e2ed64ee", size = 397247 },
{ url = "https://files.pythonhosted.org/packages/a1/2d/2e87cb28110db0d3bb750edd4d8719b5068852a2eef5e96b0bf376bb8a81/ormsgpack-1.9.1-cp312-cp312-win_amd64.whl", hash = "sha256:e6e2f9eab527cf43fb4a4293e493370276b1c8716cf305689202d646c6a782ef", size = 125368 },
{ url = "https://files.pythonhosted.org/packages/b8/54/0390d5d092831e4df29dbafe32402891fc14b3e6ffe5a644b16cbbc9d9bc/ormsgpack-1.9.1-cp313-cp313-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:ac61c18d9dd085e8519b949f7e655f7fb07909fd09c53b4338dd33309012e289", size = 383226 },
{ url = "https://files.pythonhosted.org/packages/47/64/8b15d262d1caefead8fb22ec144f5ff7d9505fc31c22bc34598053d46fbe/ormsgpack-1.9.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:134840b8c6615da2c24ce77bd12a46098015c808197a9995c7a2d991e1904eec", size = 214057 },
{ url = "https://files.pythonhosted.org/packages/57/00/65823609266bad4d5ed29ea753d24a3bdb01c7edaf923da80967fc31f9c5/ormsgpack-1.9.1-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:38fd42618f626394b2c7713c5d4bcbc917254e9753d5d4cde460658b51b11a74", size = 217340 },
{ url = "https://files.pythonhosted.org/packages/a0/51/e535c50f7f87b49110233647f55300d7975139ef5e51f1adb4c55f58c124/ormsgpack-1.9.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9d36397333ad07b9eba4c2e271fa78951bd81afc059c85a6e9f6c0eb2de07cda", size = 223815 },
{ url = "https://files.pythonhosted.org/packages/0c/ee/393e4a6de2a62124bf589602648f295a9fb3907a0e2fe80061b88899d072/ormsgpack-1.9.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:603063089597917d04e4c1b1d53988a34f7dc2ff1a03adcfd1cf4ae966d5fba6", size = 394287 },
{ url = "https://files.pythonhosted.org/packages/c6/d8/e56d7c3cb73a0e533e3e2a21ae5838b2aa36a9dac1ca9c861af6bae5a369/ormsgpack-1.9.1-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:94bbf2b185e0cb721ceaba20e64b7158e6caf0cecd140ca29b9f05a8d5e91e2f", size = 480707 },
{ url = "https://files.pythonhosted.org/packages/e6/e0/6a3c6a6dc98583a721c54b02f5195bde8f801aebdeda9b601fa2ab30ad39/ormsgpack-1.9.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:c38f380b1e8c96a712eb302b9349347385161a8e29046868ae2bfdfcb23e2692", size = 397246 },
{ url = "https://files.pythonhosted.org/packages/b0/60/0ee5d790f13507e1f75ac21fc82dc1ef29afe1f520bd0f249d65b2f4839b/ormsgpack-1.9.1-cp313-cp313-win_amd64.whl", hash = "sha256:a4bc63fb30db94075611cedbbc3d261dd17cf2aa8ff75a0fd684cd45ca29cb1b", size = 125371 },
]
[[package]]
name = "packaging"
version = "24.2"
@ -3236,6 +3315,7 @@ dependencies = [
{ name = "gpt-researcher" },
{ name = "langchain-community" },
{ name = "langchain-unstructured" },
{ name = "langgraph" },
{ name = "litellm" },
{ name = "markdownify" },
{ name = "notion-client" },
@ -3262,6 +3342,7 @@ requires-dist = [
{ name = "gpt-researcher", specifier = ">=0.12.12" },
{ name = "langchain-community", specifier = ">=0.3.17" },
{ name = "langchain-unstructured", specifier = ">=0.1.6" },
{ name = "langgraph", specifier = ">=0.3.29" },
{ name = "litellm", specifier = ">=1.61.4" },
{ name = "markdownify", specifier = ">=0.14.1" },
{ name = "notion-client", specifier = ">=2.3.0" },
@ -3884,6 +3965,44 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/9b/07/df054f7413bdfff5e98f75056e4ed0977d0c8716424011fac2587864d1d3/XlsxWriter-3.2.2-py3-none-any.whl", hash = "sha256:272ce861e7fa5e82a4a6ebc24511f2cb952fde3461f6c6e1a1e81d3272db1471", size = 165121 },
]
[[package]]
name = "xxhash"
version = "3.5.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/00/5e/d6e5258d69df8b4ed8c83b6664f2b47d30d2dec551a29ad72a6c69eafd31/xxhash-3.5.0.tar.gz", hash = "sha256:84f2caddf951c9cbf8dc2e22a89d4ccf5d86391ac6418fe81e3c67d0cf60b45f", size = 84241 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/07/0e/1bfce2502c57d7e2e787600b31c83535af83746885aa1a5f153d8c8059d6/xxhash-3.5.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:14470ace8bd3b5d51318782cd94e6f94431974f16cb3b8dc15d52f3b69df8e00", size = 31969 },
{ url = "https://files.pythonhosted.org/packages/3f/d6/8ca450d6fe5b71ce521b4e5db69622383d039e2b253e9b2f24f93265b52c/xxhash-3.5.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:59aa1203de1cb96dbeab595ded0ad0c0056bb2245ae11fac11c0ceea861382b9", size = 30787 },
{ url = "https://files.pythonhosted.org/packages/5b/84/de7c89bc6ef63d750159086a6ada6416cc4349eab23f76ab870407178b93/xxhash-3.5.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:08424f6648526076e28fae6ea2806c0a7d504b9ef05ae61d196d571e5c879c84", size = 220959 },
{ url = "https://files.pythonhosted.org/packages/fe/86/51258d3e8a8545ff26468c977101964c14d56a8a37f5835bc0082426c672/xxhash-3.5.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:61a1ff00674879725b194695e17f23d3248998b843eb5e933007ca743310f793", size = 200006 },
{ url = "https://files.pythonhosted.org/packages/02/0a/96973bd325412feccf23cf3680fd2246aebf4b789122f938d5557c54a6b2/xxhash-3.5.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f2f2c61bee5844d41c3eb015ac652a0229e901074951ae48581d58bfb2ba01be", size = 428326 },
{ url = "https://files.pythonhosted.org/packages/11/a7/81dba5010f7e733de88af9555725146fc133be97ce36533867f4c7e75066/xxhash-3.5.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9d32a592cac88d18cc09a89172e1c32d7f2a6e516c3dfde1b9adb90ab5df54a6", size = 194380 },
{ url = "https://files.pythonhosted.org/packages/fb/7d/f29006ab398a173f4501c0e4977ba288f1c621d878ec217b4ff516810c04/xxhash-3.5.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:70dabf941dede727cca579e8c205e61121afc9b28516752fd65724be1355cc90", size = 207934 },
{ url = "https://files.pythonhosted.org/packages/8a/6e/6e88b8f24612510e73d4d70d9b0c7dff62a2e78451b9f0d042a5462c8d03/xxhash-3.5.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:e5d0ddaca65ecca9c10dcf01730165fd858533d0be84c75c327487c37a906a27", size = 216301 },
{ url = "https://files.pythonhosted.org/packages/af/51/7862f4fa4b75a25c3b4163c8a873f070532fe5f2d3f9b3fc869c8337a398/xxhash-3.5.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:3e5b5e16c5a480fe5f59f56c30abdeba09ffd75da8d13f6b9b6fd224d0b4d0a2", size = 203351 },
{ url = "https://files.pythonhosted.org/packages/22/61/8d6a40f288f791cf79ed5bb113159abf0c81d6efb86e734334f698eb4c59/xxhash-3.5.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:149b7914451eb154b3dfaa721315117ea1dac2cc55a01bfbd4df7c68c5dd683d", size = 210294 },
{ url = "https://files.pythonhosted.org/packages/17/02/215c4698955762d45a8158117190261b2dbefe9ae7e5b906768c09d8bc74/xxhash-3.5.0-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:eade977f5c96c677035ff39c56ac74d851b1cca7d607ab3d8f23c6b859379cab", size = 414674 },
{ url = "https://files.pythonhosted.org/packages/31/5c/b7a8db8a3237cff3d535261325d95de509f6a8ae439a5a7a4ffcff478189/xxhash-3.5.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:fa9f547bd98f5553d03160967866a71056a60960be00356a15ecc44efb40ba8e", size = 192022 },
{ url = "https://files.pythonhosted.org/packages/78/e3/dd76659b2811b3fd06892a8beb850e1996b63e9235af5a86ea348f053e9e/xxhash-3.5.0-cp312-cp312-win32.whl", hash = "sha256:f7b58d1fd3551b8c80a971199543379be1cee3d0d409e1f6d8b01c1a2eebf1f8", size = 30170 },
{ url = "https://files.pythonhosted.org/packages/d9/6b/1c443fe6cfeb4ad1dcf231cdec96eb94fb43d6498b4469ed8b51f8b59a37/xxhash-3.5.0-cp312-cp312-win_amd64.whl", hash = "sha256:fa0cafd3a2af231b4e113fba24a65d7922af91aeb23774a8b78228e6cd785e3e", size = 30040 },
{ url = "https://files.pythonhosted.org/packages/0f/eb/04405305f290173acc0350eba6d2f1a794b57925df0398861a20fbafa415/xxhash-3.5.0-cp312-cp312-win_arm64.whl", hash = "sha256:586886c7e89cb9828bcd8a5686b12e161368e0064d040e225e72607b43858ba2", size = 26796 },
{ url = "https://files.pythonhosted.org/packages/c9/b8/e4b3ad92d249be5c83fa72916c9091b0965cb0faeff05d9a0a3870ae6bff/xxhash-3.5.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:37889a0d13b0b7d739cfc128b1c902f04e32de17b33d74b637ad42f1c55101f6", size = 31795 },
{ url = "https://files.pythonhosted.org/packages/fc/d8/b3627a0aebfbfa4c12a41e22af3742cf08c8ea84f5cc3367b5de2d039cce/xxhash-3.5.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:97a662338797c660178e682f3bc180277b9569a59abfb5925e8620fba00b9fc5", size = 30792 },
{ url = "https://files.pythonhosted.org/packages/c3/cc/762312960691da989c7cd0545cb120ba2a4148741c6ba458aa723c00a3f8/xxhash-3.5.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7f85e0108d51092bdda90672476c7d909c04ada6923c14ff9d913c4f7dc8a3bc", size = 220950 },
{ url = "https://files.pythonhosted.org/packages/fe/e9/cc266f1042c3c13750e86a535496b58beb12bf8c50a915c336136f6168dc/xxhash-3.5.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:cd2fd827b0ba763ac919440042302315c564fdb797294d86e8cdd4578e3bc7f3", size = 199980 },
{ url = "https://files.pythonhosted.org/packages/bf/85/a836cd0dc5cc20376de26b346858d0ac9656f8f730998ca4324921a010b9/xxhash-3.5.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:82085c2abec437abebf457c1d12fccb30cc8b3774a0814872511f0f0562c768c", size = 428324 },
{ url = "https://files.pythonhosted.org/packages/b4/0e/15c243775342ce840b9ba34aceace06a1148fa1630cd8ca269e3223987f5/xxhash-3.5.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:07fda5de378626e502b42b311b049848c2ef38784d0d67b6f30bb5008642f8eb", size = 194370 },
{ url = "https://files.pythonhosted.org/packages/87/a1/b028bb02636dfdc190da01951d0703b3d904301ed0ef6094d948983bef0e/xxhash-3.5.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c279f0d2b34ef15f922b77966640ade58b4ccdfef1c4d94b20f2a364617a493f", size = 207911 },
{ url = "https://files.pythonhosted.org/packages/80/d5/73c73b03fc0ac73dacf069fdf6036c9abad82de0a47549e9912c955ab449/xxhash-3.5.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:89e66ceed67b213dec5a773e2f7a9e8c58f64daeb38c7859d8815d2c89f39ad7", size = 216352 },
{ url = "https://files.pythonhosted.org/packages/b6/2a/5043dba5ddbe35b4fe6ea0a111280ad9c3d4ba477dd0f2d1fe1129bda9d0/xxhash-3.5.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:bcd51708a633410737111e998ceb3b45d3dbc98c0931f743d9bb0a209033a326", size = 203410 },
{ url = "https://files.pythonhosted.org/packages/a2/b2/9a8ded888b7b190aed75b484eb5c853ddd48aa2896e7b59bbfbce442f0a1/xxhash-3.5.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:3ff2c0a34eae7df88c868be53a8dd56fbdf592109e21d4bfa092a27b0bf4a7bf", size = 210322 },
{ url = "https://files.pythonhosted.org/packages/98/62/440083fafbc917bf3e4b67c2ade621920dd905517e85631c10aac955c1d2/xxhash-3.5.0-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:4e28503dccc7d32e0b9817aa0cbfc1f45f563b2c995b7a66c4c8a0d232e840c7", size = 414725 },
{ url = "https://files.pythonhosted.org/packages/75/db/009206f7076ad60a517e016bb0058381d96a007ce3f79fa91d3010f49cc2/xxhash-3.5.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:a6c50017518329ed65a9e4829154626f008916d36295b6a3ba336e2458824c8c", size = 192070 },
{ url = "https://files.pythonhosted.org/packages/1f/6d/c61e0668943a034abc3a569cdc5aeae37d686d9da7e39cf2ed621d533e36/xxhash-3.5.0-cp313-cp313-win32.whl", hash = "sha256:53a068fe70301ec30d868ece566ac90d873e3bb059cf83c32e76012c889b8637", size = 30172 },
{ url = "https://files.pythonhosted.org/packages/96/14/8416dce965f35e3d24722cdf79361ae154fa23e2ab730e5323aa98d7919e/xxhash-3.5.0-cp313-cp313-win_amd64.whl", hash = "sha256:80babcc30e7a1a484eab952d76a4f4673ff601f54d5142c26826502740e70b43", size = 30041 },
{ url = "https://files.pythonhosted.org/packages/27/ee/518b72faa2073f5aa8e3262408d284892cb79cf2754ba0c3a5870645ef73/xxhash-3.5.0-cp313-cp313-win_arm64.whl", hash = "sha256:4811336f1ce11cac89dcbd18f3a25c527c16311709a89313c3acaf771def2d4b", size = 26801 },
]
[[package]]
name = "yarl"
version = "1.18.3"