diff --git a/surfsense_backend/app/agents/__init__.py b/surfsense_backend/app/agents/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/surfsense_backend/app/agents/researcher/__init__.py b/surfsense_backend/app/agents/researcher/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/surfsense_backend/app/agents/researcher/sub_section_writer/__init__.py b/surfsense_backend/app/agents/researcher/sub_section_writer/__init__.py new file mode 100644 index 0000000..8459b29 --- /dev/null +++ b/surfsense_backend/app/agents/researcher/sub_section_writer/__init__.py @@ -0,0 +1,8 @@ +"""New LangGraph Agent. + +This module defines a custom graph. +""" + +from .graph import graph + +__all__ = ["graph"] diff --git a/surfsense_backend/app/agents/researcher/sub_section_writer/configuration.py b/surfsense_backend/app/agents/researcher/sub_section_writer/configuration.py new file mode 100644 index 0000000..b34090e --- /dev/null +++ b/surfsense_backend/app/agents/researcher/sub_section_writer/configuration.py @@ -0,0 +1,31 @@ +"""Define the configurable parameters for the agent.""" + +from __future__ import annotations + +from dataclasses import dataclass, fields +from typing import Optional, List + +from langchain_core.runnables import RunnableConfig + + +@dataclass(kw_only=True) +class Configuration: + """The configuration for the agent.""" + + # Input parameters provided at invocation + sub_section_title: str + sub_questions: List[str] + connectors_to_search: List[str] + user_id: str + search_space_id: int + top_k: int = 20 # Default top_k value + + + @classmethod + def from_runnable_config( + cls, config: Optional[RunnableConfig] = None + ) -> Configuration: + """Create a Configuration instance from a RunnableConfig object.""" + configurable = (config.get("configurable") or {}) if config else {} + _fields = {f.name for f in fields(cls) if f.init} + return cls(**{k: v for k, v in configurable.items() if k in _fields}) diff --git a/surfsense_backend/app/agents/researcher/sub_section_writer/graph.py b/surfsense_backend/app/agents/researcher/sub_section_writer/graph.py new file mode 100644 index 0000000..e250cde --- /dev/null +++ b/surfsense_backend/app/agents/researcher/sub_section_writer/graph.py @@ -0,0 +1,22 @@ +from langgraph.graph import StateGraph +from .state import State +from .nodes import fetch_relevant_documents, write_sub_section +from .configuration import Configuration + +# Define a new graph +workflow = StateGraph(State, config_schema=Configuration) + +# Add the nodes to the graph +workflow.add_node("fetch_relevant_documents", fetch_relevant_documents) +workflow.add_node("write_sub_section", write_sub_section) + +# Entry point +workflow.add_edge("__start__", "fetch_relevant_documents") +# Connect fetch_relevant_documents to write_sub_section +workflow.add_edge("fetch_relevant_documents", "write_sub_section") +# Exit point +workflow.add_edge("write_sub_section", "__end__") + +# Compile the workflow into an executable graph +graph = workflow.compile() +graph.name = "Sub Section Writer" # This defines the custom name in LangSmith diff --git a/surfsense_backend/app/agents/researcher/sub_section_writer/nodes.py b/surfsense_backend/app/agents/researcher/sub_section_writer/nodes.py new file mode 100644 index 0000000..52fa877 --- /dev/null +++ b/surfsense_backend/app/agents/researcher/sub_section_writer/nodes.py @@ -0,0 +1,244 @@ +from .configuration import Configuration +from langchain_core.runnables import RunnableConfig +from .state import State +from typing import 
Any, Dict +from app.utils.connector_service import ConnectorService +from app.utils.reranker_service import RerankerService +from app.config import config as app_config +from .prompts import citation_system_prompt +from langchain_core.messages import HumanMessage, SystemMessage + +async def fetch_relevant_documents(state: State, config: RunnableConfig) -> Dict[str, Any]: + """ + Fetch relevant documents for the sub-section using specified connectors. + + This node retrieves documents from various data sources based on the sub-questions + derived from the sub-section title. It searches across all selected connectors + (YouTube, Extension, Crawled URLs, Files, Tavily API, Slack, Notion) and reranks + the results to provide the most relevant information for the agent workflow. + + Returns: + Dict containing the reranked documents in the "relevant_documents_fetched" key. + """ + # Get configuration + configuration = Configuration.from_runnable_config(config) + + # Extract state parameters + db_session = state.db_session + + # Extract config parameters + user_id = configuration.user_id + search_space_id = configuration.search_space_id + TOP_K = configuration.top_k + + # Initialize services + connector_service = ConnectorService(db_session) + reranker_service = RerankerService.get_reranker_instance(app_config) + + all_raw_documents = [] # Store all raw documents before reranking + + for user_query in configuration.sub_questions: + # Reformulate query (optional, consider if needed for each sub-question) + # reformulated_query = await QueryService.reformulate_query(user_query) + reformulated_query = user_query # Using original sub-question for now + + # Process each selected connector + for connector in configuration.connectors_to_search: + if connector == "YOUTUBE_VIDEO": + _, youtube_chunks = await connector_service.search_youtube( + user_query=reformulated_query, + user_id=user_id, + search_space_id=search_space_id, + top_k=TOP_K + ) + all_raw_documents.extend(youtube_chunks) + + elif connector == "EXTENSION": + _, extension_chunks = await connector_service.search_extension( + user_query=reformulated_query, + user_id=user_id, + search_space_id=search_space_id, + top_k=TOP_K + ) + all_raw_documents.extend(extension_chunks) + + elif connector == "CRAWLED_URL": + _, crawled_urls_chunks = await connector_service.search_crawled_urls( + user_query=reformulated_query, + user_id=user_id, + search_space_id=search_space_id, + top_k=TOP_K + ) + all_raw_documents.extend(crawled_urls_chunks) + + elif connector == "FILE": + _, files_chunks = await connector_service.search_files( + user_query=reformulated_query, + user_id=user_id, + search_space_id=search_space_id, + top_k=TOP_K + ) + all_raw_documents.extend(files_chunks) + + elif connector == "TAVILY_API": + _, tavily_chunks = await connector_service.search_tavily( + user_query=reformulated_query, + user_id=user_id, + top_k=TOP_K + ) + all_raw_documents.extend(tavily_chunks) + + elif connector == "SLACK_CONNECTOR": + _, slack_chunks = await connector_service.search_slack( + user_query=reformulated_query, + user_id=user_id, + search_space_id=search_space_id, + top_k=TOP_K + ) + all_raw_documents.extend(slack_chunks) + + elif connector == "NOTION_CONNECTOR": + _, notion_chunks = await connector_service.search_notion( + user_query=reformulated_query, + user_id=user_id, + search_space_id=search_space_id, + top_k=TOP_K + ) + all_raw_documents.extend(notion_chunks) + + # If we have documents and a reranker is available, rerank them + # Deduplicate documents based 
on chunk_id or content to avoid processing duplicates + seen_chunk_ids = set() + seen_content_hashes = set() + deduplicated_docs = [] + + for doc in all_raw_documents: + chunk_id = doc.get("chunk_id") + content = doc.get("content", "") + content_hash = hash(content) + + # Skip if we've seen this chunk_id or content before + if (chunk_id and chunk_id in seen_chunk_ids) or content_hash in seen_content_hashes: + continue + + # Add to our tracking sets and keep this document + if chunk_id: + seen_chunk_ids.add(chunk_id) + seen_content_hashes.add(content_hash) + deduplicated_docs.append(doc) + + # Use deduplicated documents for reranking + reranked_docs = deduplicated_docs + if deduplicated_docs and reranker_service: + # Use the main sub_section_title for reranking context + rerank_query = configuration.sub_section_title + + # Convert documents to format expected by reranker + reranker_input_docs = [ + { + "chunk_id": doc.get("chunk_id", f"chunk_{i}"), + "content": doc.get("content", ""), + "score": doc.get("score", 0.0), + "document": { + "id": doc.get("document", {}).get("id", ""), + "title": doc.get("document", {}).get("title", ""), + "document_type": doc.get("document", {}).get("document_type", ""), + "metadata": doc.get("document", {}).get("metadata", {}) + } + } for i, doc in enumerate(deduplicated_docs) + ] + + # Rerank documents using the main title query + reranked_docs = reranker_service.rerank_documents(rerank_query, reranker_input_docs) + + # Sort by score in descending order + reranked_docs.sort(key=lambda x: x.get("score", 0), reverse=True) + + # Update state with fetched documents + return { + "relevant_documents_fetched": reranked_docs + } + + + +async def write_sub_section(state: State, config: RunnableConfig) -> Dict[str, Any]: + """ + Write the sub-section using the fetched documents. + + This node takes the relevant documents fetched in the previous node and uses + an LLM to generate a comprehensive answer to the sub-section questions with + proper citations. The citations follow IEEE format using source IDs from the + documents. + + Returns: + Dict containing the final answer in the "final_answer" key. + """ + + # Get configuration and relevant documents + configuration = Configuration.from_runnable_config(config) + documents = state.relevant_documents_fetched + + # Initialize LLM + llm = app_config.fast_llm_instance + + # If no documents were found, return a message indicating this + if not documents or len(documents) == 0: + return { + "final_answer": "No relevant documents were found to answer this question. Please try refining your search or providing more specific questions." 
+        }
+
+    # Prepare documents for citation formatting
+    formatted_documents = []
+    for i, doc in enumerate(documents):
+        # Extract content and metadata
+        content = doc.get("content", "")
+        doc_info = doc.get("document", {})
+        document_id = doc_info.get("id", f"{i+1}")  # Use document ID or index+1 as source_id
+
+        # Format document according to the citation system prompt's expected format
+        formatted_doc = f"""
+        <document>
+            <metadata>
+                <source_id>{document_id}</source_id>
+            </metadata>
+            <content>
+                {content}
+            </content>
+        </document>
+        """
+        formatted_documents.append(formatted_doc)
+
+    # Create the query that combines the section title and questions
+    section_title = configuration.sub_section_title
+    questions = "\n".join([f"- {q}" for q in configuration.sub_questions])
+    documents_text = "\n".join(formatted_documents)
+
+    # Construct a clear, structured query for the LLM
+    human_message_content = f"""
+    Please write a comprehensive answer for the title: {section_title}
+
+    Address the following questions:
+    {questions}
+
+    Use the provided documents as your source material and cite them properly using the IEEE citation format [X] where X is the source_id.
+
+    <documents>
+    {documents_text}
+    </documents>
+    """
+
+    # Create messages for the LLM
+    messages = [
+        SystemMessage(content=citation_system_prompt),
+        HumanMessage(content=human_message_content)
+    ]
+
+    # Call the LLM and get the response
+    response = await llm.ainvoke(messages)
+    final_answer = response.content
+
+    return {
+        "final_answer": final_answer
+    }
+
diff --git a/surfsense_backend/app/agents/researcher/sub_section_writer/prompts.py b/surfsense_backend/app/agents/researcher/sub_section_writer/prompts.py
new file mode 100644
index 0000000..cc3ad61
--- /dev/null
+++ b/surfsense_backend/app/agents/researcher/sub_section_writer/prompts.py
@@ -0,0 +1,82 @@
+citation_system_prompt = f"""
+You are a research assistant tasked with analyzing documents and providing comprehensive answers with proper citations in IEEE format.
+
+1. Carefully analyze all provided documents in the <documents> section.
+2. Extract relevant information that addresses the user's query.
+3. Synthesize a comprehensive, well-structured answer using information from these documents.
+4. For EVERY piece of information you include from the documents, add an IEEE-style citation in square brackets [X] where X is the source_id from the document's metadata.
+5. Make sure ALL factual statements from the documents have proper citations.
+6. If multiple documents support the same point, include all relevant citations [X], [Y].
+7. Present information in a logical, coherent flow.
+8. Use your own words to connect ideas, but cite ALL information from the documents.
+9. If documents contain conflicting information, acknowledge this and present both perspectives with appropriate citations.
+10. Do not make up or include information not found in the provided documents.
+11. CRITICAL: You MUST use the exact source_id value from each document's metadata for citations. Do not create your own citation numbers.
+12. CRITICAL: Every citation MUST be in the IEEE format [X] where X is the exact source_id value.
+13. CRITICAL: Never renumber or reorder citations - always use the original source_id values.
+14. CRITICAL: Do not return citations as clickable links.
+15. CRITICAL: Never format citations as markdown links like "([1](https://example.com))". Always use plain square brackets only.
+16. CRITICAL: Citations must ONLY appear as [X] or [X], [Y], [Z] format - never with parentheses, hyperlinks, or other formatting.
+17. CRITICAL: Never make up citation numbers. Only use source_id values that are explicitly provided in the document metadata.
+18. CRITICAL: If you are unsure about a source_id, omit the citation rather than guessing or making one up.
+
+- Write in clear, professional language suitable for academic or technical audiences
+- Organize your response with appropriate paragraphs, headings, and structure
+- Every fact from the documents must have an IEEE-style citation in square brackets [X] where X is the EXACT source_id from the document's metadata
+- Citations should appear at the end of the sentence containing the information they support
+- Multiple citations should be separated by commas: [X], [Y], [Z]
+- No need to return a references section. Just citation numbers in the answer.
+- NEVER create your own citation numbering system - use the exact source_id values from the documents.
+- NEVER format citations as clickable links or as markdown links like "([1](https://example.com))". Always use plain square brackets only.
+- NEVER make up citation numbers if you are unsure about the source_id. It is better to omit the citation than to guess.
+
+Example:
+<documents>
+    <document>
+        <metadata>
+            <source_id>1</source_id>
+        </metadata>
+        <content>
+            The Great Barrier Reef is the world's largest coral reef system, stretching over 2,300 kilometers along the coast of Queensland, Australia. It comprises over 2,900 individual reefs and 900 islands.
+        </content>
+    </document>
+
+    <document>
+        <metadata>
+            <source_id>13</source_id>
+        </metadata>
+        <content>
+            Climate change poses a significant threat to coral reefs worldwide. Rising ocean temperatures have led to mass coral bleaching events in the Great Barrier Reef in 2016, 2017, and 2020.
+        </content>
+    </document>
+
+    <document>
+        <metadata>
+            <source_id>21</source_id>
+        </metadata>
+        <content>
+            The Great Barrier Reef was designated a UNESCO World Heritage Site in 1981 due to its outstanding universal value and biological diversity. It is home to over 1,500 species of fish and 400 types of coral.
+        </content>
+    </document>
+</documents>
+
+Example answer:
+The Great Barrier Reef is the world's largest coral reef system, stretching over 2,300 kilometers along the coast of Queensland, Australia [1]. It was designated a UNESCO World Heritage Site in 1981 due to its outstanding universal value and biological diversity [21]. The reef is home to over 1,500 species of fish and 400 types of coral [21]. Unfortunately, climate change poses a significant threat to coral reefs worldwide, with rising ocean temperatures leading to mass coral bleaching events in the Great Barrier Reef in 2016, 2017, and 2020 [13]. The reef system comprises over 2,900 individual reefs and 900 islands [1], making it an ecological treasure that requires protection from multiple threats [1], [13].
+
+DO NOT use any of these incorrect citation formats:
+- Using parentheses and markdown links: ([1](https://github.com/MODSetter/SurfSense))
+- Using parentheses around brackets: ([1])
+- Using hyperlinked text: [link to source 1](https://example.com)
+- Using footnote style: ... reef system¹
+- Making up citation numbers when source_id is unknown
+
+ONLY use plain square brackets [1] or multiple citations [1], [2], [3]
+
+Note that the citation numbers match exactly with the source_id values (1, 13, and 21) and are not renumbered sequentially. Citations follow IEEE style with square brackets and appear at the end of sentences.
+""" \ No newline at end of file diff --git a/surfsense_backend/app/agents/researcher/sub_section_writer/state.py b/surfsense_backend/app/agents/researcher/sub_section_writer/state.py new file mode 100644 index 0000000..fb5b08e --- /dev/null +++ b/surfsense_backend/app/agents/researcher/sub_section_writer/state.py @@ -0,0 +1,23 @@ +"""Define the state structures for the agent.""" + +from __future__ import annotations + +from dataclasses import dataclass +from typing import List, Optional, Any +from sqlalchemy.ext.asyncio import AsyncSession + +@dataclass +class State: + """Defines the dynamic state for the agent during execution. + + This state tracks the database session and the outputs generated by the agent's nodes. + See: https://langchain-ai.github.io/langgraph/concepts/low_level/#state + for more information. + """ + # Runtime context + db_session: AsyncSession + + # OUTPUT: Populated by agent nodes + relevant_documents_fetched: Optional[List[Any]] = None + final_answer: Optional[str] = None + diff --git a/surfsense_backend/app/routes/chats_routes.py b/surfsense_backend/app/routes/chats_routes.py index d05c060..df80ca4 100644 --- a/surfsense_backend/app/routes/chats_routes.py +++ b/surfsense_backend/app/routes/chats_routes.py @@ -46,7 +46,7 @@ async def handle_chat_data( response = StreamingResponse(stream_connector_search_results( user_query, user.id, - search_space_id, + search_space_id, # Already converted to int in lines 32-37 session, research_mode, selected_connectors diff --git a/surfsense_backend/app/tasks/stream_connector_search_results.py b/surfsense_backend/app/tasks/stream_connector_search_results.py index 5c563dc..4fa1f2a 100644 --- a/surfsense_backend/app/tasks/stream_connector_search_results.py +++ b/surfsense_backend/app/tasks/stream_connector_search_results.py @@ -14,7 +14,7 @@ from app.utils.document_converters import convert_chunks_to_langchain_documents async def stream_connector_search_results( user_query: str, - user_id: int, + user_id: str, search_space_id: int, session: AsyncSession, research_mode: str, diff --git a/surfsense_backend/app/utils/connector_service.py b/surfsense_backend/app/utils/connector_service.py index 9e676e5..6843e1b 100644 --- a/surfsense_backend/app/utils/connector_service.py +++ b/surfsense_backend/app/utils/connector_service.py @@ -13,7 +13,7 @@ class ConnectorService: self.retriever = ChucksHybridSearchRetriever(session) self.source_id_counter = 1 - async def search_crawled_urls(self, user_query: str, user_id: int, search_space_id: int, top_k: int = 20) -> tuple: + async def search_crawled_urls(self, user_query: str, user_id: str, search_space_id: int, top_k: int = 20) -> tuple: """ Search for crawled URLs and return both the source information and langchain documents @@ -28,16 +28,16 @@ class ConnectorService: document_type="CRAWLED_URL" ) - # Map crawled_urls_chunks to the required format - mapped_sources = {} + # Process each chunk and create sources directly without deduplication + sources_list = [] for i, chunk in enumerate(crawled_urls_chunks): - #Fix for UI + # Fix for UI crawled_urls_chunks[i]['document']['id'] = self.source_id_counter # Extract document metadata document = chunk.get('document', {}) metadata = document.get('metadata', {}) - # Create a mapped source entry + # Create a source entry source = { "id": self.source_id_counter, "title": document.get('title', 'Untitled Document'), @@ -46,14 +46,7 @@ class ConnectorService: } self.source_id_counter += 1 - - # Use a unique identifier for tracking unique sources - 
source_key = source.get("url") or source.get("title") - if source_key and source_key not in mapped_sources: - mapped_sources[source_key] = source - - # Convert to list of sources - sources_list = list(mapped_sources.values()) + sources_list.append(source) # Create result object result_object = { @@ -63,10 +56,9 @@ class ConnectorService: "sources": sources_list, } - return result_object, crawled_urls_chunks - async def search_files(self, user_query: str, user_id: int, search_space_id: int, top_k: int = 20) -> tuple: + async def search_files(self, user_query: str, user_id: str, search_space_id: int, top_k: int = 20) -> tuple: """ Search for files and return both the source information and langchain documents @@ -81,16 +73,16 @@ class ConnectorService: document_type="FILE" ) - # Map crawled_urls_chunks to the required format - mapped_sources = {} + # Process each chunk and create sources directly without deduplication + sources_list = [] for i, chunk in enumerate(files_chunks): - #Fix for UI + # Fix for UI files_chunks[i]['document']['id'] = self.source_id_counter # Extract document metadata document = chunk.get('document', {}) metadata = document.get('metadata', {}) - # Create a mapped source entry + # Create a source entry source = { "id": self.source_id_counter, "title": document.get('title', 'Untitled Document'), @@ -99,14 +91,7 @@ class ConnectorService: } self.source_id_counter += 1 - - # Use a unique identifier for tracking unique sources - source_key = source.get("url") or source.get("title") - if source_key and source_key not in mapped_sources: - mapped_sources[source_key] = source - - # Convert to list of sources - sources_list = list(mapped_sources.values()) + sources_list.append(source) # Create result object result_object = { @@ -118,7 +103,7 @@ class ConnectorService: return result_object, files_chunks - async def get_connector_by_type(self, user_id: int, connector_type: SearchSourceConnectorType) -> Optional[SearchSourceConnector]: + async def get_connector_by_type(self, user_id: str, connector_type: SearchSourceConnectorType) -> Optional[SearchSourceConnector]: """ Get a connector by type for a specific user @@ -138,7 +123,7 @@ class ConnectorService: ) return result.scalars().first() - async def search_tavily(self, user_query: str, user_id: int, top_k: int = 20) -> tuple: + async def search_tavily(self, user_query: str, user_id: str, top_k: int = 20) -> tuple: """ Search using Tavily API and return both the source information and documents @@ -177,13 +162,10 @@ class ConnectorService: # Extract results from Tavily response tavily_results = response.get("results", []) - # Map Tavily results to the required format + # Process each result and create sources directly without deduplication sources_list = [] documents = [] - # Start IDs from 1000 to avoid conflicts with other connectors - base_id = 100 - for i, result in enumerate(tavily_results): # Create a source entry @@ -234,7 +216,7 @@ class ConnectorService: "sources": [], }, [] - async def search_slack(self, user_query: str, user_id: int, search_space_id: int, top_k: int = 20) -> tuple: + async def search_slack(self, user_query: str, user_id: str, search_space_id: int, top_k: int = 20) -> tuple: """ Search for slack and return both the source information and langchain documents @@ -249,10 +231,10 @@ class ConnectorService: document_type="SLACK_CONNECTOR" ) - # Map slack_chunks to the required format - mapped_sources = {} + # Process each chunk and create sources directly without deduplication + sources_list = [] for i, 
chunk in enumerate(slack_chunks): - #Fix for UI + # Fix for UI slack_chunks[i]['document']['id'] = self.source_id_counter # Extract document metadata document = chunk.get('document', {}) @@ -286,14 +268,7 @@ class ConnectorService: } self.source_id_counter += 1 - - # Use channel_id and content as a unique identifier for tracking unique sources - source_key = f"{channel_id}_{chunk.get('chunk_id', i)}" - if source_key and source_key not in mapped_sources: - mapped_sources[source_key] = source - - # Convert to list of sources - sources_list = list(mapped_sources.values()) + sources_list.append(source) # Create result object result_object = { @@ -305,7 +280,7 @@ class ConnectorService: return result_object, slack_chunks - async def search_notion(self, user_query: str, user_id: int, search_space_id: int, top_k: int = 20) -> tuple: + async def search_notion(self, user_query: str, user_id: str, search_space_id: int, top_k: int = 20) -> tuple: """ Search for Notion pages and return both the source information and langchain documents @@ -326,8 +301,8 @@ class ConnectorService: document_type="NOTION_CONNECTOR" ) - # Map notion_chunks to the required format - mapped_sources = {} + # Process each chunk and create sources directly without deduplication + sources_list = [] for i, chunk in enumerate(notion_chunks): # Fix for UI notion_chunks[i]['document']['id'] = self.source_id_counter @@ -365,14 +340,7 @@ class ConnectorService: } self.source_id_counter += 1 - - # Use page_id and content as a unique identifier for tracking unique sources - source_key = f"{page_id}_{chunk.get('chunk_id', i)}" - if source_key and source_key not in mapped_sources: - mapped_sources[source_key] = source - - # Convert to list of sources - sources_list = list(mapped_sources.values()) + sources_list.append(source) # Create result object result_object = { @@ -384,7 +352,7 @@ class ConnectorService: return result_object, notion_chunks - async def search_extension(self, user_query: str, user_id: int, search_space_id: int, top_k: int = 20) -> tuple: + async def search_extension(self, user_query: str, user_id: str, search_space_id: int, top_k: int = 20) -> tuple: """ Search for extension data and return both the source information and langchain documents @@ -405,8 +373,8 @@ class ConnectorService: document_type="EXTENSION" ) - # Map extension_chunks to the required format - mapped_sources = {} + # Process each chunk and create sources directly without deduplication + sources_list = [] for i, chunk in enumerate(extension_chunks): # Fix for UI extension_chunks[i]['document']['id'] = self.source_id_counter @@ -462,14 +430,7 @@ class ConnectorService: } self.source_id_counter += 1 - - # Use URL and timestamp as a unique identifier for tracking unique sources - source_key = f"{webpage_url}_{visit_date}" - if source_key and source_key not in mapped_sources: - mapped_sources[source_key] = source - - # Convert to list of sources - sources_list = list(mapped_sources.values()) + sources_list.append(source) # Create result object result_object = { @@ -481,7 +442,7 @@ class ConnectorService: return result_object, extension_chunks - async def search_youtube(self, user_query: str, user_id: int, search_space_id: int, top_k: int = 20) -> tuple: + async def search_youtube(self, user_query: str, user_id: str, search_space_id: int, top_k: int = 20) -> tuple: """ Search for YouTube videos and return both the source information and langchain documents @@ -502,8 +463,8 @@ class ConnectorService: document_type="YOUTUBE_VIDEO" ) - # Map youtube_chunks to 
the required format - mapped_sources = {} + # Process each chunk and create sources directly without deduplication + sources_list = [] for i, chunk in enumerate(youtube_chunks): # Fix for UI youtube_chunks[i]['document']['id'] = self.source_id_counter @@ -541,18 +502,11 @@ class ConnectorService: } self.source_id_counter += 1 - - # Use video_id as a unique identifier for tracking unique sources - source_key = video_id or f"youtube_{i}" - if source_key and source_key not in mapped_sources: - mapped_sources[source_key] = source - - # Convert to list of sources - sources_list = list(mapped_sources.values()) + sources_list.append(source) # Create result object result_object = { - "id": 6, # Assign a unique ID for the YouTube connector + "id": 7, # Assign a unique ID for the YouTube connector "name": "YouTube Videos", "type": "YOUTUBE_VIDEO", "sources": sources_list, diff --git a/surfsense_backend/pyproject.toml b/surfsense_backend/pyproject.toml index 2d1e00a..b2fe9a5 100644 --- a/surfsense_backend/pyproject.toml +++ b/surfsense_backend/pyproject.toml @@ -13,6 +13,7 @@ dependencies = [ "gpt-researcher>=0.12.12", "langchain-community>=0.3.17", "langchain-unstructured>=0.1.6", + "langgraph>=0.3.29", "litellm>=1.61.4", "markdownify>=0.14.1", "notion-client>=2.3.0", diff --git a/surfsense_backend/uv.lock b/surfsense_backend/uv.lock index e64ff39..846571c 100644 --- a/surfsense_backend/uv.lock +++ b/surfsense_backend/uv.lock @@ -1499,6 +1499,61 @@ dependencies = [ ] sdist = { url = "https://files.pythonhosted.org/packages/0e/72/a3add0e4eec4eb9e2569554f7c70f4a3c27712f40e3284d483e88094cc0e/langdetect-1.0.9.tar.gz", hash = "sha256:cbc1fef89f8d062739774bd51eda3da3274006b3661d199c2655f6b3f6d605a0", size = 981474 } +[[package]] +name = "langgraph" +version = "0.3.29" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "langchain-core" }, + { name = "langgraph-checkpoint" }, + { name = "langgraph-prebuilt" }, + { name = "langgraph-sdk" }, + { name = "xxhash" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/26/00/6a38988d472835845ee6837402dc6050e012117b84ef2b838b7abd3268f1/langgraph-0.3.29.tar.gz", hash = "sha256:2bfa6d6b04541ddfcb03b56efd1fca6294a1700ff61a52c1582a8bb4f2d55a94", size = 119970 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/66/b4/89d81ed78efeec5b3d554a9244cdc6aa6cbf544da9c53738d7c2c6d4be57/langgraph-0.3.29-py3-none-any.whl", hash = "sha256:6045fbbe9ccc5af3fd7295a86f88e0d2b111243a36290e41248af379009e4cc1", size = 144692 }, +] + +[[package]] +name = "langgraph-checkpoint" +version = "2.0.24" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "langchain-core" }, + { name = "ormsgpack" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/0d/df/bacef68562ba4c391ded751eecda8e579ec78a581506064cf625e0ebd93a/langgraph_checkpoint-2.0.24.tar.gz", hash = "sha256:9596dad332344e7e871257be464df8a07c2e9bac66143081b11b9422b0167e5b", size = 37328 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/bc/60/30397e8fd2b7dead3754aa79d708caff9dbb371f30b4cd21802c60f6b921/langgraph_checkpoint-2.0.24-py3-none-any.whl", hash = "sha256:3836e2909ef2387d1fa8d04ee3e2a353f980d519fd6c649af352676dc73d66b8", size = 42028 }, +] + +[[package]] +name = "langgraph-prebuilt" +version = "0.1.8" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "langchain-core" }, + { name = "langgraph-checkpoint" }, +] +sdist = { url = 
"https://files.pythonhosted.org/packages/57/30/f31f0e076c37d097b53e4cff5d479a3686e1991f6c86a1a4727d5d1f5489/langgraph_prebuilt-0.1.8.tar.gz", hash = "sha256:4de7659151829b2b955b6798df6800e580e617782c15c2c5b29b139697491831", size = 24543 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/36/72/9e092665502f8f52f2708065ed14fbbba3f95d1a1b65d62049b0c5fcdf00/langgraph_prebuilt-0.1.8-py3-none-any.whl", hash = "sha256:ae97b828ae00be2cefec503423aa782e1bff165e9b94592e224da132f2526968", size = 25903 }, +] + +[[package]] +name = "langgraph-sdk" +version = "0.1.61" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "httpx" }, + { name = "orjson" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/f0/c6/a11de2c770e1ac2774e2f19fdbd982b8df079e4206376456e14af395a3f0/langgraph_sdk-0.1.61.tar.gz", hash = "sha256:87dd1f07ab82da8875ac343268ece8bf5414632017ebc9d1cef4b523962fd601", size = 44136 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/fb/2b/85e796d8b4aad892c5d2bccc0def124fcdc2c9852dfa121adadfc41085b2/langgraph_sdk-0.1.61-py3-none-any.whl", hash = "sha256:f2d774b12497c428862993090622d51e0dbc3f53e0cee3d74a13c7495d835cc6", size = 47249 }, +] + [[package]] name = "langsmith" version = "0.3.8" @@ -2169,6 +2224,30 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/27/f1/1d7ec15b20f8ce9300bc850de1e059132b88990e46cd0ccac29cbf11e4f9/orjson-3.10.15-cp313-cp313-win_amd64.whl", hash = "sha256:fd56a26a04f6ba5fb2045b0acc487a63162a958ed837648c5781e1fe3316cfbf", size = 133444 }, ] +[[package]] +name = "ormsgpack" +version = "1.9.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/25/a7/462cf8ff5e29241868b82d3a5ec124d690eb6a6a5c6fa5bb1367b839e027/ormsgpack-1.9.1.tar.gz", hash = "sha256:3da6e63d82565e590b98178545e64f0f8506137b92bd31a2d04fd7c82baf5794", size = 56887 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/dd/f1/155a598cc8030526ccaaf91ba4d61530f87900645559487edba58b0a90a2/ormsgpack-1.9.1-cp312-cp312-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:1ede445fc3fdba219bb0e0d1f289df26a9c7602016b7daac6fafe8fe4e91548f", size = 383225 }, + { url = "https://files.pythonhosted.org/packages/23/1c/ef3097ba550fad55c79525f461febdd4e0d9cc18d065248044536f09488e/ormsgpack-1.9.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:db50b9f918e25b289114312ed775794d0978b469831b992bdc65bfe20b91fe30", size = 214056 }, + { url = "https://files.pythonhosted.org/packages/27/77/64d0da25896b2cbb99505ca518c109d7dd1964d7fde14c10943731738b60/ormsgpack-1.9.1-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:8c7d8fc58e4333308f58ec720b1ee6b12b2b3fe2d2d8f0766ab751cb351e8757", size = 217339 }, + { url = "https://files.pythonhosted.org/packages/6c/10/c3a7fd0a0068b0bb52cccbfeb5656db895d69e895a3abbc210c4b3f98ff8/ormsgpack-1.9.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:aeee6d08c040db265cb8563444aba343ecb32cbdbe2414a489dcead9f70c6765", size = 223816 }, + { url = "https://files.pythonhosted.org/packages/43/e7/aee1238dba652f2116c2523d36fd1c5f9775436032be5c233108fd2a1415/ormsgpack-1.9.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:2fbb8181c198bdc413a4e889e5200f010724eea4b6d5a9a7eee2df039ac04aca", size = 394287 }, + { url = 
"https://files.pythonhosted.org/packages/c7/09/1b452a92376f29d7a2da7c18fb01cf09978197a8eccbb8b204e72fd5a970/ormsgpack-1.9.1-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:16488f094ac0e2250cceea6caf72962614aa432ee11dd57ef45e1ad25ece3eff", size = 480709 }, + { url = "https://files.pythonhosted.org/packages/de/13/7fa9fee5a73af8a73a42bf8c2e69489605714f65f5a41454400a05e84a3b/ormsgpack-1.9.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:422d960bfd6ad88be20794f50ec7953d8f7a0f2df60e19d0e8feb994e2ed64ee", size = 397247 }, + { url = "https://files.pythonhosted.org/packages/a1/2d/2e87cb28110db0d3bb750edd4d8719b5068852a2eef5e96b0bf376bb8a81/ormsgpack-1.9.1-cp312-cp312-win_amd64.whl", hash = "sha256:e6e2f9eab527cf43fb4a4293e493370276b1c8716cf305689202d646c6a782ef", size = 125368 }, + { url = "https://files.pythonhosted.org/packages/b8/54/0390d5d092831e4df29dbafe32402891fc14b3e6ffe5a644b16cbbc9d9bc/ormsgpack-1.9.1-cp313-cp313-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:ac61c18d9dd085e8519b949f7e655f7fb07909fd09c53b4338dd33309012e289", size = 383226 }, + { url = "https://files.pythonhosted.org/packages/47/64/8b15d262d1caefead8fb22ec144f5ff7d9505fc31c22bc34598053d46fbe/ormsgpack-1.9.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:134840b8c6615da2c24ce77bd12a46098015c808197a9995c7a2d991e1904eec", size = 214057 }, + { url = "https://files.pythonhosted.org/packages/57/00/65823609266bad4d5ed29ea753d24a3bdb01c7edaf923da80967fc31f9c5/ormsgpack-1.9.1-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:38fd42618f626394b2c7713c5d4bcbc917254e9753d5d4cde460658b51b11a74", size = 217340 }, + { url = "https://files.pythonhosted.org/packages/a0/51/e535c50f7f87b49110233647f55300d7975139ef5e51f1adb4c55f58c124/ormsgpack-1.9.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9d36397333ad07b9eba4c2e271fa78951bd81afc059c85a6e9f6c0eb2de07cda", size = 223815 }, + { url = "https://files.pythonhosted.org/packages/0c/ee/393e4a6de2a62124bf589602648f295a9fb3907a0e2fe80061b88899d072/ormsgpack-1.9.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:603063089597917d04e4c1b1d53988a34f7dc2ff1a03adcfd1cf4ae966d5fba6", size = 394287 }, + { url = "https://files.pythonhosted.org/packages/c6/d8/e56d7c3cb73a0e533e3e2a21ae5838b2aa36a9dac1ca9c861af6bae5a369/ormsgpack-1.9.1-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:94bbf2b185e0cb721ceaba20e64b7158e6caf0cecd140ca29b9f05a8d5e91e2f", size = 480707 }, + { url = "https://files.pythonhosted.org/packages/e6/e0/6a3c6a6dc98583a721c54b02f5195bde8f801aebdeda9b601fa2ab30ad39/ormsgpack-1.9.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:c38f380b1e8c96a712eb302b9349347385161a8e29046868ae2bfdfcb23e2692", size = 397246 }, + { url = "https://files.pythonhosted.org/packages/b0/60/0ee5d790f13507e1f75ac21fc82dc1ef29afe1f520bd0f249d65b2f4839b/ormsgpack-1.9.1-cp313-cp313-win_amd64.whl", hash = "sha256:a4bc63fb30db94075611cedbbc3d261dd17cf2aa8ff75a0fd684cd45ca29cb1b", size = 125371 }, +] + [[package]] name = "packaging" version = "24.2" @@ -3236,6 +3315,7 @@ dependencies = [ { name = "gpt-researcher" }, { name = "langchain-community" }, { name = "langchain-unstructured" }, + { name = "langgraph" }, { name = "litellm" }, { name = "markdownify" }, { name = "notion-client" }, @@ -3262,6 +3342,7 @@ requires-dist = [ { name = "gpt-researcher", specifier = ">=0.12.12" }, { name = "langchain-community", specifier = ">=0.3.17" }, { name = "langchain-unstructured", 
specifier = ">=0.1.6" }, + { name = "langgraph", specifier = ">=0.3.29" }, { name = "litellm", specifier = ">=1.61.4" }, { name = "markdownify", specifier = ">=0.14.1" }, { name = "notion-client", specifier = ">=2.3.0" }, @@ -3884,6 +3965,44 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/9b/07/df054f7413bdfff5e98f75056e4ed0977d0c8716424011fac2587864d1d3/XlsxWriter-3.2.2-py3-none-any.whl", hash = "sha256:272ce861e7fa5e82a4a6ebc24511f2cb952fde3461f6c6e1a1e81d3272db1471", size = 165121 }, ] +[[package]] +name = "xxhash" +version = "3.5.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/00/5e/d6e5258d69df8b4ed8c83b6664f2b47d30d2dec551a29ad72a6c69eafd31/xxhash-3.5.0.tar.gz", hash = "sha256:84f2caddf951c9cbf8dc2e22a89d4ccf5d86391ac6418fe81e3c67d0cf60b45f", size = 84241 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/07/0e/1bfce2502c57d7e2e787600b31c83535af83746885aa1a5f153d8c8059d6/xxhash-3.5.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:14470ace8bd3b5d51318782cd94e6f94431974f16cb3b8dc15d52f3b69df8e00", size = 31969 }, + { url = "https://files.pythonhosted.org/packages/3f/d6/8ca450d6fe5b71ce521b4e5db69622383d039e2b253e9b2f24f93265b52c/xxhash-3.5.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:59aa1203de1cb96dbeab595ded0ad0c0056bb2245ae11fac11c0ceea861382b9", size = 30787 }, + { url = "https://files.pythonhosted.org/packages/5b/84/de7c89bc6ef63d750159086a6ada6416cc4349eab23f76ab870407178b93/xxhash-3.5.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:08424f6648526076e28fae6ea2806c0a7d504b9ef05ae61d196d571e5c879c84", size = 220959 }, + { url = "https://files.pythonhosted.org/packages/fe/86/51258d3e8a8545ff26468c977101964c14d56a8a37f5835bc0082426c672/xxhash-3.5.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:61a1ff00674879725b194695e17f23d3248998b843eb5e933007ca743310f793", size = 200006 }, + { url = "https://files.pythonhosted.org/packages/02/0a/96973bd325412feccf23cf3680fd2246aebf4b789122f938d5557c54a6b2/xxhash-3.5.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f2f2c61bee5844d41c3eb015ac652a0229e901074951ae48581d58bfb2ba01be", size = 428326 }, + { url = "https://files.pythonhosted.org/packages/11/a7/81dba5010f7e733de88af9555725146fc133be97ce36533867f4c7e75066/xxhash-3.5.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9d32a592cac88d18cc09a89172e1c32d7f2a6e516c3dfde1b9adb90ab5df54a6", size = 194380 }, + { url = "https://files.pythonhosted.org/packages/fb/7d/f29006ab398a173f4501c0e4977ba288f1c621d878ec217b4ff516810c04/xxhash-3.5.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:70dabf941dede727cca579e8c205e61121afc9b28516752fd65724be1355cc90", size = 207934 }, + { url = "https://files.pythonhosted.org/packages/8a/6e/6e88b8f24612510e73d4d70d9b0c7dff62a2e78451b9f0d042a5462c8d03/xxhash-3.5.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:e5d0ddaca65ecca9c10dcf01730165fd858533d0be84c75c327487c37a906a27", size = 216301 }, + { url = "https://files.pythonhosted.org/packages/af/51/7862f4fa4b75a25c3b4163c8a873f070532fe5f2d3f9b3fc869c8337a398/xxhash-3.5.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:3e5b5e16c5a480fe5f59f56c30abdeba09ffd75da8d13f6b9b6fd224d0b4d0a2", size = 203351 }, + { url = 
"https://files.pythonhosted.org/packages/22/61/8d6a40f288f791cf79ed5bb113159abf0c81d6efb86e734334f698eb4c59/xxhash-3.5.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:149b7914451eb154b3dfaa721315117ea1dac2cc55a01bfbd4df7c68c5dd683d", size = 210294 }, + { url = "https://files.pythonhosted.org/packages/17/02/215c4698955762d45a8158117190261b2dbefe9ae7e5b906768c09d8bc74/xxhash-3.5.0-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:eade977f5c96c677035ff39c56ac74d851b1cca7d607ab3d8f23c6b859379cab", size = 414674 }, + { url = "https://files.pythonhosted.org/packages/31/5c/b7a8db8a3237cff3d535261325d95de509f6a8ae439a5a7a4ffcff478189/xxhash-3.5.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:fa9f547bd98f5553d03160967866a71056a60960be00356a15ecc44efb40ba8e", size = 192022 }, + { url = "https://files.pythonhosted.org/packages/78/e3/dd76659b2811b3fd06892a8beb850e1996b63e9235af5a86ea348f053e9e/xxhash-3.5.0-cp312-cp312-win32.whl", hash = "sha256:f7b58d1fd3551b8c80a971199543379be1cee3d0d409e1f6d8b01c1a2eebf1f8", size = 30170 }, + { url = "https://files.pythonhosted.org/packages/d9/6b/1c443fe6cfeb4ad1dcf231cdec96eb94fb43d6498b4469ed8b51f8b59a37/xxhash-3.5.0-cp312-cp312-win_amd64.whl", hash = "sha256:fa0cafd3a2af231b4e113fba24a65d7922af91aeb23774a8b78228e6cd785e3e", size = 30040 }, + { url = "https://files.pythonhosted.org/packages/0f/eb/04405305f290173acc0350eba6d2f1a794b57925df0398861a20fbafa415/xxhash-3.5.0-cp312-cp312-win_arm64.whl", hash = "sha256:586886c7e89cb9828bcd8a5686b12e161368e0064d040e225e72607b43858ba2", size = 26796 }, + { url = "https://files.pythonhosted.org/packages/c9/b8/e4b3ad92d249be5c83fa72916c9091b0965cb0faeff05d9a0a3870ae6bff/xxhash-3.5.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:37889a0d13b0b7d739cfc128b1c902f04e32de17b33d74b637ad42f1c55101f6", size = 31795 }, + { url = "https://files.pythonhosted.org/packages/fc/d8/b3627a0aebfbfa4c12a41e22af3742cf08c8ea84f5cc3367b5de2d039cce/xxhash-3.5.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:97a662338797c660178e682f3bc180277b9569a59abfb5925e8620fba00b9fc5", size = 30792 }, + { url = "https://files.pythonhosted.org/packages/c3/cc/762312960691da989c7cd0545cb120ba2a4148741c6ba458aa723c00a3f8/xxhash-3.5.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7f85e0108d51092bdda90672476c7d909c04ada6923c14ff9d913c4f7dc8a3bc", size = 220950 }, + { url = "https://files.pythonhosted.org/packages/fe/e9/cc266f1042c3c13750e86a535496b58beb12bf8c50a915c336136f6168dc/xxhash-3.5.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:cd2fd827b0ba763ac919440042302315c564fdb797294d86e8cdd4578e3bc7f3", size = 199980 }, + { url = "https://files.pythonhosted.org/packages/bf/85/a836cd0dc5cc20376de26b346858d0ac9656f8f730998ca4324921a010b9/xxhash-3.5.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:82085c2abec437abebf457c1d12fccb30cc8b3774a0814872511f0f0562c768c", size = 428324 }, + { url = "https://files.pythonhosted.org/packages/b4/0e/15c243775342ce840b9ba34aceace06a1148fa1630cd8ca269e3223987f5/xxhash-3.5.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:07fda5de378626e502b42b311b049848c2ef38784d0d67b6f30bb5008642f8eb", size = 194370 }, + { url = "https://files.pythonhosted.org/packages/87/a1/b028bb02636dfdc190da01951d0703b3d904301ed0ef6094d948983bef0e/xxhash-3.5.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = 
"sha256:c279f0d2b34ef15f922b77966640ade58b4ccdfef1c4d94b20f2a364617a493f", size = 207911 }, + { url = "https://files.pythonhosted.org/packages/80/d5/73c73b03fc0ac73dacf069fdf6036c9abad82de0a47549e9912c955ab449/xxhash-3.5.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:89e66ceed67b213dec5a773e2f7a9e8c58f64daeb38c7859d8815d2c89f39ad7", size = 216352 }, + { url = "https://files.pythonhosted.org/packages/b6/2a/5043dba5ddbe35b4fe6ea0a111280ad9c3d4ba477dd0f2d1fe1129bda9d0/xxhash-3.5.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:bcd51708a633410737111e998ceb3b45d3dbc98c0931f743d9bb0a209033a326", size = 203410 }, + { url = "https://files.pythonhosted.org/packages/a2/b2/9a8ded888b7b190aed75b484eb5c853ddd48aa2896e7b59bbfbce442f0a1/xxhash-3.5.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:3ff2c0a34eae7df88c868be53a8dd56fbdf592109e21d4bfa092a27b0bf4a7bf", size = 210322 }, + { url = "https://files.pythonhosted.org/packages/98/62/440083fafbc917bf3e4b67c2ade621920dd905517e85631c10aac955c1d2/xxhash-3.5.0-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:4e28503dccc7d32e0b9817aa0cbfc1f45f563b2c995b7a66c4c8a0d232e840c7", size = 414725 }, + { url = "https://files.pythonhosted.org/packages/75/db/009206f7076ad60a517e016bb0058381d96a007ce3f79fa91d3010f49cc2/xxhash-3.5.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:a6c50017518329ed65a9e4829154626f008916d36295b6a3ba336e2458824c8c", size = 192070 }, + { url = "https://files.pythonhosted.org/packages/1f/6d/c61e0668943a034abc3a569cdc5aeae37d686d9da7e39cf2ed621d533e36/xxhash-3.5.0-cp313-cp313-win32.whl", hash = "sha256:53a068fe70301ec30d868ece566ac90d873e3bb059cf83c32e76012c889b8637", size = 30172 }, + { url = "https://files.pythonhosted.org/packages/96/14/8416dce965f35e3d24722cdf79361ae154fa23e2ab730e5323aa98d7919e/xxhash-3.5.0-cp313-cp313-win_amd64.whl", hash = "sha256:80babcc30e7a1a484eab952d76a4f4673ff601f54d5142c26826502740e70b43", size = 30041 }, + { url = "https://files.pythonhosted.org/packages/27/ee/518b72faa2073f5aa8e3262408d284892cb79cf2754ba0c3a5870645ef73/xxhash-3.5.0-cp313-cp313-win_arm64.whl", hash = "sha256:4811336f1ce11cac89dcbd18f3a25c527c16311709a89313c3acaf771def2d4b", size = 26801 }, +] + [[package]] name = "yarl" version = "1.18.3"