From db20ff1aea315224dbcc7dfa7412dc0cc3bf1e94 Mon Sep 17 00:00:00 2001 From: MkDev11 <94194147+MkDev11@users.noreply.github.com> Date: Thu, 5 Feb 2026 05:38:36 -0800 Subject: [PATCH] feat: add RAG toolkit for knowledge base queries (#1003) Co-authored-by: mkdev11 Co-authored-by: Wendong-Fan Co-authored-by: Wendong-Fan <133094783+Wendong-Fan@users.noreply.github.com> --- CONTRIBUTING.md | 3 + README.md | 2 +- README_CN.md | 2 +- README_JA.md | 2 +- README_PT-BR.md | 2 +- backend/app/agent/tools.py | 2 + backend/app/utils/toolkit/rag_toolkit.py | 364 ++++++++++++++++ backend/pyproject.toml | 2 + .../app/utils/toolkit/test_rag_toolkit.py | 217 ++++++++++ backend/uv.lock | 400 ++++++++++++++++++ docs/core/models/gemini.md | 2 +- docs/core/models/kimi.md | 2 +- docs/core/models/minimax.md | 2 +- server/README_CN.md | 6 +- server/README_EN.md | 6 +- server/README_PT-BR.md | 6 +- 16 files changed, 1004 insertions(+), 16 deletions(-) create mode 100644 backend/app/utils/toolkit/rag_toolkit.py create mode 100644 backend/tests/app/utils/toolkit/test_rag_toolkit.py diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index aaf5c8c1..9680c01b 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -92,12 +92,14 @@ our coding standards. #### Code Review Checklist - Functionality + - Correctness: Does the code perform the intended task? Are edge cases handled? - Testing: Is there sufficient test coverage? Do all tests pass? - Security: Are there any security vulnerabilities introduced by the change? - Performance: Does the code introduce any performance regressions? - Code Quality + - Readability: Is the code easy to read and understand? Is it well-commented where necessary? - Maintainability: Is the code structured in a way that makes future changes easy? - Style: Does the code follow the project’s style guidelines? @@ -105,6 +107,7 @@ our coding standards. - Documentation: Are public methods, classes, and any complex logic well-documented? 
- Design + - Consistency: Does the code follow established design patterns and project architecture? - Modularity: Are the changes modular and self-contained? Does the code avoid unnecessary duplication? - Dependencies: Are dependencies minimized and used appropriately? diff --git a/README.md b/README.md index 65b33bfe..f6dc9eab 100644 --- a/README.md +++ b/README.md @@ -75,7 +75,7 @@ Built on [CAMEL-AI][camel-site]'s acclaimed open-source project, our system intr - [📄 Open Source License](#-open-source-license) - [🌐 Community & contact](#-community--contact) -#### +####
diff --git a/README_CN.md b/README_CN.md index a8eb2659..664266bf 100644 --- a/README_CN.md +++ b/README_CN.md @@ -74,7 +74,7 @@ - [📄 开源许可证](#-%E5%BC%80%E6%BA%90%E8%AE%B8%E5%8F%AF%E8%AF%81) - [🌐 社区与联系](#-%E7%A4%BE%E5%8C%BA%E4%B8%8E%E8%81%94%E7%B3%BB) -#### +####
diff --git a/README_JA.md b/README_JA.md index 740cc4df..7642cb52 100644 --- a/README_JA.md +++ b/README_JA.md @@ -73,7 +73,7 @@ - [📄 オープンソースライセンス](#-%E3%82%AA%E3%83%BC%E3%83%97%E3%83%B3%E3%82%BD%E3%83%BC%E3%82%B9%E3%83%A9%E3%82%A4%E3%82%BB%E3%83%B3%E3%82%B9) - [🌐 コミュニティ & お問い合わせ](#-%E3%82%B3%E3%83%9F%E3%83%A5%E3%83%8B%E3%83%86%E3%82%A3--%E3%81%8A%E5%95%8F%E3%81%84%E5%90%88%E3%82%8F%E3%81%9B) -#### +####
diff --git a/README_PT-BR.md b/README_PT-BR.md index 326d527e..0b8654b7 100644 --- a/README_PT-BR.md +++ b/README_PT-BR.md @@ -74,7 +74,7 @@ Construído sobre o aclamado projeto open source da [CAMEL-AI][camel-site], noss - [📄 Licença Open Source](#-licen%C3%A7a-open-source) - [🌐 Comunidade & Contato](#-comunidade--contato) -#### +####
diff --git a/backend/app/agent/tools.py b/backend/app/agent/tools.py index 4c0f7a1a..2a80cc00 100644 --- a/backend/app/agent/tools.py +++ b/backend/app/agent/tools.py @@ -35,6 +35,7 @@ from app.utils.toolkit.mcp_search_toolkit import McpSearchToolkit from app.utils.toolkit.notion_mcp_toolkit import NotionMCPToolkit from app.utils.toolkit.openai_image_toolkit import OpenAIImageToolkit from app.utils.toolkit.pptx_toolkit import PPTXToolkit +from app.utils.toolkit.rag_toolkit import RAGToolkit from app.utils.toolkit.reddit_toolkit import RedditToolkit from app.utils.toolkit.search_toolkit import SearchToolkit from app.utils.toolkit.slack_toolkit import SlackToolkit @@ -67,6 +68,7 @@ async def get_toolkits(tools: list[str], agent_name: str, api_task_id: str): "mcp_search_toolkit": McpSearchToolkit, "notion_mcp_toolkit": NotionMCPToolkit, "pptx_toolkit": PPTXToolkit, + "rag_toolkit": RAGToolkit, "reddit_toolkit": RedditToolkit, "search_toolkit": SearchToolkit, "slack_toolkit": SlackToolkit, diff --git a/backend/app/utils/toolkit/rag_toolkit.py b/backend/app/utils/toolkit/rag_toolkit.py new file mode 100644 index 00000000..deea5bf2 --- /dev/null +++ b/backend/app/utils/toolkit/rag_toolkit.py @@ -0,0 +1,364 @@ +# ========= Copyright 2025-2026 @ Eigent.ai All Rights Reserved. ========= +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ========= Copyright 2025-2026 @ Eigent.ai All Rights Reserved. 
import hashlib
import logging
import os
from pathlib import Path

from camel.embeddings import BaseEmbedding, OpenAIEmbedding
from camel.retrievers import AutoRetriever, VectorRetriever
from camel.storages import BaseVectorStorage, QdrantStorage
from camel.toolkits import RetrievalToolkit
from camel.toolkits.function_tool import FunctionTool
from camel.types import StorageType

from app.component.environment import env
from app.service.task import Agents
from app.utils.toolkit.abstract_toolkit import AbstractToolkit

logger = logging.getLogger("rag_toolkit")

# Default paths and constants
DEFAULT_RAG_STORAGE_PATH = "~/.eigent/rag_storage"
DEFAULT_COLLECTION_NAME = "default"
RAW_TEXT_SUBDIR = "raw_text"
DEFAULT_STORAGE_TYPE = StorageType.QDRANT
DEFAULT_EMBEDDING_DIM = 1536  # OpenAI text-embedding-ada-002 dimension


class RAGToolkit(AbstractToolkit):
    """Generic RAG toolkit wrapping CAMEL's RetrievalToolkit.

    Two retrieval paths are offered:

    - Raw text: ``add_document`` + ``query_knowledge_base``, backed by a
      lazily created ``VectorRetriever`` whose storage lives under
      ``<storage_path>/raw_text`` in the configured collection.
    - Files/URLs: ``information_retrieval``, delegated to CAMEL's
      ``RetrievalToolkit`` built on an ``AutoRetriever`` rooted at
      ``storage_path``.

    Task isolation and other application-specific concerns should be handled
    at the orchestration layer by passing appropriate collection_name and
    storage_path values.
    """

    agent_name: str = Agents.task_agent

    def __init__(
        self,
        api_task_id: str,
        agent_name: str | None = None,
        collection_name: str | None = None,
        storage_path: str | Path | None = None,
        storage_type: StorageType | None = None,
        embedding_model: BaseEmbedding | None = None,
        vector_dim: int | None = None,
    ):
        """Initialize RAGToolkit with configurable storage.

        Creates the storage directory if it does not exist and eagerly
        builds the file/URL retriever; the raw-text components are created
        on first use.

        Args:
            api_task_id (str): Task ID for eigent integration.
            agent_name (str | None): Optional agent name override.
            collection_name (str | None): Name for the vector collection
                (default: "default").
            storage_path (str | Path | None): Path for vector storage
                (default: ~/.eigent/rag_storage). A leading ``~`` is
                expanded.
            storage_type (StorageType | None): Vector storage type
                (default: QDRANT).
            embedding_model (BaseEmbedding | None): Custom embedding model.
                When omitted, an OpenAI embedding is created lazily from
                OPENAI_API_KEY.
            vector_dim (int | None): Embedding dimension (default: 1536).
                Set it when the custom model's dimension differs.
        """
        self.api_task_id = api_task_id
        if agent_name is not None:
            self.agent_name = agent_name

        # Expand "~" for caller-supplied paths too, not only for the
        # default; otherwise a literal "~" directory would be created.
        self._storage_path = Path(
            os.path.expanduser(storage_path or DEFAULT_RAG_STORAGE_PATH)
        )
        self._storage_path.mkdir(parents=True, exist_ok=True)

        self._collection_name = collection_name or DEFAULT_COLLECTION_NAME
        self._storage_type = storage_type or DEFAULT_STORAGE_TYPE
        self._custom_embedding_model = embedding_model
        self._vector_dim = vector_dim or DEFAULT_EMBEDDING_DIM

        # Wrap CAMEL's RetrievalToolkit using composition (file/URL path).
        auto_retriever = AutoRetriever(
            vector_storage_local_path=str(self._storage_path),
            storage_type=self._storage_type,
        )
        self._retrieval_toolkit = RetrievalToolkit(
            auto_retriever=auto_retriever
        )

        # Lazy-initialized components for raw text support
        self._embedding_model: BaseEmbedding | None = None
        self._vector_retriever: VectorRetriever | None = None
        self._storage: BaseVectorStorage | None = None

    def _get_embedding_model(self) -> BaseEmbedding:
        """Return the embedding model, creating it on first call.

        Raises:
            ValueError: If no custom model was supplied and OPENAI_API_KEY
                is not configured.
        """
        if self._embedding_model is None:
            if self._custom_embedding_model is not None:
                self._embedding_model = self._custom_embedding_model
            else:
                api_key = env("OPENAI_API_KEY")
                if not api_key:
                    raise ValueError(
                        "OPENAI_API_KEY required (or provide embedding_model)"
                    )
                self._embedding_model = OpenAIEmbedding(api_key=api_key)
        return self._embedding_model

    def _get_storage(self) -> BaseVectorStorage:
        """Return the raw-text vector storage, creating it on first call."""
        if self._storage is None:
            self._storage = self._create_storage(
                vector_dim=self._vector_dim,
                path=str(self._storage_path / RAW_TEXT_SUBDIR),
                collection_name=self._collection_name,
            )
        return self._storage

    def _create_storage(
        self, vector_dim: int, path: str, collection_name: str
    ) -> BaseVectorStorage:
        """Create vector storage based on configured storage type.

        Raises:
            ValueError: If the configured storage type is not QDRANT.
        """
        if self._storage_type == StorageType.QDRANT:
            return QdrantStorage(
                vector_dim=vector_dim,
                path=path,
                collection_name=collection_name,
            )
        raise ValueError(f"Unsupported storage type: {self._storage_type}")

    def _get_vector_retriever(self) -> VectorRetriever:
        """Return the raw-text vector retriever, creating it on first call."""
        if self._vector_retriever is None:
            self._vector_retriever = VectorRetriever(
                embedding_model=self._get_embedding_model(),
                storage=self._get_storage(),
            )
        return self._vector_retriever

    @staticmethod
    def _extract_source(result: dict) -> str:
        """Return the best available source label for one query result.

        ``metadata`` is checked first (original behavior), then
        ``extra_info``, which is where add_document() hands its metadata to
        the retriever.
        NOTE(review): assumes the retriever echoes ``extra_info`` back in
        query results - confirm against the installed CAMEL version.
        """
        for key in ("metadata", "extra_info"):
            info = result.get(key)
            if not isinstance(info, dict):
                continue
            source = info.get("source") or info.get("doc_id")
            if source:
                return str(source)
        return ""

    def information_retrieval(
        self,
        query: str,
        contents: str | list[str],
        top_k: int = 5,
        similarity_threshold: float = 0.5,
    ) -> str:
        """Retrieves information from a local vector storage based on the query.

        This method uses the vector storage configured for this toolkit and
        retrieves relevant information from the given contents.

        Args:
            query (str): The question or query for which an answer is required.
            contents: Local file paths, remote URLs, or string contents to search.
            top_k: Number of top results to return (default: 5).
            similarity_threshold: Minimum similarity score for results (default: 0.5).

        Returns:
            The information retrieved in response to the query.

        Example:
            information_retrieval(
                query="What are the main features?",
                contents="/path/to/document.pdf"
            )
        """
        # Tool boundary: the agent expects a string, so failures are
        # reported as text instead of propagating.
        try:
            result = self._retrieval_toolkit.information_retrieval(
                query=query,
                contents=contents,
                top_k=top_k,
                similarity_threshold=similarity_threshold,
            )
            logger.info(
                "Retrieved information for query in collection %s",
                self._collection_name,
            )
            return result
        except Exception as e:
            logger.exception("Failed to retrieve information: %s", e)
            return f"Error retrieving information: {str(e)}"

    def add_document(
        self,
        content: str,
        metadata: dict | None = None,
        doc_id: str | None = None,
    ) -> str:
        """Add a raw text document to the knowledge base.

        This method allows adding text content directly without requiring a file.
        Useful for adding API responses, conversation snippets, or any text data.

        Args:
            content: The text content to add to the knowledge base.
            metadata: Optional metadata to associate with the document
                (e.g., source, title, date).
            doc_id: Optional unique identifier for the document.
                If not provided, a hash of the content will be used.

        Returns:
            A confirmation message with the document ID.

        Example:
            add_document(
                content="Python is a programming language.",
                metadata={"source": "wiki"},
                doc_id="doc-001"
            )
        """
        try:
            if not content or not content.strip():
                return "Error: Cannot add empty document"

            # Derive an ID from the content when none (or an empty one) is
            # given. MD5 is used only as a short fingerprint.
            if not doc_id:
                doc_id = hashlib.md5(  # noqa: S324
                    content.encode(), usedforsecurity=False
                ).hexdigest()[:12]

            # Copy so the caller's dict is never mutated.
            doc_metadata = dict(metadata) if metadata else {}
            doc_metadata["doc_id"] = doc_id
            doc_metadata["collection"] = self._collection_name

            retriever = self._get_vector_retriever()
            retriever.process(content=content, extra_info=doc_metadata)

            logger.info(
                "Added document %s to collection %s",
                doc_id,
                self._collection_name,
            )
            return (
                f"Successfully added document (ID: {doc_id}) to knowledge base"
            )

        except Exception as e:
            logger.exception("Failed to add document: %s", e)
            return f"Error adding document: {str(e)}"

    def query_knowledge_base(
        self,
        query: str,
        top_k: int = 5,
        similarity_threshold: float = 0.5,
    ) -> str:
        """Query the knowledge base for relevant information from added documents.

        This queries documents previously added via add_document().
        For querying files/URLs, use information_retrieval() instead.

        Args:
            query (str): The question or search query to find relevant documents.
            top_k (int): Maximum number of relevant chunks to return (default: 5).
            similarity_threshold (float): Minimum similarity score (default: 0.5).

        Returns:
            Retrieved relevant text chunks from the knowledge base,
            or a message if no relevant information is found.

        Example:
            query_knowledge_base(query="What is Python?", top_k=3)
        """
        try:
            if not query or not query.strip():
                return "Error: Query cannot be empty"

            results = self._get_vector_retriever().query(
                query=query,
                top_k=top_k,
                similarity_threshold=similarity_threshold,
            )

            # Format results as a simple numbered list
            formatted_results = []
            for rank, hit in enumerate(results, 1):
                text = hit.get("text", hit.get("content", ""))
                entry = f"{rank}. {text}"
                source = self._extract_source(hit)
                if source:
                    entry += f" (Source: {source})"
                formatted_results.append(entry)

            if not formatted_results:
                return f"No relevant information found for query: {query}"

            logger.info(
                "Retrieved %d results for query in collection %s",
                len(results),
                self._collection_name,
            )
            return "\n\n".join(formatted_results)

        except Exception as e:
            logger.exception("Failed to query knowledge base: %s", e)
            return f"Error querying knowledge base: {str(e)}"

    def list_knowledge_bases(self) -> str:
        """List the knowledge bases found under the storage path.

        Every sub-directory of the storage path is reported, sorted by name.
        NOTE(review): this also reports housekeeping directories such as
        the raw-text sub-directory if present - confirm that is intended.

        Returns:
            A formatted list of directory names, or a hint on how to create
            a knowledge base when none exist.
        """
        try:
            collections = []
            if self._storage_path.exists():
                collections = sorted(
                    entry.name
                    for entry in self._storage_path.iterdir()
                    if entry.is_dir()
                )

            if not collections:
                return "No knowledge bases found. Use add_document or information_retrieval to create one."

            return "Available knowledge bases:\n" + "\n".join(
                f"- {name}" for name in collections
            )

        except Exception as e:
            logger.exception("Failed to list knowledge bases: %s", e)
            return f"Error listing knowledge bases: {str(e)}"

    def get_tools(self) -> list[FunctionTool]:
        """Return the list of tools provided by this toolkit.

        Note: list_knowledge_bases is not exposed as a tool since with task
        isolation, each task has its own collection and listing all KBs
        is not useful for the agent.
        """
        return [
            FunctionTool(self.add_document),
            FunctionTool(self.query_knowledge_base),
            FunctionTool(self.information_retrieval),
        ]

    @classmethod
    def get_can_use_tools(cls, api_task_id: str) -> list[FunctionTool]:
        """Return tools that can be used based on available configuration.

        Args:
            api_task_id (str): Task ID for eigent integration.
        """
        # Auto-derive collection name for task isolation. ``cls`` (not the
        # hard-coded class) so subclasses build their own type.
        toolkit = cls(
            api_task_id=api_task_id,
            collection_name=f"task_{api_task_id}",
        )
        return toolkit.get_tools()
"""Unit tests for RAGToolkit.

CAMEL's AutoRetriever is replaced by a mock in every test, so no vector
store is opened and no embedding request is made.
"""

import shutil
import tempfile
from pathlib import Path
from unittest.mock import MagicMock, Mock, patch

import pytest

from app.utils.toolkit.rag_toolkit import RAGToolkit

# Patch targets and fake environment shared by all tests.
AUTO_RETRIEVER = "app.utils.toolkit.rag_toolkit.AutoRetriever"
DEFAULT_STORAGE = "app.utils.toolkit.rag_toolkit.DEFAULT_RAG_STORAGE_PATH"
FAKE_ENV = {"OPENAI_API_KEY": "test-key"}


@pytest.fixture
def temp_storage_path():
    """Create a temporary storage path for tests and remove it afterwards."""
    temp_dir = tempfile.mkdtemp()
    yield Path(temp_dir)
    shutil.rmtree(temp_dir, ignore_errors=True)


@pytest.fixture
def toolkit(temp_storage_path):
    """Create a RAGToolkit instance with mocked AutoRetriever."""
    with patch(AUTO_RETRIEVER), patch.dict("os.environ", FAKE_ENV):
        return RAGToolkit(
            api_task_id="test-task-123",
            storage_path=temp_storage_path,
        )


def test_toolkit_initialization(temp_storage_path):
    """Test RAGToolkit initializes correctly."""
    with patch(AUTO_RETRIEVER) as mock_ar:
        with patch.dict("os.environ", FAKE_ENV):
            toolkit = RAGToolkit(
                api_task_id="test-task-456",
                collection_name="test_collection",
                storage_path=temp_storage_path,
            )

        assert toolkit.api_task_id == "test-task-456"
        assert toolkit._storage_path == temp_storage_path
        assert toolkit._collection_name == "test_collection"
        assert temp_storage_path.exists()
        mock_ar.assert_called_once()
        call_kwargs = mock_ar.call_args[1]
        assert (
            str(temp_storage_path)
            in call_kwargs["vector_storage_local_path"]
        )


def test_toolkit_initialization_with_custom_agent(temp_storage_path):
    """Test RAGToolkit with custom agent name."""
    with patch(AUTO_RETRIEVER), patch.dict("os.environ", FAKE_ENV):
        toolkit = RAGToolkit(
            api_task_id="test-task",
            agent_name="custom_agent",
            storage_path=temp_storage_path,
        )

    assert toolkit.agent_name == "custom_agent"


def test_list_knowledge_bases_empty(temp_storage_path):
    """Test list_knowledge_bases when no KBs exist."""
    with patch(AUTO_RETRIEVER), patch.dict("os.environ", FAKE_ENV):
        toolkit = RAGToolkit(
            api_task_id="test-task",
            storage_path=temp_storage_path,
        )

    result = toolkit.list_knowledge_bases()
    assert "No knowledge bases found" in result


def test_list_knowledge_bases_with_tasks(temp_storage_path):
    """Test list_knowledge_bases when task directories exist."""
    (temp_storage_path / "task_123").mkdir()
    (temp_storage_path / "task_456").mkdir()

    with patch(AUTO_RETRIEVER), patch.dict("os.environ", FAKE_ENV):
        toolkit = RAGToolkit(
            api_task_id="test-task",
            storage_path=temp_storage_path,
        )

    result = toolkit.list_knowledge_bases()
    assert "task_123" in result
    assert "task_456" in result


def test_get_tools_returns_three_tools(toolkit):
    """Test get_tools returns RAG tools."""
    tools = toolkit.get_tools()

    assert len(tools) == 3
    tool_names = [t.func.__name__ for t in tools]
    assert "add_document" in tool_names
    assert "query_knowledge_base" in tool_names
    assert "information_retrieval" in tool_names


def test_get_can_use_tools_returns_tools(temp_storage_path):
    """Test get_can_use_tools returns tools."""
    # get_can_use_tools() runs the real __init__ with the default storage
    # path; redirect that default to the temp dir so the test never creates
    # ~/.eigent/rag_storage on the machine running the suite.
    with (
        patch(AUTO_RETRIEVER),
        patch(DEFAULT_STORAGE, str(temp_storage_path)),
        patch.object(RAGToolkit, "get_tools") as mock_get_tools,
    ):
        mock_get_tools.return_value = [Mock(), Mock()]
        tools = RAGToolkit.get_can_use_tools("test-task")
        assert len(tools) == 2


def test_get_can_use_tools_auto_derives_collection_name(temp_storage_path):
    """Test get_can_use_tools auto-derives collection_name from api_task_id."""
    # __init__ is mocked out here, so no directory is touched.
    with (
        patch(AUTO_RETRIEVER),
        patch.object(RAGToolkit, "__init__", return_value=None) as mock_init,
        patch.object(RAGToolkit, "get_tools", return_value=[]),
    ):
        RAGToolkit.get_can_use_tools("test-task-123")
        mock_init.assert_called_once_with(
            api_task_id="test-task-123",
            collection_name="task_test-task-123",
        )


def test_default_collection_name(temp_storage_path):
    """Test default collection_name when not provided."""
    with patch(AUTO_RETRIEVER), patch.dict("os.environ", FAKE_ENV):
        toolkit = RAGToolkit(
            api_task_id="test-task",
            storage_path=temp_storage_path,
        )
        assert toolkit._collection_name == "default"


@patch(AUTO_RETRIEVER)
def test_information_retrieval_success(
    mock_auto_retriever_class, temp_storage_path
):
    """Test successful information retrieval."""
    mock_auto_retriever = MagicMock()
    mock_auto_retriever.run_vector_retriever.return_value = {
        "text": ["Relevant content about the query"]
    }
    mock_auto_retriever_class.return_value = mock_auto_retriever

    with patch.dict("os.environ", FAKE_ENV):
        toolkit = RAGToolkit(
            api_task_id="test-task",
            storage_path=temp_storage_path,
        )

        result = toolkit.information_retrieval(
            query="What is the content?",
            contents="/path/to/document.pdf",
            top_k=5,
        )

    assert isinstance(result, str)
    mock_auto_retriever.run_vector_retriever.assert_called_once()


@patch(AUTO_RETRIEVER)
def test_information_retrieval_with_error(
    mock_auto_retriever_class, temp_storage_path
):
    """Test information retrieval handles errors gracefully."""
    mock_auto_retriever = MagicMock()
    mock_auto_retriever.run_vector_retriever.side_effect = Exception(
        "Test error"
    )
    mock_auto_retriever_class.return_value = mock_auto_retriever

    with patch.dict("os.environ", FAKE_ENV):
        toolkit = RAGToolkit(
            api_task_id="test-task",
            storage_path=temp_storage_path,
        )

        result = toolkit.information_retrieval(
            query="What is the content?",
            contents="/path/to/document.pdf",
        )

    assert "Error" in result
    assert "Test error" in result


@patch(AUTO_RETRIEVER)
def test_information_retrieval_with_list_contents(
    mock_auto_retriever_class, temp_storage_path
):
    """Test information retrieval with multiple content sources."""
    mock_auto_retriever = MagicMock()
    mock_auto_retriever.run_vector_retriever.return_value = {
        "text": ["Combined results from multiple sources"]
    }
    mock_auto_retriever_class.return_value = mock_auto_retriever

    with patch.dict("os.environ", FAKE_ENV):
        toolkit = RAGToolkit(
            api_task_id="test-task",
            storage_path=temp_storage_path,
        )

        result = toolkit.information_retrieval(
            query="What is the content?",
            contents=["/path/to/doc1.pdf", "/path/to/doc2.pdf"],
        )

    assert isinstance(result, str)
    mock_auto_retriever.run_vector_retriever.assert_called_once()
"sha256:f0f381c517b08b862612b31e915b5e84aeeef8da9387bcb7a73a221628ae3310", size = 5278, upload-time = "2025-05-16T17:06:02.051Z" }, ] +[[package]] +name = "dataclasses-json" +version = "0.6.7" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "marshmallow" }, + { name = "typing-inspect" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/64/a4/f71d9cf3a5ac257c993b5ca3f93df5f7fb395c725e7f1e6479d2514173c3/dataclasses_json-0.6.7.tar.gz", hash = "sha256:b6b3e528266ea45b9535223bc53ca645f5208833c29229e847b3f26a1cc55fc0", size = 32227, upload-time = "2024-06-09T16:20:19.103Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c3/be/d0d44e092656fe7a06b55e6103cbce807cdbdee17884a5367c68c9860853/dataclasses_json-0.6.7-py3-none-any.whl", hash = "sha256:0dbf33f26c8d5305befd61b39d2b3414e8a407bedc2834dea9b8d642666fb40a", size = 28686, upload-time = "2024-06-09T16:20:16.715Z" }, +] + [[package]] name = "datasets" version = "3.6.0" @@ -619,6 +645,15 @@ dependencies = [ ] sdist = { url = "https://files.pythonhosted.org/packages/4a/8e/5a01644697b03016de339ef444cfff28367f92984dc74eddaab1ed60eada/docx-0.2.4.tar.gz", hash = "sha256:9d7595eac6e86cda0b7136a2995318d039c1f3eaa368a3300805abbbe5dc8877", size = 54925, upload-time = "2014-02-06T10:02:49.394Z" } +[[package]] +name = "emoji" +version = "2.15.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/a2/78/0d2db9382c92a163d7095fc08efff7800880f830a152cfced40161e7638d/emoji-2.15.0.tar.gz", hash = "sha256:eae4ab7d86456a70a00a985125a03263a5eac54cd55e51d7e184b1ed3b6757e4", size = 615483, upload-time = "2025-09-21T12:13:02.755Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e1/5e/4b5aaaabddfacfe36ba7768817bd1f71a7a810a43705e531f3ae4c690767/emoji-2.15.0-py3-none-any.whl", hash = "sha256:205296793d66a89d88af4688fa57fd6496732eb48917a87175a023c8138995eb", size = 608433, upload-time = "2025-09-21T12:13:01.197Z" }, 
+] + [[package]] name = "et-xmlfile" version = "2.0.0" @@ -708,6 +743,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/b5/36/7fb70f04bf00bc646cd5bb45aa9eddb15e19437a28b8fb2b4a5249fac770/filelock-3.20.3-py3-none-any.whl", hash = "sha256:4b0dda527ee31078689fc205ec4f1c1bf7d56cf88b6dc9426c4f230e46c2dce1", size = 16701, upload-time = "2026-01-09T17:55:04.334Z" }, ] +[[package]] +name = "filetype" +version = "1.2.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/bb/29/745f7d30d47fe0f251d3ad3dc2978a23141917661998763bebb6da007eb1/filetype-1.2.0.tar.gz", hash = "sha256:66b56cd6474bf41d8c54660347d37afcc3f7d1970648de365c102ef77548aadb", size = 998020, upload-time = "2022-11-02T17:34:04.141Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/18/79/1b8fa1bb3568781e84c9200f951c735f3f157429f44be0495da55894d620/filetype-1.2.0-py2.py3-none-any.whl", hash = "sha256:7ce71b6880181241cf7ac8697a2f1eb6a8bd9b429f7ad6d27b8db9ba5f1c2d25", size = 19970, upload-time = "2022-11-02T17:34:01.425Z" }, +] + [[package]] name = "flatbuffers" version = "25.12.19" @@ -873,6 +917,27 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/c4/ab/09169d5a4612a5f92490806649ac8d41e3ec9129c636754575b3553f4ea4/googleapis_common_protos-1.72.0-py3-none-any.whl", hash = "sha256:4299c5a82d5ae1a9702ada957347726b167f9f8d1fc352477702a1e851ff4038", size = 297515, upload-time = "2025-11-06T18:29:13.14Z" }, ] +[[package]] +name = "grpcio" +version = "1.76.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/b6/e0/318c1ce3ae5a17894d5791e87aea147587c9e702f24122cc7a5c8bbaeeb1/grpcio-1.76.0.tar.gz", hash = "sha256:7be78388d6da1a25c0d5ec506523db58b18be22d9c37d8d3a32c08be4987bd73", size = 12785182, upload-time = "2025-10-21T16:23:12.106Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/a0/00/8163a1beeb6971f66b4bbe6ac9457b97948beba8dd2fc8e1281dce7f79ec/grpcio-1.76.0-cp311-cp311-linux_armv7l.whl", hash = "sha256:2e1743fbd7f5fa713a1b0a8ac8ebabf0ec980b5d8809ec358d488e273b9cf02a", size = 5843567, upload-time = "2025-10-21T16:20:52.829Z" }, + { url = "https://files.pythonhosted.org/packages/10/c1/934202f5cf335e6d852530ce14ddb0fef21be612ba9ecbbcbd4d748ca32d/grpcio-1.76.0-cp311-cp311-macosx_11_0_universal2.whl", hash = "sha256:a8c2cf1209497cf659a667d7dea88985e834c24b7c3b605e6254cbb5076d985c", size = 11848017, upload-time = "2025-10-21T16:20:56.705Z" }, + { url = "https://files.pythonhosted.org/packages/11/0b/8dec16b1863d74af6eb3543928600ec2195af49ca58b16334972f6775663/grpcio-1.76.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:08caea849a9d3c71a542827d6df9d5a69067b0a1efbea8a855633ff5d9571465", size = 6412027, upload-time = "2025-10-21T16:20:59.3Z" }, + { url = "https://files.pythonhosted.org/packages/d7/64/7b9e6e7ab910bea9d46f2c090380bab274a0b91fb0a2fe9b0cd399fffa12/grpcio-1.76.0-cp311-cp311-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:f0e34c2079d47ae9f6188211db9e777c619a21d4faba6977774e8fa43b085e48", size = 7075913, upload-time = "2025-10-21T16:21:01.645Z" }, + { url = "https://files.pythonhosted.org/packages/68/86/093c46e9546073cefa789bd76d44c5cb2abc824ca62af0c18be590ff13ba/grpcio-1.76.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:8843114c0cfce61b40ad48df65abcfc00d4dba82eae8718fab5352390848c5da", size = 6615417, upload-time = "2025-10-21T16:21:03.844Z" }, + { url = "https://files.pythonhosted.org/packages/f7/b6/5709a3a68500a9c03da6fb71740dcdd5ef245e39266461a03f31a57036d8/grpcio-1.76.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:8eddfb4d203a237da6f3cc8a540dad0517d274b5a1e9e636fd8d2c79b5c1d397", size = 7199683, upload-time = "2025-10-21T16:21:06.195Z" }, + { url = 
"https://files.pythonhosted.org/packages/91/d3/4b1f2bf16ed52ce0b508161df3a2d186e4935379a159a834cb4a7d687429/grpcio-1.76.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:32483fe2aab2c3794101c2a159070584e5db11d0aa091b2c0ea9c4fc43d0d749", size = 8163109, upload-time = "2025-10-21T16:21:08.498Z" }, + { url = "https://files.pythonhosted.org/packages/5c/61/d9043f95f5f4cf085ac5dd6137b469d41befb04bd80280952ffa2a4c3f12/grpcio-1.76.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:dcfe41187da8992c5f40aa8c5ec086fa3672834d2be57a32384c08d5a05b4c00", size = 7626676, upload-time = "2025-10-21T16:21:10.693Z" }, + { url = "https://files.pythonhosted.org/packages/36/95/fd9a5152ca02d8881e4dd419cdd790e11805979f499a2e5b96488b85cf27/grpcio-1.76.0-cp311-cp311-win32.whl", hash = "sha256:2107b0c024d1b35f4083f11245c0e23846ae64d02f40b2b226684840260ed054", size = 3997688, upload-time = "2025-10-21T16:21:12.746Z" }, + { url = "https://files.pythonhosted.org/packages/60/9c/5c359c8d4c9176cfa3c61ecd4efe5affe1f38d9bae81e81ac7186b4c9cc8/grpcio-1.76.0-cp311-cp311-win_amd64.whl", hash = "sha256:522175aba7af9113c48ec10cc471b9b9bd4f6ceb36aeb4544a8e2c80ed9d252d", size = 4709315, upload-time = "2025-10-21T16:21:15.26Z" }, +] + [[package]] name = "h11" version = "0.16.0" @@ -882,6 +947,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/04/4b/29cac41a4d98d144bf5f6d33995617b185d14b22401f75ca86f384e87ff1/h11-0.16.0-py3-none-any.whl", hash = "sha256:63cf8bbe7522de3bf65932fda1d9c2772064ffb3dae62d55932da54b31cb6c86", size = 37515, upload-time = "2025-04-24T03:35:24.344Z" }, ] +[[package]] +name = "h2" +version = "4.3.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "hpack" }, + { name = "hyperframe" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/1d/17/afa56379f94ad0fe8defd37d6eb3f89a25404ffc71d4d848893d270325fc/h2-4.3.0.tar.gz", hash = "sha256:6c59efe4323fa18b47a632221a1888bd7fde6249819beda254aeca909f221bf1", size = 2152026, 
upload-time = "2025-08-23T18:12:19.778Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/69/b2/119f6e6dcbd96f9069ce9a2665e0146588dc9f88f29549711853645e736a/h2-4.3.0-py3-none-any.whl", hash = "sha256:c438f029a25f7945c69e0ccf0fb951dc3f73a5f6412981daee861431b70e2bdd", size = 61779, upload-time = "2025-08-23T18:12:17.779Z" }, +] + [[package]] name = "hf-xet" version = "1.2.0" @@ -897,6 +975,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/cb/44/870d44b30e1dcfb6a65932e3e1506c103a8a5aea9103c337e7a53180322c/hf_xet-1.2.0-cp37-abi3-win_amd64.whl", hash = "sha256:e6584a52253f72c9f52f9e549d5895ca7a471608495c4ecaa6cc73dba2b24d69", size = 2905735, upload-time = "2025-10-24T19:04:35.928Z" }, ] +[[package]] +name = "hpack" +version = "4.1.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/2c/48/71de9ed269fdae9c8057e5a4c0aa7402e8bb16f2c6e90b3aa53327b113f8/hpack-4.1.0.tar.gz", hash = "sha256:ec5eca154f7056aa06f196a557655c5b009b382873ac8d1e66e79e87535f1dca", size = 51276, upload-time = "2025-01-22T21:44:58.347Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/07/c6/80c95b1b2b94682a72cbdbfb85b81ae2daffa4291fbfa1b1464502ede10d/hpack-4.1.0-py3-none-any.whl", hash = "sha256:157ac792668d995c657d93111f46b4535ed114f0c9c8d672271bbec7eae1b496", size = 34357, upload-time = "2025-01-22T21:44:56.92Z" }, +] + [[package]] name = "html5lib" version = "1.1" @@ -966,6 +1053,9 @@ wheels = [ ] [package.optional-dependencies] +http2 = [ + { name = "h2" }, +] socks = [ { name = "socksio" }, ] @@ -1012,6 +1102,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/f0/0f/310fb31e39e2d734ccaa2c0fb981ee41f7bd5056ce9bc29b2248bd569169/humanfriendly-10.0-py2.py3-none-any.whl", hash = "sha256:1697e1a8a8f550fd43c2865cd84542fc175a61dcb779b6fee18cf6b6ccba1477", size = 86794, upload-time = "2021-09-17T21:40:39.897Z" }, ] +[[package]] +name = "hyperframe" +version = "6.1.0" +source = { registry 
= "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/02/e7/94f8232d4a74cc99514c13a9f995811485a6903d48e5d952771ef6322e30/hyperframe-6.1.0.tar.gz", hash = "sha256:f630908a00854a7adeabd6382b43923a4c4cd4b821fcb527e6ab9e15382a3b08", size = 26566, upload-time = "2025-01-22T21:41:49.302Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/48/30/47d0bf6072f7252e6521f3447ccfa40b421b6824517f82854703d0f5a98b/hyperframe-6.1.0-py3-none-any.whl", hash = "sha256:b03380493a519fce58ea5af42e4a42317bf9bd425596f7a0835ffce80f1a42e5", size = 13007, upload-time = "2025-01-22T21:41:47.295Z" }, +] + [[package]] name = "identify" version = "2.6.16" @@ -1112,6 +1211,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/21/01/857d4608f5edb0664aa791a3d45702e1a5bcfff9934da74035e7b9803846/jiter-0.12.0-graalpy311-graalpy242_311_native-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cd2097de91cf03eaa27b3cbdb969addf83f0179c6afc41bbc4513705e013c65d", size = 347212, upload-time = "2025-11-09T20:49:15.643Z" }, ] +[[package]] +name = "joblib" +version = "1.5.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/41/f2/d34e8b3a08a9cc79a50b2208a93dce981fe615b64d5a4d4abee421d898df/joblib-1.5.3.tar.gz", hash = "sha256:8561a3269e6801106863fd0d6d84bb737be9e7631e33aaed3fb9ce5953688da3", size = 331603, upload-time = "2025-12-15T08:41:46.427Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/7b/91/984aca2ec129e2757d1e4e3c81c3fcda9d0f85b74670a094cc443d9ee949/joblib-1.5.3-py3-none-any.whl", hash = "sha256:5fc3c5039fc5ca8c0276333a188bbd59d6b7ab37fe6632daa76bc7f9ec18e713", size = 309071, upload-time = "2025-12-15T08:41:44.973Z" }, +] + [[package]] name = "jsonschema" version = "4.26.0" @@ -1139,6 +1247,27 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/41/45/1a4ed80516f02155c51f51e8cedb3c1902296743db0bbc66608a0db2814f/jsonschema_specifications-2025.9.1-py3-none-any.whl", hash = "sha256:98802fee3a11ee76ecaca44429fda8a41bff98b00a0f2838151b113f210cc6fe", size = 18437, upload-time = "2025-09-08T01:34:57.871Z" }, ] +[[package]] +name = "langdetect" +version = "1.0.9" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "six" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/0e/72/a3add0e4eec4eb9e2569554f7c70f4a3c27712f40e3284d483e88094cc0e/langdetect-1.0.9.tar.gz", hash = "sha256:cbc1fef89f8d062739774bd51eda3da3274006b3661d199c2655f6b3f6d605a0", size = 981474, upload-time = "2021-05-07T07:54:13.562Z" } + +[[package]] +name = "llvmlite" +version = "0.46.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/74/cd/08ae687ba099c7e3d21fe2ea536500563ef1943c5105bf6ab4ee3829f68e/llvmlite-0.46.0.tar.gz", hash = "sha256:227c9fd6d09dce2783c18b754b7cd9d9b3b3515210c46acc2d3c5badd9870ceb", size = 193456, upload-time = "2025-12-08T18:15:36.295Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/7a/a1/2ad4b2367915faeebe8447f0a057861f646dbf5fbbb3561db42c65659cf3/llvmlite-0.46.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:82f3d39b16f19aa1a56d5fe625883a6ab600d5cc9ea8906cca70ce94cabba067", size = 37232766, upload-time = "2025-12-08T18:14:48.836Z" }, + { url = "https://files.pythonhosted.org/packages/12/b5/99cf8772fdd846c07da4fd70f07812a3c8fd17ea2409522c946bb0f2b277/llvmlite-0.46.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:a3df43900119803bbc52720e758c76f316a9a0f34612a886862dfe0a5591a17e", size = 56275175, upload-time = "2025-12-08T18:14:51.604Z" }, + { url = "https://files.pythonhosted.org/packages/38/f2/ed806f9c003563732da156139c45d970ee435bd0bfa5ed8de87ba972b452/llvmlite-0.46.0-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", 
hash = "sha256:de183fefc8022d21b0aa37fc3e90410bc3524aed8617f0ff76732fc6c3af5361", size = 55128630, upload-time = "2025-12-08T18:14:55.107Z" }, + { url = "https://files.pythonhosted.org/packages/19/0c/8f5a37a65fc9b7b17408508145edd5f86263ad69c19d3574e818f533a0eb/llvmlite-0.46.0-cp311-cp311-win_amd64.whl", hash = "sha256:e8b10bc585c58bdffec9e0c309bb7d51be1f2f15e169a4b4d42f2389e431eb93", size = 38138652, upload-time = "2025-12-08T18:14:58.171Z" }, +] + [[package]] name = "lxml" version = "6.0.2" @@ -1247,6 +1376,18 @@ all = [ { name = "youtube-transcript-api" }, ] +[[package]] +name = "marshmallow" +version = "3.26.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "packaging" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/55/79/de6c16cc902f4fc372236926b0ce2ab7845268dcc30fb2fbb7f71b418631/marshmallow-3.26.2.tar.gz", hash = "sha256:bbe2adb5a03e6e3571b573f42527c6fe926e17467833660bebd11593ab8dfd57", size = 222095, upload-time = "2025-12-22T06:53:53.309Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/be/2f/5108cb3ee4ba6501748c4908b908e55f42a5b66245b4cfe0c99326e1ef6e/marshmallow-3.26.2-py3-none-any.whl", hash = "sha256:013fa8a3c4c276c24d26d84ce934dc964e2aa794345a0f8c7e5a7191482c8a73", size = 50964, upload-time = "2025-12-22T06:53:51.801Z" }, +] + [[package]] name = "mcp" version = "1.25.0" @@ -1390,6 +1531,30 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/da/d9/f7f9379981e39b8c2511c9e0326d212accacb82f12fbfdc1aa2ce2a7b2b6/multiprocess-0.70.16-py39-none-any.whl", hash = "sha256:a0bafd3ae1b732eac64be2e72038231c1ba97724b60b09400d68f229fcc2fbf3", size = 133351, upload-time = "2024-01-28T18:52:31.981Z" }, ] +[[package]] +name = "mypy-extensions" +version = "1.1.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/a2/6e/371856a3fb9d31ca8dac321cda606860fa4548858c0cc45d9d1d4ca2628b/mypy_extensions-1.1.0.tar.gz", hash = 
"sha256:52e68efc3284861e772bbcd66823fde5ae21fd2fdb51c62a211403730b916558", size = 6343, upload-time = "2025-04-22T14:54:24.164Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/79/7b/2c79738432f5c924bef5071f933bcc9efd0473bac3b4aa584a6f7c1c8df8/mypy_extensions-1.1.0-py3-none-any.whl", hash = "sha256:1be4cccdb0f2482337c4743e60421de3a356cd97508abadd57d47403e94f5505", size = 4963, upload-time = "2025-04-22T14:54:22.983Z" }, +] + +[[package]] +name = "nltk" +version = "3.9.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "click" }, + { name = "joblib" }, + { name = "regex" }, + { name = "tqdm" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/f9/76/3a5e4312c19a028770f86fd7c058cf9f4ec4321c6cf7526bab998a5b683c/nltk-3.9.2.tar.gz", hash = "sha256:0f409e9b069ca4177c1903c3e843eef90c7e92992fa4931ae607da6de49e1419", size = 2887629, upload-time = "2025-10-01T07:19:23.764Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/60/90/81ac364ef94209c100e12579629dc92bf7a709a84af32f8c551b02c07e94/nltk-3.9.2-py3-none-any.whl", hash = "sha256:1e209d2b3009110635ed9709a67a1a3e33a10f799490fa71cf4bec218c11c88a", size = 1513404, upload-time = "2025-10-01T07:19:21.648Z" }, +] + [[package]] name = "nodeenv" version = "1.10.0" @@ -1427,6 +1592,22 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/a3/f1/0578d65b4e3dc572967fd702221ea1f42e1e60accfb6b0dd8d8f15410139/nodejs_wheel_binaries-24.13.0-py2.py3-none-win_arm64.whl", hash = "sha256:2e3431d869d6b2dbeef1d469ad0090babbdcc8baaa72c01dd3cc2c6121c96af5", size = 39054688, upload-time = "2026-01-14T11:05:30.739Z" }, ] +[[package]] +name = "numba" +version = "0.63.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "llvmlite" }, + { name = "numpy" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/dc/60/0145d479b2209bd8fdae5f44201eceb8ce5a23e0ed54c71f57db24618665/numba-0.63.1.tar.gz", hash = 
"sha256:b320aa675d0e3b17b40364935ea52a7b1c670c9037c39cf92c49502a75902f4b", size = 2761666, upload-time = "2025-12-10T02:57:39.002Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/70/90/5f8614c165d2e256fbc6c57028519db6f32e4982475a372bbe550ea0454c/numba-0.63.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:b33db00f18ccc790ee9911ce03fcdfe9d5124637d1ecc266f5ae0df06e02fec3", size = 2680501, upload-time = "2025-12-10T02:57:09.797Z" }, + { url = "https://files.pythonhosted.org/packages/dc/9d/d0afc4cf915edd8eadd9b2ab5b696242886ee4f97720d9322650d66a88c6/numba-0.63.1-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:7d31ea186a78a7c0f6b1b2a3fe68057fdb291b045c52d86232b5383b6cf4fc25", size = 3744945, upload-time = "2025-12-10T02:57:11.697Z" }, + { url = "https://files.pythonhosted.org/packages/05/a9/d82f38f2ab73f3be6f838a826b545b80339762ee8969c16a8bf1d39395a8/numba-0.63.1-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ed3bb2fbdb651d6aac394388130a7001aab6f4541837123a4b4ab8b02716530c", size = 3450827, upload-time = "2025-12-10T02:57:13.709Z" }, + { url = "https://files.pythonhosted.org/packages/18/3f/a9b106e93c5bd7434e65f044bae0d204e20aa7f7f85d72ceb872c7c04216/numba-0.63.1-cp311-cp311-win_amd64.whl", hash = "sha256:1ecbff7688f044b1601be70113e2fb1835367ee0b28ffa8f3adf3a05418c5c87", size = 2747262, upload-time = "2025-12-10T02:57:15.664Z" }, +] + [[package]] name = "numpy" version = "1.26.4" @@ -1699,6 +1880,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/54/20/4d324d65cc6d9205fabedc306948156824eb9f0ee1633355a8f7ec5c66bf/pluggy-1.6.0-py3-none-any.whl", hash = "sha256:e920276dd6813095e9377c0bc5566d94c932c33b27a3e3945d8389c374dd4746", size = 20538, upload-time = "2025-05-15T12:30:06.134Z" }, ] +[[package]] +name = "portalocker" +version = "3.2.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pywin32", marker = "sys_platform == 'win32'" }, +] +sdist = { 
url = "https://files.pythonhosted.org/packages/5e/77/65b857a69ed876e1951e88aaba60f5ce6120c33703f7cb61a3c894b8c1b6/portalocker-3.2.0.tar.gz", hash = "sha256:1f3002956a54a8c3730586c5c77bf18fae4149e07eaf1c29fc3faf4d5a3f89ac", size = 95644, upload-time = "2025-06-14T13:20:40.03Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/4b/a6/38c8e2f318bf67d338f4d629e93b0b4b9af331f455f0390ea8ce4a099b26/portalocker-3.2.0-py3-none-any.whl", hash = "sha256:3cdc5f565312224bc570c49337bd21428bba0ef363bbcf58b9ef4a9f11779968", size = 22424, upload-time = "2025-06-14T13:20:38.083Z" }, +] + [[package]] name = "pre-commit" version = "4.5.1" @@ -1974,6 +2167,44 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/10/bd/c038d7cc38edc1aa5bf91ab8068b63d4308c66c4c8bb3cbba7dfbc049f9c/pyparsing-3.3.2-py3-none-any.whl", hash = "sha256:850ba148bd908d7e2411587e247a1e4f0327839c40e2e5e6d05a007ecc69911d", size = 122781, upload-time = "2026-01-21T03:57:55.912Z" }, ] +[[package]] +name = "pypdf" +version = "6.6.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/b8/bb/a44bab1ac3c54dbcf653d7b8bcdee93dddb2d3bf025a3912cacb8149a2f2/pypdf-6.6.2.tar.gz", hash = "sha256:0a3ea3b3303982333404e22d8f75d7b3144f9cf4b2970b96856391a516f9f016", size = 5281850, upload-time = "2026-01-26T11:57:55.964Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/7d/be/549aaf1dfa4ab4aed29b09703d2fb02c4366fc1f05e880948c296c5764b9/pypdf-6.6.2-py3-none-any.whl", hash = "sha256:44c0c9811cfb3b83b28f1c3d054531d5b8b81abaedee0d8cb403650d023832ba", size = 329132, upload-time = "2026-01-26T11:57:54.099Z" }, +] + +[[package]] +name = "pypdfium2" +version = "5.3.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/18/83/173dab58beb6c7e772b838199014c173a2436018dd7cfde9bbf4a3be15da/pypdfium2-5.3.0.tar.gz", hash = "sha256:2873ffc95fcb01f329257ebc64a5fdce44b36447b6b171fe62f7db5dc3269885", 
size = 268742, upload-time = "2026-01-05T16:29:03.02Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e3/a4/6bb5b5918c7fc236ec426be8a0205a984fe0a26ae23d5e4dd497398a6571/pypdfium2-5.3.0-py3-none-android_23_arm64_v8a.whl", hash = "sha256:885df6c78d41600cb086dc0c76b912d165b5bd6931ca08138329ea5a991b3540", size = 2763287, upload-time = "2026-01-05T16:28:24.21Z" }, + { url = "https://files.pythonhosted.org/packages/3e/64/24b41b906006bf07099b095f0420ee1f01a3a83a899f3e3731e4da99c06a/pypdfium2-5.3.0-py3-none-android_23_armeabi_v7a.whl", hash = "sha256:6e53dee6b333ee77582499eff800300fb5aa0c7eb8f52f95ccb5ca35ebc86d48", size = 2303285, upload-time = "2026-01-05T16:28:26.274Z" }, + { url = "https://files.pythonhosted.org/packages/c2/c0/3ec73f4ded83ba6c02acf6e9d228501759d5d74fe57f1b93849ab92dcc20/pypdfium2-5.3.0-py3-none-macosx_11_0_arm64.whl", hash = "sha256:ce4466bdd62119fe25a5f74d107acc9db8652062bf217057630c6ff0bb419523", size = 2816066, upload-time = "2026-01-05T16:28:28.099Z" }, + { url = "https://files.pythonhosted.org/packages/62/ca/e553b3b8b5c2cdc3d955cc313493ac27bbe63fc22624769d56ded585dd5e/pypdfium2-5.3.0-py3-none-macosx_11_0_x86_64.whl", hash = "sha256:cc2647fd03db42b8a56a8835e8bc7899e604e2042cd6fedeea53483185612907", size = 2945545, upload-time = "2026-01-05T16:28:29.489Z" }, + { url = "https://files.pythonhosted.org/packages/a1/56/615b776071e95c8570d579038256d0c77969ff2ff381e427be4ab8967f44/pypdfium2-5.3.0-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:35e205f537ddb4069e4b4e22af7ffe84fcf2d686c3fee5e5349f73268a0ef1ca", size = 2979892, upload-time = "2026-01-05T16:28:31.088Z" }, + { url = "https://files.pythonhosted.org/packages/df/10/27114199b765bdb7d19a9514c07036ad2fc3a579b910e7823ba167ead6de/pypdfium2-5.3.0-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:b5795298f44050797ac030994fc2525ea35d2d714efe70058e0ee22e5f613f27", size = 2765738, upload-time = "2026-01-05T16:28:33.18Z" }, + { url = 
"https://files.pythonhosted.org/packages/b4/d7/2a3afa35e6c205a4f6264c33b8d2f659707989f93c30b336aa58575f66fa/pypdfium2-5.3.0-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b7cd43dfceb77137e69e74c933d41506da1dddaff70f3a794fb0ad0d73e90d75", size = 3064338, upload-time = "2026-01-05T16:28:34.731Z" }, + { url = "https://files.pythonhosted.org/packages/a2/f1/6658755cf6e369bb51d0bccb81c51c300404fbe67c2f894c90000b6442dd/pypdfium2-5.3.0-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d5956867558fd3a793e58691cf169718864610becb765bfe74dd83f05cbf1ae3", size = 3415059, upload-time = "2026-01-05T16:28:37.313Z" }, + { url = "https://files.pythonhosted.org/packages/f5/34/f86482134fa641deb1f524c45ec7ebd6fc8d404df40c5657ddfce528593e/pypdfium2-5.3.0-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b3ff1071e9a782625822658dfe6e29e3a644a66960f8713bb17819f5a0ac5987", size = 2998517, upload-time = "2026-01-05T16:28:38.873Z" }, + { url = "https://files.pythonhosted.org/packages/09/34/40ab99425dcf503c172885904c5dc356c052bfdbd085f9f3cc920e0b8b25/pypdfium2-5.3.0-py3-none-manylinux_2_27_s390x.manylinux_2_28_s390x.whl", hash = "sha256:f319c46ead49d289ab8c1ed2ea63c91e684f35bdc4cf4dc52191c441182ac481", size = 3673154, upload-time = "2026-01-05T16:28:40.347Z" }, + { url = "https://files.pythonhosted.org/packages/a5/67/0f7532f80825a7728a5cbff3f1104857f8f9fe49ebfd6cb25582a89ae8e1/pypdfium2-5.3.0-py3-none-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:6dc67a186da0962294321cace6ccc0a4d212dbc5e9522c640d35725a812324b8", size = 2965002, upload-time = "2026-01-05T16:28:42.143Z" }, + { url = "https://files.pythonhosted.org/packages/ce/6c/c03d2a3d6621b77aac9604bce1c060de2af94950448787298501eac6c6a2/pypdfium2-5.3.0-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:0ad0afd3d2b5b54d86287266fd6ae3fef0e0a1a3df9d2c4984b3e3f8f70e6330", size = 4130530, upload-time = "2026-01-05T16:28:44.264Z" }, + { url = 
"https://files.pythonhosted.org/packages/af/39/9ad1f958cbe35d4693ae87c09ebafda4bb3e4709c7ccaec86c1a829163a3/pypdfium2-5.3.0-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:1afe35230dc3951b3e79b934c0c35a2e79e2372d06503fce6cf1926d2a816f47", size = 3746568, upload-time = "2026-01-05T16:28:45.897Z" }, + { url = "https://files.pythonhosted.org/packages/2a/e2/4d32310166c2d6955d924737df8b0a3e3efc8d133344a98b10f96320157d/pypdfium2-5.3.0-py3-none-musllinux_1_2_i686.whl", hash = "sha256:00385793030cadce08469085cd21b168fd8ff981b009685fef3103bdc5fc4686", size = 4336683, upload-time = "2026-01-05T16:28:47.584Z" }, + { url = "https://files.pythonhosted.org/packages/14/ea/38c337ff12a8cec4b00fd4fdb0a63a70597a344581e20b02addbd301ab56/pypdfium2-5.3.0-py3-none-musllinux_1_2_ppc64le.whl", hash = "sha256:d911e82676398949697fef80b7f412078df14d725a91c10e383b727051530285", size = 4375030, upload-time = "2026-01-05T16:28:49.5Z" }, + { url = "https://files.pythonhosted.org/packages/a1/77/9d8de90c35d2fc383be8819bcde52f5821dacbd7404a0225e4010b99d080/pypdfium2-5.3.0-py3-none-musllinux_1_2_riscv64.whl", hash = "sha256:ca1dc625ed347fac3d9002a3ed33d521d5803409bd572e7b3f823c12ab2ef58f", size = 3928914, upload-time = "2026-01-05T16:28:51.433Z" }, + { url = "https://files.pythonhosted.org/packages/a5/39/9d4a6fbd78fcb6803b0ea5e4952a31d6182a0aaa2609cfcd0eb88446fdb8/pypdfium2-5.3.0-py3-none-musllinux_1_2_s390x.whl", hash = "sha256:ea4f9db2d3575f22cd41f4c7a855240ded842f135e59a961b5b1351a65ce2b6e", size = 4997777, upload-time = "2026-01-05T16:28:53.589Z" }, + { url = "https://files.pythonhosted.org/packages/9d/38/cdd4ed085c264234a59ad32df1dfe432c77a7403da2381e0fcc1ba60b74e/pypdfium2-5.3.0-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:0ea24409613df350223c6afc50911c99dca0d43ddaf2616c5a1ebdffa3e1bcb5", size = 4179895, upload-time = "2026-01-05T16:28:55.322Z" }, + { url = 
"https://files.pythonhosted.org/packages/93/4c/d2f40145c9012482699664f615d7ae540a346c84f68a8179449e69dcc4d8/pypdfium2-5.3.0-py3-none-win32.whl", hash = "sha256:5bf695d603f9eb8fdd7c1786add5cf420d57fbc81df142ed63c029ce29614df9", size = 2993570, upload-time = "2026-01-05T16:28:58.37Z" }, + { url = "https://files.pythonhosted.org/packages/2c/dc/1388ea650020c26ef3f68856b9227e7f153dcaf445e7e4674a0b8f26891e/pypdfium2-5.3.0-py3-none-win_amd64.whl", hash = "sha256:8365af22a39d4373c265f8e90e561cd64d4ddeaf5e6a66546a8caed216ab9574", size = 3102340, upload-time = "2026-01-05T16:28:59.933Z" }, + { url = "https://files.pythonhosted.org/packages/c8/71/a433668d33999b3aeb2c2dda18aaf24948e862ea2ee148078a35daac6c1c/pypdfium2-5.3.0-py3-none-win_arm64.whl", hash = "sha256:0b2c6bf825e084d91d34456be54921da31e9199d9530b05435d69d1a80501a12", size = 2940987, upload-time = "2026-01-05T16:29:01.511Z" }, +] + [[package]] name = "pyreadline3" version = "3.5.4" @@ -2046,6 +2277,24 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/14/1b/a298b06749107c305e1fe0f814c6c74aea7b2f1e10989cb30f544a1b3253/python_dotenv-1.2.1-py3-none-any.whl", hash = "sha256:b81ee9561e9ca4004139c6cbba3a238c32b03e4894671e181b671e8cb8425d61", size = 21230, upload-time = "2025-10-26T15:12:09.109Z" }, ] +[[package]] +name = "python-iso639" +version = "2026.1.31" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/a3/da/701fc47ea3b0579a8ae489d50d5b54f2ef3aeb7768afd31db1d1cfe9f24e/python_iso639-2026.1.31.tar.gz", hash = "sha256:55a1612c15e5fbd3a1fa269a309cbf1e7c13019356e3d6f75bb435ed44c45ddb", size = 174144, upload-time = "2026-01-31T15:04:48.105Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/5b/3a/03ee682b04099e6b02b591955851b0347deb2e3691ae850112000c54ba12/python_iso639-2026.1.31-py3-none-any.whl", hash = "sha256:b2c48fa1300af1299dff4f1e1995ad1059996ed9f22270ea2d6d6bdc5fb03d4c", size = 167757, upload-time = "2026-01-31T15:04:46.458Z" }, 
+] + +[[package]] +name = "python-magic" +version = "0.4.27" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/da/db/0b3e28ac047452d079d375ec6798bf76a036a08182dbb39ed38116a49130/python-magic-0.4.27.tar.gz", hash = "sha256:c1ba14b08e4a5f5c31a302b7721239695b2f0f058d125bd5ce1ee36b9d9d3c3b", size = 14677, upload-time = "2022-06-07T20:16:59.508Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/6c/73/9f872cb81fc5c3bb48f7227872c28975f998f3e7c2b1c16e95e6432bbb90/python_magic-0.4.27-py2.py3-none-any.whl", hash = "sha256:c212960ad306f700aa0d01e5d7a325d20548ff97eb9920dcd29513174f0294d3", size = 13840, upload-time = "2022-06-07T20:16:57.763Z" }, +] + [[package]] name = "python-multipart" version = "0.0.21" @@ -2055,6 +2304,20 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/aa/76/03af049af4dcee5d27442f71b6924f01f3efb5d2bd34f23fcd563f2cc5f5/python_multipart-0.0.21-py3-none-any.whl", hash = "sha256:cf7a6713e01c87aa35387f4774e812c4361150938d20d232800f75ffcf266090", size = 24541, upload-time = "2025-12-17T09:24:21.153Z" }, ] +[[package]] +name = "python-oxmsg" +version = "0.0.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "click" }, + { name = "olefile" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/a2/4e/869f34faedbc968796d2c7e9837dede079c9cb9750917356b1f1eda926e9/python_oxmsg-0.0.2.tar.gz", hash = "sha256:a6aff4deb1b5975d44d49dab1d9384089ffeec819e19c6940bc7ffbc84775fad", size = 34713, upload-time = "2025-02-03T17:13:47.415Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/53/67/f56c69a98c7eb244025845506387d0f961681657c9fcd8b2d2edd148f9d2/python_oxmsg-0.0.2-py3-none-any.whl", hash = "sha256:22be29b14c46016bcd05e34abddfd8e05ee82082f53b82753d115da3fc7d0355", size = 31455, upload-time = "2025-02-03T17:13:46.061Z" }, +] + [[package]] name = "python-pptx" version = "1.0.2" @@ -2106,6 +2369,48 
@@ wheels = [ { url = "https://files.pythonhosted.org/packages/da/e3/ea007450a105ae919a72393cb06f122f288ef60bba2dc64b26e2646fa315/pyyaml-6.0.3-cp311-cp311-win_amd64.whl", hash = "sha256:9f3bfb4965eb874431221a3ff3fdcddc7e74e3b07799e0e84ca4a0f867d449bf", size = 158763, upload-time = "2025-09-25T21:32:09.96Z" }, ] +[[package]] +name = "qdrant-client" +version = "1.16.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "grpcio" }, + { name = "httpx", extra = ["http2"] }, + { name = "numpy" }, + { name = "portalocker" }, + { name = "protobuf" }, + { name = "pydantic" }, + { name = "urllib3" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/ca/7d/3cd10e26ae97b35cf856ca1dc67576e42414ae39502c51165bb36bb1dff8/qdrant_client-1.16.2.tar.gz", hash = "sha256:ca4ef5f9be7b5eadeec89a085d96d5c723585a391eb8b2be8192919ab63185f0", size = 331112, upload-time = "2025-12-12T10:58:30.866Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/08/13/8ce16f808297e16968269de44a14f4fef19b64d9766be1d6ba5ba78b579d/qdrant_client-1.16.2-py3-none-any.whl", hash = "sha256:442c7ef32ae0f005e88b5d3c0783c63d4912b97ae756eb5e052523be682f17d3", size = 377186, upload-time = "2025-12-12T10:58:29.282Z" }, +] + +[[package]] +name = "rapidfuzz" +version = "3.14.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/d3/28/9d808fe62375b9aab5ba92fa9b29371297b067c2790b2d7cda648b1e2f8d/rapidfuzz-3.14.3.tar.gz", hash = "sha256:2491937177868bc4b1e469087601d53f925e8d270ccc21e07404b4b5814b7b5f", size = 57863900, upload-time = "2025-11-01T11:54:52.321Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/76/25/5b0a33ad3332ee1213068c66f7c14e9e221be90bab434f0cb4defa9d6660/rapidfuzz-3.14.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:dea2d113e260a5da0c4003e0a5e9fdf24a9dc2bb9eaa43abd030a1e46ce7837d", size = 1953885, upload-time = "2025-11-01T11:52:47.75Z" }, + { url = 
"https://files.pythonhosted.org/packages/2d/ab/f1181f500c32c8fcf7c966f5920c7e56b9b1d03193386d19c956505c312d/rapidfuzz-3.14.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:e6c31a4aa68cfa75d7eede8b0ed24b9e458447db604c2db53f358be9843d81d3", size = 1390200, upload-time = "2025-11-01T11:52:49.491Z" }, + { url = "https://files.pythonhosted.org/packages/14/2a/0f2de974ececad873865c6bb3ea3ad07c976ac293d5025b2d73325aac1d4/rapidfuzz-3.14.3-cp311-cp311-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:02821366d928e68ddcb567fed8723dad7ea3a979fada6283e6914d5858674850", size = 1389319, upload-time = "2025-11-01T11:52:51.224Z" }, + { url = "https://files.pythonhosted.org/packages/ed/69/309d8f3a0bb3031fd9b667174cc4af56000645298af7c2931be5c3d14bb4/rapidfuzz-3.14.3-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:cfe8df315ab4e6db4e1be72c5170f8e66021acde22cd2f9d04d2058a9fd8162e", size = 3178495, upload-time = "2025-11-01T11:52:53.005Z" }, + { url = "https://files.pythonhosted.org/packages/10/b7/f9c44a99269ea5bf6fd6a40b84e858414b6e241288b9f2b74af470d222b1/rapidfuzz-3.14.3-cp311-cp311-manylinux_2_31_armv7l.whl", hash = "sha256:769f31c60cd79420188fcdb3c823227fc4a6deb35cafec9d14045c7f6743acae", size = 1228443, upload-time = "2025-11-01T11:52:54.991Z" }, + { url = "https://files.pythonhosted.org/packages/f2/0a/3b3137abac7f19c9220e14cd7ce993e35071a7655e7ef697785a3edfea1a/rapidfuzz-3.14.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:54fa03062124e73086dae66a3451c553c1e20a39c077fd704dc7154092c34c63", size = 2411998, upload-time = "2025-11-01T11:52:56.629Z" }, + { url = "https://files.pythonhosted.org/packages/f3/b6/983805a844d44670eaae63831024cdc97ada4e9c62abc6b20703e81e7f9b/rapidfuzz-3.14.3-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:834d1e818005ed0d4ae38f6b87b86fad9b0a74085467ece0727d20e15077c094", size = 2530120, upload-time = "2025-11-01T11:52:58.298Z" }, + { url = 
"https://files.pythonhosted.org/packages/b4/cc/2c97beb2b1be2d7595d805682472f1b1b844111027d5ad89b65e16bdbaaa/rapidfuzz-3.14.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:948b00e8476a91f510dd1ec07272efc7d78c275d83b630455559671d4e33b678", size = 4283129, upload-time = "2025-11-01T11:53:00.188Z" }, + { url = "https://files.pythonhosted.org/packages/4d/03/2f0e5e94941045aefe7eafab72320e61285c07b752df9884ce88d6b8b835/rapidfuzz-3.14.3-cp311-cp311-win32.whl", hash = "sha256:43d0305c36f504232f18ea04e55f2059bb89f169d3119c4ea96a0e15b59e2a91", size = 1724224, upload-time = "2025-11-01T11:53:02.149Z" }, + { url = "https://files.pythonhosted.org/packages/cf/99/5fa23e204435803875daefda73fd61baeabc3c36b8fc0e34c1705aab8c7b/rapidfuzz-3.14.3-cp311-cp311-win_amd64.whl", hash = "sha256:ef6bf930b947bd0735c550683939a032090f1d688dfd8861d6b45307b96fd5c5", size = 1544259, upload-time = "2025-11-01T11:53:03.66Z" }, + { url = "https://files.pythonhosted.org/packages/48/35/d657b85fcc615a42661b98ac90ce8e95bd32af474603a105643963749886/rapidfuzz-3.14.3-cp311-cp311-win_arm64.whl", hash = "sha256:f3eb0ff3b75d6fdccd40b55e7414bb859a1cda77c52762c9c82b85569f5088e7", size = 814734, upload-time = "2025-11-01T11:53:05.008Z" }, + { url = "https://files.pythonhosted.org/packages/c9/33/b5bd6475c7c27164b5becc9b0e3eb978f1e3640fea590dd3dced6006ee83/rapidfuzz-3.14.3-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:7cf174b52cb3ef5d49e45d0a1133b7e7d0ecf770ed01f97ae9962c5c91d97d23", size = 1888499, upload-time = "2025-11-01T11:54:42.094Z" }, + { url = "https://files.pythonhosted.org/packages/30/d2/89d65d4db4bb931beade9121bc71ad916b5fa9396e807d11b33731494e8e/rapidfuzz-3.14.3-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:442cba39957a008dfc5bdef21a9c3f4379e30ffb4e41b8555dbaf4887eca9300", size = 1336747, upload-time = "2025-11-01T11:54:43.957Z" }, + { url = 
"https://files.pythonhosted.org/packages/85/33/cd87d92b23f0b06e8914a61cea6850c6d495ca027f669fab7a379041827a/rapidfuzz-3.14.3-pp311-pypy311_pp73-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1faa0f8f76ba75fd7b142c984947c280ef6558b5067af2ae9b8729b0a0f99ede", size = 1352187, upload-time = "2025-11-01T11:54:45.518Z" }, + { url = "https://files.pythonhosted.org/packages/22/20/9d30b4a1ab26aac22fff17d21dec7e9089ccddfe25151d0a8bb57001dc3d/rapidfuzz-3.14.3-pp311-pypy311_pp73-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1e6eefec45625c634926a9fd46c9e4f31118ac8f3156fff9494422cee45207e6", size = 3101472, upload-time = "2025-11-01T11:54:47.255Z" }, + { url = "https://files.pythonhosted.org/packages/b1/ad/fa2d3e5c29a04ead7eaa731c7cd1f30f9ec3c77b3a578fdf90280797cbcb/rapidfuzz-3.14.3-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:56fefb4382bb12250f164250240b9dd7772e41c5c8ae976fd598a32292449cc5", size = 1511361, upload-time = "2025-11-01T11:54:49.057Z" }, +] + [[package]] name = "readabilipy" version = "0.3.0" @@ -2200,6 +2505,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/6f/bb/5deac77a9af870143c684ab46a7934038a53eb4aa975bc0687ed6ca2c610/requests_oauthlib-1.3.1-py2.py3-none-any.whl", hash = "sha256:2577c501a2fb8d05a304c09d090d6e47c306fef15809d102b327cf8364bddab5", size = 23892, upload-time = "2022-01-29T18:52:22.279Z" }, ] +[[package]] +name = "requests-toolbelt" +version = "1.0.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "requests" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/f3/61/d7545dafb7ac2230c70d38d31cbfe4cc64f7144dc41f6e4e4b78ecd9f5bb/requests-toolbelt-1.0.0.tar.gz", hash = "sha256:7681a0a3d047012b5bdc0ee37d7f8f07ebe76ab08caeccfc3921ce23c88d5bc6", size = 206888, upload-time = "2023-05-01T04:11:33.229Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/3f/51/d4db610ef29373b879047326cbf6fa98b6c1969d6f6dc423279de2b1be2c/requests_toolbelt-1.0.0-py2.py3-none-any.whl", hash = "sha256:cccfdd665f0a24fcf4726e690f65639d272bb0637b9b92dfd91a5568ccf6bd06", size = 54481, upload-time = "2023-05-01T04:11:28.427Z" }, +] + [[package]] name = "rpds-py" version = "0.30.0" @@ -2434,6 +2751,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/18/67/36e9267722cc04a6b9f15c7f3441c2363321a3ea07da7ae0c0707beb2a9c/typing_extensions-4.15.0-py3-none-any.whl", hash = "sha256:f0fa19c6845758ab08074a0cfa8b7aecb71c999ca73d62883bc25cc018c4e548", size = 44614, upload-time = "2025-08-25T13:49:24.86Z" }, ] +[[package]] +name = "typing-inspect" +version = "0.9.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "mypy-extensions" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/dc/74/1789779d91f1961fa9438e9a8710cdae6bd138c80d7303996933d117264a/typing_inspect-0.9.0.tar.gz", hash = "sha256:b23fc42ff6f6ef6954e4852c1fb512cdd18dbea03134f91f856a95ccc9461f78", size = 13825, upload-time = "2023-05-24T20:25:47.612Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/65/f3/107a22063bf27bdccf2024833d3445f4eea42b2e598abfbd46f6a63b6cb0/typing_inspect-0.9.0-py3-none-any.whl", hash = "sha256:9ee6fc59062311ef8547596ab6b955e1b8aa46242d854bfc78f4f6b0eff35f9f", size = 8827, upload-time = "2023-05-24T20:25:45.287Z" }, +] + [[package]] name = "typing-inspection" version = "0.4.2" @@ -2455,6 +2785,58 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/c7/b0/003792df09decd6849a5e39c28b513c06e84436a54440380862b5aeff25d/tzdata-2025.3-py2.py3-none-any.whl", hash = "sha256:06a47e5700f3081aab02b2e513160914ff0694bce9947d6b76ebd6bf57cfc5d1", size = 348521, upload-time = "2025-12-13T17:45:33.889Z" }, ] +[[package]] +name = "unstructured" +version = "0.18.31" +source = { registry = "https://pypi.org/simple" } +dependencies = [ 
+ { name = "backoff" }, + { name = "beautifulsoup4" }, + { name = "charset-normalizer" }, + { name = "dataclasses-json" }, + { name = "emoji" }, + { name = "filetype" }, + { name = "html5lib" }, + { name = "langdetect" }, + { name = "lxml" }, + { name = "nltk" }, + { name = "numba" }, + { name = "numpy" }, + { name = "psutil" }, + { name = "python-iso639" }, + { name = "python-magic" }, + { name = "python-oxmsg" }, + { name = "rapidfuzz" }, + { name = "requests" }, + { name = "tqdm" }, + { name = "typing-extensions" }, + { name = "unstructured-client" }, + { name = "wrapt" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/a9/5f/64285bd69a538bc28753f1423fcaa9d64cd79a9e7c097171b1f0d27e9cdb/unstructured-0.18.31.tar.gz", hash = "sha256:af4bbe32d1894ae6e755f0da6fc0dd307a1d0adeebe0e7cc6278f6cf744339ca", size = 1707700, upload-time = "2026-01-27T15:33:05.378Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c8/4a/9c43f39d9e443c9bc3f2e379b305bca27110adc653b071221b3132c18de5/unstructured-0.18.31-py3-none-any.whl", hash = "sha256:fab4641176cb9b192ed38048758aa0d9843121d03626d18f42275afb31e5b2d3", size = 1794889, upload-time = "2026-01-27T15:33:03.136Z" }, +] + +[[package]] +name = "unstructured-client" +version = "0.42.10" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "aiofiles" }, + { name = "cryptography" }, + { name = "httpcore" }, + { name = "httpx" }, + { name = "pydantic" }, + { name = "pypdf" }, + { name = "pypdfium2" }, + { name = "requests-toolbelt" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/f7/3e/dd81a2065e50b5b013c9d12a0b6346f86b3252d43a65269a72761e234bcb/unstructured_client-0.42.10.tar.gz", hash = "sha256:e516299c27178865dbd4e2bbd6f00a820ddd40323b2578f303106732fc576217", size = 94726, upload-time = "2026-02-03T18:01:50.776Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/c1/f9/bb9b9e7df245549e2daae58b54fdd612f016111c5b06df3c66965ac8545e/unstructured_client-0.42.10-py3-none-any.whl", hash = "sha256:0034ddcd988e17db83080db26fb36f23c24ace34afedeb267dab245029f8f7a2", size = 220161, upload-time = "2026-02-03T18:01:49.487Z" }, +] + [[package]] name = "uritemplate" version = "4.2.0" @@ -2601,6 +2983,24 @@ dependencies = [ ] sdist = { url = "https://files.pythonhosted.org/packages/67/35/25e68fbc99e672127cc6fbb14b8ec1ba3dfef035bf1e4c90f78f24a80b7d/wikipedia-1.4.0.tar.gz", hash = "sha256:db0fad1829fdd441b1852306e9856398204dc0786d2996dd2e0c8bb8e26133b2", size = 27748, upload-time = "2014-11-15T15:59:49.808Z" } +[[package]] +name = "wrapt" +version = "2.1.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/f7/37/ae31f40bec90de2f88d9597d0b5281e23ffe85b893a47ca5d9c05c63a4f6/wrapt-2.1.1.tar.gz", hash = "sha256:5fdcb09bf6db023d88f312bd0767594b414655d58090fc1c46b3414415f67fac", size = 81329, upload-time = "2026-02-03T02:12:13.786Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b8/a8/9254e4da74b30a105935197015b18b31b7a298bf046e67d8952ef74967bd/wrapt-2.1.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6c366434a7fb914c7a5de508ed735ef9c133367114e1a7cb91dfb5cd806a1549", size = 60554, upload-time = "2026-02-03T02:11:13.038Z" }, + { url = "https://files.pythonhosted.org/packages/9e/a1/378579880cc7af226354054a2c255f69615b379d8adad482bfe2f22a0dc2/wrapt-2.1.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:5d6a2068bd2e1e19e5a317c8c0b288267eec4e7347c36bc68a6e378a39f19ee7", size = 61491, upload-time = "2026-02-03T02:12:56.077Z" }, + { url = "https://files.pythonhosted.org/packages/dc/72/957b51c56acca35701665878ad31626182199fc4afecfe67dea072210f95/wrapt-2.1.1-cp311-cp311-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:891ab4713419217b2aed7dd106c9200f64e6a82226775a0d2ebd6bef2ebd1747", size = 113949, 
upload-time = "2026-02-03T02:11:04.516Z" }, + { url = "https://files.pythonhosted.org/packages/cd/74/36bbebb4a3d2ae9c3e6929639721f8606cd0710a82a777c371aa69e36504/wrapt-2.1.1-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c8ef36a0df38d2dc9d907f6617f89e113c5892e0a35f58f45f75901af0ce7d81", size = 115989, upload-time = "2026-02-03T02:12:19.398Z" }, + { url = "https://files.pythonhosted.org/packages/ae/0d/f1177245a083c7be284bc90bddfe5aece32cdd5b858049cb69ce001a0e8d/wrapt-2.1.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:76e9af3ebd86f19973143d4d592cbf3e970cf3f66ddee30b16278c26ae34b8ab", size = 115242, upload-time = "2026-02-03T02:11:08.111Z" }, + { url = "https://files.pythonhosted.org/packages/62/3e/3b7cf5da27e59df61b1eae2d07dd03ff5d6f75b5408d694873cca7a8e33c/wrapt-2.1.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:ff562067485ebdeaef2fa3fe9b1876bc4e7b73762e0a01406ad81e2076edcebf", size = 113676, upload-time = "2026-02-03T02:12:41.026Z" }, + { url = "https://files.pythonhosted.org/packages/f7/65/8248d3912c705f2c66f81cb97c77436f37abcbedb16d633b5ab0d795d8cd/wrapt-2.1.1-cp311-cp311-win32.whl", hash = "sha256:9e60a30aa0909435ec4ea2a3c53e8e1b50ac9f640c0e9fe3f21fd248a22f06c5", size = 57863, upload-time = "2026-02-03T02:12:18.112Z" }, + { url = "https://files.pythonhosted.org/packages/6b/31/d29310ab335f71f00c50466153b3dc985aaf4a9fc03263e543e136859541/wrapt-2.1.1-cp311-cp311-win_amd64.whl", hash = "sha256:7d79954f51fcf84e5ec4878ab4aea32610d70145c5bbc84b3370eabfb1e096c2", size = 60224, upload-time = "2026-02-03T02:12:29.289Z" }, + { url = "https://files.pythonhosted.org/packages/0c/90/a6ec319affa6e2894962a0cb9d73c67f88af1a726d15314bfb5c88b8a08d/wrapt-2.1.1-cp311-cp311-win_arm64.whl", hash = "sha256:d3ffc6b0efe79e08fd947605fd598515aebefe45e50432dc3b5cd437df8b1ada", size = 58643, upload-time = "2026-02-03T02:12:43.022Z" }, + { url = 
"https://files.pythonhosted.org/packages/c4/da/5a086bf4c22a41995312db104ec2ffeee2cf6accca9faaee5315c790377d/wrapt-2.1.1-py3-none-any.whl", hash = "sha256:3b0f4629eb954394a3d7c7a1c8cca25f0b07cefe6aa8545e862e9778152de5b7", size = 43886, upload-time = "2026-02-03T02:11:45.048Z" }, +] + [[package]] name = "xlrd" version = "2.0.2" diff --git a/docs/core/models/gemini.md b/docs/core/models/gemini.md index ef241789..c07df96e 100644 --- a/docs/core/models/gemini.md +++ b/docs/core/models/gemini.md @@ -48,7 +48,7 @@ Click on the Gemini Config card and fill in the following fields: ![Gemini 4 Pn](/docs/images/gemini_3.png) ---- +______________________________________________________________________ > **Video Tutorial:** Prefer a visual guide? Watch the full configuration video > here. diff --git a/docs/core/models/kimi.md b/docs/core/models/kimi.md index dfe2c8b7..bb55b886 100644 --- a/docs/core/models/kimi.md +++ b/docs/core/models/kimi.md @@ -48,4 +48,4 @@ Click on the Moonshot card and fill in the following fields: ![Kimi 4 Pn](/docs/images/kimi_3.png) ---- +______________________________________________________________________ diff --git a/docs/core/models/minimax.md b/docs/core/models/minimax.md index c60a044a..6e942549 100644 --- a/docs/core/models/minimax.md +++ b/docs/core/models/minimax.md @@ -47,4 +47,4 @@ Click on the Minimax Config card and fill in the following fields: ![Minimax 4 Pn](/docs/images/minimax_3.png) ---- +______________________________________________________________________ diff --git a/server/README_CN.md b/server/README_CN.md index f0fb305b..7e9ce222 100644 --- a/server/README_CN.md +++ b/server/README_CN.md @@ -20,7 +20,7 @@ 说明:上述数据均保存在 Docker 中的本地 PostgreSQL 卷中(见“数据持久化”),不经我们云端。若你配置了外部模型或远程 MCP,则相应请求会发往你指定的第三方服务。 ---- +______________________________________________________________________ ### 快速开始(Docker 推荐) @@ -93,7 +93,7 @@ docker logs -f eigent_postgres | cat 提示:若拉取镜像缓慢,可在 Docker Desktop 配置国内镜像加速后重试。 ---- 
+______________________________________________________________________ ### 开发模式(可选) @@ -118,7 +118,7 @@ docker logs -f eigent_postgres | cat uv run uvicorn main:api --reload --port 3001 --host 0.0.0.0 ``` ---- +______________________________________________________________________ ### 其它 diff --git a/server/README_EN.md b/server/README_EN.md index 32e95d14..82242146 100644 --- a/server/README_EN.md +++ b/server/README_EN.md @@ -20,7 +20,7 @@ Note: All the above data is stored in the local PostgreSQL volume in Docker (see “Data Persistence” below). If you configure external models or remote MCP, requests go to the third-party services you specify. ---- +______________________________________________________________________ ### Quick Start (Docker) @@ -91,7 +91,7 @@ docker logs -f eigent_api | cat docker logs -f eigent_postgres | cat ``` ---- +______________________________________________________________________ ### Developer Mode (Optional) @@ -110,7 +110,7 @@ export database_url=postgresql://postgres:123456@localhost:5432/eigent uv run uvicorn main:api --reload --port 3001 --host 0.0.0.0 ``` ---- +______________________________________________________________________ ### Others diff --git a/server/README_PT-BR.md b/server/README_PT-BR.md index 44050a62..83beefda 100644 --- a/server/README_PT-BR.md +++ b/server/README_PT-BR.md @@ -20,7 +20,7 @@ Nota: Todos os dados acima são armazenados no volume PostgreSQL local no Docker (veja "Persistência de Dados" abaixo). Se você configurar modelos externos ou MCP remoto, as solicitações vão para os serviços de terceiros que você especificar. 
---- +______________________________________________________________________ ### Início Rápido (Docker) @@ -91,7 +91,7 @@ docker logs -f eigent_api | cat docker logs -f eigent_postgres | cat ``` ---- +______________________________________________________________________ ### Modo Desenvolvedor (Opcional) @@ -110,7 +110,7 @@ export database_url=postgresql://postgres:123456@localhost:5432/eigent uv run uvicorn main:api --reload --port 3001 --host 0.0.0.0 ``` ---- +______________________________________________________________________ ### Outros