From 931fafa4032aeb9c0a74e3038cd8d196dfe20858 Mon Sep 17 00:00:00 2001 From: "MSI\\ModSetter" Date: Mon, 21 Jul 2025 06:19:37 -0700 Subject: [PATCH] refactor: Remove deprecated document processing services and update imports - Deleted the document_processing package and moved its docling_service module up to app/services/docling_service.py (renamed, contents unchanged). - Updated imports in documents_routes.py and background_tasks.py to reflect the new service structure. - Ensured compatibility with the task logging system by replacing the quoted forward-reference type hints for task_logger and log_entry with the imported TaskLoggingService and Log classes. --- surfsense_backend/app/routes/documents_routes.py | 10 ++++++---- .../{document_processing => }/docling_service.py | 0 .../app/services/document_processing/__init__.py | 1 - surfsense_backend/app/tasks/background_tasks.py | 2 +- 4 files changed, 7 insertions(+), 6 deletions(-) rename surfsense_backend/app/services/{document_processing => }/docling_service.py (100%) delete mode 100644 surfsense_backend/app/services/document_processing/__init__.py diff --git a/surfsense_backend/app/routes/documents_routes.py b/surfsense_backend/app/routes/documents_routes.py index 5c2a0a0..4d3e852 100644 --- a/surfsense_backend/app/routes/documents_routes.py +++ b/surfsense_backend/app/routes/documents_routes.py @@ -3,7 +3,7 @@ from fastapi import APIRouter, Depends, BackgroundTasks, UploadFile, Form, HTTPE from sqlalchemy.ext.asyncio import AsyncSession from sqlalchemy.future import select from typing import List -from app.db import get_async_session, User, SearchSpace, Document, DocumentType +from app.db import Log, get_async_session, User, SearchSpace, Document, DocumentType from app.schemas import DocumentsCreate, DocumentUpdate, DocumentRead from app.users import current_active_user from app.utils.check_ownership import check_ownership @@ -11,6 +11,8 @@ from app.tasks.background_tasks import add_received_markdown_file_document, add_ from app.config import config as app_config # Force asyncio to use standard event loop before unstructured imports import asyncio + +from 
surfsense_backend.app.services.task_logging_service import TaskLoggingService try: asyncio.set_event_loop_policy(asyncio.DefaultEventLoopPolicy()) except RuntimeError: @@ -136,8 +138,8 @@ async def process_file_in_background( search_space_id: int, user_id: str, session: AsyncSession, - task_logger: 'TaskLoggingService', - log_entry: 'Log' + task_logger: TaskLoggingService, + log_entry: Log ): try: # Check if the file is a markdown or text file @@ -383,7 +385,7 @@ async def process_file_in_background( ) # Use Docling service for document processing - from app.services.document_processing.docling_service import create_docling_service + from app.services.docling_service import create_docling_service # Create Docling service docling_service = create_docling_service() diff --git a/surfsense_backend/app/services/document_processing/docling_service.py b/surfsense_backend/app/services/docling_service.py similarity index 100% rename from surfsense_backend/app/services/document_processing/docling_service.py rename to surfsense_backend/app/services/docling_service.py diff --git a/surfsense_backend/app/services/document_processing/__init__.py b/surfsense_backend/app/services/document_processing/__init__.py deleted file mode 100644 index 0c86000..0000000 --- a/surfsense_backend/app/services/document_processing/__init__.py +++ /dev/null @@ -1 +0,0 @@ -# Document processing services for SurfSense \ No newline at end of file diff --git a/surfsense_backend/app/tasks/background_tasks.py b/surfsense_backend/app/tasks/background_tasks.py index d405861..9599619 100644 --- a/surfsense_backend/app/tasks/background_tasks.py +++ b/surfsense_backend/app/tasks/background_tasks.py @@ -682,7 +682,7 @@ async def add_received_file_document_using_docling( raise RuntimeError(f"No long context LLM configured for user {user_id}") # Generate summary using chunked processing for large documents - from app.services.document_processing.docling_service import create_docling_service + from 
app.services.docling_service import create_docling_service docling_service = create_docling_service() summary_content = await docling_service.process_large_document_summary(