refactor: Remove deprecated document processing services and update imports

- Deleted the document_processing package (including its docling_service module); the Docling service is now imported from app.services.docling_service.
- Updated imports in documents_routes.py and background_tasks.py to reflect the new service structure.
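- Ensured compatibility with the task logging system by replacing the string forward-reference type hints ('TaskLoggingService', 'Log') on process_file_in_background with the imported TaskLoggingService and Log classes.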
This commit is contained in:
MSI\ModSetter 2025-07-21 06:19:37 -07:00
parent 621590c049
commit 931fafa403
4 changed files with 7 additions and 6 deletions

View file

@ -3,7 +3,7 @@ from fastapi import APIRouter, Depends, BackgroundTasks, UploadFile, Form, HTTPE
from sqlalchemy.ext.asyncio import AsyncSession from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy.future import select from sqlalchemy.future import select
from typing import List from typing import List
from app.db import get_async_session, User, SearchSpace, Document, DocumentType from app.db import Log, get_async_session, User, SearchSpace, Document, DocumentType
from app.schemas import DocumentsCreate, DocumentUpdate, DocumentRead from app.schemas import DocumentsCreate, DocumentUpdate, DocumentRead
from app.users import current_active_user from app.users import current_active_user
from app.utils.check_ownership import check_ownership from app.utils.check_ownership import check_ownership
@ -11,6 +11,8 @@ from app.tasks.background_tasks import add_received_markdown_file_document, add_
from app.config import config as app_config from app.config import config as app_config
# Force asyncio to use standard event loop before unstructured imports # Force asyncio to use standard event loop before unstructured imports
import asyncio import asyncio
from surfsense_backend.app.services.task_logging_service import TaskLoggingService
try: try:
asyncio.set_event_loop_policy(asyncio.DefaultEventLoopPolicy()) asyncio.set_event_loop_policy(asyncio.DefaultEventLoopPolicy())
except RuntimeError: except RuntimeError:
@ -136,8 +138,8 @@ async def process_file_in_background(
search_space_id: int, search_space_id: int,
user_id: str, user_id: str,
session: AsyncSession, session: AsyncSession,
task_logger: 'TaskLoggingService', task_logger: TaskLoggingService,
log_entry: 'Log' log_entry: Log
): ):
try: try:
# Check if the file is a markdown or text file # Check if the file is a markdown or text file
@ -383,7 +385,7 @@ async def process_file_in_background(
) )
# Use Docling service for document processing # Use Docling service for document processing
from app.services.document_processing.docling_service import create_docling_service from app.services.docling_service import create_docling_service
# Create Docling service # Create Docling service
docling_service = create_docling_service() docling_service = create_docling_service()

View file

@ -1 +0,0 @@
# Document processing services for SurfSense

View file

@ -682,7 +682,7 @@ async def add_received_file_document_using_docling(
raise RuntimeError(f"No long context LLM configured for user {user_id}") raise RuntimeError(f"No long context LLM configured for user {user_id}")
# Generate summary using chunked processing for large documents # Generate summary using chunked processing for large documents
from app.services.document_processing.docling_service import create_docling_service from app.services.docling_service import create_docling_service
docling_service = create_docling_service() docling_service = create_docling_service()
summary_content = await docling_service.process_large_document_summary( summary_content = await docling_service.process_large_document_summary(