Mirror of https://github.com/MODSetter/SurfSense.git, synced 2025-09-10 14:28:57 +00:00

Commit d359a59f6d (parent 0a03c42cc5): Fixed all ruff lint and formatting errors
85 changed files with 5520 additions and 3870 deletions
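The hunks below come from what appears to be the document routes module and show the recurring fixes ruff applies in this commit: imports are sorted and wrapped in parenthesized blocks, typing.List becomes the builtin list, implicit "int = None" defaults become "int | None", bare "except:" clauses name the exception, f-strings use the "!s" conversion instead of str(e), re-raised HTTPExceptions are chained with "from e", and multi-line call arguments gain trailing commas. A minimal sketch of the exception-handling shape these rules push the route handlers toward (the delete_item helper below is hypothetical, not SurfSense code):

from fastapi import HTTPException


async def delete_item(session, item) -> dict:
    try:
        await session.delete(item)
        await session.commit()
        return {"message": "Item deleted successfully"}
    except Exception as e:
        await session.rollback()
        # ruff B904 wants the re-raise chained with "from e";
        # RUF010 rewrites f"{str(e)}" as f"{e!s}".
        raise HTTPException(
            status_code=500, detail=f"Failed to delete item: {e!s}"
        ) from e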
@@ -1,23 +1,35 @@
from litellm import atranscription
from fastapi import APIRouter, Depends, BackgroundTasks, UploadFile, Form, HTTPException
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy.future import select
from typing import List
from app.db import Log, get_async_session, User, SearchSpace, Document, DocumentType
from app.schemas import DocumentsCreate, DocumentUpdate, DocumentRead
from app.users import current_active_user
from app.utils.check_ownership import check_ownership
from app.tasks.background_tasks import add_received_markdown_file_document, add_extension_received_document, add_received_file_document_using_unstructured, add_crawled_url_document, add_youtube_video_document, add_received_file_document_using_llamacloud, add_received_file_document_using_docling
from app.config import config as app_config
# Force asyncio to use standard event loop before unstructured imports
import asyncio

from fastapi import APIRouter, BackgroundTasks, Depends, Form, HTTPException, UploadFile
from litellm import atranscription
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy.future import select

from app.config import config as app_config
from app.db import Document, DocumentType, Log, SearchSpace, User, get_async_session
from app.schemas import DocumentRead, DocumentsCreate, DocumentUpdate
from app.services.task_logging_service import TaskLoggingService
from app.tasks.background_tasks import (
add_crawled_url_document,
add_extension_received_document,
add_received_file_document_using_docling,
add_received_file_document_using_llamacloud,
add_received_file_document_using_unstructured,
add_received_markdown_file_document,
add_youtube_video_document,
)
from app.users import current_active_user
from app.utils.check_ownership import check_ownership

try:
asyncio.set_event_loop_policy(asyncio.DefaultEventLoopPolicy())
except RuntimeError:
except RuntimeError as e:
print("Error setting event loop policy", e)
pass

import os

os.environ["UNSTRUCTURED_HAS_PATCHED_LOOP"] = "1"
@@ -29,7 +41,7 @@ async def create_documents(
request: DocumentsCreate,
session: AsyncSession = Depends(get_async_session),
user: User = Depends(current_active_user),
fastapi_background_tasks: BackgroundTasks = BackgroundTasks()
fastapi_background_tasks: BackgroundTasks = BackgroundTasks(),
):
try:
# Check if the user owns the search space

@@ -41,7 +53,7 @@ async def create_documents(
process_extension_document_with_new_session,
individual_document,
request.search_space_id,
str(user.id)
str(user.id),
)
elif request.document_type == DocumentType.CRAWLED_URL:
for url in request.content:

@@ -49,7 +61,7 @@ async def create_documents(
process_crawled_url_with_new_session,
url,
request.search_space_id,
str(user.id)
str(user.id),
)
elif request.document_type == DocumentType.YOUTUBE_VIDEO:
for url in request.content:

@@ -57,13 +69,10 @@ async def create_documents(
process_youtube_video_with_new_session,
url,
request.search_space_id,
str(user.id)
str(user.id),
)
else:
raise HTTPException(
status_code=400,
detail="Invalid document type"
)
raise HTTPException(status_code=400, detail="Invalid document type")

await session.commit()
return {"message": "Documents processed successfully"}
@@ -72,18 +81,17 @@ async def create_documents(
except Exception as e:
await session.rollback()
raise HTTPException(
status_code=500,
detail=f"Failed to process documents: {str(e)}"
)
status_code=500, detail=f"Failed to process documents: {e!s}"
) from e


@router.post("/documents/fileupload")
async def create_documents(
async def create_documents_file_upload(
files: list[UploadFile],
search_space_id: int = Form(...),
session: AsyncSession = Depends(get_async_session),
user: User = Depends(current_active_user),
fastapi_background_tasks: BackgroundTasks = BackgroundTasks()
fastapi_background_tasks: BackgroundTasks = BackgroundTasks(),
):
try:
await check_ownership(session, SearchSpace, search_space_id, user)

@@ -94,31 +102,32 @@ async def create_documents(
for file in files:
try:
# Save file to a temporary location to avoid stream issues
import tempfile
import aiofiles
import os
import tempfile

# Create temp file
with tempfile.NamedTemporaryFile(delete=False, suffix=os.path.splitext(file.filename)[1]) as temp_file:
with tempfile.NamedTemporaryFile(
delete=False, suffix=os.path.splitext(file.filename)[1]
) as temp_file:
temp_path = temp_file.name

# Write uploaded file to temp file
content = await file.read()
with open(temp_path, "wb") as f:
f.write(content)

fastapi_background_tasks.add_task(
process_file_in_background_with_new_session,
temp_path,
file.filename,
search_space_id,
str(user.id)
str(user.id),
)
except Exception as e:
raise HTTPException(
status_code=422,
detail=f"Failed to process file {file.filename}: {str(e)}"
)
detail=f"Failed to process file {file.filename}: {e!s}",
) from e

await session.commit()
return {"message": "Files uploaded for processing"}

@@ -127,9 +136,8 @@ async def create_documents(
except Exception as e:
await session.rollback()
raise HTTPException(
status_code=500,
detail=f"Failed to upload files: {str(e)}"
)
status_code=500, detail=f"Failed to upload files: {e!s}"
) from e


async def process_file_in_background(
@@ -139,64 +147,71 @@ async def process_file_in_background(
user_id: str,
session: AsyncSession,
task_logger: TaskLoggingService,
log_entry: Log
log_entry: Log,
):
try:
# Check if the file is a markdown or text file
if filename.lower().endswith(('.md', '.markdown', '.txt')):
if filename.lower().endswith((".md", ".markdown", ".txt")):
await task_logger.log_task_progress(
log_entry,
f"Processing markdown/text file: {filename}",
{"file_type": "markdown", "processing_stage": "reading_file"}
{"file_type": "markdown", "processing_stage": "reading_file"},
)

# For markdown files, read the content directly
with open(file_path, 'r', encoding='utf-8') as f:
with open(file_path, encoding="utf-8") as f:
markdown_content = f.read()

# Clean up the temp file
import os

try:
os.unlink(file_path)
except:
except Exception as e:
print("Error deleting temp file", e)
pass

await task_logger.log_task_progress(
log_entry,
f"Creating document from markdown content: {filename}",
{"processing_stage": "creating_document", "content_length": len(markdown_content)}
{
"processing_stage": "creating_document",
"content_length": len(markdown_content),
},
)

# Process markdown directly through specialized function
result = await add_received_markdown_file_document(
session,
filename,
markdown_content,
search_space_id,
user_id
session, filename, markdown_content, search_space_id, user_id
)

if result:
await task_logger.log_task_success(
log_entry,
f"Successfully processed markdown file: {filename}",
{"document_id": result.id, "content_hash": result.content_hash, "file_type": "markdown"}
{
"document_id": result.id,
"content_hash": result.content_hash,
"file_type": "markdown",
},
)
else:
await task_logger.log_task_success(
log_entry,
f"Markdown file already exists (duplicate): {filename}",
{"duplicate_detected": True, "file_type": "markdown"}
{"duplicate_detected": True, "file_type": "markdown"},
)

# Check if the file is an audio file
elif filename.lower().endswith(('.mp3', '.mp4', '.mpeg', '.mpga', '.m4a', '.wav', '.webm')):
elif filename.lower().endswith(
(".mp3", ".mp4", ".mpeg", ".mpga", ".m4a", ".wav", ".webm")
):
await task_logger.log_task_progress(
log_entry,
f"Processing audio file for transcription: {filename}",
{"file_type": "audio", "processing_stage": "starting_transcription"}
{"file_type": "audio", "processing_stage": "starting_transcription"},
)

# Open the audio file for transcription
with open(file_path, "rb") as audio_file:
# Use LiteLLM for audio transcription
@@ -205,65 +220,76 @@ async def process_file_in_background(
model=app_config.STT_SERVICE,
file=audio_file,
api_base=app_config.STT_SERVICE_API_BASE,
api_key=app_config.STT_SERVICE_API_KEY
api_key=app_config.STT_SERVICE_API_KEY,
)
else:
transcription_response = await atranscription(
model=app_config.STT_SERVICE,
api_key=app_config.STT_SERVICE_API_KEY,
file=audio_file
file=audio_file,
)

# Extract the transcribed text
transcribed_text = transcription_response.get("text", "")

# Add metadata about the transcription
transcribed_text = f"# Transcription of {filename}\n\n{transcribed_text}"
transcribed_text = (
f"# Transcription of {filename}\n\n{transcribed_text}"
)

await task_logger.log_task_progress(
log_entry,
f"Transcription completed, creating document: {filename}",
{"processing_stage": "transcription_complete", "transcript_length": len(transcribed_text)}
{
"processing_stage": "transcription_complete",
"transcript_length": len(transcribed_text),
},
)

# Clean up the temp file
try:
os.unlink(file_path)
except:
except Exception as e:
print("Error deleting temp file", e)
pass

# Process transcription as markdown document
result = await add_received_markdown_file_document(
session,
filename,
transcribed_text,
search_space_id,
user_id
session, filename, transcribed_text, search_space_id, user_id
)

if result:
await task_logger.log_task_success(
log_entry,
f"Successfully transcribed and processed audio file: {filename}",
{"document_id": result.id, "content_hash": result.content_hash, "file_type": "audio", "transcript_length": len(transcribed_text)}
{
"document_id": result.id,
"content_hash": result.content_hash,
"file_type": "audio",
"transcript_length": len(transcribed_text),
},
)
else:
await task_logger.log_task_success(
log_entry,
f"Audio file transcript already exists (duplicate): {filename}",
{"duplicate_detected": True, "file_type": "audio"}
{"duplicate_detected": True, "file_type": "audio"},
)

else:
if app_config.ETL_SERVICE == "UNSTRUCTURED":
await task_logger.log_task_progress(
log_entry,
f"Processing file with Unstructured ETL: {filename}",
{"file_type": "document", "etl_service": "UNSTRUCTURED", "processing_stage": "loading"}
{
"file_type": "document",
"etl_service": "UNSTRUCTURED",
"processing_stage": "loading",
},
)

from langchain_unstructured import UnstructuredLoader

# Process the file
loader = UnstructuredLoader(
file_path,
@@ -280,212 +306,257 @@ async def process_file_in_background(
await task_logger.log_task_progress(
log_entry,
f"Unstructured ETL completed, creating document: {filename}",
{"processing_stage": "etl_complete", "elements_count": len(docs)}
{"processing_stage": "etl_complete", "elements_count": len(docs)},
)

# Clean up the temp file
import os

try:
os.unlink(file_path)
except:
except Exception as e:
print("Error deleting temp file", e)
pass

# Pass the documents to the existing background task
result = await add_received_file_document_using_unstructured(
session,
filename,
docs,
search_space_id,
user_id
session, filename, docs, search_space_id, user_id
)

if result:
await task_logger.log_task_success(
log_entry,
f"Successfully processed file with Unstructured: {filename}",
{"document_id": result.id, "content_hash": result.content_hash, "file_type": "document", "etl_service": "UNSTRUCTURED"}
{
"document_id": result.id,
"content_hash": result.content_hash,
"file_type": "document",
"etl_service": "UNSTRUCTURED",
},
)
else:
await task_logger.log_task_success(
log_entry,
f"Document already exists (duplicate): {filename}",
{"duplicate_detected": True, "file_type": "document", "etl_service": "UNSTRUCTURED"}
{
"duplicate_detected": True,
"file_type": "document",
"etl_service": "UNSTRUCTURED",
},
)

elif app_config.ETL_SERVICE == "LLAMACLOUD":
await task_logger.log_task_progress(
log_entry,
f"Processing file with LlamaCloud ETL: {filename}",
{"file_type": "document", "etl_service": "LLAMACLOUD", "processing_stage": "parsing"}
{
"file_type": "document",
"etl_service": "LLAMACLOUD",
"processing_stage": "parsing",
},
)

from llama_cloud_services import LlamaParse
from llama_cloud_services.parse.utils import ResultType

# Create LlamaParse parser instance
parser = LlamaParse(
api_key=app_config.LLAMA_CLOUD_API_KEY,
num_workers=1, # Use single worker for file processing
verbose=True,
language="en",
result_type=ResultType.MD
result_type=ResultType.MD,
)

# Parse the file asynchronously
result = await parser.aparse(file_path)

# Clean up the temp file
import os

try:
os.unlink(file_path)
except:
except Exception as e:
print("Error deleting temp file", e)
pass

# Get markdown documents from the result
markdown_documents = await result.aget_markdown_documents(split_by_page=False)

markdown_documents = await result.aget_markdown_documents(
split_by_page=False
)

await task_logger.log_task_progress(
log_entry,
f"LlamaCloud parsing completed, creating documents: {filename}",
{"processing_stage": "parsing_complete", "documents_count": len(markdown_documents)}
{
"processing_stage": "parsing_complete",
"documents_count": len(markdown_documents),
},
)

for doc in markdown_documents:
# Extract text content from the markdown documents
markdown_content = doc.text

# Process the documents using our LlamaCloud background task
doc_result = await add_received_file_document_using_llamacloud(
session,
filename,
llamacloud_markdown_document=markdown_content,
search_space_id=search_space_id,
user_id=user_id
user_id=user_id,
)

if doc_result:
await task_logger.log_task_success(
log_entry,
f"Successfully processed file with LlamaCloud: {filename}",
{"document_id": doc_result.id, "content_hash": doc_result.content_hash, "file_type": "document", "etl_service": "LLAMACLOUD"}
{
"document_id": doc_result.id,
"content_hash": doc_result.content_hash,
"file_type": "document",
"etl_service": "LLAMACLOUD",
},
)
else:
await task_logger.log_task_success(
log_entry,
f"Document already exists (duplicate): {filename}",
{"duplicate_detected": True, "file_type": "document", "etl_service": "LLAMACLOUD"}
{
"duplicate_detected": True,
"file_type": "document",
"etl_service": "LLAMACLOUD",
},
)

elif app_config.ETL_SERVICE == "DOCLING":
await task_logger.log_task_progress(
log_entry,
f"Processing file with Docling ETL: {filename}",
{"file_type": "document", "etl_service": "DOCLING", "processing_stage": "parsing"}
{
"file_type": "document",
"etl_service": "DOCLING",
"processing_stage": "parsing",
},
)

# Use Docling service for document processing
from app.services.docling_service import create_docling_service

# Create Docling service
docling_service = create_docling_service()

# Process the document
result = await docling_service.process_document(file_path, filename)

# Clean up the temp file
import os

try:
os.unlink(file_path)
except:
except Exception as e:
print("Error deleting temp file", e)
pass

await task_logger.log_task_progress(
log_entry,
f"Docling parsing completed, creating document: {filename}",
{"processing_stage": "parsing_complete", "content_length": len(result['content'])}
{
"processing_stage": "parsing_complete",
"content_length": len(result["content"]),
},
)

# Process the document using our Docling background task
doc_result = await add_received_file_document_using_docling(
session,
filename,
docling_markdown_document=result['content'],
docling_markdown_document=result["content"],
search_space_id=search_space_id,
user_id=user_id
user_id=user_id,
)

if doc_result:
await task_logger.log_task_success(
log_entry,
f"Successfully processed file with Docling: {filename}",
{"document_id": doc_result.id, "content_hash": doc_result.content_hash, "file_type": "document", "etl_service": "DOCLING"}
{
"document_id": doc_result.id,
"content_hash": doc_result.content_hash,
"file_type": "document",
"etl_service": "DOCLING",
},
)
else:
await task_logger.log_task_success(
log_entry,
f"Document already exists (duplicate): {filename}",
{"duplicate_detected": True, "file_type": "document", "etl_service": "DOCLING"}
{
"duplicate_detected": True,
"file_type": "document",
"etl_service": "DOCLING",
},
)
except Exception as e:
await task_logger.log_task_failure(
log_entry,
f"Failed to process file: {filename}",
str(e),
{"error_type": type(e).__name__, "filename": filename}
{"error_type": type(e).__name__, "filename": filename},
)
import logging
logging.error(f"Error processing file in background: {str(e)}")

logging.error(f"Error processing file in background: {e!s}")
raise # Re-raise so the wrapper can also handle it

@router.get("/documents/", response_model=List[DocumentRead])
@router.get("/documents/", response_model=list[DocumentRead])
async def read_documents(
skip: int = 0,
limit: int = 300,
search_space_id: int = None,
search_space_id: int | None = None,
session: AsyncSession = Depends(get_async_session),
user: User = Depends(current_active_user)
user: User = Depends(current_active_user),
):
try:
query = select(Document).join(SearchSpace).filter(
SearchSpace.user_id == user.id)
query = (
select(Document).join(SearchSpace).filter(SearchSpace.user_id == user.id)
)

# Filter by search_space_id if provided
if search_space_id is not None:
query = query.filter(Document.search_space_id == search_space_id)

result = await session.execute(
query.offset(skip).limit(limit)
)
result = await session.execute(query.offset(skip).limit(limit))
db_documents = result.scalars().all()

# Convert database objects to API-friendly format
api_documents = []
for doc in db_documents:
api_documents.append(DocumentRead(
id=doc.id,
title=doc.title,
document_type=doc.document_type,
document_metadata=doc.document_metadata,
content=doc.content,
created_at=doc.created_at,
search_space_id=doc.search_space_id
))
api_documents.append(
DocumentRead(
id=doc.id,
title=doc.title,
document_type=doc.document_type,
document_metadata=doc.document_metadata,
content=doc.content,
created_at=doc.created_at,
search_space_id=doc.search_space_id,
)
)

return api_documents
except Exception as e:
raise HTTPException(
status_code=500,
detail=f"Failed to fetch documents: {str(e)}"
)
status_code=500, detail=f"Failed to fetch documents: {e!s}"
) from e


@router.get("/documents/{document_id}", response_model=DocumentRead)
async def read_document(
document_id: int,
session: AsyncSession = Depends(get_async_session),
user: User = Depends(current_active_user)
user: User = Depends(current_active_user),
):
try:
result = await session.execute(
@@ -497,8 +568,7 @@ async def read_document(
if not document:
raise HTTPException(
status_code=404,
detail=f"Document with id {document_id} not found"
status_code=404, detail=f"Document with id {document_id} not found"
)

# Convert database object to API-friendly format

@@ -509,13 +579,12 @@ async def read_document(
document_metadata=document.document_metadata,
content=document.content,
created_at=document.created_at,
search_space_id=document.search_space_id
search_space_id=document.search_space_id,
)
except Exception as e:
raise HTTPException(
status_code=500,
detail=f"Failed to fetch document: {str(e)}"
)
status_code=500, detail=f"Failed to fetch document: {e!s}"
) from e


@router.put("/documents/{document_id}", response_model=DocumentRead)

@@ -523,7 +592,7 @@ async def update_document(
document_id: int,
document_update: DocumentUpdate,
session: AsyncSession = Depends(get_async_session),
user: User = Depends(current_active_user)
user: User = Depends(current_active_user),
):
try:
# Query the document directly instead of using read_document function
@@ -536,8 +605,7 @@ async def update_document(
if not db_document:
raise HTTPException(
status_code=404,
detail=f"Document with id {document_id} not found"
status_code=404, detail=f"Document with id {document_id} not found"
)

update_data = document_update.model_dump(exclude_unset=True)

@@ -554,23 +622,22 @@ async def update_document(
document_metadata=db_document.document_metadata,
content=db_document.content,
created_at=db_document.created_at,
search_space_id=db_document.search_space_id
search_space_id=db_document.search_space_id,
)
except HTTPException:
raise
except Exception as e:
await session.rollback()
raise HTTPException(
status_code=500,
detail=f"Failed to update document: {str(e)}"
)
status_code=500, detail=f"Failed to update document: {e!s}"
) from e


@router.delete("/documents/{document_id}", response_model=dict)
async def delete_document(
document_id: int,
session: AsyncSession = Depends(get_async_session),
user: User = Depends(current_active_user)
user: User = Depends(current_active_user),
):
try:
# Query the document directly instead of using read_document function

@@ -583,8 +650,7 @@ async def delete_document(
if not document:
raise HTTPException(
status_code=404,
detail=f"Document with id {document_id} not found"
status_code=404, detail=f"Document with id {document_id} not found"
)

await session.delete(document)

@@ -595,15 +661,12 @@ async def delete_document(
except Exception as e:
await session.rollback()
raise HTTPException(
status_code=500,
detail=f"Failed to delete document: {str(e)}"
)
status_code=500, detail=f"Failed to delete document: {e!s}"
) from e


async def process_extension_document_with_new_session(
individual_document,
search_space_id: int,
user_id: str
individual_document, search_space_id: int, user_id: str
):
"""Create a new session and process extension document."""
from app.db import async_session_maker
@@ -612,7 +675,7 @@ async def process_extension_document_with_new_session(
async with async_session_maker() as session:
# Initialize task logging service
task_logger = TaskLoggingService(session, search_space_id)

# Log task start
log_entry = await task_logger.log_task_start(
task_name="process_extension_document",

@@ -622,40 +685,41 @@ async def process_extension_document_with_new_session(
"document_type": "EXTENSION",
"url": individual_document.metadata.VisitedWebPageURL,
"title": individual_document.metadata.VisitedWebPageTitle,
"user_id": user_id
}
"user_id": user_id,
},
)

try:
result = await add_extension_received_document(session, individual_document, search_space_id, user_id)

result = await add_extension_received_document(
session, individual_document, search_space_id, user_id
)

if result:
await task_logger.log_task_success(
log_entry,
f"Successfully processed extension document: {individual_document.metadata.VisitedWebPageTitle}",
{"document_id": result.id, "content_hash": result.content_hash}
{"document_id": result.id, "content_hash": result.content_hash},
)
else:
await task_logger.log_task_success(
log_entry,
f"Extension document already exists (duplicate): {individual_document.metadata.VisitedWebPageTitle}",
{"duplicate_detected": True}
{"duplicate_detected": True},
)
except Exception as e:
await task_logger.log_task_failure(
log_entry,
f"Failed to process extension document: {individual_document.metadata.VisitedWebPageTitle}",
str(e),
{"error_type": type(e).__name__}
{"error_type": type(e).__name__},
)
import logging
logging.error(f"Error processing extension document: {str(e)}")

logging.error(f"Error processing extension document: {e!s}")


async def process_crawled_url_with_new_session(
url: str,
search_space_id: int,
user_id: str
url: str, search_space_id: int, user_id: str
):
"""Create a new session and process crawled URL."""
from app.db import async_session_maker
@@ -664,50 +728,50 @@ async def process_crawled_url_with_new_session(
async with async_session_maker() as session:
# Initialize task logging service
task_logger = TaskLoggingService(session, search_space_id)

# Log task start
log_entry = await task_logger.log_task_start(
task_name="process_crawled_url",
source="document_processor",
message=f"Starting URL crawling and processing for: {url}",
metadata={
"document_type": "CRAWLED_URL",
"url": url,
"user_id": user_id
}
metadata={"document_type": "CRAWLED_URL", "url": url, "user_id": user_id},
)

try:
result = await add_crawled_url_document(session, url, search_space_id, user_id)

result = await add_crawled_url_document(
session, url, search_space_id, user_id
)

if result:
await task_logger.log_task_success(
log_entry,
f"Successfully crawled and processed URL: {url}",
{"document_id": result.id, "title": result.title, "content_hash": result.content_hash}
{
"document_id": result.id,
"title": result.title,
"content_hash": result.content_hash,
},
)
else:
await task_logger.log_task_success(
log_entry,
f"URL document already exists (duplicate): {url}",
{"duplicate_detected": True}
{"duplicate_detected": True},
)
except Exception as e:
await task_logger.log_task_failure(
log_entry,
f"Failed to crawl URL: {url}",
str(e),
{"error_type": type(e).__name__}
{"error_type": type(e).__name__},
)
import logging
logging.error(f"Error processing crawled URL: {str(e)}")

logging.error(f"Error processing crawled URL: {e!s}")


async def process_file_in_background_with_new_session(
file_path: str,
filename: str,
search_space_id: int,
user_id: str
file_path: str, filename: str, search_space_id: int, user_id: str
):
"""Create a new session and process file."""
from app.db import async_session_maker

@@ -716,7 +780,7 @@ async def process_file_in_background_with_new_session(
async with async_session_maker() as session:
# Initialize task logging service
task_logger = TaskLoggingService(session, search_space_id)

# Log task start
log_entry = await task_logger.log_task_start(
task_name="process_file_upload",
@@ -726,29 +790,36 @@ async def process_file_in_background_with_new_session(
"document_type": "FILE",
"filename": filename,
"file_path": file_path,
"user_id": user_id
}
"user_id": user_id,
},
)

try:
await process_file_in_background(file_path, filename, search_space_id, user_id, session, task_logger, log_entry)

await process_file_in_background(
file_path,
filename,
search_space_id,
user_id,
session,
task_logger,
log_entry,
)

# Note: success/failure logging is handled within process_file_in_background
except Exception as e:
await task_logger.log_task_failure(
log_entry,
f"Failed to process file: {filename}",
str(e),
{"error_type": type(e).__name__}
{"error_type": type(e).__name__},
)
import logging
logging.error(f"Error processing file: {str(e)}")

logging.error(f"Error processing file: {e!s}")


async def process_youtube_video_with_new_session(
url: str,
search_space_id: int,
user_id: str
url: str, search_space_id: int, user_id: str
):
"""Create a new session and process YouTube video."""
from app.db import async_session_maker

@@ -757,42 +828,43 @@ async def process_youtube_video_with_new_session(
async with async_session_maker() as session:
# Initialize task logging service
task_logger = TaskLoggingService(session, search_space_id)

# Log task start
log_entry = await task_logger.log_task_start(
task_name="process_youtube_video",
source="document_processor",
message=f"Starting YouTube video processing for: {url}",
metadata={
"document_type": "YOUTUBE_VIDEO",
"url": url,
"user_id": user_id
}
metadata={"document_type": "YOUTUBE_VIDEO", "url": url, "user_id": user_id},
)

try:
result = await add_youtube_video_document(session, url, search_space_id, user_id)

result = await add_youtube_video_document(
session, url, search_space_id, user_id
)

if result:
await task_logger.log_task_success(
log_entry,
f"Successfully processed YouTube video: {result.title}",
{"document_id": result.id, "video_id": result.document_metadata.get("video_id"), "content_hash": result.content_hash}
{
"document_id": result.id,
"video_id": result.document_metadata.get("video_id"),
"content_hash": result.content_hash,
},
)
else:
await task_logger.log_task_success(
log_entry,
f"YouTube video document already exists (duplicate): {url}",
{"duplicate_detected": True}
{"duplicate_detected": True},
)
except Exception as e:
await task_logger.log_task_failure(
log_entry,
f"Failed to process YouTube video: {url}",
str(e),
{"error_type": type(e).__name__}
{"error_type": type(e).__name__},
)
import logging
logging.error(f"Error processing YouTube video: {str(e)}")

logging.error(f"Error processing YouTube video: {e!s}")
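Beyond the reformatting, the diff also modernizes type annotations: List[DocumentRead] becomes the builtin-generic list[DocumentRead], and the implicitly optional "search_space_id: int = None" becomes "int | None = None". A minimal sketch of the same two fixes on a hypothetical helper (not SurfSense code):

# Hypothetical pagination helper illustrating the typing fixes seen above:
# builtin generics instead of typing.List (UP006) and an explicit "| None"
# for implicitly optional defaults (RUF013).
def paginate(items: list[str], limit: int = 300, cursor: int | None = None) -> list[str]:
    start = cursor or 0
    return items[start : start + limit]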