mirror of https://github.com/MODSetter/SurfSense.git
synced 2025-09-02 02:29:08 +00:00

Merge pull request #286 from MODSetter/dev

Some checks failed: pre-commit / pre-commit (push) has been cancelled
feat: added jump to source referencing of citations
This commit is contained in: commit bc89959d2f

9 changed files with 819 additions and 564 deletions
@@ -5,10 +5,24 @@ from fastapi import APIRouter, BackgroundTasks, Depends, Form, HTTPException, Up
 from litellm import atranscription
 from sqlalchemy.ext.asyncio import AsyncSession
 from sqlalchemy.future import select
+from sqlalchemy.orm import selectinload
 
 from app.config import config as app_config
-from app.db import Document, DocumentType, Log, SearchSpace, User, get_async_session
-from app.schemas import DocumentRead, DocumentsCreate, DocumentUpdate
+from app.db import (
+    Chunk,
+    Document,
+    DocumentType,
+    Log,
+    SearchSpace,
+    User,
+    get_async_session,
+)
+from app.schemas import (
+    DocumentRead,
+    DocumentsCreate,
+    DocumentUpdate,
+    DocumentWithChunksRead,
+)
 from app.services.task_logging_service import TaskLoggingService
 from app.tasks.document_processors import (
     add_crawled_url_document,
@@ -140,6 +154,423 @@ async def create_documents_file_upload(
         ) from e
 
 
+@router.get("/documents/", response_model=list[DocumentRead])
+async def read_documents(
+    skip: int = 0,
+    limit: int = 3000,
+    search_space_id: int | None = None,
+    session: AsyncSession = Depends(get_async_session),
+    user: User = Depends(current_active_user),
+):
+    try:
+        query = (
+            select(Document).join(SearchSpace).filter(SearchSpace.user_id == user.id)
+        )
+
+        # Filter by search_space_id if provided
+        if search_space_id is not None:
+            query = query.filter(Document.search_space_id == search_space_id)
+
+        result = await session.execute(query.offset(skip).limit(limit))
+        db_documents = result.scalars().all()
+
+        # Convert database objects to API-friendly format
+        api_documents = []
+        for doc in db_documents:
+            api_documents.append(
+                DocumentRead(
+                    id=doc.id,
+                    title=doc.title,
+                    document_type=doc.document_type,
+                    document_metadata=doc.document_metadata,
+                    content=doc.content,
+                    created_at=doc.created_at,
+                    search_space_id=doc.search_space_id,
+                )
+            )
+
+        return api_documents
+    except Exception as e:
+        raise HTTPException(
+            status_code=500, detail=f"Failed to fetch documents: {e!s}"
+        ) from e
+
+
+@router.get("/documents/{document_id}", response_model=DocumentRead)
+async def read_document(
+    document_id: int,
+    session: AsyncSession = Depends(get_async_session),
+    user: User = Depends(current_active_user),
+):
+    try:
+        result = await session.execute(
+            select(Document)
+            .join(SearchSpace)
+            .filter(Document.id == document_id, SearchSpace.user_id == user.id)
+        )
+        document = result.scalars().first()
+
+        if not document:
+            raise HTTPException(
+                status_code=404, detail=f"Document with id {document_id} not found"
+            )
+
+        # Convert database object to API-friendly format
+        return DocumentRead(
+            id=document.id,
+            title=document.title,
+            document_type=document.document_type,
+            document_metadata=document.document_metadata,
+            content=document.content,
+            created_at=document.created_at,
+            search_space_id=document.search_space_id,
+        )
+    except Exception as e:
+        raise HTTPException(
+            status_code=500, detail=f"Failed to fetch document: {e!s}"
+        ) from e
+
+
+@router.put("/documents/{document_id}", response_model=DocumentRead)
+async def update_document(
+    document_id: int,
+    document_update: DocumentUpdate,
+    session: AsyncSession = Depends(get_async_session),
+    user: User = Depends(current_active_user),
+):
+    try:
+        # Query the document directly instead of using read_document function
+        result = await session.execute(
+            select(Document)
+            .join(SearchSpace)
+            .filter(Document.id == document_id, SearchSpace.user_id == user.id)
+        )
+        db_document = result.scalars().first()
+
+        if not db_document:
+            raise HTTPException(
+                status_code=404, detail=f"Document with id {document_id} not found"
+            )
+
+        update_data = document_update.model_dump(exclude_unset=True)
+        for key, value in update_data.items():
+            setattr(db_document, key, value)
+        await session.commit()
+        await session.refresh(db_document)
+
+        # Convert to DocumentRead for response
+        return DocumentRead(
+            id=db_document.id,
+            title=db_document.title,
+            document_type=db_document.document_type,
+            document_metadata=db_document.document_metadata,
+            content=db_document.content,
+            created_at=db_document.created_at,
+            search_space_id=db_document.search_space_id,
+        )
+    except HTTPException:
+        raise
+    except Exception as e:
+        await session.rollback()
+        raise HTTPException(
+            status_code=500, detail=f"Failed to update document: {e!s}"
+        ) from e
+
+
+@router.delete("/documents/{document_id}", response_model=dict)
+async def delete_document(
+    document_id: int,
+    session: AsyncSession = Depends(get_async_session),
+    user: User = Depends(current_active_user),
+):
+    try:
+        # Query the document directly instead of using read_document function
+        result = await session.execute(
+            select(Document)
+            .join(SearchSpace)
+            .filter(Document.id == document_id, SearchSpace.user_id == user.id)
+        )
+        document = result.scalars().first()
+
+        if not document:
+            raise HTTPException(
+                status_code=404, detail=f"Document with id {document_id} not found"
+            )
+
+        await session.delete(document)
+        await session.commit()
+        return {"message": "Document deleted successfully"}
+    except HTTPException:
+        raise
+    except Exception as e:
+        await session.rollback()
+        raise HTTPException(
+            status_code=500, detail=f"Failed to delete document: {e!s}"
+        ) from e
+
+
+@router.get("/documents/by-chunk/{chunk_id}", response_model=DocumentWithChunksRead)
+async def get_document_by_chunk_id(
+    chunk_id: int,
+    session: AsyncSession = Depends(get_async_session),
+    user: User = Depends(current_active_user),
+):
+    """
+    Retrieves a document based on a chunk ID, including all its chunks ordered by creation time.
+    The document's embedding and chunk embeddings are excluded from the response.
+    """
+    try:
+        # First, get the chunk and verify it exists
+        chunk_result = await session.execute(select(Chunk).filter(Chunk.id == chunk_id))
+        chunk = chunk_result.scalars().first()
+
+        if not chunk:
+            raise HTTPException(
+                status_code=404, detail=f"Chunk with id {chunk_id} not found"
+            )
+
+        # Get the associated document and verify ownership
+        document_result = await session.execute(
+            select(Document)
+            .options(selectinload(Document.chunks))
+            .join(SearchSpace)
+            .filter(Document.id == chunk.document_id, SearchSpace.user_id == user.id)
+        )
+        document = document_result.scalars().first()
+
+        if not document:
+            raise HTTPException(
+                status_code=404,
+                detail="Document not found or you don't have access to it",
+            )
+
+        # Sort chunks by creation time
+        sorted_chunks = sorted(document.chunks, key=lambda x: x.created_at)
+
+        # Return the document with its chunks
+        return DocumentWithChunksRead(
+            id=document.id,
+            title=document.title,
+            document_type=document.document_type,
+            document_metadata=document.document_metadata,
+            content=document.content,
+            created_at=document.created_at,
+            search_space_id=document.search_space_id,
+            chunks=sorted_chunks,
+        )
+    except HTTPException:
+        raise
+    except Exception as e:
+        raise HTTPException(
+            status_code=500, detail=f"Failed to retrieve document: {e!s}"
+        ) from e
+
+
+async def process_extension_document_with_new_session(
+    individual_document, search_space_id: int, user_id: str
+):
+    """Create a new session and process extension document."""
+    from app.db import async_session_maker
+    from app.services.task_logging_service import TaskLoggingService
+
+    async with async_session_maker() as session:
+        # Initialize task logging service
+        task_logger = TaskLoggingService(session, search_space_id)
+
+        # Log task start
+        log_entry = await task_logger.log_task_start(
+            task_name="process_extension_document",
+            source="document_processor",
+            message=f"Starting processing of extension document from {individual_document.metadata.VisitedWebPageTitle}",
+            metadata={
+                "document_type": "EXTENSION",
+                "url": individual_document.metadata.VisitedWebPageURL,
+                "title": individual_document.metadata.VisitedWebPageTitle,
+                "user_id": user_id,
+            },
+        )
+
+        try:
+            result = await add_extension_received_document(
+                session, individual_document, search_space_id, user_id
+            )
+
+            if result:
+                await task_logger.log_task_success(
+                    log_entry,
+                    f"Successfully processed extension document: {individual_document.metadata.VisitedWebPageTitle}",
+                    {"document_id": result.id, "content_hash": result.content_hash},
+                )
+            else:
+                await task_logger.log_task_success(
+                    log_entry,
+                    f"Extension document already exists (duplicate): {individual_document.metadata.VisitedWebPageTitle}",
+                    {"duplicate_detected": True},
+                )
+        except Exception as e:
+            await task_logger.log_task_failure(
+                log_entry,
+                f"Failed to process extension document: {individual_document.metadata.VisitedWebPageTitle}",
+                str(e),
+                {"error_type": type(e).__name__},
+            )
+            import logging
+
+            logging.error(f"Error processing extension document: {e!s}")
+
+
+async def process_crawled_url_with_new_session(
+    url: str, search_space_id: int, user_id: str
+):
+    """Create a new session and process crawled URL."""
+    from app.db import async_session_maker
+    from app.services.task_logging_service import TaskLoggingService
+
+    async with async_session_maker() as session:
+        # Initialize task logging service
+        task_logger = TaskLoggingService(session, search_space_id)
+
+        # Log task start
+        log_entry = await task_logger.log_task_start(
+            task_name="process_crawled_url",
+            source="document_processor",
+            message=f"Starting URL crawling and processing for: {url}",
+            metadata={"document_type": "CRAWLED_URL", "url": url, "user_id": user_id},
+        )
+
+        try:
+            result = await add_crawled_url_document(
+                session, url, search_space_id, user_id
+            )
+
+            if result:
+                await task_logger.log_task_success(
+                    log_entry,
+                    f"Successfully crawled and processed URL: {url}",
+                    {
+                        "document_id": result.id,
+                        "title": result.title,
+                        "content_hash": result.content_hash,
+                    },
+                )
+            else:
+                await task_logger.log_task_success(
+                    log_entry,
+                    f"URL document already exists (duplicate): {url}",
+                    {"duplicate_detected": True},
+                )
+        except Exception as e:
+            await task_logger.log_task_failure(
+                log_entry,
+                f"Failed to crawl URL: {url}",
+                str(e),
+                {"error_type": type(e).__name__},
+            )
+            import logging
+
+            logging.error(f"Error processing crawled URL: {e!s}")
+
+
+async def process_file_in_background_with_new_session(
+    file_path: str, filename: str, search_space_id: int, user_id: str
+):
+    """Create a new session and process file."""
+    from app.db import async_session_maker
+    from app.services.task_logging_service import TaskLoggingService
+
+    async with async_session_maker() as session:
+        # Initialize task logging service
+        task_logger = TaskLoggingService(session, search_space_id)
+
+        # Log task start
+        log_entry = await task_logger.log_task_start(
+            task_name="process_file_upload",
+            source="document_processor",
+            message=f"Starting file processing for: {filename}",
+            metadata={
+                "document_type": "FILE",
+                "filename": filename,
+                "file_path": file_path,
+                "user_id": user_id,
+            },
+        )
+
+        try:
+            await process_file_in_background(
+                file_path,
+                filename,
+                search_space_id,
+                user_id,
+                session,
+                task_logger,
+                log_entry,
+            )
+
+            # Note: success/failure logging is handled within process_file_in_background
+        except Exception as e:
+            await task_logger.log_task_failure(
+                log_entry,
+                f"Failed to process file: {filename}",
+                str(e),
+                {"error_type": type(e).__name__},
+            )
+            import logging
+
+            logging.error(f"Error processing file: {e!s}")
+
+
+async def process_youtube_video_with_new_session(
+    url: str, search_space_id: int, user_id: str
+):
+    """Create a new session and process YouTube video."""
+    from app.db import async_session_maker
+    from app.services.task_logging_service import TaskLoggingService
+
+    async with async_session_maker() as session:
+        # Initialize task logging service
+        task_logger = TaskLoggingService(session, search_space_id)
+
+        # Log task start
+        log_entry = await task_logger.log_task_start(
+            task_name="process_youtube_video",
+            source="document_processor",
+            message=f"Starting YouTube video processing for: {url}",
+            metadata={"document_type": "YOUTUBE_VIDEO", "url": url, "user_id": user_id},
+        )
+
+        try:
+            result = await add_youtube_video_document(
+                session, url, search_space_id, user_id
+            )
+
+            if result:
+                await task_logger.log_task_success(
+                    log_entry,
+                    f"Successfully processed YouTube video: {result.title}",
+                    {
+                        "document_id": result.id,
+                        "video_id": result.document_metadata.get("video_id"),
+                        "content_hash": result.content_hash,
+                    },
+                )
+            else:
+                await task_logger.log_task_success(
+                    log_entry,
+                    f"YouTube video document already exists (duplicate): {url}",
+                    {"duplicate_detected": True},
+                )
+        except Exception as e:
+            await task_logger.log_task_failure(
+                log_entry,
+                f"Failed to process YouTube video: {url}",
+                str(e),
+                {"error_type": type(e).__name__},
+            )
+            import logging
+
+            logging.error(f"Error processing YouTube video: {e!s}")
+
+
 async def process_file_in_background(
     file_path: str,
     filename: str,
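For orientation, here is a minimal sketch of how a client can call the new by-chunk endpoint. The /api/v1 prefix, the surfsense_bearer_token storage key, and the response shape follow the frontend hook added later in this pull request; the helper name itself is illustrative.

// Sketch only: fetch the parent document (with all of its chunks) for a citation's chunk id.
// Assumes the backend base URL is exposed as NEXT_PUBLIC_FASTAPI_BACKEND_URL and the bearer
// token is kept in localStorage under "surfsense_bearer_token", as in use-document-by-chunk.ts below.
async function fetchDocumentForChunk(chunkId: number) {
	const response = await fetch(
		`${process.env.NEXT_PUBLIC_FASTAPI_BACKEND_URL}/api/v1/documents/by-chunk/${chunkId}`,
		{ headers: { Authorization: `Bearer ${localStorage.getItem("surfsense_bearer_token")}` } }
	);
	if (!response.ok) throw new Error(`Failed to retrieve document: ${response.status}`);
	return response.json(); // DocumentWithChunksRead: document fields plus an ordered chunks[] list
}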
@@ -508,363 +939,3 @@ async def process_file_in_background(
 
         logging.error(f"Error processing file in background: {e!s}")
         raise  # Re-raise so the wrapper can also handle it
[360 removed lines not repeated here: the read_documents, read_document, update_document, and delete_document endpoints and the process_extension_document_with_new_session, process_crawled_url_with_new_session, process_file_in_background_with_new_session, and process_youtube_video_with_new_session helpers, line for line the same code that the hunk above adds earlier in the file.]
@@ -13,6 +13,7 @@ from .documents import (
     DocumentRead,
     DocumentsCreate,
     DocumentUpdate,
+    DocumentWithChunksRead,
     ExtensionDocumentContent,
     ExtensionDocumentMetadata,
 )
@@ -53,6 +54,7 @@ __all__ = [
     "DocumentBase",
     "DocumentRead",
     "DocumentUpdate",
+    "DocumentWithChunksRead",
     "DocumentsCreate",
     "ExtensionDocumentContent",
     "ExtensionDocumentMetadata",
@@ -4,6 +4,8 @@ from pydantic import BaseModel, ConfigDict
 
 from app.db import DocumentType
 
+from .chunks import ChunkRead
+
 
 class ExtensionDocumentMetadata(BaseModel):
     BrowsingSessionId: str
@@ -45,3 +47,9 @@ class DocumentRead(BaseModel):
     search_space_id: int
 
     model_config = ConfigDict(from_attributes=True)
+
+
+class DocumentWithChunksRead(DocumentRead):
+    chunks: list[ChunkRead] = []
+
+    model_config = ConfigDict(from_attributes=True)
@@ -1,58 +1,202 @@
 "use client";
 
-import { ExternalLink } from "lucide-react";
+import { ExternalLink, FileText, Loader2 } from "lucide-react";
 import type React from "react";
+import { useEffect, useRef, useState } from "react";
+import { MarkdownViewer } from "@/components/markdown-viewer";
 import { Button } from "@/components/ui/button";
-import { Popover, PopoverContent, PopoverTrigger } from "@/components/ui/popover";
+import { ScrollArea } from "@/components/ui/scroll-area";
+import {
+	Sheet,
+	SheetContent,
+	SheetDescription,
+	SheetHeader,
+	SheetTitle,
+	SheetTrigger,
+} from "@/components/ui/sheet";
+import { useDocumentByChunk } from "@/hooks/use-document-by-chunk";
+import { cn } from "@/lib/utils";
 
 export const CitationDisplay: React.FC<{ index: number; node: any }> = ({ index, node }) => {
-	const truncateText = (text: string, maxLength: number = 200) => {
-		if (text.length <= maxLength) return text;
-		return `${text.substring(0, maxLength)}...`;
-	};
+	const chunkId = Number(node?.id);
+	const sourceType = node?.metadata?.source_type;
+	const [isOpen, setIsOpen] = useState(false);
+	const { document, loading, error, fetchDocumentByChunk, clearDocument } = useDocumentByChunk();
+	const chunksContainerRef = useRef<HTMLDivElement>(null);
+	const highlightedChunkRef = useRef<HTMLDivElement>(null);
+
+	// Check if this is a source type that should render directly from node
+	const isDirectRenderSource = sourceType === "TAVILY_API" || sourceType === "LINKUP_API";
+
+	const handleOpenChange = async (open: boolean) => {
+		setIsOpen(open);
+		if (open && chunkId && !isDirectRenderSource) {
+			await fetchDocumentByChunk(chunkId);
+		} else if (!open && !isDirectRenderSource) {
+			clearDocument();
+		}
+	};
+
+	useEffect(() => {
+		// Scroll to highlighted chunk when document loads
+		if (document && highlightedChunkRef.current && chunksContainerRef.current) {
+			setTimeout(() => {
+				highlightedChunkRef.current?.scrollIntoView({
+					behavior: "smooth",
+					block: "start",
+				});
+			}, 100);
+		}
+	}, [document]);
 
 	const handleUrlClick = (e: React.MouseEvent, url: string) => {
 		e.preventDefault();
 		e.stopPropagation();
 		window.open(url, "_blank", "noopener,noreferrer");
 	};
 
+	const formatDocumentType = (type: string) => {
+		return type
+			.split("_")
+			.map((word) => word.charAt(0) + word.slice(1).toLowerCase())
+			.join(" ");
+	};
+
 	return (
-		<Popover>
-			<PopoverTrigger asChild>
+		<Sheet open={isOpen} onOpenChange={handleOpenChange}>
+			<SheetTrigger asChild>
 				<span className="text-[10px] font-bold bg-slate-500 hover:bg-slate-600 text-white rounded-full w-4 h-4 inline-flex items-center justify-center align-super cursor-pointer transition-colors">
 					{index + 1}
 				</span>
-			</PopoverTrigger>
-			<PopoverContent className="w-80 p-4 space-y-3 relative" align="start">
-				{/* External Link Button - Top Right */}
-				{node?.url && (
-					<Button
-						size="icon"
-						variant="ghost"
-						onClick={(e) => handleUrlClick(e, node.url)}
-						className="absolute top-3 right-3 inline-flex items-center justify-center w-6 h-6 text-blue-600 hover:text-blue-800 dark:text-blue-400 dark:hover:text-blue-200 hover:bg-blue-50 dark:hover:bg-blue-900/20 rounded transition-colors"
-						title="Open in new tab"
-					>
-						<ExternalLink size={14} />
-					</Button>
-				)}
-
-				{/* Heading */}
-				<div className="text-sm font-semibold text-slate-900 dark:text-slate-100 pr-8">
-					{node?.metadata?.group_name || "Source"}
-				</div>
-
-				{/* Source */}
-				<div className="text-xs text-slate-600 dark:text-slate-400 font-medium">
-					{node?.metadata?.title || "Untitled"}
-				</div>
-
-				{/* Body */}
-				<div className="text-xs text-slate-700 dark:text-slate-300 leading-relaxed">
-					{truncateText(node?.text || "No content available")}
-				</div>
-			</PopoverContent>
-		</Popover>
+			</SheetTrigger>
+			<SheetContent side="right" className="w-full sm:max-w-5xl lg:max-w-7xl">
+				<SheetHeader className="px-6 py-4 border-b">
+					<SheetTitle className="flex items-center gap-3 text-lg">
+						<FileText className="h-6 w-6" />
+						{document?.title || node?.metadata?.title || node?.metadata?.group_name || "Source"}
+					</SheetTitle>
+					<SheetDescription className="text-base mt-2">
+						{document
+							? formatDocumentType(document.document_type)
+							: sourceType && formatDocumentType(sourceType)}
+					</SheetDescription>
+				</SheetHeader>
+
+				{!isDirectRenderSource && loading && (
+					<div className="flex items-center justify-center h-64 px-6">
+						<Loader2 className="h-8 w-8 animate-spin text-muted-foreground" />
+					</div>
+				)}
+
+				{!isDirectRenderSource && error && (
+					<div className="flex items-center justify-center h-64 px-6">
+						<p className="text-sm text-destructive">{error}</p>
+					</div>
+				)}
+
+				{/* Direct render for TAVILY_API and LINEAR_API */}
+				{isDirectRenderSource && (
+					<ScrollArea className="h-[calc(100vh-10rem)]">
+						<div className="px-6 py-4">
+							{/* External Link */}
+							{node?.url && (
+								<div className="mb-8">
+									<Button
+										size="default"
+										variant="outline"
+										onClick={(e) => handleUrlClick(e, node.url)}
+										className="w-full py-3"
+									>
+										<ExternalLink className="mr-2 h-4 w-4" />
+										Open in Browser
+									</Button>
+								</div>
+							)}
+
+							{/* Source Information */}
+							<div className="mb-8 p-6 bg-muted/50 rounded-lg border">
+								<h3 className="text-base font-semibold mb-4">Source Information</h3>
+								<div className="text-sm text-muted-foreground mb-3 font-medium">
+									{node?.metadata?.title || "Untitled"}
+								</div>
+								<div className="text-sm text-foreground leading-relaxed whitespace-pre-wrap">
+									{node?.text || "No content available"}
+								</div>
+							</div>
+						</div>
+					</ScrollArea>
+				)}
+
+				{/* API-fetched document content */}
+				{!isDirectRenderSource && document && (
+					<ScrollArea className="h-[calc(100vh-10rem)]">
+						<div className="px-6 py-4">
+							{/* Document Metadata */}
+							{document.document_metadata && Object.keys(document.document_metadata).length > 0 && (
+								<div className="mb-8 p-6 bg-muted/50 rounded-lg border">
+									<h3 className="text-base font-semibold mb-4">Document Information</h3>
+									<dl className="grid grid-cols-1 gap-3 text-sm">
+										{Object.entries(document.document_metadata).map(([key, value]) => (
+											<div key={key} className="flex gap-3">
+												<dt className="font-medium text-muted-foreground capitalize min-w-0 flex-shrink-0">
+													{key.replace(/_/g, " ")}:
+												</dt>
+												<dd className="text-foreground break-words">{String(value)}</dd>
+											</div>
+										))}
+									</dl>
+								</div>
+							)}
+
+							{/* External Link */}
+							{node?.url && (
+								<div className="mb-8">
+									<Button
+										size="default"
+										variant="outline"
+										onClick={(e) => handleUrlClick(e, node.url)}
+										className="w-full py-3"
+									>
+										<ExternalLink className="mr-2 h-4 w-4" />
+										Open in Browser
+									</Button>
+								</div>
+							)}
+
+							{/* Chunks */}
+							<div className="space-y-6" ref={chunksContainerRef}>
+								<h3 className="text-base font-semibold mb-4">Document Content</h3>
+								{document.chunks.map((chunk, idx) => (
+									<div
+										key={chunk.id}
+										ref={chunk.id === chunkId ? highlightedChunkRef : null}
+										className={cn(
+											"p-6 rounded-lg border transition-all duration-300",
+											chunk.id === chunkId
+												? "bg-primary/10 border-primary shadow-md ring-1 ring-primary/20"
+												: "bg-background border-border hover:bg-muted/50 hover:border-muted-foreground/20"
+										)}
+									>
+										<div className="mb-4 flex items-center justify-between">
+											<span className="text-sm font-medium text-muted-foreground">
+												Chunk {idx + 1} of {document.chunks.length}
+											</span>
+											{chunk.id === chunkId && (
+												<span className="text-sm font-medium text-primary bg-primary/10 px-3 py-1 rounded-full">
+													Referenced Chunk
+												</span>
+											)}
+										</div>
+										<div className="text-sm text-foreground whitespace-pre-wrap leading-relaxed">
+											<MarkdownViewer content={chunk.content} className="max-w-fit" />
+										</div>
+									</div>
+								))}
+							</div>
+						</div>
+					</ScrollArea>
+				)}
+			</SheetContent>
+		</Sheet>
 	);
 };
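For reference, a sketch of how the reworked CitationDisplay is mounted. The import path and node values are illustrative, but the prop shape (a numeric chunk id in node.id, plus metadata.source_type, title, url, and text) matches what the component reads above.

import { CitationDisplay } from "@/components/chat/Citation"; // import path illustrative

export function ExampleCitation() {
	// For non TAVILY_API / LINKUP_API sources, opening the sheet fetches the parent
	// document by chunk id and scrolls to the referenced chunk.
	return (
		<CitationDisplay
			index={0}
			node={{
				id: 4211, // chunk id (illustrative)
				url: "https://example.com/page",
				text: "snippet shown for direct-render sources",
				metadata: { source_type: "CRAWLED_URL", title: "Example page" },
			}}
		/>
	);
}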
@@ -1,7 +1,7 @@
 import { Check, Copy } from "lucide-react";
 import Image from "next/image";
 import { useTheme } from "next-themes";
-import React, { useEffect, useMemo, useRef, useState } from "react";
+import { useEffect, useMemo, useRef, useState } from "react";
 import ReactMarkdown from "react-markdown";
 import { Prism as SyntaxHighlighter } from "react-syntax-highlighter";
 import { oneDark, oneLight } from "react-syntax-highlighter/dist/cjs/styles/prism";
@@ -10,105 +10,51 @@ import rehypeSanitize from "rehype-sanitize";
 import remarkGfm from "remark-gfm";
 import { Button } from "@/components/ui/button";
 import { cn } from "@/lib/utils";
-import { Citation } from "./chat/Citation";
-import type { Source } from "./chat/types";
-import CopyButton from "./copy-button";
 
 interface MarkdownViewerProps {
 	content: string;
 	className?: string;
-	getCitationSource?: (id: number) => Source | null;
-	type?: "user" | "ai";
 }
 
-export function MarkdownViewer({
-	content,
-	className,
-	getCitationSource,
-	type = "user",
-}: MarkdownViewerProps) {
+export function MarkdownViewer({ content, className }: MarkdownViewerProps) {
 	const ref = useRef<HTMLDivElement>(null);
 	// Memoize the markdown components to prevent unnecessary re-renders
 	const components = useMemo(() => {
 		return {
 			// Define custom components for markdown elements
-			p: ({ node, children, ...props }: any) => {
-				// If there's no getCitationSource function, just render normally
-				if (!getCitationSource) {
-					return (
-						<p className="my-2" {...props}>
-							{children}
-						</p>
-					);
-				}
-
-				// Process citations within paragraph content
-				return (
-					<p className="my-2" {...props}>
-						{processCitationsInReactChildren(children, getCitationSource)}
-					</p>
-				);
-			},
-			a: ({ node, children, ...props }: any) => {
-				// Process citations within link content if needed
-				const processedChildren = getCitationSource
-					? processCitationsInReactChildren(children, getCitationSource)
-					: children;
-				return (
-					<a className="text-primary hover:underline" {...props}>
-						{processedChildren}
-					</a>
-				);
-			},
-			li: ({ node, children, ...props }: any) => {
-				// Process citations within list item content
-				const processedChildren = getCitationSource
-					? processCitationsInReactChildren(children, getCitationSource)
-					: children;
-				return <li {...props}>{processedChildren}</li>;
-			},
+			p: ({ node, children, ...props }: any) => (
+				<p className="my-2" {...props}>
+					{children}
+				</p>
+			),
+			a: ({ node, children, ...props }: any) => (
+				<a className="text-primary hover:underline" {...props}>
+					{children}
+				</a>
+			),
+			li: ({ node, children, ...props }: any) => <li {...props}>{children}</li>,
 			ul: ({ node, ...props }: any) => <ul className="list-disc pl-5 my-2" {...props} />,
 			ol: ({ node, ...props }: any) => <ol className="list-decimal pl-5 my-2" {...props} />,
-			h1: ({ node, children, ...props }: any) => {
-				const processedChildren = getCitationSource
-					? processCitationsInReactChildren(children, getCitationSource)
-					: children;
-				return (
-					<h1 className="text-2xl font-bold mt-6 mb-2" {...props}>
-						{processedChildren}
-					</h1>
-				);
-			},
-			h2: ({ node, children, ...props }: any) => {
-				const processedChildren = getCitationSource
-					? processCitationsInReactChildren(children, getCitationSource)
-					: children;
-				return (
-					<h2 className="text-xl font-bold mt-5 mb-2" {...props}>
-						{processedChildren}
-					</h2>
-				);
-			},
-			h3: ({ node, children, ...props }: any) => {
-				const processedChildren = getCitationSource
-					? processCitationsInReactChildren(children, getCitationSource)
-					: children;
-				return (
-					<h3 className="text-lg font-bold mt-4 mb-2" {...props}>
-						{processedChildren}
-					</h3>
-				);
-			},
-			h4: ({ node, children, ...props }: any) => {
-				const processedChildren = getCitationSource
-					? processCitationsInReactChildren(children, getCitationSource)
-					: children;
-				return (
-					<h4 className="text-base font-bold mt-3 mb-1" {...props}>
-						{processedChildren}
-					</h4>
-				);
-			},
+			h1: ({ node, children, ...props }: any) => (
+				<h1 className="text-2xl font-bold mt-6 mb-2" {...props}>
+					{children}
+				</h1>
+			),
+			h2: ({ node, children, ...props }: any) => (
+				<h2 className="text-xl font-bold mt-5 mb-2" {...props}>
+					{children}
+				</h2>
+			),
+			h3: ({ node, children, ...props }: any) => (
+				<h3 className="text-lg font-bold mt-4 mb-2" {...props}>
+					{children}
+				</h3>
+			),
+			h4: ({ node, children, ...props }: any) => (
+				<h4 className="text-base font-bold mt-3 mb-1" {...props}>
+					{children}
+				</h4>
+			),
 			blockquote: ({ node, ...props }: any) => (
 				<blockquote className="border-l-4 border-muted pl-4 italic my-2" {...props} />
 			),
@@ -154,7 +100,7 @@ export function MarkdownViewer({
 			);
 		},
 		};
-	}, [getCitationSource]);
+	}, []);
 
 	return (
 		<div className={cn("prose prose-sm dark:prose-invert max-w-none", className)} ref={ref}>
@@ -165,7 +111,6 @@ export function MarkdownViewer({
 			>
 				{content}
 			</ReactMarkdown>
-			{type === "ai" && <CopyButton ref={ref} />}
 		</div>
 	);
 }
@@ -267,77 +212,3 @@ const CodeBlock = ({ children, language }: { children: string; language: string
 		</div>
 	);
 };
-
-// Helper function to process citations within React children
-const processCitationsInReactChildren = (
-	children: React.ReactNode,
-	getCitationSource: (id: number) => Source | null
-): React.ReactNode => {
-	// If children is not an array or string, just return it
-	if (!children || (typeof children !== "string" && !Array.isArray(children))) {
-		return children;
-	}
-
-	// Handle string content directly - this is where we process citation references
-	if (typeof children === "string") {
-		return processCitationsInText(children, getCitationSource);
-	}
-
-	// Handle arrays of children recursively
-	if (Array.isArray(children)) {
-		return React.Children.map(children, (child) => {
-			if (typeof child === "string") {
-				return processCitationsInText(child, getCitationSource);
-			}
-			return child;
-		});
-	}
-
-	return children;
-};
-
-// Process citation references in text content
-const processCitationsInText = (
-	text: string,
-	getCitationSource: (id: number) => Source | null
-): React.ReactNode[] => {
-	// Use improved regex to catch citation numbers more reliably
-	// This will match patterns like [1], [42], etc. including when they appear at the end of a line or sentence
-	const citationRegex = /\[(\d+)\]/g;
-	const parts: React.ReactNode[] = [];
-	let lastIndex = 0;
-	let match: RegExpExecArray | null = citationRegex.exec(text);
-	let position = 0;
-
-	while (match !== null) {
-		// Add text before the citation
-		if (match.index > lastIndex) {
-			parts.push(text.substring(lastIndex, match.index));
-		}
-
-		// Add the citation component
-		const citationId = parseInt(match[1], 10);
-		const source = getCitationSource(citationId);
-
-		parts.push(
-			<Citation
-				key={`citation-${citationId}-${position}`}
-				citationId={citationId}
-				citationText={match[0]}
-				position={position}
-				source={source}
-			/>
-		);
-
-		lastIndex = match.index + match[0].length;
-		position++;
-		match = citationRegex.exec(text);
-	}
-
-	// Add any remaining text after the last citation
-	if (lastIndex < text.length) {
-		parts.push(text.substring(lastIndex));
-	}
-
-	return parts;
-};
surfsense_web/components/ui/scroll-area.tsx (new file, 56 lines)
@@ -0,0 +1,56 @@
+"use client";
+
+import * as ScrollAreaPrimitive from "@radix-ui/react-scroll-area";
+import type * as React from "react";
+
+import { cn } from "@/lib/utils";
+
+function ScrollArea({
+	className,
+	children,
+	...props
+}: React.ComponentProps<typeof ScrollAreaPrimitive.Root>) {
+	return (
+		<ScrollAreaPrimitive.Root
+			data-slot="scroll-area"
+			className={cn("relative", className)}
+			{...props}
+		>
+			<ScrollAreaPrimitive.Viewport
+				data-slot="scroll-area-viewport"
+				className="focus-visible:ring-ring/50 size-full rounded-[inherit] transition-[color,box-shadow] outline-none focus-visible:ring-[3px] focus-visible:outline-1"
+			>
+				{children}
+			</ScrollAreaPrimitive.Viewport>
+			<ScrollBar />
+			<ScrollAreaPrimitive.Corner />
+		</ScrollAreaPrimitive.Root>
+	);
+}
+
+function ScrollBar({
+	className,
+	orientation = "vertical",
+	...props
+}: React.ComponentProps<typeof ScrollAreaPrimitive.ScrollAreaScrollbar>) {
+	return (
+		<ScrollAreaPrimitive.ScrollAreaScrollbar
+			data-slot="scroll-area-scrollbar"
+			orientation={orientation}
+			className={cn(
+				"flex touch-none p-px transition-colors select-none",
+				orientation === "vertical" && "h-full w-2.5 border-l border-l-transparent",
+				orientation === "horizontal" && "h-2.5 flex-col border-t border-t-transparent",
+				className
+			)}
+			{...props}
+		>
+			<ScrollAreaPrimitive.ScrollAreaThumb
+				data-slot="scroll-area-thumb"
+				className="bg-border relative flex-1 rounded-full"
+			/>
+		</ScrollAreaPrimitive.ScrollAreaScrollbar>
+	);
+}
+
+export { ScrollArea, ScrollBar };
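A small usage sketch of the new ScrollArea wrapper (the height and class names are illustrative); the citation sheet above wraps its chunk list in it the same way.

import { ScrollArea } from "@/components/ui/scroll-area";

export function ExampleScrollableList({ items }: { items: string[] }) {
	return (
		<ScrollArea className="h-64 rounded-md border">
			{items.map((item) => (
				<div key={item} className="px-4 py-2">
					{item}
				</div>
			))}
		</ScrollArea>
	);
}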
@@ -1,2 +1,3 @@
+export * from "./use-document-by-chunk";
 export * from "./use-logs";
 export * from "./useSearchSourceConnectors";
surfsense_web/hooks/use-document-by-chunk.ts (new file, 106 lines)
@@ -0,0 +1,106 @@
+"use client";
+import { useCallback, useState } from "react";
+import { toast } from "sonner";
+
+export interface Chunk {
+	id: number;
+	content: string;
+	document_id: number;
+	created_at: string;
+}
+
+export interface DocumentWithChunks {
+	id: number;
+	title: string;
+	document_type: DocumentType;
+	document_metadata: any;
+	content: string;
+	created_at: string;
+	search_space_id: number;
+	chunks: Chunk[];
+}
+
+export type DocumentType =
+	| "EXTENSION"
+	| "CRAWLED_URL"
+	| "SLACK_CONNECTOR"
+	| "NOTION_CONNECTOR"
+	| "FILE"
+	| "YOUTUBE_VIDEO"
+	| "GITHUB_CONNECTOR"
+	| "LINEAR_CONNECTOR"
+	| "DISCORD_CONNECTOR"
+	| "JIRA_CONNECTOR"
+	| "CONFLUENCE_CONNECTOR"
+	| "CLICKUP_CONNECTOR"
+	| "GOOGLE_CALENDAR_CONNECTOR"
+	| "GOOGLE_GMAIL_CONNECTOR";
+
+export function useDocumentByChunk() {
+	const [document, setDocument] = useState<DocumentWithChunks | null>(null);
+	const [loading, setLoading] = useState(false);
+	const [error, setError] = useState<string | null>(null);
+
+	const fetchDocumentByChunk = useCallback(async (chunkId: number) => {
+		try {
+			setLoading(true);
+			setError(null);
+			setDocument(null);
+
+			const response = await fetch(
+				`${process.env.NEXT_PUBLIC_FASTAPI_BACKEND_URL}/api/v1/documents/by-chunk/${chunkId}`,
+				{
+					headers: {
+						Authorization: `Bearer ${localStorage.getItem("surfsense_bearer_token")}`,
+						"Content-Type": "application/json",
+					},
+					method: "GET",
+				}
+			);
+
+			if (!response.ok) {
+				const errorText = await response.text();
+				let errorMessage = "Failed to fetch document";
+
+				try {
+					const errorData = JSON.parse(errorText);
+					errorMessage = errorData.detail || errorMessage;
+				} catch {
+					// If parsing fails, use default message
+				}
+
+				if (response.status === 404) {
+					errorMessage = "Chunk not found or you don't have access to it";
+				}
+
+				toast.error(errorMessage);
+				throw new Error(errorMessage);
+			}
+
+			const data: DocumentWithChunks = await response.json();
+			setDocument(data);
+			setError(null);
+			return data;
+		} catch (err: any) {
+			const errorMessage = err.message || "Failed to fetch document";
+			setError(errorMessage);
+			console.error("Error fetching document by chunk:", err);
+			throw err;
+		} finally {
+			setLoading(false);
+		}
+	}, []);
+
+	const clearDocument = useCallback(() => {
+		setDocument(null);
+		setError(null);
+	}, []);
+
+	return {
+		document,
+		loading,
+		error,
+		fetchDocumentByChunk,
+		clearDocument,
+	};
+}
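A minimal sketch of consuming the hook from a component; the chunk id, labels, and markup are illustrative, while the returned fields match the hook above.

import { useDocumentByChunk } from "@/hooks/use-document-by-chunk";

export function ChunkSourcePreview({ chunkId }: { chunkId: number }) {
	const { document, loading, error, fetchDocumentByChunk, clearDocument } = useDocumentByChunk();

	return (
		<div>
			<button type="button" onClick={() => fetchDocumentByChunk(chunkId)}>
				Load source
			</button>
			<button type="button" onClick={clearDocument}>
				Clear
			</button>
			{loading && <p>Loading…</p>}
			{error && <p>{error}</p>}
			{document && (
				<ul>
					{document.chunks.map((chunk) => (
						<li key={chunk.id}>{chunk.content}</li>
					))}
				</ul>
			)}
		</div>
	);
}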
@@ -12,11 +12,7 @@ const nextConfig: NextConfig = {
 		remotePatterns: [
 			{
 				protocol: "https",
-				hostname: "images.unsplash.com",
-			},
-			{
-				protocol: "https",
-				hostname: "static.vecteezy.com",
+				hostname: "**",
 			},
 		],
 	},