feat: Document Selector in Chat.

- Still needs improvements, but let's use it first.
2025-09-01 18:19:08 +00:00 · 2025-06-04 21:46:50 -07:00 · 2025-06-04 21:46:50 -07:00 · d7bb31f894
commit d7bb31f894
parent e8a19c496b
12 changed files with 599 additions and 67 deletions
--- a/surfsense_backend/app/agents/researcher/configuration.py
+++ b/surfsense_backend/app/agents/researcher/configuration.py
@ -33,6 +33,7 @@ class Configuration:
    search_space_id: int
    search_mode: SearchMode
    research_mode: ResearchMode
+    document_ids_to_add_in_context: List[int]

    @classmethod
    def from_runnable_config(
--- a/surfsense_backend/app/agents/researcher/nodes.py
+++ b/surfsense_backend/app/agents/researcher/nodes.py
@ -21,6 +21,237 @@ from app.utils.query_service import QueryService

 from langgraph.types import StreamWriter

+# Additional imports for document fetching
+from sqlalchemy.future import select
+from app.db import Document, SearchSpace
+
+
+# Display names for the per-type groups of user-selected sources.
+_SELECTED_SOURCE_GROUP_NAMES = {
+    "LINEAR_CONNECTOR": "Linear Issues (Selected)",
+    "SLACK_CONNECTOR": "Slack (Selected)",
+    "NOTION_CONNECTOR": "Notion (Selected)",
+    "GITHUB_CONNECTOR": "GitHub (Selected)",
+    "YOUTUBE_VIDEO": "YouTube Videos (Selected)",
+    "DISCORD_CONNECTOR": "Discord (Selected)",
+    "EXTENSION": "Browser Extension (Selected)",
+    "CRAWLED_URL": "Web Pages (Selected)",
+    "FILE": "Files (Selected)"
+}
+
+
+def _content_preview(content, limit: int = 100) -> str:
+    """Return `content` cut to `limit` characters, appending "..." when it was truncated.
+
+    Missing (None) content is treated as an empty string so that a single
+    document without content cannot abort formatting of the whole selection.
+    """
+    text = content or ""
+    return text[:limit] + "..." if len(text) > limit else text
+
+
+def _build_selected_source(doc, doc_type: str) -> Dict[str, Any]:
+    """Build the source entry (id, title, description, url) for one selected document.
+
+    Title, description and URL are derived from `doc.document_metadata` using
+    type-specific keys. Types without a dedicated branch fall back to the
+    document title, a content preview and the metadata 'url' value.
+    """
+    metadata = doc.document_metadata or {}
+    preview = _content_preview(doc.content)
+
+    if doc_type == "LINEAR_CONNECTOR":
+        issue_identifier = metadata.get('issue_identifier', '')
+        issue_title = metadata.get('issue_title', doc.title)
+        issue_state = metadata.get('state', '')
+        comment_count = metadata.get('comment_count', 0)
+
+        title = f"Linear: {issue_identifier} - {issue_title}" if issue_identifier else f"Linear: {issue_title}"
+        if issue_state:
+            title += f" ({issue_state})"
+
+        description = preview
+        if comment_count:
+            description += f" | Comments: {comment_count}"
+
+        url = f"https://linear.app/issue/{issue_identifier}" if issue_identifier else ""
+
+    elif doc_type == "SLACK_CONNECTOR":
+        channel_name = metadata.get('channel_name', 'Unknown Channel')
+        channel_id = metadata.get('channel_id', '')
+        message_date = metadata.get('start_date', '')
+
+        title = f"Slack: {channel_name}"
+        if message_date:
+            title += f" ({message_date})"
+
+        description = preview
+        url = f"https://slack.com/app_redirect?channel={channel_id}" if channel_id else ""
+
+    elif doc_type == "NOTION_CONNECTOR":
+        page_title = metadata.get('page_title', doc.title)
+        page_id = metadata.get('page_id', '')
+
+        title = f"Notion: {page_title}"
+        description = preview
+        # The URL uses the page id with its dashes removed.
+        url = f"https://notion.so/{page_id.replace('-', '')}" if page_id else ""
+
+    elif doc_type == "GITHUB_CONNECTOR":
+        title = f"GitHub: {doc.title}"
+        description = metadata.get('description', preview)
+        url = metadata.get('url', '')
+
+    elif doc_type == "YOUTUBE_VIDEO":
+        video_title = metadata.get('video_title', doc.title)
+        video_id = metadata.get('video_id', '')
+        channel_name = metadata.get('channel_name', '')
+
+        title = video_title
+        if channel_name:
+            title += f" - {channel_name}"
+
+        description = metadata.get('description', preview)
+        url = f"https://www.youtube.com/watch?v={video_id}" if video_id else ""
+
+    elif doc_type == "DISCORD_CONNECTOR":
+        channel_name = metadata.get('channel_name', 'Unknown Channel')
+        channel_id = metadata.get('channel_id', '')
+        guild_id = metadata.get('guild_id', '')
+        message_date = metadata.get('start_date', '')
+
+        title = f"Discord: {channel_name}"
+        if message_date:
+            title += f" ({message_date})"
+
+        description = preview
+
+        # Without a guild id the link falls back to the "@me" channel path.
+        if guild_id and channel_id:
+            url = f"https://discord.com/channels/{guild_id}/{channel_id}"
+        elif channel_id:
+            url = f"https://discord.com/channels/@me/{channel_id}"
+        else:
+            url = ""
+
+    elif doc_type == "EXTENSION":
+        webpage_title = metadata.get('VisitedWebPageTitle', doc.title)
+        webpage_url = metadata.get('VisitedWebPageURL', '')
+        visit_date = metadata.get('VisitedWebPageDateWithTimeInISOString', '')
+
+        title = webpage_title
+        if visit_date:
+            # Keep only the date part of an ISO timestamp.
+            formatted_date = visit_date.split('T')[0] if 'T' in visit_date else visit_date
+            title += f" (visited: {formatted_date})"
+
+        description = preview
+        url = webpage_url
+
+    elif doc_type == "CRAWLED_URL":
+        title = doc.title
+        description = metadata.get('og:description', metadata.get('ogDescription', preview))
+        url = metadata.get('url', '')
+
+    else:  # FILE and other types
+        title = doc.title
+        description = preview
+        url = metadata.get('url', '')
+
+    return {
+        "id": doc.id,
+        "title": title,
+        "description": description,
+        "url": url
+    }
+
+
+async def fetch_documents_by_ids(
+    document_ids: List[int],
+    user_id: str,
+    db_session: AsyncSession
+) -> tuple[List[Dict[str, Any]], List[Dict[str, Any]]]:
+    """
+    Fetch user-selected documents by ID, restricted to documents the user owns.
+
+    Ownership is enforced by joining Document to SearchSpace and filtering on
+    SearchSpace.user_id; requested IDs that do not match are simply absent from
+    the result. Each returned document carries the space-joined content of its
+    chunks (ordered by chunk id), falling back to the document's own content
+    when it has no chunks. Source objects for UI display are built as one group
+    per document type.
+
+    Args:
+        document_ids: List of document IDs to fetch
+        user_id: The user ID to check ownership
+        db_session: The database session
+
+    Returns:
+        Tuple of (source_objects, document_chunks). Both lists are empty when
+        `document_ids` is empty or when any error occurs; errors are printed
+        and not re-raised.
+    """
+    if not document_ids:
+        return [], []
+
+    try:
+        # Kept as a function-level import (the original imported it inside the loop).
+        from app.db import Chunk
+
+        # Query documents with ownership check
+        result = await db_session.execute(
+            select(Document)
+            .join(SearchSpace)
+            .filter(
+                Document.id.in_(document_ids),
+                SearchSpace.user_id == user_id
+            )
+        )
+        documents = result.scalars().all()
+
+        # Load the chunks of all matched documents in one query instead of one
+        # query per document. Ordering by Chunk.id keeps each document's chunks
+        # in the same relative order as a per-document query would.
+        chunk_texts_by_document: Dict[int, List[str]] = {}
+        if documents:
+            chunks_result = await db_session.execute(
+                select(Chunk)
+                .where(Chunk.document_id.in_([doc.id for doc in documents]))
+                .order_by(Chunk.id)
+            )
+            for chunk in chunks_result.scalars().all():
+                chunk_texts_by_document.setdefault(chunk.document_id, []).append(chunk.content)
+
+        formatted_documents = []
+        documents_by_type: Dict[str, list] = {}
+
+        for doc in documents:
+            doc_type = doc.document_type.value if doc.document_type else "UNKNOWN"
+            chunk_texts = chunk_texts_by_document.get(doc.id)
+
+            # Format to match connector service return format
+            formatted_documents.append({
+                "chunk_id": f"user_doc_{doc.id}",
+                "content": " ".join(chunk_texts) if chunk_texts else doc.content,
+                "score": 0.5,  # Fixed value for explicitly selected documents; not a retrieval score
+                "document": {
+                    "id": doc.id,
+                    "title": doc.title,
+                    "document_type": doc_type,
+                    "metadata": doc.document_metadata or {},
+                },
+                "source": doc_type
+            })
+            documents_by_type.setdefault(doc_type, []).append(doc)
+
+        # One source object per document type. IDs start from 100 to avoid
+        # conflicts with regular connectors.
+        source_objects = []
+        for connector_id, (doc_type, docs) in enumerate(documents_by_type.items(), start=100):
+            source_objects.append({
+                "id": connector_id,
+                "name": _SELECTED_SOURCE_GROUP_NAMES.get(doc_type, f"{doc_type} (Selected)"),
+                "type": f"USER_SELECTED_{doc_type}",
+                "sources": [_build_selected_source(doc, doc_type) for doc in docs],
+            })
+
+        print(f"Fetched {len(formatted_documents)} user-selected documents (with concatenated chunks) from {len(document_ids)} requested IDs")
+        print(f"Created {len(source_objects)} source objects for UI display")
+
+        return source_objects, formatted_documents
+
+    except Exception as e:
+        # Best-effort: a failure here must not break the chat flow, so report and return nothing.
+        print(f"Error fetching documents by IDs: {str(e)}")
+        return [], []
+

 class Section(BaseModel):
    """A section in the answer outline."""
@ -150,7 +381,8 @@ async def fetch_relevant_documents(
    state: State = None,
    top_k: int = 10,
    connector_service: ConnectorService = None,
-    search_mode: SearchMode = SearchMode.CHUNKS
+    search_mode: SearchMode = SearchMode.CHUNKS,
+    user_selected_sources: List[Dict[str, Any]] = None
 ) -> List[Dict[str, Any]]:
    """
    Fetch relevant documents for research questions using the provided connectors.
@ -436,6 +668,21 @@ async def fetch_relevant_documents(
    deduplicated_sources = []
    seen_source_keys = set()
    
+    # First add user-selected sources (if any)
+    if user_selected_sources:
+        for source_obj in user_selected_sources:
+            source_id = source_obj.get('id')
+            source_type = source_obj.get('type')
+            
+            if source_id and source_type:
+                source_key = f"{source_type}_{source_id}"
+                if source_key not in seen_source_keys:
+                    seen_source_keys.add(source_key)
+                    deduplicated_sources.append(source_obj)
+            else:
+                deduplicated_sources.append(source_obj)
+    
+    # Then add connector sources
    for source_obj in all_sources:
        # Use combination of source ID and type as a unique identifier
        # This ensures we don't accidentally deduplicate sources from different connectors
@ -453,7 +700,9 @@ async def fetch_relevant_documents(
    
    # Stream info about deduplicated sources
    if streaming_service and writer:
-        streaming_service.only_update_terminal(f"📚 Collected {len(deduplicated_sources)} unique sources across all connectors")
+        user_source_count = len(user_selected_sources) if user_selected_sources else 0
+        connector_source_count = len(deduplicated_sources) - user_source_count
+        streaming_service.only_update_terminal(f"📚 Collected {len(deduplicated_sources)} total sources ({user_source_count} user-selected + {connector_source_count} from connectors)")
        writer({"yeild_value": streaming_service._format_annotations()})
        
    # After all sources are collected and deduplicated, stream them
@ -576,8 +825,26 @@ async def process_sections(state: State, config: RunnableConfig, writer: StreamW
        TOP_K = 10
    
    relevant_documents = []
+    user_selected_documents = []
+    user_selected_sources = []
+    
    async with async_session_maker() as db_session:
        try:
+            # First, fetch user-selected documents if any
+            if configuration.document_ids_to_add_in_context:
+                streaming_service.only_update_terminal(f"📋 Including {len(configuration.document_ids_to_add_in_context)} user-selected documents...")
+                writer({"yeild_value": streaming_service._format_annotations()})
+                
+                user_selected_sources, user_selected_documents = await fetch_documents_by_ids(
+                    document_ids=configuration.document_ids_to_add_in_context,
+                    user_id=configuration.user_id,
+                    db_session=db_session
+                )
+                
+                if user_selected_documents:
+                    streaming_service.only_update_terminal(f"✅ Successfully added {len(user_selected_documents)} user-selected documents to context")
+                    writer({"yeild_value": streaming_service._format_annotations()})
+            
            # Create connector service inside the db_session scope
            connector_service = ConnectorService(db_session, user_id=configuration.user_id)
            await connector_service.initialize_counter()
@ -592,7 +859,8 @@ async def process_sections(state: State, config: RunnableConfig, writer: StreamW
                state=state,
                top_k=TOP_K,
                connector_service=connector_service,
-                search_mode=configuration.search_mode
+                search_mode=configuration.search_mode,
+                user_selected_sources=user_selected_sources
            )
        except Exception as e:
            error_message = f"Error fetching relevant documents: {str(e)}"
@ -603,8 +871,14 @@ async def process_sections(state: State, config: RunnableConfig, writer: StreamW
            # This allows the process to continue, but the report might lack information
            relevant_documents = []
    
+    # Combine user-selected documents with connector-fetched documents
+    all_documents = user_selected_documents + relevant_documents
+    
    print(f"Fetched {len(relevant_documents)} relevant documents for all sections")
-    streaming_service.only_update_terminal(f"✨ Starting to draft {len(answer_outline.answer_outline)} sections using {len(relevant_documents)} relevant document chunks")
+    print(f"Added {len(user_selected_documents)} user-selected documents for all sections")
+    print(f"Total documents for sections: {len(all_documents)}")
+    
+    streaming_service.only_update_terminal(f"✨ Starting to draft {len(answer_outline.answer_outline)} sections using {len(all_documents)} total document chunks ({len(user_selected_documents)} user-selected + {len(relevant_documents)} connector-found)")
    writer({"yeild_value": streaming_service._format_annotations()})
    
    # Create tasks to process each section in parallel with the same document set
@ -635,7 +909,7 @@ async def process_sections(state: State, config: RunnableConfig, writer: StreamW
                user_query=configuration.user_query,
                user_id=configuration.user_id,
                search_space_id=configuration.search_space_id,
-                relevant_documents=relevant_documents,
+                relevant_documents=all_documents,  # Use combined documents
                state=state,
                writer=writer,
                sub_section_type=sub_section_type,
@ -875,8 +1149,26 @@ async def handle_qna_workflow(state: State, config: RunnableConfig, writer: Stre
    TOP_K = 15
    
    relevant_documents = []
+    user_selected_documents = []
+    user_selected_sources = []
+    
    async with async_session_maker() as db_session:
        try:
+            # First, fetch user-selected documents if any
+            if configuration.document_ids_to_add_in_context:
+                streaming_service.only_update_terminal(f"📋 Including {len(configuration.document_ids_to_add_in_context)} user-selected documents...")
+                writer({"yeild_value": streaming_service._format_annotations()})
+                
+                user_selected_sources, user_selected_documents = await fetch_documents_by_ids(
+                    document_ids=configuration.document_ids_to_add_in_context,
+                    user_id=configuration.user_id,
+                    db_session=db_session
+                )
+                
+                if user_selected_documents:
+                    streaming_service.only_update_terminal(f"✅ Successfully added {len(user_selected_documents)} user-selected documents to context")
+                    writer({"yeild_value": streaming_service._format_annotations()})
+            
            # Create connector service inside the db_session scope
            connector_service = ConnectorService(db_session, user_id=configuration.user_id)
            await connector_service.initialize_counter()
@ -894,7 +1186,8 @@ async def handle_qna_workflow(state: State, config: RunnableConfig, writer: Stre
                state=state,
                top_k=TOP_K,
                connector_service=connector_service,
-                search_mode=configuration.search_mode
+                search_mode=configuration.search_mode,
+                user_selected_sources=user_selected_sources
            )
        except Exception as e:
            error_message = f"Error fetching relevant documents for QNA: {str(e)}"
@ -904,15 +1197,21 @@ async def handle_qna_workflow(state: State, config: RunnableConfig, writer: Stre
            # Continue with empty documents - the QNA agent will handle this gracefully
            relevant_documents = []
    
+    # Combine user-selected documents with connector-fetched documents
+    all_documents = user_selected_documents + relevant_documents
+    
    print(f"Fetched {len(relevant_documents)} relevant documents for QNA")
-    streaming_service.only_update_terminal(f"🧠 Generating comprehensive answer using {len(relevant_documents)} relevant sources...")
+    print(f"Added {len(user_selected_documents)} user-selected documents for QNA")
+    print(f"Total documents for QNA: {len(all_documents)}")
+    
+    streaming_service.only_update_terminal(f"🧠 Generating comprehensive answer using {len(all_documents)} total sources ({len(user_selected_documents)} user-selected + {len(relevant_documents)} connector-found)...")
    writer({"yeild_value": streaming_service._format_annotations()})
    
    # Prepare configuration for the QNA agent
    qna_config = {
        "configurable": {
            "user_query": reformulated_query,  # Use the reformulated query
-            "relevant_documents": relevant_documents,
+            "relevant_documents": all_documents,  # Use combined documents
            "user_id": configuration.user_id,
            "search_space_id": configuration.search_space_id
        }
--- a/surfsense_backend/app/routes/chats_routes.py
+++ b/surfsense_backend/app/routes/chats_routes.py
@ -30,6 +30,7 @@ async def handle_chat_data(
    search_space_id = request.data.get('search_space_id')
    research_mode: str = request.data.get('research_mode')
    selected_connectors: List[str] = request.data.get('selected_connectors')
+    document_ids_to_add_in_context: List[int] = request.data.get('document_ids_to_add_in_context')
    
    search_mode_str = request.data.get('search_mode', "CHUNKS")

@ -71,7 +72,8 @@ async def handle_chat_data(
        research_mode,
        selected_connectors,
        langchain_chat_history,
-        search_mode_str
+        search_mode_str,
+        document_ids_to_add_in_context
    ))
    response.headers['x-vercel-ai-data-stream'] = 'v1'
    return response
--- a/surfsense_backend/app/tasks/stream_connector_search_results.py
+++ b/surfsense_backend/app/tasks/stream_connector_search_results.py
@ -17,7 +17,8 @@ async def stream_connector_search_results(
    research_mode: str, 
    selected_connectors: List[str],
    langchain_chat_history: List[Any],
-    search_mode_str: str
+    search_mode_str: str,
+    document_ids_to_add_in_context: List[int]
 ) -> AsyncGenerator[str, None]:
    """
    Stream connector search results to the client
@ -62,7 +63,8 @@ async def stream_connector_search_results(
            "user_id": user_id_str,
            "search_space_id": search_space_id,
            "search_mode": search_mode,
-            "research_mode": research_mode
+            "research_mode": research_mode,
+            "document_ids_to_add_in_context": document_ids_to_add_in_context
        }
    }
    # Initialize state with database session and streaming service
--- a/surfsense_backend/app/utils/connector_service.py
+++ b/surfsense_backend/app/utils/connector_service.py
@ -4,7 +4,7 @@ from sqlalchemy.ext.asyncio import AsyncSession
 from sqlalchemy.future import select
 from app.retriver.chunks_hybrid_search import ChucksHybridSearchRetriever
 from app.retriver.documents_hybrid_search import DocumentHybridSearchRetriever
-from app.db import SearchSourceConnector, SearchSourceConnectorType, Chunk, Document
+from app.db import SearchSourceConnector, SearchSourceConnectorType, Chunk, Document, SearchSpace
 from tavily import TavilyClient
 from linkup import LinkupClient
 from sqlalchemy import func
@ -33,7 +33,8 @@ class ConnectorService:
                result = await self.session.execute(
                    select(func.count(Chunk.id))
                    .join(Document)
-                    .filter(Document.user_id == self.user_id)
+                    .join(SearchSpace)
+                    .filter(SearchSpace.user_id == self.user_id)
                )
                chunk_count = result.scalar() or 0
                self.source_id_counter = chunk_count + 1
--- a/surfsense_web/app/dashboard/[search_space_id]/connectors/[connector_id]/page.tsx
+++ b/surfsense_web/app/dashboard/[search_space_id]/connectors/[connector_id]/page.tsx
@ -72,7 +72,7 @@ export default function EditConnectorPage() {
  const [connector, setConnector] = useState<SearchSourceConnector | null>(null);
  const [isLoading, setIsLoading] = useState(true);
  const [isSubmitting, setIsSubmitting] = useState(false);
-  console.log("connector", connector);
+  // console.log("connector", connector);
  // Initialize the form
  const form = useForm<ApiConnectorFormValues>({
    resolver: zodResolver(apiConnectorFormSchema),
--- a/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/page.tsx
+++ b/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/page.tsx
@ -283,8 +283,8 @@ export default function DocumentsTable() {
    const searchSpaceId = Number(params.search_space_id);
    const { documents, loading, error, refreshDocuments, deleteDocument } = useDocuments(searchSpaceId);
    
-    console.log("Search Space ID:", searchSpaceId);
-    console.log("Documents loaded:", documents?.length);
+    // console.log("Search Space ID:", searchSpaceId);
+    // console.log("Documents loaded:", documents?.length);
    
    useEffect(() => {
        console.log("Delete document function available:", !!deleteDocument);
@ -315,7 +315,7 @@ export default function DocumentsTable() {

    const handleDeleteRows = async () => {
        const selectedRows = table.getSelectedRowModel().rows;
-        console.log("Deleting selected rows:", selectedRows.length);
+        // console.log("Deleting selected rows:", selectedRows.length);
        
        if (selectedRows.length === 0) {
            toast.error("No rows selected");
@ -324,14 +324,14 @@ export default function DocumentsTable() {
        
        // Create an array of promises for each delete operation
        const deletePromises = selectedRows.map(row => {
-            console.log("Deleting row with ID:", row.original.id);
+            // console.log("Deleting row with ID:", row.original.id);
            return deleteDocument(row.original.id);
        });
        
        try {
            // Execute all delete operations
            const results = await Promise.all(deletePromises);
-            console.log("Delete results:", results);
+            // console.log("Delete results:", results);
            
            // Check if all deletions were successful
            const allSuccessful = results.every(result => result === true);
--- a/surfsense_web/app/dashboard/[search_space_id]/researcher/[chat_id]/page.tsx
+++ b/surfsense_web/app/dashboard/[search_space_id]/researcher/[chat_id]/page.tsx
@ -15,8 +15,14 @@ import {
  Database,
  SendHorizontal,
  FileText,
-  Grid3x3
+  Grid3x3,
+  File,
+  Globe,
+  Webhook,
+  FolderOpen,
+  Upload
 } from 'lucide-react';
+import { IconBrandDiscord, IconBrandGithub, IconBrandNotion, IconBrandSlack, IconBrandYoutube, IconLayoutKanban } from "@tabler/icons-react";
 import { Card, CardContent, CardHeader, CardTitle } from '@/components/ui/card';
 import { Button } from '@/components/ui/button';
 import { Input } from '@/components/ui/input';
@ -47,6 +53,7 @@ import {
 import { MarkdownViewer } from '@/components/markdown-viewer';
 import { Logo } from '@/components/Logo';
 import { useSearchSourceConnectors } from '@/hooks';
+import { useDocuments } from '@/hooks/use-documents';

 interface SourceItem {
  id: number;
@ -63,6 +70,31 @@ interface ConnectorSource {
  sources: SourceItem[];
 }

+/** String identifiers of the supported document types (the same set as the keys of `documentTypeIcons`). */
+type DocumentType = "EXTENSION" | "CRAWLED_URL" | "SLACK_CONNECTOR" | "NOTION_CONNECTOR" | "FILE" | "YOUTUBE_VIDEO" | "GITHUB_CONNECTOR" | "LINEAR_CONNECTOR" | "DISCORD_CONNECTOR";
+
+/**
+ * Shape of a document record as used on this page.
+ *
+ * NOTE(review): presumably mirrors the records returned by `useDocuments` — confirm.
+ * NOTE(review): inside this module the name shadows the DOM's global `Document` type.
+ */
+interface Document {
+  id: number;
+  title: string;
+  document_type: DocumentType;
+  // Untyped: the metadata structure is not declared anywhere in this view.
+  document_metadata: any;
+  content: string;
+  created_at: string;
+  search_space_id: number;
+}
+
+// Document type icons mapping: one icon component per DocumentType value.
+// File, Globe and Webhook come from lucide-react; the brand and kanban icons
+// come from @tabler/icons-react.
+// `as const` keeps the exact key -> component association in the inferred type.
+const documentTypeIcons = {
+  EXTENSION: Webhook,
+  CRAWLED_URL: Globe,
+  SLACK_CONNECTOR: IconBrandSlack,
+  NOTION_CONNECTOR: IconBrandNotion,
+  FILE: File,
+  YOUTUBE_VIDEO: IconBrandYoutube,
+  GITHUB_CONNECTOR: IconBrandGithub,
+  LINEAR_CONNECTOR: IconLayoutKanban,
+  DISCORD_CONNECTOR: IconBrandDiscord,
+} as const;
+
 /**
 * Button that displays selected connectors and opens connector selection dialog
 */
@ -78,6 +110,41 @@ const ConnectorButton = ({ selectedConnectors, onClick }: { selectedConnectors:
  );
 };

+/**
+ * Icon-only button that opens the document selection dialog and shows the
+ * number of currently selected documents as a badge (capped at "99+").
+ *
+ * `documentsCount` stays in the props type for caller compatibility but is not rendered.
+ */
+const DocumentSelectorButton = ({
+  selectedDocuments,
+  onClick,
+}: {
+  selectedDocuments: number[],
+  onClick: () => void,
+  documentsCount: number
+}) => {
+  const selectedCount = selectedDocuments.length;
+  // Highlight the badge only when at least one document is selected.
+  const badgeTone = selectedCount > 0 ? 'bg-primary text-primary-foreground' : 'bg-muted text-muted-foreground';
+
+  return (
+    <div className="relative">
+      <Button
+        variant="outline"
+        onClick={onClick}
+        aria-label={`Select documents (${selectedCount} selected)`}
+        className="h-8 px-2 text-xs font-medium transition-colors border-border bg-background hover:bg-muted/50"
+      >
+        <FolderOpen className="h-3 w-3" aria-hidden="true" />
+      </Button>
+      <span className={`absolute -top-1 -right-1 h-4 w-4 rounded-full ${badgeTone} text-xs font-medium flex items-center justify-center leading-none`}>
+        {selectedCount > 99 ? '99+' : selectedCount}
+      </span>
+    </div>
+  );
+};
+
 // Create a wrapper component for the sources dialog content
 const SourcesDialogContent = ({ 
  connector, 
@ -245,7 +312,7 @@ const ChatPage = () => {
  const [sourceFilter, setSourceFilter] = useState("");
  const tabsListRef = useRef<HTMLDivElement>(null);
  const [terminalExpanded, setTerminalExpanded] = useState(false);
-  const [selectedConnectors, setSelectedConnectors] = useState<string[]>(["CRAWLED_URL"]);
+  const [selectedConnectors, setSelectedConnectors] = useState<string[]>([]);
  const [searchMode, setSearchMode] = useState<'DOCUMENTS' | 'CHUNKS'>('DOCUMENTS');
  const [researchMode, setResearchMode] = useState<ResearchMode>("QNA");
  const [currentTime, setCurrentTime] = useState<string>('');
@ -256,6 +323,11 @@ const ChatPage = () => {
  const INITIAL_SOURCES_DISPLAY = 3;

  const { search_space_id, chat_id } = useParams();
+  
+  // Document selection state
+  const [selectedDocuments, setSelectedDocuments] = useState<number[]>([]);
+  const [documentFilter, setDocumentFilter] = useState("");
+  const { documents, loading: isLoadingDocuments, error: documentsError } = useDocuments(Number(search_space_id));

  // Function to scroll terminal to bottom
  const scrollTerminalToBottom = () => {
@ -342,6 +414,13 @@ const ChatPage = () => {
          background-color: rgba(155, 155, 155, 0.5);
          border-radius: 20px;
        }
+        /* Line clamp utility */
+        .line-clamp-2 {
+          display: -webkit-box;
+          -webkit-line-clamp: 2;
+          -webkit-box-orient: vertical;
+          overflow: hidden;
+        }
      `;
      document.head.appendChild(style);

@ -362,7 +441,8 @@ const ChatPage = () => {
        search_space_id: search_space_id,
        selected_connectors: selectedConnectors,
        research_mode: researchMode,
-        search_mode: searchMode
+        search_mode: searchMode,
+        document_ids_to_add_in_context: selectedDocuments
      }
    },
    onError: (error) => {
@ -377,7 +457,7 @@ const ChatPage = () => {
      try {
        if (!token) return; // Wait for token to be set

-        console.log('Fetching chat details for chat ID:', chat_id);
+        // console.log('Fetching chat details for chat ID:', chat_id);

        const response = await fetch(`${process.env.NEXT_PUBLIC_FASTAPI_BACKEND_URL}/api/v1/chats/${Number(chat_id)}`, {
          method: 'GET',
@ -392,7 +472,7 @@ const ChatPage = () => {
        }

        const chatData = await response.json();
-        console.log('Chat details fetched:', chatData);
+        // console.log('Chat details fetched:', chatData);

        // Set research mode from chat data
        if (chatData.type) {
@ -442,7 +522,7 @@ const ChatPage = () => {
          const title = userMessages[0].content;


-          console.log('Updating chat with title:', title);
+          // console.log('Updating chat with title:', title);

          // Update the chat
          const response = await fetch(`${process.env.NEXT_PUBLIC_FASTAPI_BACKEND_URL}/api/v1/chats/${Number(chat_id)}`, {
@ -464,7 +544,7 @@ const ChatPage = () => {
            throw new Error(`Failed to update chat: ${response.statusText}`);
          }

-          console.log('Chat updated successfully');
+          // console.log('Chat updated successfully');
        }
      } catch (err) {
        console.error('Error updating chat:', err);
@ -519,10 +599,9 @@ const ChatPage = () => {

    if (!input.trim() || status !== 'ready') return;

-    // You can add additional logic here if needed
-    // For example, validation for selected connectors
-    if (selectedConnectors.length === 0) {
-      alert("Please select at least one connector");
+    // Validation: require at least one connector OR at least one document
+    if (selectedConnectors.length === 0 && selectedDocuments.length === 0) {
+      alert("Please select at least one connector or document");
      return;
    }

@ -988,17 +1067,162 @@ const ChatPage = () => {
            </Button>
          </form>
          <div className="flex items-center justify-between px-2 py-2 mt-3">
-            <div className="flex items-center space-x-3">
-              {/* Connector Selection Dialog */}
-              <Dialog>
-                <DialogTrigger asChild>
-                  <div className="h-8">
-                    <ConnectorButton
-                      selectedConnectors={selectedConnectors}
-                      onClick={() => { }}
-                    />
+            <div className="flex items-center gap-2 flex-wrap">
+            {/* Document Selection Dialog */}
+            <Dialog>
+              <DialogTrigger asChild>
+                <DocumentSelectorButton
+                  selectedDocuments={selectedDocuments}
+                  onClick={() => { }}
+                  documentsCount={documents?.length || 0}
+                />
+              </DialogTrigger>
+              <DialogContent className="sm:max-w-2xl max-h-[80vh] overflow-y-auto">
+                <DialogHeader>
+                  <DialogTitle className="flex items-center justify-between">
+                    <span>Select Documents</span>
+                    <Button
+                      variant="outline"
+                      size="sm"
+                      onClick={() => window.open(`/dashboard/${search_space_id}/documents/upload`, '_blank')}
+                      className="h-8"
+                    >
+                      <Upload className="h-3 w-3 mr-1.5" />
+                      Upload
+                    </Button>
+                  </DialogTitle>
+                  <DialogDescription>
+                    Choose documents to include in your research context
+                  </DialogDescription>
+                </DialogHeader>
+
+                {/* Document Search */}
+                <div className="relative my-4">
+                  <Search className="absolute left-2 top-1/2 transform -translate-y-1/2 h-4 w-4 text-gray-400 dark:text-gray-500" />
+                  <Input
+                    placeholder="Search documents..."
+                    className="pl-8 pr-4"
+                    value={documentFilter}
+                    onChange={(e) => setDocumentFilter(e.target.value)}
+                  />
+                  {documentFilter && (
+                    <Button
+                      // Icon-only control: the label gives assistive technology a name for it.
+                      aria-label="Clear document search"
+                      variant="ghost"
+                      size="icon"
+                      className="absolute right-2 top-1/2 transform -translate-y-1/2 h-4 w-4"
+                      onClick={() => setDocumentFilter("")}
+                    >
+                      <X className="h-3 w-3" />
+                    </Button>
+                  )}
+                </div>
+
+                {/* Document List */}
+                <div className="space-y-2 max-h-96 overflow-y-auto">
+                  {isLoadingDocuments ? (
+                    <div className="flex justify-center py-8">
+                      <Loader2 className="h-6 w-6 animate-spin text-primary" />
+                    </div>
+                  ) : documentsError ? (
+                    <div className="text-center py-8 text-destructive">
+                      <p>Error loading documents</p>
+                    </div>
+                  ) : (
+                    (() => {
+                      // Renders the body of the document picker: either an empty-state
+                      // message or one clickable row per document whose title matches
+                      // the current filter text.
+
+                      // Longest content preview (in characters) shown under each title.
+                      const PREVIEW_LENGTH = 150;
+
+                      // Case-insensitive title match; an empty filter matches every title.
+                      const needle = documentFilter.toLowerCase();
+                      const filteredDocuments = documents?.filter(doc =>
+                        doc.title.toLowerCase().includes(needle)
+                      ) || [];
+
+                      if (filteredDocuments.length === 0) {
+                        return (
+                          <div className="text-center py-8 text-muted-foreground">
+                            <FolderOpen className="h-8 w-8 mx-auto mb-2 opacity-50" />
+                            <p>{documentFilter ? `No documents found matching "${documentFilter}"` : 'No documents available'}</p>
+                          </div>
+                        );
+                      }
+
+                      // The row variable is named `doc` so the DOM `document` global is
+                      // not shadowed inside the callback.
+                      return filteredDocuments.map((doc) => {
+                        // NOTE(review): assumes documentTypeIcons has an entry for every
+                        // document_type value; an undefined Icon would fail to render. Confirm.
+                        const Icon = documentTypeIcons[doc.document_type];
+                        const isSelected = selectedDocuments.includes(doc.id);
+                        // Append an ellipsis only when the content was actually truncated.
+                        const preview = doc.content.length > PREVIEW_LENGTH
+                          ? `${doc.content.substring(0, PREVIEW_LENGTH)}...`
+                          : doc.content;
+
+                        return (
+                          <div
+                            key={doc.id}
+                            className={`flex items-start gap-3 p-3 rounded-md border cursor-pointer transition-colors ${
+                              isSelected
+                                ? 'border-primary bg-primary/10'
+                                : 'border-border hover:border-primary/50 hover:bg-muted'
+                            }`}
+                            onClick={() => {
+                              // Toggle against the state passed to the updater rather than
+                              // the render-time isSelected flag.
+                              setSelectedDocuments(prev =>
+                                prev.includes(doc.id)
+                                  ? prev.filter(id => id !== doc.id)
+                                  : [...prev, doc.id]
+                              );
+                            }}
+                          >
+                            <div className="flex-shrink-0 w-6 h-6 flex items-center justify-center mt-0.5">
+                              <Icon size={16} className="text-muted-foreground" />
+                            </div>
+                            <div className="flex-1 min-w-0">
+                              <h3 className="font-medium text-sm truncate">{doc.title}</h3>
+                              <p className="text-xs text-muted-foreground mt-1">
+                                {doc.document_type.replace(/_/g, ' ').toLowerCase()}
+                                {' • '}
+                                {new Date(doc.created_at).toLocaleDateString()}
+                              </p>
+                              <p className="text-xs text-muted-foreground mt-1 line-clamp-2">
+                                {preview}
+                              </p>
+                            </div>
+                            {isSelected && (
+                              <div className="flex-shrink-0">
+                                <Check className="h-4 w-4 text-primary" />
+                              </div>
+                            )}
+                          </div>
+                        );
+                      });
+                    })()
+                  )}
+                </div>
+
+                <DialogFooter className="flex justify-between items-center">
+                  <div className="text-sm text-muted-foreground">
+                    {selectedDocuments.length} document{selectedDocuments.length !== 1 ? 's' : ''} selected
                  </div>
-                </DialogTrigger>
+                  <div className="flex gap-2">
+                    <Button
+                      variant="outline"
+                      onClick={() => setSelectedDocuments([])}
+                    >
+                      Clear All
+                    </Button>
+                    <Button
+                      onClick={() => {
+                        // Same case-insensitive title match the document list applies.
+                        const needle = documentFilter.toLowerCase();
+                        const matchingIds = (documents ?? [])
+                          .filter(doc => doc.title.toLowerCase().includes(needle))
+                          .map(doc => doc.id);
+                        // Merge into the existing selection instead of replacing it, so
+                        // documents selected earlier but hidden by the current filter stay
+                        // selected. The Set removes ids that were already selected.
+                        setSelectedDocuments(prev => Array.from(new Set([...prev, ...matchingIds])));
+                      }}
+                    >
+                      Select All Filtered
+                    </Button>
+                  </div>
+                </DialogFooter>
+              </DialogContent>
+            </Dialog>
+
+            {/* Connector Selection Dialog */}
+            <Dialog>
+              <DialogTrigger asChild>
+                <ConnectorButton
+                  selectedConnectors={selectedConnectors}
+                  onClick={() => { }}
+                />
+              </DialogTrigger>
                <DialogContent className="sm:max-w-md">
                  <DialogHeader>
                    <DialogTitle>Select Connectors</DialogTitle>
@ -1065,33 +1289,31 @@ const ChatPage = () => {
              </Dialog>

              {/* Search Mode Control */}
-              <div className="flex items-center p-0.5 rounded-md border border-border bg-muted/20 h-8">
-                <button
+              <div className="flex gap-1">
+                <Button
+                  variant={searchMode === 'DOCUMENTS' ? 'default' : 'outline'}
+                  size="sm"
                  onClick={() => setSearchMode('DOCUMENTS')}
-                  className={`flex h-full items-center justify-center gap-1 px-2 rounded text-xs font-medium transition-colors flex-1 whitespace-nowrap overflow-hidden ${
-                    searchMode === 'DOCUMENTS'
-                      ? 'bg-primary text-primary-foreground shadow-sm'
-                      : 'text-muted-foreground hover:text-foreground hover:bg-muted/50'
-                  }`}
+                  className="h-8 px-3 text-xs"
+                  title="Search full documents"
                >
-                  <FileText className="h-3 w-3 flex-shrink-0 mr-1" />
-                  <span>Full Document</span>
-                </button>
-                <button
+                  <FileText className="h-3 w-3 mr-1.5" />
+                  <span className="hidden sm:inline">Full</span>
+                </Button>
+                <Button
+                  variant={searchMode === 'CHUNKS' ? 'default' : 'outline'}
+                  size="sm"
                  onClick={() => setSearchMode('CHUNKS')}
-                  className={`flex h-full items-center justify-center gap-1 px-2 rounded text-xs font-medium transition-colors flex-1 whitespace-nowrap overflow-hidden ${
-                    searchMode === 'CHUNKS'
-                      ? 'bg-primary text-primary-foreground shadow-sm'
-                      : 'text-muted-foreground hover:text-foreground hover:bg-muted/50'
-                  }`}
+                  className="h-8 px-3 text-xs"
+                  title="Search document chunks"
                >
-                  <Grid3x3 className="h-3 w-3 flex-shrink-0 mr-1" />
-                  <span>Document Chunks</span>
-                </button>
+                  <Grid3x3 className="h-3 w-3 mr-1.5" />
+                  <span className="hidden sm:inline">Chunks</span>
+                </Button>
              </div>

              {/* Research Mode Control */}
-              <div className="h-8">
+              <div className="h-8 min-w-0 overflow-hidden">
                <ResearchModeControl
                  value={researchMode}
                  onChange={setResearchMode}
--- a/surfsense_web/components/TokenHandler.tsx
+++ b/surfsense_web/components/TokenHandler.tsx
@ -35,7 +35,7 @@ const TokenHandler = ({
      try {
        // Store token in localStorage
        localStorage.setItem(storageKey, token);
-        console.log(`Token stored in localStorage with key: ${storageKey}`);
+        // console.log(`Token stored in localStorage with key: ${storageKey}`);
        
        // Redirect to specified path
        router.push(redirectPath);
--- a/surfsense_web/components/sidebar/AppSidebarProvider.tsx
+++ b/surfsense_web/components/sidebar/AppSidebarProvider.tsx
@ -120,8 +120,13 @@ export function AppSidebarProvider({
          // Use the API client instead of direct fetch - filter by current search space ID
          const chats: Chat[] = await apiClient.get<Chat[]>(`api/v1/chats/?limit=5&skip=0&search_space_id=${searchSpaceId}`);
          
+          // Sort chats by created_at in descending order (newest first).
+          // Array.prototype.sort reorders in place, so sort a shallow copy and
+          // leave the array returned by the API client untouched.
+          const sortedChats = [...chats].sort((a, b) => 
+            new Date(b.created_at).getTime() - new Date(a.created_at).getTime()
+          );
+          // console.log("sortedChats", sortedChats);
          // Transform API response to the format expected by AppSidebar
-          const formattedChats = chats.map(chat => ({
+          const formattedChats = sortedChats.map(chat => ({
            name: chat.title || `Chat ${chat.id}`, // Fallback if title is empty
            url: `/dashboard/${chat.search_space_id}/researcher/${chat.id}`,
            icon: 'MessageCircleMore',
--- a/surfsense_web/components/sidebar/app-sidebar.tsx
+++ b/surfsense_web/components/sidebar/app-sidebar.tsx
@ -227,7 +227,7 @@ export function AppSidebar({
      </SidebarHeader>
      <SidebarContent>
        <NavMain items={processedNavMain} />
-        {processedRecentChats.length > 0 && <NavProjects projects={processedRecentChats} />}
+        {processedRecentChats.length > 0 && <NavProjects chats={processedRecentChats} />}
        <NavSecondary items={processedNavSecondary} className="mt-auto" />
      </SidebarContent>
      <SidebarFooter>
--- a/surfsense_web/components/sidebar/nav-projects.tsx
+++ b/surfsense_web/components/sidebar/nav-projects.tsx
@ -43,9 +43,9 @@ interface ChatAction {
 }

 export function NavProjects({
-  projects,
+  chats,
 }: {
-  projects: {
+  chats: {
    name: string
    url: string
    icon: LucideIcon
@ -57,13 +57,13 @@ export function NavProjects({
  const { isMobile } = useSidebar()
  const router = useRouter()
  
-  const searchSpaceId = projects[0]?.search_space_id || ""
+  const searchSpaceId = chats[0]?.search_space_id || ""

  return (
    <SidebarGroup className="group-data-[collapsible=icon]:hidden">
      <SidebarGroupLabel>Recent Chats</SidebarGroupLabel>
      <SidebarMenu>
-        {projects.map((item, index) => (
+        {chats.map((item, index) => (
          <SidebarMenuItem key={item.id ? `chat-${item.id}` : `chat-${item.name}-${index}`}>
            <SidebarMenuButton>
              <item.icon />