fix: Handle the case where no documents or connectors are selected for context.

2025-09-01 18:19:08 +00:00 · 2025-06-04 23:09:31 -07:00 · 2025-06-04 23:09:31 -07:00 · 96f545f982
commit 96f545f982
parent 03aacc6d8b
5 changed files with 185 additions and 71 deletions
--- a/surfsense_backend/app/agents/researcher/qna_agent/nodes.py
+++ b/surfsense_backend/app/agents/researcher/qna_agent/nodes.py
@ -3,7 +3,7 @@ from langchain_core.runnables import RunnableConfig
 from .state import State
 from typing import Any, Dict
 from app.config import config as app_config
-from .prompts import get_qna_citation_system_prompt
+from .prompts import get_qna_citation_system_prompt, get_qna_no_documents_system_prompt
 from langchain_core.messages import HumanMessage, SystemMessage

 async def rerank_documents(state: State, config: RunnableConfig) -> Dict[str, Any]:
@ -73,7 +73,8 @@ async def answer_question(state: State, config: RunnableConfig) -> Dict[str, Any
    This node takes the relevant documents provided in the configuration and uses
    an LLM to generate a comprehensive answer to the user's question with
    proper citations. The citations follow IEEE format using source IDs from the
-    documents.
+    documents. If no documents are provided, it falls back to the chat history and
+    the model's general knowledge to generate an answer.
    
    Returns:
        Dict containing the final answer in the "final_answer" key.
@ -87,55 +88,59 @@ async def answer_question(state: State, config: RunnableConfig) -> Dict[str, Any
    # Initialize LLM
    llm = app_config.fast_llm_instance
    
-    # If no documents were provided, return a message indicating this
-    if not documents or len(documents) == 0:
-        return {
-            "final_answer": "I don't have any relevant documents in your personal knowledge base to answer this question. Please try asking about topics covered in your saved content, or add more documents to your knowledge base."
-        }
+    # Check if we have documents to determine which prompt to use
+    # (bool() keeps this a real boolean instead of leaking None or an empty list)
+    has_documents = bool(documents)
    
-    # Prepare documents for citation formatting
-    formatted_documents = []
-    for _i, doc in enumerate(documents):
-        # Extract content and metadata
-        content = doc.get("content", "")
-        doc_info = doc.get("document", {})
-        document_id = doc_info.get("id")  # Use document ID
+    # Prepare documents for citation formatting (if any)
+    documents_text = ""
+    if has_documents:
+        formatted_documents = []
+        for _i, doc in enumerate(documents):
+            # Extract content and metadata
+            content = doc.get("content", "")
+            doc_info = doc.get("document", {})
+            document_id = doc_info.get("id")  # Use document ID
+            
+            # Format document according to the citation system prompt's expected format
+            formatted_doc = f"""
+            <document>
+                <metadata>
+                    <source_id>{document_id}</source_id>
+                    <source_type>{doc_info.get("document_type", "CRAWLED_URL")}</source_type>
+                </metadata>
+                <content>
+                    {content}
+                </content>
+            </document>
+            """
+            formatted_documents.append(formatted_doc)
        
-        # Format document according to the citation system prompt's expected format
-        formatted_doc = f"""
-        <document>
-            <metadata>
-                <source_id>{document_id}</source_id>
-                <source_type>{doc_info.get("document_type", "CRAWLED_URL")}</source_type>
-            </metadata>
-            <content>
-                {content}
-            </content>
-        </document>
+        # Create the formatted documents text. The join happens outside the
+        # f-string because a backslash inside an f-string expression is a
+        # SyntaxError before Python 3.12.
+        joined_documents = "\n".join(formatted_documents)
+        documents_text = f"""
+        Source material from your personal knowledge base:
+        <documents>
+            {joined_documents}
+        </documents>
        """
-        formatted_documents.append(formatted_doc)
-    
-    # Create the formatted documents text
-    documents_text = "\n".join(formatted_documents)
    
    # Construct a clear, structured query for the LLM
    human_message_content = f"""
-    Source material from your personal knowledge base:
-    <documents>
-        {documents_text}
-    </documents>
+    {documents_text}
    
    User's question:
    <user_query>
        {user_query}
    </user_query>
    
-    Please provide a detailed, comprehensive answer to the user's question using the information from their personal knowledge sources. Make sure to cite all information appropriately and engage in a conversational manner.
+    {"Please provide a detailed, comprehensive answer to the user's question using the information from their personal knowledge sources. Make sure to cite all information appropriately and engage in a conversational manner." if has_documents else "Please provide a helpful answer to the user's question based on our conversation history and your general knowledge. Engage in a conversational manner."}
    """
    
+    # Choose the appropriate system prompt based on document availability
+    system_prompt = get_qna_citation_system_prompt() if has_documents else get_qna_no_documents_system_prompt()
+    
    # Create messages for the LLM, including chat history for context
    messages_with_chat_history = state.chat_history + [
-        SystemMessage(content=get_qna_citation_system_prompt()),
+        SystemMessage(content=system_prompt),
        HumanMessage(content=human_message_content)
    ]
    
--- a/surfsense_backend/app/agents/researcher/qna_agent/prompts.py
+++ b/surfsense_backend/app/agents/researcher/qna_agent/prompts.py
@ -118,3 +118,49 @@ Make sure your response:
 5. Offers follow-up suggestions when appropriate
 </user_query_instructions>
 """
+
+
+def get_qna_no_documents_system_prompt():
+    """Return the Q&A system prompt for when no knowledge-base documents are available.
+
+    The prompt starts with the current date (YYYY-MM-DD) and instructs the model
+    to answer from the conversation history and its general knowledge, without
+    citations.
+    """
+    return f"""
+Today's date: {datetime.datetime.now().strftime("%Y-%m-%d")}
+You are SurfSense, an advanced AI research assistant that provides helpful, detailed answers to user questions in a conversational manner.
+
+<context>
+The user has asked a question but there are no specific documents from their personal knowledge base available to answer it. You should provide a helpful response based on:
+1. The conversation history and context
+2. Your general knowledge and expertise
+3. Understanding of the user's needs and interests based on our conversation
+</context>
+
+<instructions>
+1. Provide a comprehensive, helpful answer to the user's question
+2. Draw upon the conversation history to understand context and the user's specific needs
+3. Use your general knowledge to provide accurate, detailed information
+4. Be conversational and engaging, as if having a detailed discussion with the user
+5. Acknowledge when you're drawing from general knowledge rather than their personal sources
+6. Provide actionable insights and practical information when relevant
+7. Structure your answer logically and clearly
+8. If the question would benefit from personalized information from their knowledge base, gently suggest they might want to add relevant content to SurfSense
+9. Be honest about limitations while still being maximally helpful
+10. Maintain the helpful, knowledgeable tone that users expect from SurfSense
+</instructions>
+
+<format>
+- Write in a clear, conversational tone suitable for detailed Q&A discussions
+- Provide comprehensive answers that thoroughly address the user's question
+- Use appropriate paragraphs and structure for readability
+- No citations are needed since you're using general knowledge
+- Be thorough and detailed in your explanations while remaining focused on the user's specific question
+- If asking follow-up questions would be helpful, suggest them at the end of your response
+- When appropriate, mention that adding relevant content to their SurfSense knowledge base could provide more personalized answers
+</format>
+
+<user_query_instructions>
+When answering the user's question without access to their personal documents:
+1. Provide the most helpful and comprehensive answer possible using general knowledge
+2. Be conversational and engaging
+3. Draw upon conversation history for context
+4. Be clear that you're providing general information
+5. Suggest ways the user could get more personalized answers by expanding their knowledge base when relevant
+</user_query_instructions>
+"""
--- a/surfsense_backend/app/agents/researcher/sub_section_writer/nodes.py
+++ b/surfsense_backend/app/agents/researcher/sub_section_writer/nodes.py
@ -3,7 +3,7 @@ from langchain_core.runnables import RunnableConfig
 from .state import State
 from typing import Any, Dict
 from app.config import config as app_config
-from .prompts import get_citation_system_prompt
+from .prompts import get_citation_system_prompt, get_no_documents_system_prompt
 from langchain_core.messages import HumanMessage, SystemMessage
 from .configuration import SubSectionType

@ -80,7 +80,8 @@ async def write_sub_section(state: State, config: RunnableConfig) -> Dict[str, A
    This node takes the relevant documents provided in the configuration and uses
    an LLM to generate a comprehensive answer to the sub-section title with
    proper citations. The citations follow IEEE format using source IDs from the
-    documents.
+    documents. If no documents are provided, it falls back to the chat history and
+    the model's general knowledge to generate content.
    
    Returns:
        Dict containing the final answer in the "final_answer" key.
@ -93,39 +94,44 @@ async def write_sub_section(state: State, config: RunnableConfig) -> Dict[str, A
    # Initialize LLM
    llm = app_config.fast_llm_instance
    
-    # If no documents were provided, return a message indicating this
-    if not documents or len(documents) == 0:
-        return {
-            "final_answer": "No relevant documents were provided to answer this question. Please provide documents or try a different approach."
-        }
+    # Check if we have documents to determine which prompt to use
+    # (bool() keeps this a real boolean instead of leaking None or an empty list)
+    has_documents = bool(documents)
    
-    # Prepare documents for citation formatting
-    formatted_documents = []
-    for i, doc in enumerate(documents):
-        # Extract content and metadata
-        content = doc.get("content", "")
-        doc_info = doc.get("document", {})
-        document_id = doc_info.get("id")  # Use document ID
+    # Prepare documents for citation formatting (if any)
+    documents_text = ""
+    if has_documents:
+        formatted_documents = []
+        for i, doc in enumerate(documents):
+            # Extract content and metadata
+            content = doc.get("content", "")
+            doc_info = doc.get("document", {})
+            document_id = doc_info.get("id")  # Use document ID
+            
+            # Format document according to the citation system prompt's expected format
+            formatted_doc = f"""
+            <document>
+                <metadata>
+                    <source_id>{document_id}</source_id>
+                    <source_type>{doc_info.get("document_type", "CRAWLED_URL")}</source_type>
+                </metadata>
+                <content>
+                    {content}
+                </content>
+            </document>
+            """
+            formatted_documents.append(formatted_doc)
        
-        # Format document according to the citation system prompt's expected format
-        formatted_doc = f"""
-        <document>
-            <metadata>
-                <source_id>{document_id}</source_id>
-                <source_type>{doc_info.get("document_type", "CRAWLED_URL")}</source_type>
-            </metadata>
-            <content>
-                {content}
-            </content>
-        </document>
+        # Join outside the f-string: a backslash inside an f-string expression
+        # is a SyntaxError before Python 3.12.
+        joined_documents = "\n".join(formatted_documents)
+        documents_text = f"""
+        Source material:
+        <documents>
+            {joined_documents}
+        </documents>
        """
-        formatted_documents.append(formatted_doc)
    
    # Create the query that uses the section title and questions
    section_title = configuration.sub_section_title
    sub_section_questions = configuration.sub_section_questions
    user_query = configuration.user_query  # Get the original user query
-    documents_text = "\n".join(formatted_documents)
    sub_section_type = configuration.sub_section_type

    # Format the questions as bullet points for clarity
@ -142,10 +148,7 @@ async def write_sub_section(state: State, config: RunnableConfig) -> Dict[str, A
    
    # Construct a clear, structured query for the LLM
    human_message_content = f"""
-    Source material:
-    <documents>
-        {documents_text}
-    </documents>
+    {documents_text}
    
    Now user's query is: 
    <user_query>
@ -164,11 +167,16 @@ async def write_sub_section(state: State, config: RunnableConfig) -> Dict[str, A
    <guiding_questions>
        {questions_text}
    </guiding_questions>
+    
+    {"Please write content for this sub-section using the provided source material and cite all information appropriately." if has_documents else "Please write content for this sub-section based on our conversation history and your general knowledge."}
    """
    
+    # Choose the appropriate system prompt based on document availability
+    system_prompt = get_citation_system_prompt() if has_documents else get_no_documents_system_prompt()
+    
    # Create messages for the LLM
    messages_with_chat_history = state.chat_history + [
-        SystemMessage(content=get_citation_system_prompt()),
+        SystemMessage(content=system_prompt),
        HumanMessage(content=human_message_content)
    ]
    
--- a/surfsense_backend/app/agents/researcher/sub_section_writer/prompts.py
+++ b/surfsense_backend/app/agents/researcher/sub_section_writer/prompts.py
@ -127,4 +127,59 @@ Make sure your response:
 4. Is well-structured and professional in tone
 5. Acknowledges the personal nature of the information being provided
 </user_query_instructions>
+"""
+
+
+def get_no_documents_system_prompt():
+    """Return the sub-section writer system prompt for when no source documents are available.
+
+    The prompt starts with the current date (YYYY-MM-DD) and instructs the model
+    to write the sub-section from the conversation history and its general
+    knowledge, without citations.
+    """
+    return f"""
+Today's date: {datetime.datetime.now().strftime("%Y-%m-%d")}
+You are SurfSense, an advanced AI research assistant that helps users create well-structured content for their documents and research.
+
+<context>
+You are writing content for a specific sub-section of a document. No specific documents from the user's personal knowledge base are available, so you should create content based on:
+1. The conversation history and context
+2. Your general knowledge and expertise
+3. The specific sub-section requirements provided
+4. Understanding of the user's needs based on our conversation
+</context>
+
+<instructions>
+1. Write comprehensive, well-structured content for the specified sub-section
+2. Draw upon the conversation history to understand the user's context and needs
+3. Use your general knowledge to provide accurate, detailed information
+4. Ensure the content fits the sub-section title and position in the document
+5. Follow the section positioning guidelines (introduction, middle, or conclusion)
+6. Structure the content logically with appropriate flow and transitions
+7. Write in a professional, academic tone suitable for research documents
+8. Acknowledge when you're drawing from general knowledge rather than personal sources
+9. If the content would benefit from personalized information, gently mention that adding relevant sources to SurfSense could enhance the content
+10. Ensure the content addresses the guiding questions without explicitly mentioning them
+11. Create content that flows naturally and maintains coherence with the overall document structure
+</instructions>
+
+<format>
+- Write in clear, professional language suitable for academic or research documents
+- Organize content with appropriate paragraphs and logical structure
+- No citations are needed since you're using general knowledge
+- Follow the specified section type (START/MIDDLE/END) guidelines
+- Ensure content flows naturally and maintains document coherence
+- Be comprehensive and detailed while staying focused on the sub-section topic
+- When appropriate, mention that adding relevant sources to SurfSense could provide more personalized and cited content
+</format>
+
+<section_guidelines>
+- START (Introduction): Provide context, background, and introduce key concepts
+- MIDDLE: Develop main points, provide detailed analysis, ensure smooth transitions
+- END (Conclusion): Summarize key points, provide closure, synthesize main insights
+</section_guidelines>
+
+<user_query_instructions>
+When writing content for a sub-section without access to personal documents:
+1. Create the most comprehensive and useful content possible using general knowledge
+2. Ensure the content fits the sub-section title and document position
+3. Draw upon conversation history for context about the user's needs
+4. Write in a professional, research-appropriate tone
+5. Address the guiding questions through natural content flow without explicitly listing them
+6. Suggest how adding relevant sources to SurfSense could enhance future content when appropriate
+</user_query_instructions>
 """
--- a/surfsense_web/app/dashboard/[search_space_id]/researcher/[chat_id]/page.tsx
+++ b/surfsense_web/app/dashboard/[search_space_id]/researcher/[chat_id]/page.tsx
@ -600,10 +600,10 @@ const ChatPage = () => {
    if (!input.trim() || status !== 'ready') return;

    // Validation: require at least one connector OR at least one document
-    if (selectedConnectors.length === 0 && selectedDocuments.length === 0) {
-      alert("Please select at least one connector or document");
-      return;
-    }
+    // if (selectedConnectors.length === 0 && selectedDocuments.length === 0) {
+    //   alert("Please select at least one connector or document");
+    //   return;
+    // }

    // Call the original handleSubmit from useChat
    handleChatSubmit(e);