From 7be68ebf41e65af0949fa3dbd93e729aa108d574 Mon Sep 17 00:00:00 2001
From: "DESKTOP-RTLN3BA\\$punk" <vermarohanfinal@gmail.com>
Date: Sun, 20 Apr 2025 00:10:23 -0700
Subject: [PATCH] refactor: remove process_section function and streamline test
 workflow

---
 .../app/agents/researcher/nodes.py            | 69 +------------------
 .../app/agents/researcher/test_researcher.py  | 30 ++++----
 2 files changed, 14 insertions(+), 85 deletions(-)

diff --git a/surfsense_backend/app/agents/researcher/nodes.py b/surfsense_backend/app/agents/researcher/nodes.py
index d13e77b..7099f02 100644
--- a/surfsense_backend/app/agents/researcher/nodes.py
+++ b/surfsense_backend/app/agents/researcher/nodes.py
@@ -232,71 +232,7 @@ async def fetch_relevant_documents(
     
     return deduplicated_docs
 
-async def process_section(
-    section_title: str, 
-    user_id: str, 
-    search_space_id: int, 
-    session_maker,
-    research_questions: List[str],
-    connectors_to_search: List[str]
-) -> str:
-    """
-    Process a single section by sending it to the sub_section_writer graph.
-    
-    Args:
-        section_title: The title of the section
-        user_id: The user ID
-        search_space_id: The search space ID
-        session_maker: Factory for creating new database sessions
-        research_questions: List of research questions for this section
-        connectors_to_search: List of connectors to search
-        
-    Returns:
-        The written section content
-    """
-    try:
-        # Create a new database session for this section
-        async with session_maker() as db_session:
-            # Fetch relevant documents using all research questions for this section
-            relevant_documents = await fetch_relevant_documents(
-                research_questions=research_questions,
-                user_id=user_id,
-                search_space_id=search_space_id,
-                db_session=db_session,
-                connectors_to_search=connectors_to_search
-            )
-            
-            # Fallback if no documents found
-            if not relevant_documents:
-                print(f"No relevant documents found for section: {section_title}")
-                relevant_documents = [
-                    {"content": f"No specific information was found for: {question}"}
-                    for question in research_questions
-                ]
-            
-            # Call the sub_section_writer graph with the appropriate config
-            config = {
-                "configurable": {
-                    "sub_section_title": section_title,
-                    "relevant_documents": relevant_documents,
-                    "user_id": user_id,
-                    "search_space_id": search_space_id
-                }
-            }
-            
-            # Create the initial state with db_session
-            state = {"db_session": db_session}
-            
-            # Invoke the sub-section writer graph
-            print(f"Invoking sub_section_writer for: {section_title}")
-            result = await sub_section_writer_graph.ainvoke(state, config)
-            
-            # Return the final answer from the sub_section_writer
-            final_answer = result.get("final_answer", "No content was generated for this section.")
-            return final_answer
-    except Exception as e:
-        print(f"Error processing section '{section_title}': {str(e)}")
-        return f"Error processing section: {section_title}. Details: {str(e)}"
+
 
 async def process_sections(state: State, config: RunnableConfig) -> Dict[str, Any]:
     """
@@ -395,8 +331,7 @@ async def process_sections(state: State, config: RunnableConfig) -> Dict[str, An
     # Combine the results into a final report with section titles
     final_report = []
     for i, (section, content) in enumerate(zip(answer_outline.answer_outline, processed_results)):
-        section_header = f"## {section.section_title}"
-        final_report.append(section_header)
+        # Skip adding the section header since the content already contains the title
         final_report.append(content)
         final_report.append("\n")  # Add spacing between sections
     
diff --git a/surfsense_backend/app/agents/researcher/test_researcher.py b/surfsense_backend/app/agents/researcher/test_researcher.py
index 72fbc20..15c993e 100644
--- a/surfsense_backend/app/agents/researcher/test_researcher.py
+++ b/surfsense_backend/app/agents/researcher/test_researcher.py
@@ -31,7 +31,7 @@ from dotenv import load_dotenv
 # These imports should now work with the correct path
 from app.agents.researcher.graph import graph
 from app.agents.researcher.state import State
-from app.agents.researcher.nodes import write_answer_outline, process_sections
+
 
 # Load environment variables
 load_dotenv()
@@ -68,31 +68,25 @@ async def run_test():
             # Initialize state with database session and engine
             initial_state = State(db_session=db_session, engine=engine)
             
-            # Instead of using the graph directly, let's run the nodes manually
-            # to track the state transitions explicitly
-            print("\nSTEP 1: Running write_answer_outline node...")
-            outline_result = await write_answer_outline(initial_state, config)
+            # Run the graph directly
+            print("\nRunning the complete researcher workflow...")
+            result = await graph.ainvoke(initial_state, config)
             
-            # Update the state with the outline
-            if "answer_outline" in outline_result:
-                initial_state.answer_outline = outline_result["answer_outline"]
-                print(f"Generated answer outline with {len(initial_state.answer_outline.answer_outline)} sections")
+            # Extract the answer outline for display
+            if "answer_outline" in result and result["answer_outline"]:
+                print(f"\nGenerated answer outline with {len(result['answer_outline'].answer_outline)} sections")
                 
                 # Print the outline
                 print("\nGenerated Answer Outline:")
-                for section in initial_state.answer_outline.answer_outline:
+                for section in result["answer_outline"].answer_outline:
                     print(f"\nSection {section.section_id}: {section.section_title}")
                     print("Research Questions:")
                     for q in section.questions:
                         print(f"  - {q}")
             
-            # Run the second node with the updated state
-            print("\nSTEP 2: Running process_sections node...")
-            sections_result = await process_sections(initial_state, config)
-            
             # Check if we got a final report
-            if "final_written_report" in sections_result:
-                final_report = sections_result["final_written_report"]
+            if "final_written_report" in result and result["final_written_report"]:
+                final_report = result["final_written_report"]
                 print("\nFinal Research Report generated successfully!")
                 print(f"Report length: {len(final_report)} characters")
                 
@@ -101,9 +95,9 @@ async def run_test():
                 print(final_report)
             else:
                 print("\nNo final report was generated.")
-                print(f"Result keys: {list(sections_result.keys())}")
+                print(f"Available result keys: {list(result.keys())}")
             
-            return sections_result
+            return result
             
         except Exception as e:
             print(f"Error running researcher agent: {str(e)}")