diff --git a/surfsense_backend/app/agents/researcher/nodes.py b/surfsense_backend/app/agents/researcher/nodes.py
index 68a81bb..a1bb7e7 100644
--- a/surfsense_backend/app/agents/researcher/nodes.py
+++ b/surfsense_backend/app/agents/researcher/nodes.py
@@ -919,6 +919,29 @@ async def fetch_relevant_documents(
                             )
                         }
                     )
+            elif connector == "CONFLUENCE_CONNECTOR":
+                source_object, confluence_chunks = await connector_service.search_confluence(
+                    user_query=reformulated_query,
+                    user_id=user_id,
+                    search_space_id=search_space_id,
+                    top_k=top_k,
+                    search_mode=search_mode,
+                )
+
+                # Add to sources and raw documents
+                if source_object:
+                    all_sources.append(source_object)
+                all_raw_documents.extend(confluence_chunks)
+
+                # Stream found document count
+                if streaming_service and writer:
+                    writer(
+                        {
+                            "yield_value": streaming_service.format_terminal_info_delta(
+                                f"📚 Found {len(confluence_chunks)} Confluence pages related to your query"
+                            )
+                        }
+                    )
 
         except Exception as e:
             error_message = f"Error searching connector {connector}: {e!s}"
diff --git a/surfsense_backend/app/connectors/confluence_connector.py b/surfsense_backend/app/connectors/confluence_connector.py
index 7816b9b..6b0a40c 100644
--- a/surfsense_backend/app/connectors/confluence_connector.py
+++ b/surfsense_backend/app/connectors/confluence_connector.py
@@ -349,31 +349,7 @@ class ConfluenceConnector:
                 else:
                     break
 
-            # Filter pages by date range
-            filtered_pages = []
-            start_datetime = datetime.fromisoformat(start_date).replace(tzinfo=timezone.utc)
-            end_datetime = datetime.fromisoformat(end_date + "T23:59:59").replace(tzinfo=timezone.utc)
-
-            for page in all_pages:
-                created_at = page.get("createdAt")
-                if created_at:
-                    try:
-                        page_date = datetime.fromisoformat(
-                            created_at.replace("Z", "+00:00")
-                        )
-                        if start_datetime <= page_date <= end_datetime:
-                            # Add comments if requested
-                            if include_comments:
-                                page["comments"] = self.get_page_comments(page["id"])
-                            filtered_pages.append(page)
-                    except ValueError:
-                        # Skip pages with invalid date format
-                        continue
-
-            if not filtered_pages:
-                return [], "No pages found in the specified date range."
-
-            return filtered_pages, None
+            return all_pages, None
 
         except Exception as e:
             return [], f"Error fetching pages: {e!s}"
diff --git a/surfsense_backend/app/services/connector_service.py b/surfsense_backend/app/services/connector_service.py
index 1c6d612..1d17067 100644
--- a/surfsense_backend/app/services/connector_service.py
+++ b/surfsense_backend/app/services/connector_service.py
@@ -1072,6 +1072,102 @@ class ConnectorService:
         }
 
         return result_object, jira_chunks
+    async def search_confluence(
+        self,
+        user_query: str,
+        user_id: str,
+        search_space_id: int,
+        top_k: int = 20,
+        search_mode: SearchMode = SearchMode.CHUNKS,
+    ) -> tuple:
+        """
+        Search for Confluence pages and return both the source information and langchain documents
+
+        Args:
+            user_query: The user's query
+            user_id: The user's ID
+            search_space_id: The search space ID to search in
+            top_k: Maximum number of results to return
+            search_mode: Search mode (CHUNKS or DOCUMENTS)
+
+        Returns:
+            tuple: (sources_info, langchain_documents)
+        """
+        if search_mode == SearchMode.CHUNKS:
+            confluence_chunks = await self.chunk_retriever.hybrid_search(
+                query_text=user_query,
+                top_k=top_k,
+                user_id=user_id,
+                search_space_id=search_space_id,
+                document_type="CONFLUENCE_CONNECTOR",
+            )
+        elif search_mode == SearchMode.DOCUMENTS:
+            confluence_chunks = await self.document_retriever.hybrid_search(
+                query_text=user_query,
+                top_k=top_k,
+                user_id=user_id,
+                search_space_id=search_space_id,
+                document_type="CONFLUENCE_CONNECTOR",
+            )
+            # Transform document retriever results to match expected format
+            confluence_chunks = self._transform_document_results(confluence_chunks)
+
+        # Early return if no results
+        if not confluence_chunks:
+            return {
+                "id": 40,
+                "name": "Confluence",
+                "type": "CONFLUENCE_CONNECTOR",
+                "sources": [],
+            }, []
+
+        # Process each chunk and create sources directly without deduplication
+        sources_list = []
+        async with self.counter_lock:
+            for _i, chunk in enumerate(confluence_chunks):
+                # Extract document metadata
+                document = chunk.get("document", {})
+                metadata = document.get("metadata", {})
+
+                # Extract Confluence-specific metadata
+                page_title = metadata.get("page_title", "Untitled Page")
+                page_id = metadata.get("page_id", "")
+                space_key = metadata.get("space_key", "")
+
+                # Create a more descriptive title for Confluence pages
+                title = f"Confluence: {page_title}"
+                if space_key:
+                    title += f" ({space_key})"
+
+                # Create a more descriptive description for Confluence pages
+                description = chunk.get("content", "")[:100]
+                if len(description) == 100:
+                    description += "..."
+
+                # For URL, we can use a placeholder or construct a URL to the Confluence page if available
+                url = ""
+                if page_id:
+                    url = f"{metadata.get('base_url')}/pages/{page_id}"
+
+                source = {
+                    "id": document.get("id", self.source_id_counter),
+                    "title": title,
+                    "description": description,
+                    "url": url,
+                }
+
+                self.source_id_counter += 1
+                sources_list.append(source)
+
+        # Create result object
+        result_object = {
+            "id": 40,
+            "name": "Confluence",
+            "type": "CONFLUENCE_CONNECTOR",
+            "sources": sources_list,
+        }
+
+        return result_object, confluence_chunks
 
     async def search_linkup(
         self, user_query: str, user_id: str, mode: str = "standard"