async def search_google_gmail(
    self,
    user_query: str,
    user_id: str,
    search_space_id: int,
    top_k: int = 20,
    search_mode: SearchMode = SearchMode.CHUNKS,
) -> tuple:
    """
    Search for Gmail messages and return both the source information and langchain documents

    Args:
        user_query: The user's query
        user_id: The user's ID
        search_space_id: The search space ID to search in
        top_k: Maximum number of results to return
        search_mode: Search mode (CHUNKS or DOCUMENTS)

    Returns:
        tuple: (sources_info, langchain_documents). ``sources_info`` is a dict
        with the connector ``id``, ``name``, ``type`` and a ``sources`` list
        holding one entry per retrieved chunk (no deduplication). The second
        element is the list of retrieved chunks, or ``[]`` when nothing matched.

    Raises:
        ValueError: If ``search_mode`` is neither ``SearchMode.CHUNKS`` nor
            ``SearchMode.DOCUMENTS``.
    """
    # Function-scope import (as in the original), hoisted out of the per-chunk loop.
    import re

    # Matches the address in a "Display Name <address>" sender header.
    angle_address = re.compile(r"<([^>]+)>")
    description_limit = 150

    # Connector header shared by the empty and the populated result.
    connector_info = {
        "id": 32,  # Assign a unique ID for the Gmail connector
        "name": "Gmail Messages",
        "type": "GOOGLE_GMAIL_CONNECTOR",
    }

    if search_mode == SearchMode.CHUNKS:
        gmail_chunks = await self.chunk_retriever.hybrid_search(
            query_text=user_query,
            top_k=top_k,
            user_id=user_id,
            search_space_id=search_space_id,
            document_type="GOOGLE_GMAIL_CONNECTOR",
        )
    elif search_mode == SearchMode.DOCUMENTS:
        gmail_chunks = await self.document_retriever.hybrid_search(
            query_text=user_query,
            top_k=top_k,
            user_id=user_id,
            search_space_id=search_space_id,
            document_type="GOOGLE_GMAIL_CONNECTOR",
        )
        # Transform document retriever results to match expected format
        gmail_chunks = self._transform_document_results(gmail_chunks)
    else:
        # Previously this fell through with `gmail_chunks` unbound and failed
        # with an UnboundLocalError; report the real problem instead.
        raise ValueError(f"Unsupported search mode: {search_mode!r}")

    # Early return if no results
    if not gmail_chunks:
        return {**connector_info, "sources": []}, []

    # Process each chunk and create sources directly without deduplication
    sources_list = []
    # The lock covers every read and increment of the shared source_id_counter.
    async with self.counter_lock:
        for chunk in gmail_chunks:
            # Extract document metadata
            document = chunk.get("document", {})
            metadata = document.get("metadata", {})

            # Extract Gmail-specific metadata
            message_id = metadata.get("message_id", "")
            subject = metadata.get("subject", "No Subject")
            sender = metadata.get("sender", "Unknown Sender")
            date_str = metadata.get("date", "")
            thread_id = metadata.get("thread_id", "")

            # Create a more descriptive title for Gmail messages
            title = f"Email: {subject}"
            if sender:
                # Prefer the bare address inside <...>; otherwise use the raw value.
                sender_match = angle_address.search(sender)
                shown_sender = sender_match.group(1) if sender_match else sender
                title += f" (from {shown_sender})"

            # Short content preview. The ellipsis is added only when the content
            # was actually cut, not when it happens to be exactly at the limit.
            content = chunk.get("content", "")
            description = content[:description_limit]
            if len(content) > description_limit:
                description += "..."

            # Add message info to description
            info_parts = []
            if date_str:
                info_parts.append(f"Date: {date_str}")
            if thread_id:
                info_parts.append(f"Thread: {thread_id}")

            if info_parts:
                info_text = " | ".join(info_parts)
                description = (
                    f"{description} | {info_text}" if description else info_text
                )

            # Gmail message URL format; left empty when no message id is known.
            url = ""
            if message_id:
                url = f"https://mail.google.com/mail/u/0/#inbox/{message_id}"

            sources_list.append(
                {
                    # Falls back to the running counter when the document has no id.
                    "id": document.get("id", self.source_id_counter),
                    "title": title,
                    "description": description,
                    "url": url,
                    "message_id": message_id,
                    "subject": subject,
                    "sender": sender,
                    "date": date_str,
                    "thread_id": thread_id,
                }
            )
            # The counter advances once per chunk, whether or not it was used as the id.
            self.source_id_counter += 1

    # Create result object
    result_object = {**connector_info, "sources": sources_list}

    return result_object, gmail_chunks