mirror of
https://github.com/MODSetter/SurfSense.git
synced 2025-09-04 19:49:09 +00:00
add search google gmail connector service
This commit is contained in:
parent
1de0f0309c
commit
e17d969087
1 changed files with 126 additions and 0 deletions
|
@ -1208,6 +1208,132 @@ class ConnectorService:
|
||||||
|
|
||||||
return result_object, calendar_chunks
|
return result_object, calendar_chunks
|
||||||
|
|
||||||
|
async def search_google_gmail(
    self,
    user_query: str,
    user_id: str,
    search_space_id: int,
    top_k: int = 20,
    search_mode: SearchMode = SearchMode.CHUNKS,
) -> tuple:
    """
    Search indexed Gmail messages and return source info plus the raw hits.

    Runs a hybrid search restricted to the ``GOOGLE_GMAIL_CONNECTOR``
    document type, then builds one source entry per hit (no deduplication).

    Args:
        user_query: The user's query
        user_id: The user's ID
        search_space_id: The search space ID to search in
        top_k: Maximum number of results to return
        search_mode: Search mode (CHUNKS or DOCUMENTS)

    Returns:
        tuple: (sources_info, hits) where ``sources_info`` is a dict with
        the keys ``id``, ``name``, ``type`` and ``sources``, and ``hits`` is
        the list produced by the retriever (passed through
        ``_transform_document_results`` in DOCUMENTS mode). When nothing is
        found, ``sources`` is empty and ``hits`` is ``[]``.

    Raises:
        ValueError: If ``search_mode`` is neither CHUNKS nor DOCUMENTS.
    """
    # Imported once per call; the original re-ran the import statement for
    # every hit while holding the counter lock.
    import re

    if search_mode == SearchMode.CHUNKS:
        gmail_chunks = await self.chunk_retriever.hybrid_search(
            query_text=user_query,
            top_k=top_k,
            user_id=user_id,
            search_space_id=search_space_id,
            document_type="GOOGLE_GMAIL_CONNECTOR",
        )
    elif search_mode == SearchMode.DOCUMENTS:
        gmail_chunks = await self.document_retriever.hybrid_search(
            query_text=user_query,
            top_k=top_k,
            user_id=user_id,
            search_space_id=search_space_id,
            document_type="GOOGLE_GMAIL_CONNECTOR",
        )
        # Transform document retriever results to match expected format
        gmail_chunks = self._transform_document_results(gmail_chunks)
    else:
        # Without this branch ``gmail_chunks`` would be unbound below and the
        # caller would see an opaque UnboundLocalError.
        raise ValueError(f"Unsupported search mode: {search_mode!r}")

    # Single definition of the connector envelope, shared by the empty and
    # non-empty return paths.
    sources_list = []
    result_object = {
        "id": 32,  # Assign a unique ID for the Gmail connector
        "name": "Gmail Messages",
        "type": "GOOGLE_GMAIL_CONNECTOR",
        "sources": sources_list,
    }

    # Early return if no results
    if not gmail_chunks:
        return result_object, []

    # Matches the address inside angle brackets, e.g. "Name <a@b.c>".
    angle_address = re.compile(r"<([^>]+)>")
    max_preview = 150

    # The lock guards source_id_counter, which is read and incremented per hit.
    async with self.counter_lock:
        for chunk in gmail_chunks:
            # Extract document metadata; ``or {}`` also covers keys that are
            # present but set to None.
            document = chunk.get("document") or {}
            metadata = document.get("metadata") or {}

            # Extract Gmail-specific metadata
            message_id = metadata.get("message_id", "")
            subject = metadata.get("subject", "No Subject")
            sender = metadata.get("sender", "Unknown Sender")
            date_str = metadata.get("date", "")
            thread_id = metadata.get("thread_id", "")

            # Title: subject plus the sender, preferring the bare address
            # when the sender field is in "Name <address>" form.
            title = f"Email: {subject}"
            if sender:
                sender_match = angle_address.search(sender)
                shown_sender = sender_match.group(1) if sender_match else sender
                title += f" (from {shown_sender})"

            # Description: content preview, with an ellipsis only when the
            # content was actually cut (exactly 150 chars is not truncated).
            content = chunk.get("content") or ""
            description = content[:max_preview]
            if len(content) > max_preview:
                description += "..."

            # Add message info to description
            info_parts = []
            if date_str:
                info_parts.append(f"Date: {date_str}")
            if thread_id:
                info_parts.append(f"Thread: {thread_id}")
            if info_parts:
                info = " | ".join(info_parts)
                description = f"{description} | {info}" if description else info

            # NOTE(review): the "#inbox" anchor presumably only resolves for
            # messages still in the inbox — confirm whether "#all" is wanted.
            url = ""
            if message_id:
                url = f"https://mail.google.com/mail/u/0/#inbox/{message_id}"

            sources_list.append(
                {
                    # Fall back to the running counter when the document
                    # carries no id of its own.
                    "id": document.get("id", self.source_id_counter),
                    "title": title,
                    "description": description,
                    "url": url,
                    "message_id": message_id,
                    "subject": subject,
                    "sender": sender,
                    "date": date_str,
                    "thread_id": thread_id,
                }
            )
            self.source_id_counter += 1

    return result_object, gmail_chunks
|
||||||
|
|
||||||
async def search_confluence(
|
async def search_confluence(
|
||||||
self,
|
self,
|
||||||
user_query: str,
|
user_query: str,
|
||||||
|
|
Loading…
Add table
Reference in a new issue