mirror of
https://github.com/MODSetter/SurfSense.git
synced 2025-09-02 02:29:08 +00:00
fix the confluence search implementation
This commit is contained in:
parent
4b663ea73f
commit
dee54bf5e1
3 changed files with 120 additions and 25 deletions
|
@ -919,6 +919,29 @@ async def fetch_relevant_documents(
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
elif connector == "CONFLUENCE_CONNECTOR":
|
||||||
|
source_object, confluence_chunks = await connector_service.search_confluence(
|
||||||
|
user_query=reformulated_query,
|
||||||
|
user_id=user_id,
|
||||||
|
search_space_id=search_space_id,
|
||||||
|
top_k=top_k,
|
||||||
|
search_mode=search_mode,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Add to sources and raw documents
|
||||||
|
if source_object:
|
||||||
|
all_sources.append(source_object)
|
||||||
|
all_raw_documents.extend(confluence_chunks)
|
||||||
|
|
||||||
|
# Stream found document count
|
||||||
|
if streaming_service and writer:
|
||||||
|
writer(
|
||||||
|
{
|
||||||
|
"yield_value": streaming_service.format_terminal_info_delta(
|
||||||
|
f"📚 Found {len(confluence_chunks)} Confluence pages related to your query"
|
||||||
|
)
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
error_message = f"Error searching connector {connector}: {e!s}"
|
error_message = f"Error searching connector {connector}: {e!s}"
|
||||||
|
|
|
@ -349,31 +349,7 @@ class ConfluenceConnector:
|
||||||
else:
|
else:
|
||||||
break
|
break
|
||||||
|
|
||||||
# Filter pages by date range
|
return all_pages, None
|
||||||
filtered_pages = []
|
|
||||||
start_datetime = datetime.fromisoformat(start_date).replace(tzinfo=timezone.utc)
|
|
||||||
end_datetime = datetime.fromisoformat(end_date + "T23:59:59").replace(tzinfo=timezone.utc)
|
|
||||||
|
|
||||||
for page in all_pages:
|
|
||||||
created_at = page.get("createdAt")
|
|
||||||
if created_at:
|
|
||||||
try:
|
|
||||||
page_date = datetime.fromisoformat(
|
|
||||||
created_at.replace("Z", "+00:00")
|
|
||||||
)
|
|
||||||
if start_datetime <= page_date <= end_datetime:
|
|
||||||
# Add comments if requested
|
|
||||||
if include_comments:
|
|
||||||
page["comments"] = self.get_page_comments(page["id"])
|
|
||||||
filtered_pages.append(page)
|
|
||||||
except ValueError:
|
|
||||||
# Skip pages with invalid date format
|
|
||||||
continue
|
|
||||||
|
|
||||||
if not filtered_pages:
|
|
||||||
return [], "No pages found in the specified date range."
|
|
||||||
|
|
||||||
return filtered_pages, None
|
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
return [], f"Error fetching pages: {e!s}"
|
return [], f"Error fetching pages: {e!s}"
|
||||||
|
|
|
@ -1072,6 +1072,102 @@ class ConnectorService:
|
||||||
}
|
}
|
||||||
|
|
||||||
return result_object, jira_chunks
|
return result_object, jira_chunks
|
||||||
|
async def search_confluence(
    self,
    user_query: str,
    user_id: str,
    search_space_id: int,
    top_k: int = 20,
    search_mode: SearchMode = SearchMode.CHUNKS,
) -> tuple:
    """
    Search for Confluence pages and return both the source information and langchain documents

    Args:
        user_query: The user's query
        user_id: The user's ID
        search_space_id: The search space ID to search in
        top_k: Maximum number of results to return
        search_mode: Search mode (CHUNKS or DOCUMENTS)

    Returns:
        tuple: (sources_info, langchain_documents). ``sources_info`` is a dict
        with "id", "name", "type" and "sources" keys; the second element is the
        list of retrieved results (empty when nothing matched).

    Raises:
        ValueError: If ``search_mode`` is neither CHUNKS nor DOCUMENTS.
    """
    # Both retrievers are queried with the same arguments, restricted to
    # documents of type CONFLUENCE_CONNECTOR.
    search_kwargs = {
        "query_text": user_query,
        "top_k": top_k,
        "user_id": user_id,
        "search_space_id": search_space_id,
        "document_type": "CONFLUENCE_CONNECTOR",
    }

    if search_mode == SearchMode.CHUNKS:
        confluence_chunks = await self.chunk_retriever.hybrid_search(**search_kwargs)
    elif search_mode == SearchMode.DOCUMENTS:
        confluence_chunks = await self.document_retriever.hybrid_search(
            **search_kwargs
        )
        # Transform document retriever results to match expected format
        confluence_chunks = self._transform_document_results(confluence_chunks)
    else:
        # Previously this fell through and failed later with an
        # UnboundLocalError; fail early with an explicit message instead.
        raise ValueError(f"Unsupported search mode: {search_mode!r}")

    # Early return if no results
    if not confluence_chunks:
        return {
            "id": 40,
            "name": "Confluence",
            "type": "CONFLUENCE_CONNECTOR",
            "sources": [],
        }, []

    max_description_length = 100

    # Process each chunk and create sources directly without deduplication
    sources_list = []
    async with self.counter_lock:
        for chunk in confluence_chunks:
            # Extract document metadata
            document = chunk.get("document", {})
            metadata = document.get("metadata", {})

            # Extract Confluence-specific metadata
            page_title = metadata.get("page_title", "Untitled Page")
            page_id = metadata.get("page_id", "")
            space_key = metadata.get("space_key", "")
            base_url = metadata.get("base_url")

            # Create a more descriptive title for Confluence pages
            title = f"Confluence: {page_title}"
            if space_key:
                title += f" ({space_key})"

            # Short preview of the content; the ellipsis is only added when
            # the content was actually cut off. A None content is treated
            # as empty.
            content = chunk.get("content") or ""
            description = content[:max_description_length]
            if len(content) > max_description_length:
                description += "..."

            # Only build a link when both parts are known; otherwise a
            # missing base_url would produce a bogus "None/pages/<id>" URL.
            url = ""
            if page_id and base_url:
                url = f"{base_url}/pages/{page_id}"

            source = {
                # Falls back to the running counter when the document has no id
                "id": document.get("id", self.source_id_counter),
                "title": title,
                "description": description,
                "url": url,
            }

            # The counter advances once per chunk, whether or not it was used
            self.source_id_counter += 1
            sources_list.append(source)

    # Create result object
    result_object = {
        "id": 40,
        "name": "Confluence",
        "type": "CONFLUENCE_CONNECTOR",
        "sources": sources_list,
    }

    return result_object, confluence_chunks
|
||||||
|
|
||||||
async def search_linkup(
|
async def search_linkup(
|
||||||
self, user_query: str, user_id: str, mode: str = "standard"
|
self, user_query: str, user_id: str, mode: str = "standard"
|
||||||
|
|
Loading…
Add table
Reference in a new issue