Fix the Confluence search implementation

This commit is contained in:
CREDO23 2025-07-27 13:20:12 +02:00
parent 4b663ea73f
commit dee54bf5e1
3 changed files with 120 additions and 25 deletions

View file

@ -919,6 +919,29 @@ async def fetch_relevant_documents(
)
}
)
elif connector == "CONFLUENCE_CONNECTOR":
source_object, confluence_chunks = await connector_service.search_confluence(
user_query=reformulated_query,
user_id=user_id,
search_space_id=search_space_id,
top_k=top_k,
search_mode=search_mode,
)
# Add to sources and raw documents
if source_object:
all_sources.append(source_object)
all_raw_documents.extend(confluence_chunks)
# Stream found document count
if streaming_service and writer:
writer(
{
"yield_value": streaming_service.format_terminal_info_delta(
f"📚 Found {len(confluence_chunks)} Confluence pages related to your query"
)
}
)
except Exception as e:
error_message = f"Error searching connector {connector}: {e!s}"

View file

@ -349,31 +349,7 @@ class ConfluenceConnector:
else:
break
# Filter pages by date range
filtered_pages = []
start_datetime = datetime.fromisoformat(start_date).replace(tzinfo=timezone.utc)
end_datetime = datetime.fromisoformat(end_date + "T23:59:59").replace(tzinfo=timezone.utc)
for page in all_pages:
created_at = page.get("createdAt")
if created_at:
try:
page_date = datetime.fromisoformat(
created_at.replace("Z", "+00:00")
)
if start_datetime <= page_date <= end_datetime:
# Add comments if requested
if include_comments:
page["comments"] = self.get_page_comments(page["id"])
filtered_pages.append(page)
except ValueError:
# Skip pages with invalid date format
continue
if not filtered_pages:
return [], "No pages found in the specified date range."
return filtered_pages, None
return all_pages, None
except Exception as e:
return [], f"Error fetching pages: {e!s}"

View file

@ -1072,6 +1072,102 @@ class ConnectorService:
}
return result_object, jira_chunks
async def search_confluence(
    self,
    user_query: str,
    user_id: str,
    search_space_id: int,
    top_k: int = 20,
    search_mode: SearchMode = SearchMode.CHUNKS,
) -> tuple:
    """
    Search indexed Confluence content and return source info plus the raw results.

    Args:
        user_query: The user's query
        user_id: The user's ID
        search_space_id: The search space ID to search in
        top_k: Maximum number of results to return
        search_mode: Search mode (CHUNKS or DOCUMENTS)

    Returns:
        tuple: (sources_info, results) where ``sources_info`` is a dict with
        the keys ``id``, ``name``, ``type`` and ``sources`` (one entry per
        result, not deduplicated) and ``results`` is the list produced by the
        retriever. When nothing matches, ``sources`` and ``results`` are empty.

    Raises:
        ValueError: If ``search_mode`` is neither CHUNKS nor DOCUMENTS.
    """
    if search_mode == SearchMode.CHUNKS:
        confluence_chunks = await self.chunk_retriever.hybrid_search(
            query_text=user_query,
            top_k=top_k,
            user_id=user_id,
            search_space_id=search_space_id,
            document_type="CONFLUENCE_CONNECTOR",
        )
    elif search_mode == SearchMode.DOCUMENTS:
        confluence_chunks = await self.document_retriever.hybrid_search(
            query_text=user_query,
            top_k=top_k,
            user_id=user_id,
            search_space_id=search_space_id,
            document_type="CONFLUENCE_CONNECTOR",
        )
        # Transform document retriever results to match expected format
        confluence_chunks = self._transform_document_results(confluence_chunks)
    else:
        # Fail with a clear message instead of an UnboundLocalError further down.
        raise ValueError(
            f"Unsupported search mode for Confluence search: {search_mode!r}"
        )

    # Early return if no results
    if not confluence_chunks:
        return {
            "id": 40,
            "name": "Confluence",
            "type": "CONFLUENCE_CONNECTOR",
            "sources": [],
        }, []

    # Process each chunk and create sources directly without deduplication
    sources_list = []
    # The lock is held for the whole loop so the fallback ids taken from
    # source_id_counter and its increments are not interleaved with other users
    # of the same lock.
    async with self.counter_lock:
        for chunk in confluence_chunks:
            # Extract document metadata
            document = chunk.get("document", {})
            metadata = document.get("metadata", {})

            # Extract Confluence-specific metadata
            page_title = metadata.get("page_title", "Untitled Page")
            page_id = metadata.get("page_id", "")
            space_key = metadata.get("space_key", "")
            base_url = metadata.get("base_url")

            # Create a more descriptive title for Confluence pages
            title = f"Confluence: {page_title}"
            if space_key:
                title += f" ({space_key})"

            # Preview of the content; the ellipsis is added only when text was
            # actually cut off (content of exactly 100 characters stays as-is).
            content = chunk.get("content", "")
            description = content[:100]
            if len(content) > 100:
                description += "..."

            # Build a page URL only when both parts are known; a missing
            # base_url would otherwise produce the literal "None/pages/<id>".
            url = f"{base_url}/pages/{page_id}" if base_url and page_id else ""

            sources_list.append(
                {
                    "id": document.get("id", self.source_id_counter),
                    "title": title,
                    "description": description,
                    "url": url,
                }
            )
            self.source_id_counter += 1

    # Create result object
    result_object = {
        "id": 40,
        "name": "Confluence",
        "type": "CONFLUENCE_CONNECTOR",
        "sources": sources_list,
    }
    return result_object, confluence_chunks
async def search_linkup(
self, user_query: str, user_id: str, mode: str = "standard"