add coderabbit suggestions

Author: CREDO23
Date:   2025-07-25 08:57:32 +02:00
parent 756a429159
commit b4d29ba3a0
6 changed files with 839 additions and 497 deletions


@@ -0,0 +1 @@
+{"2d0ec64d93969318101ee479b664221b32241665":{"files":{"surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/page.tsx":["EHKKvlOK0vfy0GgHwlG/J2Bx5rw=",true]},"modified":1753426633288}}


@@ -2,32 +2,31 @@ import asyncio
 import json
 from typing import Any, Dict, List
 
+from app.db import Document, SearchSpace
 from app.services.connector_service import ConnectorService
+from app.services.query_service import QueryService
 from langchain_core.messages import HumanMessage, SystemMessage
 from langchain_core.runnables import RunnableConfig
-from sqlalchemy.ext.asyncio import AsyncSession
-from .configuration import Configuration, SearchMode
-from .prompts import get_answer_outline_system_prompt, get_further_questions_system_prompt
-from .state import State
-from .sub_section_writer.graph import graph as sub_section_writer_graph
-from .sub_section_writer.configuration import SubSectionType
-from .qna_agent.graph import graph as qna_agent_graph
-from .utils import AnswerOutline, get_connector_emoji, get_connector_friendly_name
-from app.services.query_service import QueryService
 from langgraph.types import StreamWriter
+from sqlalchemy.ext.asyncio import AsyncSession
 
 # Additional imports for document fetching
 from sqlalchemy.future import select
-from app.db import Document, SearchSpace
+
+from .configuration import Configuration, SearchMode
+from .prompts import (
+    get_answer_outline_system_prompt,
+    get_further_questions_system_prompt,
+)
+from .qna_agent.graph import graph as qna_agent_graph
+from .state import State
+from .sub_section_writer.configuration import SubSectionType
+from .sub_section_writer.graph import graph as sub_section_writer_graph
+from .utils import AnswerOutline, get_connector_emoji, get_connector_friendly_name
 
 
 async def fetch_documents_by_ids(
-    document_ids: List[int],
-    user_id: str,
-    db_session: AsyncSession
+    document_ids: List[int], user_id: str, db_session: AsyncSession
 ) -> tuple[List[Dict[str, Any]], List[Dict[str, Any]]]:
     """
     Fetch documents by their IDs with ownership check using DOCUMENTS mode approach.
@@ -53,10 +52,7 @@ async def fetch_documents_by_ids(
         result = await db_session.execute(
             select(Document)
             .join(SearchSpace)
-            .filter(
-                Document.id.in_(document_ids),
-                SearchSpace.user_id == user_id
-            )
+            .filter(Document.id.in_(document_ids), SearchSpace.user_id == user_id)
         )
 
         documents = result.scalars().all()
@@ -67,12 +63,17 @@ async def fetch_documents_by_ids(
         for doc in documents:
             # Fetch associated chunks for this document (similar to DocumentHybridSearchRetriever)
             from app.db import Chunk
-            chunks_query = select(Chunk).where(Chunk.document_id == doc.id).order_by(Chunk.id)
+
+            chunks_query = (
+                select(Chunk).where(Chunk.document_id == doc.id).order_by(Chunk.id)
+            )
             chunks_result = await db_session.execute(chunks_query)
             chunks = chunks_result.scalars().all()
 
             # Concatenate chunks content (similar to SearchMode.DOCUMENTS approach)
-            concatenated_chunks_content = " ".join([chunk.content for chunk in chunks]) if chunks else doc.content
+            concatenated_chunks_content = (
+                " ".join([chunk.content for chunk in chunks]) if chunks else doc.content
+            )
 
             # Format to match connector service return format
             formatted_doc = {
@@ -82,10 +83,12 @@ async def fetch_documents_by_ids(
                 "document": {
                     "id": doc.id,
                     "title": doc.title,
-                    "document_type": doc.document_type.value if doc.document_type else "UNKNOWN",
+                    "document_type": (
+                        doc.document_type.value if doc.document_type else "UNKNOWN"
+                    ),
                     "metadata": doc.document_metadata or {},
                 },
-                "source": doc.document_type.value if doc.document_type else "UNKNOWN"
+                "source": doc.document_type.value if doc.document_type else "UNKNOWN",
             }
             formatted_documents.append(formatted_doc)
@@ -97,7 +100,9 @@ async def fetch_documents_by_ids(
         # Create source objects for each document type (similar to ConnectorService)
         source_objects = []
-        connector_id_counter = 100  # Start from 100 to avoid conflicts with regular connectors
+        connector_id_counter = (
+            100  # Start from 100 to avoid conflicts with regular connectors
+        )
 
         for doc_type, docs in documents_by_type.items():
             sources_list = []
@ -108,76 +113,126 @@ async def fetch_documents_by_ids(
# Create type-specific source formatting (similar to ConnectorService) # Create type-specific source formatting (similar to ConnectorService)
if doc_type == "LINEAR_CONNECTOR": if doc_type == "LINEAR_CONNECTOR":
# Extract Linear-specific metadata # Extract Linear-specific metadata
issue_identifier = metadata.get('issue_identifier', '') issue_identifier = metadata.get("issue_identifier", "")
issue_title = metadata.get('issue_title', doc.title) issue_title = metadata.get("issue_title", doc.title)
issue_state = metadata.get('state', '') issue_state = metadata.get("state", "")
comment_count = metadata.get('comment_count', 0) comment_count = metadata.get("comment_count", 0)
# Create a more descriptive title for Linear issues # Create a more descriptive title for Linear issues
title = f"Linear: {issue_identifier} - {issue_title}" if issue_identifier else f"Linear: {issue_title}" title = (
f"Linear: {issue_identifier} - {issue_title}"
if issue_identifier
else f"Linear: {issue_title}"
)
if issue_state: if issue_state:
title += f" ({issue_state})" title += f" ({issue_state})"
# Create description # Create description
description = doc.content[:100] + "..." if len(doc.content) > 100 else doc.content description = (
doc.content[:100] + "..."
if len(doc.content) > 100
else doc.content
)
if comment_count: if comment_count:
description += f" | Comments: {comment_count}" description += f" | Comments: {comment_count}"
# Create URL # Create URL
url = f"https://linear.app/issue/{issue_identifier}" if issue_identifier else "" url = (
f"https://linear.app/issue/{issue_identifier}"
if issue_identifier
else ""
)
elif doc_type == "SLACK_CONNECTOR": elif doc_type == "SLACK_CONNECTOR":
# Extract Slack-specific metadata # Extract Slack-specific metadata
channel_name = metadata.get('channel_name', 'Unknown Channel') channel_name = metadata.get("channel_name", "Unknown Channel")
channel_id = metadata.get('channel_id', '') channel_id = metadata.get("channel_id", "")
message_date = metadata.get('start_date', '') message_date = metadata.get("start_date", "")
title = f"Slack: {channel_name}" title = f"Slack: {channel_name}"
if message_date: if message_date:
title += f" ({message_date})" title += f" ({message_date})"
description = doc.content[:100] + "..." if len(doc.content) > 100 else doc.content description = (
url = f"https://slack.com/app_redirect?channel={channel_id}" if channel_id else "" doc.content[:100] + "..."
if len(doc.content) > 100
else doc.content
)
url = (
f"https://slack.com/app_redirect?channel={channel_id}"
if channel_id
else ""
)
elif doc_type == "NOTION_CONNECTOR": elif doc_type == "NOTION_CONNECTOR":
# Extract Notion-specific metadata # Extract Notion-specific metadata
page_title = metadata.get('page_title', doc.title) page_title = metadata.get("page_title", doc.title)
page_id = metadata.get('page_id', '') page_id = metadata.get("page_id", "")
title = f"Notion: {page_title}" title = f"Notion: {page_title}"
description = doc.content[:100] + "..." if len(doc.content) > 100 else doc.content description = (
url = f"https://notion.so/{page_id.replace('-', '')}" if page_id else "" doc.content[:100] + "..."
if len(doc.content) > 100
else doc.content
)
url = (
f"https://notion.so/{page_id.replace('-', '')}"
if page_id
else ""
)
elif doc_type == "GITHUB_CONNECTOR": elif doc_type == "GITHUB_CONNECTOR":
title = f"GitHub: {doc.title}" title = f"GitHub: {doc.title}"
description = metadata.get('description', doc.content[:100] + "..." if len(doc.content) > 100 else doc.content) description = metadata.get(
url = metadata.get('url', '') "description",
(
doc.content[:100] + "..."
if len(doc.content) > 100
else doc.content
),
)
url = metadata.get("url", "")
elif doc_type == "YOUTUBE_VIDEO": elif doc_type == "YOUTUBE_VIDEO":
# Extract YouTube-specific metadata # Extract YouTube-specific metadata
video_title = metadata.get('video_title', doc.title) video_title = metadata.get("video_title", doc.title)
video_id = metadata.get('video_id', '') video_id = metadata.get("video_id", "")
channel_name = metadata.get('channel_name', '') channel_name = metadata.get("channel_name", "")
title = video_title title = video_title
if channel_name: if channel_name:
title += f" - {channel_name}" title += f" - {channel_name}"
description = metadata.get('description', doc.content[:100] + "..." if len(doc.content) > 100 else doc.content) description = metadata.get(
url = f"https://www.youtube.com/watch?v={video_id}" if video_id else "" "description",
(
doc.content[:100] + "..."
if len(doc.content) > 100
else doc.content
),
)
url = (
f"https://www.youtube.com/watch?v={video_id}"
if video_id
else ""
)
elif doc_type == "DISCORD_CONNECTOR": elif doc_type == "DISCORD_CONNECTOR":
# Extract Discord-specific metadata # Extract Discord-specific metadata
channel_name = metadata.get('channel_name', 'Unknown Channel') channel_name = metadata.get("channel_name", "Unknown Channel")
channel_id = metadata.get('channel_id', '') channel_id = metadata.get("channel_id", "")
guild_id = metadata.get('guild_id', '') guild_id = metadata.get("guild_id", "")
message_date = metadata.get('start_date', '') message_date = metadata.get("start_date", "")
title = f"Discord: {channel_name}" title = f"Discord: {channel_name}"
if message_date: if message_date:
title += f" ({message_date})" title += f" ({message_date})"
description = doc.content[:100] + "..." if len(doc.content) > 100 else doc.content description = (
doc.content[:100] + "..."
if len(doc.content) > 100
else doc.content
)
if guild_id and channel_id: if guild_id and channel_id:
url = f"https://discord.com/channels/{guild_id}/{channel_id}" url = f"https://discord.com/channels/{guild_id}/{channel_id}"
@@ -188,20 +243,28 @@ async def fetch_documents_by_ids(
                 elif doc_type == "JIRA_CONNECTOR":
                     # Extract Jira-specific metadata
-                    issue_key = metadata.get('issue_key', 'Unknown Issue')
-                    issue_title = metadata.get('issue_title', 'Untitled Issue')
-                    status = metadata.get('status', '')
-                    priority = metadata.get('priority', '')
-                    issue_type = metadata.get('issue_type', '')
+                    issue_key = metadata.get("issue_key", "Unknown Issue")
+                    issue_title = metadata.get("issue_title", "Untitled Issue")
+                    status = metadata.get("status", "")
+                    priority = metadata.get("priority", "")
+                    issue_type = metadata.get("issue_type", "")
 
                     title = f"Jira: {issue_key} - {issue_title}"
                     if status:
                         title += f" ({status})"
 
-                    description = doc.content[:100] + "..." if len(doc.content) > 100 else doc.content
+                    description = (
+                        doc.content[:100] + "..."
+                        if len(doc.content) > 100
+                        else doc.content
+                    )
+                    if priority:
+                        description += f" | Priority: {priority}"
+                    if issue_type:
+                        description += f" | Type: {issue_type}"
 
                     # Construct Jira URL if we have the base URL
-                    base_url = metadata.get('base_url', '')
+                    base_url = metadata.get("base_url", "")
                     if base_url and issue_key:
                         url = f"{base_url}/browse/{issue_key}"
                     else:
@ -209,34 +272,58 @@ async def fetch_documents_by_ids(
elif doc_type == "EXTENSION": elif doc_type == "EXTENSION":
# Extract Extension-specific metadata # Extract Extension-specific metadata
webpage_title = metadata.get('VisitedWebPageTitle', doc.title) webpage_title = metadata.get("VisitedWebPageTitle", doc.title)
webpage_url = metadata.get('VisitedWebPageURL', '') webpage_url = metadata.get("VisitedWebPageURL", "")
visit_date = metadata.get('VisitedWebPageDateWithTimeInISOString', '') visit_date = metadata.get(
"VisitedWebPageDateWithTimeInISOString", ""
)
title = webpage_title title = webpage_title
if visit_date: if visit_date:
formatted_date = visit_date.split('T')[0] if 'T' in visit_date else visit_date formatted_date = (
visit_date.split("T")[0]
if "T" in visit_date
else visit_date
)
title += f" (visited: {formatted_date})" title += f" (visited: {formatted_date})"
description = doc.content[:100] + "..." if len(doc.content) > 100 else doc.content description = (
doc.content[:100] + "..."
if len(doc.content) > 100
else doc.content
)
url = webpage_url url = webpage_url
elif doc_type == "CRAWLED_URL": elif doc_type == "CRAWLED_URL":
title = doc.title title = doc.title
description = metadata.get('og:description', metadata.get('ogDescription', doc.content[:100] + "..." if len(doc.content) > 100 else doc.content)) description = metadata.get(
url = metadata.get('url', '') "og:description",
metadata.get(
"ogDescription",
(
doc.content[:100] + "..."
if len(doc.content) > 100
else doc.content
),
),
)
url = metadata.get("url", "")
else: # FILE and other types else: # FILE and other types
title = doc.title title = doc.title
description = doc.content[:100] + "..." if len(doc.content) > 100 else doc.content description = (
url = metadata.get('url', '') doc.content[:100] + "..."
if len(doc.content) > 100
else doc.content
)
url = metadata.get("url", "")
# Create source entry # Create source entry
source = { source = {
"id": doc.id, "id": doc.id,
"title": title, "title": title,
"description": description, "description": description,
"url": url "url": url,
} }
sources_list.append(source) sources_list.append(source)
@ -251,7 +338,7 @@ async def fetch_documents_by_ids(
"JIRA_CONNECTOR": "Jira Issues (Selected)", "JIRA_CONNECTOR": "Jira Issues (Selected)",
"EXTENSION": "Browser Extension (Selected)", "EXTENSION": "Browser Extension (Selected)",
"CRAWLED_URL": "Web Pages (Selected)", "CRAWLED_URL": "Web Pages (Selected)",
"FILE": "Files (Selected)" "FILE": "Files (Selected)",
} }
source_object = { source_object = {
@@ -263,7 +350,9 @@ async def fetch_documents_by_ids(
             source_objects.append(source_object)
             connector_id_counter += 1
 
-        print(f"Fetched {len(formatted_documents)} user-selected documents (with concatenated chunks) from {len(document_ids)} requested IDs")
+        print(
+            f"Fetched {len(formatted_documents)} user-selected documents (with concatenated chunks) from {len(document_ids)} requested IDs"
+        )
         print(f"Created {len(source_objects)} source objects for UI display")
 
         return source_objects, formatted_documents
@@ -273,7 +362,9 @@ async def fetch_documents_by_ids(
         return [], []
 
 
-async def write_answer_outline(state: State, config: RunnableConfig, writer: StreamWriter) -> Dict[str, Any]:
+async def write_answer_outline(
+    state: State, config: RunnableConfig, writer: StreamWriter
+) -> Dict[str, Any]:
     """
     Create a structured answer outline based on the user query.
@@ -285,7 +376,6 @@ async def write_answer_outline(state: State, config: RunnableConfig, writer: StreamWriter) -> Dict[str, Any]:
         Dict containing the answer outline in the "answer_outline" key for state update.
     """
     from app.services.llm_service import get_user_strategic_llm
-    from app.db import get_async_session
 
     streaming_service = state.streaming_service
@ -353,7 +443,7 @@ async def write_answer_outline(state: State, config: RunnableConfig, writer: Str
# Create messages for the LLM # Create messages for the LLM
messages = [ messages = [
SystemMessage(content=get_answer_outline_system_prompt()), SystemMessage(content=get_answer_outline_system_prompt()),
HumanMessage(content=human_message_content) HumanMessage(content=human_message_content),
] ]
# Call the LLM directly without using structured output # Call the LLM directly without using structured output
@ -373,8 +463,8 @@ async def write_answer_outline(state: State, config: RunnableConfig, writer: Str
content = response.content content = response.content
# Find the JSON in the content (handle case where LLM might add additional text) # Find the JSON in the content (handle case where LLM might add additional text)
json_start = content.find('{') json_start = content.find("{")
json_end = content.rfind('}') + 1 json_end = content.rfind("}") + 1
if json_start >= 0 and json_end > json_start: if json_start >= 0 and json_end > json_start:
json_str = content[json_start:json_end] json_str = content[json_start:json_end]
@ -384,7 +474,9 @@ async def write_answer_outline(state: State, config: RunnableConfig, writer: Str
# Convert to Pydantic model # Convert to Pydantic model
answer_outline = AnswerOutline(**parsed_data) answer_outline = AnswerOutline(**parsed_data)
total_questions = sum(len(section.questions) for section in answer_outline.answer_outline) total_questions = sum(
len(section.questions) for section in answer_outline.answer_outline
)
writer( writer(
{ {
@ -429,7 +521,7 @@ async def fetch_relevant_documents(
top_k: int = 10, top_k: int = 10,
connector_service: ConnectorService = None, connector_service: ConnectorService = None,
search_mode: SearchMode = SearchMode.CHUNKS, search_mode: SearchMode = SearchMode.CHUNKS,
user_selected_sources: List[Dict[str, Any]] = None user_selected_sources: List[Dict[str, Any]] = None,
) -> List[Dict[str, Any]]: ) -> List[Dict[str, Any]]:
""" """
Fetch relevant documents for research questions using the provided connectors. Fetch relevant documents for research questions using the provided connectors.
@ -461,7 +553,9 @@ async def fetch_relevant_documents(
# Stream initial status update # Stream initial status update
if streaming_service and writer: if streaming_service and writer:
connector_names = [get_connector_friendly_name(connector) for connector in connectors_to_search] connector_names = [
get_connector_friendly_name(connector) for connector in connectors_to_search
]
connector_names_str = ", ".join(connector_names) connector_names_str = ", ".join(connector_names)
writer( writer(
{ {
@ -504,12 +598,15 @@ async def fetch_relevant_documents(
try: try:
if connector == "YOUTUBE_VIDEO": if connector == "YOUTUBE_VIDEO":
source_object, youtube_chunks = await connector_service.search_youtube( (
source_object,
youtube_chunks,
) = await connector_service.search_youtube(
user_query=reformulated_query, user_query=reformulated_query,
user_id=user_id, user_id=user_id,
search_space_id=search_space_id, search_space_id=search_space_id,
top_k=top_k, top_k=top_k,
search_mode=search_mode search_mode=search_mode,
) )
# Add to sources and raw documents # Add to sources and raw documents
@ -528,12 +625,15 @@ async def fetch_relevant_documents(
) )
elif connector == "EXTENSION": elif connector == "EXTENSION":
source_object, extension_chunks = await connector_service.search_extension( (
source_object,
extension_chunks,
) = await connector_service.search_extension(
user_query=reformulated_query, user_query=reformulated_query,
user_id=user_id, user_id=user_id,
search_space_id=search_space_id, search_space_id=search_space_id,
top_k=top_k, top_k=top_k,
search_mode=search_mode search_mode=search_mode,
) )
# Add to sources and raw documents # Add to sources and raw documents
@ -552,12 +652,15 @@ async def fetch_relevant_documents(
) )
elif connector == "CRAWLED_URL": elif connector == "CRAWLED_URL":
source_object, crawled_urls_chunks = await connector_service.search_crawled_urls( (
source_object,
crawled_urls_chunks,
) = await connector_service.search_crawled_urls(
user_query=reformulated_query, user_query=reformulated_query,
user_id=user_id, user_id=user_id,
search_space_id=search_space_id, search_space_id=search_space_id,
top_k=top_k, top_k=top_k,
search_mode=search_mode search_mode=search_mode,
) )
# Add to sources and raw documents # Add to sources and raw documents
@ -581,7 +684,7 @@ async def fetch_relevant_documents(
user_id=user_id, user_id=user_id,
search_space_id=search_space_id, search_space_id=search_space_id,
top_k=top_k, top_k=top_k,
search_mode=search_mode search_mode=search_mode,
) )
# Add to sources and raw documents # Add to sources and raw documents
@ -605,7 +708,7 @@ async def fetch_relevant_documents(
user_id=user_id, user_id=user_id,
search_space_id=search_space_id, search_space_id=search_space_id,
top_k=top_k, top_k=top_k,
search_mode=search_mode search_mode=search_mode,
) )
# Add to sources and raw documents # Add to sources and raw documents
@ -624,12 +727,15 @@ async def fetch_relevant_documents(
) )
elif connector == "NOTION_CONNECTOR": elif connector == "NOTION_CONNECTOR":
source_object, notion_chunks = await connector_service.search_notion( (
source_object,
notion_chunks,
) = await connector_service.search_notion(
user_query=reformulated_query, user_query=reformulated_query,
user_id=user_id, user_id=user_id,
search_space_id=search_space_id, search_space_id=search_space_id,
top_k=top_k, top_k=top_k,
search_mode=search_mode search_mode=search_mode,
) )
# Add to sources and raw documents # Add to sources and raw documents
@ -648,12 +754,15 @@ async def fetch_relevant_documents(
) )
elif connector == "GITHUB_CONNECTOR": elif connector == "GITHUB_CONNECTOR":
source_object, github_chunks = await connector_service.search_github( (
source_object,
github_chunks,
) = await connector_service.search_github(
user_query=reformulated_query, user_query=reformulated_query,
user_id=user_id, user_id=user_id,
search_space_id=search_space_id, search_space_id=search_space_id,
top_k=top_k, top_k=top_k,
search_mode=search_mode search_mode=search_mode,
) )
# Add to sources and raw documents # Add to sources and raw documents
@ -672,12 +781,15 @@ async def fetch_relevant_documents(
) )
elif connector == "LINEAR_CONNECTOR": elif connector == "LINEAR_CONNECTOR":
source_object, linear_chunks = await connector_service.search_linear( (
source_object,
linear_chunks,
) = await connector_service.search_linear(
user_query=reformulated_query, user_query=reformulated_query,
user_id=user_id, user_id=user_id,
search_space_id=search_space_id, search_space_id=search_space_id,
top_k=top_k, top_k=top_k,
search_mode=search_mode search_mode=search_mode,
) )
# Add to sources and raw documents # Add to sources and raw documents
@ -696,10 +808,11 @@ async def fetch_relevant_documents(
) )
elif connector == "TAVILY_API": elif connector == "TAVILY_API":
source_object, tavily_chunks = await connector_service.search_tavily( (
user_query=reformulated_query, source_object,
user_id=user_id, tavily_chunks,
top_k=top_k ) = await connector_service.search_tavily(
user_query=reformulated_query, user_id=user_id, top_k=top_k
) )
# Add to sources and raw documents # Add to sources and raw documents
@ -723,7 +836,7 @@ async def fetch_relevant_documents(
source_object, linkup_chunks = await connector_service.search_linkup( source_object, linkup_chunks = await connector_service.search_linkup(
user_query=reformulated_query, user_query=reformulated_query,
user_id=user_id, user_id=user_id,
mode=linkup_mode mode=linkup_mode,
) )
# Add to sources and raw documents # Add to sources and raw documents
@ -742,12 +855,15 @@ async def fetch_relevant_documents(
) )
elif connector == "DISCORD_CONNECTOR": elif connector == "DISCORD_CONNECTOR":
source_object, discord_chunks = await connector_service.search_discord( (
source_object,
discord_chunks,
) = await connector_service.search_discord(
user_query=reformulated_query, user_query=reformulated_query,
user_id=user_id, user_id=user_id,
search_space_id=search_space_id, search_space_id=search_space_id,
top_k=top_k, top_k=top_k,
search_mode=search_mode search_mode=search_mode,
) )
# Add to sources and raw documents # Add to sources and raw documents
if source_object: if source_object:
@ -769,7 +885,7 @@ async def fetch_relevant_documents(
user_id=user_id, user_id=user_id,
search_space_id=search_space_id, search_space_id=search_space_id,
top_k=top_k, top_k=top_k,
search_mode=search_mode search_mode=search_mode,
) )
# Add to sources and raw documents # Add to sources and raw documents
@ -812,8 +928,8 @@ async def fetch_relevant_documents(
# First add user-selected sources (if any) # First add user-selected sources (if any)
if user_selected_sources: if user_selected_sources:
for source_obj in user_selected_sources: for source_obj in user_selected_sources:
source_id = source_obj.get('id') source_id = source_obj.get("id")
source_type = source_obj.get('type') source_type = source_obj.get("type")
if source_id and source_type: if source_id and source_type:
source_key = f"{source_type}_{source_id}" source_key = f"{source_type}_{source_id}"
@ -827,8 +943,8 @@ async def fetch_relevant_documents(
for source_obj in all_sources: for source_obj in all_sources:
# Use combination of source ID and type as a unique identifier # Use combination of source ID and type as a unique identifier
# This ensures we don't accidentally deduplicate sources from different connectors # This ensures we don't accidentally deduplicate sources from different connectors
source_id = source_obj.get('id') source_id = source_obj.get("id")
source_type = source_obj.get('type') source_type = source_obj.get("type")
if source_id and source_type: if source_id and source_type:
source_key = f"{source_type}_{source_id}" source_key = f"{source_type}_{source_id}"
@ -877,7 +993,13 @@ async def fetch_relevant_documents(
# After all sources are collected and deduplicated, stream them # After all sources are collected and deduplicated, stream them
if streaming_service and writer: if streaming_service and writer:
writer({"yield_value": streaming_service.format_sources_delta(deduplicated_sources)}) writer(
{
"yield_value": streaming_service.format_sources_delta(
deduplicated_sources
)
}
)
# Deduplicate raw documents based on chunk_id or content # Deduplicate raw documents based on chunk_id or content
seen_chunk_ids = set() seen_chunk_ids = set()
@ -890,7 +1012,9 @@ async def fetch_relevant_documents(
content_hash = hash(content) content_hash = hash(content)
# Skip if we've seen this chunk_id or content before # Skip if we've seen this chunk_id or content before
if (chunk_id and chunk_id in seen_chunk_ids) or content_hash in seen_content_hashes: if (
chunk_id and chunk_id in seen_chunk_ids
) or content_hash in seen_content_hashes:
continue continue
# Add to our tracking sets and keep this document # Add to our tracking sets and keep this document
@ -913,7 +1037,9 @@ async def fetch_relevant_documents(
return deduplicated_docs return deduplicated_docs
async def process_sections(state: State, config: RunnableConfig, writer: StreamWriter) -> Dict[str, Any]: async def process_sections(
state: State, config: RunnableConfig, writer: StreamWriter
) -> Dict[str, Any]:
""" """
Process all sections in parallel and combine the results. Process all sections in parallel and combine the results.
@ -997,10 +1123,13 @@ async def process_sections(state: State, config: RunnableConfig, writer: StreamW
} }
) )
user_selected_sources, user_selected_documents = await fetch_documents_by_ids( (
user_selected_sources,
user_selected_documents,
) = await fetch_documents_by_ids(
document_ids=configuration.document_ids_to_add_in_context, document_ids=configuration.document_ids_to_add_in_context,
user_id=configuration.user_id, user_id=configuration.user_id,
db_session=state.db_session db_session=state.db_session,
) )
if user_selected_documents: if user_selected_documents:
@ -1013,7 +1142,9 @@ async def process_sections(state: State, config: RunnableConfig, writer: StreamW
) )
# Create connector service using state db_session # Create connector service using state db_session
connector_service = ConnectorService(state.db_session, user_id=configuration.user_id) connector_service = ConnectorService(
state.db_session, user_id=configuration.user_id
)
await connector_service.initialize_counter() await connector_service.initialize_counter()
relevant_documents = await fetch_relevant_documents( relevant_documents = await fetch_relevant_documents(
@ -1027,7 +1158,7 @@ async def process_sections(state: State, config: RunnableConfig, writer: StreamW
top_k=TOP_K, top_k=TOP_K,
connector_service=connector_service, connector_service=connector_service,
search_mode=configuration.search_mode, search_mode=configuration.search_mode,
user_selected_sources=user_selected_sources user_selected_sources=user_selected_sources,
) )
except Exception as e: except Exception as e:
error_message = f"Error fetching relevant documents: {str(e)}" error_message = f"Error fetching relevant documents: {str(e)}"
@ -1041,7 +1172,9 @@ async def process_sections(state: State, config: RunnableConfig, writer: StreamW
all_documents = user_selected_documents + relevant_documents all_documents = user_selected_documents + relevant_documents
print(f"Fetched {len(relevant_documents)} relevant documents for all sections") print(f"Fetched {len(relevant_documents)} relevant documents for all sections")
print(f"Added {len(user_selected_documents)} user-selected documents for all sections") print(
f"Added {len(user_selected_documents)} user-selected documents for all sections"
)
print(f"Total documents for sections: {len(all_documents)}") print(f"Total documents for sections: {len(all_documents)}")
writer( writer(
@ -1074,7 +1207,7 @@ async def process_sections(state: State, config: RunnableConfig, writer: StreamW
section_contents[i] = { section_contents[i] = {
"title": section.section_title, "title": section.section_title,
"content": "", "content": "",
"index": i "index": i,
} }
section_tasks.append( section_tasks.append(
@ -1089,7 +1222,7 @@ async def process_sections(state: State, config: RunnableConfig, writer: StreamW
state=state, state=state,
writer=writer, writer=writer,
sub_section_type=sub_section_type, sub_section_type=sub_section_type,
section_contents=section_contents section_contents=section_contents,
) )
) )
@ -1127,7 +1260,9 @@ async def process_sections(state: State, config: RunnableConfig, writer: StreamW
# Combine the results into a final report with section titles # Combine the results into a final report with section titles
final_report = [] final_report = []
for i, (section, content) in enumerate(zip(answer_outline.answer_outline, processed_results)): for i, (section, content) in enumerate(
zip(answer_outline.answer_outline, processed_results)
):
# Skip adding the section header since the content already contains the title # Skip adding the section header since the content already contains the title
final_report.append(content) final_report.append(content)
final_report.append("\n") final_report.append("\n")
@ -1135,11 +1270,12 @@ async def process_sections(state: State, config: RunnableConfig, writer: StreamW
# Stream each section with its title # Stream each section with its title
writer( writer(
{ {
"yield_value": state.streaming_service.format_text_chunk(f"# {section.section_title}\n\n{content}") "yield_value": state.streaming_service.format_text_chunk(
f"# {section.section_title}\n\n{content}"
)
} }
) )
# Join all sections with newlines # Join all sections with newlines
final_written_report = "\n".join(final_report) final_written_report = "\n".join(final_report)
print(f"Generated final report with {len(final_report)} parts") print(f"Generated final report with {len(final_report)} parts")
@ -1156,7 +1292,7 @@ async def process_sections(state: State, config: RunnableConfig, writer: StreamW
# Since all sections used the same document pool, we can use it directly # Since all sections used the same document pool, we can use it directly
return { return {
"final_written_report": final_written_report, "final_written_report": final_written_report,
"reranked_documents": all_documents "reranked_documents": all_documents,
} }
@ -1171,7 +1307,7 @@ async def process_section_with_documents(
state: State = None, state: State = None,
writer: StreamWriter = None, writer: StreamWriter = None,
sub_section_type: SubSectionType = SubSectionType.MIDDLE, sub_section_type: SubSectionType = SubSectionType.MIDDLE,
section_contents: Dict[int, Dict[str, Any]] = None section_contents: Dict[int, Dict[str, Any]] = None,
) -> str: ) -> str:
""" """
Process a single section using pre-fetched documents. Process a single section using pre-fetched documents.
@ -1236,10 +1372,7 @@ async def process_section_with_documents(
} }
# Create the initial state with db_session and chat_history # Create the initial state with db_session and chat_history
sub_state = { sub_state = {"db_session": state.db_session, "chat_history": state.chat_history}
"db_session": state.db_session,
"chat_history": state.chat_history
}
# Invoke the sub-section writer graph with streaming # Invoke the sub-section writer graph with streaming
print(f"Invoking sub_section_writer for: {section_title}") print(f"Invoking sub_section_writer for: {section_title}")
@ -1255,12 +1388,14 @@ async def process_section_with_documents(
# Variables to track streaming state # Variables to track streaming state
complete_content = "" # Tracks the complete content received so far complete_content = "" # Tracks the complete content received so far
async for chunk_type, chunk in sub_section_writer_graph.astream(sub_state, config, stream_mode=["values"]): async for chunk_type, chunk in sub_section_writer_graph.astream(
sub_state, config, stream_mode=["values"]
):
if "final_answer" in chunk: if "final_answer" in chunk:
new_content = chunk["final_answer"] new_content = chunk["final_answer"]
if new_content and new_content != complete_content: if new_content and new_content != complete_content:
# Extract only the new content (delta) # Extract only the new content (delta)
delta = new_content[len(complete_content):] delta = new_content[len(complete_content) :]
# Update what we've processed so far # Update what we've processed so far
complete_content = new_content complete_content = new_content
@ -1284,15 +1419,18 @@ async def process_section_with_documents(
for i in range(len(section_contents)): for i in range(len(section_contents)):
if i in section_contents and section_contents[i]["content"]: if i in section_contents and section_contents[i]["content"]:
# Add section header # Add section header
complete_answer.append(f"# {section_contents[i]['title']}") complete_answer.append(
f"# {section_contents[i]['title']}"
)
complete_answer.append("") # Empty line after title complete_answer.append("") # Empty line after title
# Add section content # Add section content
content_lines = section_contents[i]["content"].split("\n") content_lines = section_contents[i]["content"].split(
"\n"
)
complete_answer.extend(content_lines) complete_answer.extend(content_lines)
complete_answer.append("") # Empty line after content complete_answer.append("") # Empty line after content
# Set default if no content was received # Set default if no content was received
if not complete_content: if not complete_content:
complete_content = "No content was generated for this section." complete_content = "No content was generated for this section."
@ -1325,25 +1463,34 @@ async def process_section_with_documents(
return f"Error processing section: {section_title}. Details: {str(e)}" return f"Error processing section: {section_title}. Details: {str(e)}"
async def reformulate_user_query(state: State, config: RunnableConfig, writer: StreamWriter) -> Dict[str, Any]: async def reformulate_user_query(
state: State, config: RunnableConfig, writer: StreamWriter
) -> Dict[str, Any]:
""" """
Reforms the user query based on the chat history. Reforms the user query based on the chat history.
""" """
configuration = Configuration.from_runnable_config(config) configuration = Configuration.from_runnable_config(config)
user_query = configuration.user_query user_query = configuration.user_query
chat_history_str = await QueryService.langchain_chat_history_to_str(state.chat_history) chat_history_str = await QueryService.langchain_chat_history_to_str(
state.chat_history
)
if len(state.chat_history) == 0: if len(state.chat_history) == 0:
reformulated_query = user_query reformulated_query = user_query
else: else:
reformulated_query = await QueryService.reformulate_query_with_chat_history(user_query=user_query, session=state.db_session, user_id=configuration.user_id, chat_history_str=chat_history_str) reformulated_query = await QueryService.reformulate_query_with_chat_history(
user_query=user_query,
session=state.db_session,
user_id=configuration.user_id,
chat_history_str=chat_history_str,
)
return { return {"reformulated_query": reformulated_query}
"reformulated_query": reformulated_query
}
async def handle_qna_workflow(state: State, config: RunnableConfig, writer: StreamWriter) -> Dict[str, Any]: async def handle_qna_workflow(
state: State, config: RunnableConfig, writer: StreamWriter
) -> Dict[str, Any]:
""" """
Handle the QNA research workflow. Handle the QNA research workflow.
@ -1402,10 +1549,13 @@ async def handle_qna_workflow(state: State, config: RunnableConfig, writer: Stre
} }
) )
user_selected_sources, user_selected_documents = await fetch_documents_by_ids( (
user_selected_sources,
user_selected_documents,
) = await fetch_documents_by_ids(
document_ids=configuration.document_ids_to_add_in_context, document_ids=configuration.document_ids_to_add_in_context,
user_id=configuration.user_id, user_id=configuration.user_id,
db_session=state.db_session db_session=state.db_session,
) )
if user_selected_documents: if user_selected_documents:
@ -1418,7 +1568,9 @@ async def handle_qna_workflow(state: State, config: RunnableConfig, writer: Stre
) )
# Create connector service using state db_session # Create connector service using state db_session
connector_service = ConnectorService(state.db_session, user_id=configuration.user_id) connector_service = ConnectorService(
state.db_session, user_id=configuration.user_id
)
await connector_service.initialize_counter() await connector_service.initialize_counter()
# Use the reformulated query as a single research question # Use the reformulated query as a single research question
@ -1435,7 +1587,7 @@ async def handle_qna_workflow(state: State, config: RunnableConfig, writer: Stre
top_k=TOP_K, top_k=TOP_K,
connector_service=connector_service, connector_service=connector_service,
search_mode=configuration.search_mode, search_mode=configuration.search_mode,
user_selected_sources=user_selected_sources user_selected_sources=user_selected_sources,
) )
except Exception as e: except Exception as e:
error_message = f"Error fetching relevant documents for QNA: {str(e)}" error_message = f"Error fetching relevant documents for QNA: {str(e)}"
@ -1466,15 +1618,12 @@ async def handle_qna_workflow(state: State, config: RunnableConfig, writer: Stre
"reformulated_query": reformulated_query, "reformulated_query": reformulated_query,
"relevant_documents": all_documents, # Use combined documents "relevant_documents": all_documents, # Use combined documents
"user_id": configuration.user_id, "user_id": configuration.user_id,
"search_space_id": configuration.search_space_id "search_space_id": configuration.search_space_id,
} }
} }
# Create the state for the QNA agent (it has a different state structure) # Create the state for the QNA agent (it has a different state structure)
qna_state = { qna_state = {"db_session": state.db_session, "chat_history": state.chat_history}
"db_session": state.db_session,
"chat_history": state.chat_history
}
try: try:
writer( writer(
@ -1490,12 +1639,14 @@ async def handle_qna_workflow(state: State, config: RunnableConfig, writer: Stre
captured_reranked_documents = [] captured_reranked_documents = []
# Call the QNA agent with streaming # Call the QNA agent with streaming
async for _chunk_type, chunk in qna_agent_graph.astream(qna_state, qna_config, stream_mode=["values"]): async for _chunk_type, chunk in qna_agent_graph.astream(
qna_state, qna_config, stream_mode=["values"]
):
if "final_answer" in chunk: if "final_answer" in chunk:
new_content = chunk["final_answer"] new_content = chunk["final_answer"]
if new_content and new_content != complete_content: if new_content and new_content != complete_content:
# Extract only the new content (delta) # Extract only the new content (delta)
delta = new_content[len(complete_content):] delta = new_content[len(complete_content) :]
complete_content = new_content complete_content = new_content
# Stream the real-time answer if there's new content # Stream the real-time answer if there's new content
@ -1533,7 +1684,7 @@ async def handle_qna_workflow(state: State, config: RunnableConfig, writer: Stre
# Return the final answer and captured reranked documents for further question generation # Return the final answer and captured reranked documents for further question generation
return { return {
"final_written_report": complete_content, "final_written_report": complete_content,
"reranked_documents": captured_reranked_documents "reranked_documents": captured_reranked_documents,
} }
except Exception as e: except Exception as e:
@ -1544,7 +1695,9 @@ async def handle_qna_workflow(state: State, config: RunnableConfig, writer: Stre
return {"final_written_report": f"Error generating answer: {str(e)}"} return {"final_written_report": f"Error generating answer: {str(e)}"}
async def generate_further_questions(state: State, config: RunnableConfig, writer: StreamWriter) -> Dict[str, Any]: async def generate_further_questions(
state: State, config: RunnableConfig, writer: StreamWriter
) -> Dict[str, Any]:
""" """
Generate contextually relevant follow-up questions based on chat history and available documents. Generate contextually relevant follow-up questions based on chat history and available documents.
@ -1564,7 +1717,7 @@ async def generate_further_questions(state: State, config: RunnableConfig, write
streaming_service = state.streaming_service streaming_service = state.streaming_service
# Get reranked documents from the state (will be populated by sub-agents) # Get reranked documents from the state (will be populated by sub-agents)
reranked_documents = getattr(state, 'reranked_documents', None) or [] reranked_documents = getattr(state, "reranked_documents", None) or []
writer( writer(
{ {
@ -1588,7 +1741,7 @@ async def generate_further_questions(state: State, config: RunnableConfig, write
# Format chat history for the prompt # Format chat history for the prompt
chat_history_xml = "<chat_history>\n" chat_history_xml = "<chat_history>\n"
for message in chat_history: for message in chat_history:
if hasattr(message, 'type'): if hasattr(message, "type"):
if message.type == "human": if message.type == "human":
chat_history_xml += f"<user>{message.content}</user>\n" chat_history_xml += f"<user>{message.content}</user>\n"
elif message.type == "ai": elif message.type == "ai":
@@ -1606,13 +1759,13 @@ async def generate_further_questions(state: State, config: RunnableConfig, writer: StreamWriter) -> Dict[str, Any]:
             source_type = document_info.get("document_type", "UNKNOWN")
             content = doc.get("content", "")
 
-            documents_xml += f"<document>\n"
-            documents_xml += f"<metadata>\n"
+            documents_xml += "<document>\n"
+            documents_xml += "<metadata>\n"
             documents_xml += f"<source_id>{source_id}</source_id>\n"
             documents_xml += f"<source_type>{source_type}</source_type>\n"
-            documents_xml += f"</metadata>\n"
+            documents_xml += "</metadata>\n"
             documents_xml += f"<content>\n{content}</content>\n"
-            documents_xml += f"</document>\n"
+            documents_xml += "</document>\n"
         documents_xml += "</documents>"
# Create the human message content # Create the human message content
@@ -1651,7 +1804,7 @@ async def generate_further_questions(state: State, config: RunnableConfig, writer: StreamWriter) -> Dict[str, Any]:
     # Create messages for the LLM
     messages = [
         SystemMessage(content=get_further_questions_system_prompt()),
-        HumanMessage(content=human_message_content)
+        HumanMessage(content=human_message_content),
     ]
 
     try:
@@ -1662,8 +1815,8 @@ async def generate_further_questions(state: State, config: RunnableConfig, writer: StreamWriter) -> Dict[str, Any]:
         content = response.content
 
         # Find the JSON in the content
-        json_start = content.find('{')
-        json_end = content.rfind('}') + 1
+        json_start = content.find("{")
+        json_end = content.rfind("}") + 1
 
         if json_start >= 0 and json_end > json_start:
             json_str = content[json_start:json_end]
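For orientation, a minimal sketch of how the reflowed fetch_documents_by_ids helper at the top of this diff might be exercised on its own. Only the signature and the (source_objects, formatted_documents) return shape come from the diff; the module path, document IDs, and user value are assumptions:

    import asyncio

    from app.db import async_session_maker  # session factory also imported elsewhere in this commit
    from app.agents.researcher.nodes import fetch_documents_by_ids  # assumed module path


    async def preview_selected_documents() -> None:
        # Hypothetical IDs and user; replace with real values from your search space.
        document_ids = [101, 102]
        user_id = "user-uuid-here"
        async with async_session_maker() as db_session:
            source_objects, formatted_documents = await fetch_documents_by_ids(
                document_ids=document_ids,
                user_id=user_id,
                db_session=db_session,
            )
        print(f"{len(source_objects)} source groups, {len(formatted_documents)} documents")


    if __name__ == "__main__":
        asyncio.run(preview_selected_documents())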


@@ -6,7 +6,6 @@ Allows fetching issue lists and their comments, projects and more.
 """
 
 import base64
-import json
 from datetime import datetime
 from typing import Any, Dict, List, Optional
 
@@ -119,8 +118,6 @@ class JiraConnector:
         response = requests.get(url, headers=headers, params=params, timeout=500)
 
-        print(json.dumps(response.json(), indent=2))
-
         if response.status_code == 200:
             return response.json()
         else:
@@ -227,6 +224,7 @@ class JiraConnector:
         date_filter = (
             f"(createdDate >= '{start_date}' AND createdDate <= '{end_date}')"
         )
+        # TODO : This JQL needs some improvement to work as expected
         jql = f"{date_filter}"
 
         if project_key:
@@ -252,7 +250,7 @@ class JiraConnector:
         fields.append("comment")
 
         params = {
-            "jql": "",
+            "jql": "",  # TODO : Add a JQL query to filter from a date range
            "fields": ",".join(fields),
            "maxResults": 100,
            "startAt": 0,
@@ -263,10 +261,8 @@ class JiraConnector:
         while True:
             params["startAt"] = start_at
-            print(json.dumps(params, indent=2))
-            result = self.make_api_request("search", params)
 
-            print(json.dumps(result, indent=2))
+            result = self.make_api_request("search", params)
 
             if not isinstance(result, dict) or "issues" not in result:
                 return [], "Invalid response from Jira API"
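Both TODOs above leave the JQL filtering open. As a non-authoritative sketch, a date-bounded query built from standard JQL fields could look like the following; the field names and ordering are assumptions to verify against the target Jira instance, not part of this commit:

    from typing import Optional


    def build_date_bounded_jql(project_key: Optional[str], start_date: str, end_date: str) -> str:
        # Dates in YYYY-MM-DD form; 'created' is the standard JQL creation-date field.
        jql = f"created >= '{start_date}' AND created <= '{end_date}'"
        if project_key:
            jql = f"project = {project_key} AND {jql}"
        return f"{jql} ORDER BY created ASC"


    # Example: params["jql"] = build_date_bounded_jql("PROJ", "2025-07-01", "2025-07-25")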


@@ -9,35 +9,58 @@ POST /search-source-connectors/{connector_id}/index - Index content from a connector
 Note: Each user can have only one connector of each type (SERPER_API, TAVILY_API, SLACK_CONNECTOR, NOTION_CONNECTOR, GITHUB_CONNECTOR, LINEAR_CONNECTOR, DISCORD_CONNECTOR).
 """
 
-from fastapi import APIRouter, Depends, HTTPException, Query, BackgroundTasks, Body
-from sqlalchemy.ext.asyncio import AsyncSession
-from sqlalchemy.future import select
-from sqlalchemy.exc import IntegrityError
-from typing import List, Dict, Any
-from app.db import get_async_session, User, SearchSourceConnector, SearchSourceConnectorType, SearchSpace, async_session_maker
-from app.schemas import SearchSourceConnectorCreate, SearchSourceConnectorUpdate, SearchSourceConnectorRead, SearchSourceConnectorBase
+import logging
+from datetime import datetime, timedelta
+from typing import Any, Dict, List
+
+from app.connectors.github_connector import GitHubConnector
+from app.db import (
+    SearchSourceConnector,
+    SearchSourceConnectorType,
+    SearchSpace,
+    User,
+    async_session_maker,
+    get_async_session,
+)
+from app.schemas import (
+    SearchSourceConnectorBase,
+    SearchSourceConnectorCreate,
+    SearchSourceConnectorRead,
+    SearchSourceConnectorUpdate,
+)
+from app.tasks.connectors_indexing_tasks import (
+    index_discord_messages,
+    index_github_repos,
+    index_jira_issues,
+    index_linear_issues,
+    index_notion_pages,
+    index_slack_messages,
+)
 from app.users import current_active_user
 from app.utils.check_ownership import check_ownership
+from fastapi import APIRouter, BackgroundTasks, Depends, HTTPException, Query
 from pydantic import BaseModel, Field, ValidationError
-from app.tasks.connectors_indexing_tasks import index_slack_messages, index_notion_pages, index_github_repos, index_linear_issues, index_discord_messages, index_jira_issues
-from app.connectors.github_connector import GitHubConnector
-from datetime import datetime, timedelta
-import logging
+from sqlalchemy.exc import IntegrityError
+from sqlalchemy.ext.asyncio import AsyncSession
+from sqlalchemy.future import select
 
 # Set up logging
 logger = logging.getLogger(__name__)
 
 router = APIRouter()
 
 
 # Use Pydantic's BaseModel here
 class GitHubPATRequest(BaseModel):
     github_pat: str = Field(..., description="GitHub Personal Access Token")
 
 
 # --- New Endpoint to list GitHub Repositories ---
 @router.post("/github/repositories/", response_model=List[Dict[str, Any]])
 async def list_github_repositories(
     pat_request: GitHubPATRequest,
-    user: User = Depends(current_active_user) # Ensure the user is logged in
+    user: User = Depends(current_active_user),  # Ensure the user is logged in
 ):
     """
     Fetches a list of repositories accessible by the provided GitHub PAT.
@@ -54,14 +77,19 @@ async def list_github_repositories(
         logger.error(f"GitHub PAT validation failed for user {user.id}: {str(e)}")
         raise HTTPException(status_code=400, detail=f"Invalid GitHub PAT: {str(e)}")
     except Exception as e:
-        logger.error(f"Failed to fetch GitHub repositories for user {user.id}: {str(e)}")
-        raise HTTPException(status_code=500, detail="Failed to fetch GitHub repositories.")
+        logger.error(
+            f"Failed to fetch GitHub repositories for user {user.id}: {str(e)}"
+        )
+        raise HTTPException(
+            status_code=500, detail="Failed to fetch GitHub repositories."
+        )
 
 
 @router.post("/search-source-connectors/", response_model=SearchSourceConnectorRead)
 async def create_search_source_connector(
     connector: SearchSourceConnectorCreate,
     session: AsyncSession = Depends(get_async_session),
-    user: User = Depends(current_active_user)
+    user: User = Depends(current_active_user),
 ):
     """
     Create a new search source connector.
@ -72,17 +100,16 @@ async def create_search_source_connector(
try: try:
# Check if a connector with the same type already exists for this user # Check if a connector with the same type already exists for this user
result = await session.execute( result = await session.execute(
select(SearchSourceConnector) select(SearchSourceConnector).filter(
.filter(
SearchSourceConnector.user_id == user.id, SearchSourceConnector.user_id == user.id,
SearchSourceConnector.connector_type == connector.connector_type SearchSourceConnector.connector_type == connector.connector_type,
) )
) )
existing_connector = result.scalars().first() existing_connector = result.scalars().first()
if existing_connector: if existing_connector:
raise HTTPException( raise HTTPException(
status_code=409, status_code=409,
detail=f"A connector with type {connector.connector_type} already exists. Each user can have only one connector of each type." detail=f"A connector with type {connector.connector_type} already exists. Each user can have only one connector of each type.",
) )
db_connector = SearchSourceConnector(**connector.model_dump(), user_id=user.id) db_connector = SearchSourceConnector(**connector.model_dump(), user_id=user.id)
session.add(db_connector) session.add(db_connector)
@ -91,15 +118,12 @@ async def create_search_source_connector(
return db_connector return db_connector
except ValidationError as e: except ValidationError as e:
await session.rollback() await session.rollback()
raise HTTPException( raise HTTPException(status_code=422, detail=f"Validation error: {str(e)}")
status_code=422,
detail=f"Validation error: {str(e)}"
)
except IntegrityError as e: except IntegrityError as e:
await session.rollback() await session.rollback()
raise HTTPException( raise HTTPException(
status_code=409, status_code=409,
detail=f"Integrity error: A connector with this type already exists. {str(e)}" detail=f"Integrity error: A connector with this type already exists. {str(e)}",
) )
except HTTPException: except HTTPException:
await session.rollback() await session.rollback()
@ -109,38 +133,44 @@ async def create_search_source_connector(
await session.rollback() await session.rollback()
raise HTTPException( raise HTTPException(
status_code=500, status_code=500,
detail=f"Failed to create search source connector: {str(e)}" detail=f"Failed to create search source connector: {str(e)}",
) )
@router.get("/search-source-connectors/", response_model=List[SearchSourceConnectorRead])
@router.get(
"/search-source-connectors/", response_model=List[SearchSourceConnectorRead]
)
async def read_search_source_connectors( async def read_search_source_connectors(
skip: int = 0, skip: int = 0,
limit: int = 100, limit: int = 100,
search_space_id: int = None, search_space_id: int = None,
session: AsyncSession = Depends(get_async_session), session: AsyncSession = Depends(get_async_session),
user: User = Depends(current_active_user) user: User = Depends(current_active_user),
): ):
"""List all search source connectors for the current user.""" """List all search source connectors for the current user."""
try: try:
query = select(SearchSourceConnector).filter(SearchSourceConnector.user_id == user.id) query = select(SearchSourceConnector).filter(
SearchSourceConnector.user_id == user.id
)
# No need to filter by search_space_id as connectors are user-owned, not search space specific # No need to filter by search_space_id as connectors are user-owned, not search space specific
result = await session.execute( result = await session.execute(query.offset(skip).limit(limit))
query.offset(skip).limit(limit)
)
return result.scalars().all() return result.scalars().all()
except Exception as e: except Exception as e:
raise HTTPException( raise HTTPException(
status_code=500, status_code=500,
detail=f"Failed to fetch search source connectors: {str(e)}" detail=f"Failed to fetch search source connectors: {str(e)}",
) )
@router.get("/search-source-connectors/{connector_id}", response_model=SearchSourceConnectorRead)
@router.get(
"/search-source-connectors/{connector_id}", response_model=SearchSourceConnectorRead
)
async def read_search_source_connector( async def read_search_source_connector(
connector_id: int, connector_id: int,
session: AsyncSession = Depends(get_async_session), session: AsyncSession = Depends(get_async_session),
user: User = Depends(current_active_user) user: User = Depends(current_active_user),
): ):
"""Get a specific search source connector by ID.""" """Get a specific search source connector by ID."""
try: try:
@ -149,30 +179,36 @@ async def read_search_source_connector(
raise raise
except Exception as e: except Exception as e:
raise HTTPException( raise HTTPException(
status_code=500, status_code=500, detail=f"Failed to fetch search source connector: {str(e)}"
detail=f"Failed to fetch search source connector: {str(e)}"
) )
@router.put("/search-source-connectors/{connector_id}", response_model=SearchSourceConnectorRead)
@router.put(
"/search-source-connectors/{connector_id}", response_model=SearchSourceConnectorRead
)
async def update_search_source_connector( async def update_search_source_connector(
connector_id: int, connector_id: int,
connector_update: SearchSourceConnectorUpdate, connector_update: SearchSourceConnectorUpdate,
session: AsyncSession = Depends(get_async_session), session: AsyncSession = Depends(get_async_session),
user: User = Depends(current_active_user) user: User = Depends(current_active_user),
): ):
""" """
Update a search source connector. Update a search source connector.
Handles partial updates, including merging changes into the 'config' field. Handles partial updates, including merging changes into the 'config' field.
""" """
db_connector = await check_ownership(session, SearchSourceConnector, connector_id, user) db_connector = await check_ownership(
session, SearchSourceConnector, connector_id, user
)
# Convert the sparse update data (only fields present in request) to a dict # Convert the sparse update data (only fields present in request) to a dict
update_data = connector_update.model_dump(exclude_unset=True) update_data = connector_update.model_dump(exclude_unset=True)
# Special handling for 'config' field # Special handling for 'config' field
if "config" in update_data: if "config" in update_data:
incoming_config = update_data["config"] # Config data from the request incoming_config = update_data["config"] # Config data from the request
existing_config = db_connector.config if db_connector.config else {} # Current config from DB existing_config = (
db_connector.config if db_connector.config else {}
) # Current config from DB
# Merge incoming config into existing config # Merge incoming config into existing config
# This preserves existing keys (like GITHUB_PAT) if they are not in the incoming data # This preserves existing keys (like GITHUB_PAT) if they are not in the incoming data
@ -182,24 +218,27 @@ async def update_search_source_connector(
# -- Validation after merging -- # -- Validation after merging --
# Validate the *merged* config based on the connector type # Validate the *merged* config based on the connector type
# We need the connector type - use the one from the update if provided, else the existing one # We need the connector type - use the one from the update if provided, else the existing one
current_connector_type = connector_update.connector_type if connector_update.connector_type is not None else db_connector.connector_type current_connector_type = (
connector_update.connector_type
if connector_update.connector_type is not None
else db_connector.connector_type
)
try: try:
# We can reuse the base validator by creating a temporary base model instance # We can reuse the base validator by creating a temporary base model instance
# Note: This assumes 'name' and 'is_indexable' are not crucial for config validation itself # Note: This assumes 'name' and 'is_indexable' are not crucial for config validation itself
temp_data_for_validation = { temp_data_for_validation = {
"name": db_connector.name, # Use existing name "name": db_connector.name, # Use existing name
"connector_type": current_connector_type, "connector_type": current_connector_type,
"is_indexable": db_connector.is_indexable, # Use existing value "is_indexable": db_connector.is_indexable, # Use existing value
"last_indexed_at": db_connector.last_indexed_at, # Not used by validator "last_indexed_at": db_connector.last_indexed_at, # Not used by validator
"config": merged_config "config": merged_config,
} }
SearchSourceConnectorBase.model_validate(temp_data_for_validation) SearchSourceConnectorBase.model_validate(temp_data_for_validation)
except ValidationError as e: except ValidationError as e:
# Raise specific validation error for the merged config # Raise specific validation error for the merged config
raise HTTPException( raise HTTPException(
status_code=422, status_code=422, detail=f"Validation error for merged config: {str(e)}"
detail=f"Validation error for merged config: {str(e)}"
) )
# If validation passes, update the main update_data dict with the merged config # If validation passes, update the main update_data dict with the merged config
@ -210,18 +249,17 @@ async def update_search_source_connector(
# Prevent changing connector_type if it causes a duplicate (check moved here) # Prevent changing connector_type if it causes a duplicate (check moved here)
if key == "connector_type" and value != db_connector.connector_type: if key == "connector_type" and value != db_connector.connector_type:
result = await session.execute( result = await session.execute(
select(SearchSourceConnector) select(SearchSourceConnector).filter(
.filter(
SearchSourceConnector.user_id == user.id, SearchSourceConnector.user_id == user.id,
SearchSourceConnector.connector_type == value, SearchSourceConnector.connector_type == value,
SearchSourceConnector.id != connector_id SearchSourceConnector.id != connector_id,
) )
) )
existing_connector = result.scalars().first() existing_connector = result.scalars().first()
if existing_connector: if existing_connector:
raise HTTPException( raise HTTPException(
status_code=409, status_code=409,
detail=f"A connector with type {value} already exists. Each user can have only one connector of each type." detail=f"A connector with type {value} already exists. Each user can have only one connector of each type.",
) )
setattr(db_connector, key, value) setattr(db_connector, key, value)
@ -234,26 +272,31 @@ async def update_search_source_connector(
await session.rollback() await session.rollback()
# This might occur if connector_type constraint is violated somehow after the check # This might occur if connector_type constraint is violated somehow after the check
raise HTTPException( raise HTTPException(
status_code=409, status_code=409, detail=f"Database integrity error during update: {str(e)}"
detail=f"Database integrity error during update: {str(e)}"
) )
except Exception as e: except Exception as e:
await session.rollback() await session.rollback()
logger.error(f"Failed to update search source connector {connector_id}: {e}", exc_info=True) logger.error(
f"Failed to update search source connector {connector_id}: {e}",
exc_info=True,
)
raise HTTPException( raise HTTPException(
status_code=500, status_code=500,
detail=f"Failed to update search source connector: {str(e)}" detail=f"Failed to update search source connector: {str(e)}",
) )
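The partial-update path above merges the incoming config over the stored one and re-validates the result, so keys omitted from the request body survive the update. A minimal sketch of that behaviour, assuming the merge is a plain shallow dict union as the surrounding comments describe (the merge line itself is elided from this hunk); the repo_full_names key is an assumed example:

# Illustrative only: shallow merge used for partial config updates.
existing_config = {"GITHUB_PAT": "ghp_xxx", "repo_full_names": ["org/repo-a"]}
incoming_config = {"repo_full_names": ["org/repo-a", "org/repo-b"]}

merged_config = {**existing_config, **incoming_config}
# -> {"GITHUB_PAT": "ghp_xxx", "repo_full_names": ["org/repo-a", "org/repo-b"]}
# GITHUB_PAT is preserved because it was not part of the incoming update.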
@router.delete("/search-source-connectors/{connector_id}", response_model=dict) @router.delete("/search-source-connectors/{connector_id}", response_model=dict)
async def delete_search_source_connector( async def delete_search_source_connector(
connector_id: int, connector_id: int,
session: AsyncSession = Depends(get_async_session), session: AsyncSession = Depends(get_async_session),
user: User = Depends(current_active_user) user: User = Depends(current_active_user),
): ):
"""Delete a search source connector.""" """Delete a search source connector."""
try: try:
db_connector = await check_ownership(session, SearchSourceConnector, connector_id, user) db_connector = await check_ownership(
session, SearchSourceConnector, connector_id, user
)
await session.delete(db_connector) await session.delete(db_connector)
await session.commit() await session.commit()
return {"message": "Search source connector deleted successfully"} return {"message": "Search source connector deleted successfully"}
@ -263,18 +306,29 @@ async def delete_search_source_connector(
await session.rollback() await session.rollback()
raise HTTPException( raise HTTPException(
status_code=500, status_code=500,
detail=f"Failed to delete search source connector: {str(e)}" detail=f"Failed to delete search source connector: {str(e)}",
) )
@router.post("/search-source-connectors/{connector_id}/index", response_model=Dict[str, Any])
@router.post(
"/search-source-connectors/{connector_id}/index", response_model=Dict[str, Any]
)
async def index_connector_content( async def index_connector_content(
connector_id: int, connector_id: int,
search_space_id: int = Query(..., description="ID of the search space to store indexed content"), search_space_id: int = Query(
start_date: str = Query(None, description="Start date for indexing (YYYY-MM-DD format). If not provided, uses last_indexed_at or defaults to 365 days ago"), ..., description="ID of the search space to store indexed content"
end_date: str = Query(None, description="End date for indexing (YYYY-MM-DD format). If not provided, uses today's date"), ),
start_date: str = Query(
None,
description="Start date for indexing (YYYY-MM-DD format). If not provided, uses last_indexed_at or defaults to 365 days ago",
),
end_date: str = Query(
None,
description="End date for indexing (YYYY-MM-DD format). If not provided, uses today's date",
),
session: AsyncSession = Depends(get_async_session), session: AsyncSession = Depends(get_async_session),
user: User = Depends(current_active_user), user: User = Depends(current_active_user),
background_tasks: BackgroundTasks = None background_tasks: BackgroundTasks = None,
): ):
""" """
Index content from a connector to a search space. Index content from a connector to a search space.
@ -297,10 +351,14 @@ async def index_connector_content(
""" """
try: try:
# Check if the connector belongs to the user # Check if the connector belongs to the user
connector = await check_ownership(session, SearchSourceConnector, connector_id, user) connector = await check_ownership(
session, SearchSourceConnector, connector_id, user
)
# Check if the search space belongs to the user # Check if the search space belongs to the user
search_space = await check_ownership(session, SearchSpace, search_space_id, user) search_space = await check_ownership(
session, SearchSpace, search_space_id, user
)
# Handle different connector types # Handle different connector types
response_message = "" response_message = ""
@ -317,7 +375,9 @@ async def index_connector_content(
else: else:
indexing_from = connector.last_indexed_at.strftime("%Y-%m-%d") indexing_from = connector.last_indexed_at.strftime("%Y-%m-%d")
else: else:
indexing_from = (datetime.now() - timedelta(days=365)).strftime("%Y-%m-%d") indexing_from = (datetime.now() - timedelta(days=365)).strftime(
"%Y-%m-%d"
)
else: else:
indexing_from = start_date indexing_from = start_date
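Condensed, the window defaulting above amounts to the helper below. The function name and the flattened structure are mine for illustration; the branch elided from this hunk around last_indexed_at is not reproduced.

from datetime import datetime, timedelta

def resolve_indexing_window(start_date, end_date, last_indexed_at):
    # Mirrors the query-parameter defaults documented above: explicit dates win,
    # otherwise fall back to last_indexed_at, then to 365 days ago / today.
    if start_date:
        indexing_from = start_date
    elif last_indexed_at:
        indexing_from = last_indexed_at.strftime("%Y-%m-%d")
    else:
        indexing_from = (datetime.now() - timedelta(days=365)).strftime("%Y-%m-%d")
    indexing_to = end_date or datetime.now().strftime("%Y-%m-%d")
    return indexing_from, indexing_to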
@ -328,32 +388,77 @@ async def index_connector_content(
if connector.connector_type == SearchSourceConnectorType.SLACK_CONNECTOR: if connector.connector_type == SearchSourceConnectorType.SLACK_CONNECTOR:
# Run indexing in background # Run indexing in background
logger.info(f"Triggering Slack indexing for connector {connector_id} into search space {search_space_id} from {indexing_from} to {indexing_to}") logger.info(
background_tasks.add_task(run_slack_indexing_with_new_session, connector_id, search_space_id, str(user.id), indexing_from, indexing_to) f"Triggering Slack indexing for connector {connector_id} into search space {search_space_id} from {indexing_from} to {indexing_to}"
)
background_tasks.add_task(
run_slack_indexing_with_new_session,
connector_id,
search_space_id,
str(user.id),
indexing_from,
indexing_to,
)
response_message = "Slack indexing started in the background." response_message = "Slack indexing started in the background."
elif connector.connector_type == SearchSourceConnectorType.NOTION_CONNECTOR: elif connector.connector_type == SearchSourceConnectorType.NOTION_CONNECTOR:
# Run indexing in background # Run indexing in background
logger.info(f"Triggering Notion indexing for connector {connector_id} into search space {search_space_id} from {indexing_from} to {indexing_to}") logger.info(
background_tasks.add_task(run_notion_indexing_with_new_session, connector_id, search_space_id, str(user.id), indexing_from, indexing_to) f"Triggering Notion indexing for connector {connector_id} into search space {search_space_id} from {indexing_from} to {indexing_to}"
)
background_tasks.add_task(
run_notion_indexing_with_new_session,
connector_id,
search_space_id,
str(user.id),
indexing_from,
indexing_to,
)
response_message = "Notion indexing started in the background." response_message = "Notion indexing started in the background."
elif connector.connector_type == SearchSourceConnectorType.GITHUB_CONNECTOR: elif connector.connector_type == SearchSourceConnectorType.GITHUB_CONNECTOR:
# Run indexing in background # Run indexing in background
logger.info(f"Triggering GitHub indexing for connector {connector_id} into search space {search_space_id} from {indexing_from} to {indexing_to}") logger.info(
background_tasks.add_task(run_github_indexing_with_new_session, connector_id, search_space_id, str(user.id), indexing_from, indexing_to) f"Triggering GitHub indexing for connector {connector_id} into search space {search_space_id} from {indexing_from} to {indexing_to}"
)
background_tasks.add_task(
run_github_indexing_with_new_session,
connector_id,
search_space_id,
str(user.id),
indexing_from,
indexing_to,
)
response_message = "GitHub indexing started in the background." response_message = "GitHub indexing started in the background."
elif connector.connector_type == SearchSourceConnectorType.LINEAR_CONNECTOR: elif connector.connector_type == SearchSourceConnectorType.LINEAR_CONNECTOR:
# Run indexing in background # Run indexing in background
logger.info(f"Triggering Linear indexing for connector {connector_id} into search space {search_space_id} from {indexing_from} to {indexing_to}") logger.info(
background_tasks.add_task(run_linear_indexing_with_new_session, connector_id, search_space_id, str(user.id), indexing_from, indexing_to) f"Triggering Linear indexing for connector {connector_id} into search space {search_space_id} from {indexing_from} to {indexing_to}"
)
background_tasks.add_task(
run_linear_indexing_with_new_session,
connector_id,
search_space_id,
str(user.id),
indexing_from,
indexing_to,
)
response_message = "Linear indexing started in the background." response_message = "Linear indexing started in the background."
elif connector.connector_type == SearchSourceConnectorType.JIRA_CONNECTOR: elif connector.connector_type == SearchSourceConnectorType.JIRA_CONNECTOR:
# Run indexing in background # Run indexing in background
logger.info(f"Triggering Jira indexing for connector {connector_id} into search space {search_space_id} from {indexing_from} to {indexing_to}") logger.info(
background_tasks.add_task(run_jira_indexing_with_new_session, connector_id, search_space_id, str(user.id), indexing_from, indexing_to) f"Triggering Jira indexing for connector {connector_id} into search space {search_space_id} from {indexing_from} to {indexing_to}"
)
background_tasks.add_task(
run_jira_indexing_with_new_session,
connector_id,
search_space_id,
str(user.id),
indexing_from,
indexing_to,
)
response_message = "Jira indexing started in the background." response_message = "Jira indexing started in the background."
elif connector.connector_type == SearchSourceConnectorType.DISCORD_CONNECTOR: elif connector.connector_type == SearchSourceConnectorType.DISCORD_CONNECTOR:
@ -362,14 +467,19 @@ async def index_connector_content(
f"Triggering Discord indexing for connector {connector_id} into search space {search_space_id} from {indexing_from} to {indexing_to}" f"Triggering Discord indexing for connector {connector_id} into search space {search_space_id} from {indexing_from} to {indexing_to}"
) )
background_tasks.add_task( background_tasks.add_task(
run_discord_indexing_with_new_session, connector_id, search_space_id, str(user.id), indexing_from, indexing_to run_discord_indexing_with_new_session,
connector_id,
search_space_id,
str(user.id),
indexing_from,
indexing_to,
) )
response_message = "Discord indexing started in the background." response_message = "Discord indexing started in the background."
else: else:
raise HTTPException( raise HTTPException(
status_code=400, status_code=400,
detail=f"Indexing not supported for connector type: {connector.connector_type}" detail=f"Indexing not supported for connector type: {connector.connector_type}",
) )
return { return {
@ -377,21 +487,21 @@ async def index_connector_content(
"connector_id": connector_id, "connector_id": connector_id,
"search_space_id": search_space_id, "search_space_id": search_space_id,
"indexing_from": indexing_from, "indexing_from": indexing_from,
"indexing_to": indexing_to "indexing_to": indexing_to,
} }
except HTTPException: except HTTPException:
raise raise
except Exception as e: except Exception as e:
logger.error(f"Failed to initiate indexing for connector {connector_id}: {e}", exc_info=True) logger.error(
f"Failed to initiate indexing for connector {connector_id}: {e}",
exc_info=True,
)
raise HTTPException( raise HTTPException(
status_code=500, status_code=500, detail=f"Failed to initiate indexing: {str(e)}"
detail=f"Failed to initiate indexing: {str(e)}"
) )
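A hedged usage sketch for the indexing endpoint above; the connector id, search space id, base URL, and auth header are placeholders.

# Hypothetical caller (illustrative only): trigger a background index run for
# connector 42 into search space 7 over an explicit date window.
import httpx

resp = httpx.post(
    "http://localhost:8000/search-source-connectors/42/index",  # base URL is a placeholder
    params={
        "search_space_id": 7,
        "start_date": "2024-01-01",  # optional: falls back to last_indexed_at / 365 days ago
        "end_date": "2024-06-30",    # optional: falls back to today
    },
    headers={"Authorization": "Bearer <access-token>"},  # placeholder auth
)
resp.raise_for_status()
print(resp.json())  # includes connector_id, search_space_id, indexing_from, indexing_to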
async def update_connector_last_indexed(
session: AsyncSession, async def update_connector_last_indexed(session: AsyncSession, connector_id: int):
connector_id: int
):
""" """
Update the last_indexed_at timestamp for a connector. Update the last_indexed_at timestamp for a connector.
@ -401,8 +511,9 @@ async def update_connector_last_indexed(
""" """
try: try:
result = await session.execute( result = await session.execute(
select(SearchSourceConnector) select(SearchSourceConnector).filter(
.filter(SearchSourceConnector.id == connector_id) SearchSourceConnector.id == connector_id
)
) )
connector = result.scalars().first() connector = result.scalars().first()
@ -411,22 +522,28 @@ async def update_connector_last_indexed(
await session.commit() await session.commit()
logger.info(f"Updated last_indexed_at for connector {connector_id}") logger.info(f"Updated last_indexed_at for connector {connector_id}")
except Exception as e: except Exception as e:
logger.error(f"Failed to update last_indexed_at for connector {connector_id}: {str(e)}") logger.error(
f"Failed to update last_indexed_at for connector {connector_id}: {str(e)}"
)
await session.rollback() await session.rollback()
async def run_slack_indexing_with_new_session( async def run_slack_indexing_with_new_session(
connector_id: int, connector_id: int,
search_space_id: int, search_space_id: int,
user_id: str, user_id: str,
start_date: str, start_date: str,
end_date: str end_date: str,
): ):
""" """
Create a new session and run the Slack indexing task. Create a new session and run the Slack indexing task.
This prevents session leaks by creating a dedicated session for the background task. This prevents session leaks by creating a dedicated session for the background task.
""" """
async with async_session_maker() as session: async with async_session_maker() as session:
await run_slack_indexing(session, connector_id, search_space_id, user_id, start_date, end_date) await run_slack_indexing(
session, connector_id, search_space_id, user_id, start_date, end_date
)
async def run_slack_indexing( async def run_slack_indexing(
session: AsyncSession, session: AsyncSession,
@ -434,7 +551,7 @@ async def run_slack_indexing(
search_space_id: int, search_space_id: int,
user_id: str, user_id: str,
start_date: str, start_date: str,
end_date: str end_date: str,
): ):
""" """
Background task to run Slack indexing. Background task to run Slack indexing.
@ -456,31 +573,39 @@ async def run_slack_indexing(
user_id=user_id, user_id=user_id,
start_date=start_date, start_date=start_date,
end_date=end_date, end_date=end_date,
update_last_indexed=False # Don't update timestamp in the indexing function update_last_indexed=False, # Don't update timestamp in the indexing function
) )
# Only update last_indexed_at if indexing was successful (either new docs or updated docs) # Only update last_indexed_at if indexing was successful (either new docs or updated docs)
if documents_processed > 0: if documents_processed > 0:
await update_connector_last_indexed(session, connector_id) await update_connector_last_indexed(session, connector_id)
logger.info(f"Slack indexing completed successfully: {documents_processed} documents processed") logger.info(
f"Slack indexing completed successfully: {documents_processed} documents processed"
)
else: else:
logger.error(f"Slack indexing failed or no documents processed: {error_or_warning}") logger.error(
f"Slack indexing failed or no documents processed: {error_or_warning}"
)
except Exception as e: except Exception as e:
logger.error(f"Error in background Slack indexing task: {str(e)}") logger.error(f"Error in background Slack indexing task: {str(e)}")
async def run_notion_indexing_with_new_session( async def run_notion_indexing_with_new_session(
connector_id: int, connector_id: int,
search_space_id: int, search_space_id: int,
user_id: str, user_id: str,
start_date: str, start_date: str,
end_date: str end_date: str,
): ):
""" """
Create a new session and run the Notion indexing task. Create a new session and run the Notion indexing task.
This prevents session leaks by creating a dedicated session for the background task. This prevents session leaks by creating a dedicated session for the background task.
""" """
async with async_session_maker() as session: async with async_session_maker() as session:
await run_notion_indexing(session, connector_id, search_space_id, user_id, start_date, end_date) await run_notion_indexing(
session, connector_id, search_space_id, user_id, start_date, end_date
)
async def run_notion_indexing( async def run_notion_indexing(
session: AsyncSession, session: AsyncSession,
@ -488,7 +613,7 @@ async def run_notion_indexing(
search_space_id: int, search_space_id: int,
user_id: str, user_id: str,
start_date: str, start_date: str,
end_date: str end_date: str,
): ):
""" """
Background task to run Notion indexing. Background task to run Notion indexing.
@ -510,112 +635,158 @@ async def run_notion_indexing(
user_id=user_id, user_id=user_id,
start_date=start_date, start_date=start_date,
end_date=end_date, end_date=end_date,
update_last_indexed=False # Don't update timestamp in the indexing function update_last_indexed=False, # Don't update timestamp in the indexing function
) )
# Only update last_indexed_at if indexing was successful (either new docs or updated docs) # Only update last_indexed_at if indexing was successful (either new docs or updated docs)
if documents_processed > 0: if documents_processed > 0:
await update_connector_last_indexed(session, connector_id) await update_connector_last_indexed(session, connector_id)
logger.info(f"Notion indexing completed successfully: {documents_processed} documents processed") logger.info(
f"Notion indexing completed successfully: {documents_processed} documents processed"
)
else: else:
logger.error(f"Notion indexing failed or no documents processed: {error_or_warning}") logger.error(
f"Notion indexing failed or no documents processed: {error_or_warning}"
)
except Exception as e: except Exception as e:
logger.error(f"Error in background Notion indexing task: {str(e)}") logger.error(f"Error in background Notion indexing task: {str(e)}")
# Add new helper functions for GitHub indexing # Add new helper functions for GitHub indexing
async def run_github_indexing_with_new_session( async def run_github_indexing_with_new_session(
connector_id: int, connector_id: int,
search_space_id: int, search_space_id: int,
user_id: str, user_id: str,
start_date: str, start_date: str,
end_date: str end_date: str,
): ):
"""Wrapper to run GitHub indexing with its own database session.""" """Wrapper to run GitHub indexing with its own database session."""
logger.info(f"Background task started: Indexing GitHub connector {connector_id} into space {search_space_id} from {start_date} to {end_date}") logger.info(
f"Background task started: Indexing GitHub connector {connector_id} into space {search_space_id} from {start_date} to {end_date}"
)
async with async_session_maker() as session: async with async_session_maker() as session:
await run_github_indexing(session, connector_id, search_space_id, user_id, start_date, end_date) await run_github_indexing(
session, connector_id, search_space_id, user_id, start_date, end_date
)
logger.info(f"Background task finished: Indexing GitHub connector {connector_id}") logger.info(f"Background task finished: Indexing GitHub connector {connector_id}")
async def run_github_indexing( async def run_github_indexing(
session: AsyncSession, session: AsyncSession,
connector_id: int, connector_id: int,
search_space_id: int, search_space_id: int,
user_id: str, user_id: str,
start_date: str, start_date: str,
end_date: str end_date: str,
): ):
"""Runs the GitHub indexing task and updates the timestamp.""" """Runs the GitHub indexing task and updates the timestamp."""
try: try:
indexed_count, error_message = await index_github_repos( indexed_count, error_message = await index_github_repos(
session, connector_id, search_space_id, user_id, start_date, end_date, update_last_indexed=False session,
connector_id,
search_space_id,
user_id,
start_date,
end_date,
update_last_indexed=False,
) )
if error_message: if error_message:
logger.error(f"GitHub indexing failed for connector {connector_id}: {error_message}") logger.error(
f"GitHub indexing failed for connector {connector_id}: {error_message}"
)
# Optionally update status in DB to indicate failure # Optionally update status in DB to indicate failure
else: else:
logger.info(f"GitHub indexing successful for connector {connector_id}. Indexed {indexed_count} documents.") logger.info(
f"GitHub indexing successful for connector {connector_id}. Indexed {indexed_count} documents."
)
# Update the last indexed timestamp only on success # Update the last indexed timestamp only on success
await update_connector_last_indexed(session, connector_id) await update_connector_last_indexed(session, connector_id)
await session.commit() # Commit timestamp update await session.commit() # Commit timestamp update
except Exception as e: except Exception as e:
await session.rollback() await session.rollback()
logger.error(f"Critical error in run_github_indexing for connector {connector_id}: {e}", exc_info=True) logger.error(
f"Critical error in run_github_indexing for connector {connector_id}: {e}",
exc_info=True,
)
# Optionally update status in DB to indicate failure # Optionally update status in DB to indicate failure
# Add new helper functions for Linear indexing # Add new helper functions for Linear indexing
async def run_linear_indexing_with_new_session( async def run_linear_indexing_with_new_session(
connector_id: int, connector_id: int,
search_space_id: int, search_space_id: int,
user_id: str, user_id: str,
start_date: str, start_date: str,
end_date: str end_date: str,
): ):
"""Wrapper to run Linear indexing with its own database session.""" """Wrapper to run Linear indexing with its own database session."""
logger.info(f"Background task started: Indexing Linear connector {connector_id} into space {search_space_id} from {start_date} to {end_date}") logger.info(
f"Background task started: Indexing Linear connector {connector_id} into space {search_space_id} from {start_date} to {end_date}"
)
async with async_session_maker() as session: async with async_session_maker() as session:
await run_linear_indexing(session, connector_id, search_space_id, user_id, start_date, end_date) await run_linear_indexing(
session, connector_id, search_space_id, user_id, start_date, end_date
)
logger.info(f"Background task finished: Indexing Linear connector {connector_id}") logger.info(f"Background task finished: Indexing Linear connector {connector_id}")
async def run_linear_indexing( async def run_linear_indexing(
session: AsyncSession, session: AsyncSession,
connector_id: int, connector_id: int,
search_space_id: int, search_space_id: int,
user_id: str, user_id: str,
start_date: str, start_date: str,
end_date: str end_date: str,
): ):
"""Runs the Linear indexing task and updates the timestamp.""" """Runs the Linear indexing task and updates the timestamp."""
try: try:
indexed_count, error_message = await index_linear_issues( indexed_count, error_message = await index_linear_issues(
session, connector_id, search_space_id, user_id, start_date, end_date, update_last_indexed=False session,
connector_id,
search_space_id,
user_id,
start_date,
end_date,
update_last_indexed=False,
) )
if error_message: if error_message:
logger.error(f"Linear indexing failed for connector {connector_id}: {error_message}") logger.error(
f"Linear indexing failed for connector {connector_id}: {error_message}"
)
# Optionally update status in DB to indicate failure # Optionally update status in DB to indicate failure
else: else:
logger.info(f"Linear indexing successful for connector {connector_id}. Indexed {indexed_count} documents.") logger.info(
f"Linear indexing successful for connector {connector_id}. Indexed {indexed_count} documents."
)
# Update the last indexed timestamp only on success # Update the last indexed timestamp only on success
await update_connector_last_indexed(session, connector_id) await update_connector_last_indexed(session, connector_id)
await session.commit() # Commit timestamp update await session.commit() # Commit timestamp update
except Exception as e: except Exception as e:
await session.rollback() await session.rollback()
logger.error(f"Critical error in run_linear_indexing for connector {connector_id}: {e}", exc_info=True) logger.error(
f"Critical error in run_linear_indexing for connector {connector_id}: {e}",
exc_info=True,
)
# Optionally update status in DB to indicate failure # Optionally update status in DB to indicate failure
# Add new helper functions for discord indexing # Add new helper functions for discord indexing
async def run_discord_indexing_with_new_session( async def run_discord_indexing_with_new_session(
connector_id: int, connector_id: int,
search_space_id: int, search_space_id: int,
user_id: str, user_id: str,
start_date: str, start_date: str,
end_date: str end_date: str,
): ):
""" """
Create a new session and run the Discord indexing task. Create a new session and run the Discord indexing task.
This prevents session leaks by creating a dedicated session for the background task. This prevents session leaks by creating a dedicated session for the background task.
""" """
async with async_session_maker() as session: async with async_session_maker() as session:
await run_discord_indexing(session, connector_id, search_space_id, user_id, start_date, end_date) await run_discord_indexing(
session, connector_id, search_space_id, user_id, start_date, end_date
)
async def run_discord_indexing( async def run_discord_indexing(
session: AsyncSession, session: AsyncSession,
@ -623,7 +794,7 @@ async def run_discord_indexing(
search_space_id: int, search_space_id: int,
user_id: str, user_id: str,
start_date: str, start_date: str,
end_date: str end_date: str,
): ):
""" """
Background task to run Discord indexing. Background task to run Discord indexing.
@ -644,15 +815,19 @@ async def run_discord_indexing(
user_id=user_id, user_id=user_id,
start_date=start_date, start_date=start_date,
end_date=end_date, end_date=end_date,
update_last_indexed=False # Don't update timestamp in the indexing function update_last_indexed=False, # Don't update timestamp in the indexing function
) )
# Only update last_indexed_at if indexing was successful (either new docs or updated docs) # Only update last_indexed_at if indexing was successful (either new docs or updated docs)
if documents_processed > 0: if documents_processed > 0:
await update_connector_last_indexed(session, connector_id) await update_connector_last_indexed(session, connector_id)
logger.info(f"Discord indexing completed successfully: {documents_processed} documents processed") logger.info(
f"Discord indexing completed successfully: {documents_processed} documents processed"
)
else: else:
logger.error(f"Discord indexing failed or no documents processed: {error_or_warning}") logger.error(
f"Discord indexing failed or no documents processed: {error_or_warning}"
)
except Exception as e: except Exception as e:
logger.error(f"Error in background Discord indexing task: {str(e)}") logger.error(f"Error in background Discord indexing task: {str(e)}")
@ -663,36 +838,53 @@ async def run_jira_indexing_with_new_session(
search_space_id: int, search_space_id: int,
user_id: str, user_id: str,
start_date: str, start_date: str,
end_date: str end_date: str,
): ):
"""Wrapper to run Jira indexing with its own database session.""" """Wrapper to run Jira indexing with its own database session."""
logger.info(f"Background task started: Indexing Jira connector {connector_id} into space {search_space_id} from {start_date} to {end_date}") logger.info(
f"Background task started: Indexing Jira connector {connector_id} into space {search_space_id} from {start_date} to {end_date}"
)
async with async_session_maker() as session: async with async_session_maker() as session:
await run_jira_indexing(session, connector_id, search_space_id, user_id, start_date, end_date) await run_jira_indexing(
session, connector_id, search_space_id, user_id, start_date, end_date
)
logger.info(f"Background task finished: Indexing Jira connector {connector_id}") logger.info(f"Background task finished: Indexing Jira connector {connector_id}")
async def run_jira_indexing( async def run_jira_indexing(
session: AsyncSession, session: AsyncSession,
connector_id: int, connector_id: int,
search_space_id: int, search_space_id: int,
user_id: str, user_id: str,
start_date: str, start_date: str,
end_date: str end_date: str,
): ):
"""Runs the Jira indexing task and updates the timestamp.""" """Runs the Jira indexing task and updates the timestamp."""
try: try:
indexed_count, error_message = await index_jira_issues( indexed_count, error_message = await index_jira_issues(
session, connector_id, search_space_id, user_id, start_date, end_date, update_last_indexed=False session,
connector_id,
search_space_id,
user_id,
start_date,
end_date,
update_last_indexed=False,
) )
if error_message: if error_message:
logger.error(f"Jira indexing failed for connector {connector_id}: {error_message}") logger.error(
f"Jira indexing failed for connector {connector_id}: {error_message}"
)
# Optionally update status in DB to indicate failure # Optionally update status in DB to indicate failure
else: else:
logger.info(f"Jira indexing successful for connector {connector_id}. Indexed {indexed_count} documents.") logger.info(
f"Jira indexing successful for connector {connector_id}. Indexed {indexed_count} documents."
)
# Update the last indexed timestamp only on success # Update the last indexed timestamp only on success
await update_connector_last_indexed(session, connector_id) await update_connector_last_indexed(session, connector_id)
await session.commit() # Commit timestamp update await session.commit() # Commit timestamp update
except Exception as e: except Exception as e:
await session.rollback() await session.rollback()
logger.error(f"Critical error in run_jira_indexing for connector {connector_id}: {e}", exc_info=True) logger.error(
f"Critical error in run_jira_indexing for connector {connector_id}: {e}",
exc_info=True,
)
# Optionally update status in DB to indicate failure # Optionally update status in DB to indicate failure
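The six run_*_indexing_with_new_session wrappers in this file differ only in the indexer they call and the log wording. A possible consolidation is sketched below; it is not part of this commit, the Slack/Notion/Discord helper names are assumed, and the success checks would need unifying first (the Slack/Notion/Discord tasks key off documents_processed > 0, while the GitHub/Linear/Jira tasks key off an empty error_message).

# Hypothetical refactor sketch (not code from this commit): one generic runner
# driven by a dispatch table, reusing this module's existing helpers.
INDEXERS = {
    SearchSourceConnectorType.SLACK_CONNECTOR: index_slack_messages,      # assumed name
    SearchSourceConnectorType.NOTION_CONNECTOR: index_notion_pages,       # assumed name
    SearchSourceConnectorType.GITHUB_CONNECTOR: index_github_repos,
    SearchSourceConnectorType.LINEAR_CONNECTOR: index_linear_issues,
    SearchSourceConnectorType.JIRA_CONNECTOR: index_jira_issues,
    SearchSourceConnectorType.DISCORD_CONNECTOR: index_discord_messages,  # assumed name
}

async def run_indexing_with_new_session(
    connector_type, connector_id, search_space_id, user_id, start_date, end_date
):
    indexer = INDEXERS[connector_type]
    async with async_session_maker() as session:
        try:
            indexed_count, error_message = await indexer(
                session,
                connector_id,
                search_space_id,
                user_id,
                start_date,
                end_date,
                update_last_indexed=False,
            )
            if error_message:
                logger.error(
                    f"{connector_type} indexing failed for connector {connector_id}: {error_message}"
                )
            elif indexed_count:
                # Bump the timestamp only after a successful, non-empty run.
                await update_connector_last_indexed(session, connector_id)
        except Exception as e:
            await session.rollback()
            logger.error(
                f"Critical error indexing connector {connector_id}: {e}", exc_info=True
            )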

View file

@ -992,7 +992,7 @@ class ConnectorService:
# Early return if no results # Early return if no results
if not jira_chunks: if not jira_chunks:
return { return {
"id": 10, "id": 30,
"name": "Jira Issues", "name": "Jira Issues",
"type": "JIRA_CONNECTOR", "type": "JIRA_CONNECTOR",
"sources": [], "sources": [],

View file

@ -60,7 +60,7 @@ import {
IconBrandSlack, IconBrandSlack,
IconBrandYoutube, IconBrandYoutube,
IconLayoutKanban, IconLayoutKanban,
IconBrandTrello, IconTicket,
} from "@tabler/icons-react"; } from "@tabler/icons-react";
import { import {
ColumnDef, ColumnDef,
@ -178,7 +178,7 @@ const documentTypeIcons = {
YOUTUBE_VIDEO: IconBrandYoutube, YOUTUBE_VIDEO: IconBrandYoutube,
GITHUB_CONNECTOR: IconBrandGithub, GITHUB_CONNECTOR: IconBrandGithub,
LINEAR_CONNECTOR: IconLayoutKanban, LINEAR_CONNECTOR: IconLayoutKanban,
JIRA_CONNECTOR: IconBrandTrello, JIRA_CONNECTOR: IconTicket,
DISCORD_CONNECTOR: IconBrandDiscord, DISCORD_CONNECTOR: IconBrandDiscord,
} as const; } as const;