Mirror of https://github.com/MODSetter/SurfSense.git (synced 2025-09-04 11:39:19 +00:00)

Commit message: add coderabbit suggestions
Parent: 756a429159
Commit: b4d29ba3a0
6 changed files with 839 additions and 497 deletions
node_modules/.cache/prettier/.prettier-caches/a2ecb2962bf19c1099cfe708e42daa0097f94976.json (generated, vendored, normal file, 1 line added)
@@ -0,0 +1 @@
+{"2d0ec64d93969318101ee479b664221b32241665":{"files":{"surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/page.tsx":["EHKKvlOK0vfy0GgHwlG/J2Bx5rw=",true]},"modified":1753426633288}}

@@ -2,32 +2,31 @@ import asyncio
import json
from typing import Any, Dict, List

from app.db import Document, SearchSpace
from app.services.connector_service import ConnectorService
from app.services.query_service import QueryService
from langchain_core.messages import HumanMessage, SystemMessage
from langchain_core.runnables import RunnableConfig

from sqlalchemy.ext.asyncio import AsyncSession

from .configuration import Configuration, SearchMode
from .prompts import get_answer_outline_system_prompt, get_further_questions_system_prompt
from .state import State
from .sub_section_writer.graph import graph as sub_section_writer_graph
from .sub_section_writer.configuration import SubSectionType
from .qna_agent.graph import graph as qna_agent_graph
from .utils import AnswerOutline, get_connector_emoji, get_connector_friendly_name

from app.services.query_service import QueryService

from langgraph.types import StreamWriter
from sqlalchemy.ext.asyncio import AsyncSession

# Additional imports for document fetching
from sqlalchemy.future import select
from app.db import Document, SearchSpace

from .configuration import Configuration, SearchMode
from .prompts import (
    get_answer_outline_system_prompt,
    get_further_questions_system_prompt,
)
from .qna_agent.graph import graph as qna_agent_graph
from .state import State
from .sub_section_writer.configuration import SubSectionType
from .sub_section_writer.graph import graph as sub_section_writer_graph
from .utils import AnswerOutline, get_connector_emoji, get_connector_friendly_name


async def fetch_documents_by_ids(
    document_ids: List[int],
    user_id: str,
    db_session: AsyncSession
    document_ids: List[int], user_id: str, db_session: AsyncSession
) -> tuple[List[Dict[str, Any]], List[Dict[str, Any]]]:
    """
    Fetch documents by their IDs with ownership check using DOCUMENTS mode approach.
@@ -53,10 +52,7 @@ async def fetch_documents_by_ids(
        result = await db_session.execute(
            select(Document)
            .join(SearchSpace)
            .filter(
                Document.id.in_(document_ids),
                SearchSpace.user_id == user_id
            )
            .filter(Document.id.in_(document_ids), SearchSpace.user_id == user_id)
        )
        documents = result.scalars().all()

@@ -67,12 +63,17 @@ async def fetch_documents_by_ids(
        for doc in documents:
            # Fetch associated chunks for this document (similar to DocumentHybridSearchRetriever)
            from app.db import Chunk
            chunks_query = select(Chunk).where(Chunk.document_id == doc.id).order_by(Chunk.id)

            chunks_query = (
                select(Chunk).where(Chunk.document_id == doc.id).order_by(Chunk.id)
            )
            chunks_result = await db_session.execute(chunks_query)
            chunks = chunks_result.scalars().all()

            # Concatenate chunks content (similar to SearchMode.DOCUMENTS approach)
            concatenated_chunks_content = " ".join([chunk.content for chunk in chunks]) if chunks else doc.content
            concatenated_chunks_content = (
                " ".join([chunk.content for chunk in chunks]) if chunks else doc.content
            )

            # Format to match connector service return format
            formatted_doc = {
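
For context, a minimal standalone sketch of the chunk-concatenation pattern used in the hunk above; the Chunk and Document models and the async session come from this codebase, while the helper name and structure here are only illustrative:

from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession

from app.db import Chunk, Document  # models referenced in this diff


async def document_text(db_session: AsyncSession, doc: Document) -> str:
    # Pull the document's chunks in a stable order.
    result = await db_session.execute(
        select(Chunk).where(Chunk.document_id == doc.id).order_by(Chunk.id)
    )
    chunks = result.scalars().all()
    # Join chunk contents; fall back to the raw document content when no chunks exist.
    return " ".join(chunk.content for chunk in chunks) if chunks else doc.content
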
@@ -82,10 +83,12 @@ async def fetch_documents_by_ids(
                "document": {
                    "id": doc.id,
                    "title": doc.title,
                    "document_type": doc.document_type.value if doc.document_type else "UNKNOWN",
                    "document_type": (
                        doc.document_type.value if doc.document_type else "UNKNOWN"
                    ),
                    "metadata": doc.document_metadata or {},
                },
                "source": doc.document_type.value if doc.document_type else "UNKNOWN"
                "source": doc.document_type.value if doc.document_type else "UNKNOWN",
            }
            formatted_documents.append(formatted_doc)

@@ -97,7 +100,9 @@ async def fetch_documents_by_ids(

        # Create source objects for each document type (similar to ConnectorService)
        source_objects = []
        connector_id_counter = 100 # Start from 100 to avoid conflicts with regular connectors
        connector_id_counter = (
            100 # Start from 100 to avoid conflicts with regular connectors
        )

        for doc_type, docs in documents_by_type.items():
            sources_list = []
@ -108,76 +113,126 @@ async def fetch_documents_by_ids(
|
|||
# Create type-specific source formatting (similar to ConnectorService)
|
||||
if doc_type == "LINEAR_CONNECTOR":
|
||||
# Extract Linear-specific metadata
|
||||
issue_identifier = metadata.get('issue_identifier', '')
|
||||
issue_title = metadata.get('issue_title', doc.title)
|
||||
issue_state = metadata.get('state', '')
|
||||
comment_count = metadata.get('comment_count', 0)
|
||||
issue_identifier = metadata.get("issue_identifier", "")
|
||||
issue_title = metadata.get("issue_title", doc.title)
|
||||
issue_state = metadata.get("state", "")
|
||||
comment_count = metadata.get("comment_count", 0)
|
||||
|
||||
# Create a more descriptive title for Linear issues
|
||||
title = f"Linear: {issue_identifier} - {issue_title}" if issue_identifier else f"Linear: {issue_title}"
|
||||
title = (
|
||||
f"Linear: {issue_identifier} - {issue_title}"
|
||||
if issue_identifier
|
||||
else f"Linear: {issue_title}"
|
||||
)
|
||||
if issue_state:
|
||||
title += f" ({issue_state})"
|
||||
|
||||
# Create description
|
||||
description = doc.content[:100] + "..." if len(doc.content) > 100 else doc.content
|
||||
description = (
|
||||
doc.content[:100] + "..."
|
||||
if len(doc.content) > 100
|
||||
else doc.content
|
||||
)
|
||||
if comment_count:
|
||||
description += f" | Comments: {comment_count}"
|
||||
|
||||
# Create URL
|
||||
url = f"https://linear.app/issue/{issue_identifier}" if issue_identifier else ""
|
||||
url = (
|
||||
f"https://linear.app/issue/{issue_identifier}"
|
||||
if issue_identifier
|
||||
else ""
|
||||
)
|
||||
|
||||
elif doc_type == "SLACK_CONNECTOR":
|
||||
# Extract Slack-specific metadata
|
||||
channel_name = metadata.get('channel_name', 'Unknown Channel')
|
||||
channel_id = metadata.get('channel_id', '')
|
||||
message_date = metadata.get('start_date', '')
|
||||
channel_name = metadata.get("channel_name", "Unknown Channel")
|
||||
channel_id = metadata.get("channel_id", "")
|
||||
message_date = metadata.get("start_date", "")
|
||||
|
||||
title = f"Slack: {channel_name}"
|
||||
if message_date:
|
||||
title += f" ({message_date})"
|
||||
|
||||
description = doc.content[:100] + "..." if len(doc.content) > 100 else doc.content
|
||||
url = f"https://slack.com/app_redirect?channel={channel_id}" if channel_id else ""
|
||||
description = (
|
||||
doc.content[:100] + "..."
|
||||
if len(doc.content) > 100
|
||||
else doc.content
|
||||
)
|
||||
url = (
|
||||
f"https://slack.com/app_redirect?channel={channel_id}"
|
||||
if channel_id
|
||||
else ""
|
||||
)
|
||||
|
||||
elif doc_type == "NOTION_CONNECTOR":
|
||||
# Extract Notion-specific metadata
|
||||
page_title = metadata.get('page_title', doc.title)
|
||||
page_id = metadata.get('page_id', '')
|
||||
page_title = metadata.get("page_title", doc.title)
|
||||
page_id = metadata.get("page_id", "")
|
||||
|
||||
title = f"Notion: {page_title}"
|
||||
description = doc.content[:100] + "..." if len(doc.content) > 100 else doc.content
|
||||
url = f"https://notion.so/{page_id.replace('-', '')}" if page_id else ""
|
||||
description = (
|
||||
doc.content[:100] + "..."
|
||||
if len(doc.content) > 100
|
||||
else doc.content
|
||||
)
|
||||
url = (
|
||||
f"https://notion.so/{page_id.replace('-', '')}"
|
||||
if page_id
|
||||
else ""
|
||||
)
|
||||
|
||||
elif doc_type == "GITHUB_CONNECTOR":
|
||||
title = f"GitHub: {doc.title}"
|
||||
description = metadata.get('description', doc.content[:100] + "..." if len(doc.content) > 100 else doc.content)
|
||||
url = metadata.get('url', '')
|
||||
description = metadata.get(
|
||||
"description",
|
||||
(
|
||||
doc.content[:100] + "..."
|
||||
if len(doc.content) > 100
|
||||
else doc.content
|
||||
),
|
||||
)
|
||||
url = metadata.get("url", "")
|
||||
|
||||
elif doc_type == "YOUTUBE_VIDEO":
|
||||
# Extract YouTube-specific metadata
|
||||
video_title = metadata.get('video_title', doc.title)
|
||||
video_id = metadata.get('video_id', '')
|
||||
channel_name = metadata.get('channel_name', '')
|
||||
video_title = metadata.get("video_title", doc.title)
|
||||
video_id = metadata.get("video_id", "")
|
||||
channel_name = metadata.get("channel_name", "")
|
||||
|
||||
title = video_title
|
||||
if channel_name:
|
||||
title += f" - {channel_name}"
|
||||
|
||||
description = metadata.get('description', doc.content[:100] + "..." if len(doc.content) > 100 else doc.content)
|
||||
url = f"https://www.youtube.com/watch?v={video_id}" if video_id else ""
|
||||
description = metadata.get(
|
||||
"description",
|
||||
(
|
||||
doc.content[:100] + "..."
|
||||
if len(doc.content) > 100
|
||||
else doc.content
|
||||
),
|
||||
)
|
||||
url = (
|
||||
f"https://www.youtube.com/watch?v={video_id}"
|
||||
if video_id
|
||||
else ""
|
||||
)
|
||||
|
||||
elif doc_type == "DISCORD_CONNECTOR":
|
||||
# Extract Discord-specific metadata
|
||||
channel_name = metadata.get('channel_name', 'Unknown Channel')
|
||||
channel_id = metadata.get('channel_id', '')
|
||||
guild_id = metadata.get('guild_id', '')
|
||||
message_date = metadata.get('start_date', '')
|
||||
channel_name = metadata.get("channel_name", "Unknown Channel")
|
||||
channel_id = metadata.get("channel_id", "")
|
||||
guild_id = metadata.get("guild_id", "")
|
||||
message_date = metadata.get("start_date", "")
|
||||
|
||||
title = f"Discord: {channel_name}"
|
||||
if message_date:
|
||||
title += f" ({message_date})"
|
||||
|
||||
description = doc.content[:100] + "..." if len(doc.content) > 100 else doc.content
|
||||
description = (
|
||||
doc.content[:100] + "..."
|
||||
if len(doc.content) > 100
|
||||
else doc.content
|
||||
)
|
||||
|
||||
if guild_id and channel_id:
|
||||
url = f"https://discord.com/channels/{guild_id}/{channel_id}"
|
||||
|
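
The connector-specific formatting above repeats the doc.content[:100] + "..." truncation for every document type; a tiny illustrative helper (not something this commit introduces) that expresses the same conditional:

def preview(content: str, limit: int = 100) -> str:
    # Truncate long content for display, mirroring the inline conditional used throughout the diff.
    return content[:limit] + "..." if len(content) > limit else content
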
@ -188,20 +243,28 @@ async def fetch_documents_by_ids(
|
|||
|
||||
elif doc_type == "JIRA_CONNECTOR":
|
||||
# Extract Jira-specific metadata
|
||||
issue_key = metadata.get('issue_key', 'Unknown Issue')
|
||||
issue_title = metadata.get('issue_title', 'Untitled Issue')
|
||||
status = metadata.get('status', '')
|
||||
priority = metadata.get('priority', '')
|
||||
issue_type = metadata.get('issue_type', '')
|
||||
issue_key = metadata.get("issue_key", "Unknown Issue")
|
||||
issue_title = metadata.get("issue_title", "Untitled Issue")
|
||||
status = metadata.get("status", "")
|
||||
priority = metadata.get("priority", "")
|
||||
issue_type = metadata.get("issue_type", "")
|
||||
|
||||
title = f"Jira: {issue_key} - {issue_title}"
|
||||
if status:
|
||||
title += f" ({status})"
|
||||
|
||||
description = doc.content[:100] + "..." if len(doc.content) > 100 else doc.content
|
||||
description = (
|
||||
doc.content[:100] + "..."
|
||||
if len(doc.content) > 100
|
||||
else doc.content
|
||||
)
|
||||
if priority:
|
||||
description += f" | Priority: {priority}"
|
||||
if issue_type:
|
||||
description += f" | Type: {issue_type}"
|
||||
|
||||
# Construct Jira URL if we have the base URL
|
||||
base_url = metadata.get('base_url', '')
|
||||
base_url = metadata.get("base_url", "")
|
||||
if base_url and issue_key:
|
||||
url = f"{base_url}/browse/{issue_key}"
|
||||
else:
|
||||
|
@ -209,34 +272,58 @@ async def fetch_documents_by_ids(
|
|||
|
||||
elif doc_type == "EXTENSION":
|
||||
# Extract Extension-specific metadata
|
||||
webpage_title = metadata.get('VisitedWebPageTitle', doc.title)
|
||||
webpage_url = metadata.get('VisitedWebPageURL', '')
|
||||
visit_date = metadata.get('VisitedWebPageDateWithTimeInISOString', '')
|
||||
webpage_title = metadata.get("VisitedWebPageTitle", doc.title)
|
||||
webpage_url = metadata.get("VisitedWebPageURL", "")
|
||||
visit_date = metadata.get(
|
||||
"VisitedWebPageDateWithTimeInISOString", ""
|
||||
)
|
||||
|
||||
title = webpage_title
|
||||
if visit_date:
|
||||
formatted_date = visit_date.split('T')[0] if 'T' in visit_date else visit_date
|
||||
formatted_date = (
|
||||
visit_date.split("T")[0]
|
||||
if "T" in visit_date
|
||||
else visit_date
|
||||
)
|
||||
title += f" (visited: {formatted_date})"
|
||||
|
||||
description = doc.content[:100] + "..." if len(doc.content) > 100 else doc.content
|
||||
description = (
|
||||
doc.content[:100] + "..."
|
||||
if len(doc.content) > 100
|
||||
else doc.content
|
||||
)
|
||||
url = webpage_url
|
||||
|
||||
elif doc_type == "CRAWLED_URL":
|
||||
title = doc.title
|
||||
description = metadata.get('og:description', metadata.get('ogDescription', doc.content[:100] + "..." if len(doc.content) > 100 else doc.content))
|
||||
url = metadata.get('url', '')
|
||||
description = metadata.get(
|
||||
"og:description",
|
||||
metadata.get(
|
||||
"ogDescription",
|
||||
(
|
||||
doc.content[:100] + "..."
|
||||
if len(doc.content) > 100
|
||||
else doc.content
|
||||
),
|
||||
),
|
||||
)
|
||||
url = metadata.get("url", "")
|
||||
|
||||
else: # FILE and other types
|
||||
title = doc.title
|
||||
description = doc.content[:100] + "..." if len(doc.content) > 100 else doc.content
|
||||
url = metadata.get('url', '')
|
||||
description = (
|
||||
doc.content[:100] + "..."
|
||||
if len(doc.content) > 100
|
||||
else doc.content
|
||||
)
|
||||
url = metadata.get("url", "")
|
||||
|
||||
# Create source entry
|
||||
source = {
|
||||
"id": doc.id,
|
||||
"title": title,
|
||||
"description": description,
|
||||
"url": url
|
||||
"url": url,
|
||||
}
|
||||
sources_list.append(source)
|
||||
|
||||
|
@ -251,7 +338,7 @@ async def fetch_documents_by_ids(
|
|||
"JIRA_CONNECTOR": "Jira Issues (Selected)",
|
||||
"EXTENSION": "Browser Extension (Selected)",
|
||||
"CRAWLED_URL": "Web Pages (Selected)",
|
||||
"FILE": "Files (Selected)"
|
||||
"FILE": "Files (Selected)",
|
||||
}
|
||||
|
||||
source_object = {
|
||||
|
@ -263,7 +350,9 @@ async def fetch_documents_by_ids(
|
|||
source_objects.append(source_object)
|
||||
connector_id_counter += 1
|
||||
|
||||
print(f"Fetched {len(formatted_documents)} user-selected documents (with concatenated chunks) from {len(document_ids)} requested IDs")
|
||||
print(
|
||||
f"Fetched {len(formatted_documents)} user-selected documents (with concatenated chunks) from {len(document_ids)} requested IDs"
|
||||
)
|
||||
print(f"Created {len(source_objects)} source objects for UI display")
|
||||
|
||||
return source_objects, formatted_documents
|
||||
|
@ -273,7 +362,9 @@ async def fetch_documents_by_ids(
|
|||
return [], []
|
||||
|
||||
|
||||
async def write_answer_outline(state: State, config: RunnableConfig, writer: StreamWriter) -> Dict[str, Any]:
|
||||
async def write_answer_outline(
|
||||
state: State, config: RunnableConfig, writer: StreamWriter
|
||||
) -> Dict[str, Any]:
|
||||
"""
|
||||
Create a structured answer outline based on the user query.
|
||||
|
||||
|
@ -285,7 +376,6 @@ async def write_answer_outline(state: State, config: RunnableConfig, writer: Str
|
|||
Dict containing the answer outline in the "answer_outline" key for state update.
|
||||
"""
|
||||
from app.services.llm_service import get_user_strategic_llm
|
||||
from app.db import get_async_session
|
||||
|
||||
streaming_service = state.streaming_service
|
||||
|
||||
|
@ -353,7 +443,7 @@ async def write_answer_outline(state: State, config: RunnableConfig, writer: Str
|
|||
# Create messages for the LLM
|
||||
messages = [
|
||||
SystemMessage(content=get_answer_outline_system_prompt()),
|
||||
HumanMessage(content=human_message_content)
|
||||
HumanMessage(content=human_message_content),
|
||||
]
|
||||
|
||||
# Call the LLM directly without using structured output
|
||||
|
@ -373,8 +463,8 @@ async def write_answer_outline(state: State, config: RunnableConfig, writer: Str
|
|||
content = response.content
|
||||
|
||||
# Find the JSON in the content (handle case where LLM might add additional text)
|
||||
json_start = content.find('{')
|
||||
json_end = content.rfind('}') + 1
|
||||
json_start = content.find("{")
|
||||
json_end = content.rfind("}") + 1
|
||||
if json_start >= 0 and json_end > json_start:
|
||||
json_str = content[json_start:json_end]
|
||||
|
||||
|
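
A self-contained sketch of the brace-scanning JSON extraction used above (find the first "{" and the last "}", then parse the slice); the function name and the None-on-failure behaviour are assumptions, not part of the diff:

import json
from typing import Any, Optional


def extract_json_object(content: str) -> Optional[dict]:
    # Locate the outermost JSON object in an LLM reply that may contain extra prose.
    start = content.find("{")
    end = content.rfind("}") + 1
    if start < 0 or end <= start:
        return None
    try:
        return json.loads(content[start:end])
    except json.JSONDecodeError:
        return None
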
@ -384,7 +474,9 @@ async def write_answer_outline(state: State, config: RunnableConfig, writer: Str
|
|||
# Convert to Pydantic model
|
||||
answer_outline = AnswerOutline(**parsed_data)
|
||||
|
||||
total_questions = sum(len(section.questions) for section in answer_outline.answer_outline)
|
||||
total_questions = sum(
|
||||
len(section.questions) for section in answer_outline.answer_outline
|
||||
)
|
||||
|
||||
writer(
|
||||
{
|
||||
|
@ -429,7 +521,7 @@ async def fetch_relevant_documents(
|
|||
top_k: int = 10,
|
||||
connector_service: ConnectorService = None,
|
||||
search_mode: SearchMode = SearchMode.CHUNKS,
|
||||
user_selected_sources: List[Dict[str, Any]] = None
|
||||
user_selected_sources: List[Dict[str, Any]] = None,
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""
|
||||
Fetch relevant documents for research questions using the provided connectors.
|
||||
|
@ -461,7 +553,9 @@ async def fetch_relevant_documents(
|
|||
|
||||
# Stream initial status update
|
||||
if streaming_service and writer:
|
||||
connector_names = [get_connector_friendly_name(connector) for connector in connectors_to_search]
|
||||
connector_names = [
|
||||
get_connector_friendly_name(connector) for connector in connectors_to_search
|
||||
]
|
||||
connector_names_str = ", ".join(connector_names)
|
||||
writer(
|
||||
{
|
||||
|
@ -504,12 +598,15 @@ async def fetch_relevant_documents(
|
|||
|
||||
try:
|
||||
if connector == "YOUTUBE_VIDEO":
|
||||
source_object, youtube_chunks = await connector_service.search_youtube(
|
||||
(
|
||||
source_object,
|
||||
youtube_chunks,
|
||||
) = await connector_service.search_youtube(
|
||||
user_query=reformulated_query,
|
||||
user_id=user_id,
|
||||
search_space_id=search_space_id,
|
||||
top_k=top_k,
|
||||
search_mode=search_mode
|
||||
search_mode=search_mode,
|
||||
)
|
||||
|
||||
# Add to sources and raw documents
|
||||
|
@ -528,12 +625,15 @@ async def fetch_relevant_documents(
|
|||
)
|
||||
|
||||
elif connector == "EXTENSION":
|
||||
source_object, extension_chunks = await connector_service.search_extension(
|
||||
(
|
||||
source_object,
|
||||
extension_chunks,
|
||||
) = await connector_service.search_extension(
|
||||
user_query=reformulated_query,
|
||||
user_id=user_id,
|
||||
search_space_id=search_space_id,
|
||||
top_k=top_k,
|
||||
search_mode=search_mode
|
||||
search_mode=search_mode,
|
||||
)
|
||||
|
||||
# Add to sources and raw documents
|
||||
|
@ -552,12 +652,15 @@ async def fetch_relevant_documents(
|
|||
)
|
||||
|
||||
elif connector == "CRAWLED_URL":
|
||||
source_object, crawled_urls_chunks = await connector_service.search_crawled_urls(
|
||||
(
|
||||
source_object,
|
||||
crawled_urls_chunks,
|
||||
) = await connector_service.search_crawled_urls(
|
||||
user_query=reformulated_query,
|
||||
user_id=user_id,
|
||||
search_space_id=search_space_id,
|
||||
top_k=top_k,
|
||||
search_mode=search_mode
|
||||
search_mode=search_mode,
|
||||
)
|
||||
|
||||
# Add to sources and raw documents
|
||||
|
@ -581,7 +684,7 @@ async def fetch_relevant_documents(
|
|||
user_id=user_id,
|
||||
search_space_id=search_space_id,
|
||||
top_k=top_k,
|
||||
search_mode=search_mode
|
||||
search_mode=search_mode,
|
||||
)
|
||||
|
||||
# Add to sources and raw documents
|
||||
|
@ -605,7 +708,7 @@ async def fetch_relevant_documents(
|
|||
user_id=user_id,
|
||||
search_space_id=search_space_id,
|
||||
top_k=top_k,
|
||||
search_mode=search_mode
|
||||
search_mode=search_mode,
|
||||
)
|
||||
|
||||
# Add to sources and raw documents
|
||||
|
@ -624,12 +727,15 @@ async def fetch_relevant_documents(
|
|||
)
|
||||
|
||||
elif connector == "NOTION_CONNECTOR":
|
||||
source_object, notion_chunks = await connector_service.search_notion(
|
||||
(
|
||||
source_object,
|
||||
notion_chunks,
|
||||
) = await connector_service.search_notion(
|
||||
user_query=reformulated_query,
|
||||
user_id=user_id,
|
||||
search_space_id=search_space_id,
|
||||
top_k=top_k,
|
||||
search_mode=search_mode
|
||||
search_mode=search_mode,
|
||||
)
|
||||
|
||||
# Add to sources and raw documents
|
||||
|
@ -648,12 +754,15 @@ async def fetch_relevant_documents(
|
|||
)
|
||||
|
||||
elif connector == "GITHUB_CONNECTOR":
|
||||
source_object, github_chunks = await connector_service.search_github(
|
||||
(
|
||||
source_object,
|
||||
github_chunks,
|
||||
) = await connector_service.search_github(
|
||||
user_query=reformulated_query,
|
||||
user_id=user_id,
|
||||
search_space_id=search_space_id,
|
||||
top_k=top_k,
|
||||
search_mode=search_mode
|
||||
search_mode=search_mode,
|
||||
)
|
||||
|
||||
# Add to sources and raw documents
|
||||
|
@ -672,12 +781,15 @@ async def fetch_relevant_documents(
|
|||
)
|
||||
|
||||
elif connector == "LINEAR_CONNECTOR":
|
||||
source_object, linear_chunks = await connector_service.search_linear(
|
||||
(
|
||||
source_object,
|
||||
linear_chunks,
|
||||
) = await connector_service.search_linear(
|
||||
user_query=reformulated_query,
|
||||
user_id=user_id,
|
||||
search_space_id=search_space_id,
|
||||
top_k=top_k,
|
||||
search_mode=search_mode
|
||||
search_mode=search_mode,
|
||||
)
|
||||
|
||||
# Add to sources and raw documents
|
||||
|
@ -696,10 +808,11 @@ async def fetch_relevant_documents(
|
|||
)
|
||||
|
||||
elif connector == "TAVILY_API":
|
||||
source_object, tavily_chunks = await connector_service.search_tavily(
|
||||
user_query=reformulated_query,
|
||||
user_id=user_id,
|
||||
top_k=top_k
|
||||
(
|
||||
source_object,
|
||||
tavily_chunks,
|
||||
) = await connector_service.search_tavily(
|
||||
user_query=reformulated_query, user_id=user_id, top_k=top_k
|
||||
)
|
||||
|
||||
# Add to sources and raw documents
|
||||
|
@ -723,7 +836,7 @@ async def fetch_relevant_documents(
|
|||
source_object, linkup_chunks = await connector_service.search_linkup(
|
||||
user_query=reformulated_query,
|
||||
user_id=user_id,
|
||||
mode=linkup_mode
|
||||
mode=linkup_mode,
|
||||
)
|
||||
|
||||
# Add to sources and raw documents
|
||||
|
@ -742,12 +855,15 @@ async def fetch_relevant_documents(
|
|||
)
|
||||
|
||||
elif connector == "DISCORD_CONNECTOR":
|
||||
source_object, discord_chunks = await connector_service.search_discord(
|
||||
(
|
||||
source_object,
|
||||
discord_chunks,
|
||||
) = await connector_service.search_discord(
|
||||
user_query=reformulated_query,
|
||||
user_id=user_id,
|
||||
search_space_id=search_space_id,
|
||||
top_k=top_k,
|
||||
search_mode=search_mode
|
||||
search_mode=search_mode,
|
||||
)
|
||||
# Add to sources and raw documents
|
||||
if source_object:
|
||||
|
@ -769,7 +885,7 @@ async def fetch_relevant_documents(
|
|||
user_id=user_id,
|
||||
search_space_id=search_space_id,
|
||||
top_k=top_k,
|
||||
search_mode=search_mode
|
||||
search_mode=search_mode,
|
||||
)
|
||||
|
||||
# Add to sources and raw documents
|
||||
|
@ -812,8 +928,8 @@ async def fetch_relevant_documents(
|
|||
# First add user-selected sources (if any)
|
||||
if user_selected_sources:
|
||||
for source_obj in user_selected_sources:
|
||||
source_id = source_obj.get('id')
|
||||
source_type = source_obj.get('type')
|
||||
source_id = source_obj.get("id")
|
||||
source_type = source_obj.get("type")
|
||||
|
||||
if source_id and source_type:
|
||||
source_key = f"{source_type}_{source_id}"
|
||||
|
@ -827,8 +943,8 @@ async def fetch_relevant_documents(
|
|||
for source_obj in all_sources:
|
||||
# Use combination of source ID and type as a unique identifier
|
||||
# This ensures we don't accidentally deduplicate sources from different connectors
|
||||
source_id = source_obj.get('id')
|
||||
source_type = source_obj.get('type')
|
||||
source_id = source_obj.get("id")
|
||||
source_type = source_obj.get("type")
|
||||
|
||||
if source_id and source_type:
|
||||
source_key = f"{source_type}_{source_id}"
|
||||
|
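
The deduplication above keys each source on a composite of its type and id so that sources with the same numeric id from different connectors are both kept; a rough standalone equivalent, assuming the same dict shape with "id" and "type" fields:

from typing import Any, Dict, List


def deduplicate_sources(sources: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    seen: set = set()
    unique: List[Dict[str, Any]] = []
    for source in sources:
        source_id = source.get("id")
        source_type = source.get("type")
        # Combine type and id; fall through (keep) when either field is missing.
        key = f"{source_type}_{source_id}" if source_id and source_type else None
        if key and key in seen:
            continue
        if key:
            seen.add(key)
        unique.append(source)
    return unique
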
@ -877,7 +993,13 @@ async def fetch_relevant_documents(
|
|||
|
||||
# After all sources are collected and deduplicated, stream them
|
||||
if streaming_service and writer:
|
||||
writer({"yield_value": streaming_service.format_sources_delta(deduplicated_sources)})
|
||||
writer(
|
||||
{
|
||||
"yield_value": streaming_service.format_sources_delta(
|
||||
deduplicated_sources
|
||||
)
|
||||
}
|
||||
)
|
||||
|
||||
# Deduplicate raw documents based on chunk_id or content
|
||||
seen_chunk_ids = set()
|
||||
|
@ -890,7 +1012,9 @@ async def fetch_relevant_documents(
|
|||
content_hash = hash(content)
|
||||
|
||||
# Skip if we've seen this chunk_id or content before
|
||||
if (chunk_id and chunk_id in seen_chunk_ids) or content_hash in seen_content_hashes:
|
||||
if (
|
||||
chunk_id and chunk_id in seen_chunk_ids
|
||||
) or content_hash in seen_content_hashes:
|
||||
continue
|
||||
|
||||
# Add to our tracking sets and keep this document
|
||||
|
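
Similarly, the chunk-level deduplication above falls back to hashing the content when no chunk_id is available; a compact sketch under the same assumptions about the document dicts:

def deduplicate_chunks(documents: list) -> list:
    seen_chunk_ids: set = set()
    seen_content_hashes: set = set()
    unique: list = []
    for doc in documents:
        chunk_id = doc.get("chunk_id")
        content_hash = hash(doc.get("content", ""))
        # Skip anything whose chunk_id or content has already been kept.
        if (chunk_id and chunk_id in seen_chunk_ids) or content_hash in seen_content_hashes:
            continue
        if chunk_id:
            seen_chunk_ids.add(chunk_id)
        seen_content_hashes.add(content_hash)
        unique.append(doc)
    return unique
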
@ -913,7 +1037,9 @@ async def fetch_relevant_documents(
|
|||
return deduplicated_docs
|
||||
|
||||
|
||||
async def process_sections(state: State, config: RunnableConfig, writer: StreamWriter) -> Dict[str, Any]:
|
||||
async def process_sections(
|
||||
state: State, config: RunnableConfig, writer: StreamWriter
|
||||
) -> Dict[str, Any]:
|
||||
"""
|
||||
Process all sections in parallel and combine the results.
|
||||
|
||||
|
@ -997,10 +1123,13 @@ async def process_sections(state: State, config: RunnableConfig, writer: StreamW
|
|||
}
|
||||
)
|
||||
|
||||
user_selected_sources, user_selected_documents = await fetch_documents_by_ids(
|
||||
(
|
||||
user_selected_sources,
|
||||
user_selected_documents,
|
||||
) = await fetch_documents_by_ids(
|
||||
document_ids=configuration.document_ids_to_add_in_context,
|
||||
user_id=configuration.user_id,
|
||||
db_session=state.db_session
|
||||
db_session=state.db_session,
|
||||
)
|
||||
|
||||
if user_selected_documents:
|
||||
|
@ -1013,7 +1142,9 @@ async def process_sections(state: State, config: RunnableConfig, writer: StreamW
|
|||
)
|
||||
|
||||
# Create connector service using state db_session
|
||||
connector_service = ConnectorService(state.db_session, user_id=configuration.user_id)
|
||||
connector_service = ConnectorService(
|
||||
state.db_session, user_id=configuration.user_id
|
||||
)
|
||||
await connector_service.initialize_counter()
|
||||
|
||||
relevant_documents = await fetch_relevant_documents(
|
||||
|
@ -1027,7 +1158,7 @@ async def process_sections(state: State, config: RunnableConfig, writer: StreamW
|
|||
top_k=TOP_K,
|
||||
connector_service=connector_service,
|
||||
search_mode=configuration.search_mode,
|
||||
user_selected_sources=user_selected_sources
|
||||
user_selected_sources=user_selected_sources,
|
||||
)
|
||||
except Exception as e:
|
||||
error_message = f"Error fetching relevant documents: {str(e)}"
|
||||
|
@ -1041,7 +1172,9 @@ async def process_sections(state: State, config: RunnableConfig, writer: StreamW
|
|||
all_documents = user_selected_documents + relevant_documents
|
||||
|
||||
print(f"Fetched {len(relevant_documents)} relevant documents for all sections")
|
||||
print(f"Added {len(user_selected_documents)} user-selected documents for all sections")
|
||||
print(
|
||||
f"Added {len(user_selected_documents)} user-selected documents for all sections"
|
||||
)
|
||||
print(f"Total documents for sections: {len(all_documents)}")
|
||||
|
||||
writer(
|
||||
|
@ -1074,7 +1207,7 @@ async def process_sections(state: State, config: RunnableConfig, writer: StreamW
|
|||
section_contents[i] = {
|
||||
"title": section.section_title,
|
||||
"content": "",
|
||||
"index": i
|
||||
"index": i,
|
||||
}
|
||||
|
||||
section_tasks.append(
|
||||
|
@ -1089,7 +1222,7 @@ async def process_sections(state: State, config: RunnableConfig, writer: StreamW
|
|||
state=state,
|
||||
writer=writer,
|
||||
sub_section_type=sub_section_type,
|
||||
section_contents=section_contents
|
||||
section_contents=section_contents,
|
||||
)
|
||||
)
|
||||
|
||||
|
@ -1127,7 +1260,9 @@ async def process_sections(state: State, config: RunnableConfig, writer: StreamW
|
|||
|
||||
# Combine the results into a final report with section titles
|
||||
final_report = []
|
||||
for i, (section, content) in enumerate(zip(answer_outline.answer_outline, processed_results)):
|
||||
for i, (section, content) in enumerate(
|
||||
zip(answer_outline.answer_outline, processed_results)
|
||||
):
|
||||
# Skip adding the section header since the content already contains the title
|
||||
final_report.append(content)
|
||||
final_report.append("\n")
|
||||
|
@ -1135,11 +1270,12 @@ async def process_sections(state: State, config: RunnableConfig, writer: StreamW
|
|||
# Stream each section with its title
|
||||
writer(
|
||||
{
|
||||
"yield_value": state.streaming_service.format_text_chunk(f"# {section.section_title}\n\n{content}")
|
||||
"yield_value": state.streaming_service.format_text_chunk(
|
||||
f"# {section.section_title}\n\n{content}"
|
||||
)
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
# Join all sections with newlines
|
||||
final_written_report = "\n".join(final_report)
|
||||
print(f"Generated final report with {len(final_report)} parts")
|
||||
|
@ -1156,7 +1292,7 @@ async def process_sections(state: State, config: RunnableConfig, writer: StreamW
|
|||
# Since all sections used the same document pool, we can use it directly
|
||||
return {
|
||||
"final_written_report": final_written_report,
|
||||
"reranked_documents": all_documents
|
||||
"reranked_documents": all_documents,
|
||||
}
|
||||
|
||||
|
||||
|
@ -1171,7 +1307,7 @@ async def process_section_with_documents(
|
|||
state: State = None,
|
||||
writer: StreamWriter = None,
|
||||
sub_section_type: SubSectionType = SubSectionType.MIDDLE,
|
||||
section_contents: Dict[int, Dict[str, Any]] = None
|
||||
section_contents: Dict[int, Dict[str, Any]] = None,
|
||||
) -> str:
|
||||
"""
|
||||
Process a single section using pre-fetched documents.
|
||||
|
@ -1236,10 +1372,7 @@ async def process_section_with_documents(
|
|||
}
|
||||
|
||||
# Create the initial state with db_session and chat_history
|
||||
sub_state = {
|
||||
"db_session": state.db_session,
|
||||
"chat_history": state.chat_history
|
||||
}
|
||||
sub_state = {"db_session": state.db_session, "chat_history": state.chat_history}
|
||||
|
||||
# Invoke the sub-section writer graph with streaming
|
||||
print(f"Invoking sub_section_writer for: {section_title}")
|
||||
|
@ -1255,12 +1388,14 @@ async def process_section_with_documents(
|
|||
# Variables to track streaming state
|
||||
complete_content = "" # Tracks the complete content received so far
|
||||
|
||||
async for chunk_type, chunk in sub_section_writer_graph.astream(sub_state, config, stream_mode=["values"]):
|
||||
async for chunk_type, chunk in sub_section_writer_graph.astream(
|
||||
sub_state, config, stream_mode=["values"]
|
||||
):
|
||||
if "final_answer" in chunk:
|
||||
new_content = chunk["final_answer"]
|
||||
if new_content and new_content != complete_content:
|
||||
# Extract only the new content (delta)
|
||||
delta = new_content[len(complete_content):]
|
||||
delta = new_content[len(complete_content) :]
|
||||
|
||||
# Update what we've processed so far
|
||||
complete_content = new_content
|
||||
|
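
The streaming loop above receives cumulative "final_answer" snapshots and forwards only the newly appended suffix; a minimal sketch of that slicing, independent of the graph and writer objects used in the diff:

def next_delta(complete_content: str, new_content: str) -> tuple:
    # Each streamed value carries the full answer so far; emit only the new suffix.
    if new_content and new_content != complete_content:
        delta = new_content[len(complete_content):]
        return new_content, delta
    return complete_content, ""

A caller would pass the returned first element back in on the next iteration and write the second element (the delta) to the stream.
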
@ -1284,15 +1419,18 @@ async def process_section_with_documents(
|
|||
for i in range(len(section_contents)):
|
||||
if i in section_contents and section_contents[i]["content"]:
|
||||
# Add section header
|
||||
complete_answer.append(f"# {section_contents[i]['title']}")
|
||||
complete_answer.append(
|
||||
f"# {section_contents[i]['title']}"
|
||||
)
|
||||
complete_answer.append("") # Empty line after title
|
||||
|
||||
# Add section content
|
||||
content_lines = section_contents[i]["content"].split("\n")
|
||||
content_lines = section_contents[i]["content"].split(
|
||||
"\n"
|
||||
)
|
||||
complete_answer.extend(content_lines)
|
||||
complete_answer.append("") # Empty line after content
|
||||
|
||||
|
||||
# Set default if no content was received
|
||||
if not complete_content:
|
||||
complete_content = "No content was generated for this section."
|
||||
|
@ -1325,25 +1463,34 @@ async def process_section_with_documents(
|
|||
return f"Error processing section: {section_title}. Details: {str(e)}"
|
||||
|
||||
|
||||
async def reformulate_user_query(state: State, config: RunnableConfig, writer: StreamWriter) -> Dict[str, Any]:
|
||||
async def reformulate_user_query(
|
||||
state: State, config: RunnableConfig, writer: StreamWriter
|
||||
) -> Dict[str, Any]:
|
||||
"""
|
||||
Reforms the user query based on the chat history.
|
||||
"""
|
||||
|
||||
configuration = Configuration.from_runnable_config(config)
|
||||
user_query = configuration.user_query
|
||||
chat_history_str = await QueryService.langchain_chat_history_to_str(state.chat_history)
|
||||
chat_history_str = await QueryService.langchain_chat_history_to_str(
|
||||
state.chat_history
|
||||
)
|
||||
if len(state.chat_history) == 0:
|
||||
reformulated_query = user_query
|
||||
else:
|
||||
reformulated_query = await QueryService.reformulate_query_with_chat_history(user_query=user_query, session=state.db_session, user_id=configuration.user_id, chat_history_str=chat_history_str)
|
||||
reformulated_query = await QueryService.reformulate_query_with_chat_history(
|
||||
user_query=user_query,
|
||||
session=state.db_session,
|
||||
user_id=configuration.user_id,
|
||||
chat_history_str=chat_history_str,
|
||||
)
|
||||
|
||||
return {
|
||||
"reformulated_query": reformulated_query
|
||||
}
|
||||
return {"reformulated_query": reformulated_query}
|
||||
|
||||
|
||||
async def handle_qna_workflow(state: State, config: RunnableConfig, writer: StreamWriter) -> Dict[str, Any]:
|
||||
async def handle_qna_workflow(
|
||||
state: State, config: RunnableConfig, writer: StreamWriter
|
||||
) -> Dict[str, Any]:
|
||||
"""
|
||||
Handle the QNA research workflow.
|
||||
|
||||
|
@ -1402,10 +1549,13 @@ async def handle_qna_workflow(state: State, config: RunnableConfig, writer: Stre
|
|||
}
|
||||
)
|
||||
|
||||
user_selected_sources, user_selected_documents = await fetch_documents_by_ids(
|
||||
(
|
||||
user_selected_sources,
|
||||
user_selected_documents,
|
||||
) = await fetch_documents_by_ids(
|
||||
document_ids=configuration.document_ids_to_add_in_context,
|
||||
user_id=configuration.user_id,
|
||||
db_session=state.db_session
|
||||
db_session=state.db_session,
|
||||
)
|
||||
|
||||
if user_selected_documents:
|
||||
|
@ -1418,7 +1568,9 @@ async def handle_qna_workflow(state: State, config: RunnableConfig, writer: Stre
|
|||
)
|
||||
|
||||
# Create connector service using state db_session
|
||||
connector_service = ConnectorService(state.db_session, user_id=configuration.user_id)
|
||||
connector_service = ConnectorService(
|
||||
state.db_session, user_id=configuration.user_id
|
||||
)
|
||||
await connector_service.initialize_counter()
|
||||
|
||||
# Use the reformulated query as a single research question
|
||||
|
@ -1435,7 +1587,7 @@ async def handle_qna_workflow(state: State, config: RunnableConfig, writer: Stre
|
|||
top_k=TOP_K,
|
||||
connector_service=connector_service,
|
||||
search_mode=configuration.search_mode,
|
||||
user_selected_sources=user_selected_sources
|
||||
user_selected_sources=user_selected_sources,
|
||||
)
|
||||
except Exception as e:
|
||||
error_message = f"Error fetching relevant documents for QNA: {str(e)}"
|
||||
|
@ -1466,15 +1618,12 @@ async def handle_qna_workflow(state: State, config: RunnableConfig, writer: Stre
|
|||
"reformulated_query": reformulated_query,
|
||||
"relevant_documents": all_documents, # Use combined documents
|
||||
"user_id": configuration.user_id,
|
||||
"search_space_id": configuration.search_space_id
|
||||
"search_space_id": configuration.search_space_id,
|
||||
}
|
||||
}
|
||||
|
||||
# Create the state for the QNA agent (it has a different state structure)
|
||||
qna_state = {
|
||||
"db_session": state.db_session,
|
||||
"chat_history": state.chat_history
|
||||
}
|
||||
qna_state = {"db_session": state.db_session, "chat_history": state.chat_history}
|
||||
|
||||
try:
|
||||
writer(
|
||||
|
@ -1490,12 +1639,14 @@ async def handle_qna_workflow(state: State, config: RunnableConfig, writer: Stre
|
|||
captured_reranked_documents = []
|
||||
|
||||
# Call the QNA agent with streaming
|
||||
async for _chunk_type, chunk in qna_agent_graph.astream(qna_state, qna_config, stream_mode=["values"]):
|
||||
async for _chunk_type, chunk in qna_agent_graph.astream(
|
||||
qna_state, qna_config, stream_mode=["values"]
|
||||
):
|
||||
if "final_answer" in chunk:
|
||||
new_content = chunk["final_answer"]
|
||||
if new_content and new_content != complete_content:
|
||||
# Extract only the new content (delta)
|
||||
delta = new_content[len(complete_content):]
|
||||
delta = new_content[len(complete_content) :]
|
||||
complete_content = new_content
|
||||
|
||||
# Stream the real-time answer if there's new content
|
||||
|
@ -1533,7 +1684,7 @@ async def handle_qna_workflow(state: State, config: RunnableConfig, writer: Stre
|
|||
# Return the final answer and captured reranked documents for further question generation
|
||||
return {
|
||||
"final_written_report": complete_content,
|
||||
"reranked_documents": captured_reranked_documents
|
||||
"reranked_documents": captured_reranked_documents,
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
|
@ -1544,7 +1695,9 @@ async def handle_qna_workflow(state: State, config: RunnableConfig, writer: Stre
|
|||
return {"final_written_report": f"Error generating answer: {str(e)}"}
|
||||
|
||||
|
||||
async def generate_further_questions(state: State, config: RunnableConfig, writer: StreamWriter) -> Dict[str, Any]:
|
||||
async def generate_further_questions(
|
||||
state: State, config: RunnableConfig, writer: StreamWriter
|
||||
) -> Dict[str, Any]:
|
||||
"""
|
||||
Generate contextually relevant follow-up questions based on chat history and available documents.
|
||||
|
||||
|
@ -1564,7 +1717,7 @@ async def generate_further_questions(state: State, config: RunnableConfig, write
|
|||
streaming_service = state.streaming_service
|
||||
|
||||
# Get reranked documents from the state (will be populated by sub-agents)
|
||||
reranked_documents = getattr(state, 'reranked_documents', None) or []
|
||||
reranked_documents = getattr(state, "reranked_documents", None) or []
|
||||
|
||||
writer(
|
||||
{
|
||||
|
@ -1588,7 +1741,7 @@ async def generate_further_questions(state: State, config: RunnableConfig, write
|
|||
# Format chat history for the prompt
|
||||
chat_history_xml = "<chat_history>\n"
|
||||
for message in chat_history:
|
||||
if hasattr(message, 'type'):
|
||||
if hasattr(message, "type"):
|
||||
if message.type == "human":
|
||||
chat_history_xml += f"<user>{message.content}</user>\n"
|
||||
elif message.type == "ai":
|
||||
|
@ -1606,13 +1759,13 @@ async def generate_further_questions(state: State, config: RunnableConfig, write
|
|||
source_type = document_info.get("document_type", "UNKNOWN")
|
||||
content = doc.get("content", "")
|
||||
|
||||
documents_xml += f"<document>\n"
|
||||
documents_xml += f"<metadata>\n"
|
||||
documents_xml += "<document>\n"
|
||||
documents_xml += "<metadata>\n"
|
||||
documents_xml += f"<source_id>{source_id}</source_id>\n"
|
||||
documents_xml += f"<source_type>{source_type}</source_type>\n"
|
||||
documents_xml += f"</metadata>\n"
|
||||
documents_xml += "</metadata>\n"
|
||||
documents_xml += f"<content>\n{content}</content>\n"
|
||||
documents_xml += f"</document>\n"
|
||||
documents_xml += "</document>\n"
|
||||
documents_xml += "</documents>"
|
||||
|
||||
# Create the human message content
|
||||
|
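
The prompt assembly above wraps each retrieved document in XML-style tags before handing it to the LLM; a compact sketch of that formatting, assuming document dicts shaped like those returned by the fetch helpers in this file:

def documents_to_xml(documents: list) -> str:
    parts = ["<documents>"]
    for doc in documents:
        info = doc.get("document", {})
        parts.append("<document>")
        parts.append("<metadata>")
        parts.append(f"<source_id>{info.get('id', '')}</source_id>")
        parts.append(f"<source_type>{info.get('document_type', 'UNKNOWN')}</source_type>")
        parts.append("</metadata>")
        parts.append(f"<content>\n{doc.get('content', '')}</content>")
        parts.append("</document>")
    parts.append("</documents>")
    return "\n".join(parts)
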
@ -1651,7 +1804,7 @@ async def generate_further_questions(state: State, config: RunnableConfig, write
|
|||
# Create messages for the LLM
|
||||
messages = [
|
||||
SystemMessage(content=get_further_questions_system_prompt()),
|
||||
HumanMessage(content=human_message_content)
|
||||
HumanMessage(content=human_message_content),
|
||||
]
|
||||
|
||||
try:
|
||||
|
@ -1662,8 +1815,8 @@ async def generate_further_questions(state: State, config: RunnableConfig, write
|
|||
content = response.content
|
||||
|
||||
# Find the JSON in the content
|
||||
json_start = content.find('{')
|
||||
json_end = content.rfind('}') + 1
|
||||
json_start = content.find("{")
|
||||
json_end = content.rfind("}") + 1
|
||||
if json_start >= 0 and json_end > json_start:
|
||||
json_str = content[json_start:json_end]
|
||||
|
||||
|
|
|
@ -6,7 +6,6 @@ Allows fetching issue lists and their comments, projects and more.
|
|||
"""
|
||||
|
||||
import base64
|
||||
import json
|
||||
from datetime import datetime
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
|
@ -119,8 +118,6 @@ class JiraConnector:
|
|||
|
||||
response = requests.get(url, headers=headers, params=params, timeout=500)
|
||||
|
||||
print(json.dumps(response.json(), indent=2))
|
||||
|
||||
if response.status_code == 200:
|
||||
return response.json()
|
||||
else:
|
||||
|
@ -227,6 +224,7 @@ class JiraConnector:
|
|||
date_filter = (
|
||||
f"(createdDate >= '{start_date}' AND createdDate <= '{end_date}')"
|
||||
)
|
||||
# TODO : This JQL needs some improvement to work as expected
|
||||
|
||||
jql = f"{date_filter}"
|
||||
if project_key:
|
||||
|
@ -252,7 +250,7 @@ class JiraConnector:
|
|||
fields.append("comment")
|
||||
|
||||
params = {
|
||||
"jql": "",
|
||||
"jql": "", # TODO : Add a JQL query to filter from a date range
|
||||
"fields": ",".join(fields),
|
||||
"maxResults": 100,
|
||||
"startAt": 0,
|
||||
|
@@ -263,10 +261,8 @@ class JiraConnector:

        while True:
            params["startAt"] = start_at
            print(json.dumps(params, indent=2))
            result = self.make_api_request("search", params)

            print(json.dumps(result, indent=2))
            result = self.make_api_request("search", params)

            if not isinstance(result, dict) or "issues" not in result:
                return [], "Invalid response from Jira API"

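For reference, a sketch of the startAt-based pagination pattern used in the loop above; make_api_request and the "issues" key come from this connector, while the "total" field and the termination logic are assumptions based on the standard Jira search response:

def fetch_all_issues(connector, params: dict) -> list:
    issues: list = []
    start_at = 0
    while True:
        params["startAt"] = start_at
        result = connector.make_api_request("search", params)
        if not isinstance(result, dict) or "issues" not in result:
            break
        page = result["issues"]
        issues.extend(page)
        # Stop on an empty page or once the reported total has been collected.
        if not page or start_at + len(page) >= result.get("total", 0):
            break
        start_at += len(page)
    return issues
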
@@ -9,35 +9,58 @@ POST /search-source-connectors/{connector_id}/index - Index content from a connector

Note: Each user can have only one connector of each type (SERPER_API, TAVILY_API, SLACK_CONNECTOR, NOTION_CONNECTOR, GITHUB_CONNECTOR, LINEAR_CONNECTOR, DISCORD_CONNECTOR).
"""
from fastapi import APIRouter, Depends, HTTPException, Query, BackgroundTasks, Body
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy.future import select
from sqlalchemy.exc import IntegrityError
from typing import List, Dict, Any
from app.db import get_async_session, User, SearchSourceConnector, SearchSourceConnectorType, SearchSpace, async_session_maker
from app.schemas import SearchSourceConnectorCreate, SearchSourceConnectorUpdate, SearchSourceConnectorRead, SearchSourceConnectorBase

import logging
from datetime import datetime, timedelta
from typing import Any, Dict, List

from app.connectors.github_connector import GitHubConnector
from app.db import (
    SearchSourceConnector,
    SearchSourceConnectorType,
    SearchSpace,
    User,
    async_session_maker,
    get_async_session,
)
from app.schemas import (
    SearchSourceConnectorBase,
    SearchSourceConnectorCreate,
    SearchSourceConnectorRead,
    SearchSourceConnectorUpdate,
)
from app.tasks.connectors_indexing_tasks import (
    index_discord_messages,
    index_github_repos,
    index_jira_issues,
    index_linear_issues,
    index_notion_pages,
    index_slack_messages,
)
from app.users import current_active_user
from app.utils.check_ownership import check_ownership
from fastapi import APIRouter, BackgroundTasks, Depends, HTTPException, Query
from pydantic import BaseModel, Field, ValidationError
from app.tasks.connectors_indexing_tasks import index_slack_messages, index_notion_pages, index_github_repos, index_linear_issues, index_discord_messages, index_jira_issues
from app.connectors.github_connector import GitHubConnector
from datetime import datetime, timedelta
import logging
from sqlalchemy.exc import IntegrityError
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy.future import select

# Set up logging
logger = logging.getLogger(__name__)

router = APIRouter()


# Use Pydantic's BaseModel here
class GitHubPATRequest(BaseModel):
    github_pat: str = Field(..., description="GitHub Personal Access Token")


# --- New Endpoint to list GitHub Repositories ---
@router.post("/github/repositories/", response_model=List[Dict[str, Any]])
async def list_github_repositories(
    pat_request: GitHubPATRequest,
    user: User = Depends(current_active_user) # Ensure the user is logged in
    user: User = Depends(current_active_user), # Ensure the user is logged in
):
    """
    Fetches a list of repositories accessible by the provided GitHub PAT.
@ -54,14 +77,19 @@ async def list_github_repositories(
|
|||
logger.error(f"GitHub PAT validation failed for user {user.id}: {str(e)}")
|
||||
raise HTTPException(status_code=400, detail=f"Invalid GitHub PAT: {str(e)}")
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to fetch GitHub repositories for user {user.id}: {str(e)}")
|
||||
raise HTTPException(status_code=500, detail="Failed to fetch GitHub repositories.")
|
||||
logger.error(
|
||||
f"Failed to fetch GitHub repositories for user {user.id}: {str(e)}"
|
||||
)
|
||||
raise HTTPException(
|
||||
status_code=500, detail="Failed to fetch GitHub repositories."
|
||||
)
|
||||
|
||||
|
||||
@router.post("/search-source-connectors/", response_model=SearchSourceConnectorRead)
|
||||
async def create_search_source_connector(
|
||||
connector: SearchSourceConnectorCreate,
|
||||
session: AsyncSession = Depends(get_async_session),
|
||||
user: User = Depends(current_active_user)
|
||||
user: User = Depends(current_active_user),
|
||||
):
|
||||
"""
|
||||
Create a new search source connector.
|
||||
|
@ -72,17 +100,16 @@ async def create_search_source_connector(
|
|||
try:
|
||||
# Check if a connector with the same type already exists for this user
|
||||
result = await session.execute(
|
||||
select(SearchSourceConnector)
|
||||
.filter(
|
||||
select(SearchSourceConnector).filter(
|
||||
SearchSourceConnector.user_id == user.id,
|
||||
SearchSourceConnector.connector_type == connector.connector_type
|
||||
SearchSourceConnector.connector_type == connector.connector_type,
|
||||
)
|
||||
)
|
||||
existing_connector = result.scalars().first()
|
||||
if existing_connector:
|
||||
raise HTTPException(
|
||||
status_code=409,
|
||||
detail=f"A connector with type {connector.connector_type} already exists. Each user can have only one connector of each type."
|
||||
detail=f"A connector with type {connector.connector_type} already exists. Each user can have only one connector of each type.",
|
||||
)
|
||||
db_connector = SearchSourceConnector(**connector.model_dump(), user_id=user.id)
|
||||
session.add(db_connector)
|
||||
|
@ -91,15 +118,12 @@ async def create_search_source_connector(
|
|||
return db_connector
|
||||
except ValidationError as e:
|
||||
await session.rollback()
|
||||
raise HTTPException(
|
||||
status_code=422,
|
||||
detail=f"Validation error: {str(e)}"
|
||||
)
|
||||
raise HTTPException(status_code=422, detail=f"Validation error: {str(e)}")
|
||||
except IntegrityError as e:
|
||||
await session.rollback()
|
||||
raise HTTPException(
|
||||
status_code=409,
|
||||
detail=f"Integrity error: A connector with this type already exists. {str(e)}"
|
||||
detail=f"Integrity error: A connector with this type already exists. {str(e)}",
|
||||
)
|
||||
except HTTPException:
|
||||
await session.rollback()
|
||||
|
@ -109,38 +133,44 @@ async def create_search_source_connector(
|
|||
await session.rollback()
|
||||
raise HTTPException(
|
||||
status_code=500,
|
||||
detail=f"Failed to create search source connector: {str(e)}"
|
||||
detail=f"Failed to create search source connector: {str(e)}",
|
||||
)
|
||||
|
||||
@router.get("/search-source-connectors/", response_model=List[SearchSourceConnectorRead])
|
||||
|
||||
@router.get(
|
||||
"/search-source-connectors/", response_model=List[SearchSourceConnectorRead]
|
||||
)
|
||||
async def read_search_source_connectors(
|
||||
skip: int = 0,
|
||||
limit: int = 100,
|
||||
search_space_id: int = None,
|
||||
session: AsyncSession = Depends(get_async_session),
|
||||
user: User = Depends(current_active_user)
|
||||
user: User = Depends(current_active_user),
|
||||
):
|
||||
"""List all search source connectors for the current user."""
|
||||
try:
|
||||
query = select(SearchSourceConnector).filter(SearchSourceConnector.user_id == user.id)
|
||||
query = select(SearchSourceConnector).filter(
|
||||
SearchSourceConnector.user_id == user.id
|
||||
)
|
||||
|
||||
# No need to filter by search_space_id as connectors are user-owned, not search space specific
|
||||
|
||||
result = await session.execute(
|
||||
query.offset(skip).limit(limit)
|
||||
)
|
||||
result = await session.execute(query.offset(skip).limit(limit))
|
||||
return result.scalars().all()
|
||||
except Exception as e:
|
||||
raise HTTPException(
|
||||
status_code=500,
|
||||
detail=f"Failed to fetch search source connectors: {str(e)}"
|
||||
detail=f"Failed to fetch search source connectors: {str(e)}",
|
||||
)
|
||||
|
||||
@router.get("/search-source-connectors/{connector_id}", response_model=SearchSourceConnectorRead)
|
||||
|
||||
@router.get(
|
||||
"/search-source-connectors/{connector_id}", response_model=SearchSourceConnectorRead
|
||||
)
|
||||
async def read_search_source_connector(
|
||||
connector_id: int,
|
||||
session: AsyncSession = Depends(get_async_session),
|
||||
user: User = Depends(current_active_user)
|
||||
user: User = Depends(current_active_user),
|
||||
):
|
||||
"""Get a specific search source connector by ID."""
|
||||
try:
|
||||
|
@ -149,22 +179,26 @@ async def read_search_source_connector(
|
|||
raise
|
||||
except Exception as e:
|
||||
raise HTTPException(
|
||||
status_code=500,
|
||||
detail=f"Failed to fetch search source connector: {str(e)}"
|
||||
status_code=500, detail=f"Failed to fetch search source connector: {str(e)}"
|
||||
)
|
||||
|
||||
@router.put("/search-source-connectors/{connector_id}", response_model=SearchSourceConnectorRead)
|
||||
|
||||
@router.put(
|
||||
"/search-source-connectors/{connector_id}", response_model=SearchSourceConnectorRead
|
||||
)
|
||||
async def update_search_source_connector(
|
||||
connector_id: int,
|
||||
connector_update: SearchSourceConnectorUpdate,
|
||||
session: AsyncSession = Depends(get_async_session),
|
||||
user: User = Depends(current_active_user)
|
||||
user: User = Depends(current_active_user),
|
||||
):
|
||||
"""
|
||||
Update a search source connector.
|
||||
Handles partial updates, including merging changes into the 'config' field.
|
||||
"""
|
||||
db_connector = await check_ownership(session, SearchSourceConnector, connector_id, user)
|
||||
db_connector = await check_ownership(
|
||||
session, SearchSourceConnector, connector_id, user
|
||||
)
|
||||
|
||||
# Convert the sparse update data (only fields present in request) to a dict
|
||||
update_data = connector_update.model_dump(exclude_unset=True)
|
||||
|
@ -172,7 +206,9 @@ async def update_search_source_connector(
|
|||
# Special handling for 'config' field
|
||||
if "config" in update_data:
|
||||
incoming_config = update_data["config"] # Config data from the request
|
||||
existing_config = db_connector.config if db_connector.config else {} # Current config from DB
|
||||
existing_config = (
|
||||
db_connector.config if db_connector.config else {}
|
||||
) # Current config from DB
|
||||
|
||||
# Merge incoming config into existing config
|
||||
# This preserves existing keys (like GITHUB_PAT) if they are not in the incoming data
|
||||
|
@ -182,7 +218,11 @@ async def update_search_source_connector(
|
|||
# -- Validation after merging --
|
||||
# Validate the *merged* config based on the connector type
|
||||
# We need the connector type - use the one from the update if provided, else the existing one
|
||||
current_connector_type = connector_update.connector_type if connector_update.connector_type is not None else db_connector.connector_type
|
||||
current_connector_type = (
|
||||
connector_update.connector_type
|
||||
if connector_update.connector_type is not None
|
||||
else db_connector.connector_type
|
||||
)
|
||||
|
||||
try:
|
||||
# We can reuse the base validator by creating a temporary base model instance
|
||||
|
@ -192,14 +232,13 @@ async def update_search_source_connector(
|
|||
"connector_type": current_connector_type,
|
||||
"is_indexable": db_connector.is_indexable, # Use existing value
|
||||
"last_indexed_at": db_connector.last_indexed_at, # Not used by validator
|
||||
"config": merged_config
|
||||
"config": merged_config,
|
||||
}
|
||||
SearchSourceConnectorBase.model_validate(temp_data_for_validation)
|
||||
except ValidationError as e:
|
||||
# Raise specific validation error for the merged config
|
||||
raise HTTPException(
|
||||
status_code=422,
|
||||
detail=f"Validation error for merged config: {str(e)}"
|
||||
status_code=422, detail=f"Validation error for merged config: {str(e)}"
|
||||
)
|
||||
|
||||
# If validation passes, update the main update_data dict with the merged config
|
||||
|
@ -210,18 +249,17 @@ async def update_search_source_connector(
|
|||
# Prevent changing connector_type if it causes a duplicate (check moved here)
|
||||
if key == "connector_type" and value != db_connector.connector_type:
|
||||
result = await session.execute(
|
||||
select(SearchSourceConnector)
|
||||
.filter(
|
||||
select(SearchSourceConnector).filter(
|
||||
SearchSourceConnector.user_id == user.id,
|
||||
SearchSourceConnector.connector_type == value,
|
||||
SearchSourceConnector.id != connector_id
|
||||
SearchSourceConnector.id != connector_id,
|
||||
)
|
||||
)
|
||||
existing_connector = result.scalars().first()
|
||||
if existing_connector:
|
||||
raise HTTPException(
|
||||
status_code=409,
|
||||
detail=f"A connector with type {value} already exists. Each user can have only one connector of each type."
|
||||
detail=f"A connector with type {value} already exists. Each user can have only one connector of each type.",
|
||||
)
|
||||
|
||||
setattr(db_connector, key, value)
|
||||
|
@@ -234,26 +272,31 @@ async def update_search_source_connector(
        await session.rollback()
        # This might occur if connector_type constraint is violated somehow after the check
        raise HTTPException(
            status_code=409,
            detail=f"Database integrity error during update: {str(e)}"
            status_code=409, detail=f"Database integrity error during update: {str(e)}"
        )
    except Exception as e:
        await session.rollback()
        logger.error(f"Failed to update search source connector {connector_id}: {e}", exc_info=True)
        logger.error(
            f"Failed to update search source connector {connector_id}: {e}",
            exc_info=True,
        )
        raise HTTPException(
            status_code=500,
            detail=f"Failed to update search source connector: {str(e)}"
            detail=f"Failed to update search source connector: {str(e)}",
        )


@router.delete("/search-source-connectors/{connector_id}", response_model=dict)
async def delete_search_source_connector(
    connector_id: int,
    session: AsyncSession = Depends(get_async_session),
    user: User = Depends(current_active_user)
    user: User = Depends(current_active_user),
):
    """Delete a search source connector."""
    try:
        db_connector = await check_ownership(session, SearchSourceConnector, connector_id, user)
        db_connector = await check_ownership(
            session, SearchSourceConnector, connector_id, user
        )
        await session.delete(db_connector)
        await session.commit()
        return {"message": "Search source connector deleted successfully"}
@@ -263,18 +306,29 @@ async def delete_search_source_connector(
        await session.rollback()
        raise HTTPException(
            status_code=500,
            detail=f"Failed to delete search source connector: {str(e)}"
            detail=f"Failed to delete search source connector: {str(e)}",
        )

@router.post("/search-source-connectors/{connector_id}/index", response_model=Dict[str, Any])

@router.post(
    "/search-source-connectors/{connector_id}/index", response_model=Dict[str, Any]
)
async def index_connector_content(
    connector_id: int,
    search_space_id: int = Query(..., description="ID of the search space to store indexed content"),
    start_date: str = Query(None, description="Start date for indexing (YYYY-MM-DD format). If not provided, uses last_indexed_at or defaults to 365 days ago"),
    end_date: str = Query(None, description="End date for indexing (YYYY-MM-DD format). If not provided, uses today's date"),
    search_space_id: int = Query(
        ..., description="ID of the search space to store indexed content"
    ),
    start_date: str = Query(
        None,
        description="Start date for indexing (YYYY-MM-DD format). If not provided, uses last_indexed_at or defaults to 365 days ago",
    ),
    end_date: str = Query(
        None,
        description="End date for indexing (YYYY-MM-DD format). If not provided, uses today's date",
    ),
    session: AsyncSession = Depends(get_async_session),
    user: User = Depends(current_active_user),
    background_tasks: BackgroundTasks = None
    background_tasks: BackgroundTasks = None,
):
    """
    Index content from a connector to a search space.
@@ -297,10 +351,14 @@ async def index_connector_content(
    """
    try:
        # Check if the connector belongs to the user
        connector = await check_ownership(session, SearchSourceConnector, connector_id, user)
        connector = await check_ownership(
            session, SearchSourceConnector, connector_id, user
        )

        # Check if the search space belongs to the user
        search_space = await check_ownership(session, SearchSpace, search_space_id, user)
        search_space = await check_ownership(
            session, SearchSpace, search_space_id, user
        )

        # Handle different connector types
        response_message = ""
@@ -317,7 +375,9 @@ async def index_connector_content(
                else:
                    indexing_from = connector.last_indexed_at.strftime("%Y-%m-%d")
            else:
                indexing_from = (datetime.now() - timedelta(days=365)).strftime("%Y-%m-%d")
                indexing_from = (datetime.now() - timedelta(days=365)).strftime(
                    "%Y-%m-%d"
                )
        else:
            indexing_from = start_date

@@ -328,32 +388,77 @@ async def index_connector_content(

        if connector.connector_type == SearchSourceConnectorType.SLACK_CONNECTOR:
            # Run indexing in background
            logger.info(f"Triggering Slack indexing for connector {connector_id} into search space {search_space_id} from {indexing_from} to {indexing_to}")
            background_tasks.add_task(run_slack_indexing_with_new_session, connector_id, search_space_id, str(user.id), indexing_from, indexing_to)
            logger.info(
                f"Triggering Slack indexing for connector {connector_id} into search space {search_space_id} from {indexing_from} to {indexing_to}"
            )
            background_tasks.add_task(
                run_slack_indexing_with_new_session,
                connector_id,
                search_space_id,
                str(user.id),
                indexing_from,
                indexing_to,
            )
            response_message = "Slack indexing started in the background."

        elif connector.connector_type == SearchSourceConnectorType.NOTION_CONNECTOR:
            # Run indexing in background
            logger.info(f"Triggering Notion indexing for connector {connector_id} into search space {search_space_id} from {indexing_from} to {indexing_to}")
            background_tasks.add_task(run_notion_indexing_with_new_session, connector_id, search_space_id, str(user.id), indexing_from, indexing_to)
            logger.info(
                f"Triggering Notion indexing for connector {connector_id} into search space {search_space_id} from {indexing_from} to {indexing_to}"
            )
            background_tasks.add_task(
                run_notion_indexing_with_new_session,
                connector_id,
                search_space_id,
                str(user.id),
                indexing_from,
                indexing_to,
            )
            response_message = "Notion indexing started in the background."

        elif connector.connector_type == SearchSourceConnectorType.GITHUB_CONNECTOR:
            # Run indexing in background
            logger.info(f"Triggering GitHub indexing for connector {connector_id} into search space {search_space_id} from {indexing_from} to {indexing_to}")
            background_tasks.add_task(run_github_indexing_with_new_session, connector_id, search_space_id, str(user.id), indexing_from, indexing_to)
            logger.info(
                f"Triggering GitHub indexing for connector {connector_id} into search space {search_space_id} from {indexing_from} to {indexing_to}"
            )
            background_tasks.add_task(
                run_github_indexing_with_new_session,
                connector_id,
                search_space_id,
                str(user.id),
                indexing_from,
                indexing_to,
            )
            response_message = "GitHub indexing started in the background."

        elif connector.connector_type == SearchSourceConnectorType.LINEAR_CONNECTOR:
            # Run indexing in background
            logger.info(f"Triggering Linear indexing for connector {connector_id} into search space {search_space_id} from {indexing_from} to {indexing_to}")
            background_tasks.add_task(run_linear_indexing_with_new_session, connector_id, search_space_id, str(user.id), indexing_from, indexing_to)
            logger.info(
                f"Triggering Linear indexing for connector {connector_id} into search space {search_space_id} from {indexing_from} to {indexing_to}"
            )
            background_tasks.add_task(
                run_linear_indexing_with_new_session,
                connector_id,
                search_space_id,
                str(user.id),
                indexing_from,
                indexing_to,
            )
            response_message = "Linear indexing started in the background."

        elif connector.connector_type == SearchSourceConnectorType.JIRA_CONNECTOR:
            # Run indexing in background
            logger.info(f"Triggering Jira indexing for connector {connector_id} into search space {search_space_id} from {indexing_from} to {indexing_to}")
            background_tasks.add_task(run_jira_indexing_with_new_session, connector_id, search_space_id, str(user.id), indexing_from, indexing_to)
            logger.info(
                f"Triggering Jira indexing for connector {connector_id} into search space {search_space_id} from {indexing_from} to {indexing_to}"
            )
            background_tasks.add_task(
                run_jira_indexing_with_new_session,
                connector_id,
                search_space_id,
                str(user.id),
                indexing_from,
                indexing_to,
            )
            response_message = "Jira indexing started in the background."

        elif connector.connector_type == SearchSourceConnectorType.DISCORD_CONNECTOR:
@@ -362,14 +467,19 @@ async def index_connector_content(
                f"Triggering Discord indexing for connector {connector_id} into search space {search_space_id} from {indexing_from} to {indexing_to}"
            )
            background_tasks.add_task(
                run_discord_indexing_with_new_session, connector_id, search_space_id, str(user.id), indexing_from, indexing_to
                run_discord_indexing_with_new_session,
                connector_id,
                search_space_id,
                str(user.id),
                indexing_from,
                indexing_to,
            )
            response_message = "Discord indexing started in the background."

        else:
            raise HTTPException(
                status_code=400,
                detail=f"Indexing not supported for connector type: {connector.connector_type}"
                detail=f"Indexing not supported for connector type: {connector.connector_type}",
            )

        return {
@@ -377,21 +487,21 @@ async def index_connector_content(
            "connector_id": connector_id,
            "search_space_id": search_space_id,
            "indexing_from": indexing_from,
            "indexing_to": indexing_to
            "indexing_to": indexing_to,
        }
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Failed to initiate indexing for connector {connector_id}: {e}", exc_info=True)
        logger.error(
            f"Failed to initiate indexing for connector {connector_id}: {e}",
            exc_info=True,
        )
        raise HTTPException(
            status_code=500,
            detail=f"Failed to initiate indexing: {str(e)}"
            status_code=500, detail=f"Failed to initiate indexing: {str(e)}"
        )

async def update_connector_last_indexed(
    session: AsyncSession,
    connector_id: int
):

async def update_connector_last_indexed(session: AsyncSession, connector_id: int):
    """
    Update the last_indexed_at timestamp for a connector.

@@ -401,8 +511,9 @@ async def update_connector_last_indexed(
    """
    try:
        result = await session.execute(
            select(SearchSourceConnector)
            .filter(SearchSourceConnector.id == connector_id)
            select(SearchSourceConnector).filter(
                SearchSourceConnector.id == connector_id
            )
        )
        connector = result.scalars().first()

@@ -411,22 +522,28 @@ async def update_connector_last_indexed(
            await session.commit()
            logger.info(f"Updated last_indexed_at for connector {connector_id}")
    except Exception as e:
        logger.error(f"Failed to update last_indexed_at for connector {connector_id}: {str(e)}")
        logger.error(
            f"Failed to update last_indexed_at for connector {connector_id}: {str(e)}"
        )
        await session.rollback()


async def run_slack_indexing_with_new_session(
    connector_id: int,
    search_space_id: int,
    user_id: str,
    start_date: str,
    end_date: str
    end_date: str,
):
    """
    Create a new session and run the Slack indexing task.
    This prevents session leaks by creating a dedicated session for the background task.
    """
    async with async_session_maker() as session:
        await run_slack_indexing(session, connector_id, search_space_id, user_id, start_date, end_date)
        await run_slack_indexing(
            session, connector_id, search_space_id, user_id, start_date, end_date
        )


async def run_slack_indexing(
    session: AsyncSession,
@@ -434,7 +551,7 @@ async def run_slack_indexing(
    search_space_id: int,
    user_id: str,
    start_date: str,
    end_date: str
    end_date: str,
):
    """
    Background task to run Slack indexing.
@@ -456,31 +573,39 @@ async def run_slack_indexing(
            user_id=user_id,
            start_date=start_date,
            end_date=end_date,
            update_last_indexed=False # Don't update timestamp in the indexing function
            update_last_indexed=False, # Don't update timestamp in the indexing function
        )

        # Only update last_indexed_at if indexing was successful (either new docs or updated docs)
        if documents_processed > 0:
            await update_connector_last_indexed(session, connector_id)
            logger.info(f"Slack indexing completed successfully: {documents_processed} documents processed")
            logger.info(
                f"Slack indexing completed successfully: {documents_processed} documents processed"
            )
        else:
            logger.error(f"Slack indexing failed or no documents processed: {error_or_warning}")
            logger.error(
                f"Slack indexing failed or no documents processed: {error_or_warning}"
            )
    except Exception as e:
        logger.error(f"Error in background Slack indexing task: {str(e)}")


async def run_notion_indexing_with_new_session(
    connector_id: int,
    search_space_id: int,
    user_id: str,
    start_date: str,
    end_date: str
    end_date: str,
):
    """
    Create a new session and run the Notion indexing task.
    This prevents session leaks by creating a dedicated session for the background task.
    """
    async with async_session_maker() as session:
        await run_notion_indexing(session, connector_id, search_space_id, user_id, start_date, end_date)
        await run_notion_indexing(
            session, connector_id, search_space_id, user_id, start_date, end_date
        )


async def run_notion_indexing(
    session: AsyncSession,
@@ -488,7 +613,7 @@ async def run_notion_indexing(
    search_space_id: int,
    user_id: str,
    start_date: str,
    end_date: str
    end_date: str,
):
    """
    Background task to run Notion indexing.
@@ -510,112 +635,158 @@ async def run_notion_indexing(
            user_id=user_id,
            start_date=start_date,
            end_date=end_date,
            update_last_indexed=False # Don't update timestamp in the indexing function
            update_last_indexed=False, # Don't update timestamp in the indexing function
        )

        # Only update last_indexed_at if indexing was successful (either new docs or updated docs)
        if documents_processed > 0:
            await update_connector_last_indexed(session, connector_id)
            logger.info(f"Notion indexing completed successfully: {documents_processed} documents processed")
            logger.info(
                f"Notion indexing completed successfully: {documents_processed} documents processed"
            )
        else:
            logger.error(f"Notion indexing failed or no documents processed: {error_or_warning}")
            logger.error(
                f"Notion indexing failed or no documents processed: {error_or_warning}"
            )
    except Exception as e:
        logger.error(f"Error in background Notion indexing task: {str(e)}")


# Add new helper functions for GitHub indexing
async def run_github_indexing_with_new_session(
    connector_id: int,
    search_space_id: int,
    user_id: str,
    start_date: str,
    end_date: str
    end_date: str,
):
    """Wrapper to run GitHub indexing with its own database session."""
    logger.info(f"Background task started: Indexing GitHub connector {connector_id} into space {search_space_id} from {start_date} to {end_date}")
    logger.info(
        f"Background task started: Indexing GitHub connector {connector_id} into space {search_space_id} from {start_date} to {end_date}"
    )
    async with async_session_maker() as session:
        await run_github_indexing(session, connector_id, search_space_id, user_id, start_date, end_date)
        await run_github_indexing(
            session, connector_id, search_space_id, user_id, start_date, end_date
        )
    logger.info(f"Background task finished: Indexing GitHub connector {connector_id}")


async def run_github_indexing(
    session: AsyncSession,
    connector_id: int,
    search_space_id: int,
    user_id: str,
    start_date: str,
    end_date: str
    end_date: str,
):
    """Runs the GitHub indexing task and updates the timestamp."""
    try:
        indexed_count, error_message = await index_github_repos(
            session, connector_id, search_space_id, user_id, start_date, end_date, update_last_indexed=False
            session,
            connector_id,
            search_space_id,
            user_id,
            start_date,
            end_date,
            update_last_indexed=False,
        )
        if error_message:
            logger.error(f"GitHub indexing failed for connector {connector_id}: {error_message}")
            logger.error(
                f"GitHub indexing failed for connector {connector_id}: {error_message}"
            )
            # Optionally update status in DB to indicate failure
        else:
            logger.info(f"GitHub indexing successful for connector {connector_id}. Indexed {indexed_count} documents.")
            logger.info(
                f"GitHub indexing successful for connector {connector_id}. Indexed {indexed_count} documents."
            )
            # Update the last indexed timestamp only on success
            await update_connector_last_indexed(session, connector_id)
            await session.commit() # Commit timestamp update
    except Exception as e:
        await session.rollback()
        logger.error(f"Critical error in run_github_indexing for connector {connector_id}: {e}", exc_info=True)
        logger.error(
            f"Critical error in run_github_indexing for connector {connector_id}: {e}",
            exc_info=True,
        )
        # Optionally update status in DB to indicate failure


# Add new helper functions for Linear indexing
async def run_linear_indexing_with_new_session(
    connector_id: int,
    search_space_id: int,
    user_id: str,
    start_date: str,
    end_date: str
    end_date: str,
):
    """Wrapper to run Linear indexing with its own database session."""
    logger.info(f"Background task started: Indexing Linear connector {connector_id} into space {search_space_id} from {start_date} to {end_date}")
    logger.info(
        f"Background task started: Indexing Linear connector {connector_id} into space {search_space_id} from {start_date} to {end_date}"
    )
    async with async_session_maker() as session:
        await run_linear_indexing(session, connector_id, search_space_id, user_id, start_date, end_date)
        await run_linear_indexing(
            session, connector_id, search_space_id, user_id, start_date, end_date
        )
    logger.info(f"Background task finished: Indexing Linear connector {connector_id}")


async def run_linear_indexing(
    session: AsyncSession,
    connector_id: int,
    search_space_id: int,
    user_id: str,
    start_date: str,
    end_date: str
    end_date: str,
):
    """Runs the Linear indexing task and updates the timestamp."""
    try:
        indexed_count, error_message = await index_linear_issues(
            session, connector_id, search_space_id, user_id, start_date, end_date, update_last_indexed=False
            session,
            connector_id,
            search_space_id,
            user_id,
            start_date,
            end_date,
            update_last_indexed=False,
        )
        if error_message:
            logger.error(f"Linear indexing failed for connector {connector_id}: {error_message}")
            logger.error(
                f"Linear indexing failed for connector {connector_id}: {error_message}"
            )
            # Optionally update status in DB to indicate failure
        else:
            logger.info(f"Linear indexing successful for connector {connector_id}. Indexed {indexed_count} documents.")
            logger.info(
                f"Linear indexing successful for connector {connector_id}. Indexed {indexed_count} documents."
            )
            # Update the last indexed timestamp only on success
            await update_connector_last_indexed(session, connector_id)
            await session.commit() # Commit timestamp update
    except Exception as e:
        await session.rollback()
        logger.error(f"Critical error in run_linear_indexing for connector {connector_id}: {e}", exc_info=True)
        logger.error(
            f"Critical error in run_linear_indexing for connector {connector_id}: {e}",
            exc_info=True,
        )
        # Optionally update status in DB to indicate failure


# Add new helper functions for discord indexing
async def run_discord_indexing_with_new_session(
    connector_id: int,
    search_space_id: int,
    user_id: str,
    start_date: str,
    end_date: str
    end_date: str,
):
    """
    Create a new session and run the Discord indexing task.
    This prevents session leaks by creating a dedicated session for the background task.
    """
    async with async_session_maker() as session:
        await run_discord_indexing(session, connector_id, search_space_id, user_id, start_date, end_date)
        await run_discord_indexing(
            session, connector_id, search_space_id, user_id, start_date, end_date
        )


async def run_discord_indexing(
    session: AsyncSession,
@@ -623,7 +794,7 @@ async def run_discord_indexing(
    search_space_id: int,
    user_id: str,
    start_date: str,
    end_date: str
    end_date: str,
):
    """
    Background task to run Discord indexing.
@@ -644,15 +815,19 @@ async def run_discord_indexing(
            user_id=user_id,
            start_date=start_date,
            end_date=end_date,
            update_last_indexed=False # Don't update timestamp in the indexing function
            update_last_indexed=False, # Don't update timestamp in the indexing function
        )

        # Only update last_indexed_at if indexing was successful (either new docs or updated docs)
        if documents_processed > 0:
            await update_connector_last_indexed(session, connector_id)
            logger.info(f"Discord indexing completed successfully: {documents_processed} documents processed")
            logger.info(
                f"Discord indexing completed successfully: {documents_processed} documents processed"
            )
        else:
            logger.error(f"Discord indexing failed or no documents processed: {error_or_warning}")
            logger.error(
                f"Discord indexing failed or no documents processed: {error_or_warning}"
            )
    except Exception as e:
        logger.error(f"Error in background Discord indexing task: {str(e)}")

@@ -663,36 +838,53 @@ async def run_jira_indexing_with_new_session(
    search_space_id: int,
    user_id: str,
    start_date: str,
    end_date: str
    end_date: str,
):
    """Wrapper to run Jira indexing with its own database session."""
    logger.info(f"Background task started: Indexing Jira connector {connector_id} into space {search_space_id} from {start_date} to {end_date}")
    logger.info(
        f"Background task started: Indexing Jira connector {connector_id} into space {search_space_id} from {start_date} to {end_date}"
    )
    async with async_session_maker() as session:
        await run_jira_indexing(session, connector_id, search_space_id, user_id, start_date, end_date)
        await run_jira_indexing(
            session, connector_id, search_space_id, user_id, start_date, end_date
        )
    logger.info(f"Background task finished: Indexing Jira connector {connector_id}")


async def run_jira_indexing(
    session: AsyncSession,
    connector_id: int,
    search_space_id: int,
    user_id: str,
    start_date: str,
    end_date: str
    end_date: str,
):
    """Runs the Jira indexing task and updates the timestamp."""
    try:
        indexed_count, error_message = await index_jira_issues(
            session, connector_id, search_space_id, user_id, start_date, end_date, update_last_indexed=False
            session,
            connector_id,
            search_space_id,
            user_id,
            start_date,
            end_date,
            update_last_indexed=False,
        )
        if error_message:
            logger.error(f"Jira indexing failed for connector {connector_id}: {error_message}")
            logger.error(
                f"Jira indexing failed for connector {connector_id}: {error_message}"
            )
            # Optionally update status in DB to indicate failure
        else:
            logger.info(f"Jira indexing successful for connector {connector_id}. Indexed {indexed_count} documents.")
            logger.info(
                f"Jira indexing successful for connector {connector_id}. Indexed {indexed_count} documents."
            )
            # Update the last indexed timestamp only on success
            await update_connector_last_indexed(session, connector_id)
            await session.commit() # Commit timestamp update
    except Exception as e:
        await session.rollback()
        logger.error(f"Critical error in run_jira_indexing for connector {connector_id}: {e}", exc_info=True)
        logger.error(
            f"Critical error in run_jira_indexing for connector {connector_id}: {e}",
            exc_info=True,
        )
        # Optionally update status in DB to indicate failure
@@ -992,7 +992,7 @@ class ConnectorService:
        # Early return if no results
        if not jira_chunks:
            return {
                "id": 10,
                "id": 30,
                "name": "Jira Issues",
                "type": "JIRA_CONNECTOR",
                "sources": [],
@@ -60,7 +60,7 @@ import {
	IconBrandSlack,
	IconBrandYoutube,
	IconLayoutKanban,
	IconBrandTrello,
	IconTicket,
} from "@tabler/icons-react";
import {
	ColumnDef,
@@ -178,7 +178,7 @@ const documentTypeIcons = {
	YOUTUBE_VIDEO: IconBrandYoutube,
	GITHUB_CONNECTOR: IconBrandGithub,
	LINEAR_CONNECTOR: IconLayoutKanban,
	JIRA_CONNECTOR: IconBrandTrello,
	JIRA_CONNECTOR: IconTicket,
	DISCORD_CONNECTOR: IconBrandDiscord,
} as const;