mirror of https://github.com/MODSetter/SurfSense.git
synced 2025-09-01 18:19:08 +00:00

commit cd05a06a91 (parent ca98693005)
update connector indexing / update connector service

8 changed files with 1544 additions and 732 deletions
@@ -186,6 +186,27 @@ async def fetch_documents_by_ids(
             else:
                 url = ""

+        elif doc_type == "JIRA_CONNECTOR":
+            # Extract Jira-specific metadata
+            issue_key = metadata.get('issue_key', 'Unknown Issue')
+            issue_title = metadata.get('issue_title', 'Untitled Issue')
+            status = metadata.get('status', '')
+            priority = metadata.get('priority', '')
+            issue_type = metadata.get('issue_type', '')
+
+            title = f"Jira: {issue_key} - {issue_title}"
+            if status:
+                title += f" ({status})"
+
+            description = doc.content[:100] + "..." if len(doc.content) > 100 else doc.content
+
+            # Construct Jira URL if we have the base URL
+            base_url = metadata.get('base_url', '')
+            if base_url and issue_key:
+                url = f"{base_url}/browse/{issue_key}"
+            else:
+                url = ""
+
         elif doc_type == "EXTENSION":
             # Extract Extension-specific metadata
             webpage_title = metadata.get('VisitedWebPageTitle', doc.title)
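Taken on its own, the added JIRA_CONNECTOR branch is a pure metadata-to-display mapping. A minimal sketch of that logic with hypothetical sample values (the metadata keys come from the diff; the values are made up):

    # Sketch of the branch above, runnable standalone; all values hypothetical.
    metadata = {
        "issue_key": "PROJ-42",
        "issue_title": "Fix login timeout",
        "status": "In Progress",
        "base_url": "https://example.atlassian.net",  # hypothetical Jira site
    }

    issue_key = metadata.get("issue_key", "Unknown Issue")
    issue_title = metadata.get("issue_title", "Untitled Issue")
    status = metadata.get("status", "")

    title = f"Jira: {issue_key} - {issue_title}"
    if status:
        title += f" ({status})"
    # title == "Jira: PROJ-42 - Fix login timeout (In Progress)"

    base_url = metadata.get("base_url", "")
    url = f"{base_url}/browse/{issue_key}" if base_url and issue_key else ""
    # url == "https://example.atlassian.net/browse/PROJ-42"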
@@ -227,6 +248,7 @@ async def fetch_documents_by_ids(
         "GITHUB_CONNECTOR": "GitHub (Selected)",
         "YOUTUBE_VIDEO": "YouTube Videos (Selected)",
         "DISCORD_CONNECTOR": "Discord (Selected)",
+        "JIRA_CONNECTOR": "Jira Issues (Selected)",
         "EXTENSION": "Browser Extension (Selected)",
         "CRAWLED_URL": "Web Pages (Selected)",
         "FILE": "Files (Selected)"
@@ -741,6 +763,30 @@ async def fetch_relevant_documents(
                 }
             )

+        elif connector == "JIRA_CONNECTOR":
+            source_object, jira_chunks = await connector_service.search_jira(
+                user_query=reformulated_query,
+                user_id=user_id,
+                search_space_id=search_space_id,
+                top_k=top_k,
+                search_mode=search_mode
+            )
+
+            # Add to sources and raw documents
+            if source_object:
+                all_sources.append(source_object)
+            all_raw_documents.extend(jira_chunks)
+
+            # Stream found document count
+            if streaming_service and writer:
+                writer(
+                    {
+                        "yield_value": streaming_service.format_terminal_info_delta(
+                            f"🎫 Found {len(jira_chunks)} Jira issues related to your query"
+                        )
+                    }
+                )
+
     except Exception as e:
         error_message = f"Error searching connector {connector}: {str(e)}"
         print(error_message)
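The new branch sits alongside the other connector branches inside the per-connector error handling, so a Jira failure is logged and the search presumably moves on to the remaining connectors rather than aborting the whole fetch. A self-contained toy model of that control flow (the connector names, search stub, and simulated failure are all made up):

    import asyncio

    async def search(connector: str) -> tuple[dict, list]:
        # Stand-in for the per-connector search branch above.
        if connector == "BROKEN_CONNECTOR":
            raise RuntimeError("simulated failure")
        return {"name": connector}, [f"{connector}-chunk"]

    async def main() -> None:
        all_sources, all_raw_documents = [], []
        for connector in ["JIRA_CONNECTOR", "BROKEN_CONNECTOR"]:
            try:
                source_object, chunks = await search(connector)
                if source_object:
                    all_sources.append(source_object)
                all_raw_documents.extend(chunks)
            except Exception as e:
                print(f"Error searching connector {connector}: {str(e)}")

    asyncio.run(main())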
@@ -15,6 +15,8 @@ You are SurfSense, an advanced AI research assistant that provides detailed, wel
 - YOUTUBE_VIDEO: "YouTube video transcripts and metadata" (personally saved videos)
 - GITHUB_CONNECTOR: "GitHub repository content and issues" (personal repositories and interactions)
 - LINEAR_CONNECTOR: "Linear project issues and discussions" (personal project management)
+- JIRA_CONNECTOR: "Jira project issues, tickets, and comments" (personal project tracking)
+- DISCORD_CONNECTOR: "Discord server conversations and shared content" (personal community communications)
 - DISCORD_CONNECTOR: "Discord server messages and channels" (personal community interactions)
 - TAVILY_API: "Tavily search API results" (personalized search results)
 - LINKUP_API: "Linkup search API results" (personalized search results)
@@ -33,6 +33,8 @@ def get_connector_emoji(connector_name: str) -> str:
         "NOTION_CONNECTOR": "📘",
         "GITHUB_CONNECTOR": "🐙",
         "LINEAR_CONNECTOR": "📊",
+        "JIRA_CONNECTOR": "🎫",
+        "DISCORD_CONNECTOR": "🗨️",
         "TAVILY_API": "🔍",
         "LINKUP_API": "🔗"
     }
@@ -50,6 +52,8 @@ def get_connector_friendly_name(connector_name: str) -> str:
         "NOTION_CONNECTOR": "Notion",
         "GITHUB_CONNECTOR": "GitHub",
         "LINEAR_CONNECTOR": "Linear",
+        "JIRA_CONNECTOR": "Jira",
+        "DISCORD_CONNECTOR": "Discord",
         "TAVILY_API": "Tavily Search",
         "LINKUP_API": "Linkup Search"
     }
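Both helpers now resolve the new connector type consistently. Assuming each function simply returns the mapped value for a known key, as the surrounding dict literals suggest, a quick check would look like:

    # Expected lookups after this change (function names are from the diff).
    assert get_connector_emoji("JIRA_CONNECTOR") == "🎫"
    assert get_connector_friendly_name("JIRA_CONNECTOR") == "Jira"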
surfsense_backend/app/connectors/test_jira_connector.py (new file, 218 lines)

@@ -0,0 +1,218 @@
+import unittest
+from unittest.mock import patch, Mock
+from datetime import datetime
+
+# Import the JiraConnector
+from .jira_connector import JiraConnector
+
+
+class TestJiraConnector(unittest.TestCase):
+
+    def setUp(self):
+        """Set up test fixtures."""
+        self.base_url = "https://test.atlassian.net"
+        self.token = "test_token"
+        self.connector = JiraConnector(base_url=self.base_url, personal_access_token=self.token)
+
+    def test_init(self):
+        """Test JiraConnector initialization."""
+        self.assertEqual(self.connector.base_url, self.base_url)
+        self.assertEqual(self.connector.personal_access_token, self.token)
+        self.assertEqual(self.connector.api_version, "3")
+
+    def test_init_with_trailing_slash(self):
+        """Test JiraConnector initialization with trailing slash in URL."""
+        connector = JiraConnector(base_url="https://test.atlassian.net/", personal_access_token=self.token)
+        self.assertEqual(connector.base_url, "https://test.atlassian.net")
+
+    def test_set_credentials(self):
+        """Test setting credentials."""
+        new_url = "https://newtest.atlassian.net/"
+        new_token = "new_token"
+
+        self.connector.set_credentials(new_url, new_token)
+
+        self.assertEqual(self.connector.base_url, "https://newtest.atlassian.net")
+        self.assertEqual(self.connector.personal_access_token, new_token)
+
+    def test_get_headers(self):
+        """Test header generation."""
+        headers = self.connector.get_headers()
+
+        self.assertIn('Content-Type', headers)
+        self.assertIn('Authorization', headers)
+        self.assertIn('Accept', headers)
+        self.assertEqual(headers['Content-Type'], 'application/json')
+        self.assertEqual(headers['Accept'], 'application/json')
+        self.assertTrue(headers['Authorization'].startswith('Bearer '))
+
+    def test_get_headers_no_credentials(self):
+        """Test header generation without credentials."""
+        connector = JiraConnector()
+
+        with self.assertRaises(ValueError) as context:
+            connector.get_headers()
+
+        self.assertIn("Jira credentials not initialized", str(context.exception))
+
+    @patch('requests.get')
+    def test_make_api_request_success(self, mock_get):
+        """Test successful API request."""
+        mock_response = Mock()
+        mock_response.status_code = 200
+        mock_response.json.return_value = {"test": "data"}
+        mock_get.return_value = mock_response
+
+        result = self.connector.make_api_request("test/endpoint")
+
+        self.assertEqual(result, {"test": "data"})
+        mock_get.assert_called_once()
+
+    @patch('requests.get')
+    def test_make_api_request_failure(self, mock_get):
+        """Test failed API request."""
+        mock_response = Mock()
+        mock_response.status_code = 401
+        mock_response.text = "Unauthorized"
+        mock_get.return_value = mock_response
+
+        with self.assertRaises(Exception) as context:
+            self.connector.make_api_request("test/endpoint")
+
+        self.assertIn("API request failed with status code 401", str(context.exception))
+
+    @patch.object(JiraConnector, 'make_api_request')
+    def test_get_all_projects(self, mock_api_request):
+        """Test getting all projects."""
+        mock_api_request.return_value = {
+            "values": [
+                {"id": "1", "key": "TEST", "name": "Test Project"},
+                {"id": "2", "key": "DEMO", "name": "Demo Project"}
+            ]
+        }
+
+        projects = self.connector.get_all_projects()
+
+        self.assertEqual(len(projects), 2)
+        self.assertEqual(projects[0]["key"], "TEST")
+        self.assertEqual(projects[1]["key"], "DEMO")
+        mock_api_request.assert_called_once_with("project")
+
+    @patch.object(JiraConnector, 'make_api_request')
+    def test_get_all_issues(self, mock_api_request):
+        """Test getting all issues."""
+        mock_api_request.return_value = {
+            "issues": [
+                {
+                    "id": "1",
+                    "key": "TEST-1",
+                    "fields": {
+                        "summary": "Test Issue",
+                        "description": "Test Description",
+                        "status": {"name": "Open"},
+                        "priority": {"name": "High"},
+                        "issuetype": {"name": "Bug"},
+                        "project": {"key": "TEST"},
+                        "created": "2023-01-01T10:00:00.000+0000",
+                        "updated": "2023-01-01T12:00:00.000+0000"
+                    }
+                }
+            ],
+            "total": 1
+        }
+
+        issues = self.connector.get_all_issues()
+
+        self.assertEqual(len(issues), 1)
+        self.assertEqual(issues[0]["key"], "TEST-1")
+        self.assertEqual(issues[0]["fields"]["summary"], "Test Issue")
+
+    def test_format_issue(self):
+        """Test issue formatting."""
+        raw_issue = {
+            "id": "1",
+            "key": "TEST-1",
+            "fields": {
+                "summary": "Test Issue",
+                "description": "Test Description",
+                "status": {"name": "Open", "statusCategory": {"name": "To Do"}},
+                "priority": {"name": "High"},
+                "issuetype": {"name": "Bug"},
+                "project": {"key": "TEST"},
+                "created": "2023-01-01T10:00:00.000+0000",
+                "updated": "2023-01-01T12:00:00.000+0000",
+                "reporter": {
+                    "accountId": "123",
+                    "displayName": "John Doe",
+                    "emailAddress": "john@example.com"
+                },
+                "assignee": {
+                    "accountId": "456",
+                    "displayName": "Jane Smith",
+                    "emailAddress": "jane@example.com"
+                }
+            }
+        }
+
+        formatted = self.connector.format_issue(raw_issue)
+
+        self.assertEqual(formatted["id"], "1")
+        self.assertEqual(formatted["key"], "TEST-1")
+        self.assertEqual(formatted["title"], "Test Issue")
+        self.assertEqual(formatted["status"], "Open")
+        self.assertEqual(formatted["priority"], "High")
+        self.assertEqual(formatted["issue_type"], "Bug")
+        self.assertEqual(formatted["project"], "TEST")
+        self.assertEqual(formatted["reporter"]["display_name"], "John Doe")
+        self.assertEqual(formatted["assignee"]["display_name"], "Jane Smith")
+
+    def test_format_date(self):
+        """Test date formatting."""
+        iso_date = "2023-01-01T10:30:00.000+0000"
+        formatted_date = JiraConnector.format_date(iso_date)
+
+        self.assertEqual(formatted_date, "2023-01-01 10:30:00")
+
+    def test_format_date_invalid(self):
+        """Test date formatting with invalid input."""
+        formatted_date = JiraConnector.format_date("invalid-date")
+        self.assertEqual(formatted_date, "invalid-date")
+
+        formatted_date = JiraConnector.format_date("")
+        self.assertEqual(formatted_date, "Unknown date")
+
+        formatted_date = JiraConnector.format_date(None)
+        self.assertEqual(formatted_date, "Unknown date")
+
+    def test_format_issue_to_markdown(self):
+        """Test issue to markdown conversion."""
+        formatted_issue = {
+            "key": "TEST-1",
+            "title": "Test Issue",
+            "status": "Open",
+            "priority": "High",
+            "issue_type": "Bug",
+            "project": "TEST",
+            "assignee": {"display_name": "Jane Smith"},
+            "reporter": {"display_name": "John Doe"},
+            "created_at": "2023-01-01T10:00:00.000+0000",
+            "updated_at": "2023-01-01T12:00:00.000+0000",
+            "description": "Test Description",
+            "comments": []
+        }
+
+        markdown = self.connector.format_issue_to_markdown(formatted_issue)
+
+        self.assertIn("# TEST-1: Test Issue", markdown)
+        self.assertIn("**Status:** Open", markdown)
+        self.assertIn("**Priority:** High", markdown)
+        self.assertIn("**Type:** Bug", markdown)
+        self.assertIn("**Project:** TEST", markdown)
+        self.assertIn("**Assignee:** Jane Smith", markdown)
+        self.assertIn("**Reporter:** John Doe", markdown)
+        self.assertIn("## Description", markdown)
+        self.assertIn("Test Description", markdown)
+
+
+if __name__ == '__main__':
+    unittest.main()
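Every HTTP interaction in the suite is mocked (requests.get and make_api_request are patched), so the tests need no live Jira instance. Assuming the package layout implied by the relative import, one plausible invocation from the surfsense_backend directory:

    python -m unittest app.connectors.test_jira_connector -v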
@@ -19,7 +19,7 @@ from app.schemas import SearchSourceConnectorCreate, SearchSourceConnectorUpdate
 from app.users import current_active_user
 from app.utils.check_ownership import check_ownership
 from pydantic import BaseModel, Field, ValidationError
-from app.tasks.connectors_indexing_tasks import index_slack_messages, index_notion_pages, index_github_repos, index_linear_issues, index_discord_messages
+from app.tasks.connectors_indexing_tasks import index_slack_messages, index_notion_pages, index_github_repos, index_linear_issues, index_discord_messages, index_jira_issues
 from app.connectors.github_connector import GitHubConnector
 from datetime import datetime, timedelta
 import logging
@@ -284,6 +284,7 @@ async def index_connector_content(
     - NOTION_CONNECTOR: Indexes pages from all accessible Notion pages
     - GITHUB_CONNECTOR: Indexes code and documentation from GitHub repositories
     - LINEAR_CONNECTOR: Indexes issues and comments from Linear
+    - JIRA_CONNECTOR: Indexes issues and comments from Jira
     - DISCORD_CONNECTOR: Indexes messages from all accessible Discord channels

     Args:
@@ -349,6 +350,12 @@ async def index_connector_content(
         background_tasks.add_task(run_linear_indexing_with_new_session, connector_id, search_space_id, str(user.id), indexing_from, indexing_to)
         response_message = "Linear indexing started in the background."

+    elif connector.connector_type == SearchSourceConnectorType.JIRA_CONNECTOR:
+        # Run indexing in background
+        logger.info(f"Triggering Jira indexing for connector {connector_id} into search space {search_space_id} from {indexing_from} to {indexing_to}")
+        background_tasks.add_task(run_jira_indexing_with_new_session, connector_id, search_space_id, str(user.id), indexing_from, indexing_to)
+        response_message = "Jira indexing started in the background."
+
     elif connector.connector_type == SearchSourceConnectorType.DISCORD_CONNECTOR:
         # Run indexing in background
         logger.info(
@@ -648,3 +655,44 @@ async def run_discord_indexing(
         logger.error(f"Discord indexing failed or no documents processed: {error_or_warning}")
     except Exception as e:
         logger.error(f"Error in background Discord indexing task: {str(e)}")
+
+
+# Add new helper functions for Jira indexing
+async def run_jira_indexing_with_new_session(
+    connector_id: int,
+    search_space_id: int,
+    user_id: str,
+    start_date: str,
+    end_date: str
+):
+    """Wrapper to run Jira indexing with its own database session."""
+    logger.info(f"Background task started: Indexing Jira connector {connector_id} into space {search_space_id} from {start_date} to {end_date}")
+    async with async_session_maker() as session:
+        await run_jira_indexing(session, connector_id, search_space_id, user_id, start_date, end_date)
+    logger.info(f"Background task finished: Indexing Jira connector {connector_id}")
+
+
+async def run_jira_indexing(
+    session: AsyncSession,
+    connector_id: int,
+    search_space_id: int,
+    user_id: str,
+    start_date: str,
+    end_date: str
+):
+    """Runs the Jira indexing task and updates the timestamp."""
+    try:
+        indexed_count, error_message = await index_jira_issues(
+            session, connector_id, search_space_id, user_id, start_date, end_date, update_last_indexed=False
+        )
+        if error_message:
+            logger.error(f"Jira indexing failed for connector {connector_id}: {error_message}")
+            # Optionally update status in DB to indicate failure
+        else:
+            logger.info(f"Jira indexing successful for connector {connector_id}. Indexed {indexed_count} documents.")
+            # Update the last indexed timestamp only on success
+            await update_connector_last_indexed(session, connector_id)
+            await session.commit()  # Commit timestamp update
+    except Exception as e:
+        await session.rollback()
+        logger.error(f"Critical error in run_jira_indexing for connector {connector_id}: {e}", exc_info=True)
+        # Optionally update status in DB to indicate failure
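The two new helpers follow the same two-layer pattern as the existing Slack/Linear/Discord wrappers: the outer function owns the session lifecycle, the inner one owns commit/rollback. A stripped-down sketch of that pattern (async_session_maker and do_indexing are stand-ins, not the real task plumbing):

    # Minimal sketch of the session-per-background-task pattern used above.
    async def run_with_new_session(connector_id: int) -> None:
        async with async_session_maker() as session:   # fresh session per task
            try:
                await do_indexing(session, connector_id)
                await session.commit()                 # persist only on success
            except Exception:
                await session.rollback()               # leave the DB untouched
                raise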
@@ -101,6 +101,19 @@ class SearchSourceConnectorBase(BaseModel):
             # Ensure the bot token is not empty
             if not config.get("DISCORD_BOT_TOKEN"):
                 raise ValueError("DISCORD_BOT_TOKEN cannot be empty")
+
+        elif connector_type == SearchSourceConnectorType.JIRA_CONNECTOR:
+            # For JIRA_CONNECTOR, allow JIRA_PERSONAL_ACCESS_TOKEN and JIRA_BASE_URL
+            allowed_keys = ["JIRA_PERSONAL_ACCESS_TOKEN", "JIRA_BASE_URL"]
+            if set(config.keys()) != set(allowed_keys):
+                raise ValueError(f"For JIRA_CONNECTOR connector type, config must only contain these keys: {allowed_keys}")
+
+            # Ensure the token is not empty
+            if not config.get("JIRA_PERSONAL_ACCESS_TOKEN"):
+                raise ValueError("JIRA_PERSONAL_ACCESS_TOKEN cannot be empty")
+
+            # Ensure the base URL is not empty
+            if not config.get("JIRA_BASE_URL"):
+                raise ValueError("JIRA_BASE_URL cannot be empty")
+
         return config
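Under this validator, a JIRA_CONNECTOR config must contain exactly the two allowed keys, both non-empty. A sketch of a payload that would pass (the values are placeholders):

    config = {
        "JIRA_BASE_URL": "https://example.atlassian.net",   # hypothetical
        "JIRA_PERSONAL_ACCESS_TOKEN": "pat-xxxxxxxx",       # hypothetical
    }
    # Any extra key, missing key, or empty value raises ValueError.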
@@ -1,15 +1,21 @@
-from typing import List, Dict, Optional
 import asyncio
-from sqlalchemy.ext.asyncio import AsyncSession
-from sqlalchemy.future import select
-from app.retriver.chunks_hybrid_search import ChucksHybridSearchRetriever
-from app.retriver.documents_hybrid_search import DocumentHybridSearchRetriever
-from app.db import SearchSourceConnector, SearchSourceConnectorType, Chunk, Document, SearchSpace
-from tavily import TavilyClient
-from linkup import LinkupClient
-from sqlalchemy import func
-
+from typing import Dict, List, Optional
+
 from app.agents.researcher.configuration import SearchMode
+from app.db import (
+    Chunk,
+    Document,
+    SearchSourceConnector,
+    SearchSourceConnectorType,
+    SearchSpace,
+)
+from app.retriver.chunks_hybrid_search import ChucksHybridSearchRetriever
+from app.retriver.documents_hybrid_search import DocumentHybridSearchRetriever
+from linkup import LinkupClient
+from sqlalchemy import func
+from sqlalchemy.ext.asyncio import AsyncSession
+from sqlalchemy.future import select
+from tavily import TavilyClient


 class ConnectorService:
@@ -18,8 +24,12 @@ class ConnectorService:
         self.chunk_retriever = ChucksHybridSearchRetriever(session)
         self.document_retriever = DocumentHybridSearchRetriever(session)
         self.user_id = user_id
-        self.source_id_counter = 100000  # High starting value to avoid collisions with existing IDs
-        self.counter_lock = asyncio.Lock()  # Lock to protect counter in multithreaded environments
+        self.source_id_counter = (
+            100000  # High starting value to avoid collisions with existing IDs
+        )
+        self.counter_lock = (
+            asyncio.Lock()
+        )  # Lock to protect counter in multithreaded environments

     async def initialize_counter(self):
         """
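The reformatted assignments do not change behavior: source_id_counter is still a plain int guarded by an asyncio.Lock, so concurrent connector searches can increment it without racing. A minimal sketch of the guarded-increment pattern the class relies on:

    import asyncio

    class SourceIdCounter:
        """Toy version of the counter/lock pair in ConnectorService."""

        def __init__(self) -> None:
            self.value = 100000          # high floor, as in the real class
            self.lock = asyncio.Lock()

        async def next_id(self) -> int:
            async with self.lock:        # serialize concurrent increments
                self.value += 1
                return self.value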
@@ -38,13 +48,22 @@ class ConnectorService:
             )
             chunk_count = result.scalar() or 0
             self.source_id_counter = chunk_count + 1
-            print(f"Initialized source_id_counter to {self.source_id_counter} for user {self.user_id}")
+            print(
+                f"Initialized source_id_counter to {self.source_id_counter} for user {self.user_id}"
+            )
         except Exception as e:
             print(f"Error initializing source_id_counter: {str(e)}")
             # Fallback to default value
             self.source_id_counter = 1

-    async def search_crawled_urls(self, user_query: str, user_id: str, search_space_id: int, top_k: int = 20, search_mode: SearchMode = SearchMode.CHUNKS) -> tuple:
+    async def search_crawled_urls(
+        self,
+        user_query: str,
+        user_id: str,
+        search_space_id: int,
+        top_k: int = 20,
+        search_mode: SearchMode = SearchMode.CHUNKS,
+    ) -> tuple:
         """
         Search for crawled URLs and return both the source information and langchain documents
@@ -57,7 +76,7 @@ class ConnectorService:
                 top_k=top_k,
                 user_id=user_id,
                 search_space_id=search_space_id,
-                document_type="CRAWLED_URL"
+                document_type="CRAWLED_URL",
             )
         elif search_mode == SearchMode.DOCUMENTS:
             crawled_urls_chunks = await self.document_retriever.hybrid_search(
@@ -65,7 +84,7 @@ class ConnectorService:
                 top_k=top_k,
                 user_id=user_id,
                 search_space_id=search_space_id,
-                document_type="CRAWLED_URL"
+                document_type="CRAWLED_URL",
             )
             # Transform document retriever results to match expected format
             crawled_urls_chunks = self._transform_document_results(crawled_urls_chunks)
@@ -84,15 +103,18 @@ class ConnectorService:
         async with self.counter_lock:
             for _i, chunk in enumerate(crawled_urls_chunks):
                 # Extract document metadata
-                document = chunk.get('document', {})
-                metadata = document.get('metadata', {})
+                document = chunk.get("document", {})
+                metadata = document.get("metadata", {})

                 # Create a source entry
                 source = {
-                    "id": document.get('id', self.source_id_counter),
-                    "title": document.get('title', 'Untitled Document'),
-                    "description": metadata.get('og:description', metadata.get('ogDescription', chunk.get('content', '')[:100])),
-                    "url": metadata.get('url', '')
+                    "id": document.get("id", self.source_id_counter),
+                    "title": document.get("title", "Untitled Document"),
+                    "description": metadata.get(
+                        "og:description",
+                        metadata.get("ogDescription", chunk.get("content", "")[:100]),
+                    ),
+                    "url": metadata.get("url", ""),
                 }

                 self.source_id_counter += 1
@@ -108,7 +130,14 @@ class ConnectorService:
         return result_object, crawled_urls_chunks

-    async def search_files(self, user_query: str, user_id: str, search_space_id: int, top_k: int = 20, search_mode: SearchMode = SearchMode.CHUNKS) -> tuple:
+    async def search_files(
+        self,
+        user_query: str,
+        user_id: str,
+        search_space_id: int,
+        top_k: int = 20,
+        search_mode: SearchMode = SearchMode.CHUNKS,
+    ) -> tuple:
         """
         Search for files and return both the source information and langchain documents
@@ -121,7 +150,7 @@ class ConnectorService:
                 top_k=top_k,
                 user_id=user_id,
                 search_space_id=search_space_id,
-                document_type="FILE"
+                document_type="FILE",
             )
         elif search_mode == SearchMode.DOCUMENTS:
             files_chunks = await self.document_retriever.hybrid_search(
@@ -129,7 +158,7 @@ class ConnectorService:
                 top_k=top_k,
                 user_id=user_id,
                 search_space_id=search_space_id,
-                document_type="FILE"
+                document_type="FILE",
             )
             # Transform document retriever results to match expected format
             files_chunks = self._transform_document_results(files_chunks)
@@ -148,15 +177,18 @@ class ConnectorService:
         async with self.counter_lock:
             for _i, chunk in enumerate(files_chunks):
                 # Extract document metadata
-                document = chunk.get('document', {})
-                metadata = document.get('metadata', {})
+                document = chunk.get("document", {})
+                metadata = document.get("metadata", {})

                 # Create a source entry
                 source = {
-                    "id": document.get('id', self.source_id_counter),
-                    "title": document.get('title', 'Untitled Document'),
-                    "description": metadata.get('og:description', metadata.get('ogDescription', chunk.get('content', '')[:100])),
-                    "url": metadata.get('url', '')
+                    "id": document.get("id", self.source_id_counter),
+                    "title": document.get("title", "Untitled Document"),
+                    "description": metadata.get(
+                        "og:description",
+                        metadata.get("ogDescription", chunk.get("content", "")[:100]),
+                    ),
+                    "url": metadata.get("url", ""),
                 }

                 self.source_id_counter += 1
@@ -185,19 +217,23 @@ class ConnectorService:
         """
         transformed_results = []
         for doc in document_results:
-            transformed_results.append({
-                'document': {
-                    'id': doc.get('document_id'),
-                    'title': doc.get('title', 'Untitled Document'),
-                    'document_type': doc.get('document_type'),
-                    'metadata': doc.get('metadata', {}),
-                },
-                'content': doc.get('chunks_content', doc.get('content', '')),
-                'score': doc.get('score', 0.0)
-            })
+            transformed_results.append(
+                {
+                    "document": {
+                        "id": doc.get("document_id"),
+                        "title": doc.get("title", "Untitled Document"),
+                        "document_type": doc.get("document_type"),
+                        "metadata": doc.get("metadata", {}),
+                    },
+                    "content": doc.get("chunks_content", doc.get("content", "")),
+                    "score": doc.get("score", 0.0),
+                }
+            )
         return transformed_results

-    async def get_connector_by_type(self, user_id: str, connector_type: SearchSourceConnectorType) -> Optional[SearchSourceConnector]:
+    async def get_connector_by_type(
+        self, user_id: str, connector_type: SearchSourceConnectorType
+    ) -> Optional[SearchSourceConnector]:
         """
         Get a connector by type for a specific user
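The rewritten _transform_document_results only reshapes keys from the document-retriever format into the chunk-retriever format; nothing new is computed. Roughly, for one hit (field names are from the code above, values hypothetical):

    doc = {
        "document_id": 7,
        "title": "Some page",
        "document_type": "CRAWLED_URL",
        "metadata": {"url": "https://example.com"},
        "chunks_content": "First chunk of the page...",
        "score": 0.83,
    }
    # becomes
    {
        "document": {
            "id": 7,
            "title": "Some page",
            "document_type": "CRAWLED_URL",
            "metadata": {"url": "https://example.com"},
        },
        "content": "First chunk of the page...",
        "score": 0.83,
    }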
@@ -209,15 +245,16 @@ class ConnectorService:
         Optional[SearchSourceConnector]: The connector if found, None otherwise
         """
         result = await self.session.execute(
-            select(SearchSourceConnector)
-            .filter(
+            select(SearchSourceConnector).filter(
                 SearchSourceConnector.user_id == user_id,
-                SearchSourceConnector.connector_type == connector_type
+                SearchSourceConnector.connector_type == connector_type,
             )
         )
         return result.scalars().first()

-    async def search_tavily(self, user_query: str, user_id: str, top_k: int = 20) -> tuple:
+    async def search_tavily(
+        self, user_query: str, user_id: str, top_k: int = 20
+    ) -> tuple:
         """
         Search using Tavily API and return both the source information and documents
@@ -230,7 +267,9 @@ class ConnectorService:
         tuple: (sources_info, documents)
         """
         # Get Tavily connector configuration
-        tavily_connector = await self.get_connector_by_type(user_id, SearchSourceConnectorType.TAVILY_API)
+        tavily_connector = await self.get_connector_by_type(
+            user_id, SearchSourceConnectorType.TAVILY_API
+        )

         if not tavily_connector:
             # Return empty results if no Tavily connector is configured
|
||||||
response = tavily_client.search(
|
response = tavily_client.search(
|
||||||
query=user_query,
|
query=user_query,
|
||||||
max_results=top_k,
|
max_results=top_k,
|
||||||
search_depth="advanced" # Use advanced search for better results
|
search_depth="advanced", # Use advanced search for better results
|
||||||
)
|
)
|
||||||
|
|
||||||
# Extract results from Tavily response
|
# Extract results from Tavily response
|
||||||
|
@ -271,13 +310,12 @@ class ConnectorService:
|
||||||
|
|
||||||
async with self.counter_lock:
|
async with self.counter_lock:
|
||||||
for i, result in enumerate(tavily_results):
|
for i, result in enumerate(tavily_results):
|
||||||
|
|
||||||
# Create a source entry
|
# Create a source entry
|
||||||
source = {
|
source = {
|
||||||
"id": self.source_id_counter,
|
"id": self.source_id_counter,
|
||||||
"title": result.get("title", "Tavily Result"),
|
"title": result.get("title", "Tavily Result"),
|
||||||
"description": result.get("content", "")[:100],
|
"description": result.get("content", "")[:100],
|
||||||
"url": result.get("url", "")
|
"url": result.get("url", ""),
|
||||||
}
|
}
|
||||||
sources_list.append(source)
|
sources_list.append(source)
|
||||||
|
|
||||||
|
@@ -293,9 +331,9 @@ class ConnectorService:
                 "metadata": {
                     "url": result.get("url", ""),
                     "published_date": result.get("published_date", ""),
-                    "source": "TAVILY_API"
-                }
-            }
+                    "source": "TAVILY_API",
+                },
+            },
             }
             documents.append(document)
             self.source_id_counter += 1
@@ -320,7 +358,14 @@ class ConnectorService:
                 "sources": [],
             }, []

-    async def search_slack(self, user_query: str, user_id: str, search_space_id: int, top_k: int = 20, search_mode: SearchMode = SearchMode.CHUNKS) -> tuple:
+    async def search_slack(
+        self,
+        user_query: str,
+        user_id: str,
+        search_space_id: int,
+        top_k: int = 20,
+        search_mode: SearchMode = SearchMode.CHUNKS,
+    ) -> tuple:
         """
         Search for slack and return both the source information and langchain documents
@@ -333,7 +378,7 @@ class ConnectorService:
                 top_k=top_k,
                 user_id=user_id,
                 search_space_id=search_space_id,
-                document_type="SLACK_CONNECTOR"
+                document_type="SLACK_CONNECTOR",
             )
         elif search_mode == SearchMode.DOCUMENTS:
             slack_chunks = await self.document_retriever.hybrid_search(
@@ -341,7 +386,7 @@ class ConnectorService:
                 top_k=top_k,
                 user_id=user_id,
                 search_space_id=search_space_id,
-                document_type="SLACK_CONNECTOR"
+                document_type="SLACK_CONNECTOR",
             )
             # Transform document retriever results to match expected format
             slack_chunks = self._transform_document_results(slack_chunks)
@@ -360,13 +405,13 @@ class ConnectorService:
         async with self.counter_lock:
             for _i, chunk in enumerate(slack_chunks):
                 # Extract document metadata
-                document = chunk.get('document', {})
-                metadata = document.get('metadata', {})
+                document = chunk.get("document", {})
+                metadata = document.get("metadata", {})

                 # Create a mapped source entry with Slack-specific metadata
-                channel_name = metadata.get('channel_name', 'Unknown Channel')
-                channel_id = metadata.get('channel_id', '')
-                message_date = metadata.get('start_date', '')
+                channel_name = metadata.get("channel_name", "Unknown Channel")
+                channel_id = metadata.get("channel_id", "")
+                message_date = metadata.get("start_date", "")

                 # Create a more descriptive title for Slack messages
                 title = f"Slack: {channel_name}"
@@ -374,7 +419,7 @@ class ConnectorService:
                     title += f" ({message_date})"

                 # Create a more descriptive description for Slack messages
-                description = chunk.get('content', '')[:100]
+                description = chunk.get("content", "")[:100]
                 if len(description) == 100:
                     description += "..."
@@ -384,7 +429,7 @@ class ConnectorService:
                     url = f"https://slack.com/app_redirect?channel={channel_id}"

                 source = {
-                    "id": document.get('id', self.source_id_counter),
+                    "id": document.get("id", self.source_id_counter),
                     "title": title,
                     "description": description,
                     "url": url,
@@ -403,7 +448,14 @@ class ConnectorService:
         return result_object, slack_chunks

-    async def search_notion(self, user_query: str, user_id: str, search_space_id: int, top_k: int = 20, search_mode: SearchMode = SearchMode.CHUNKS) -> tuple:
+    async def search_notion(
+        self,
+        user_query: str,
+        user_id: str,
+        search_space_id: int,
+        top_k: int = 20,
+        search_mode: SearchMode = SearchMode.CHUNKS,
+    ) -> tuple:
         """
         Search for Notion pages and return both the source information and langchain documents
@@ -422,7 +474,7 @@ class ConnectorService:
                 top_k=top_k,
                 user_id=user_id,
                 search_space_id=search_space_id,
-                document_type="NOTION_CONNECTOR"
+                document_type="NOTION_CONNECTOR",
             )
         elif search_mode == SearchMode.DOCUMENTS:
             notion_chunks = await self.document_retriever.hybrid_search(
@@ -430,7 +482,7 @@ class ConnectorService:
                 top_k=top_k,
                 user_id=user_id,
                 search_space_id=search_space_id,
-                document_type="NOTION_CONNECTOR"
+                document_type="NOTION_CONNECTOR",
             )
             # Transform document retriever results to match expected format
             notion_chunks = self._transform_document_results(notion_chunks)
@@ -449,13 +501,13 @@ class ConnectorService:
         async with self.counter_lock:
             for _i, chunk in enumerate(notion_chunks):
                 # Extract document metadata
-                document = chunk.get('document', {})
-                metadata = document.get('metadata', {})
+                document = chunk.get("document", {})
+                metadata = document.get("metadata", {})

                 # Create a mapped source entry with Notion-specific metadata
-                page_title = metadata.get('page_title', 'Untitled Page')
-                page_id = metadata.get('page_id', '')
-                indexed_at = metadata.get('indexed_at', '')
+                page_title = metadata.get("page_title", "Untitled Page")
+                page_id = metadata.get("page_id", "")
+                indexed_at = metadata.get("indexed_at", "")

                 # Create a more descriptive title for Notion pages
                 title = f"Notion: {page_title}"
@@ -463,7 +515,7 @@ class ConnectorService:
                     title += f" (indexed: {indexed_at})"

                 # Create a more descriptive description for Notion pages
-                description = chunk.get('content', '')[:100]
+                description = chunk.get("content", "")[:100]
                 if len(description) == 100:
                     description += "..."
@@ -474,7 +526,7 @@ class ConnectorService:
                     url = f"https://notion.so/{page_id.replace('-', '')}"

                 source = {
-                    "id": document.get('id', self.source_id_counter),
+                    "id": document.get("id", self.source_id_counter),
                     "title": title,
                     "description": description,
                     "url": url,
@@ -493,7 +545,14 @@ class ConnectorService:
         return result_object, notion_chunks

-    async def search_extension(self, user_query: str, user_id: str, search_space_id: int, top_k: int = 20, search_mode: SearchMode = SearchMode.CHUNKS) -> tuple:
+    async def search_extension(
+        self,
+        user_query: str,
+        user_id: str,
+        search_space_id: int,
+        top_k: int = 20,
+        search_mode: SearchMode = SearchMode.CHUNKS,
+    ) -> tuple:
         """
         Search for extension data and return both the source information and langchain documents
@@ -512,7 +571,7 @@ class ConnectorService:
                 top_k=top_k,
                 user_id=user_id,
                 search_space_id=search_space_id,
-                document_type="EXTENSION"
+                document_type="EXTENSION",
             )
         elif search_mode == SearchMode.DOCUMENTS:
             extension_chunks = await self.document_retriever.hybrid_search(
@@ -520,7 +579,7 @@ class ConnectorService:
                 top_k=top_k,
                 user_id=user_id,
                 search_space_id=search_space_id,
-                document_type="EXTENSION"
+                document_type="EXTENSION",
             )
             # Transform document retriever results to match expected format
             extension_chunks = self._transform_document_results(extension_chunks)
@@ -539,15 +598,17 @@ class ConnectorService:
         async with self.counter_lock:
             for i, chunk in enumerate(extension_chunks):
                 # Extract document metadata
-                document = chunk.get('document', {})
-                metadata = document.get('metadata', {})
+                document = chunk.get("document", {})
+                metadata = document.get("metadata", {})

                 # Extract extension-specific metadata
-                webpage_title = metadata.get('VisitedWebPageTitle', 'Untitled Page')
-                webpage_url = metadata.get('VisitedWebPageURL', '')
-                visit_date = metadata.get('VisitedWebPageDateWithTimeInISOString', '')
-                visit_duration = metadata.get('VisitedWebPageVisitDurationInMilliseconds', '')
-                browsing_session_id = metadata.get('BrowsingSessionId', '')
+                webpage_title = metadata.get("VisitedWebPageTitle", "Untitled Page")
+                webpage_url = metadata.get("VisitedWebPageURL", "")
+                visit_date = metadata.get("VisitedWebPageDateWithTimeInISOString", "")
+                visit_duration = metadata.get(
+                    "VisitedWebPageVisitDurationInMilliseconds", ""
+                )
+                browsing_session_id = metadata.get("BrowsingSessionId", "")

                 # Create a more descriptive title for extension data
                 title = webpage_title
|
||||||
# Format the date for display (simplified)
|
# Format the date for display (simplified)
|
||||||
try:
|
try:
|
||||||
# Just extract the date part for display
|
# Just extract the date part for display
|
||||||
formatted_date = visit_date.split('T')[0] if 'T' in visit_date else visit_date
|
formatted_date = (
|
||||||
|
visit_date.split("T")[0]
|
||||||
|
if "T" in visit_date
|
||||||
|
else visit_date
|
||||||
|
)
|
||||||
title += f" (visited: {formatted_date})"
|
title += f" (visited: {formatted_date})"
|
||||||
except:
|
except:
|
||||||
# Fallback if date parsing fails
|
# Fallback if date parsing fails
|
||||||
title += f" (visited: {visit_date})"
|
title += f" (visited: {visit_date})"
|
||||||
|
|
||||||
# Create a more descriptive description for extension data
|
# Create a more descriptive description for extension data
|
||||||
description = chunk.get('content', '')[:100]
|
description = chunk.get("content", "")[:100]
|
||||||
if len(description) == 100:
|
if len(description) == 100:
|
||||||
description += "..."
|
description += "..."
|
||||||
|
|
||||||
|
@@ -573,7 +638,7 @@ class ConnectorService:
                         if duration_seconds < 60:
                             duration_text = f"{duration_seconds:.1f} seconds"
                         else:
-                            duration_text = f"{duration_seconds/60:.1f} minutes"
+                            duration_text = f"{duration_seconds / 60:.1f} minutes"

                         if description:
                             description += f" | Duration: {duration_text}"
@@ -582,10 +647,10 @@ class ConnectorService:
                     pass

                 source = {
-                    "id": document.get('id', self.source_id_counter),
+                    "id": document.get("id", self.source_id_counter),
                     "title": title,
                     "description": description,
-                    "url": webpage_url
+                    "url": webpage_url,
                 }

                 self.source_id_counter += 1
@@ -601,7 +666,14 @@ class ConnectorService:
         return result_object, extension_chunks

-    async def search_youtube(self, user_query: str, user_id: str, search_space_id: int, top_k: int = 20, search_mode: SearchMode = SearchMode.CHUNKS) -> tuple:
+    async def search_youtube(
+        self,
+        user_query: str,
+        user_id: str,
+        search_space_id: int,
+        top_k: int = 20,
+        search_mode: SearchMode = SearchMode.CHUNKS,
+    ) -> tuple:
         """
         Search for YouTube videos and return both the source information and langchain documents
@@ -620,7 +692,7 @@ class ConnectorService:
                 top_k=top_k,
                 user_id=user_id,
                 search_space_id=search_space_id,
-                document_type="YOUTUBE_VIDEO"
+                document_type="YOUTUBE_VIDEO",
             )
         elif search_mode == SearchMode.DOCUMENTS:
             youtube_chunks = await self.document_retriever.hybrid_search(
@@ -628,7 +700,7 @@ class ConnectorService:
                 top_k=top_k,
                 user_id=user_id,
                 search_space_id=search_space_id,
-                document_type="YOUTUBE_VIDEO"
+                document_type="YOUTUBE_VIDEO",
             )
             # Transform document retriever results to match expected format
             youtube_chunks = self._transform_document_results(youtube_chunks)
@@ -647,13 +719,13 @@ class ConnectorService:
         async with self.counter_lock:
             for _i, chunk in enumerate(youtube_chunks):
                 # Extract document metadata
-                document = chunk.get('document', {})
-                metadata = document.get('metadata', {})
+                document = chunk.get("document", {})
+                metadata = document.get("metadata", {})

                 # Extract YouTube-specific metadata
-                video_title = metadata.get('video_title', 'Untitled Video')
-                video_id = metadata.get('video_id', '')
-                channel_name = metadata.get('channel_name', '')
+                video_title = metadata.get("video_title", "Untitled Video")
+                video_id = metadata.get("video_id", "")
+                channel_name = metadata.get("channel_name", "")
                 # published_date = metadata.get('published_date', '')

                 # Create a more descriptive title for YouTube videos
@@ -662,7 +734,9 @@ class ConnectorService:
                     title += f" - {channel_name}"

                 # Create a more descriptive description for YouTube videos
-                description = metadata.get('description', chunk.get('content', '')[:100])
+                description = metadata.get(
+                    "description", chunk.get("content", "")[:100]
+                )
                 if len(description) == 100:
                     description += "..."
@@ -670,12 +744,12 @@ class ConnectorService:
                 url = f"https://www.youtube.com/watch?v={video_id}" if video_id else ""

                 source = {
-                    "id": document.get('id', self.source_id_counter),
+                    "id": document.get("id", self.source_id_counter),
                     "title": title,
                     "description": description,
                     "url": url,
                     "video_id": video_id,  # Additional field for YouTube videos
-                    "channel_name": channel_name  # Additional field for YouTube videos
+                    "channel_name": channel_name,  # Additional field for YouTube videos
                 }

                 self.source_id_counter += 1
@@ -691,7 +765,14 @@ class ConnectorService:
         return result_object, youtube_chunks

-    async def search_github(self, user_query: str, user_id: int, search_space_id: int, top_k: int = 20, search_mode: SearchMode = SearchMode.CHUNKS) -> tuple:
+    async def search_github(
+        self,
+        user_query: str,
+        user_id: int,
+        search_space_id: int,
+        top_k: int = 20,
+        search_mode: SearchMode = SearchMode.CHUNKS,
+    ) -> tuple:
         """
         Search for GitHub documents and return both the source information and langchain documents
@@ -704,7 +785,7 @@ class ConnectorService:
                 top_k=top_k,
                 user_id=user_id,
                 search_space_id=search_space_id,
-                document_type="GITHUB_CONNECTOR"
+                document_type="GITHUB_CONNECTOR",
             )
         elif search_mode == SearchMode.DOCUMENTS:
             github_chunks = await self.document_retriever.hybrid_search(
@@ -712,7 +793,7 @@ class ConnectorService:
                 top_k=top_k,
                 user_id=user_id,
                 search_space_id=search_space_id,
-                document_type="GITHUB_CONNECTOR"
+                document_type="GITHUB_CONNECTOR",
             )
             # Transform document retriever results to match expected format
             github_chunks = self._transform_document_results(github_chunks)
@@ -731,15 +812,19 @@ class ConnectorService:
         async with self.counter_lock:
             for _i, chunk in enumerate(github_chunks):
                 # Extract document metadata
-                document = chunk.get('document', {})
-                metadata = document.get('metadata', {})
+                document = chunk.get("document", {})
+                metadata = document.get("metadata", {})

                 # Create a source entry
                 source = {
-                    "id": document.get('id', self.source_id_counter),
-                    "title": document.get('title', 'GitHub Document'),  # Use specific title if available
-                    "description": metadata.get('description', chunk.get('content', '')[:100]),  # Use description or content preview
-                    "url": metadata.get('url', '')  # Use URL if available in metadata
+                    "id": document.get("id", self.source_id_counter),
+                    "title": document.get(
+                        "title", "GitHub Document"
+                    ),  # Use specific title if available
+                    "description": metadata.get(
+                        "description", chunk.get("content", "")[:100]
+                    ),  # Use description or content preview
+                    "url": metadata.get("url", ""),  # Use URL if available in metadata
                 }

                 self.source_id_counter += 1
@ -755,7 +840,14 @@ class ConnectorService:
|
||||||
|
|
||||||
return result_object, github_chunks
|
return result_object, github_chunks
|
||||||
|
|
||||||
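For orientation, a hedged usage sketch of the reformatted `search_github`. The diff does not show how `ConnectorService` is constructed, and the result object's shape is inferred from the `"sources": []` fallback visible in `search_linkup` below, so treat the setup and placeholder IDs as assumptions:

```python
# Hedged sketch (not part of the diff): consuming search_github's return value.
async def show_github_sources(connector_service, query: str) -> None:
    source_object, github_chunks = await connector_service.search_github(
        user_query=query,
        user_id=1,           # note: search_github types user_id as int in this diff
        search_space_id=42,  # hypothetical search space ID
        top_k=10,            # search_mode defaults to SearchMode.CHUNKS
    )
    for source in source_object.get("sources", []):
        print(source["title"], source["url"])
    print(f"{len(github_chunks)} chunks retrieved")
```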
-    async def search_linear(self, user_query: str, user_id: str, search_space_id: int, top_k: int = 20, search_mode: SearchMode = SearchMode.CHUNKS) -> tuple:
+    async def search_linear(
+        self,
+        user_query: str,
+        user_id: str,
+        search_space_id: int,
+        top_k: int = 20,
+        search_mode: SearchMode = SearchMode.CHUNKS,
+    ) -> tuple:
        """
        Search for Linear issues and comments and return both the source information and langchain documents

@@ -774,7 +866,7 @@ class ConnectorService:
                top_k=top_k,
                user_id=user_id,
                search_space_id=search_space_id,
-                document_type="LINEAR_CONNECTOR"
+                document_type="LINEAR_CONNECTOR",
            )
        elif search_mode == SearchMode.DOCUMENTS:
            linear_chunks = await self.document_retriever.hybrid_search(
@@ -782,7 +874,7 @@ class ConnectorService:
                top_k=top_k,
                user_id=user_id,
                search_space_id=search_space_id,
-                document_type="LINEAR_CONNECTOR"
+                document_type="LINEAR_CONNECTOR",
            )
            # Transform document retriever results to match expected format
            linear_chunks = self._transform_document_results(linear_chunks)
@@ -801,14 +893,14 @@ class ConnectorService:
        async with self.counter_lock:
            for _i, chunk in enumerate(linear_chunks):
                # Extract document metadata
-                document = chunk.get('document', {})
-                metadata = document.get('metadata', {})
+                document = chunk.get("document", {})
+                metadata = document.get("metadata", {})

                # Extract Linear-specific metadata
-                issue_identifier = metadata.get('issue_identifier', '')
-                issue_title = metadata.get('issue_title', 'Untitled Issue')
-                issue_state = metadata.get('state', '')
-                comment_count = metadata.get('comment_count', 0)
+                issue_identifier = metadata.get("issue_identifier", "")
+                issue_title = metadata.get("issue_title", "Untitled Issue")
+                issue_state = metadata.get("state", "")
+                comment_count = metadata.get("comment_count", 0)

                # Create a more descriptive title for Linear issues
                title = f"Linear: {issue_identifier} - {issue_title}"
@@ -816,7 +908,7 @@ class ConnectorService:
                    title += f" ({issue_state})"

                # Create a more descriptive description for Linear issues
-                description = chunk.get('content', '')[:100]
+                description = chunk.get("content", "")[:100]
                if len(description) == 100:
                    description += "..."

@@ -835,13 +927,13 @@ class ConnectorService:
                    url = f"https://linear.app/issue/{issue_identifier}"

                source = {
-                    "id": document.get('id', self.source_id_counter),
+                    "id": document.get("id", self.source_id_counter),
                    "title": title,
                    "description": description,
                    "url": url,
                    "issue_identifier": issue_identifier,
                    "state": issue_state,
-                    "comment_count": comment_count
+                    "comment_count": comment_count,
                }

                self.source_id_counter += 1
@@ -857,7 +949,14 @@ class ConnectorService:

        return result_object, linear_chunks

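The truncation idiom used throughout these methods (slice to 100 characters, then append `"..."` when the slice length is exactly 100) also adds an ellipsis to content that is exactly 100 characters long. A small helper, offered only as a sketch of the intent and not as a change in this diff, avoids that edge case:

```python
def preview(text: str, limit: int = 100) -> str:
    """Return at most `limit` characters, adding an ellipsis only if text was cut."""
    return text if len(text) <= limit else text[:limit] + "..."


assert preview("short") == "short"
assert preview("x" * 100) == "x" * 100  # no false ellipsis at exactly the limit
assert preview("x" * 101) == "x" * 100 + "..."
```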
-    async def search_jira(self, user_query: str, user_id: str, search_space_id: int, top_k: int = 20, search_mode: SearchMode = SearchMode.CHUNKS) -> tuple:
+    async def search_jira(
+        self,
+        user_query: str,
+        user_id: str,
+        search_space_id: int,
+        top_k: int = 20,
+        search_mode: SearchMode = SearchMode.CHUNKS,
+    ) -> tuple:
        """
        Search for Jira issues and comments and return both the source information and langchain documents

@@ -877,7 +976,7 @@ class ConnectorService:
                top_k=top_k,
                user_id=user_id,
                search_space_id=search_space_id,
-                document_type="JIRA_CONNECTOR"
+                document_type="JIRA_CONNECTOR",
            )
        elif search_mode == SearchMode.DOCUMENTS:
            jira_chunks = await self.document_retriever.hybrid_search(
@@ -885,7 +984,7 @@ class ConnectorService:
                top_k=top_k,
                user_id=user_id,
                search_space_id=search_space_id,
-                document_type="JIRA_CONNECTOR"
+                document_type="JIRA_CONNECTOR",
            )
            # Transform document retriever results to match expected format
            jira_chunks = self._transform_document_results(jira_chunks)
@@ -904,16 +1003,16 @@ class ConnectorService:
        async with self.counter_lock:
            for _i, chunk in enumerate(jira_chunks):
                # Extract document metadata
-                document = chunk.get('document', {})
-                metadata = document.get('metadata', {})
+                document = chunk.get("document", {})
+                metadata = document.get("metadata", {})

                # Extract Jira-specific metadata
-                issue_key = metadata.get('issue_key', '')
-                issue_title = metadata.get('issue_title', 'Untitled Issue')
-                status = metadata.get('status', '')
-                priority = metadata.get('priority', '')
-                issue_type = metadata.get('issue_type', '')
-                comment_count = metadata.get('comment_count', 0)
+                issue_key = metadata.get("issue_key", "")
+                issue_title = metadata.get("issue_title", "Untitled Issue")
+                status = metadata.get("status", "")
+                priority = metadata.get("priority", "")
+                issue_type = metadata.get("issue_type", "")
+                comment_count = metadata.get("comment_count", 0)

                # Create a more descriptive title for Jira issues
                title = f"Jira: {issue_key} - {issue_title}"
@@ -921,7 +1020,7 @@ class ConnectorService:
                    title += f" ({status})"

                # Create a more descriptive description for Jira issues
-                description = chunk.get('content', '')[:100]
+                description = chunk.get("content", "")[:100]
                if len(description) == 100:
                    description += "..."

@@ -938,16 +1037,16 @@ class ConnectorService:
                if description:
                    description += f" | {' | '.join(info_parts)}"
                else:
-                    description = ' | '.join(info_parts)
+                    description = " | ".join(info_parts)

                # For URL, we could construct a URL to the Jira issue if we have the base URL
                # For now, use a generic placeholder
                url = ""
-                if issue_key and metadata.get('base_url'):
+                if issue_key and metadata.get("base_url"):
                    url = f"{metadata.get('base_url')}/browse/{issue_key}"

                source = {
-                    "id": document.get('id', self.source_id_counter),
+                    "id": document.get("id", self.source_id_counter),
                    "title": title,
                    "description": description,
                    "url": url,
@@ -955,7 +1054,7 @@ class ConnectorService:
                    "status": status,
                    "priority": priority,
                    "issue_type": issue_type,
-                    "comment_count": comment_count
+                    "comment_count": comment_count,
                }

                self.source_id_counter += 1
@@ -971,7 +1070,9 @@ class ConnectorService:

        return result_object, jira_chunks

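The Jira URL logic above only emits a link when the indexed metadata carries a `base_url`. The same branching as a standalone pure function (the packaging is ours for illustration; the key names mirror the diff):

```python
def jira_issue_url(metadata: dict, issue_key: str) -> str:
    """Build a Jira browse link when the base URL is known; otherwise return an empty string."""
    base_url = metadata.get("base_url")
    if issue_key and base_url:
        return f"{base_url}/browse/{issue_key}"
    return ""


assert jira_issue_url({"base_url": "https://example.atlassian.net"}, "SURF-1") == (
    "https://example.atlassian.net/browse/SURF-1"
)
assert jira_issue_url({}, "SURF-1") == ""
```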
-    async def search_linkup(self, user_query: str, user_id: str, mode: str = "standard") -> tuple:
+    async def search_linkup(
+        self, user_query: str, user_id: str, mode: str = "standard"
+    ) -> tuple:
        """
        Search using Linkup API and return both the source information and documents

@@ -984,7 +1085,9 @@ class ConnectorService:
            tuple: (sources_info, documents)
        """
        # Get Linkup connector configuration
-        linkup_connector = await self.get_connector_by_type(user_id, SearchSourceConnectorType.LINKUP_API)
+        linkup_connector = await self.get_connector_by_type(
+            user_id, SearchSourceConnectorType.LINKUP_API
+        )

        if not linkup_connector:
            # Return empty results if no Linkup connector is configured
@@ -1008,7 +1111,7 @@ class ConnectorService:
            )

            # Extract results from Linkup response - access as attribute instead of using .get()
-            linkup_results = response.results if hasattr(response, 'results') else []
+            linkup_results = response.results if hasattr(response, "results") else []

            # Only proceed if we have results
            if not linkup_results:
@@ -1026,33 +1129,41 @@ class ConnectorService:
            async with self.counter_lock:
                for i, result in enumerate(linkup_results):
                    # Only process results that have content
-                    if not hasattr(result, 'content') or not result.content:
+                    if not hasattr(result, "content") or not result.content:
                        continue

                    # Create a source entry
                    source = {
                        "id": self.source_id_counter,
-                        "title": result.name if hasattr(result, 'name') else "Linkup Result",
-                        "description": result.content[:100] if hasattr(result, 'content') else "",
-                        "url": result.url if hasattr(result, 'url') else ""
+                        "title": (
+                            result.name if hasattr(result, "name") else "Linkup Result"
+                        ),
+                        "description": (
+                            result.content[:100] if hasattr(result, "content") else ""
+                        ),
+                        "url": result.url if hasattr(result, "url") else "",
                    }
                    sources_list.append(source)

                    # Create a document entry
                    document = {
                        "chunk_id": f"linkup_chunk_{i}",
-                        "content": result.content if hasattr(result, 'content') else "",
+                        "content": result.content if hasattr(result, "content") else "",
                        "score": 1.0,  # Default score since not provided by Linkup
                        "document": {
                            "id": self.source_id_counter,
-                            "title": result.name if hasattr(result, 'name') else "Linkup Result",
+                            "title": (
+                                result.name
+                                if hasattr(result, "name")
+                                else "Linkup Result"
+                            ),
                            "document_type": "LINKUP_API",
                            "metadata": {
-                                "url": result.url if hasattr(result, 'url') else "",
-                                "type": result.type if hasattr(result, 'type') else "",
-                                "source": "LINKUP_API"
-                            }
-                        }
+                                "url": result.url if hasattr(result, "url") else "",
+                                "type": result.type if hasattr(result, "type") else "",
+                                "source": "LINKUP_API",
+                            },
+                        },
                    }
                    documents.append(document)
                    self.source_id_counter += 1
@@ -1077,7 +1188,14 @@ class ConnectorService:
                "sources": [],
            }, []

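Unlike the database-backed connectors, Linkup results are SDK response objects, hence the `hasattr` guards instead of dict `.get()` calls. An equivalent, slightly terser alternative is `getattr` with a default; a self-contained sketch with a stand-in result object:

```python
class FakeLinkupResult:
    """Stand-in for a Linkup SDK result object (illustrative only)."""

    def __init__(self, name: str, content: str) -> None:
        self.name = name
        self.content = content  # deliberately no `url` attribute


result = FakeLinkupResult("Example page", "Some snippet of content...")

# hasattr style, as in the diff (the attribute access short-circuits when absent):
title = result.name if hasattr(result, "name") else "Linkup Result"
url = result.url if hasattr(result, "url") else ""

# getattr style: same behavior in a single lookup expression
title2 = getattr(result, "name", "Linkup Result")
url2 = getattr(result, "url", "")

assert (title, url) == (title2, url2) == ("Example page", "")
```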
-    async def search_discord(self, user_query: str, user_id: str, search_space_id: int, top_k: int = 20, search_mode: SearchMode = SearchMode.CHUNKS) -> tuple:
+    async def search_discord(
+        self,
+        user_query: str,
+        user_id: str,
+        search_space_id: int,
+        top_k: int = 20,
+        search_mode: SearchMode = SearchMode.CHUNKS,
+    ) -> tuple:
        """
        Search for Discord messages and return both the source information and langchain documents

@@ -1096,7 +1214,7 @@ class ConnectorService:
                top_k=top_k,
                user_id=user_id,
                search_space_id=search_space_id,
-                document_type="DISCORD_CONNECTOR"
+                document_type="DISCORD_CONNECTOR",
            )
        elif search_mode == SearchMode.DOCUMENTS:
            discord_chunks = await self.document_retriever.hybrid_search(
@@ -1104,7 +1222,7 @@ class ConnectorService:
                top_k=top_k,
                user_id=user_id,
                search_space_id=search_space_id,
-                document_type="DISCORD_CONNECTOR"
+                document_type="DISCORD_CONNECTOR",
            )
            # Transform document retriever results to match expected format
            discord_chunks = self._transform_document_results(discord_chunks)
@@ -1123,13 +1241,13 @@ class ConnectorService:
        async with self.counter_lock:
            for i, chunk in enumerate(discord_chunks):
                # Extract document metadata
-                document = chunk.get('document', {})
-                metadata = document.get('metadata', {})
+                document = chunk.get("document", {})
+                metadata = document.get("metadata", {})

                # Create a mapped source entry with Discord-specific metadata
-                channel_name = metadata.get('channel_name', 'Unknown Channel')
-                channel_id = metadata.get('channel_id', '')
-                message_date = metadata.get('start_date', '')
+                channel_name = metadata.get("channel_name", "Unknown Channel")
+                channel_id = metadata.get("channel_id", "")
+                message_date = metadata.get("start_date", "")

                # Create a more descriptive title for Discord messages
                title = f"Discord: {channel_name}"
@@ -1137,12 +1255,12 @@ class ConnectorService:
                    title += f" ({message_date})"

                # Create a more descriptive description for Discord messages
-                description = chunk.get('content', '')[:100]
+                description = chunk.get("content", "")[:100]
                if len(description) == 100:
                    description += "..."

                url = ""
-                guild_id = metadata.get('guild_id', '')
+                guild_id = metadata.get("guild_id", "")
                if guild_id and channel_id:
                    url = f"https://discord.com/channels/{guild_id}/{channel_id}"
                elif channel_id:
@@ -1150,7 +1268,7 @@ class ConnectorService:
                    url = f"https://discord.com/channels/@me/{channel_id}"

                source = {
-                    "id": document.get('id', self.source_id_counter),
+                    "id": document.get("id", self.source_id_counter),
                    "title": title,
                    "description": description,
                    "url": url,
@@ -1168,5 +1286,3 @@ class ConnectorService:
                }

        return result_object, discord_chunks
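The Discord links built above follow the client's deep-link scheme: `/channels/{guild_id}/{channel_id}` for guild channels and `/channels/@me/{channel_id}` as a direct-message fallback. The same fallback chain as a pure function (the standalone packaging is ours; key names mirror the diff):

```python
def discord_channel_url(metadata: dict) -> str:
    """Mirror the diff's fallback chain: guild channel, then DM channel, then no link."""
    guild_id = metadata.get("guild_id", "")
    channel_id = metadata.get("channel_id", "")
    if guild_id and channel_id:
        return f"https://discord.com/channels/{guild_id}/{channel_id}"
    if channel_id:
        return f"https://discord.com/channels/@me/{channel_id}"
    return ""


assert discord_channel_url({"guild_id": "1", "channel_id": "2"}).endswith("/1/2")
assert discord_channel_url({"channel_id": "2"}).endswith("/@me/2")
assert discord_channel_url({}) == ""
```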