feat: discord knowledge retrieval

This commit is contained in:
Muhamad Aji Wibisono 2025-06-02 18:43:32 +07:00
parent 158976e802
commit 1d67a87b82
5 changed files with 124 additions and 1 deletions

View file

@ -400,6 +400,23 @@ async def fetch_relevant_documents(
if streaming_service and writer:
streaming_service.only_update_terminal(f"🔗 Found {len(linkup_chunks)} Linkup results related to your query")
writer({"yeild_value": streaming_service._format_annotations()})
elif connector == "DISCORD_CONNECTOR":
source_object, discord_chunks = await connector_service.search_discord(
user_query=reformulated_query,
user_id=user_id,
search_space_id=search_space_id,
top_k=top_k,
search_mode=search_mode
)
# Add to sources and raw documents
if source_object:
all_sources.append(source_object)
all_raw_documents.extend(discord_chunks)
# Stream found document count
if streaming_service and writer:
streaming_service.only_update_terminal(f"🗨️ Found {len(discord_chunks)} Discord messages related to your query")
writer({"yeild_value": streaming_service._format_annotations()})
except Exception as e:

View file

@ -15,6 +15,7 @@ You are SurfSense, an advanced AI research assistant that synthesizes informatio
- YOUTUBE_VIDEO: "YouTube video transcripts and metadata" (personally saved videos)
- GITHUB_CONNECTOR: "GitHub repository content and issues" (personal repositories and interactions)
- LINEAR_CONNECTOR: "Linear project issues and discussions" (personal project management)
- DISCORD_CONNECTOR: "Discord server messages and channels" (personal community interactions)
- TAVILY_API: "Tavily search API results" (personalized search results)
- LINKUP_API: "Linkup search API results" (personalized search results)
</knowledge_sources>

View file

@ -7,7 +7,7 @@ PUT /search-source-connectors/{connector_id} - Update a specific connector
DELETE /search-source-connectors/{connector_id} - Delete a specific connector
POST /search-source-connectors/{connector_id}/index - Index content from a connector to a search space
Note: Each user can have only one connector of each type (SERPER_API, TAVILY_API, SLACK_CONNECTOR, NOTION_CONNECTOR, GITHUB_CONNECTOR, LINEAR_CONNECTOR).
Note: Each user can have only one connector of each type (SERPER_API, TAVILY_API, SLACK_CONNECTOR, NOTION_CONNECTOR, GITHUB_CONNECTOR, LINEAR_CONNECTOR, DISCORD_CONNECTOR).
"""
from fastapi import APIRouter, Depends, HTTPException, Query, BackgroundTasks, Body
from sqlalchemy.ext.asyncio import AsyncSession
@ -282,6 +282,7 @@ async def index_connector_content(
- NOTION_CONNECTOR: Indexes pages from all accessible Notion pages
- GITHUB_CONNECTOR: Indexes code and documentation from GitHub repositories
- LINEAR_CONNECTOR: Indexes issues and comments from Linear
- DISCORD_CONNECTOR: Indexes messages from all accessible Discord channels
Args:
connector_id: ID of the connector to use

View file

@ -81,6 +81,7 @@ class SearchSourceConnectorBase(BaseModel):
repo_full_names = config.get("repo_full_names")
if not isinstance(repo_full_names, list) or not repo_full_names:
raise ValueError("repo_full_names must be a non-empty list of strings")
elif connector_type == SearchSourceConnectorType.LINEAR_CONNECTOR:
# For LINEAR_CONNECTOR, only allow LINEAR_API_KEY
allowed_keys = ["LINEAR_API_KEY"]
@ -90,6 +91,16 @@ class SearchSourceConnectorBase(BaseModel):
# Ensure the token is not empty
if not config.get("LINEAR_API_KEY"):
raise ValueError("LINEAR_API_KEY cannot be empty")
elif connector_type == SearchSourceConnectorType.DISCORD_CONNECTOR:
# For DISCORD_CONNECTOR, only allow DISCORD_BOT_TOKEN
allowed_keys = ["DISCORD_BOT_TOKEN"]
if set(config.keys()) != set(allowed_keys):
raise ValueError(f"For DISCORD_CONNECTOR connector type, config must only contain these keys: {allowed_keys}")
# Ensure the bot token is not empty
if not config.get("DISCORD_BOT_TOKEN"):
raise ValueError("DISCORD_BOT_TOKEN cannot be empty")
return config

View file

@ -959,3 +959,96 @@ class ConnectorService:
"type": "LINKUP_API",
"sources": [],
}, []
async def search_discord(self, user_query: str, user_id: str, search_space_id: int, top_k: int = 20, search_mode: SearchMode = SearchMode.CHUNKS) -> tuple:
"""
Search for Discord messages and return both the source information and langchain documents
Args:
user_query: The user's query
user_id: The user's ID
search_space_id: The search space ID to search in
top_k: Maximum number of results to return
Returns:
tuple: (sources_info, langchain_documents)
"""
if search_mode == SearchMode.CHUNKS:
discord_chunks = await self.chunk_retriever.hybrid_search(
query_text=user_query,
top_k=top_k,
user_id=user_id,
search_space_id=search_space_id,
document_type="DISCORD_CONNECTOR"
)
elif search_mode == SearchMode.DOCUMENTS:
discord_chunks = await self.document_retriever.hybrid_search(
query_text=user_query,
top_k=top_k,
user_id=user_id,
search_space_id=search_space_id,
document_type="DISCORD_CONNECTOR"
)
# Transform document retriever results to match expected format
discord_chunks = self._transform_document_results(discord_chunks)
# Early return if no results
if not discord_chunks:
return {
"id": 11,
"name": "Discord",
"type": "DISCORD_CONNECTOR",
"sources": [],
}, []
# Process each chunk and create sources directly without deduplication
sources_list = []
async with self.counter_lock:
for i, chunk in enumerate(discord_chunks):
# Fix for UI
discord_chunks[i]['document']['id'] = self.source_id_counter
# Extract document metadata
document = chunk.get('document', {})
metadata = document.get('metadata', {})
# Create a mapped source entry with Discord-specific metadata
channel_name = metadata.get('channel_name', 'Unknown Channel')
channel_id = metadata.get('channel_id', '')
message_date = metadata.get('start_date', '')
# Create a more descriptive title for Discord messages
title = f"Discord: {channel_name}"
if message_date:
title += f" ({message_date})"
# Create a more descriptive description for Discord messages
description = chunk.get('content', '')[:100]
if len(description) == 100:
description += "..."
# For URL, we can use a placeholder or construct a URL to the Discord channel if available
url = ""
if channel_id:
url = f"https://discord.com/channels/@me/{channel_id}"
source = {
"id": self.source_id_counter,
"title": title,
"description": description,
"url": url,
}
self.source_id_counter += 1
sources_list.append(source)
# Create result object
result_object = {
"id": 11,
"name": "Discord",
"type": "DISCORD_CONNECTOR",
"sources": sources_list,
}
return result_object, discord_chunks