Mirror of https://github.com/MODSetter/SurfSense.git (synced 2025-09-01 10:09:08 +00:00)
feat: discord knowledge retrieval

parent 158976e802
commit 1d67a87b82
5 changed files with 124 additions and 1 deletion
@@ -400,6 +400,23 @@ async def fetch_relevant_documents(
            if streaming_service and writer:
                streaming_service.only_update_terminal(f"🔗 Found {len(linkup_chunks)} Linkup results related to your query")
                writer({"yeild_value": streaming_service._format_annotations()})

        elif connector == "DISCORD_CONNECTOR":
            source_object, discord_chunks = await connector_service.search_discord(
                user_query=reformulated_query,
                user_id=user_id,
                search_space_id=search_space_id,
                top_k=top_k,
                search_mode=search_mode
            )

            # Add to sources and raw documents
            if source_object:
                all_sources.append(source_object)
            all_raw_documents.extend(discord_chunks)

            # Stream found document count
            if streaming_service and writer:
                streaming_service.only_update_terminal(f"🗨️ Found {len(discord_chunks)} Discord messages related to your query")
                writer({"yeild_value": streaming_service._format_annotations()})

    except Exception as e:
@@ -15,6 +15,7 @@ You are SurfSense, an advanced AI research assistant that synthesizes information
- YOUTUBE_VIDEO: "YouTube video transcripts and metadata" (personally saved videos)
- GITHUB_CONNECTOR: "GitHub repository content and issues" (personal repositories and interactions)
- LINEAR_CONNECTOR: "Linear project issues and discussions" (personal project management)
- DISCORD_CONNECTOR: "Discord server messages and channels" (personal community interactions)
- TAVILY_API: "Tavily search API results" (personalized search results)
- LINKUP_API: "Linkup search API results" (personalized search results)
</knowledge_sources>
@@ -7,7 +7,7 @@ PUT /search-source-connectors/{connector_id} - Update a specific connector
DELETE /search-source-connectors/{connector_id} - Delete a specific connector
POST /search-source-connectors/{connector_id}/index - Index content from a connector to a search space

Note: Each user can have only one connector of each type (SERPER_API, TAVILY_API, SLACK_CONNECTOR, NOTION_CONNECTOR, GITHUB_CONNECTOR, LINEAR_CONNECTOR).
Note: Each user can have only one connector of each type (SERPER_API, TAVILY_API, SLACK_CONNECTOR, NOTION_CONNECTOR, GITHUB_CONNECTOR, LINEAR_CONNECTOR, DISCORD_CONNECTOR).
"""
from fastapi import APIRouter, Depends, HTTPException, Query, BackgroundTasks, Body
from sqlalchemy.ext.asyncio import AsyncSession
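For context, the index endpoint in the docstring above is what a client would call for a DISCORD_CONNECTOR once it exists. A minimal client-side sketch follows; the base URL, the bearer-token auth scheme, and the search_space_id query parameter are assumptions for illustration and are not shown in this diff:

    # Hypothetical call to the index endpoint; base URL, auth header, and the
    # search_space_id parameter are assumed, not taken from this diff.
    import httpx

    connector_id = 42       # ID of an existing DISCORD_CONNECTOR (illustrative)
    search_space_id = 7     # target search space (illustrative)

    response = httpx.post(
        f"http://localhost:8000/search-source-connectors/{connector_id}/index",
        params={"search_space_id": search_space_id},
        headers={"Authorization": "Bearer <access_token>"},
    )
    response.raise_for_status()
    print(response.json())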
@@ -282,6 +282,7 @@ async def index_connector_content(
    - NOTION_CONNECTOR: Indexes pages from all accessible Notion pages
    - GITHUB_CONNECTOR: Indexes code and documentation from GitHub repositories
    - LINEAR_CONNECTOR: Indexes issues and comments from Linear
    - DISCORD_CONNECTOR: Indexes messages from all accessible Discord channels

    Args:
        connector_id: ID of the connector to use
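The route body is not part of this excerpt, but the docstring implies a dispatch on connector type that now has a Discord branch. A standalone, purely illustrative sketch of that dispatch is below; the function name and message text are hypothetical and do not appear in this diff:

    # Illustrative only: a dispatch shaped like the one the docstring describes.
    async def start_indexing(connector_type: str, connector_id: int, search_space_id: int) -> str:
        if connector_type == "DISCORD_CONNECTOR":
            # The real route would enqueue a background task here that walks all
            # accessible Discord channels and indexes their messages.
            return f"Discord indexing started for connector {connector_id} in search space {search_space_id}"
        raise ValueError(f"Indexing not supported for connector type: {connector_type}")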
@@ -81,6 +81,7 @@ class SearchSourceConnectorBase(BaseModel):
            repo_full_names = config.get("repo_full_names")
            if not isinstance(repo_full_names, list) or not repo_full_names:
                raise ValueError("repo_full_names must be a non-empty list of strings")

        elif connector_type == SearchSourceConnectorType.LINEAR_CONNECTOR:
            # For LINEAR_CONNECTOR, only allow LINEAR_API_KEY
            allowed_keys = ["LINEAR_API_KEY"]
@@ -90,6 +91,16 @@ class SearchSourceConnectorBase(BaseModel):
            # Ensure the token is not empty
            if not config.get("LINEAR_API_KEY"):
                raise ValueError("LINEAR_API_KEY cannot be empty")

        elif connector_type == SearchSourceConnectorType.DISCORD_CONNECTOR:
            # For DISCORD_CONNECTOR, only allow DISCORD_BOT_TOKEN
            allowed_keys = ["DISCORD_BOT_TOKEN"]
            if set(config.keys()) != set(allowed_keys):
                raise ValueError(f"For DISCORD_CONNECTOR connector type, config must only contain these keys: {allowed_keys}")

            # Ensure the bot token is not empty
            if not config.get("DISCORD_BOT_TOKEN"):
                raise ValueError("DISCORD_BOT_TOKEN cannot be empty")

        return config
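To make the new validation rule concrete, here is a small standalone sketch that mirrors the DISCORD_CONNECTOR branch above; it does not import the actual Pydantic schema (its module path is not shown in this diff):

    # Standalone illustration of the rule added above: the config must contain
    # exactly DISCORD_BOT_TOKEN, and that token must be non-empty.
    def validate_discord_config(config: dict) -> dict:
        allowed_keys = ["DISCORD_BOT_TOKEN"]
        if set(config.keys()) != set(allowed_keys):
            raise ValueError(
                f"For DISCORD_CONNECTOR connector type, config must only contain these keys: {allowed_keys}"
            )
        if not config.get("DISCORD_BOT_TOKEN"):
            raise ValueError("DISCORD_BOT_TOKEN cannot be empty")
        return config

    validate_discord_config({"DISCORD_BOT_TOKEN": "xxxxxxxx"})   # passes
    # validate_discord_config({"DISCORD_BOT_TOKEN": ""})         # raises ValueError
    # validate_discord_config({"token": "xxxxxxxx"})             # raises ValueError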
@@ -959,3 +959,96 @@ class ConnectorService:
                "type": "LINKUP_API",
                "sources": [],
            }, []

    async def search_discord(self, user_query: str, user_id: str, search_space_id: int, top_k: int = 20, search_mode: SearchMode = SearchMode.CHUNKS) -> tuple:
        """
        Search for Discord messages and return both the source information and langchain documents.

        Args:
            user_query: The user's query
            user_id: The user's ID
            search_space_id: The search space ID to search in
            top_k: Maximum number of results to return

        Returns:
            tuple: (sources_info, langchain_documents)
        """
        if search_mode == SearchMode.CHUNKS:
            discord_chunks = await self.chunk_retriever.hybrid_search(
                query_text=user_query,
                top_k=top_k,
                user_id=user_id,
                search_space_id=search_space_id,
                document_type="DISCORD_CONNECTOR"
            )
        elif search_mode == SearchMode.DOCUMENTS:
            discord_chunks = await self.document_retriever.hybrid_search(
                query_text=user_query,
                top_k=top_k,
                user_id=user_id,
                search_space_id=search_space_id,
                document_type="DISCORD_CONNECTOR"
            )
            # Transform document retriever results to match expected format
            discord_chunks = self._transform_document_results(discord_chunks)

        # Early return if no results
        if not discord_chunks:
            return {
                "id": 11,
                "name": "Discord",
                "type": "DISCORD_CONNECTOR",
                "sources": [],
            }, []

        # Process each chunk and create sources directly without deduplication
        sources_list = []
        async with self.counter_lock:
            for i, chunk in enumerate(discord_chunks):
                # Fix for UI
                discord_chunks[i]['document']['id'] = self.source_id_counter

                # Extract document metadata
                document = chunk.get('document', {})
                metadata = document.get('metadata', {})

                # Create a mapped source entry with Discord-specific metadata
                channel_name = metadata.get('channel_name', 'Unknown Channel')
                channel_id = metadata.get('channel_id', '')
                message_date = metadata.get('start_date', '')

                # Create a more descriptive title for Discord messages
                title = f"Discord: {channel_name}"
                if message_date:
                    title += f" ({message_date})"

                # Create a more descriptive description for Discord messages
                description = chunk.get('content', '')[:100]
                if len(description) == 100:
                    description += "..."

                # For URL, use a placeholder or construct a URL to the Discord channel if available
                url = ""
                if channel_id:
                    url = f"https://discord.com/channels/@me/{channel_id}"

                source = {
                    "id": self.source_id_counter,
                    "title": title,
                    "description": description,
                    "url": url,
                }

                self.source_id_counter += 1
                sources_list.append(source)

        # Create result object
        result_object = {
            "id": 11,
            "name": "Discord",
            "type": "DISCORD_CONNECTOR",
            "sources": sources_list,
        }

        return result_object, discord_chunks
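For reference, a minimal sketch of how a caller might consume search_discord and the shape of what it returns; the values passed in are illustrative only, and the real call site is the DISCORD_CONNECTOR branch in fetch_relevant_documents shown earlier:

    # Illustrative only: consuming the (source_object, discord_chunks) tuple.
    # connector_service is an already-constructed ConnectorService instance;
    # its construction is not shown in this diff.
    async def demo(connector_service, query: str, user_id: str, search_space_id: int):
        source_object, discord_chunks = await connector_service.search_discord(
            user_query=query,
            user_id=user_id,
            search_space_id=search_space_id,
            top_k=10,
        )

        # source_object groups UI-facing citations under one connector entry:
        # {"id": 11, "name": "Discord", "type": "DISCORD_CONNECTOR", "sources": [...]}
        for source in source_object["sources"]:
            print(source["title"], source["url"])

        # discord_chunks are the retriever hits themselves, passed on as raw documents.
        print(f"{len(discord_chunks)} Discord chunks retrieved")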