Biome: Merge upstream main to incldue confluence changes

This commit is contained in:
Utkarsh-Patel-13 2025-07-28 09:10:29 -07:00
commit 1148426009
17 changed files with 1431 additions and 8 deletions

View file

@ -1 +0,0 @@
{"c301dd3ad9b4036af1d031ecc966d2f02ae1eda4":{"files":{"surfsense_web/hooks/use-documents.ts":["J01fJFm4gXaHAA83Vu5dtOmk/sw=",true],"surfsense_web/components/chat/DocumentsDataTable.tsx":["wgAyJblucK9D3MKKwPe6W9kZphk=",true]},"modified":1753499058926}}

View file

@ -1 +0,0 @@
{"2d0ec64d93969318101ee479b664221b32241665":{"files":{"surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/page.tsx":["EHKKvlOK0vfy0GgHwlG/J2Bx5rw=",true]},"modified":1753426633288}}

View file

@ -0,0 +1,61 @@
"""Add CONFLUENCE_CONNECTOR to enums
Revision ID: 14
Revises: 13
"""
from collections.abc import Sequence
from alembic import op
# revision identifiers, used by Alembic.
revision: str = "14"
down_revision: str | None = "13"
branch_labels: str | Sequence[str] | None = None
depends_on: str | Sequence[str] | None = None
def upgrade() -> None:
"""Safely add 'CONFLUENCE_CONNECTOR' to enum types if missing."""
# Add to searchsourceconnectortype enum
op.execute(
"""
DO $$
BEGIN
IF NOT EXISTS (
SELECT 1 FROM pg_type t
JOIN pg_enum e ON t.oid = e.enumtypid
WHERE t.typname = 'searchsourceconnectortype' AND e.enumlabel = 'CONFLUENCE_CONNECTOR'
) THEN
ALTER TYPE searchsourceconnectortype ADD VALUE 'CONFLUENCE_CONNECTOR';
END IF;
END
$$;
"""
)
# Add to documenttype enum
op.execute(
"""
DO $$
BEGIN
IF NOT EXISTS (
SELECT 1 FROM pg_type t
JOIN pg_enum e ON t.oid = e.enumtypid
WHERE t.typname = 'documenttype' AND e.enumlabel = 'CONFLUENCE_CONNECTOR'
) THEN
ALTER TYPE documenttype ADD VALUE 'CONFLUENCE_CONNECTOR';
END IF;
END
$$;
"""
)
def downgrade() -> None:
"""
Downgrade logic not implemented since PostgreSQL
does not support removing enum values.
"""
pass

View file

@ -919,6 +919,32 @@ async def fetch_relevant_documents(
)
}
)
elif connector == "CONFLUENCE_CONNECTOR":
(
source_object,
confluence_chunks,
) = await connector_service.search_confluence(
user_query=reformulated_query,
user_id=user_id,
search_space_id=search_space_id,
top_k=top_k,
search_mode=search_mode,
)
# Add to sources and raw documents
if source_object:
all_sources.append(source_object)
all_raw_documents.extend(confluence_chunks)
# Stream found document count
if streaming_service and writer:
writer(
{
"yield_value": streaming_service.format_terminal_info_delta(
f"📚 Found {len(confluence_chunks)} Confluence pages related to your query"
)
}
)
except Exception as e:
error_message = f"Error searching connector {connector}: {e!s}"

View file

@ -16,6 +16,7 @@ You are SurfSense, an advanced AI research assistant that provides detailed, wel
- GITHUB_CONNECTOR: "GitHub repository content and issues" (personal repositories and interactions)
- LINEAR_CONNECTOR: "Linear project issues and discussions" (personal project management)
- JIRA_CONNECTOR: "Jira project issues, tickets, and comments" (personal project tracking)
- CONFLUENCE_CONNECTOR: "Confluence pages and comments" (personal project documentation)
- DISCORD_CONNECTOR: "Discord server conversations and shared content" (personal community communications)
- TAVILY_API: "Tavily search API results" (personalized search results)
- LINKUP_API: "Linkup search API results" (personalized search results)

View file

@ -0,0 +1,354 @@
"""
Confluence Connector Module
A module for retrieving data from Confluence.
Allows fetching pages and their comments from specified spaces.
"""
import base64
from typing import Any
import requests
class ConfluenceConnector:
"""Class for retrieving data from Confluence."""
def __init__(
self,
base_url: str | None = None,
email: str | None = None,
api_token: str | None = None,
):
"""
Initialize the ConfluenceConnector class.
Args:
base_url: Confluence instance base URL (e.g., 'https://yourcompany.atlassian.net') (optional)
email: Confluence account email address (optional)
api_token: Confluence API token (optional)
"""
self.base_url = base_url.rstrip("/") if base_url else None
self.email = email
self.api_token = api_token
self.api_version = "v2" # Confluence Cloud API version
def set_credentials(self, base_url: str, email: str, api_token: str) -> None:
"""
Set the Confluence credentials.
Args:
base_url: Confluence instance base URL
email: Confluence account email address
api_token: Confluence API token
"""
self.base_url = base_url.rstrip("/")
self.email = email
self.api_token = api_token
def set_email(self, email: str) -> None:
"""
Set the Confluence account email.
Args:
email: Confluence account email address
"""
self.email = email
def set_api_token(self, api_token: str) -> None:
"""
Set the Confluence API token.
Args:
api_token: Confluence API token
"""
self.api_token = api_token
def get_headers(self) -> dict[str, str]:
"""
Get headers for Confluence API requests using Basic Authentication.
Returns:
Dictionary of headers
Raises:
ValueError: If email, api_token, or base_url have not been set
"""
if not all([self.base_url, self.email, self.api_token]):
raise ValueError(
"Confluence credentials not initialized. Call set_credentials() first."
)
# Create Basic Auth header using email:api_token
auth_str = f"{self.email}:{self.api_token}"
auth_bytes = auth_str.encode("utf-8")
auth_header = "Basic " + base64.b64encode(auth_bytes).decode("ascii")
return {
"Content-Type": "application/json",
"Authorization": auth_header,
"Accept": "application/json",
}
def make_api_request(
self, endpoint: str, params: dict[str, Any] | None = None
) -> dict[str, Any]:
"""
Make a request to the Confluence API.
Args:
endpoint: API endpoint (without base URL)
params: Query parameters for the request (optional)
Returns:
Response data from the API
Raises:
ValueError: If email, api_token, or base_url have not been set
Exception: If the API request fails
"""
if not all([self.base_url, self.email, self.api_token]):
raise ValueError(
"Confluence credentials not initialized. Call set_credentials() first."
)
url = f"{self.base_url}/wiki/api/{self.api_version}/{endpoint}"
headers = self.get_headers()
try:
response = requests.get(url, headers=headers, params=params, timeout=30)
response.raise_for_status()
return response.json()
except requests.exceptions.RequestException as e:
raise Exception(f"Confluence API request failed: {e!s}") from e
def get_all_spaces(self) -> list[dict[str, Any]]:
"""
Fetch all spaces from Confluence.
Returns:
List of space objects
Raises:
ValueError: If credentials have not been set
Exception: If the API request fails
"""
params = {
"limit": 100,
}
all_spaces = []
cursor = None
while True:
if cursor:
params["cursor"] = cursor
result = self.make_api_request("spaces", params)
if not isinstance(result, dict) or "results" not in result:
raise Exception("Invalid response from Confluence API")
spaces = result["results"]
all_spaces.extend(spaces)
# Check if there are more spaces to fetch
links = result.get("_links", {})
if "next" not in links:
break
# Extract cursor from next link if available
next_link = links["next"]
if "cursor=" in next_link:
cursor = next_link.split("cursor=")[1].split("&")[0]
else:
break
return all_spaces
def get_pages_in_space(
self, space_id: str, include_body: bool = True
) -> list[dict[str, Any]]:
"""
Fetch all pages in a specific space.
Args:
space_id: The ID of the space to fetch pages from
include_body: Whether to include page body content
Returns:
List of page objects
Raises:
ValueError: If credentials have not been set
Exception: If the API request fails
"""
params = {
"limit": 100,
}
if include_body:
params["body-format"] = "storage"
all_pages = []
cursor = None
while True:
if cursor:
params["cursor"] = cursor
result = self.make_api_request(f"spaces/{space_id}/pages", params)
if not isinstance(result, dict) or "results" not in result:
raise Exception("Invalid response from Confluence API")
pages = result["results"]
all_pages.extend(pages)
# Check if there are more pages to fetch
links = result.get("_links", {})
if "next" not in links:
break
# Extract cursor from next link if available
next_link = links["next"]
if "cursor=" in next_link:
cursor = next_link.split("cursor=")[1].split("&")[0]
else:
break
return all_pages
def get_page_comments(self, page_id: str) -> list[dict[str, Any]]:
"""
Fetch all comments for a specific page (both footer and inline comments).
Args:
page_id: The ID of the page to fetch comments from
Returns:
List of comment objects
Raises:
ValueError: If credentials have not been set
Exception: If the API request fails
"""
all_comments = []
# Get footer comments
footer_comments = self._get_comments_for_page(page_id, "footer-comments")
all_comments.extend(footer_comments)
# Get inline comments
inline_comments = self._get_comments_for_page(page_id, "inline-comments")
all_comments.extend(inline_comments)
return all_comments
def _get_comments_for_page(
self, page_id: str, comment_type: str
) -> list[dict[str, Any]]:
"""
Helper method to fetch comments of a specific type for a page.
Args:
page_id: The ID of the page
comment_type: Type of comments ('footer-comments' or 'inline-comments')
Returns:
List of comment objects
"""
params = {
"limit": 100,
"body-format": "storage",
}
all_comments = []
cursor = None
while True:
if cursor:
params["cursor"] = cursor
result = self.make_api_request(f"pages/{page_id}/{comment_type}", params)
if not isinstance(result, dict) or "results" not in result:
break # No comments or invalid response
comments = result["results"]
all_comments.extend(comments)
# Check if there are more comments to fetch
links = result.get("_links", {})
if "next" not in links:
break
# Extract cursor from next link if available
next_link = links["next"]
if "cursor=" in next_link:
cursor = next_link.split("cursor=")[1].split("&")[0]
else:
break
return all_comments
def get_pages_by_date_range(
self,
start_date: str,
end_date: str,
space_ids: list[str] | None = None,
include_comments: bool = True,
) -> tuple[list[dict[str, Any]], str | None]:
"""
Fetch pages within a date range, optionally filtered by spaces.
Args:
start_date: Start date in YYYY-MM-DD format
end_date: End date in YYYY-MM-DD format (inclusive)
space_ids: Optional list of space IDs to filter pages
include_comments: Whether to include comments for each page
Returns:
Tuple containing (pages list with comments, error message or None)
"""
try:
all_pages = []
if space_ids:
# Fetch pages from specific spaces
for space_id in space_ids:
pages = self.get_pages_in_space(space_id, include_body=True)
all_pages.extend(pages)
else:
# Fetch all pages (this might be expensive for large instances)
params = {
"limit": 100,
"body-format": "storage",
}
cursor = None
while True:
if cursor:
params["cursor"] = cursor
result = self.make_api_request("pages", params)
if not isinstance(result, dict) or "results" not in result:
break
pages = result["results"]
all_pages.extend(pages)
links = result.get("_links", {})
if "next" not in links:
break
next_link = links["next"]
if "cursor=" in next_link:
cursor = next_link.split("cursor=")[1].split("&")[0]
else:
break
return all_pages, None
except Exception as e:
return [], f"Error fetching pages: {e!s}"

View file

@ -43,6 +43,7 @@ class DocumentType(str, Enum):
LINEAR_CONNECTOR = "LINEAR_CONNECTOR"
DISCORD_CONNECTOR = "DISCORD_CONNECTOR"
JIRA_CONNECTOR = "JIRA_CONNECTOR"
CONFLUENCE_CONNECTOR = "CONFLUENCE_CONNECTOR"
class SearchSourceConnectorType(str, Enum):
@ -55,6 +56,7 @@ class SearchSourceConnectorType(str, Enum):
LINEAR_CONNECTOR = "LINEAR_CONNECTOR"
DISCORD_CONNECTOR = "DISCORD_CONNECTOR"
JIRA_CONNECTOR = "JIRA_CONNECTOR"
CONFLUENCE_CONNECTOR = "CONFLUENCE_CONNECTOR"
class ChatType(str, Enum):

View file

@ -36,6 +36,7 @@ from app.schemas import (
SearchSourceConnectorUpdate,
)
from app.tasks.connectors_indexing_tasks import (
index_confluence_pages,
index_discord_messages,
index_github_repos,
index_jira_issues,
@ -457,6 +458,21 @@ async def index_connector_content(
)
response_message = "Jira indexing started in the background."
elif connector.connector_type == SearchSourceConnectorType.CONFLUENCE_CONNECTOR:
# Run indexing in background
logger.info(
f"Triggering Confluence indexing for connector {connector_id} into search space {search_space_id} from {indexing_from} to {indexing_to}"
)
background_tasks.add_task(
run_confluence_indexing_with_new_session,
connector_id,
search_space_id,
str(user.id),
indexing_from,
indexing_to,
)
response_message = "Confluence indexing started in the background."
elif connector.connector_type == SearchSourceConnectorType.DISCORD_CONNECTOR:
# Run indexing in background
logger.info(
@ -884,3 +900,63 @@ async def run_jira_indexing(
exc_info=True,
)
# Optionally update status in DB to indicate failure
# Add new helper functions for Confluence indexing
async def run_confluence_indexing_with_new_session(
connector_id: int,
search_space_id: int,
user_id: str,
start_date: str,
end_date: str,
):
"""Wrapper to run Confluence indexing with its own database session."""
logger.info(
f"Background task started: Indexing Confluence connector {connector_id} into space {search_space_id} from {start_date} to {end_date}"
)
async with async_session_maker() as session:
await run_confluence_indexing(
session, connector_id, search_space_id, user_id, start_date, end_date
)
logger.info(
f"Background task finished: Indexing Confluence connector {connector_id}"
)
async def run_confluence_indexing(
session: AsyncSession,
connector_id: int,
search_space_id: int,
user_id: str,
start_date: str,
end_date: str,
):
"""Runs the Confluence indexing task and updates the timestamp."""
try:
indexed_count, error_message = await index_confluence_pages(
session,
connector_id,
search_space_id,
user_id,
start_date,
end_date,
update_last_indexed=False,
)
if error_message:
logger.error(
f"Confluence indexing failed for connector {connector_id}: {error_message}"
)
# Optionally update status in DB to indicate failure
else:
logger.info(
f"Confluence indexing successful for connector {connector_id}. Indexed {indexed_count} documents."
)
# Update the last indexed timestamp only on success
await update_connector_last_indexed(session, connector_id)
await session.commit() # Commit timestamp update
except Exception as e:
logger.error(
f"Critical error in run_confluence_indexing for connector {connector_id}: {e}",
exc_info=True,
)
# Optionally update status in DB to indicate failure

View file

@ -143,6 +143,30 @@ class SearchSourceConnectorBase(BaseModel):
if not config.get("JIRA_BASE_URL"):
raise ValueError("JIRA_BASE_URL cannot be empty")
elif connector_type == SearchSourceConnectorType.CONFLUENCE_CONNECTOR:
# For CONFLUENCE_CONNECTOR, only allow specific keys
allowed_keys = [
"CONFLUENCE_BASE_URL",
"CONFLUENCE_EMAIL",
"CONFLUENCE_API_TOKEN",
]
if set(config.keys()) != set(allowed_keys):
raise ValueError(
f"For CONFLUENCE_CONNECTOR connector type, config must only contain these keys: {allowed_keys}"
)
# Ensure the email is not empty
if not config.get("CONFLUENCE_EMAIL"):
raise ValueError("CONFLUENCE_EMAIL cannot be empty")
# Ensure the API token is not empty
if not config.get("CONFLUENCE_API_TOKEN"):
raise ValueError("CONFLUENCE_API_TOKEN cannot be empty")
# Ensure the base URL is not empty
if not config.get("CONFLUENCE_BASE_URL"):
raise ValueError("CONFLUENCE_BASE_URL cannot be empty")
return config

View file

@ -1073,6 +1073,103 @@ class ConnectorService:
return result_object, jira_chunks
async def search_confluence(
self,
user_query: str,
user_id: str,
search_space_id: int,
top_k: int = 20,
search_mode: SearchMode = SearchMode.CHUNKS,
) -> tuple:
"""
Search for Confluence pages and return both the source information and langchain documents
Args:
user_query: The user's query
user_id: The user's ID
search_space_id: The search space ID to search in
top_k: Maximum number of results to return
search_mode: Search mode (CHUNKS or DOCUMENTS)
Returns:
tuple: (sources_info, langchain_documents)
"""
if search_mode == SearchMode.CHUNKS:
confluence_chunks = await self.chunk_retriever.hybrid_search(
query_text=user_query,
top_k=top_k,
user_id=user_id,
search_space_id=search_space_id,
document_type="CONFLUENCE_CONNECTOR",
)
elif search_mode == SearchMode.DOCUMENTS:
confluence_chunks = await self.document_retriever.hybrid_search(
query_text=user_query,
top_k=top_k,
user_id=user_id,
search_space_id=search_space_id,
document_type="CONFLUENCE_CONNECTOR",
)
# Transform document retriever results to match expected format
confluence_chunks = self._transform_document_results(confluence_chunks)
# Early return if no results
if not confluence_chunks:
return {
"id": 40,
"name": "Confluence",
"type": "CONFLUENCE_CONNECTOR",
"sources": [],
}, []
# Process each chunk and create sources directly without deduplication
sources_list = []
async with self.counter_lock:
for _i, chunk in enumerate(confluence_chunks):
# Extract document metadata
document = chunk.get("document", {})
metadata = document.get("metadata", {})
# Extract Confluence-specific metadata
page_title = metadata.get("page_title", "Untitled Page")
page_id = metadata.get("page_id", "")
space_key = metadata.get("space_key", "")
# Create a more descriptive title for Confluence pages
title = f"Confluence: {page_title}"
if space_key:
title += f" ({space_key})"
# Create a more descriptive description for Confluence pages
description = chunk.get("content", "")[:100]
if len(description) == 100:
description += "..."
# For URL, we can use a placeholder or construct a URL to the Confluence page if available
url = "" # TODO: Add base_url to metadata
if page_id:
url = f"{metadata.get('base_url')}/pages/{page_id}"
source = {
"id": document.get("id", self.source_id_counter),
"title": title,
"description": description,
"url": url,
}
self.source_id_counter += 1
sources_list.append(source)
# Create result object
result_object = {
"id": 40,
"name": "Confluence",
"type": "CONFLUENCE_CONNECTOR",
"sources": sources_list,
}
return result_object, confluence_chunks
async def search_linkup(
self, user_query: str, user_id: str, mode: str = "standard"
) -> tuple:

View file

@ -8,6 +8,7 @@ from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy.future import select
from app.config import config
from app.connectors.confluence_connector import ConfluenceConnector
from app.connectors.discord_connector import DiscordConnector
from app.connectors.github_connector import GitHubConnector
from app.connectors.jira_connector import JiraConnector
@ -2329,3 +2330,371 @@ async def index_jira_issues(
)
logger.error(f"Failed to index JIRA issues: {e!s}", exc_info=True)
return 0, f"Failed to index JIRA issues: {e!s}"
async def index_confluence_pages(
session: AsyncSession,
connector_id: int,
search_space_id: int,
user_id: str,
start_date: str | None = None,
end_date: str | None = None,
update_last_indexed: bool = True,
) -> tuple[int, str | None]:
"""
Index Confluence pages and comments.
Args:
session: Database session
connector_id: ID of the Confluence connector
search_space_id: ID of the search space to store documents in
user_id: User ID
start_date: Start date for indexing (YYYY-MM-DD format)
end_date: End date for indexing (YYYY-MM-DD format)
update_last_indexed: Whether to update the last_indexed_at timestamp (default: True)
Returns:
Tuple containing (number of documents indexed, error message or None)
"""
task_logger = TaskLoggingService(session, search_space_id)
# Log task start
log_entry = await task_logger.log_task_start(
task_name="confluence_pages_indexing",
source="connector_indexing_task",
message=f"Starting Confluence pages indexing for connector {connector_id}",
metadata={
"connector_id": connector_id,
"user_id": str(user_id),
"start_date": start_date,
"end_date": end_date,
},
)
try:
# Get the connector from the database
result = await session.execute(
select(SearchSourceConnector).filter(
SearchSourceConnector.id == connector_id,
SearchSourceConnector.connector_type
== SearchSourceConnectorType.CONFLUENCE_CONNECTOR,
)
)
connector = result.scalars().first()
if not connector:
await task_logger.log_task_failure(
log_entry,
f"Connector with ID {connector_id} not found",
"Connector not found",
{"error_type": "ConnectorNotFound"},
)
return 0, f"Connector with ID {connector_id} not found"
# Get the Confluence credentials from the connector config
confluence_email = connector.config.get("CONFLUENCE_EMAIL")
confluence_api_token = connector.config.get("CONFLUENCE_API_TOKEN")
confluence_base_url = connector.config.get("CONFLUENCE_BASE_URL")
if not confluence_email or not confluence_api_token or not confluence_base_url:
await task_logger.log_task_failure(
log_entry,
f"Confluence credentials not found in connector config for connector {connector_id}",
"Missing Confluence credentials",
{"error_type": "MissingCredentials"},
)
return 0, "Confluence credentials not found in connector config"
# Initialize Confluence client
await task_logger.log_task_progress(
log_entry,
f"Initializing Confluence client for connector {connector_id}",
{"stage": "client_initialization"},
)
confluence_client = ConfluenceConnector(
base_url=confluence_base_url,
email=confluence_email,
api_token=confluence_api_token,
)
# Calculate date range
if start_date is None or end_date is None:
# Fall back to calculating dates based on last_indexed_at
calculated_end_date = datetime.now()
# Use last_indexed_at as start date if available, otherwise use 365 days ago
if connector.last_indexed_at:
# Convert dates to be comparable (both timezone-naive)
last_indexed_naive = (
connector.last_indexed_at.replace(tzinfo=None)
if connector.last_indexed_at.tzinfo
else connector.last_indexed_at
)
# Check if last_indexed_at is in the future or after end_date
if last_indexed_naive > calculated_end_date:
logger.warning(
f"Last indexed date ({last_indexed_naive.strftime('%Y-%m-%d')}) is in the future. Using 365 days ago instead."
)
calculated_start_date = calculated_end_date - timedelta(days=365)
else:
calculated_start_date = last_indexed_naive
logger.info(
f"Using last_indexed_at ({calculated_start_date.strftime('%Y-%m-%d')}) as start date"
)
else:
calculated_start_date = calculated_end_date - timedelta(
days=365
) # Use 365 days as default
logger.info(
f"No last_indexed_at found, using {calculated_start_date.strftime('%Y-%m-%d')} (365 days ago) as start date"
)
# Use calculated dates if not provided
start_date_str = (
start_date if start_date else calculated_start_date.strftime("%Y-%m-%d")
)
end_date_str = (
end_date if end_date else calculated_end_date.strftime("%Y-%m-%d")
)
else:
# Use provided dates
start_date_str = start_date
end_date_str = end_date
await task_logger.log_task_progress(
log_entry,
f"Fetching Confluence pages from {start_date_str} to {end_date_str}",
{
"stage": "fetching_pages",
"start_date": start_date_str,
"end_date": end_date_str,
},
)
# Get pages within date range
try:
pages, error = confluence_client.get_pages_by_date_range(
start_date=start_date_str, end_date=end_date_str, include_comments=True
)
if error:
logger.error(f"Failed to get Confluence pages: {error}")
# Don't treat "No pages found" as an error that should stop indexing
if "No pages found" in error:
logger.info(
"No pages found is not a critical error, continuing with update"
)
if update_last_indexed:
connector.last_indexed_at = datetime.now()
await session.commit()
logger.info(
f"Updated last_indexed_at to {connector.last_indexed_at} despite no pages found"
)
await task_logger.log_task_success(
log_entry,
f"No Confluence pages found in date range {start_date_str} to {end_date_str}",
{"pages_found": 0},
)
return 0, None
else:
await task_logger.log_task_failure(
log_entry,
f"Failed to get Confluence pages: {error}",
"API Error",
{"error_type": "APIError"},
)
return 0, f"Failed to get Confluence pages: {error}"
logger.info(f"Retrieved {len(pages)} pages from Confluence API")
except Exception as e:
logger.error(f"Error fetching Confluence pages: {e!s}", exc_info=True)
return 0, f"Error fetching Confluence pages: {e!s}"
# Process and index each page
documents_indexed = 0
skipped_pages = []
documents_skipped = 0
for page in pages:
try:
page_id = page.get("id")
page_title = page.get("title", "")
space_id = page.get("spaceId", "")
if not page_id or not page_title:
logger.warning(
f"Skipping page with missing ID or title: {page_id or 'Unknown'}"
)
skipped_pages.append(f"{page_title or 'Unknown'} (missing data)")
documents_skipped += 1
continue
# Extract page content
page_content = ""
if page.get("body") and page["body"].get("storage"):
page_content = page["body"]["storage"].get("value", "")
# Add comments to content
comments = page.get("comments", [])
comments_content = ""
if comments:
comments_content = "\n\n## Comments\n\n"
for comment in comments:
comment_body = ""
if comment.get("body") and comment["body"].get("storage"):
comment_body = comment["body"]["storage"].get("value", "")
comment_author = comment.get("version", {}).get(
"authorId", "Unknown"
)
comment_date = comment.get("version", {}).get("createdAt", "")
comments_content += f"**Comment by {comment_author}** ({comment_date}):\n{comment_body}\n\n"
# Combine page content with comments
full_content = f"# {page_title}\n\n{page_content}{comments_content}"
if not full_content.strip():
logger.warning(f"Skipping page with no content: {page_title}")
skipped_pages.append(f"{page_title} (no content)")
documents_skipped += 1
continue
# Create a simple summary
summary_content = (
f"Confluence Page: {page_title}\n\nSpace ID: {space_id}\n\n"
)
if page_content:
# Take first 500 characters of content for summary
content_preview = page_content[:500]
if len(page_content) > 500:
content_preview += "..."
summary_content += f"Content Preview: {content_preview}\n\n"
# Add comment count
comment_count = len(comments)
summary_content += f"Comments: {comment_count}"
# Generate content hash
content_hash = generate_content_hash(full_content, search_space_id)
# Check if document already exists
existing_doc_by_hash_result = await session.execute(
select(Document).where(Document.content_hash == content_hash)
)
existing_document_by_hash = (
existing_doc_by_hash_result.scalars().first()
)
if existing_document_by_hash:
logger.info(
f"Document with content hash {content_hash} already exists for page {page_title}. Skipping processing."
)
documents_skipped += 1
continue
# Generate embedding for the summary
summary_embedding = config.embedding_model_instance.embed(
summary_content
)
# Process chunks - using the full page content with comments
chunks = [
Chunk(
content=chunk.text,
embedding=config.embedding_model_instance.embed(chunk.text),
)
for chunk in config.chunker_instance.chunk(full_content)
]
# Create and store new document
logger.info(f"Creating new document for page {page_title}")
document = Document(
search_space_id=search_space_id,
title=f"Confluence - {page_title}",
document_type=DocumentType.CONFLUENCE_CONNECTOR,
document_metadata={
"page_id": page_id,
"page_title": page_title,
"space_id": space_id,
"comment_count": comment_count,
"indexed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
},
content=summary_content,
content_hash=content_hash,
embedding=summary_embedding,
chunks=chunks,
)
session.add(document)
documents_indexed += 1
logger.info(f"Successfully indexed new page {page_title}")
except Exception as e:
logger.error(
f"Error processing page {page.get('title', 'Unknown')}: {e!s}",
exc_info=True,
)
skipped_pages.append(
f"{page.get('title', 'Unknown')} (processing error)"
)
documents_skipped += 1
continue # Skip this page and continue with others
# Update the last_indexed_at timestamp for the connector only if requested
total_processed = documents_indexed
if update_last_indexed:
connector.last_indexed_at = datetime.now()
logger.info(f"Updated last_indexed_at to {connector.last_indexed_at}")
# Commit all changes
await session.commit()
logger.info(
"Successfully committed all Confluence document changes to database"
)
# Log success
await task_logger.log_task_success(
log_entry,
f"Successfully completed Confluence indexing for connector {connector_id}",
{
"pages_processed": total_processed,
"documents_indexed": documents_indexed,
"documents_skipped": documents_skipped,
"skipped_pages_count": len(skipped_pages),
},
)
logger.info(
f"Confluence indexing completed: {documents_indexed} new pages, {documents_skipped} skipped"
)
return (
total_processed,
None,
) # Return None as the error message to indicate success
except SQLAlchemyError as db_error:
await session.rollback()
await task_logger.log_task_failure(
log_entry,
f"Database error during Confluence indexing for connector {connector_id}",
str(db_error),
{"error_type": "SQLAlchemyError"},
)
logger.error(f"Database error: {db_error!s}", exc_info=True)
return 0, f"Database error: {db_error!s}"
except Exception as e:
await session.rollback()
await task_logger.log_task_failure(
log_entry,
f"Failed to index Confluence pages for connector {connector_id}",
str(e),
{"error_type": type(e).__name__},
)
logger.error(f"Failed to index Confluence pages: {e!s}", exc_info=True)
return 0, f"Failed to index Confluence pages: {e!s}"

View file

@ -201,6 +201,33 @@ export default function EditConnectorPage() {
</div>
)}
{/* == Confluence == */}
{connector.connector_type === "CONFLUENCE_CONNECTOR" && (
<div className="space-y-4">
<EditSimpleTokenForm
control={editForm.control}
fieldName="CONFLUENCE_BASE_URL"
fieldLabel="Confluence Base URL"
fieldDescription="Update your Confluence instance URL if needed."
placeholder="https://yourcompany.atlassian.net"
/>
<EditSimpleTokenForm
control={editForm.control}
fieldName="CONFLUENCE_EMAIL"
fieldLabel="Confluence Email"
fieldDescription="Update your Atlassian account email if needed."
placeholder="your.email@company.com"
/>
<EditSimpleTokenForm
control={editForm.control}
fieldName="CONFLUENCE_API_TOKEN"
fieldLabel="Confluence API Token"
fieldDescription="Update your Confluence API Token if needed."
placeholder="Your Confluence API Token"
/>
</div>
)}
{/* == Linkup == */}
{connector.connector_type === "LINKUP_API" && (
<EditSimpleTokenForm

View file

@ -0,0 +1,297 @@
"use client";
import { zodResolver } from "@hookform/resolvers/zod";
import { motion } from "framer-motion";
import { ArrowLeft, Check, Info, Loader2 } from "lucide-react";
import { useParams, useRouter } from "next/navigation";
import { useState } from "react";
import { useForm } from "react-hook-form";
import { toast } from "sonner";
import * as z from "zod";
import { Alert, AlertDescription } from "@/components/ui/alert";
import { Button } from "@/components/ui/button";
import { Card, CardContent, CardDescription, CardHeader, CardTitle } from "@/components/ui/card";
import {
Form,
FormControl,
FormDescription,
FormField,
FormItem,
FormLabel,
FormMessage,
} from "@/components/ui/form";
import { Input } from "@/components/ui/input";
import { Tabs, TabsContent, TabsList, TabsTrigger } from "@/components/ui/tabs";
import { useSearchSourceConnectors } from "@/hooks/useSearchSourceConnectors";
// Define the form schema with Zod
const confluenceConnectorFormSchema = z.object({
name: z.string().min(3, {
message: "Connector name must be at least 3 characters.",
}),
base_url: z
.string()
.url({
message: "Please enter a valid Confluence URL (e.g., https://yourcompany.atlassian.net)",
})
.refine(
(url) => {
return url.includes("atlassian.net") || url.includes("confluence");
},
{
message: "Please enter a valid Confluence instance URL",
}
),
email: z.string().email({
message: "Please enter a valid email address.",
}),
api_token: z.string().min(10, {
message: "Confluence API Token is required and must be valid.",
}),
});
// Define the type for the form values
type ConfluenceConnectorFormValues = z.infer<typeof confluenceConnectorFormSchema>;
export default function ConfluenceConnectorPage() {
const router = useRouter();
const params = useParams();
const searchSpaceId = params.search_space_id as string;
const [isSubmitting, setIsSubmitting] = useState(false);
const { createConnector } = useSearchSourceConnectors();
// Initialize the form
const form = useForm<ConfluenceConnectorFormValues>({
resolver: zodResolver(confluenceConnectorFormSchema),
defaultValues: {
name: "Confluence Connector",
base_url: "",
email: "",
api_token: "",
},
});
// Handle form submission
const onSubmit = async (values: ConfluenceConnectorFormValues) => {
setIsSubmitting(true);
try {
await createConnector({
name: values.name,
connector_type: "CONFLUENCE_CONNECTOR",
config: {
CONFLUENCE_BASE_URL: values.base_url,
CONFLUENCE_EMAIL: values.email,
CONFLUENCE_API_TOKEN: values.api_token,
},
is_indexable: true,
last_indexed_at: null,
});
toast.success("Confluence connector created successfully!");
// Navigate back to connectors page
router.push(`/dashboard/${searchSpaceId}/connectors`);
} catch (error) {
console.error("Error creating connector:", error);
toast.error(error instanceof Error ? error.message : "Failed to create connector");
} finally {
setIsSubmitting(false);
}
};
return (
<div className="container mx-auto py-8 max-w-3xl">
<Button
variant="ghost"
className="mb-6"
onClick={() => router.push(`/dashboard/${searchSpaceId}/connectors/add`)}
>
<ArrowLeft className="mr-2 h-4 w-4" />
Back to Connectors
</Button>
<motion.div
initial={{ opacity: 0, y: 20 }}
animate={{ opacity: 1, y: 0 }}
transition={{ duration: 0.5 }}
>
<Tabs defaultValue="connect" className="w-full">
<TabsList className="grid w-full grid-cols-2 mb-6">
<TabsTrigger value="connect">Connect</TabsTrigger>
<TabsTrigger value="documentation">Documentation</TabsTrigger>
</TabsList>
<TabsContent value="connect">
<Card>
<CardHeader>
<CardTitle>Connect to Confluence</CardTitle>
<CardDescription>
Connect your Confluence instance to index pages and comments from your spaces.
</CardDescription>
</CardHeader>
<CardContent className="space-y-6">
<Alert>
<Info className="h-4 w-4" />
<AlertDescription>
You'll need to create an API token from your{" "}
<a
href="https://id.atlassian.com/manage-profile/security/api-tokens"
target="_blank"
rel="noopener noreferrer"
className="font-medium underline underline-offset-4"
>
Atlassian Account Settings
</a>
</AlertDescription>
</Alert>
<Form {...form}>
<form onSubmit={form.handleSubmit(onSubmit)} className="space-y-6">
<FormField
control={form.control}
name="name"
render={({ field }) => (
<FormItem>
<FormLabel>Connector Name</FormLabel>
<FormControl>
<Input placeholder="My Confluence Connector" {...field} />
</FormControl>
<FormDescription>
A friendly name to identify this connector.
</FormDescription>
<FormMessage />
</FormItem>
)}
/>
<FormField
control={form.control}
name="base_url"
render={({ field }) => (
<FormItem>
<FormLabel>Confluence Instance URL</FormLabel>
<FormControl>
<Input placeholder="https://yourcompany.atlassian.net" {...field} />
</FormControl>
<FormDescription>
Your Confluence instance URL. For Atlassian Cloud, this is typically
https://yourcompany.atlassian.net
</FormDescription>
<FormMessage />
</FormItem>
)}
/>
<FormField
control={form.control}
name="email"
render={({ field }) => (
<FormItem>
<FormLabel>Email Address</FormLabel>
<FormControl>
<Input type="email" placeholder="your.email@company.com" {...field} />
</FormControl>
<FormDescription>Your Atlassian account email address.</FormDescription>
<FormMessage />
</FormItem>
)}
/>
<FormField
control={form.control}
name="api_token"
render={({ field }) => (
<FormItem>
<FormLabel>API Token</FormLabel>
<FormControl>
<Input
type="password"
placeholder="Your Confluence API Token"
{...field}
/>
</FormControl>
<FormDescription>
Your Confluence API Token will be encrypted and stored securely.
</FormDescription>
<FormMessage />
</FormItem>
)}
/>
<div className="flex justify-end">
<Button type="submit" disabled={isSubmitting} className="w-full sm:w-auto">
{isSubmitting ? (
<>
<Loader2 className="mr-2 h-4 w-4 animate-spin" />
Connecting...
</>
) : (
<>
<Check className="mr-2 h-4 w-4" />
Connect Confluence
</>
)}
</Button>
</div>
</form>
</Form>
</CardContent>
</Card>
</TabsContent>
<TabsContent value="documentation">
<Card>
<CardHeader>
<CardTitle>Confluence Integration Guide</CardTitle>
<CardDescription>
Learn how to set up and use the Confluence connector.
</CardDescription>
</CardHeader>
<CardContent className="space-y-6">
<div>
<h3 className="text-lg font-semibold mb-3">What gets indexed?</h3>
<ul className="list-disc list-inside space-y-2 text-sm text-muted-foreground">
<li>All pages from accessible spaces</li>
<li>Page content and metadata</li>
<li>Comments on pages (both footer and inline comments)</li>
<li>Page titles and descriptions</li>
</ul>
</div>
<div>
<h3 className="text-lg font-semibold mb-3">Setup Instructions</h3>
<ol className="list-decimal list-inside space-y-2 text-sm text-muted-foreground">
<li>Go to your Atlassian Account Settings</li>
<li>Navigate to Security API tokens</li>
<li>Create a new API token with appropriate permissions</li>
<li>Copy the token and paste it in the form above</li>
<li>Ensure your account has read access to the spaces you want to index</li>
</ol>
</div>
<div>
<h3 className="text-lg font-semibold mb-3">Permissions Required</h3>
<ul className="list-disc list-inside space-y-2 text-sm text-muted-foreground">
<li>Read access to Confluence spaces</li>
<li>View pages and comments</li>
<li>Access to space metadata</li>
</ul>
</div>
<Alert>
<Info className="h-4 w-4" />
<AlertDescription>
The connector will only index content that your account has permission to view.
Make sure your API token has the necessary permissions for the spaces you want
to index.
</AlertDescription>
</Alert>
</CardContent>
</Card>
</TabsContent>
</Tabs>
</motion.div>
</div>
);
}

View file

@ -1,5 +1,7 @@
"use client";
import {
IconBook,
IconBrandDiscord,
IconBrandGithub,
IconBrandNotion,
@ -14,7 +16,7 @@ import {
IconTicket,
IconWorldWww,
} from "@tabler/icons-react";
import { AnimatePresence, motion } from "framer-motion";
import { AnimatePresence, motion, type Variants } from "framer-motion";
import Link from "next/link";
import { useParams } from "next/navigation";
import { useState } from "react";
@ -125,6 +127,13 @@ const connectorCategories: ConnectorCategory[] = [
icon: <IconBrandGithub className="h-6 w-6" />,
status: "available",
},
{
id: "confluence-connector",
title: "Confluence",
description: "Connect to Confluence to search pages, comments and documentation.",
icon: <IconBook className="h-6 w-6" />,
status: "available",
},
],
},
{
@ -165,7 +174,7 @@ const staggerContainer = {
},
};
const cardVariants = {
const cardVariants: Variants = {
hidden: { opacity: 0, y: 20 },
visible: {
opacity: 1,

View file

@ -1,6 +1,7 @@
"use client";
import {
IconBook,
IconBrandDiscord,
IconBrandGithub,
IconBrandNotion,
@ -144,6 +145,7 @@ const documentTypeIcons = {
LINEAR_CONNECTOR: IconLayoutKanban,
JIRA_CONNECTOR: IconTicket,
DISCORD_CONNECTOR: IconBrandDiscord,
CONFLUENCE_CONNECTOR: IconBook,
} as const;
const columns: ColumnDef<Document>[] = [
@ -964,7 +966,13 @@ export default function DocumentsTable() {
function RowActions({ row }: { row: Row<Document> }) {
const [isOpen, setIsOpen] = useState(false);
const [isDeleting, setIsDeleting] = useState(false);
const { deleteDocument, refreshDocuments } = useContext(DocumentsContext)!;
const context = useContext(DocumentsContext);
if (!context) {
throw new Error("DocumentsContext not found");
}
const { deleteDocument, refreshDocuments } = context;
const document = row.original;
const handleDelete = async () => {

View file

@ -33,5 +33,11 @@ export const editConnectorSchema = z.object({
LINEAR_API_KEY: z.string().optional(),
LINKUP_API_KEY: z.string().optional(),
DISCORD_BOT_TOKEN: z.string().optional(),
CONFLUENCE_BASE_URL: z.string().optional(),
CONFLUENCE_EMAIL: z.string().optional(),
CONFLUENCE_API_TOKEN: z.string().optional(),
JIRA_BASE_URL: z.string().optional(),
JIRA_EMAIL: z.string().optional(),
JIRA_API_TOKEN: z.string().optional(),
});
export type EditConnectorFormValues = z.infer<typeof editConnectorSchema>;

View file

@ -46,6 +46,12 @@ export function useConnectorEditPage(connectorId: number, searchSpaceId: string)
TAVILY_API_KEY: "",
LINEAR_API_KEY: "",
DISCORD_BOT_TOKEN: "",
CONFLUENCE_BASE_URL: "",
CONFLUENCE_EMAIL: "",
CONFLUENCE_API_TOKEN: "",
JIRA_BASE_URL: "",
JIRA_EMAIL: "",
JIRA_API_TOKEN: "",
},
});
@ -66,6 +72,12 @@ export function useConnectorEditPage(connectorId: number, searchSpaceId: string)
LINEAR_API_KEY: config.LINEAR_API_KEY || "",
LINKUP_API_KEY: config.LINKUP_API_KEY || "",
DISCORD_BOT_TOKEN: config.DISCORD_BOT_TOKEN || "",
CONFLUENCE_BASE_URL: config.CONFLUENCE_BASE_URL || "",
CONFLUENCE_EMAIL: config.CONFLUENCE_EMAIL || "",
CONFLUENCE_API_TOKEN: config.CONFLUENCE_API_TOKEN || "",
JIRA_BASE_URL: config.JIRA_BASE_URL || "",
JIRA_EMAIL: config.JIRA_EMAIL || "",
JIRA_API_TOKEN: config.JIRA_API_TOKEN || "",
});
if (currentConnector.connector_type === "GITHUB_CONNECTOR") {
const savedRepos = config.repo_full_names || [];
@ -104,7 +116,10 @@ export function useConnectorEditPage(connectorId: number, searchSpaceId: string)
`${process.env.NEXT_PUBLIC_FASTAPI_BACKEND_URL}/api/v1/github/repositories/`,
{
method: "POST",
headers: { "Content-Type": "application/json", Authorization: `Bearer ${token}` },
headers: {
"Content-Type": "application/json",
Authorization: `Bearer ${token}`,
},
body: JSON.stringify({ github_pat: values.github_pat }),
}
);
@ -165,7 +180,10 @@ export function useConnectorEditPage(connectorId: number, searchSpaceId: string)
setIsSaving(false);
return;
}
newConfig = { GITHUB_PAT: currentPatInForm, repo_full_names: newSelectedRepos };
newConfig = {
GITHUB_PAT: currentPatInForm,
repo_full_names: newSelectedRepos,
};
if (reposChanged && newSelectedRepos.length === 0) {
toast.warning("Warning: No repositories selected.");
}
@ -189,7 +207,9 @@ export function useConnectorEditPage(connectorId: number, searchSpaceId: string)
setIsSaving(false);
return;
}
newConfig = { NOTION_INTEGRATION_TOKEN: formData.NOTION_INTEGRATION_TOKEN };
newConfig = {
NOTION_INTEGRATION_TOKEN: formData.NOTION_INTEGRATION_TOKEN,
};
}
break;
case "SERPER_API":
@ -243,6 +263,46 @@ export function useConnectorEditPage(connectorId: number, searchSpaceId: string)
newConfig = { DISCORD_BOT_TOKEN: formData.DISCORD_BOT_TOKEN };
}
break;
case "CONFLUENCE_CONNECTOR":
if (
formData.CONFLUENCE_BASE_URL !== originalConfig.CONFLUENCE_BASE_URL ||
formData.CONFLUENCE_EMAIL !== originalConfig.CONFLUENCE_EMAIL ||
formData.CONFLUENCE_API_TOKEN !== originalConfig.CONFLUENCE_API_TOKEN
) {
if (
!formData.CONFLUENCE_BASE_URL ||
!formData.CONFLUENCE_EMAIL ||
!formData.CONFLUENCE_API_TOKEN
) {
toast.error("All Confluence fields are required.");
setIsSaving(false);
return;
}
newConfig = {
CONFLUENCE_BASE_URL: formData.CONFLUENCE_BASE_URL,
CONFLUENCE_EMAIL: formData.CONFLUENCE_EMAIL,
CONFLUENCE_API_TOKEN: formData.CONFLUENCE_API_TOKEN,
};
}
break;
case "JIRA_CONNECTOR":
if (
formData.JIRA_BASE_URL !== originalConfig.JIRA_BASE_URL ||
formData.JIRA_EMAIL !== originalConfig.JIRA_EMAIL ||
formData.JIRA_API_TOKEN !== originalConfig.JIRA_API_TOKEN
) {
if (!formData.JIRA_BASE_URL || !formData.JIRA_EMAIL || !formData.JIRA_API_TOKEN) {
toast.error("All Jira fields are required.");
setIsSaving(false);
return;
}
newConfig = {
JIRA_BASE_URL: formData.JIRA_BASE_URL,
JIRA_EMAIL: formData.JIRA_EMAIL,
JIRA_API_TOKEN: formData.JIRA_API_TOKEN,
};
}
break;
}
if (newConfig !== null) {
@ -297,6 +357,14 @@ export function useConnectorEditPage(connectorId: number, searchSpaceId: string)
editForm.setValue("LINKUP_API_KEY", newlySavedConfig.LINKUP_API_KEY || "");
} else if (connector.connector_type === "DISCORD_CONNECTOR") {
editForm.setValue("DISCORD_BOT_TOKEN", newlySavedConfig.DISCORD_BOT_TOKEN || "");
} else if (connector.connector_type === "CONFLUENCE_CONNECTOR") {
editForm.setValue("CONFLUENCE_BASE_URL", newlySavedConfig.CONFLUENCE_BASE_URL || "");
editForm.setValue("CONFLUENCE_EMAIL", newlySavedConfig.CONFLUENCE_EMAIL || "");
editForm.setValue("CONFLUENCE_API_TOKEN", newlySavedConfig.CONFLUENCE_API_TOKEN || "");
} else if (connector.connector_type === "JIRA_CONNECTOR") {
editForm.setValue("JIRA_BASE_URL", newlySavedConfig.JIRA_BASE_URL || "");
editForm.setValue("JIRA_EMAIL", newlySavedConfig.JIRA_EMAIL || "");
editForm.setValue("JIRA_API_TOKEN", newlySavedConfig.JIRA_API_TOKEN || "");
}
}
if (connector.connector_type === "GITHUB_CONNECTOR") {