diff --git a/node_modules/.cache/prettier/.prettier-caches/37bd945444dc76999f7aface662ff267baf1dbca.json b/node_modules/.cache/prettier/.prettier-caches/37bd945444dc76999f7aface662ff267baf1dbca.json deleted file mode 100644 index e94f98f..0000000 --- a/node_modules/.cache/prettier/.prettier-caches/37bd945444dc76999f7aface662ff267baf1dbca.json +++ /dev/null @@ -1 +0,0 @@ -{"c301dd3ad9b4036af1d031ecc966d2f02ae1eda4":{"files":{"surfsense_web/hooks/use-documents.ts":["J01fJFm4gXaHAA83Vu5dtOmk/sw=",true],"surfsense_web/components/chat/DocumentsDataTable.tsx":["wgAyJblucK9D3MKKwPe6W9kZphk=",true]},"modified":1753499058926}} \ No newline at end of file diff --git a/node_modules/.cache/prettier/.prettier-caches/a2ecb2962bf19c1099cfe708e42daa0097f94976.json b/node_modules/.cache/prettier/.prettier-caches/a2ecb2962bf19c1099cfe708e42daa0097f94976.json deleted file mode 100644 index e744e3a..0000000 --- a/node_modules/.cache/prettier/.prettier-caches/a2ecb2962bf19c1099cfe708e42daa0097f94976.json +++ /dev/null @@ -1 +0,0 @@ -{"2d0ec64d93969318101ee479b664221b32241665":{"files":{"surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/page.tsx":["EHKKvlOK0vfy0GgHwlG/J2Bx5rw=",true]},"modified":1753426633288}} \ No newline at end of file diff --git a/surfsense_backend/alembic/versions/14_add_confluence_connector_enums.py b/surfsense_backend/alembic/versions/14_add_confluence_connector_enums.py new file mode 100644 index 0000000..f4364b0 --- /dev/null +++ b/surfsense_backend/alembic/versions/14_add_confluence_connector_enums.py @@ -0,0 +1,61 @@ +"""Add CONFLUENCE_CONNECTOR to enums + +Revision ID: 14 +Revises: 13 +""" + +from collections.abc import Sequence + +from alembic import op + +# revision identifiers, used by Alembic. +revision: str = "14" +down_revision: str | None = "13" +branch_labels: str | Sequence[str] | None = None +depends_on: str | Sequence[str] | None = None + + +def upgrade() -> None: + """Safely add 'CONFLUENCE_CONNECTOR' to enum types if missing.""" + + # Add to searchsourceconnectortype enum + op.execute( + """ + DO $$ + BEGIN + IF NOT EXISTS ( + SELECT 1 FROM pg_type t + JOIN pg_enum e ON t.oid = e.enumtypid + WHERE t.typname = 'searchsourceconnectortype' AND e.enumlabel = 'CONFLUENCE_CONNECTOR' + ) THEN + ALTER TYPE searchsourceconnectortype ADD VALUE 'CONFLUENCE_CONNECTOR'; + END IF; + END + $$; + """ + ) + + # Add to documenttype enum + op.execute( + """ + DO $$ + BEGIN + IF NOT EXISTS ( + SELECT 1 FROM pg_type t + JOIN pg_enum e ON t.oid = e.enumtypid + WHERE t.typname = 'documenttype' AND e.enumlabel = 'CONFLUENCE_CONNECTOR' + ) THEN + ALTER TYPE documenttype ADD VALUE 'CONFLUENCE_CONNECTOR'; + END IF; + END + $$; + """ + ) + + +def downgrade() -> None: + """ + Downgrade logic not implemented since PostgreSQL + does not support removing enum values. 
+ """ + pass diff --git a/surfsense_backend/app/agents/researcher/nodes.py b/surfsense_backend/app/agents/researcher/nodes.py index 68a81bb..6f036b7 100644 --- a/surfsense_backend/app/agents/researcher/nodes.py +++ b/surfsense_backend/app/agents/researcher/nodes.py @@ -919,6 +919,32 @@ async def fetch_relevant_documents( ) } ) + elif connector == "CONFLUENCE_CONNECTOR": + ( + source_object, + confluence_chunks, + ) = await connector_service.search_confluence( + user_query=reformulated_query, + user_id=user_id, + search_space_id=search_space_id, + top_k=top_k, + search_mode=search_mode, + ) + + # Add to sources and raw documents + if source_object: + all_sources.append(source_object) + all_raw_documents.extend(confluence_chunks) + + # Stream found document count + if streaming_service and writer: + writer( + { + "yield_value": streaming_service.format_terminal_info_delta( + f"📚 Found {len(confluence_chunks)} Confluence pages related to your query" + ) + } + ) except Exception as e: error_message = f"Error searching connector {connector}: {e!s}" diff --git a/surfsense_backend/app/agents/researcher/qna_agent/prompts.py b/surfsense_backend/app/agents/researcher/qna_agent/prompts.py index eed8e55..bcd799c 100644 --- a/surfsense_backend/app/agents/researcher/qna_agent/prompts.py +++ b/surfsense_backend/app/agents/researcher/qna_agent/prompts.py @@ -16,6 +16,7 @@ You are SurfSense, an advanced AI research assistant that provides detailed, wel - GITHUB_CONNECTOR: "GitHub repository content and issues" (personal repositories and interactions) - LINEAR_CONNECTOR: "Linear project issues and discussions" (personal project management) - JIRA_CONNECTOR: "Jira project issues, tickets, and comments" (personal project tracking) +- CONFLUENCE_CONNECTOR: "Confluence pages and comments" (personal project documentation) - DISCORD_CONNECTOR: "Discord server conversations and shared content" (personal community communications) - TAVILY_API: "Tavily search API results" (personalized search results) - LINKUP_API: "Linkup search API results" (personalized search results) diff --git a/surfsense_backend/app/connectors/confluence_connector.py b/surfsense_backend/app/connectors/confluence_connector.py new file mode 100644 index 0000000..23b81e0 --- /dev/null +++ b/surfsense_backend/app/connectors/confluence_connector.py @@ -0,0 +1,354 @@ +""" +Confluence Connector Module + +A module for retrieving data from Confluence. +Allows fetching pages and their comments from specified spaces. +""" + +import base64 +from typing import Any + +import requests + + +class ConfluenceConnector: + """Class for retrieving data from Confluence.""" + + def __init__( + self, + base_url: str | None = None, + email: str | None = None, + api_token: str | None = None, + ): + """ + Initialize the ConfluenceConnector class. + + Args: + base_url: Confluence instance base URL (e.g., 'https://yourcompany.atlassian.net') (optional) + email: Confluence account email address (optional) + api_token: Confluence API token (optional) + """ + self.base_url = base_url.rstrip("/") if base_url else None + self.email = email + self.api_token = api_token + self.api_version = "v2" # Confluence Cloud API version + + def set_credentials(self, base_url: str, email: str, api_token: str) -> None: + """ + Set the Confluence credentials. 
+ + Args: + base_url: Confluence instance base URL + email: Confluence account email address + api_token: Confluence API token + """ + self.base_url = base_url.rstrip("/") + self.email = email + self.api_token = api_token + + def set_email(self, email: str) -> None: + """ + Set the Confluence account email. + + Args: + email: Confluence account email address + """ + self.email = email + + def set_api_token(self, api_token: str) -> None: + """ + Set the Confluence API token. + + Args: + api_token: Confluence API token + """ + self.api_token = api_token + + def get_headers(self) -> dict[str, str]: + """ + Get headers for Confluence API requests using Basic Authentication. + + Returns: + Dictionary of headers + + Raises: + ValueError: If email, api_token, or base_url have not been set + """ + if not all([self.base_url, self.email, self.api_token]): + raise ValueError( + "Confluence credentials not initialized. Call set_credentials() first." + ) + + # Create Basic Auth header using email:api_token + auth_str = f"{self.email}:{self.api_token}" + auth_bytes = auth_str.encode("utf-8") + auth_header = "Basic " + base64.b64encode(auth_bytes).decode("ascii") + + return { + "Content-Type": "application/json", + "Authorization": auth_header, + "Accept": "application/json", + } + + def make_api_request( + self, endpoint: str, params: dict[str, Any] | None = None + ) -> dict[str, Any]: + """ + Make a request to the Confluence API. + + Args: + endpoint: API endpoint (without base URL) + params: Query parameters for the request (optional) + + Returns: + Response data from the API + + Raises: + ValueError: If email, api_token, or base_url have not been set + Exception: If the API request fails + """ + if not all([self.base_url, self.email, self.api_token]): + raise ValueError( + "Confluence credentials not initialized. Call set_credentials() first." + ) + + url = f"{self.base_url}/wiki/api/{self.api_version}/{endpoint}" + headers = self.get_headers() + + try: + response = requests.get(url, headers=headers, params=params, timeout=30) + response.raise_for_status() + return response.json() + except requests.exceptions.RequestException as e: + raise Exception(f"Confluence API request failed: {e!s}") from e + + def get_all_spaces(self) -> list[dict[str, Any]]: + """ + Fetch all spaces from Confluence. + + Returns: + List of space objects + + Raises: + ValueError: If credentials have not been set + Exception: If the API request fails + """ + params = { + "limit": 100, + } + + all_spaces = [] + cursor = None + + while True: + if cursor: + params["cursor"] = cursor + + result = self.make_api_request("spaces", params) + + if not isinstance(result, dict) or "results" not in result: + raise Exception("Invalid response from Confluence API") + + spaces = result["results"] + all_spaces.extend(spaces) + + # Check if there are more spaces to fetch + links = result.get("_links", {}) + if "next" not in links: + break + + # Extract cursor from next link if available + next_link = links["next"] + if "cursor=" in next_link: + cursor = next_link.split("cursor=")[1].split("&")[0] + else: + break + + return all_spaces + + def get_pages_in_space( + self, space_id: str, include_body: bool = True + ) -> list[dict[str, Any]]: + """ + Fetch all pages in a specific space. 
+ + Args: + space_id: The ID of the space to fetch pages from + include_body: Whether to include page body content + + Returns: + List of page objects + + Raises: + ValueError: If credentials have not been set + Exception: If the API request fails + """ + params = { + "limit": 100, + } + + if include_body: + params["body-format"] = "storage" + + all_pages = [] + cursor = None + + while True: + if cursor: + params["cursor"] = cursor + + result = self.make_api_request(f"spaces/{space_id}/pages", params) + + if not isinstance(result, dict) or "results" not in result: + raise Exception("Invalid response from Confluence API") + + pages = result["results"] + all_pages.extend(pages) + + # Check if there are more pages to fetch + links = result.get("_links", {}) + if "next" not in links: + break + + # Extract cursor from next link if available + next_link = links["next"] + if "cursor=" in next_link: + cursor = next_link.split("cursor=")[1].split("&")[0] + else: + break + + return all_pages + + def get_page_comments(self, page_id: str) -> list[dict[str, Any]]: + """ + Fetch all comments for a specific page (both footer and inline comments). + + Args: + page_id: The ID of the page to fetch comments from + + Returns: + List of comment objects + + Raises: + ValueError: If credentials have not been set + Exception: If the API request fails + """ + all_comments = [] + + # Get footer comments + footer_comments = self._get_comments_for_page(page_id, "footer-comments") + all_comments.extend(footer_comments) + + # Get inline comments + inline_comments = self._get_comments_for_page(page_id, "inline-comments") + all_comments.extend(inline_comments) + + return all_comments + + def _get_comments_for_page( + self, page_id: str, comment_type: str + ) -> list[dict[str, Any]]: + """ + Helper method to fetch comments of a specific type for a page. + + Args: + page_id: The ID of the page + comment_type: Type of comments ('footer-comments' or 'inline-comments') + + Returns: + List of comment objects + """ + params = { + "limit": 100, + "body-format": "storage", + } + + all_comments = [] + cursor = None + + while True: + if cursor: + params["cursor"] = cursor + + result = self.make_api_request(f"pages/{page_id}/{comment_type}", params) + + if not isinstance(result, dict) or "results" not in result: + break # No comments or invalid response + + comments = result["results"] + all_comments.extend(comments) + + # Check if there are more comments to fetch + links = result.get("_links", {}) + if "next" not in links: + break + + # Extract cursor from next link if available + next_link = links["next"] + if "cursor=" in next_link: + cursor = next_link.split("cursor=")[1].split("&")[0] + else: + break + + return all_comments + + def get_pages_by_date_range( + self, + start_date: str, + end_date: str, + space_ids: list[str] | None = None, + include_comments: bool = True, + ) -> tuple[list[dict[str, Any]], str | None]: + """ + Fetch pages within a date range, optionally filtered by spaces. 
+ + Args: + start_date: Start date in YYYY-MM-DD format + end_date: End date in YYYY-MM-DD format (inclusive) + space_ids: Optional list of space IDs to filter pages + include_comments: Whether to include comments for each page + + Returns: + Tuple containing (pages list with comments, error message or None) + """ + try: + all_pages = [] + + if space_ids: + # Fetch pages from specific spaces + for space_id in space_ids: + pages = self.get_pages_in_space(space_id, include_body=True) + all_pages.extend(pages) + else: + # Fetch all pages (this might be expensive for large instances) + params = { + "limit": 100, + "body-format": "storage", + } + + cursor = None + while True: + if cursor: + params["cursor"] = cursor + + result = self.make_api_request("pages", params) + if not isinstance(result, dict) or "results" not in result: + break + + pages = result["results"] + all_pages.extend(pages) + + links = result.get("_links", {}) + if "next" not in links: + break + + next_link = links["next"] + if "cursor=" in next_link: + cursor = next_link.split("cursor=")[1].split("&")[0] + else: + break + + return all_pages, None + + except Exception as e: + return [], f"Error fetching pages: {e!s}" diff --git a/surfsense_backend/app/db.py b/surfsense_backend/app/db.py index 1a7aa57..b067752 100644 --- a/surfsense_backend/app/db.py +++ b/surfsense_backend/app/db.py @@ -43,6 +43,7 @@ class DocumentType(str, Enum): LINEAR_CONNECTOR = "LINEAR_CONNECTOR" DISCORD_CONNECTOR = "DISCORD_CONNECTOR" JIRA_CONNECTOR = "JIRA_CONNECTOR" + CONFLUENCE_CONNECTOR = "CONFLUENCE_CONNECTOR" class SearchSourceConnectorType(str, Enum): @@ -55,6 +56,7 @@ class SearchSourceConnectorType(str, Enum): LINEAR_CONNECTOR = "LINEAR_CONNECTOR" DISCORD_CONNECTOR = "DISCORD_CONNECTOR" JIRA_CONNECTOR = "JIRA_CONNECTOR" + CONFLUENCE_CONNECTOR = "CONFLUENCE_CONNECTOR" class ChatType(str, Enum): diff --git a/surfsense_backend/app/routes/search_source_connectors_routes.py b/surfsense_backend/app/routes/search_source_connectors_routes.py index 4c3d691..6a10910 100644 --- a/surfsense_backend/app/routes/search_source_connectors_routes.py +++ b/surfsense_backend/app/routes/search_source_connectors_routes.py @@ -36,6 +36,7 @@ from app.schemas import ( SearchSourceConnectorUpdate, ) from app.tasks.connectors_indexing_tasks import ( + index_confluence_pages, index_discord_messages, index_github_repos, index_jira_issues, @@ -457,6 +458,21 @@ async def index_connector_content( ) response_message = "Jira indexing started in the background." + elif connector.connector_type == SearchSourceConnectorType.CONFLUENCE_CONNECTOR: + # Run indexing in background + logger.info( + f"Triggering Confluence indexing for connector {connector_id} into search space {search_space_id} from {indexing_from} to {indexing_to}" + ) + background_tasks.add_task( + run_confluence_indexing_with_new_session, + connector_id, + search_space_id, + str(user.id), + indexing_from, + indexing_to, + ) + response_message = "Confluence indexing started in the background." 
+ elif connector.connector_type == SearchSourceConnectorType.DISCORD_CONNECTOR: # Run indexing in background logger.info( @@ -884,3 +900,63 @@ async def run_jira_indexing( exc_info=True, ) # Optionally update status in DB to indicate failure + + +# Add new helper functions for Confluence indexing +async def run_confluence_indexing_with_new_session( + connector_id: int, + search_space_id: int, + user_id: str, + start_date: str, + end_date: str, +): + """Wrapper to run Confluence indexing with its own database session.""" + logger.info( + f"Background task started: Indexing Confluence connector {connector_id} into space {search_space_id} from {start_date} to {end_date}" + ) + async with async_session_maker() as session: + await run_confluence_indexing( + session, connector_id, search_space_id, user_id, start_date, end_date + ) + logger.info( + f"Background task finished: Indexing Confluence connector {connector_id}" + ) + + +async def run_confluence_indexing( + session: AsyncSession, + connector_id: int, + search_space_id: int, + user_id: str, + start_date: str, + end_date: str, +): + """Runs the Confluence indexing task and updates the timestamp.""" + try: + indexed_count, error_message = await index_confluence_pages( + session, + connector_id, + search_space_id, + user_id, + start_date, + end_date, + update_last_indexed=False, + ) + if error_message: + logger.error( + f"Confluence indexing failed for connector {connector_id}: {error_message}" + ) + # Optionally update status in DB to indicate failure + else: + logger.info( + f"Confluence indexing successful for connector {connector_id}. Indexed {indexed_count} documents." + ) + # Update the last indexed timestamp only on success + await update_connector_last_indexed(session, connector_id) + await session.commit() # Commit timestamp update + except Exception as e: + logger.error( + f"Critical error in run_confluence_indexing for connector {connector_id}: {e}", + exc_info=True, + ) + # Optionally update status in DB to indicate failure diff --git a/surfsense_backend/app/schemas/search_source_connector.py b/surfsense_backend/app/schemas/search_source_connector.py index 9c43d07..a49d332 100644 --- a/surfsense_backend/app/schemas/search_source_connector.py +++ b/surfsense_backend/app/schemas/search_source_connector.py @@ -143,6 +143,30 @@ class SearchSourceConnectorBase(BaseModel): if not config.get("JIRA_BASE_URL"): raise ValueError("JIRA_BASE_URL cannot be empty") + elif connector_type == SearchSourceConnectorType.CONFLUENCE_CONNECTOR: + # For CONFLUENCE_CONNECTOR, only allow specific keys + allowed_keys = [ + "CONFLUENCE_BASE_URL", + "CONFLUENCE_EMAIL", + "CONFLUENCE_API_TOKEN", + ] + if set(config.keys()) != set(allowed_keys): + raise ValueError( + f"For CONFLUENCE_CONNECTOR connector type, config must only contain these keys: {allowed_keys}" + ) + + # Ensure the email is not empty + if not config.get("CONFLUENCE_EMAIL"): + raise ValueError("CONFLUENCE_EMAIL cannot be empty") + + # Ensure the API token is not empty + if not config.get("CONFLUENCE_API_TOKEN"): + raise ValueError("CONFLUENCE_API_TOKEN cannot be empty") + + # Ensure the base URL is not empty + if not config.get("CONFLUENCE_BASE_URL"): + raise ValueError("CONFLUENCE_BASE_URL cannot be empty") + return config diff --git a/surfsense_backend/app/services/connector_service.py b/surfsense_backend/app/services/connector_service.py index 1c6d612..fb87a7b 100644 --- a/surfsense_backend/app/services/connector_service.py +++ b/surfsense_backend/app/services/connector_service.py @@ 
-1073,6 +1073,103 @@ class ConnectorService: return result_object, jira_chunks + async def search_confluence( + self, + user_query: str, + user_id: str, + search_space_id: int, + top_k: int = 20, + search_mode: SearchMode = SearchMode.CHUNKS, + ) -> tuple: + """ + Search for Confluence pages and return both the source information and langchain documents + + Args: + user_query: The user's query + user_id: The user's ID + search_space_id: The search space ID to search in + top_k: Maximum number of results to return + search_mode: Search mode (CHUNKS or DOCUMENTS) + + Returns: + tuple: (sources_info, langchain_documents) + """ + if search_mode == SearchMode.CHUNKS: + confluence_chunks = await self.chunk_retriever.hybrid_search( + query_text=user_query, + top_k=top_k, + user_id=user_id, + search_space_id=search_space_id, + document_type="CONFLUENCE_CONNECTOR", + ) + elif search_mode == SearchMode.DOCUMENTS: + confluence_chunks = await self.document_retriever.hybrid_search( + query_text=user_query, + top_k=top_k, + user_id=user_id, + search_space_id=search_space_id, + document_type="CONFLUENCE_CONNECTOR", + ) + # Transform document retriever results to match expected format + confluence_chunks = self._transform_document_results(confluence_chunks) + + # Early return if no results + if not confluence_chunks: + return { + "id": 40, + "name": "Confluence", + "type": "CONFLUENCE_CONNECTOR", + "sources": [], + }, [] + + # Process each chunk and create sources directly without deduplication + sources_list = [] + async with self.counter_lock: + for _i, chunk in enumerate(confluence_chunks): + # Extract document metadata + document = chunk.get("document", {}) + metadata = document.get("metadata", {}) + + # Extract Confluence-specific metadata + page_title = metadata.get("page_title", "Untitled Page") + page_id = metadata.get("page_id", "") + space_key = metadata.get("space_key", "") + + # Create a more descriptive title for Confluence pages + title = f"Confluence: {page_title}" + if space_key: + title += f" ({space_key})" + + # Create a more descriptive description for Confluence pages + description = chunk.get("content", "")[:100] + if len(description) == 100: + description += "..." 
+ + # For URL, we can use a placeholder or construct a URL to the Confluence page if available + url = "" # TODO: Add base_url to metadata + if page_id: + url = f"{metadata.get('base_url')}/pages/{page_id}" + + source = { + "id": document.get("id", self.source_id_counter), + "title": title, + "description": description, + "url": url, + } + + self.source_id_counter += 1 + sources_list.append(source) + + # Create result object + result_object = { + "id": 40, + "name": "Confluence", + "type": "CONFLUENCE_CONNECTOR", + "sources": sources_list, + } + + return result_object, confluence_chunks + async def search_linkup( self, user_query: str, user_id: str, mode: str = "standard" ) -> tuple: diff --git a/surfsense_backend/app/tasks/connectors_indexing_tasks.py b/surfsense_backend/app/tasks/connectors_indexing_tasks.py index e028a47..6829136 100644 --- a/surfsense_backend/app/tasks/connectors_indexing_tasks.py +++ b/surfsense_backend/app/tasks/connectors_indexing_tasks.py @@ -8,6 +8,7 @@ from sqlalchemy.ext.asyncio import AsyncSession from sqlalchemy.future import select from app.config import config +from app.connectors.confluence_connector import ConfluenceConnector from app.connectors.discord_connector import DiscordConnector from app.connectors.github_connector import GitHubConnector from app.connectors.jira_connector import JiraConnector @@ -2329,3 +2330,371 @@ async def index_jira_issues( ) logger.error(f"Failed to index JIRA issues: {e!s}", exc_info=True) return 0, f"Failed to index JIRA issues: {e!s}" + + +async def index_confluence_pages( + session: AsyncSession, + connector_id: int, + search_space_id: int, + user_id: str, + start_date: str | None = None, + end_date: str | None = None, + update_last_indexed: bool = True, +) -> tuple[int, str | None]: + """ + Index Confluence pages and comments. 
+ + Args: + session: Database session + connector_id: ID of the Confluence connector + search_space_id: ID of the search space to store documents in + user_id: User ID + start_date: Start date for indexing (YYYY-MM-DD format) + end_date: End date for indexing (YYYY-MM-DD format) + update_last_indexed: Whether to update the last_indexed_at timestamp (default: True) + + Returns: + Tuple containing (number of documents indexed, error message or None) + """ + task_logger = TaskLoggingService(session, search_space_id) + + # Log task start + log_entry = await task_logger.log_task_start( + task_name="confluence_pages_indexing", + source="connector_indexing_task", + message=f"Starting Confluence pages indexing for connector {connector_id}", + metadata={ + "connector_id": connector_id, + "user_id": str(user_id), + "start_date": start_date, + "end_date": end_date, + }, + ) + + try: + # Get the connector from the database + result = await session.execute( + select(SearchSourceConnector).filter( + SearchSourceConnector.id == connector_id, + SearchSourceConnector.connector_type + == SearchSourceConnectorType.CONFLUENCE_CONNECTOR, + ) + ) + connector = result.scalars().first() + + if not connector: + await task_logger.log_task_failure( + log_entry, + f"Connector with ID {connector_id} not found", + "Connector not found", + {"error_type": "ConnectorNotFound"}, + ) + return 0, f"Connector with ID {connector_id} not found" + + # Get the Confluence credentials from the connector config + confluence_email = connector.config.get("CONFLUENCE_EMAIL") + confluence_api_token = connector.config.get("CONFLUENCE_API_TOKEN") + confluence_base_url = connector.config.get("CONFLUENCE_BASE_URL") + + if not confluence_email or not confluence_api_token or not confluence_base_url: + await task_logger.log_task_failure( + log_entry, + f"Confluence credentials not found in connector config for connector {connector_id}", + "Missing Confluence credentials", + {"error_type": "MissingCredentials"}, + ) + return 0, "Confluence credentials not found in connector config" + + # Initialize Confluence client + await task_logger.log_task_progress( + log_entry, + f"Initializing Confluence client for connector {connector_id}", + {"stage": "client_initialization"}, + ) + + confluence_client = ConfluenceConnector( + base_url=confluence_base_url, + email=confluence_email, + api_token=confluence_api_token, + ) + + # Calculate date range + if start_date is None or end_date is None: + # Fall back to calculating dates based on last_indexed_at + calculated_end_date = datetime.now() + + # Use last_indexed_at as start date if available, otherwise use 365 days ago + if connector.last_indexed_at: + # Convert dates to be comparable (both timezone-naive) + last_indexed_naive = ( + connector.last_indexed_at.replace(tzinfo=None) + if connector.last_indexed_at.tzinfo + else connector.last_indexed_at + ) + + # Check if last_indexed_at is in the future or after end_date + if last_indexed_naive > calculated_end_date: + logger.warning( + f"Last indexed date ({last_indexed_naive.strftime('%Y-%m-%d')}) is in the future. Using 365 days ago instead." 
+ ) + calculated_start_date = calculated_end_date - timedelta(days=365) + else: + calculated_start_date = last_indexed_naive + logger.info( + f"Using last_indexed_at ({calculated_start_date.strftime('%Y-%m-%d')}) as start date" + ) + else: + calculated_start_date = calculated_end_date - timedelta( + days=365 + ) # Use 365 days as default + logger.info( + f"No last_indexed_at found, using {calculated_start_date.strftime('%Y-%m-%d')} (365 days ago) as start date" + ) + + # Use calculated dates if not provided + start_date_str = ( + start_date if start_date else calculated_start_date.strftime("%Y-%m-%d") + ) + end_date_str = ( + end_date if end_date else calculated_end_date.strftime("%Y-%m-%d") + ) + else: + # Use provided dates + start_date_str = start_date + end_date_str = end_date + + await task_logger.log_task_progress( + log_entry, + f"Fetching Confluence pages from {start_date_str} to {end_date_str}", + { + "stage": "fetching_pages", + "start_date": start_date_str, + "end_date": end_date_str, + }, + ) + + # Get pages within date range + try: + pages, error = confluence_client.get_pages_by_date_range( + start_date=start_date_str, end_date=end_date_str, include_comments=True + ) + + if error: + logger.error(f"Failed to get Confluence pages: {error}") + + # Don't treat "No pages found" as an error that should stop indexing + if "No pages found" in error: + logger.info( + "No pages found is not a critical error, continuing with update" + ) + if update_last_indexed: + connector.last_indexed_at = datetime.now() + await session.commit() + logger.info( + f"Updated last_indexed_at to {connector.last_indexed_at} despite no pages found" + ) + + await task_logger.log_task_success( + log_entry, + f"No Confluence pages found in date range {start_date_str} to {end_date_str}", + {"pages_found": 0}, + ) + return 0, None + else: + await task_logger.log_task_failure( + log_entry, + f"Failed to get Confluence pages: {error}", + "API Error", + {"error_type": "APIError"}, + ) + return 0, f"Failed to get Confluence pages: {error}" + + logger.info(f"Retrieved {len(pages)} pages from Confluence API") + + except Exception as e: + logger.error(f"Error fetching Confluence pages: {e!s}", exc_info=True) + return 0, f"Error fetching Confluence pages: {e!s}" + + # Process and index each page + documents_indexed = 0 + skipped_pages = [] + documents_skipped = 0 + + for page in pages: + try: + page_id = page.get("id") + page_title = page.get("title", "") + space_id = page.get("spaceId", "") + + if not page_id or not page_title: + logger.warning( + f"Skipping page with missing ID or title: {page_id or 'Unknown'}" + ) + skipped_pages.append(f"{page_title or 'Unknown'} (missing data)") + documents_skipped += 1 + continue + + # Extract page content + page_content = "" + if page.get("body") and page["body"].get("storage"): + page_content = page["body"]["storage"].get("value", "") + + # Add comments to content + comments = page.get("comments", []) + comments_content = "" + if comments: + comments_content = "\n\n## Comments\n\n" + for comment in comments: + comment_body = "" + if comment.get("body") and comment["body"].get("storage"): + comment_body = comment["body"]["storage"].get("value", "") + + comment_author = comment.get("version", {}).get( + "authorId", "Unknown" + ) + comment_date = comment.get("version", {}).get("createdAt", "") + + comments_content += f"**Comment by {comment_author}** ({comment_date}):\n{comment_body}\n\n" + + # Combine page content with comments + full_content = f"# 
{page_title}\n\n{page_content}{comments_content}" + + if not full_content.strip(): + logger.warning(f"Skipping page with no content: {page_title}") + skipped_pages.append(f"{page_title} (no content)") + documents_skipped += 1 + continue + + # Create a simple summary + summary_content = ( + f"Confluence Page: {page_title}\n\nSpace ID: {space_id}\n\n" + ) + if page_content: + # Take first 500 characters of content for summary + content_preview = page_content[:500] + if len(page_content) > 500: + content_preview += "..." + summary_content += f"Content Preview: {content_preview}\n\n" + + # Add comment count + comment_count = len(comments) + summary_content += f"Comments: {comment_count}" + + # Generate content hash + content_hash = generate_content_hash(full_content, search_space_id) + + # Check if document already exists + existing_doc_by_hash_result = await session.execute( + select(Document).where(Document.content_hash == content_hash) + ) + existing_document_by_hash = ( + existing_doc_by_hash_result.scalars().first() + ) + + if existing_document_by_hash: + logger.info( + f"Document with content hash {content_hash} already exists for page {page_title}. Skipping processing." + ) + documents_skipped += 1 + continue + + # Generate embedding for the summary + summary_embedding = config.embedding_model_instance.embed( + summary_content + ) + + # Process chunks - using the full page content with comments + chunks = [ + Chunk( + content=chunk.text, + embedding=config.embedding_model_instance.embed(chunk.text), + ) + for chunk in config.chunker_instance.chunk(full_content) + ] + + # Create and store new document + logger.info(f"Creating new document for page {page_title}") + document = Document( + search_space_id=search_space_id, + title=f"Confluence - {page_title}", + document_type=DocumentType.CONFLUENCE_CONNECTOR, + document_metadata={ + "page_id": page_id, + "page_title": page_title, + "space_id": space_id, + "comment_count": comment_count, + "indexed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"), + }, + content=summary_content, + content_hash=content_hash, + embedding=summary_embedding, + chunks=chunks, + ) + + session.add(document) + documents_indexed += 1 + logger.info(f"Successfully indexed new page {page_title}") + + except Exception as e: + logger.error( + f"Error processing page {page.get('title', 'Unknown')}: {e!s}", + exc_info=True, + ) + skipped_pages.append( + f"{page.get('title', 'Unknown')} (processing error)" + ) + documents_skipped += 1 + continue # Skip this page and continue with others + + # Update the last_indexed_at timestamp for the connector only if requested + total_processed = documents_indexed + if update_last_indexed: + connector.last_indexed_at = datetime.now() + logger.info(f"Updated last_indexed_at to {connector.last_indexed_at}") + + # Commit all changes + await session.commit() + logger.info( + "Successfully committed all Confluence document changes to database" + ) + + # Log success + await task_logger.log_task_success( + log_entry, + f"Successfully completed Confluence indexing for connector {connector_id}", + { + "pages_processed": total_processed, + "documents_indexed": documents_indexed, + "documents_skipped": documents_skipped, + "skipped_pages_count": len(skipped_pages), + }, + ) + + logger.info( + f"Confluence indexing completed: {documents_indexed} new pages, {documents_skipped} skipped" + ) + return ( + total_processed, + None, + ) # Return None as the error message to indicate success + + except SQLAlchemyError as db_error: + await 
session.rollback() + await task_logger.log_task_failure( + log_entry, + f"Database error during Confluence indexing for connector {connector_id}", + str(db_error), + {"error_type": "SQLAlchemyError"}, + ) + logger.error(f"Database error: {db_error!s}", exc_info=True) + return 0, f"Database error: {db_error!s}" + except Exception as e: + await session.rollback() + await task_logger.log_task_failure( + log_entry, + f"Failed to index Confluence pages for connector {connector_id}", + str(e), + {"error_type": type(e).__name__}, + ) + logger.error(f"Failed to index Confluence pages: {e!s}", exc_info=True) + return 0, f"Failed to index Confluence pages: {e!s}" diff --git a/surfsense_web/app/dashboard/[search_space_id]/connectors/[connector_id]/edit/page.tsx b/surfsense_web/app/dashboard/[search_space_id]/connectors/[connector_id]/edit/page.tsx index 7f1f3d4..a535319 100644 --- a/surfsense_web/app/dashboard/[search_space_id]/connectors/[connector_id]/edit/page.tsx +++ b/surfsense_web/app/dashboard/[search_space_id]/connectors/[connector_id]/edit/page.tsx @@ -201,6 +201,33 @@ export default function EditConnectorPage() { )} + {/* == Confluence == */} + {connector.connector_type === "CONFLUENCE_CONNECTOR" && ( +
+							{/* Confluence form fields: CONFLUENCE_BASE_URL, CONFLUENCE_EMAIL, CONFLUENCE_API_TOKEN */}
+ )} + {/* == Linkup == */} {connector.connector_type === "LINKUP_API" && ( { + return url.includes("atlassian.net") || url.includes("confluence"); + }, + { + message: "Please enter a valid Confluence instance URL", + } + ), + email: z.string().email({ + message: "Please enter a valid email address.", + }), + api_token: z.string().min(10, { + message: "Confluence API Token is required and must be valid.", + }), +}); + +// Define the type for the form values +type ConfluenceConnectorFormValues = z.infer; + +export default function ConfluenceConnectorPage() { + const router = useRouter(); + const params = useParams(); + const searchSpaceId = params.search_space_id as string; + const [isSubmitting, setIsSubmitting] = useState(false); + const { createConnector } = useSearchSourceConnectors(); + + // Initialize the form + const form = useForm({ + resolver: zodResolver(confluenceConnectorFormSchema), + defaultValues: { + name: "Confluence Connector", + base_url: "", + email: "", + api_token: "", + }, + }); + + // Handle form submission + const onSubmit = async (values: ConfluenceConnectorFormValues) => { + setIsSubmitting(true); + try { + await createConnector({ + name: values.name, + connector_type: "CONFLUENCE_CONNECTOR", + config: { + CONFLUENCE_BASE_URL: values.base_url, + CONFLUENCE_EMAIL: values.email, + CONFLUENCE_API_TOKEN: values.api_token, + }, + is_indexable: true, + last_indexed_at: null, + }); + + toast.success("Confluence connector created successfully!"); + + // Navigate back to connectors page + router.push(`/dashboard/${searchSpaceId}/connectors`); + } catch (error) { + console.error("Error creating connector:", error); + toast.error(error instanceof Error ? error.message : "Failed to create connector"); + } finally { + setIsSubmitting(false); + } + }; + + return ( +
+			{/* Tabs: Connect | Documentation */}
+			{/* Connect tab */}
+			<h2>Connect to Confluence</h2>
+			<p>Connect your Confluence instance to index pages and comments from your spaces.</p>
+			<p>
+				You'll need to create an API token from your{" "}
+				Atlassian Account Settings
+			</p>
+ + ( + + Connector Name + + + + + A friendly name to identify this connector. + + + + )} + /> + + ( + + Confluence Instance URL + + + + + Your Confluence instance URL. For Atlassian Cloud, this is typically + https://yourcompany.atlassian.net + + + + )} + /> + + ( + + Email Address + + + + Your Atlassian account email address. + + + )} + /> + + ( + + API Token + + + + + Your Confluence API Token will be encrypted and stored securely. + + + + )} + /> + +
+			{/* Documentation tab */}
+			<h2>Confluence Integration Guide</h2>
+			<p>Learn how to set up and use the Confluence connector.</p>
+
+			<h3>What gets indexed?</h3>
+			<ul>
+				<li>All pages from accessible spaces</li>
+				<li>Page content and metadata</li>
+				<li>Comments on pages (both footer and inline comments)</li>
+				<li>Page titles and descriptions</li>
+			</ul>
+
+			<h3>Setup Instructions</h3>
+			<ol>
+				<li>Go to your Atlassian Account Settings</li>
+				<li>Navigate to Security → API tokens</li>
+				<li>Create a new API token with appropriate permissions</li>
+				<li>Copy the token and paste it in the form above</li>
+				<li>Ensure your account has read access to the spaces you want to index</li>
+			</ol>
+
+			<h3>Permissions Required</h3>
+			<ul>
+				<li>Read access to Confluence spaces</li>
+				<li>View pages and comments</li>
+				<li>Access to space metadata</li>
+			</ul>
+
+			<p>
+				The connector will only index content that your account has permission to view. Make sure your
+				API token has the necessary permissions for the spaces you want to index.
+			</p>
+ ); +} diff --git a/surfsense_web/app/dashboard/[search_space_id]/connectors/add/page.tsx b/surfsense_web/app/dashboard/[search_space_id]/connectors/add/page.tsx index 71de94b..f6dbd51 100644 --- a/surfsense_web/app/dashboard/[search_space_id]/connectors/add/page.tsx +++ b/surfsense_web/app/dashboard/[search_space_id]/connectors/add/page.tsx @@ -1,5 +1,7 @@ "use client"; + import { + IconBook, IconBrandDiscord, IconBrandGithub, IconBrandNotion, @@ -14,7 +16,7 @@ import { IconTicket, IconWorldWww, } from "@tabler/icons-react"; -import { AnimatePresence, motion } from "framer-motion"; +import { AnimatePresence, motion, type Variants } from "framer-motion"; import Link from "next/link"; import { useParams } from "next/navigation"; import { useState } from "react"; @@ -125,6 +127,13 @@ const connectorCategories: ConnectorCategory[] = [ icon: , status: "available", }, + { + id: "confluence-connector", + title: "Confluence", + description: "Connect to Confluence to search pages, comments and documentation.", + icon: , + status: "available", + }, ], }, { @@ -165,7 +174,7 @@ const staggerContainer = { }, }; -const cardVariants = { +const cardVariants: Variants = { hidden: { opacity: 0, y: 20 }, visible: { opacity: 1, diff --git a/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/page.tsx b/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/page.tsx index d9183fd..e9f49c9 100644 --- a/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/page.tsx +++ b/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/page.tsx @@ -1,6 +1,7 @@ "use client"; import { + IconBook, IconBrandDiscord, IconBrandGithub, IconBrandNotion, @@ -144,6 +145,7 @@ const documentTypeIcons = { LINEAR_CONNECTOR: IconLayoutKanban, JIRA_CONNECTOR: IconTicket, DISCORD_CONNECTOR: IconBrandDiscord, + CONFLUENCE_CONNECTOR: IconBook, } as const; const columns: ColumnDef[] = [ @@ -964,7 +966,13 @@ export default function DocumentsTable() { function RowActions({ row }: { row: Row }) { const [isOpen, setIsOpen] = useState(false); const [isDeleting, setIsDeleting] = useState(false); - const { deleteDocument, refreshDocuments } = useContext(DocumentsContext)!; + const context = useContext(DocumentsContext); + + if (!context) { + throw new Error("DocumentsContext not found"); + } + + const { deleteDocument, refreshDocuments } = context; const document = row.original; const handleDelete = async () => { diff --git a/surfsense_web/components/editConnector/types.ts b/surfsense_web/components/editConnector/types.ts index 435f320..4523593 100644 --- a/surfsense_web/components/editConnector/types.ts +++ b/surfsense_web/components/editConnector/types.ts @@ -33,5 +33,11 @@ export const editConnectorSchema = z.object({ LINEAR_API_KEY: z.string().optional(), LINKUP_API_KEY: z.string().optional(), DISCORD_BOT_TOKEN: z.string().optional(), + CONFLUENCE_BASE_URL: z.string().optional(), + CONFLUENCE_EMAIL: z.string().optional(), + CONFLUENCE_API_TOKEN: z.string().optional(), + JIRA_BASE_URL: z.string().optional(), + JIRA_EMAIL: z.string().optional(), + JIRA_API_TOKEN: z.string().optional(), }); export type EditConnectorFormValues = z.infer; diff --git a/surfsense_web/hooks/useConnectorEditPage.ts b/surfsense_web/hooks/useConnectorEditPage.ts index b3a65e5..f50baa6 100644 --- a/surfsense_web/hooks/useConnectorEditPage.ts +++ b/surfsense_web/hooks/useConnectorEditPage.ts @@ -46,6 +46,12 @@ export function useConnectorEditPage(connectorId: number, searchSpaceId: string) TAVILY_API_KEY: "", LINEAR_API_KEY: "", 
DISCORD_BOT_TOKEN: "", + CONFLUENCE_BASE_URL: "", + CONFLUENCE_EMAIL: "", + CONFLUENCE_API_TOKEN: "", + JIRA_BASE_URL: "", + JIRA_EMAIL: "", + JIRA_API_TOKEN: "", }, }); @@ -66,6 +72,12 @@ export function useConnectorEditPage(connectorId: number, searchSpaceId: string) LINEAR_API_KEY: config.LINEAR_API_KEY || "", LINKUP_API_KEY: config.LINKUP_API_KEY || "", DISCORD_BOT_TOKEN: config.DISCORD_BOT_TOKEN || "", + CONFLUENCE_BASE_URL: config.CONFLUENCE_BASE_URL || "", + CONFLUENCE_EMAIL: config.CONFLUENCE_EMAIL || "", + CONFLUENCE_API_TOKEN: config.CONFLUENCE_API_TOKEN || "", + JIRA_BASE_URL: config.JIRA_BASE_URL || "", + JIRA_EMAIL: config.JIRA_EMAIL || "", + JIRA_API_TOKEN: config.JIRA_API_TOKEN || "", }); if (currentConnector.connector_type === "GITHUB_CONNECTOR") { const savedRepos = config.repo_full_names || []; @@ -104,7 +116,10 @@ export function useConnectorEditPage(connectorId: number, searchSpaceId: string) `${process.env.NEXT_PUBLIC_FASTAPI_BACKEND_URL}/api/v1/github/repositories/`, { method: "POST", - headers: { "Content-Type": "application/json", Authorization: `Bearer ${token}` }, + headers: { + "Content-Type": "application/json", + Authorization: `Bearer ${token}`, + }, body: JSON.stringify({ github_pat: values.github_pat }), } ); @@ -165,7 +180,10 @@ export function useConnectorEditPage(connectorId: number, searchSpaceId: string) setIsSaving(false); return; } - newConfig = { GITHUB_PAT: currentPatInForm, repo_full_names: newSelectedRepos }; + newConfig = { + GITHUB_PAT: currentPatInForm, + repo_full_names: newSelectedRepos, + }; if (reposChanged && newSelectedRepos.length === 0) { toast.warning("Warning: No repositories selected."); } @@ -189,7 +207,9 @@ export function useConnectorEditPage(connectorId: number, searchSpaceId: string) setIsSaving(false); return; } - newConfig = { NOTION_INTEGRATION_TOKEN: formData.NOTION_INTEGRATION_TOKEN }; + newConfig = { + NOTION_INTEGRATION_TOKEN: formData.NOTION_INTEGRATION_TOKEN, + }; } break; case "SERPER_API": @@ -243,6 +263,46 @@ export function useConnectorEditPage(connectorId: number, searchSpaceId: string) newConfig = { DISCORD_BOT_TOKEN: formData.DISCORD_BOT_TOKEN }; } break; + case "CONFLUENCE_CONNECTOR": + if ( + formData.CONFLUENCE_BASE_URL !== originalConfig.CONFLUENCE_BASE_URL || + formData.CONFLUENCE_EMAIL !== originalConfig.CONFLUENCE_EMAIL || + formData.CONFLUENCE_API_TOKEN !== originalConfig.CONFLUENCE_API_TOKEN + ) { + if ( + !formData.CONFLUENCE_BASE_URL || + !formData.CONFLUENCE_EMAIL || + !formData.CONFLUENCE_API_TOKEN + ) { + toast.error("All Confluence fields are required."); + setIsSaving(false); + return; + } + newConfig = { + CONFLUENCE_BASE_URL: formData.CONFLUENCE_BASE_URL, + CONFLUENCE_EMAIL: formData.CONFLUENCE_EMAIL, + CONFLUENCE_API_TOKEN: formData.CONFLUENCE_API_TOKEN, + }; + } + break; + case "JIRA_CONNECTOR": + if ( + formData.JIRA_BASE_URL !== originalConfig.JIRA_BASE_URL || + formData.JIRA_EMAIL !== originalConfig.JIRA_EMAIL || + formData.JIRA_API_TOKEN !== originalConfig.JIRA_API_TOKEN + ) { + if (!formData.JIRA_BASE_URL || !formData.JIRA_EMAIL || !formData.JIRA_API_TOKEN) { + toast.error("All Jira fields are required."); + setIsSaving(false); + return; + } + newConfig = { + JIRA_BASE_URL: formData.JIRA_BASE_URL, + JIRA_EMAIL: formData.JIRA_EMAIL, + JIRA_API_TOKEN: formData.JIRA_API_TOKEN, + }; + } + break; } if (newConfig !== null) { @@ -297,6 +357,14 @@ export function useConnectorEditPage(connectorId: number, searchSpaceId: string) editForm.setValue("LINKUP_API_KEY", newlySavedConfig.LINKUP_API_KEY || 
""); } else if (connector.connector_type === "DISCORD_CONNECTOR") { editForm.setValue("DISCORD_BOT_TOKEN", newlySavedConfig.DISCORD_BOT_TOKEN || ""); + } else if (connector.connector_type === "CONFLUENCE_CONNECTOR") { + editForm.setValue("CONFLUENCE_BASE_URL", newlySavedConfig.CONFLUENCE_BASE_URL || ""); + editForm.setValue("CONFLUENCE_EMAIL", newlySavedConfig.CONFLUENCE_EMAIL || ""); + editForm.setValue("CONFLUENCE_API_TOKEN", newlySavedConfig.CONFLUENCE_API_TOKEN || ""); + } else if (connector.connector_type === "JIRA_CONNECTOR") { + editForm.setValue("JIRA_BASE_URL", newlySavedConfig.JIRA_BASE_URL || ""); + editForm.setValue("JIRA_EMAIL", newlySavedConfig.JIRA_EMAIL || ""); + editForm.setValue("JIRA_API_TOKEN", newlySavedConfig.JIRA_API_TOKEN || ""); } } if (connector.connector_type === "GITHUB_CONNECTOR") {