From 08f0a34ef51a72ec0edf722f04772f4ee86e246c Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Mon, 4 Aug 2025 00:49:37 +0200 Subject: [PATCH 01/14] add gmail connector add route --- surfsense_backend/app/routes/__init__.py | 4 + .../google_gmail_add_connector_route.py | 160 ++++++++++++++++++ 2 files changed, 164 insertions(+) create mode 100644 surfsense_backend/app/routes/google_gmail_add_connector_route.py diff --git a/surfsense_backend/app/routes/__init__.py b/surfsense_backend/app/routes/__init__.py index 3e9c6ba..e10db1e 100644 --- a/surfsense_backend/app/routes/__init__.py +++ b/surfsense_backend/app/routes/__init__.py @@ -5,6 +5,9 @@ from .documents_routes import router as documents_router from .google_calendar_add_connector_route import ( router as google_calendar_add_connector_router, ) +from .google_gmail_add_connector_route import ( + router as google_gmail_add_connector_router, +) from .llm_config_routes import router as llm_config_router from .logs_routes import router as logs_router from .podcasts_routes import router as podcasts_router @@ -19,5 +22,6 @@ router.include_router(podcasts_router) router.include_router(chats_router) router.include_router(search_source_connectors_router) router.include_router(google_calendar_add_connector_router) +router.include_router(google_gmail_add_connector_router) router.include_router(llm_config_router) router.include_router(logs_router) diff --git a/surfsense_backend/app/routes/google_gmail_add_connector_route.py b/surfsense_backend/app/routes/google_gmail_add_connector_route.py new file mode 100644 index 0000000..678f43d --- /dev/null +++ b/surfsense_backend/app/routes/google_gmail_add_connector_route.py @@ -0,0 +1,160 @@ +# app/routes/google_gmail.py +import base64 +import json +import logging +from uuid import UUID + +from fastapi import APIRouter, Depends, HTTPException, Request +from fastapi.responses import RedirectResponse +from google_auth_oauthlib.flow import Flow +from pydantic import ValidationError +from sqlalchemy.exc import IntegrityError +from sqlalchemy.ext.asyncio import AsyncSession +from sqlalchemy.future import select + +from app.config import config +from app.db import ( + SearchSourceConnector, + SearchSourceConnectorType, + User, + get_async_session, +) +from app.users import current_active_user + +logger = logging.getLogger(__name__) + +router = APIRouter() + + +def get_google_flow(): + """Create and return a Google OAuth flow for Gmail API.""" + flow = Flow.from_client_config( + { + "web": { + "client_id": config.GOOGLE_OAUTH_CLIENT_ID, + "client_secret": config.GOOGLE_OAUTH_CLIENT_SECRET, + "auth_uri": "https://accounts.google.com/o/oauth2/auth", + "token_uri": "https://oauth2.googleapis.com/token", + "redirect_uris": [config.GOOGLE_GMAIL_REDIRECT_URI], + } + }, + scopes=[ + "https://www.googleapis.com/auth/gmail.readonly", + "https://www.googleapis.com/auth/userinfo.email", + "https://www.googleapis.com/auth/userinfo.profile", + "openid", + ], + ) + flow.redirect_uri = config.GOOGLE_GMAIL_REDIRECT_URI + return flow + + +@router.get("/auth/google/gmail/connector/add/") +async def connect_gmail(space_id: int, user: User = Depends(current_active_user)): + try: + if not space_id: + raise HTTPException(status_code=400, detail="space_id is required") + + flow = get_google_flow() + + # Encode space_id and user_id in state + state_payload = json.dumps( + { + "space_id": space_id, + "user_id": str(user.id), + } + ) + state_encoded = base64.urlsafe_b64encode(state_payload.encode()).decode() + + auth_url, _ = 
flow.authorization_url( + access_type="offline", + prompt="consent", + include_granted_scopes="true", + state=state_encoded, + ) + return {"auth_url": auth_url} + except Exception as e: + raise HTTPException( + status_code=500, detail=f"Failed to initiate Google OAuth: {e!s}" + ) from e + + +@router.get("/auth/google/gmail/connector/callback/") +async def gmail_callback( + request: Request, + code: str, + state: str, + session: AsyncSession = Depends(get_async_session), +): + try: + # Decode and parse the state + decoded_state = base64.urlsafe_b64decode(state.encode()).decode() + data = json.loads(decoded_state) + + user_id = UUID(data["user_id"]) + space_id = data["space_id"] + + flow = get_google_flow() + flow.fetch_token(code=code) + + creds = flow.credentials + creds_dict = json.loads(creds.to_json()) + + try: + # Check if a connector with the same type already exists for this user + result = await session.execute( + select(SearchSourceConnector).filter( + SearchSourceConnector.user_id == user_id, + SearchSourceConnector.connector_type + == SearchSourceConnectorType.GOOGLE_GMAIL_CONNECTOR, + ) + ) + existing_connector = result.scalars().first() + if existing_connector: + raise HTTPException( + status_code=409, + detail="A GOOGLE_GMAIL_CONNECTOR connector already exists. Each user can have only one connector of each type.", + ) + db_connector = SearchSourceConnector( + name="Google Gmail Connector", + connector_type=SearchSourceConnectorType.GOOGLE_GMAIL_CONNECTOR, + config=creds_dict, + user_id=user_id, + is_indexable=True, + ) + session.add(db_connector) + await session.commit() + await session.refresh(db_connector) + + logger.info( + f"Successfully created Gmail connector for user {user_id} with ID {db_connector.id}" + ) + + # Redirect to the frontend success page + return RedirectResponse( + url=f"{config.NEXT_FRONTEND_URL}/dashboard/{space_id}/connectors/add/google-gmail-connector?success=true" + ) + + except IntegrityError as e: + await session.rollback() + logger.error(f"Database integrity error: {e!s}") + raise HTTPException( + status_code=409, + detail="A connector with this configuration already exists.", + ) from e + except ValidationError as e: + await session.rollback() + logger.error(f"Validation error: {e!s}") + raise HTTPException( + status_code=400, detail=f"Invalid connector configuration: {e!s}" + ) from e + + except HTTPException: + # Re-raise HTTP exceptions as-is + raise + except Exception as e: + logger.error(f"Unexpected error in Gmail callback: {e!s}", exc_info=True) + # Redirect to frontend with error + return RedirectResponse( + url=f"{config.NEXT_FRONTEND_URL}/dashboard/{space_id}/connectors/add/google-gmail-connector?error=auth_failed" + ) From a1bc9a1c60dc57035102e85277ee5e560e25bda6 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Mon, 4 Aug 2025 00:52:07 +0200 Subject: [PATCH 02/14] update search source connector / add gmail enum value --- surfsense_backend/app/db.py | 2 ++ surfsense_backend/app/schemas/search_source_connector.py | 8 ++++++++ 2 files changed, 10 insertions(+) diff --git a/surfsense_backend/app/db.py b/surfsense_backend/app/db.py index f572438..49e227f 100644 --- a/surfsense_backend/app/db.py +++ b/surfsense_backend/app/db.py @@ -47,6 +47,7 @@ class DocumentType(str, Enum): CONFLUENCE_CONNECTOR = "CONFLUENCE_CONNECTOR" CLICKUP_CONNECTOR = "CLICKUP_CONNECTOR" GOOGLE_CALENDAR_CONNECTOR = "GOOGLE_CALENDAR_CONNECTOR" + GOOGLE_GMAIL_CONNECTOR = "GOOGLE_GMAIL_CONNECTOR" class SearchSourceConnectorType(str, Enum): @@ -62,6 +63,7 @@ class 
SearchSourceConnectorType(str, Enum): CONFLUENCE_CONNECTOR = "CONFLUENCE_CONNECTOR" CLICKUP_CONNECTOR = "CLICKUP_CONNECTOR" GOOGLE_CALENDAR_CONNECTOR = "GOOGLE_CALENDAR_CONNECTOR" + GOOGLE_GMAIL_CONNECTOR = "GOOGLE_GMAIL_CONNECTOR" class ChatType(str, Enum): diff --git a/surfsense_backend/app/schemas/search_source_connector.py b/surfsense_backend/app/schemas/search_source_connector.py index 4c36893..c44b2bf 100644 --- a/surfsense_backend/app/schemas/search_source_connector.py +++ b/surfsense_backend/app/schemas/search_source_connector.py @@ -188,6 +188,14 @@ class SearchSourceConnectorBase(BaseModel): if key not in config or config[key] in (None, ""): raise ValueError(f"{key} is required and cannot be empty") + elif connector_type == SearchSourceConnectorType.GOOGLE_GMAIL_CONNECTOR: + # Required fields for Gmail connector (same as Calendar - uses Google OAuth) + required_keys = list(GoogleAuthCredentialsBase.model_fields.keys()) + + for key in required_keys: + if key not in config or config[key] in (None, ""): + raise ValueError(f"{key} is required and cannot be empty") + return config From 1de0f0309c921dfbd6eabbbd6c84b66778542105 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Mon, 4 Aug 2025 00:57:37 +0200 Subject: [PATCH 03/14] support google mail indexing --- .../app/tasks/connectors_indexing_tasks.py | 3578 +++++++++++++++++ 1 file changed, 3578 insertions(+) create mode 100644 surfsense_backend/app/tasks/connectors_indexing_tasks.py diff --git a/surfsense_backend/app/tasks/connectors_indexing_tasks.py b/surfsense_backend/app/tasks/connectors_indexing_tasks.py new file mode 100644 index 0000000..0c678b3 --- /dev/null +++ b/surfsense_backend/app/tasks/connectors_indexing_tasks.py @@ -0,0 +1,3578 @@ +import asyncio +import logging +from datetime import UTC, datetime, timedelta + +from google.oauth2.credentials import Credentials +from slack_sdk.errors import SlackApiError +from sqlalchemy.exc import SQLAlchemyError +from sqlalchemy.ext.asyncio import AsyncSession +from sqlalchemy.future import select + +from app.config import config +from app.connectors.clickup_connector import ClickUpConnector +from app.connectors.confluence_connector import ConfluenceConnector +from app.connectors.discord_connector import DiscordConnector +from app.connectors.github_connector import GitHubConnector +from app.connectors.google_calendar_connector import GoogleCalendarConnector +from app.connectors.google_gmail_connector import GoogleGmailConnector +from app.connectors.jira_connector import JiraConnector +from app.connectors.linear_connector import LinearConnector +from app.connectors.notion_history import NotionHistoryConnector +from app.connectors.slack_history import SlackHistory +from app.db import ( + Chunk, + Document, + DocumentType, + SearchSourceConnector, + SearchSourceConnectorType, +) +from app.prompts import SUMMARY_PROMPT_TEMPLATE +from app.services.llm_service import get_user_long_context_llm +from app.services.task_logging_service import TaskLoggingService +from app.utils.document_converters import generate_content_hash + +# Set up logging +logger = logging.getLogger(__name__) + + +async def index_slack_messages( + session: AsyncSession, + connector_id: int, + search_space_id: int, + user_id: str, + start_date: str | None = None, + end_date: str | None = None, + update_last_indexed: bool = True, +) -> tuple[int, str | None]: + """ + Index Slack messages from all accessible channels. 
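+
+    For each channel the bot can read, messages in the given date range are
+    collected into a single markdown document, summarized with the user's
+    long-context LLM, chunked, embedded, and stored. Channels whose content
+    hash already exists in the search space are skipped.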
+ + Args: + session: Database session + connector_id: ID of the Slack connector + search_space_id: ID of the search space to store documents in + update_last_indexed: Whether to update the last_indexed_at timestamp (default: True) + + Returns: + Tuple containing (number of documents indexed, error message or None) + """ + task_logger = TaskLoggingService(session, search_space_id) + + # Log task start + log_entry = await task_logger.log_task_start( + task_name="slack_messages_indexing", + source="connector_indexing_task", + message=f"Starting Slack messages indexing for connector {connector_id}", + metadata={ + "connector_id": connector_id, + "user_id": str(user_id), + "start_date": start_date, + "end_date": end_date, + }, + ) + + try: + # Get the connector + await task_logger.log_task_progress( + log_entry, + f"Retrieving Slack connector {connector_id} from database", + {"stage": "connector_retrieval"}, + ) + + result = await session.execute( + select(SearchSourceConnector).filter( + SearchSourceConnector.id == connector_id, + SearchSourceConnector.connector_type + == SearchSourceConnectorType.SLACK_CONNECTOR, + ) + ) + connector = result.scalars().first() + + if not connector: + await task_logger.log_task_failure( + log_entry, + f"Connector with ID {connector_id} not found or is not a Slack connector", + "Connector not found", + {"error_type": "ConnectorNotFound"}, + ) + return ( + 0, + f"Connector with ID {connector_id} not found or is not a Slack connector", + ) + + # Get the Slack token from the connector config + slack_token = connector.config.get("SLACK_BOT_TOKEN") + if not slack_token: + await task_logger.log_task_failure( + log_entry, + f"Slack token not found in connector config for connector {connector_id}", + "Missing Slack token", + {"error_type": "MissingToken"}, + ) + return 0, "Slack token not found in connector config" + + # Initialize Slack client + await task_logger.log_task_progress( + log_entry, + f"Initializing Slack client for connector {connector_id}", + {"stage": "client_initialization"}, + ) + + slack_client = SlackHistory(token=slack_token) + + # Calculate date range + await task_logger.log_task_progress( + log_entry, + "Calculating date range for Slack indexing", + { + "stage": "date_calculation", + "provided_start_date": start_date, + "provided_end_date": end_date, + }, + ) + + if start_date is None or end_date is None: + # Fall back to calculating dates based on last_indexed_at + calculated_end_date = datetime.now() + + # Use last_indexed_at as start date if available, otherwise use 365 days ago + if connector.last_indexed_at: + # Convert dates to be comparable (both timezone-naive) + last_indexed_naive = ( + connector.last_indexed_at.replace(tzinfo=None) + if connector.last_indexed_at.tzinfo + else connector.last_indexed_at + ) + + # Check if last_indexed_at is in the future or after end_date + if last_indexed_naive > calculated_end_date: + logger.warning( + f"Last indexed date ({last_indexed_naive.strftime('%Y-%m-%d')}) is in the future. Using 365 days ago instead." 
+ ) + calculated_start_date = calculated_end_date - timedelta(days=365) + else: + calculated_start_date = last_indexed_naive + logger.info( + f"Using last_indexed_at ({calculated_start_date.strftime('%Y-%m-%d')}) as start date" + ) + else: + calculated_start_date = calculated_end_date - timedelta( + days=365 + ) # Use 365 days as default + logger.info( + f"No last_indexed_at found, using {calculated_start_date.strftime('%Y-%m-%d')} (365 days ago) as start date" + ) + + # Use calculated dates if not provided + start_date_str = ( + start_date if start_date else calculated_start_date.strftime("%Y-%m-%d") + ) + end_date_str = ( + end_date if end_date else calculated_end_date.strftime("%Y-%m-%d") + ) + else: + # Use provided dates + start_date_str = start_date + end_date_str = end_date + + logger.info(f"Indexing Slack messages from {start_date_str} to {end_date_str}") + + await task_logger.log_task_progress( + log_entry, + f"Fetching Slack channels from {start_date_str} to {end_date_str}", + { + "stage": "fetch_channels", + "start_date": start_date_str, + "end_date": end_date_str, + }, + ) + + # Get all channels + try: + channels = slack_client.get_all_channels() + except Exception as e: + await task_logger.log_task_failure( + log_entry, + f"Failed to get Slack channels for connector {connector_id}", + str(e), + {"error_type": "ChannelFetchError"}, + ) + return 0, f"Failed to get Slack channels: {e!s}" + + if not channels: + await task_logger.log_task_success( + log_entry, + f"No Slack channels found for connector {connector_id}", + {"channels_found": 0}, + ) + return 0, "No Slack channels found" + + # Track the number of documents indexed + documents_indexed = 0 + documents_skipped = 0 + skipped_channels = [] + + await task_logger.log_task_progress( + log_entry, + f"Starting to process {len(channels)} Slack channels", + {"stage": "process_channels", "total_channels": len(channels)}, + ) + + # Process each channel + for ( + channel_obj + ) in channels: # Modified loop to iterate over list of channel objects + channel_id = channel_obj["id"] + channel_name = channel_obj["name"] + is_private = channel_obj["is_private"] + is_member = channel_obj[ + "is_member" + ] # This might be False for public channels too + + try: + # If it's a private channel and the bot is not a member, skip. + # For public channels, if they are listed by conversations.list, the bot can typically read history. + # The `not_in_channel` error in get_conversation_history will be the ultimate gatekeeper if history is inaccessible. + if is_private and not is_member: + logger.warning( + f"Bot is not a member of private channel {channel_name} ({channel_id}). Skipping." + ) + skipped_channels.append( + f"{channel_name} (private, bot not a member)" + ) + documents_skipped += 1 + continue + + # Get messages for this channel + # The get_history_by_date_range now uses get_conversation_history, + # which handles 'not_in_channel' by returning [] and logging. + messages, error = slack_client.get_history_by_date_range( + channel_id=channel_id, + start_date=start_date_str, + end_date=end_date_str, + limit=1000, # Limit to 1000 messages per channel + ) + + if error: + logger.warning( + f"Error getting messages from channel {channel_name}: {error}" + ) + skipped_channels.append(f"{channel_name} (error: {error})") + documents_skipped += 1 + continue # Skip this channel if there's an error + + if not messages: + logger.info( + f"No messages found in channel {channel_name} for the specified date range." 
+                    )
+                    documents_skipped += 1
+                    continue  # Skip if no messages
+
+                # Format messages with user info
+                formatted_messages = []
+                for msg in messages:
+                    # Skip bot messages and system messages
+                    if msg.get("subtype") in [
+                        "bot_message",
+                        "channel_join",
+                        "channel_leave",
+                    ]:
+                        continue
+
+                    formatted_msg = slack_client.format_message(
+                        msg, include_user_info=True
+                    )
+                    formatted_messages.append(formatted_msg)
+
+                if not formatted_messages:
+                    logger.info(
+                        f"No valid messages found in channel {channel_name} after filtering."
+                    )
+                    documents_skipped += 1
+                    continue  # Skip if no valid messages after filtering
+
+                # Convert messages to markdown format
+                channel_content = f"# Slack Channel: {channel_name}\n\n"
+
+                for msg in formatted_messages:
+                    user_name = msg.get("user_name", "Unknown User")
+                    timestamp = msg.get("datetime", "Unknown Time")
+                    text = msg.get("text", "")
+
+                    channel_content += (
+                        f"## {user_name} ({timestamp})\n\n{text}\n\n---\n\n"
+                    )
+
+                # Format document metadata
+                metadata_sections = [
+                    (
+                        "METADATA",
+                        [
+                            f"CHANNEL_NAME: {channel_name}",
+                            f"CHANNEL_ID: {channel_id}",
+                            # f"START_DATE: {start_date_str}",
+                            # f"END_DATE: {end_date_str}",
+                            f"MESSAGE_COUNT: {len(formatted_messages)}",
+                        ],
+                    ),
+                    (
+                        "CONTENT",
+                        ["FORMAT: markdown", "TEXT_START", channel_content, "TEXT_END"],
+                    ),
+                ]
+
+                # Build the document string
+                document_parts = []
+                document_parts.append("<DOCUMENT>")
+
+                for section_title, section_content in metadata_sections:
+                    document_parts.append(f"<{section_title}>")
+                    document_parts.extend(section_content)
+                    document_parts.append(f"</{section_title}>")
+
+                document_parts.append("</DOCUMENT>")
+                combined_document_string = "\n".join(document_parts)
+                content_hash = generate_content_hash(
+                    combined_document_string, search_space_id
+                )
+
+                # Check if document with this content hash already exists
+                existing_doc_by_hash_result = await session.execute(
+                    select(Document).where(Document.content_hash == content_hash)
+                )
+                existing_document_by_hash = (
+                    existing_doc_by_hash_result.scalars().first()
+                )
+
+                if existing_document_by_hash:
+                    logger.info(
+                        f"Document with content hash {content_hash} already exists for channel {channel_name}. Skipping processing."
+ ) + documents_skipped += 1 + continue + + # Get user's long context LLM + user_llm = await get_user_long_context_llm(session, user_id) + if not user_llm: + logger.error(f"No long context LLM configured for user {user_id}") + skipped_channels.append(f"{channel_name} (no LLM configured)") + documents_skipped += 1 + continue + + # Generate summary + summary_chain = SUMMARY_PROMPT_TEMPLATE | user_llm + summary_result = await summary_chain.ainvoke( + {"document": combined_document_string} + ) + summary_content = summary_result.content + summary_embedding = config.embedding_model_instance.embed( + summary_content + ) + + # Process chunks + chunks = [ + Chunk( + content=chunk.text, + embedding=config.embedding_model_instance.embed(chunk.text), + ) + for chunk in config.chunker_instance.chunk(channel_content) + ] + + # Create and store new document + document = Document( + search_space_id=search_space_id, + title=f"Slack - {channel_name}", + document_type=DocumentType.SLACK_CONNECTOR, + document_metadata={ + "channel_name": channel_name, + "channel_id": channel_id, + "start_date": start_date_str, + "end_date": end_date_str, + "message_count": len(formatted_messages), + "indexed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"), + }, + content=summary_content, + embedding=summary_embedding, + chunks=chunks, + content_hash=content_hash, + ) + + session.add(document) + documents_indexed += 1 + logger.info( + f"Successfully indexed new channel {channel_name} with {len(formatted_messages)} messages" + ) + + except SlackApiError as slack_error: + logger.error( + f"Slack API error for channel {channel_name}: {slack_error!s}" + ) + skipped_channels.append(f"{channel_name} (Slack API error)") + documents_skipped += 1 + continue # Skip this channel and continue with others + except Exception as e: + logger.error(f"Error processing channel {channel_name}: {e!s}") + skipped_channels.append(f"{channel_name} (processing error)") + documents_skipped += 1 + continue # Skip this channel and continue with others + + # Update the last_indexed_at timestamp for the connector only if requested + # and if we successfully indexed at least one channel + total_processed = documents_indexed + if update_last_indexed and total_processed > 0: + connector.last_indexed_at = datetime.now() + + # Commit all changes + await session.commit() + + # Prepare result message + result_message = None + if skipped_channels: + result_message = f"Processed {total_processed} channels. Skipped {len(skipped_channels)} channels: {', '.join(skipped_channels)}" + else: + result_message = f"Processed {total_processed} channels." 
+ + # Log success + await task_logger.log_task_success( + log_entry, + f"Successfully completed Slack indexing for connector {connector_id}", + { + "channels_processed": total_processed, + "documents_indexed": documents_indexed, + "documents_skipped": documents_skipped, + "skipped_channels_count": len(skipped_channels), + "result_message": result_message, + }, + ) + + logger.info( + f"Slack indexing completed: {documents_indexed} new channels, {documents_skipped} skipped" + ) + return total_processed, result_message + + except SQLAlchemyError as db_error: + await session.rollback() + await task_logger.log_task_failure( + log_entry, + f"Database error during Slack indexing for connector {connector_id}", + str(db_error), + {"error_type": "SQLAlchemyError"}, + ) + logger.error(f"Database error: {db_error!s}") + return 0, f"Database error: {db_error!s}" + except Exception as e: + await session.rollback() + await task_logger.log_task_failure( + log_entry, + f"Failed to index Slack messages for connector {connector_id}", + str(e), + {"error_type": type(e).__name__}, + ) + logger.error(f"Failed to index Slack messages: {e!s}") + return 0, f"Failed to index Slack messages: {e!s}" + + +async def index_notion_pages( + session: AsyncSession, + connector_id: int, + search_space_id: int, + user_id: str, + start_date: str | None = None, + end_date: str | None = None, + update_last_indexed: bool = True, +) -> tuple[int, str | None]: + """ + Index Notion pages from all accessible pages. + + Args: + session: Database session + connector_id: ID of the Notion connector + search_space_id: ID of the search space to store documents in + update_last_indexed: Whether to update the last_indexed_at timestamp (default: True) + + Returns: + Tuple containing (number of documents indexed, error message or None) + """ + task_logger = TaskLoggingService(session, search_space_id) + + # Log task start + log_entry = await task_logger.log_task_start( + task_name="notion_pages_indexing", + source="connector_indexing_task", + message=f"Starting Notion pages indexing for connector {connector_id}", + metadata={ + "connector_id": connector_id, + "user_id": str(user_id), + "start_date": start_date, + "end_date": end_date, + }, + ) + + try: + # Get the connector + await task_logger.log_task_progress( + log_entry, + f"Retrieving Notion connector {connector_id} from database", + {"stage": "connector_retrieval"}, + ) + + result = await session.execute( + select(SearchSourceConnector).filter( + SearchSourceConnector.id == connector_id, + SearchSourceConnector.connector_type + == SearchSourceConnectorType.NOTION_CONNECTOR, + ) + ) + connector = result.scalars().first() + + if not connector: + await task_logger.log_task_failure( + log_entry, + f"Connector with ID {connector_id} not found or is not a Notion connector", + "Connector not found", + {"error_type": "ConnectorNotFound"}, + ) + return ( + 0, + f"Connector with ID {connector_id} not found or is not a Notion connector", + ) + + # Get the Notion token from the connector config + notion_token = connector.config.get("NOTION_INTEGRATION_TOKEN") + if not notion_token: + await task_logger.log_task_failure( + log_entry, + f"Notion integration token not found in connector config for connector {connector_id}", + "Missing Notion token", + {"error_type": "MissingToken"}, + ) + return 0, "Notion integration token not found in connector config" + + # Initialize Notion client + await task_logger.log_task_progress( + log_entry, + f"Initializing Notion client for connector {connector_id}", + 
{"stage": "client_initialization"}, + ) + + logger.info(f"Initializing Notion client for connector {connector_id}") + notion_client = NotionHistoryConnector(token=notion_token) + + # Calculate date range + if start_date is None or end_date is None: + # Fall back to calculating dates + calculated_end_date = datetime.now() + calculated_start_date = calculated_end_date - timedelta( + days=365 + ) # Check for last 1 year of pages + + # Use calculated dates if not provided + if start_date is None: + start_date_iso = calculated_start_date.strftime("%Y-%m-%dT%H:%M:%SZ") + else: + # Convert YYYY-MM-DD to ISO format + start_date_iso = datetime.strptime(start_date, "%Y-%m-%d").strftime( + "%Y-%m-%dT%H:%M:%SZ" + ) + + if end_date is None: + end_date_iso = calculated_end_date.strftime("%Y-%m-%dT%H:%M:%SZ") + else: + # Convert YYYY-MM-DD to ISO format + end_date_iso = datetime.strptime(end_date, "%Y-%m-%d").strftime( + "%Y-%m-%dT%H:%M:%SZ" + ) + else: + # Convert provided dates to ISO format for Notion API + start_date_iso = datetime.strptime(start_date, "%Y-%m-%d").strftime( + "%Y-%m-%dT%H:%M:%SZ" + ) + end_date_iso = datetime.strptime(end_date, "%Y-%m-%d").strftime( + "%Y-%m-%dT%H:%M:%SZ" + ) + + logger.info(f"Fetching Notion pages from {start_date_iso} to {end_date_iso}") + + await task_logger.log_task_progress( + log_entry, + f"Fetching Notion pages from {start_date_iso} to {end_date_iso}", + { + "stage": "fetch_pages", + "start_date": start_date_iso, + "end_date": end_date_iso, + }, + ) + + # Get all pages + try: + pages = notion_client.get_all_pages( + start_date=start_date_iso, end_date=end_date_iso + ) + logger.info(f"Found {len(pages)} Notion pages") + except Exception as e: + await task_logger.log_task_failure( + log_entry, + f"Failed to get Notion pages for connector {connector_id}", + str(e), + {"error_type": "PageFetchError"}, + ) + logger.error(f"Error fetching Notion pages: {e!s}", exc_info=True) + return 0, f"Failed to get Notion pages: {e!s}" + + if not pages: + await task_logger.log_task_success( + log_entry, + f"No Notion pages found for connector {connector_id}", + {"pages_found": 0}, + ) + logger.info("No Notion pages found to index") + return 0, "No Notion pages found" + + # Track the number of documents indexed + documents_indexed = 0 + documents_skipped = 0 + skipped_pages = [] + + await task_logger.log_task_progress( + log_entry, + f"Starting to process {len(pages)} Notion pages", + {"stage": "process_pages", "total_pages": len(pages)}, + ) + + # Process each page + for page in pages: + try: + page_id = page.get("page_id") + page_title = page.get("title", f"Untitled page ({page_id})") + page_content = page.get("content", []) + + logger.info(f"Processing Notion page: {page_title} ({page_id})") + + if not page_content: + logger.info(f"No content found in page {page_title}. 
Skipping.")
+                    skipped_pages.append(f"{page_title} (no content)")
+                    documents_skipped += 1
+                    continue
+
+                # Convert page content to markdown format
+                markdown_content = f"# Notion Page: {page_title}\n\n"
+
+                # Process blocks recursively
+                def process_blocks(blocks, level=0):
+                    result = ""
+                    for block in blocks:
+                        block_type = block.get("type")
+                        block_content = block.get("content", "")
+                        children = block.get("children", [])
+
+                        # Add indentation based on level
+                        indent = "  " * level
+
+                        # Format based on block type
+                        if block_type in ["paragraph", "text"]:
+                            result += f"{indent}{block_content}\n\n"
+                        elif block_type in ["heading_1", "header"]:
+                            result += f"{indent}# {block_content}\n\n"
+                        elif block_type == "heading_2":
+                            result += f"{indent}## {block_content}\n\n"
+                        elif block_type == "heading_3":
+                            result += f"{indent}### {block_content}\n\n"
+                        elif block_type == "bulleted_list_item":
+                            result += f"{indent}* {block_content}\n"
+                        elif block_type == "numbered_list_item":
+                            result += f"{indent}1. {block_content}\n"
+                        elif block_type == "to_do":
+                            result += f"{indent}- [ ] {block_content}\n"
+                        elif block_type == "toggle":
+                            result += f"{indent}> {block_content}\n"
+                        elif block_type == "code":
+                            result += f"{indent}```\n{block_content}\n```\n\n"
+                        elif block_type == "quote":
+                            result += f"{indent}> {block_content}\n\n"
+                        elif block_type == "callout":
+                            result += f"{indent}> **Note:** {block_content}\n\n"
+                        elif block_type == "image":
+                            result += f"{indent}![Image]({block_content})\n\n"
+                        else:
+                            # Default for other block types
+                            if block_content:
+                                result += f"{indent}{block_content}\n\n"
+
+                        # Process children recursively
+                        if children:
+                            result += process_blocks(children, level + 1)
+
+                    return result
+
+                logger.debug(
+                    f"Converting {len(page_content)} blocks to markdown for page {page_title}"
+                )
+                markdown_content += process_blocks(page_content)
+
+                # Format document metadata
+                metadata_sections = [
+                    ("METADATA", [f"PAGE_TITLE: {page_title}", f"PAGE_ID: {page_id}"]),
+                    (
+                        "CONTENT",
+                        [
+                            "FORMAT: markdown",
+                            "TEXT_START",
+                            markdown_content,
+                            "TEXT_END",
+                        ],
+                    ),
+                ]
+
+                # Build the document string
+                document_parts = []
+                document_parts.append("<DOCUMENT>")
+
+                for section_title, section_content in metadata_sections:
+                    document_parts.append(f"<{section_title}>")
+                    document_parts.extend(section_content)
+                    document_parts.append(f"</{section_title}>")
+
+                document_parts.append("</DOCUMENT>")
+                combined_document_string = "\n".join(document_parts)
+                content_hash = generate_content_hash(
+                    combined_document_string, search_space_id
+                )
+
+                # Check if document with this content hash already exists
+                existing_doc_by_hash_result = await session.execute(
+                    select(Document).where(Document.content_hash == content_hash)
+                )
+                existing_document_by_hash = (
+                    existing_doc_by_hash_result.scalars().first()
+                )
+
+                if existing_document_by_hash:
+                    logger.info(
+                        f"Document with content hash {content_hash} already exists for page {page_title}. Skipping processing."
+ ) + documents_skipped += 1 + continue + + # Get user's long context LLM + user_llm = await get_user_long_context_llm(session, user_id) + if not user_llm: + logger.error(f"No long context LLM configured for user {user_id}") + skipped_pages.append(f"{page_title} (no LLM configured)") + documents_skipped += 1 + continue + + # Generate summary + logger.debug(f"Generating summary for page {page_title}") + summary_chain = SUMMARY_PROMPT_TEMPLATE | user_llm + summary_result = await summary_chain.ainvoke( + {"document": combined_document_string} + ) + summary_content = summary_result.content + summary_embedding = config.embedding_model_instance.embed( + summary_content + ) + + # Process chunks + logger.debug(f"Chunking content for page {page_title}") + chunks = [ + Chunk( + content=chunk.text, + embedding=config.embedding_model_instance.embed(chunk.text), + ) + for chunk in config.chunker_instance.chunk(markdown_content) + ] + + # Create and store new document + document = Document( + search_space_id=search_space_id, + title=f"Notion - {page_title}", + document_type=DocumentType.NOTION_CONNECTOR, + document_metadata={ + "page_title": page_title, + "page_id": page_id, + "indexed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"), + }, + content=summary_content, + content_hash=content_hash, + embedding=summary_embedding, + chunks=chunks, + ) + + session.add(document) + documents_indexed += 1 + logger.info(f"Successfully indexed new Notion page: {page_title}") + + except Exception as e: + logger.error( + f"Error processing Notion page {page.get('title', 'Unknown')}: {e!s}", + exc_info=True, + ) + skipped_pages.append( + f"{page.get('title', 'Unknown')} (processing error)" + ) + documents_skipped += 1 + continue # Skip this page and continue with others + + # Update the last_indexed_at timestamp for the connector only if requested + # and if we successfully indexed at least one page + total_processed = documents_indexed + if update_last_indexed and total_processed > 0: + connector.last_indexed_at = datetime.now() + logger.info(f"Updated last_indexed_at for connector {connector_id}") + + # Commit all changes + await session.commit() + + # Prepare result message + result_message = None + if skipped_pages: + result_message = f"Processed {total_processed} pages. Skipped {len(skipped_pages)} pages: {', '.join(skipped_pages)}" + else: + result_message = f"Processed {total_processed} pages." 
+ + # Log success + await task_logger.log_task_success( + log_entry, + f"Successfully completed Notion indexing for connector {connector_id}", + { + "pages_processed": total_processed, + "documents_indexed": documents_indexed, + "documents_skipped": documents_skipped, + "skipped_pages_count": len(skipped_pages), + "result_message": result_message, + }, + ) + + logger.info( + f"Notion indexing completed: {documents_indexed} new pages, {documents_skipped} skipped" + ) + return total_processed, result_message + + except SQLAlchemyError as db_error: + await session.rollback() + await task_logger.log_task_failure( + log_entry, + f"Database error during Notion indexing for connector {connector_id}", + str(db_error), + {"error_type": "SQLAlchemyError"}, + ) + logger.error( + f"Database error during Notion indexing: {db_error!s}", exc_info=True + ) + return 0, f"Database error: {db_error!s}" + except Exception as e: + await session.rollback() + await task_logger.log_task_failure( + log_entry, + f"Failed to index Notion pages for connector {connector_id}", + str(e), + {"error_type": type(e).__name__}, + ) + logger.error(f"Failed to index Notion pages: {e!s}", exc_info=True) + return 0, f"Failed to index Notion pages: {e!s}" + + +async def index_github_repos( + session: AsyncSession, + connector_id: int, + search_space_id: int, + user_id: str, + start_date: str | None = None, + end_date: str | None = None, + update_last_indexed: bool = True, +) -> tuple[int, str | None]: + """ + Index code and documentation files from accessible GitHub repositories. + + Args: + session: Database session + connector_id: ID of the GitHub connector + search_space_id: ID of the search space to store documents in + update_last_indexed: Whether to update the last_indexed_at timestamp (default: True) + + Returns: + Tuple containing (number of documents indexed, error message or None) + """ + task_logger = TaskLoggingService(session, search_space_id) + + # Log task start + log_entry = await task_logger.log_task_start( + task_name="github_repos_indexing", + source="connector_indexing_task", + message=f"Starting GitHub repositories indexing for connector {connector_id}", + metadata={ + "connector_id": connector_id, + "user_id": str(user_id), + "start_date": start_date, + "end_date": end_date, + }, + ) + + documents_processed = 0 + errors = [] + + try: + # 1. Get the GitHub connector from the database + await task_logger.log_task_progress( + log_entry, + f"Retrieving GitHub connector {connector_id} from database", + {"stage": "connector_retrieval"}, + ) + + result = await session.execute( + select(SearchSourceConnector).filter( + SearchSourceConnector.id == connector_id, + SearchSourceConnector.connector_type + == SearchSourceConnectorType.GITHUB_CONNECTOR, + ) + ) + connector = result.scalars().first() + + if not connector: + await task_logger.log_task_failure( + log_entry, + f"Connector with ID {connector_id} not found or is not a GitHub connector", + "Connector not found", + {"error_type": "ConnectorNotFound"}, + ) + return ( + 0, + f"Connector with ID {connector_id} not found or is not a GitHub connector", + ) + + # 2. 
Get the GitHub PAT and selected repositories from the connector config + github_pat = connector.config.get("GITHUB_PAT") + repo_full_names_to_index = connector.config.get("repo_full_names") + + if not github_pat: + await task_logger.log_task_failure( + log_entry, + f"GitHub Personal Access Token (PAT) not found in connector config for connector {connector_id}", + "Missing GitHub PAT", + {"error_type": "MissingToken"}, + ) + return 0, "GitHub Personal Access Token (PAT) not found in connector config" + + if not repo_full_names_to_index or not isinstance( + repo_full_names_to_index, list + ): + await task_logger.log_task_failure( + log_entry, + f"'repo_full_names' not found or is not a list in connector config for connector {connector_id}", + "Invalid repo configuration", + {"error_type": "InvalidConfiguration"}, + ) + return 0, "'repo_full_names' not found or is not a list in connector config" + + # 3. Initialize GitHub connector client + await task_logger.log_task_progress( + log_entry, + f"Initializing GitHub client for connector {connector_id}", + { + "stage": "client_initialization", + "repo_count": len(repo_full_names_to_index), + }, + ) + + try: + github_client = GitHubConnector(token=github_pat) + except ValueError as e: + await task_logger.log_task_failure( + log_entry, + f"Failed to initialize GitHub client for connector {connector_id}", + str(e), + {"error_type": "ClientInitializationError"}, + ) + return 0, f"Failed to initialize GitHub client: {e!s}" + + # 4. Validate selected repositories + # For simplicity, we'll proceed with the list provided. + # If a repo is inaccessible, get_repository_files will likely fail gracefully later. + await task_logger.log_task_progress( + log_entry, + f"Starting indexing for {len(repo_full_names_to_index)} selected repositories", + { + "stage": "repo_processing", + "repo_count": len(repo_full_names_to_index), + "start_date": start_date, + "end_date": end_date, + }, + ) + + logger.info( + f"Starting indexing for {len(repo_full_names_to_index)} selected repositories." + ) + if start_date and end_date: + logger.info( + f"Date range requested: {start_date} to {end_date} (Note: GitHub indexing processes all files regardless of dates)" + ) + + # 6. Iterate through selected repositories and index files + for repo_full_name in repo_full_names_to_index: + if not repo_full_name or not isinstance(repo_full_name, str): + logger.warning(f"Skipping invalid repository entry: {repo_full_name}") + continue + + logger.info(f"Processing repository: {repo_full_name}") + try: + files_to_index = github_client.get_repository_files(repo_full_name) + if not files_to_index: + logger.info( + f"No indexable files found in repository: {repo_full_name}" + ) + continue + + logger.info( + f"Found {len(files_to_index)} files to process in {repo_full_name}" + ) + + for file_info in files_to_index: + file_path = file_info.get("path") + file_url = file_info.get("url") + file_sha = file_info.get("sha") + file_type = file_info.get("type") # 'code' or 'doc' + full_path_key = f"{repo_full_name}/{file_path}" + + if not file_path or not file_url or not file_sha: + logger.warning( + f"Skipping file with missing info in {repo_full_name}: {file_info}" + ) + continue + + # Get file content + file_content = github_client.get_file_content( + repo_full_name, file_path + ) + + if file_content is None: + logger.warning( + f"Could not retrieve content for {full_path_key}. Skipping." 
+ ) + continue # Skip if content fetch failed + + content_hash = generate_content_hash(file_content, search_space_id) + + # Check if document with this content hash already exists + existing_doc_by_hash_result = await session.execute( + select(Document).where(Document.content_hash == content_hash) + ) + existing_document_by_hash = ( + existing_doc_by_hash_result.scalars().first() + ) + + if existing_document_by_hash: + logger.info( + f"Document with content hash {content_hash} already exists for file {full_path_key}. Skipping processing." + ) + continue + + # Use file_content directly for chunking, maybe summary for main content? + # For now, let's use the full content for both, might need refinement + summary_content = f"GitHub file: {full_path_key}\n\n{file_content[:1000]}..." # Simple summary + summary_embedding = config.embedding_model_instance.embed( + summary_content + ) + + # Chunk the content + try: + chunks_data = [ + Chunk( + content=chunk.text, + embedding=config.embedding_model_instance.embed( + chunk.text + ), + ) + for chunk in config.code_chunker_instance.chunk( + file_content + ) + ] + except Exception as chunk_err: + logger.error( + f"Failed to chunk file {full_path_key}: {chunk_err}" + ) + errors.append( + f"Chunking failed for {full_path_key}: {chunk_err}" + ) + continue # Skip this file if chunking fails + + doc_metadata = { + "repository_full_name": repo_full_name, + "file_path": file_path, + "full_path": full_path_key, # For easier lookup + "url": file_url, + "sha": file_sha, + "type": file_type, + "indexed_at": datetime.now(UTC).isoformat(), + } + + # Create new document + logger.info(f"Creating new document for file: {full_path_key}") + document = Document( + title=f"GitHub - {file_path}", + document_type=DocumentType.GITHUB_CONNECTOR, + document_metadata=doc_metadata, + content=summary_content, # Store summary + content_hash=content_hash, + embedding=summary_embedding, + search_space_id=search_space_id, + chunks=chunks_data, # Associate chunks directly + ) + session.add(document) + documents_processed += 1 + + except Exception as repo_err: + logger.error( + f"Failed to process repository {repo_full_name}: {repo_err}" + ) + errors.append(f"Failed processing {repo_full_name}: {repo_err}") + + # Commit all changes at the end + await session.commit() + logger.info( + f"Finished GitHub indexing for connector {connector_id}. Processed {documents_processed} files." 
+ ) + + # Log success + await task_logger.log_task_success( + log_entry, + f"Successfully completed GitHub indexing for connector {connector_id}", + { + "documents_processed": documents_processed, + "errors_count": len(errors), + "repo_count": len(repo_full_names_to_index), + }, + ) + + except SQLAlchemyError as db_err: + await session.rollback() + await task_logger.log_task_failure( + log_entry, + f"Database error during GitHub indexing for connector {connector_id}", + str(db_err), + {"error_type": "SQLAlchemyError"}, + ) + logger.error( + f"Database error during GitHub indexing for connector {connector_id}: {db_err}" + ) + errors.append(f"Database error: {db_err}") + return documents_processed, "; ".join(errors) if errors else str(db_err) + except Exception as e: + await session.rollback() + await task_logger.log_task_failure( + log_entry, + f"Unexpected error during GitHub indexing for connector {connector_id}", + str(e), + {"error_type": type(e).__name__}, + ) + logger.error( + f"Unexpected error during GitHub indexing for connector {connector_id}: {e}", + exc_info=True, + ) + errors.append(f"Unexpected error: {e}") + return documents_processed, "; ".join(errors) if errors else str(e) + + error_message = "; ".join(errors) if errors else None + return documents_processed, error_message + + +async def index_linear_issues( + session: AsyncSession, + connector_id: int, + search_space_id: int, + user_id: str, + start_date: str | None = None, + end_date: str | None = None, + update_last_indexed: bool = True, +) -> tuple[int, str | None]: + """ + Index Linear issues and comments. + + Args: + session: Database session + connector_id: ID of the Linear connector + search_space_id: ID of the search space to store documents in + update_last_indexed: Whether to update the last_indexed_at timestamp (default: True) + + Returns: + Tuple containing (number of documents indexed, error message or None) + """ + task_logger = TaskLoggingService(session, search_space_id) + + # Log task start + log_entry = await task_logger.log_task_start( + task_name="linear_issues_indexing", + source="connector_indexing_task", + message=f"Starting Linear issues indexing for connector {connector_id}", + metadata={ + "connector_id": connector_id, + "user_id": str(user_id), + "start_date": start_date, + "end_date": end_date, + }, + ) + + try: + # Get the connector + await task_logger.log_task_progress( + log_entry, + f"Retrieving Linear connector {connector_id} from database", + {"stage": "connector_retrieval"}, + ) + + result = await session.execute( + select(SearchSourceConnector).filter( + SearchSourceConnector.id == connector_id, + SearchSourceConnector.connector_type + == SearchSourceConnectorType.LINEAR_CONNECTOR, + ) + ) + connector = result.scalars().first() + + if not connector: + await task_logger.log_task_failure( + log_entry, + f"Connector with ID {connector_id} not found or is not a Linear connector", + "Connector not found", + {"error_type": "ConnectorNotFound"}, + ) + return ( + 0, + f"Connector with ID {connector_id} not found or is not a Linear connector", + ) + + # Get the Linear token from the connector config + linear_token = connector.config.get("LINEAR_API_KEY") + if not linear_token: + await task_logger.log_task_failure( + log_entry, + f"Linear API token not found in connector config for connector {connector_id}", + "Missing Linear token", + {"error_type": "MissingToken"}, + ) + return 0, "Linear API token not found in connector config" + + # Initialize Linear client + await 
task_logger.log_task_progress( + log_entry, + f"Initializing Linear client for connector {connector_id}", + {"stage": "client_initialization"}, + ) + + linear_client = LinearConnector(token=linear_token) + + # Calculate date range + if start_date is None or end_date is None: + # Fall back to calculating dates based on last_indexed_at + calculated_end_date = datetime.now() + + # Use last_indexed_at as start date if available, otherwise use 365 days ago + if connector.last_indexed_at: + # Convert dates to be comparable (both timezone-naive) + last_indexed_naive = ( + connector.last_indexed_at.replace(tzinfo=None) + if connector.last_indexed_at.tzinfo + else connector.last_indexed_at + ) + + # Check if last_indexed_at is in the future or after end_date + if last_indexed_naive > calculated_end_date: + logger.warning( + f"Last indexed date ({last_indexed_naive.strftime('%Y-%m-%d')}) is in the future. Using 365 days ago instead." + ) + calculated_start_date = calculated_end_date - timedelta(days=365) + else: + calculated_start_date = last_indexed_naive + logger.info( + f"Using last_indexed_at ({calculated_start_date.strftime('%Y-%m-%d')}) as start date" + ) + else: + calculated_start_date = calculated_end_date - timedelta( + days=365 + ) # Use 365 days as default + logger.info( + f"No last_indexed_at found, using {calculated_start_date.strftime('%Y-%m-%d')} (365 days ago) as start date" + ) + + # Use calculated dates if not provided + start_date_str = ( + start_date if start_date else calculated_start_date.strftime("%Y-%m-%d") + ) + end_date_str = ( + end_date if end_date else calculated_end_date.strftime("%Y-%m-%d") + ) + else: + # Use provided dates + start_date_str = start_date + end_date_str = end_date + + logger.info(f"Fetching Linear issues from {start_date_str} to {end_date_str}") + + await task_logger.log_task_progress( + log_entry, + f"Fetching Linear issues from {start_date_str} to {end_date_str}", + { + "stage": "fetch_issues", + "start_date": start_date_str, + "end_date": end_date_str, + }, + ) + + # Get issues within date range + try: + issues, error = linear_client.get_issues_by_date_range( + start_date=start_date_str, end_date=end_date_str, include_comments=True + ) + + if error: + logger.error(f"Failed to get Linear issues: {error}") + + # Don't treat "No issues found" as an error that should stop indexing + if "No issues found" in error: + logger.info( + "No issues found is not a critical error, continuing with update" + ) + if update_last_indexed: + connector.last_indexed_at = datetime.now() + await session.commit() + logger.info( + f"Updated last_indexed_at to {connector.last_indexed_at} despite no issues found" + ) + return 0, None + else: + return 0, f"Failed to get Linear issues: {error}" + + logger.info(f"Retrieved {len(issues)} issues from Linear API") + + except Exception as e: + logger.error(f"Exception when calling Linear API: {e!s}", exc_info=True) + return 0, f"Failed to get Linear issues: {e!s}" + + if not issues: + logger.info("No Linear issues found for the specified date range") + if update_last_indexed: + connector.last_indexed_at = datetime.now() + await session.commit() + logger.info( + f"Updated last_indexed_at to {connector.last_indexed_at} despite no issues found" + ) + return 0, None # Return None instead of error message when no issues found + + # Log issue IDs and titles for debugging + logger.info("Issues retrieved from Linear API:") + for idx, issue in enumerate(issues[:10]): # Log first 10 issues + logger.info( + f" {idx + 1}. 
{issue.get('identifier', 'Unknown')} - {issue.get('title', 'Unknown')} - Created: {issue.get('createdAt', 'Unknown')} - Updated: {issue.get('updatedAt', 'Unknown')}"
+            )
+        if len(issues) > 10:
+            logger.info(f"    ...and {len(issues) - 10} more issues")
+
+        # Track the number of documents indexed
+        documents_indexed = 0
+        documents_skipped = 0
+        skipped_issues = []
+
+        await task_logger.log_task_progress(
+            log_entry,
+            f"Starting to process {len(issues)} Linear issues",
+            {"stage": "process_issues", "total_issues": len(issues)},
+        )
+
+        # Process each issue
+        for issue in issues:
+            try:
+                issue_id = issue.get("id")
+                issue_identifier = issue.get("identifier", "")
+                issue_title = issue.get("title", "")
+
+                if not issue_id or not issue_title:
+                    logger.warning(
+                        f"Skipping issue with missing ID or title: {issue_id or 'Unknown'}"
+                    )
+                    skipped_issues.append(
+                        f"{issue_identifier or 'Unknown'} (missing data)"
+                    )
+                    documents_skipped += 1
+                    continue
+
+                # Format the issue first to get well-structured data
+                formatted_issue = linear_client.format_issue(issue)
+
+                # Convert issue to markdown format
+                issue_content = linear_client.format_issue_to_markdown(formatted_issue)
+
+                if not issue_content:
+                    logger.warning(
+                        f"Skipping issue with no content: {issue_identifier} - {issue_title}"
+                    )
+                    skipped_issues.append(f"{issue_identifier} (no content)")
+                    documents_skipped += 1
+                    continue
+
+                # Create a short summary for the embedding
+                # This avoids using the LLM and just uses the issue data directly
+                state = formatted_issue.get("state", "Unknown")
+                description = formatted_issue.get("description", "")
+                # Truncate description if it's too long for the summary
+                if description and len(description) > 500:
+                    description = description[:497] + "..."
+
+                # Create a simple summary from the issue data
+                summary_content = f"Linear Issue {issue_identifier}: {issue_title}\n\nStatus: {state}\n\n"
+                if description:
+                    summary_content += f"Description: {description}\n\n"
+
+                # Add comment count
+                comment_count = len(formatted_issue.get("comments", []))
+                summary_content += f"Comments: {comment_count}"
+
+                content_hash = generate_content_hash(issue_content, search_space_id)
+
+                # Check if document with this content hash already exists
+                existing_doc_by_hash_result = await session.execute(
+                    select(Document).where(Document.content_hash == content_hash)
+                )
+                existing_document_by_hash = (
+                    existing_doc_by_hash_result.scalars().first()
+                )
+
+                if existing_document_by_hash:
+                    logger.info(
+                        f"Document with content hash {content_hash} already exists for issue {issue_identifier}. Skipping processing."
+ ) + documents_skipped += 1 + continue + + # Generate embedding for the summary + summary_embedding = config.embedding_model_instance.embed( + summary_content + ) + + # Process chunks - using the full issue content with comments + chunks = [ + Chunk( + content=chunk.text, + embedding=config.embedding_model_instance.embed(chunk.text), + ) + for chunk in config.chunker_instance.chunk(issue_content) + ] + + # Create and store new document + logger.info( + f"Creating new document for issue {issue_identifier} - {issue_title}" + ) + document = Document( + search_space_id=search_space_id, + title=f"Linear - {issue_identifier}: {issue_title}", + document_type=DocumentType.LINEAR_CONNECTOR, + document_metadata={ + "issue_id": issue_id, + "issue_identifier": issue_identifier, + "issue_title": issue_title, + "state": state, + "comment_count": comment_count, + "indexed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"), + }, + content=summary_content, + content_hash=content_hash, + embedding=summary_embedding, + chunks=chunks, + ) + + session.add(document) + documents_indexed += 1 + logger.info( + f"Successfully indexed new issue {issue_identifier} - {issue_title}" + ) + + except Exception as e: + logger.error( + f"Error processing issue {issue.get('identifier', 'Unknown')}: {e!s}", + exc_info=True, + ) + skipped_issues.append( + f"{issue.get('identifier', 'Unknown')} (processing error)" + ) + documents_skipped += 1 + continue # Skip this issue and continue with others + + # Update the last_indexed_at timestamp for the connector only if requested + total_processed = documents_indexed + if update_last_indexed: + connector.last_indexed_at = datetime.now() + logger.info(f"Updated last_indexed_at to {connector.last_indexed_at}") + + # Commit all changes + await session.commit() + logger.info("Successfully committed all Linear document changes to database") + + # Log success + await task_logger.log_task_success( + log_entry, + f"Successfully completed Linear indexing for connector {connector_id}", + { + "issues_processed": total_processed, + "documents_indexed": documents_indexed, + "documents_skipped": documents_skipped, + "skipped_issues_count": len(skipped_issues), + }, + ) + + logger.info( + f"Linear indexing completed: {documents_indexed} new issues, {documents_skipped} skipped" + ) + return ( + total_processed, + None, + ) # Return None as the error message to indicate success + + except SQLAlchemyError as db_error: + await session.rollback() + await task_logger.log_task_failure( + log_entry, + f"Database error during Linear indexing for connector {connector_id}", + str(db_error), + {"error_type": "SQLAlchemyError"}, + ) + logger.error(f"Database error: {db_error!s}", exc_info=True) + return 0, f"Database error: {db_error!s}" + except Exception as e: + await session.rollback() + await task_logger.log_task_failure( + log_entry, + f"Failed to index Linear issues for connector {connector_id}", + str(e), + {"error_type": type(e).__name__}, + ) + logger.error(f"Failed to index Linear issues: {e!s}", exc_info=True) + return 0, f"Failed to index Linear issues: {e!s}" + + +async def index_discord_messages( + session: AsyncSession, + connector_id: int, + search_space_id: int, + user_id: str, + start_date: str | None = None, + end_date: str | None = None, + update_last_indexed: bool = True, +) -> tuple[int, str | None]: + """ + Index Discord messages from all accessible channels. 
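+
+    Guilds and their text channels are discovered via the bot; messages in the
+    given date range are collected into one markdown document per channel,
+    summarized with the user's long-context LLM, chunked, embedded, and stored.
+    Channels whose content hash already exists in the search space are skipped.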
+ + Args: + session: Database session + connector_id: ID of the Discord connector + search_space_id: ID of the search space to store documents in + update_last_indexed: Whether to update the last_indexed_at timestamp (default: True) + + Returns: + Tuple containing (number of documents indexed, error message or None) + """ + task_logger = TaskLoggingService(session, search_space_id) + + # Log task start + log_entry = await task_logger.log_task_start( + task_name="discord_messages_indexing", + source="connector_indexing_task", + message=f"Starting Discord messages indexing for connector {connector_id}", + metadata={ + "connector_id": connector_id, + "user_id": str(user_id), + "start_date": start_date, + "end_date": end_date, + }, + ) + + try: + # Get the connector + await task_logger.log_task_progress( + log_entry, + f"Retrieving Discord connector {connector_id} from database", + {"stage": "connector_retrieval"}, + ) + + result = await session.execute( + select(SearchSourceConnector).filter( + SearchSourceConnector.id == connector_id, + SearchSourceConnector.connector_type + == SearchSourceConnectorType.DISCORD_CONNECTOR, + ) + ) + connector = result.scalars().first() + + if not connector: + await task_logger.log_task_failure( + log_entry, + f"Connector with ID {connector_id} not found or is not a Discord connector", + "Connector not found", + {"error_type": "ConnectorNotFound"}, + ) + return ( + 0, + f"Connector with ID {connector_id} not found or is not a Discord connector", + ) + + # Get the Discord token from the connector config + discord_token = connector.config.get("DISCORD_BOT_TOKEN") + if not discord_token: + await task_logger.log_task_failure( + log_entry, + f"Discord token not found in connector config for connector {connector_id}", + "Missing Discord token", + {"error_type": "MissingToken"}, + ) + return 0, "Discord token not found in connector config" + + logger.info(f"Starting Discord indexing for connector {connector_id}") + + # Initialize Discord client + await task_logger.log_task_progress( + log_entry, + f"Initializing Discord client for connector {connector_id}", + {"stage": "client_initialization"}, + ) + + discord_client = DiscordConnector(token=discord_token) + + # Calculate date range + if start_date is None or end_date is None: + # Fall back to calculating dates based on last_indexed_at + calculated_end_date = datetime.now(UTC) + + # Use last_indexed_at as start date if available, otherwise use 365 days ago + if connector.last_indexed_at: + calculated_start_date = connector.last_indexed_at.replace(tzinfo=UTC) + logger.info( + f"Using last_indexed_at ({calculated_start_date.strftime('%Y-%m-%d')}) as start date" + ) + else: + calculated_start_date = calculated_end_date - timedelta(days=365) + logger.info( + f"No last_indexed_at found, using {calculated_start_date.strftime('%Y-%m-%d')} (365 days ago) as start date" + ) + + # Use calculated dates if not provided, convert to ISO format for Discord API + if start_date is None: + start_date_iso = calculated_start_date.isoformat() + else: + # Convert YYYY-MM-DD to ISO format + start_date_iso = ( + datetime.strptime(start_date, "%Y-%m-%d") + .replace(tzinfo=UTC) + .isoformat() + ) + + if end_date is None: + end_date_iso = calculated_end_date.isoformat() + else: + # Convert YYYY-MM-DD to ISO format + end_date_iso = ( + datetime.strptime(end_date, "%Y-%m-%d") + .replace(tzinfo=UTC) + .isoformat() + ) + else: + # Convert provided dates to ISO format for Discord API + start_date_iso = ( + datetime.strptime(start_date, "%Y-%m-%d") 
+ .replace(tzinfo=UTC) + .isoformat() + ) + end_date_iso = ( + datetime.strptime(end_date, "%Y-%m-%d").replace(tzinfo=UTC).isoformat() + ) + + logger.info( + f"Indexing Discord messages from {start_date_iso} to {end_date_iso}" + ) + + documents_indexed = 0 + documents_skipped = 0 + skipped_channels = [] + + try: + await task_logger.log_task_progress( + log_entry, + f"Starting Discord bot and fetching guilds for connector {connector_id}", + {"stage": "fetch_guilds"}, + ) + + logger.info("Starting Discord bot to fetch guilds") + discord_client._bot_task = asyncio.create_task(discord_client.start_bot()) + await discord_client._wait_until_ready() + + logger.info("Fetching Discord guilds") + guilds = await discord_client.get_guilds() + logger.info(f"Found {len(guilds)} guilds") + except Exception as e: + await task_logger.log_task_failure( + log_entry, + f"Failed to get Discord guilds for connector {connector_id}", + str(e), + {"error_type": "GuildFetchError"}, + ) + logger.error(f"Failed to get Discord guilds: {e!s}", exc_info=True) + await discord_client.close_bot() + return 0, f"Failed to get Discord guilds: {e!s}" + if not guilds: + await task_logger.log_task_success( + log_entry, + f"No Discord guilds found for connector {connector_id}", + {"guilds_found": 0}, + ) + logger.info("No Discord guilds found to index") + await discord_client.close_bot() + return 0, "No Discord guilds found" + + # Process each guild and channel + await task_logger.log_task_progress( + log_entry, + f"Starting to process {len(guilds)} Discord guilds", + {"stage": "process_guilds", "total_guilds": len(guilds)}, + ) + + for guild in guilds: + guild_id = guild["id"] + guild_name = guild["name"] + logger.info(f"Processing guild: {guild_name} ({guild_id})") + try: + channels = await discord_client.get_text_channels(guild_id) + if not channels: + logger.info(f"No channels found in guild {guild_name}. Skipping.") + skipped_channels.append(f"{guild_name} (no channels)") + documents_skipped += 1 + continue + + for channel in channels: + channel_id = channel["id"] + channel_name = channel["name"] + + try: + messages = await discord_client.get_channel_history( + channel_id=channel_id, + start_date=start_date_iso, + end_date=end_date_iso, + ) + except Exception as e: + logger.error( + f"Failed to get messages for channel {channel_name}: {e!s}" + ) + skipped_channels.append( + f"{guild_name}#{channel_name} (fetch error)" + ) + documents_skipped += 1 + continue + + if not messages: + logger.info( + f"No messages found in channel {channel_name} for the specified date range." + ) + documents_skipped += 1 + continue + + # Format messages + formatted_messages = [] + for msg in messages: + # Skip system messages if needed (Discord has some types) + if msg.get("type") in ["system"]: + continue + formatted_messages.append(msg) + + if not formatted_messages: + logger.info( + f"No valid messages found in channel {channel_name} after filtering." 
+ ) + documents_skipped += 1 + continue + + # Convert messages to markdown format + channel_content = ( + f"# Discord Channel: {guild_name} / {channel_name}\n\n" + ) + for msg in formatted_messages: + user_name = msg.get("author_name", "Unknown User") + timestamp = msg.get("created_at", "Unknown Time") + text = msg.get("content", "") + channel_content += ( + f"## {user_name} ({timestamp})\n\n{text}\n\n---\n\n" + ) + + # Format document metadata + metadata_sections = [ + ( + "METADATA", + [ + f"GUILD_NAME: {guild_name}", + f"GUILD_ID: {guild_id}", + f"CHANNEL_NAME: {channel_name}", + f"CHANNEL_ID: {channel_id}", + f"MESSAGE_COUNT: {len(formatted_messages)}", + ], + ), + ( + "CONTENT", + [ + "FORMAT: markdown", + "TEXT_START", + channel_content, + "TEXT_END", + ], + ), + ] + + # Build the document string + document_parts = [] + document_parts.append("") + for section_title, section_content in metadata_sections: + document_parts.append(f"<{section_title}>") + document_parts.extend(section_content) + document_parts.append(f"") + document_parts.append("") + combined_document_string = "\n".join(document_parts) + content_hash = generate_content_hash( + combined_document_string, search_space_id + ) + + # Check if document with this content hash already exists + existing_doc_by_hash_result = await session.execute( + select(Document).where(Document.content_hash == content_hash) + ) + existing_document_by_hash = ( + existing_doc_by_hash_result.scalars().first() + ) + + if existing_document_by_hash: + logger.info( + f"Document with content hash {content_hash} already exists for channel {guild_name}#{channel_name}. Skipping processing." + ) + documents_skipped += 1 + continue + + # Get user's long context LLM + user_llm = await get_user_long_context_llm(session, user_id) + if not user_llm: + logger.error( + f"No long context LLM configured for user {user_id}" + ) + skipped_channels.append( + f"{guild_name}#{channel_name} (no LLM configured)" + ) + documents_skipped += 1 + continue + + # Generate summary using summary_chain + summary_chain = SUMMARY_PROMPT_TEMPLATE | user_llm + summary_result = await summary_chain.ainvoke( + {"document": combined_document_string} + ) + summary_content = summary_result.content + summary_embedding = await asyncio.to_thread( + config.embedding_model_instance.embed, summary_content + ) + + # Process chunks + raw_chunks = await asyncio.to_thread( + config.chunker_instance.chunk, channel_content + ) + + chunk_texts = [ + chunk.text for chunk in raw_chunks if chunk.text.strip() + ] + chunk_embeddings = await asyncio.to_thread( + lambda texts: [ + config.embedding_model_instance.embed(t) for t in texts + ], + chunk_texts, + ) + + chunks = [ + Chunk(content=raw_chunk.text, embedding=embedding) + for raw_chunk, embedding in zip( + raw_chunks, chunk_embeddings, strict=False + ) + ] + + # Create and store new document + document = Document( + search_space_id=search_space_id, + title=f"Discord - {guild_name}#{channel_name}", + document_type=DocumentType.DISCORD_CONNECTOR, + document_metadata={ + "guild_name": guild_name, + "guild_id": guild_id, + "channel_name": channel_name, + "channel_id": channel_id, + "message_count": len(formatted_messages), + "start_date": start_date_iso, + "end_date": end_date_iso, + "indexed_at": datetime.now(UTC).strftime( + "%Y-%m-%d %H:%M:%S" + ), + }, + content=summary_content, + content_hash=content_hash, + embedding=summary_embedding, + chunks=chunks, + ) + + session.add(document) + documents_indexed += 1 + logger.info( + f"Successfully indexed new 
channel {guild_name}#{channel_name} with {len(formatted_messages)} messages" + ) + + except Exception as e: + logger.error( + f"Error processing guild {guild_name}: {e!s}", exc_info=True + ) + skipped_channels.append(f"{guild_name} (processing error)") + documents_skipped += 1 + continue + + if update_last_indexed and documents_indexed > 0: + connector.last_indexed_at = datetime.now(UTC) + logger.info(f"Updated last_indexed_at to {connector.last_indexed_at}") + + await session.commit() + await discord_client.close_bot() + + # Prepare result message + result_message = None + if skipped_channels: + result_message = f"Processed {documents_indexed} channels. Skipped {len(skipped_channels)} channels: {', '.join(skipped_channels)}" + else: + result_message = f"Processed {documents_indexed} channels." + + # Log success + await task_logger.log_task_success( + log_entry, + f"Successfully completed Discord indexing for connector {connector_id}", + { + "channels_processed": documents_indexed, + "documents_indexed": documents_indexed, + "documents_skipped": documents_skipped, + "skipped_channels_count": len(skipped_channels), + "guilds_processed": len(guilds), + "result_message": result_message, + }, + ) + + logger.info( + f"Discord indexing completed: {documents_indexed} new channels, {documents_skipped} skipped" + ) + return documents_indexed, result_message + + except SQLAlchemyError as db_error: + await session.rollback() + await task_logger.log_task_failure( + log_entry, + f"Database error during Discord indexing for connector {connector_id}", + str(db_error), + {"error_type": "SQLAlchemyError"}, + ) + logger.error( + f"Database error during Discord indexing: {db_error!s}", exc_info=True + ) + return 0, f"Database error: {db_error!s}" + except Exception as e: + await session.rollback() + await task_logger.log_task_failure( + log_entry, + f"Failed to index Discord messages for connector {connector_id}", + str(e), + {"error_type": type(e).__name__}, + ) + logger.error(f"Failed to index Discord messages: {e!s}", exc_info=True) + return 0, f"Failed to index Discord messages: {e!s}" + + +async def index_jira_issues( + session: AsyncSession, + connector_id: int, + search_space_id: int, + user_id: str, + start_date: str | None = None, + end_date: str | None = None, + update_last_indexed: bool = True, +) -> tuple[int, str | None]: + """ + Index Jira issues and comments. 
+ + Args: + session: Database session + connector_id: ID of the Jira connector + search_space_id: ID of the search space to store documents in + user_id: User ID + start_date: Start date for indexing (YYYY-MM-DD format) + end_date: End date for indexing (YYYY-MM-DD format) + update_last_indexed: Whether to update the last_indexed_at timestamp (default: True) + + Returns: + Tuple containing (number of documents indexed, error message or None) + """ + task_logger = TaskLoggingService(session, search_space_id) + + # Log task start + log_entry = await task_logger.log_task_start( + task_name="jira_issues_indexing", + source="connector_indexing_task", + message=f"Starting Jira issues indexing for connector {connector_id}", + metadata={ + "connector_id": connector_id, + "user_id": str(user_id), + "start_date": start_date, + "end_date": end_date, + }, + ) + + try: + # Get the connector from the database + result = await session.execute( + select(SearchSourceConnector).filter( + SearchSourceConnector.id == connector_id, + SearchSourceConnector.connector_type + == SearchSourceConnectorType.JIRA_CONNECTOR, + ) + ) + connector = result.scalars().first() + + if not connector: + await task_logger.log_task_failure( + log_entry, + f"Connector with ID {connector_id} not found", + "Connector not found", + {"error_type": "ConnectorNotFound"}, + ) + return 0, f"Connector with ID {connector_id} not found" + + # Get the Jira credentials from the connector config + jira_email = connector.config.get("JIRA_EMAIL") + jira_api_token = connector.config.get("JIRA_API_TOKEN") + jira_base_url = connector.config.get("JIRA_BASE_URL") + + if not jira_email or not jira_api_token or not jira_base_url: + await task_logger.log_task_failure( + log_entry, + f"Jira credentials not found in connector config for connector {connector_id}", + "Missing Jira credentials", + {"error_type": "MissingCredentials"}, + ) + return 0, "Jira credentials not found in connector config" + + # Initialize Jira client + await task_logger.log_task_progress( + log_entry, + f"Initializing Jira client for connector {connector_id}", + {"stage": "client_initialization"}, + ) + + jira_client = JiraConnector( + base_url=jira_base_url, email=jira_email, api_token=jira_api_token + ) + + # Calculate date range + if start_date is None or end_date is None: + # Fall back to calculating dates based on last_indexed_at + calculated_end_date = datetime.now() + + # Use last_indexed_at as start date if available, otherwise use 365 days ago + if connector.last_indexed_at: + # Convert dates to be comparable (both timezone-naive) + last_indexed_naive = ( + connector.last_indexed_at.replace(tzinfo=None) + if connector.last_indexed_at.tzinfo + else connector.last_indexed_at + ) + + # Check if last_indexed_at is in the future or after end_date + if last_indexed_naive > calculated_end_date: + logger.warning( + f"Last indexed date ({last_indexed_naive.strftime('%Y-%m-%d')}) is in the future. Using 365 days ago instead." 
+ ) + calculated_start_date = calculated_end_date - timedelta(days=365) + else: + calculated_start_date = last_indexed_naive + logger.info( + f"Using last_indexed_at ({calculated_start_date.strftime('%Y-%m-%d')}) as start date" + ) + else: + calculated_start_date = calculated_end_date - timedelta( + days=365 + ) # Use 365 days as default + logger.info( + f"No last_indexed_at found, using {calculated_start_date.strftime('%Y-%m-%d')} (365 days ago) as start date" + ) + + # Use calculated dates if not provided + start_date_str = ( + start_date if start_date else calculated_start_date.strftime("%Y-%m-%d") + ) + end_date_str = ( + end_date if end_date else calculated_end_date.strftime("%Y-%m-%d") + ) + else: + # Use provided dates + start_date_str = start_date + end_date_str = end_date + + await task_logger.log_task_progress( + log_entry, + f"Fetching Jira issues from {start_date_str} to {end_date_str}", + { + "stage": "fetching_issues", + "start_date": start_date_str, + "end_date": end_date_str, + }, + ) + + # Get issues within date range + try: + issues, error = jira_client.get_issues_by_date_range( + start_date=start_date_str, end_date=end_date_str, include_comments=True + ) + + if error: + logger.error(f"Failed to get Jira issues: {error}") + + # Don't treat "No issues found" as an error that should stop indexing + if "No issues found" in error: + logger.info( + "No issues found is not a critical error, continuing with update" + ) + if update_last_indexed: + connector.last_indexed_at = datetime.now() + await session.commit() + logger.info( + f"Updated last_indexed_at to {connector.last_indexed_at} despite no issues found" + ) + + await task_logger.log_task_success( + log_entry, + f"No Jira issues found in date range {start_date_str} to {end_date_str}", + {"issues_found": 0}, + ) + return 0, None + else: + await task_logger.log_task_failure( + log_entry, + f"Failed to get Jira issues: {error}", + "API Error", + {"error_type": "APIError"}, + ) + return 0, f"Failed to get Jira issues: {error}" + + logger.info(f"Retrieved {len(issues)} issues from Jira API") + + except Exception as e: + logger.error(f"Error fetching Jira issues: {e!s}", exc_info=True) + return 0, f"Error fetching Jira issues: {e!s}" + + # Process and index each issue + documents_indexed = 0 + skipped_issues = [] + documents_skipped = 0 + + for issue in issues: + try: + issue_id = issue.get("key") + issue_identifier = issue.get("key", "") + issue_title = issue.get("id", "") + + if not issue_id or not issue_title: + logger.warning( + f"Skipping issue with missing ID or title: {issue_id or 'Unknown'}" + ) + skipped_issues.append( + f"{issue_identifier or 'Unknown'} (missing data)" + ) + documents_skipped += 1 + continue + + # Format the issue for better readability + formatted_issue = jira_client.format_issue(issue) + + # Convert to markdown + issue_content = jira_client.format_issue_to_markdown(formatted_issue) + + if not issue_content: + logger.warning( + f"Skipping issue with no content: {issue_identifier} - {issue_title}" + ) + skipped_issues.append(f"{issue_identifier} (no content)") + documents_skipped += 1 + continue + + # Create a simple summary + summary_content = f"Jira Issue {issue_identifier}: {issue_title}\n\nStatus: {formatted_issue.get('status', 'Unknown')}\n\n" + if formatted_issue.get("description"): + summary_content += ( + f"Description: {formatted_issue.get('description')}\n\n" + ) + + # Add comment count + comment_count = len(formatted_issue.get("comments", [])) + summary_content += f"Comments: 
{comment_count}" + + # Generate content hash + content_hash = generate_content_hash(issue_content, search_space_id) + + # Check if document already exists + existing_doc_by_hash_result = await session.execute( + select(Document).where(Document.content_hash == content_hash) + ) + existing_document_by_hash = ( + existing_doc_by_hash_result.scalars().first() + ) + + if existing_document_by_hash: + logger.info( + f"Document with content hash {content_hash} already exists for issue {issue_identifier}. Skipping processing." + ) + documents_skipped += 1 + continue + + # Generate embedding for the summary + summary_embedding = config.embedding_model_instance.embed( + summary_content + ) + + # Process chunks - using the full issue content with comments + chunks = [ + Chunk( + content=chunk.text, + embedding=config.embedding_model_instance.embed(chunk.text), + ) + for chunk in config.chunker_instance.chunk(issue_content) + ] + + # Create and store new document + logger.info( + f"Creating new document for issue {issue_identifier} - {issue_title}" + ) + document = Document( + search_space_id=search_space_id, + title=f"Jira - {issue_identifier}: {issue_title}", + document_type=DocumentType.JIRA_CONNECTOR, + document_metadata={ + "issue_id": issue_id, + "issue_identifier": issue_identifier, + "issue_title": issue_title, + "state": formatted_issue.get("status", "Unknown"), + "comment_count": comment_count, + "indexed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"), + }, + content=summary_content, + content_hash=content_hash, + embedding=summary_embedding, + chunks=chunks, + ) + + session.add(document) + documents_indexed += 1 + logger.info( + f"Successfully indexed new issue {issue_identifier} - {issue_title}" + ) + + except Exception as e: + logger.error( + f"Error processing issue {issue.get('identifier', 'Unknown')}: {e!s}", + exc_info=True, + ) + skipped_issues.append( + f"{issue.get('identifier', 'Unknown')} (processing error)" + ) + documents_skipped += 1 + continue # Skip this issue and continue with others + + # Update the last_indexed_at timestamp for the connector only if requested + total_processed = documents_indexed + if update_last_indexed: + connector.last_indexed_at = datetime.now() + logger.info(f"Updated last_indexed_at to {connector.last_indexed_at}") + + # Commit all changes + await session.commit() + logger.info("Successfully committed all JIRA document changes to database") + + # Log success + await task_logger.log_task_success( + log_entry, + f"Successfully completed JIRA indexing for connector {connector_id}", + { + "issues_processed": total_processed, + "documents_indexed": documents_indexed, + "documents_skipped": documents_skipped, + "skipped_issues_count": len(skipped_issues), + }, + ) + + logger.info( + f"JIRA indexing completed: {documents_indexed} new issues, {documents_skipped} skipped" + ) + return ( + total_processed, + None, + ) # Return None as the error message to indicate success + + except SQLAlchemyError as db_error: + await session.rollback() + await task_logger.log_task_failure( + log_entry, + f"Database error during JIRA indexing for connector {connector_id}", + str(db_error), + {"error_type": "SQLAlchemyError"}, + ) + logger.error(f"Database error: {db_error!s}", exc_info=True) + return 0, f"Database error: {db_error!s}" + except Exception as e: + await session.rollback() + await task_logger.log_task_failure( + log_entry, + f"Failed to index JIRA issues for connector {connector_id}", + str(e), + {"error_type": type(e).__name__}, + ) + logger.error(f"Failed to 
index JIRA issues: {e!s}", exc_info=True) + return 0, f"Failed to index JIRA issues: {e!s}" + + +async def index_confluence_pages( + session: AsyncSession, + connector_id: int, + search_space_id: int, + user_id: str, + start_date: str | None = None, + end_date: str | None = None, + update_last_indexed: bool = True, +) -> tuple[int, str | None]: + """ + Index Confluence pages and comments. + + Args: + session: Database session + connector_id: ID of the Confluence connector + search_space_id: ID of the search space to store documents in + user_id: User ID + start_date: Start date for indexing (YYYY-MM-DD format) + end_date: End date for indexing (YYYY-MM-DD format) + update_last_indexed: Whether to update the last_indexed_at timestamp (default: True) + + Returns: + Tuple containing (number of documents indexed, error message or None) + """ + task_logger = TaskLoggingService(session, search_space_id) + + # Log task start + log_entry = await task_logger.log_task_start( + task_name="confluence_pages_indexing", + source="connector_indexing_task", + message=f"Starting Confluence pages indexing for connector {connector_id}", + metadata={ + "connector_id": connector_id, + "user_id": str(user_id), + "start_date": start_date, + "end_date": end_date, + }, + ) + + try: + # Get the connector from the database + result = await session.execute( + select(SearchSourceConnector).filter( + SearchSourceConnector.id == connector_id, + SearchSourceConnector.connector_type + == SearchSourceConnectorType.CONFLUENCE_CONNECTOR, + ) + ) + connector = result.scalars().first() + + if not connector: + await task_logger.log_task_failure( + log_entry, + f"Connector with ID {connector_id} not found", + "Connector not found", + {"error_type": "ConnectorNotFound"}, + ) + return 0, f"Connector with ID {connector_id} not found" + + # Get the Confluence credentials from the connector config + confluence_email = connector.config.get("CONFLUENCE_EMAIL") + confluence_api_token = connector.config.get("CONFLUENCE_API_TOKEN") + confluence_base_url = connector.config.get("CONFLUENCE_BASE_URL") + + if not confluence_email or not confluence_api_token or not confluence_base_url: + await task_logger.log_task_failure( + log_entry, + f"Confluence credentials not found in connector config for connector {connector_id}", + "Missing Confluence credentials", + {"error_type": "MissingCredentials"}, + ) + return 0, "Confluence credentials not found in connector config" + + # Initialize Confluence client + await task_logger.log_task_progress( + log_entry, + f"Initializing Confluence client for connector {connector_id}", + {"stage": "client_initialization"}, + ) + + confluence_client = ConfluenceConnector( + base_url=confluence_base_url, + email=confluence_email, + api_token=confluence_api_token, + ) + + # Calculate date range + if start_date is None or end_date is None: + # Fall back to calculating dates based on last_indexed_at + calculated_end_date = datetime.now() + + # Use last_indexed_at as start date if available, otherwise use 365 days ago + if connector.last_indexed_at: + # Convert dates to be comparable (both timezone-naive) + last_indexed_naive = ( + connector.last_indexed_at.replace(tzinfo=None) + if connector.last_indexed_at.tzinfo + else connector.last_indexed_at + ) + + # Check if last_indexed_at is in the future or after end_date + if last_indexed_naive > calculated_end_date: + logger.warning( + f"Last indexed date ({last_indexed_naive.strftime('%Y-%m-%d')}) is in the future. Using 365 days ago instead." 
+ ) + calculated_start_date = calculated_end_date - timedelta(days=365) + else: + calculated_start_date = last_indexed_naive + logger.info( + f"Using last_indexed_at ({calculated_start_date.strftime('%Y-%m-%d')}) as start date" + ) + else: + calculated_start_date = calculated_end_date - timedelta( + days=365 + ) # Use 365 days as default + logger.info( + f"No last_indexed_at found, using {calculated_start_date.strftime('%Y-%m-%d')} (365 days ago) as start date" + ) + + # Use calculated dates if not provided + start_date_str = ( + start_date if start_date else calculated_start_date.strftime("%Y-%m-%d") + ) + end_date_str = ( + end_date if end_date else calculated_end_date.strftime("%Y-%m-%d") + ) + else: + # Use provided dates + start_date_str = start_date + end_date_str = end_date + + await task_logger.log_task_progress( + log_entry, + f"Fetching Confluence pages from {start_date_str} to {end_date_str}", + { + "stage": "fetching_pages", + "start_date": start_date_str, + "end_date": end_date_str, + }, + ) + + # Get pages within date range + try: + pages, error = confluence_client.get_pages_by_date_range( + start_date=start_date_str, end_date=end_date_str, include_comments=True + ) + + if error: + logger.error(f"Failed to get Confluence pages: {error}") + + # Don't treat "No pages found" as an error that should stop indexing + if "No pages found" in error: + logger.info( + "No pages found is not a critical error, continuing with update" + ) + if update_last_indexed: + connector.last_indexed_at = datetime.now() + await session.commit() + logger.info( + f"Updated last_indexed_at to {connector.last_indexed_at} despite no pages found" + ) + + await task_logger.log_task_success( + log_entry, + f"No Confluence pages found in date range {start_date_str} to {end_date_str}", + {"pages_found": 0}, + ) + return 0, None + else: + await task_logger.log_task_failure( + log_entry, + f"Failed to get Confluence pages: {error}", + "API Error", + {"error_type": "APIError"}, + ) + return 0, f"Failed to get Confluence pages: {error}" + + logger.info(f"Retrieved {len(pages)} pages from Confluence API") + + except Exception as e: + logger.error(f"Error fetching Confluence pages: {e!s}", exc_info=True) + return 0, f"Error fetching Confluence pages: {e!s}" + + # Process and index each page + documents_indexed = 0 + skipped_pages = [] + documents_skipped = 0 + + for page in pages: + try: + page_id = page.get("id") + page_title = page.get("title", "") + space_id = page.get("spaceId", "") + + if not page_id or not page_title: + logger.warning( + f"Skipping page with missing ID or title: {page_id or 'Unknown'}" + ) + skipped_pages.append(f"{page_title or 'Unknown'} (missing data)") + documents_skipped += 1 + continue + + # Extract page content + page_content = "" + if page.get("body") and page["body"].get("storage"): + page_content = page["body"]["storage"].get("value", "") + + # Add comments to content + comments = page.get("comments", []) + comments_content = "" + if comments: + comments_content = "\n\n## Comments\n\n" + for comment in comments: + comment_body = "" + if comment.get("body") and comment["body"].get("storage"): + comment_body = comment["body"]["storage"].get("value", "") + + comment_author = comment.get("version", {}).get( + "authorId", "Unknown" + ) + comment_date = comment.get("version", {}).get("createdAt", "") + + comments_content += f"**Comment by {comment_author}** ({comment_date}):\n{comment_body}\n\n" + + # Combine page content with comments + full_content = f"# 
{page_title}\n\n{page_content}{comments_content}" + + if not full_content.strip(): + logger.warning(f"Skipping page with no content: {page_title}") + skipped_pages.append(f"{page_title} (no content)") + documents_skipped += 1 + continue + + # Create a simple summary + summary_content = ( + f"Confluence Page: {page_title}\n\nSpace ID: {space_id}\n\n" + ) + if page_content: + # Take first 500 characters of content for summary + content_preview = page_content[:500] + if len(page_content) > 500: + content_preview += "..." + summary_content += f"Content Preview: {content_preview}\n\n" + + # Add comment count + comment_count = len(comments) + summary_content += f"Comments: {comment_count}" + + # Generate content hash + content_hash = generate_content_hash(full_content, search_space_id) + + # Check if document already exists + existing_doc_by_hash_result = await session.execute( + select(Document).where(Document.content_hash == content_hash) + ) + existing_document_by_hash = ( + existing_doc_by_hash_result.scalars().first() + ) + + if existing_document_by_hash: + logger.info( + f"Document with content hash {content_hash} already exists for page {page_title}. Skipping processing." + ) + documents_skipped += 1 + continue + + # Generate embedding for the summary + summary_embedding = config.embedding_model_instance.embed( + summary_content + ) + + # Process chunks - using the full page content with comments + chunks = [ + Chunk( + content=chunk.text, + embedding=config.embedding_model_instance.embed(chunk.text), + ) + for chunk in config.chunker_instance.chunk(full_content) + ] + + # Create and store new document + logger.info(f"Creating new document for page {page_title}") + document = Document( + search_space_id=search_space_id, + title=f"Confluence - {page_title}", + document_type=DocumentType.CONFLUENCE_CONNECTOR, + document_metadata={ + "page_id": page_id, + "page_title": page_title, + "space_id": space_id, + "comment_count": comment_count, + "indexed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"), + }, + content=summary_content, + content_hash=content_hash, + embedding=summary_embedding, + chunks=chunks, + ) + + session.add(document) + documents_indexed += 1 + logger.info(f"Successfully indexed new page {page_title}") + + except Exception as e: + logger.error( + f"Error processing page {page.get('title', 'Unknown')}: {e!s}", + exc_info=True, + ) + skipped_pages.append( + f"{page.get('title', 'Unknown')} (processing error)" + ) + documents_skipped += 1 + continue # Skip this page and continue with others + + # Update the last_indexed_at timestamp for the connector only if requested + total_processed = documents_indexed + if update_last_indexed: + connector.last_indexed_at = datetime.now() + logger.info(f"Updated last_indexed_at to {connector.last_indexed_at}") + + # Commit all changes + await session.commit() + logger.info( + "Successfully committed all Confluence document changes to database" + ) + + # Log success + await task_logger.log_task_success( + log_entry, + f"Successfully completed Confluence indexing for connector {connector_id}", + { + "pages_processed": total_processed, + "documents_indexed": documents_indexed, + "documents_skipped": documents_skipped, + "skipped_pages_count": len(skipped_pages), + }, + ) + + logger.info( + f"Confluence indexing completed: {documents_indexed} new pages, {documents_skipped} skipped" + ) + return ( + total_processed, + None, + ) # Return None as the error message to indicate success + + except SQLAlchemyError as db_error: + await 
session.rollback() + await task_logger.log_task_failure( + log_entry, + f"Database error during Confluence indexing for connector {connector_id}", + str(db_error), + {"error_type": "SQLAlchemyError"}, + ) + logger.error(f"Database error: {db_error!s}", exc_info=True) + return 0, f"Database error: {db_error!s}" + except Exception as e: + await session.rollback() + await task_logger.log_task_failure( + log_entry, + f"Failed to index Confluence pages for connector {connector_id}", + str(e), + {"error_type": type(e).__name__}, + ) + logger.error(f"Failed to index Confluence pages: {e!s}", exc_info=True) + return 0, f"Failed to index Confluence pages: {e!s}" + + +async def index_clickup_tasks( + session: AsyncSession, + connector_id: int, + search_space_id: int, + user_id: str, + start_date: str | None = None, + end_date: str | None = None, + update_last_indexed: bool = True, +) -> tuple[int, str | None]: + """ + Index tasks from ClickUp workspace. + + Args: + session: Database session + connector_id: ID of the ClickUp connector + search_space_id: ID of the search space + user_id: ID of the user + start_date: Start date for filtering tasks (YYYY-MM-DD format) + end_date: End date for filtering tasks (YYYY-MM-DD format) + update_last_indexed: Whether to update the last_indexed_at timestamp + + Returns: + Tuple of (number of indexed tasks, error message if any) + """ + task_logger = TaskLoggingService(session, search_space_id) + + # Log task start + log_entry = await task_logger.log_task_start( + task_name="clickup_tasks_indexing", + source="connector_indexing_task", + message=f"Starting ClickUp tasks indexing for connector {connector_id}", + metadata={ + "connector_id": connector_id, + "start_date": start_date, + "end_date": end_date, + }, + ) + + try: + # Get connector configuration + result = await session.execute( + select(SearchSourceConnector).filter( + SearchSourceConnector.id == connector_id + ) + ) + connector = result.scalars().first() + + if not connector: + error_msg = f"ClickUp connector with ID {connector_id} not found" + await task_logger.log_task_failure( + log_entry, + f"Connector with ID {connector_id} not found or is not a ClickUp connector", + "Connector not found", + {"error_type": "ConnectorNotFound"}, + ) + return 0, error_msg + + # Extract ClickUp configuration + clickup_api_token = connector.config.get("CLICKUP_API_TOKEN") + + if not clickup_api_token: + error_msg = "ClickUp API token not found in connector configuration" + await task_logger.log_task_failure( + log_entry, + f"ClickUp API token not found in connector config for connector {connector_id}", + "Missing ClickUp token", + {"error_type": "MissingToken"}, + ) + return 0, error_msg + + await task_logger.log_task_progress( + log_entry, + f"Initializing ClickUp client for connector {connector_id}", + {"stage": "client_initialization"}, + ) + + clickup_client = ClickUpConnector(api_token=clickup_api_token) + + # Get authorized workspaces + await task_logger.log_task_progress( + log_entry, + "Fetching authorized ClickUp workspaces", + {"stage": "workspace_fetching"}, + ) + + workspaces_response = clickup_client.get_authorized_workspaces() + workspaces = workspaces_response.get("teams", []) + + if not workspaces: + error_msg = "No authorized ClickUp workspaces found" + await task_logger.log_task_failure( + log_entry, + f"No authorized ClickUp workspaces found for connector {connector_id}", + "No workspaces found", + {"error_type": "NoWorkspacesFound"}, + ) + return 0, error_msg + + # Process and index each task + 
documents_indexed = 0 + documents_skipped = 0 + + for workspace in workspaces: + workspace_id = workspace.get("id") + workspace_name = workspace.get("name", "Unknown Workspace") + + if not workspace_id: + continue + + await task_logger.log_task_progress( + log_entry, + f"Processing workspace: {workspace_name}", + {"stage": "workspace_processing", "workspace_id": workspace_id}, + ) + + # Fetch tasks from workspace + if start_date and end_date: + tasks, error = clickup_client.get_tasks_in_date_range( + workspace_id=workspace_id, + start_date=start_date, + end_date=end_date, + include_closed=True, + ) + if error: + logger.warning( + f"Error fetching tasks from workspace {workspace_name}: {error}" + ) + continue + else: + tasks = clickup_client.get_workspace_tasks( + workspace_id=workspace_id, include_closed=True + ) + + await task_logger.log_task_progress( + log_entry, + f"Found {len(tasks)} tasks in workspace {workspace_name}", + {"stage": "tasks_found", "task_count": len(tasks)}, + ) + + # Process each task + for task in tasks: + try: + task_id = task.get("id") + task_name = task.get("name", "Untitled Task") + task_description = task.get("description", "") + task_status = task.get("status", {}).get("status", "Unknown") + task_priority = ( + task.get("priority", {}).get("priority", "Unknown") + if task.get("priority") + else "None" + ) + task_assignees = task.get("assignees", []) + task_due_date = task.get("due_date") + task_created = task.get("date_created") + task_updated = task.get("date_updated") + + # Get list and space information + task_list = task.get("list", {}) + task_list_name = task_list.get("name", "Unknown List") + task_space = task.get("space", {}) + task_space_name = task_space.get("name", "Unknown Space") + + # Create task content + content_parts = [f"Task: {task_name}"] + + if task_description: + content_parts.append(f"Description: {task_description}") + + content_parts.extend( + [ + f"Status: {task_status}", + f"Priority: {task_priority}", + f"List: {task_list_name}", + f"Space: {task_space_name}", + ] + ) + + if task_assignees: + assignee_names = [ + assignee.get("username", "Unknown") + for assignee in task_assignees + ] + content_parts.append(f"Assignees: {', '.join(assignee_names)}") + + if task_due_date: + content_parts.append(f"Due Date: {task_due_date}") + + task_content = "\n".join(content_parts) + + if not task_content.strip(): + logger.warning(f"Skipping task with no content: {task_name}") + continue + + # Generate content hash + content_hash = generate_content_hash(task_content, search_space_id) + + # Check if document already exists + existing_doc_by_hash_result = await session.execute( + select(Document).where(Document.content_hash == content_hash) + ) + existing_document_by_hash = ( + existing_doc_by_hash_result.scalars().first() + ) + + if existing_document_by_hash: + logger.info( + f"Document with content hash {content_hash} already exists for task {task_name}. Skipping processing." 
+ ) + documents_skipped += 1 + continue + + # Generate embedding for the summary + summary_embedding = config.embedding_model_instance.embed( + task_content + ) + + # Process chunks - using the full page content with comments + chunks = [ + Chunk( + content=chunk.text, + embedding=config.embedding_model_instance.embed(chunk.text), + ) + for chunk in config.chunker_instance.chunk(task_content) + ] + + # Create and store new document + logger.info(f"Creating new document for task {task_name}") + + document = Document( + search_space_id=search_space_id, + title=f"Task - {task_name}", + document_type=DocumentType.CLICKUP_CONNECTOR, + document_metadata={ + "task_id": task_id, + "task_name": task_name, + "task_status": task_status, + "task_priority": task_priority, + "task_assignees": task_assignees, + "task_due_date": task_due_date, + "task_created": task_created, + "task_updated": task_updated, + "indexed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"), + }, + content=task_content, + content_hash=content_hash, + embedding=summary_embedding, + chunks=chunks, + ) + + session.add(document) + documents_indexed += 1 + logger.info(f"Successfully indexed new task {task_name}") + + except Exception as e: + logger.error( + f"Error processing task {task.get('name', 'Unknown')}: {e!s}", + exc_info=True, + ) + documents_skipped += 1 + + # Update the last_indexed_at timestamp for the connector only if requested + total_processed = documents_indexed + if update_last_indexed: + connector.last_indexed_at = datetime.now() + logger.info(f"Updated last_indexed_at to {connector.last_indexed_at}") + + # Commit all changes + await session.commit() + logger.info( + "Successfully committed all clickup document changes to database" + ) + + # Log success + await task_logger.log_task_success( + log_entry, + f"Successfully completed clickup indexing for connector {connector_id}", + { + "pages_processed": total_processed, + "documents_indexed": documents_indexed, + "documents_skipped": documents_skipped, + }, + ) + + logger.info( + f"clickup indexing completed: {documents_indexed} new tasks, {documents_skipped} skipped" + ) + return ( + total_processed, + None, + ) # Return None as the error message to indicate success + + except SQLAlchemyError as db_error: + await session.rollback() + await task_logger.log_task_failure( + log_entry, + f"Database error during Cickup indexing for connector {connector_id}", + str(db_error), + {"error_type": "SQLAlchemyError"}, + ) + logger.error(f"Database error: {db_error!s}", exc_info=True) + return 0, f"Database error: {db_error!s}" + except Exception as e: + await session.rollback() + await task_logger.log_task_failure( + log_entry, + f"Failed to index ClickUp tasks for connector {connector_id}", + str(e), + {"error_type": type(e).__name__}, + ) + logger.error(f"Failed to index ClickUp tasks: {e!s}", exc_info=True) + return 0, f"Failed to index ClickUp tasks: {e!s}" + + +async def index_google_calendar_events( + session: AsyncSession, + connector_id: int, + search_space_id: int, + user_id: str, + start_date: str | None = None, + end_date: str | None = None, + update_last_indexed: bool = True, +) -> tuple[int, str | None]: + """ + Index Google Calendar events. 
+ + Args: + session: Database session + connector_id: ID of the Google Calendar connector + search_space_id: ID of the search space to store documents in + user_id: User ID + start_date: Start date for indexing (YYYY-MM-DD format) + end_date: End date for indexing (YYYY-MM-DD format) + update_last_indexed: Whether to update the last_indexed_at timestamp (default: True) + + Returns: + Tuple containing (number of documents indexed, error message or None) + """ + task_logger = TaskLoggingService(session, search_space_id) + + # Log task start + log_entry = await task_logger.log_task_start( + task_name="google_calendar_events_indexing", + source="connector_indexing_task", + message=f"Starting Google Calendar events indexing for connector {connector_id}", + metadata={ + "connector_id": connector_id, + "user_id": str(user_id), + "start_date": start_date, + "end_date": end_date, + }, + ) + + try: + # Get the connector from the database + result = await session.execute( + select(SearchSourceConnector).filter( + SearchSourceConnector.id == connector_id, + SearchSourceConnector.connector_type + == SearchSourceConnectorType.GOOGLE_CALENDAR_CONNECTOR, + ) + ) + connector = result.scalars().first() + + if not connector: + await task_logger.log_task_failure( + log_entry, + f"Connector with ID {connector_id} not found", + "Connector not found", + {"error_type": "ConnectorNotFound"}, + ) + return 0, f"Connector with ID {connector_id} not found" + + # Get the Google Calendar credentials from the connector config + credentials = Credentials( + token=connector.config.get("token"), + refresh_token=connector.config.get("refresh_token"), + token_uri=connector.config.get("token_uri"), + client_id=connector.config.get("client_id"), + client_secret=connector.config.get("client_secret"), + scopes=connector.config.get("scopes"), + ) + + if ( + not credentials.client_id + or not credentials.client_secret + or not credentials.refresh_token + ): + await task_logger.log_task_failure( + log_entry, + f"Google Calendar credentials not found in connector config for connector {connector_id}", + "Missing Google Calendar credentials", + {"error_type": "MissingCredentials"}, + ) + return 0, "Google Calendar credentials not found in connector config" + + # Initialize Google Calendar client + await task_logger.log_task_progress( + log_entry, + f"Initializing Google Calendar client for connector {connector_id}", + {"stage": "client_initialization"}, + ) + + calendar_client = GoogleCalendarConnector(credentials=credentials) + + # Calculate date range + if start_date is None or end_date is None: + # Fall back to calculating dates based on last_indexed_at + calculated_end_date = datetime.now() + + # Use last_indexed_at as start date if available, otherwise use 30 days ago + if connector.last_indexed_at: + # Convert dates to be comparable (both timezone-naive) + last_indexed_naive = ( + connector.last_indexed_at.replace(tzinfo=None) + if connector.last_indexed_at.tzinfo + else connector.last_indexed_at + ) + + # Check if last_indexed_at is in the future or after end_date + if last_indexed_naive > calculated_end_date: + logger.warning( + f"Last indexed date ({last_indexed_naive.strftime('%Y-%m-%d')}) is in the future. Using 30 days ago instead." 
+ ) + calculated_start_date = calculated_end_date - timedelta(days=30) + else: + calculated_start_date = last_indexed_naive + logger.info( + f"Using last_indexed_at ({calculated_start_date.strftime('%Y-%m-%d')}) as start date" + ) + else: + calculated_start_date = calculated_end_date - timedelta( + days=30 + ) # Use 30 days as default for calendar events + logger.info( + f"No last_indexed_at found, using {calculated_start_date.strftime('%Y-%m-%d')} (30 days ago) as start date" + ) + + # Use calculated dates if not provided + start_date_str = ( + start_date if start_date else calculated_start_date.strftime("%Y-%m-%d") + ) + end_date_str = ( + end_date if end_date else calculated_end_date.strftime("%Y-%m-%d") + ) + else: + # Use provided dates + start_date_str = start_date + end_date_str = end_date + + await task_logger.log_task_progress( + log_entry, + f"Fetching Google Calendar events from {start_date_str} to {end_date_str}", + { + "stage": "fetching_events", + "start_date": start_date_str, + "end_date": end_date_str, + }, + ) + + # Get events within date range from primary calendar + try: + events, error = calendar_client.get_all_primary_calendar_events( + start_date=start_date_str, end_date=end_date_str + ) + + if error: + logger.error(f"Failed to get Google Calendar events: {error}") + + # Don't treat "No events found" as an error that should stop indexing + if "No events found" in error: + logger.info( + "No events found is not a critical error, continuing with update" + ) + if update_last_indexed: + connector.last_indexed_at = datetime.now() + await session.commit() + logger.info( + f"Updated last_indexed_at to {connector.last_indexed_at} despite no events found" + ) + + await task_logger.log_task_success( + log_entry, + f"No Google Calendar events found in date range {start_date_str} to {end_date_str}", + {"events_found": 0}, + ) + return 0, None + else: + await task_logger.log_task_failure( + log_entry, + f"Failed to get Google Calendar events: {error}", + "API Error", + {"error_type": "APIError"}, + ) + return 0, f"Failed to get Google Calendar events: {error}" + + logger.info(f"Retrieved {len(events)} events from Google Calendar API") + + except Exception as e: + logger.error(f"Error fetching Google Calendar events: {e!s}", exc_info=True) + return 0, f"Error fetching Google Calendar events: {e!s}" + + # Process and index each event + documents_indexed = 0 + skipped_events = [] + documents_skipped = 0 + + for event in events: + try: + event_id = event.get("id") + event_summary = event.get("summary", "No Title") + calendar_id = event.get("calendarId", "") + + if not event_id: + logger.warning(f"Skipping event with missing ID: {event_summary}") + skipped_events.append(f"{event_summary} (missing ID)") + documents_skipped += 1 + continue + + # Format event as markdown + event_markdown = calendar_client.format_event_to_markdown(event) + + if not event_markdown.strip(): + logger.warning(f"Skipping event with no content: {event_summary}") + skipped_events.append(f"{event_summary} (no content)") + documents_skipped += 1 + continue + + # Create a simple summary for the document + start = event.get("start", {}) + end = event.get("end", {}) + start_time = start.get("dateTime") or start.get("date", "") + end_time = end.get("dateTime") or end.get("date", "") + location = event.get("location", "") + description = event.get("description", "") + + summary_content = f"Google Calendar Event: {event_summary}\n\n" + summary_content += f"Calendar: {calendar_id}\n" + summary_content += f"Start: 
{start_time}\n" + summary_content += f"End: {end_time}\n" + + if location: + summary_content += f"Location: {location}\n" + + if description: + # Take first 300 characters of description for summary + desc_preview = description[:300] + if len(description) > 300: + desc_preview += "..." + summary_content += f"Description: {desc_preview}\n" + + # Generate content hash + content_hash = generate_content_hash(event_markdown, search_space_id) + + # Check if document already exists + existing_doc_by_hash_result = await session.execute( + select(Document).where(Document.content_hash == content_hash) + ) + existing_document_by_hash = ( + existing_doc_by_hash_result.scalars().first() + ) + + if existing_document_by_hash: + logger.info( + f"Document with content hash {content_hash} already exists for event {event_summary}. Skipping processing." + ) + documents_skipped += 1 + continue + + # Generate embedding for the summary + summary_embedding = config.embedding_model_instance.embed( + summary_content + ) + + # Process chunks - using the full event markdown + chunks = [ + Chunk( + content=chunk.text, + embedding=config.embedding_model_instance.embed(chunk.text), + ) + for chunk in config.chunker_instance.chunk(event_markdown) + ] + + # Create and store new document + logger.info(f"Creating new document for event {event_summary}") + document = Document( + search_space_id=search_space_id, + title=f"Calendar Event - {event_summary}", + document_type=DocumentType.GOOGLE_CALENDAR_CONNECTOR, + document_metadata={ + "event_id": event_id, + "event_summary": event_summary, + "calendar_id": calendar_id, + "start_time": start_time, + "end_time": end_time, + "location": location, + "indexed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"), + }, + content=summary_content, + content_hash=content_hash, + embedding=summary_embedding, + chunks=chunks, + ) + + session.add(document) + documents_indexed += 1 + logger.info(f"Successfully indexed new event {event_summary}") + + except Exception as e: + logger.error( + f"Error processing event {event.get('summary', 'Unknown')}: {e!s}", + exc_info=True, + ) + skipped_events.append( + f"{event.get('summary', 'Unknown')} (processing error)" + ) + documents_skipped += 1 + continue # Skip this event and continue with others + + # Update the last_indexed_at timestamp for the connector only if requested + total_processed = documents_indexed + if update_last_indexed: + connector.last_indexed_at = datetime.now() + logger.info(f"Updated last_indexed_at to {connector.last_indexed_at}") + + # Commit all changes + await session.commit() + logger.info( + "Successfully committed all Google Calendar document changes to database" + ) + + # Log success + await task_logger.log_task_success( + log_entry, + f"Successfully completed Google Calendar indexing for connector {connector_id}", + { + "events_processed": total_processed, + "documents_indexed": documents_indexed, + "documents_skipped": documents_skipped, + "skipped_events_count": len(skipped_events), + }, + ) + + logger.info( + f"Google Calendar indexing completed: {documents_indexed} new events, {documents_skipped} skipped" + ) + return ( + total_processed, + None, + ) # Return None as the error message to indicate success + + except SQLAlchemyError as db_error: + await session.rollback() + await task_logger.log_task_failure( + log_entry, + f"Database error during Google Calendar indexing for connector {connector_id}", + str(db_error), + {"error_type": "SQLAlchemyError"}, + ) + logger.error(f"Database error: {db_error!s}", 
exc_info=True)
+        return 0, f"Database error: {db_error!s}"
+    except Exception as e:
+        await session.rollback()
+        await task_logger.log_task_failure(
+            log_entry,
+            f"Failed to index Google Calendar events for connector {connector_id}",
+            str(e),
+            {"error_type": type(e).__name__},
+        )
+        logger.error(f"Failed to index Google Calendar events: {e!s}", exc_info=True)
+        return 0, f"Failed to index Google Calendar events: {e!s}"
+
+
+async def index_google_gmail_messages(
+    session: AsyncSession,
+    connector_id: int,
+    search_space_id: int,
+    user_id: str,
+    max_messages: int = 100,
+    days_back: int = 30,
+) -> tuple[int, str]:
+    """
+    Index Gmail messages for a specific connector.
+
+    Args:
+        session: Database session
+        connector_id: ID of the Gmail connector
+        search_space_id: ID of the search space
+        user_id: ID of the user
+        max_messages: Maximum number of messages to fetch (default: 100)
+        days_back: Number of days to look back (default: 30)
+
+    Returns:
+        Tuple of (number_of_indexed_messages, status_message)
+    """
+    task_logger = TaskLoggingService(session, search_space_id)
+
+    # Log task start
+    log_entry = await task_logger.log_task_start(
+        task_name="google_gmail_messages_indexing",
+        source="connector_indexing_task",
+        message=f"Starting Gmail messages indexing for connector {connector_id}",
+        metadata={
+            "connector_id": connector_id,
+            "user_id": str(user_id),
+            "max_messages": max_messages,
+            "days_back": days_back,
+        },
+    )
+
+    try:
+        # Get the connector from the database
+        result = await session.execute(
+            select(SearchSourceConnector).filter(
+                SearchSourceConnector.id == connector_id,
+                SearchSourceConnector.connector_type
+                == SearchSourceConnectorType.GOOGLE_GMAIL_CONNECTOR,
+            )
+        )
+        connector = result.scalars().first()
+
+        if not connector:
+            error_msg = f"Gmail connector with ID {connector_id} not found"
+            await task_logger.log_task_failure(
+                log_entry,
+                error_msg,
+                "Connector not found",
+                {"error_type": "ConnectorNotFound"},
+            )
+            return 0, error_msg
+
+        # Create credentials from connector config
+        config_data = connector.config
+        credentials = Credentials(
+            token=config_data.get("token"),
+            refresh_token=config_data.get("refresh_token"),
+            token_uri=config_data.get("token_uri"),
+            client_id=config_data.get("client_id"),
+            client_secret=config_data.get("client_secret"),
+            scopes=config_data.get("scopes", []),
+        )
+
+        # Initialize Gmail connector
+        gmail_connector = GoogleGmailConnector(credentials)
+
+        # Fetch recent messages
+        logger.info(f"Fetching recent Gmail messages for connector {connector_id}")
+        messages, error = gmail_connector.get_recent_messages(
+            max_results=max_messages, days_back=days_back
+        )
+
+        if error:
+            await task_logger.log_task_failure(
+                log_entry,
+                f"Failed to fetch Gmail messages: {error}",
+                "API Error",
+                {"error_type": "APIError"},
+            )
+            return 0, f"Failed to fetch Gmail messages: {error}"
+
+        if not messages:
+            success_msg = "No Gmail messages found in the specified date range"
+            await task_logger.log_task_success(
+                log_entry, success_msg, {"messages_count": 0}
+            )
+            return 0, success_msg
+
+        logger.info(f"Found {len(messages)} Gmail messages to index")
+
+        indexed_count = 0
+        for message in messages:
+            try:
+                # Extract message information
+                message_id = message.get("id", "")
+                thread_id = message.get("threadId", "")
+
+                # Extract headers for subject and sender
+                payload = message.get("payload", {})
+                headers = payload.get("headers", [])
+
+                subject = "No Subject"
+                sender = "Unknown Sender"
+                date_str = "Unknown Date"
+
+                for header in headers:
+                    name = header.get("name", "").lower()
+                    value = header.get("value", "")
+                    if name == "subject":
+                        subject = value
+                    elif name == "from":
+                        sender = value
+                    elif name == "date":
+                        date_str = value
+
+                # Check if document already exists
+                existing_doc_result = await session.execute(
+                    select(Document).filter(
+                        Document.search_space_id == search_space_id,
+                        Document.document_type == DocumentType.GOOGLE_GMAIL_CONNECTOR,
+                        Document.document_metadata["message_id"].astext == message_id,
+                    )
+                )
+                existing_doc = existing_doc_result.scalars().first()
+
+                if existing_doc:
+                    logger.info(f"Gmail message {message_id} already indexed, skipping")
+                    continue
+
+                # Format message to markdown
+                markdown_content = gmail_connector.format_message_to_markdown(message)
+
+                # Generate content hash and embedding for the message content
+                content_hash = generate_content_hash(markdown_content, search_space_id)
+                content_embedding = config.embedding_model_instance.embed(
+                    markdown_content
+                )
+
+                # Process chunks for the message content
+                chunks = [
+                    Chunk(
+                        content=chunk.text,
+                        embedding=config.embedding_model_instance.embed(chunk.text),
+                    )
+                    for chunk in config.chunker_instance.chunk(markdown_content)
+                ]
+
+                # Create and store new document
+                logger.info(f"Creating new document for Gmail message: {subject}")
+                document = Document(
+                    search_space_id=search_space_id,
+                    title=f"Gmail: {subject}",
+                    document_type=DocumentType.GOOGLE_GMAIL_CONNECTOR,
+                    document_metadata={
+                        "message_id": message_id,
+                        "thread_id": thread_id,
+                        "subject": subject,
+                        "sender": sender,
+                        "date": date_str,
+                        "connector_id": connector_id,
+                    },
+                    content=markdown_content,
+                    content_hash=content_hash,
+                    embedding=content_embedding,
+                    chunks=chunks,
+                )
+                session.add(document)
+
+                indexed_count += 1
+                logger.info(f"Successfully indexed Gmail message: {subject}")
+
+            except Exception as e:
+                logger.error(
+                    f"Error indexing Gmail message {message_id}: {e!s}", exc_info=True
+                )
+                continue
+
+        # Commit all changes
+        await session.commit()
+
+        # Update connector's last_indexed_at timestamp
+        connector.last_indexed_at = datetime.now(UTC)
+        await session.commit()
+
+        success_msg = f"Successfully indexed {indexed_count} Gmail messages"
+        await task_logger.log_task_success(
+            log_entry,
+            success_msg,
+            {"indexed_count": indexed_count, "total_messages": len(messages)},
+        )
+        logger.info(success_msg)
+        return indexed_count, success_msg
+
+    except Exception as e:
+        await session.rollback()
+        await task_logger.log_task_failure(
+            log_entry,
+            f"Failed to index Gmail messages for connector {connector_id}",
+            str(e),
+            {"error_type": type(e).__name__},
+        )
+        logger.error(f"Failed to index Gmail messages: {e!s}", exc_info=True)
+        return 0, f"Failed to index Gmail messages: {e!s}"

From e17d969087a86addfd08171ac8ddfbb0af174d0f Mon Sep 17 00:00:00 2001
From: CREDO23
Date: Mon, 4 Aug 2025 00:58:37 +0200
Subject: [PATCH 04/14] add search google gmail connector service

---
 .../app/services/connector_service.py | 126 ++++++++++++++++++
 1 file changed, 126 insertions(+)

diff --git a/surfsense_backend/app/services/connector_service.py b/surfsense_backend/app/services/connector_service.py
index d063a86..20ad351 100644
--- a/surfsense_backend/app/services/connector_service.py
+++ b/surfsense_backend/app/services/connector_service.py
@@ -1208,6 +1208,132 @@ class ConnectorService:
 
         return result_object, calendar_chunks
 
+    async def search_google_gmail(
+        self,
+        user_query: str,
+        user_id: str,
+        search_space_id: int,
+        top_k: int = 20,
+        search_mode: SearchMode = SearchMode.CHUNKS,
+    ) -> tuple:
+        """
+        Search for Gmail messages and return both the source information and langchain documents
+
+        Args:
+            user_query: The user's query
user_id: The user's ID + search_space_id: The search space ID to search in + top_k: Maximum number of results to return + search_mode: Search mode (CHUNKS or DOCUMENTS) + + Returns: + tuple: (sources_info, langchain_documents) + """ + if search_mode == SearchMode.CHUNKS: + gmail_chunks = await self.chunk_retriever.hybrid_search( + query_text=user_query, + top_k=top_k, + user_id=user_id, + search_space_id=search_space_id, + document_type="GOOGLE_GMAIL_CONNECTOR", + ) + elif search_mode == SearchMode.DOCUMENTS: + gmail_chunks = await self.document_retriever.hybrid_search( + query_text=user_query, + top_k=top_k, + user_id=user_id, + search_space_id=search_space_id, + document_type="GOOGLE_GMAIL_CONNECTOR", + ) + # Transform document retriever results to match expected format + gmail_chunks = self._transform_document_results(gmail_chunks) + + # Early return if no results + if not gmail_chunks: + return { + "id": 32, + "name": "Gmail Messages", + "type": "GOOGLE_GMAIL_CONNECTOR", + "sources": [], + }, [] + + # Process each chunk and create sources directly without deduplication + sources_list = [] + async with self.counter_lock: + for _i, chunk in enumerate(gmail_chunks): + # Extract document metadata + document = chunk.get("document", {}) + metadata = document.get("metadata", {}) + + # Extract Gmail-specific metadata + message_id = metadata.get("message_id", "") + subject = metadata.get("subject", "No Subject") + sender = metadata.get("sender", "Unknown Sender") + date_str = metadata.get("date", "") + thread_id = metadata.get("thread_id", "") + + # Create a more descriptive title for Gmail messages + title = f"Email: {subject}" + if sender: + # Extract just the email address or name from sender + import re + + sender_match = re.search(r"<([^>]+)>", sender) + if sender_match: + sender_email = sender_match.group(1) + title += f" (from {sender_email})" + else: + title += f" (from {sender})" + + # Create a more descriptive description for Gmail messages + description = chunk.get("content", "")[:150] + if len(description) == 150: + description += "..." 
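+                # Note: the slice above caps the preview at 150 characters; hitting
+                # exactly 150 implies the original content was longer, so "..." marks
+                # the truncation before extra message info is appended below.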
+ + # Add message info to description + info_parts = [] + if date_str: + info_parts.append(f"Date: {date_str}") + if thread_id: + info_parts.append(f"Thread: {thread_id}") + + if info_parts: + if description: + description += f" | {' | '.join(info_parts)}" + else: + description = " | ".join(info_parts) + + # For URL, we could construct a URL to the Gmail message + url = "" + if message_id: + # Gmail message URL format + url = f"https://mail.google.com/mail/u/0/#inbox/{message_id}" + + source = { + "id": document.get("id", self.source_id_counter), + "title": title, + "description": description, + "url": url, + "message_id": message_id, + "subject": subject, + "sender": sender, + "date": date_str, + "thread_id": thread_id, + } + + self.source_id_counter += 1 + sources_list.append(source) + + # Create result object + result_object = { + "id": 32, # Assign a unique ID for the Gmail connector + "name": "Gmail Messages", + "type": "GOOGLE_GMAIL_CONNECTOR", + "sources": sources_list, + } + + return result_object, gmail_chunks + async def search_confluence( self, user_query: str, From ad951d45dac343dc728de1c7b3cce348d103a26c Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Mon, 4 Aug 2025 00:59:56 +0200 Subject: [PATCH 05/14] update reseacher agents --- .../app/agents/researcher/nodes.py | 26 +++++++++++++++++++ .../agents/researcher/qna_agent/prompts.py | 1 + .../researcher/sub_section_writer/prompts.py | 1 + 3 files changed, 28 insertions(+) diff --git a/surfsense_backend/app/agents/researcher/nodes.py b/surfsense_backend/app/agents/researcher/nodes.py index 6e27aa3..9e5d6d7 100644 --- a/surfsense_backend/app/agents/researcher/nodes.py +++ b/surfsense_backend/app/agents/researcher/nodes.py @@ -988,6 +988,32 @@ async def fetch_relevant_documents( ) } ) + elif connector == "GOOGLE_GMAIL_CONNECTOR": + ( + source_object, + gmail_chunks, + ) = await connector_service.search_google_gmail( + user_query=reformulated_query, + user_id=user_id, + search_space_id=search_space_id, + top_k=top_k, + search_mode=search_mode, + ) + + # Add to sources and raw documents + if source_object: + all_sources.append(source_object) + all_raw_documents.extend(gmail_chunks) + + # Stream found document count + if streaming_service and writer: + writer( + { + "yield_value": streaming_service.format_terminal_info_delta( + f"📧 Found {len(gmail_chunks)} Gmail messages related to your query" + ) + } + ) elif connector == "CONFLUENCE_CONNECTOR": ( source_object, diff --git a/surfsense_backend/app/agents/researcher/qna_agent/prompts.py b/surfsense_backend/app/agents/researcher/qna_agent/prompts.py index a7554aa..cd64d56 100644 --- a/surfsense_backend/app/agents/researcher/qna_agent/prompts.py +++ b/surfsense_backend/app/agents/researcher/qna_agent/prompts.py @@ -19,6 +19,7 @@ You are SurfSense, an advanced AI research assistant that provides detailed, wel - CONFLUENCE_CONNECTOR: "Confluence pages and comments" (personal project documentation) - CLICKUP_CONNECTOR: "ClickUp tasks and project data" (personal task management) - GOOGLE_CALENDAR_CONNECTOR: "Google Calendar events, meetings, and schedules" (personal calendar and time management) +- GOOGLE_GMAIL_CONNECTOR: "Google Gmail emails and conversations" (personal emails and communications) - DISCORD_CONNECTOR: "Discord server conversations and shared content" (personal community communications) - TAVILY_API: "Tavily search API results" (personalized search results) - LINKUP_API: "Linkup search API results" (personalized search results) diff --git 
a/surfsense_backend/app/agents/researcher/sub_section_writer/prompts.py b/surfsense_backend/app/agents/researcher/sub_section_writer/prompts.py index 5080c1b..07aec91 100644 --- a/surfsense_backend/app/agents/researcher/sub_section_writer/prompts.py +++ b/surfsense_backend/app/agents/researcher/sub_section_writer/prompts.py @@ -19,6 +19,7 @@ You are SurfSense, an advanced AI research assistant that synthesizes informatio - CONFLUENCE_CONNECTOR: "Confluence pages and comments" (personal project documentation) - CLICKUP_CONNECTOR: "ClickUp tasks and project data" (personal task management) - GOOGLE_CALENDAR_CONNECTOR: "Google Calendar events, meetings, and schedules" (personal calendar and time management) +- GOOGLE_GMAIL_CONNECTOR: "Google Gmail emails and conversations" (personal emails and communications) - DISCORD_CONNECTOR: "Discord server messages and channels" (personal community interactions) - TAVILY_API: "Tavily search API results" (personalized search results) - LINKUP_API: "Linkup search API results" (personalized search results) From b7e941bcb27d5d18a89b63ee8762115000397d4f Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Mon, 4 Aug 2025 01:01:04 +0200 Subject: [PATCH 06/14] update google gmail connector --- .../app/connectors/google_gmail_connector.py | 337 ++++++++++++++++++ 1 file changed, 337 insertions(+) create mode 100644 surfsense_backend/app/connectors/google_gmail_connector.py diff --git a/surfsense_backend/app/connectors/google_gmail_connector.py b/surfsense_backend/app/connectors/google_gmail_connector.py new file mode 100644 index 0000000..0beabf1 --- /dev/null +++ b/surfsense_backend/app/connectors/google_gmail_connector.py @@ -0,0 +1,337 @@ +""" +Google Gmail Connector Module | Google OAuth Credentials | Gmail API +A module for retrieving emails from Gmail using Google OAuth credentials. +Allows fetching emails from Gmail mailbox using Google OAuth credentials. +""" + +import base64 +from typing import Any + +from google.auth.transport.requests import Request +from google.oauth2.credentials import Credentials +from googleapiclient.discovery import build + + +class GoogleGmailConnector: + """Class for retrieving emails from Gmail using Google OAuth credentials.""" + + def __init__( + self, + credentials: Credentials, + ): + """ + Initialize the GoogleGmailConnector class. + Args: + credentials: Google OAuth Credentials object + """ + self._credentials = credentials + self.service = None + + def _get_credentials(self) -> Credentials: + """ + Get valid Google OAuth credentials. 
+ Returns: + Google OAuth credentials + Raises: + ValueError: If credentials have not been set + Exception: If credential refresh fails + """ + if not all( + [ + self._credentials.client_id, + self._credentials.client_secret, + self._credentials.refresh_token, + ] + ): + raise ValueError( + "Google OAuth credentials (client_id, client_secret, refresh_token) must be set" + ) + + if self._credentials and not self._credentials.expired: + return self._credentials + + # Create credentials from refresh token + self._credentials = Credentials( + token=self._credentials.token, + refresh_token=self._credentials.refresh_token, + token_uri=self._credentials.token_uri, + client_id=self._credentials.client_id, + client_secret=self._credentials.client_secret, + scopes=self._credentials.scopes, + ) + + # Refresh the token if needed + if self._credentials.expired or not self._credentials.valid: + try: + self._credentials.refresh(Request()) + except Exception as e: + raise Exception( + f"Failed to refresh Google OAuth credentials: {e!s}" + ) from e + + return self._credentials + + def _get_service(self): + """ + Get the Gmail service instance using Google OAuth credentials. + Returns: + Gmail service instance + Raises: + ValueError: If credentials have not been set + Exception: If service creation fails + """ + if self.service: + return self.service + + try: + credentials = self._get_credentials() + self.service = build("gmail", "v1", credentials=credentials) + return self.service + except Exception as e: + raise Exception(f"Failed to create Gmail service: {e!s}") from e + + def get_user_profile(self) -> tuple[dict[str, Any], str | None]: + """ + Fetch user's Gmail profile information. + Returns: + Tuple containing (profile dict, error message or None) + """ + try: + service = self._get_service() + profile = service.users().getProfile(userId="me").execute() + + return { + "email_address": profile.get("emailAddress"), + "messages_total": profile.get("messagesTotal", 0), + "threads_total": profile.get("threadsTotal", 0), + "history_id": profile.get("historyId"), + }, None + + except Exception as e: + return {}, f"Error fetching user profile: {e!s}" + + def get_messages_list( + self, + max_results: int = 100, + query: str = "", + label_ids: list[str] | None = None, + include_spam_trash: bool = False, + ) -> tuple[list[dict[str, Any]], str | None]: + """ + Fetch list of messages from Gmail. + Args: + max_results: Maximum number of messages to fetch (default: 100) + query: Gmail search query (e.g., "is:unread", "from:example@gmail.com") + label_ids: List of label IDs to filter by + include_spam_trash: Whether to include spam and trash + Returns: + Tuple containing (messages list, error message or None) + """ + try: + service = self._get_service() + + # Build request parameters + request_params = { + "userId": "me", + "maxResults": max_results, + "includeSpamTrash": include_spam_trash, + } + + if query: + request_params["q"] = query + if label_ids: + request_params["labelIds"] = label_ids + + # Get messages list + result = service.users().messages().list(**request_params).execute() + messages = result.get("messages", []) + + return messages, None + + except Exception as e: + return [], f"Error fetching messages list: {e!s}" + + def get_message_details(self, message_id: str) -> tuple[dict[str, Any], str | None]: + """ + Fetch detailed information for a specific message. 
+ Args: + message_id: The ID of the message to fetch + Returns: + Tuple containing (message details dict, error message or None) + """ + try: + service = self._get_service() + + # Get full message details + message = ( + service.users() + .messages() + .get(userId="me", id=message_id, format="full") + .execute() + ) + + return message, None + + except Exception as e: + return {}, f"Error fetching message details: {e!s}" + + def get_recent_messages( + self, + max_results: int = 50, + days_back: int = 30, + ) -> tuple[list[dict[str, Any]], str | None]: + """ + Fetch recent messages from Gmail within specified days. + Args: + max_results: Maximum number of messages to fetch (default: 50) + days_back: Number of days to look back (default: 30) + Returns: + Tuple containing (messages list with details, error message or None) + """ + try: + # Calculate date query + from datetime import datetime, timedelta + + cutoff_date = datetime.now() - timedelta(days=days_back) + date_query = cutoff_date.strftime("%Y/%m/%d") + query = f"after:{date_query}" + + # Get messages list + messages_list, error = self.get_messages_list( + max_results=max_results, query=query + ) + + if error: + return [], error + + # Get detailed information for each message + detailed_messages = [] + for msg in messages_list: + message_details, detail_error = self.get_message_details(msg["id"]) + if detail_error: + continue # Skip messages that can't be fetched + detailed_messages.append(message_details) + + return detailed_messages, None + + except Exception as e: + return [], f"Error fetching recent messages: {e!s}" + + def extract_message_text(self, message: dict[str, Any]) -> str: + """ + Extract text content from a Gmail message. + Args: + message: Gmail message object + Returns: + Extracted text content + """ + + def get_message_parts(payload): + """Recursively extract message parts.""" + parts = [] + + if "parts" in payload: + for part in payload["parts"]: + parts.extend(get_message_parts(part)) + else: + parts.append(payload) + + return parts + + try: + payload = message.get("payload", {}) + parts = get_message_parts(payload) + + text_content = "" + + for part in parts: + mime_type = part.get("mimeType", "") + body = part.get("body", {}) + data = body.get("data", "") + + if mime_type == "text/plain" and data: + # Decode base64 content + decoded_data = base64.urlsafe_b64decode(data + "===").decode( + "utf-8", errors="ignore" + ) + text_content += decoded_data + "\n" + elif mime_type == "text/html" and data and not text_content: + # Use HTML as fallback if no plain text + decoded_data = base64.urlsafe_b64decode(data + "===").decode( + "utf-8", errors="ignore" + ) + # Basic HTML tag removal (you might want to use a proper HTML parser) + import re + + text_content = re.sub(r"<[^>]+>", "", decoded_data) + + return text_content.strip() + + except Exception as e: + return f"Error extracting message text: {e!s}" + + def format_message_to_markdown(self, message: dict[str, Any]) -> str: + """ + Format a Gmail message to markdown. 
+ Args: + message: Message object from Gmail API + Returns: + Formatted markdown string + """ + try: + # Extract basic message information + message_id = message.get("id", "") + thread_id = message.get("threadId", "") + label_ids = message.get("labelIds", []) + + # Extract headers + payload = message.get("payload", {}) + headers = payload.get("headers", []) + + # Parse headers into a dict + header_dict = {} + for header in headers: + name = header.get("name", "").lower() + value = header.get("value", "") + header_dict[name] = value + + # Extract key information + subject = header_dict.get("subject", "No Subject") + from_email = header_dict.get("from", "Unknown Sender") + to_email = header_dict.get("to", "Unknown Recipient") + date_str = header_dict.get("date", "Unknown Date") + + # Extract message content + message_text = self.extract_message_text(message) + + # Build markdown content + markdown_content = f"# {subject}\n\n" + + # Add message details + markdown_content += f"**From:** {from_email}\n" + markdown_content += f"**To:** {to_email}\n" + markdown_content += f"**Date:** {date_str}\n" + + if label_ids: + markdown_content += f"**Labels:** {', '.join(label_ids)}\n" + + markdown_content += "\n" + + # Add message content + if message_text: + markdown_content += f"## Message Content\n\n{message_text}\n\n" + + # Add message metadata + markdown_content += "## Message Details\n\n" + markdown_content += f"- **Message ID:** {message_id}\n" + markdown_content += f"- **Thread ID:** {thread_id}\n" + + # Add snippet if available + snippet = message.get("snippet", "") + if snippet: + markdown_content += f"- **Snippet:** {snippet}\n" + + return markdown_content + + except Exception as e: + return f"Error formatting message to markdown: {e!s}" From 715ce6d290a647eb9b1e5fc9c4ca55bdd7701c7f Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Mon, 4 Aug 2025 01:01:44 +0200 Subject: [PATCH 07/14] generate migration file --- .../18_add_google_gmail_connector_enums.py | 65 +++++++++++++++++++ 1 file changed, 65 insertions(+) create mode 100644 surfsense_backend/alembic/versions/18_add_google_gmail_connector_enums.py diff --git a/surfsense_backend/alembic/versions/18_add_google_gmail_connector_enums.py b/surfsense_backend/alembic/versions/18_add_google_gmail_connector_enums.py new file mode 100644 index 0000000..a2d77e5 --- /dev/null +++ b/surfsense_backend/alembic/versions/18_add_google_gmail_connector_enums.py @@ -0,0 +1,65 @@ +"""Add Google Gmail connector enums + +Revision ID: 18 +Revises: 17 +Create Date: 2024-02-01 12:00:00.000000 + +""" + +from collections.abc import Sequence + +from alembic import op + +# revision identifiers, used by Alembic. 
+revision: str = "18" +down_revision: str | None = "17" +branch_labels: str | Sequence[str] | None = None +depends_on: str | Sequence[str] | None = None + + +def upgrade() -> None: + """Safely add 'GOOGLE_GMAIL_CONNECTOR' to enum types if missing.""" + + # Add to searchsourceconnectortype enum + op.execute( + """ + DO $$ + BEGIN + IF NOT EXISTS ( + SELECT 1 FROM pg_type t + JOIN pg_enum e ON t.oid = e.enumtypid + WHERE t.typname = 'searchsourceconnectortype' AND e.enumlabel = 'GOOGLE_GMAIL_CONNECTOR' + ) THEN + ALTER TYPE searchsourceconnectortype ADD VALUE 'GOOGLE_GMAIL_CONNECTOR'; + END IF; + END + $$; + """ + ) + + # Add to documenttype enum + op.execute( + """ + DO $$ + BEGIN + IF NOT EXISTS ( + SELECT 1 FROM pg_type t + JOIN pg_enum e ON t.oid = e.enumtypid + WHERE t.typname = 'documenttype' AND e.enumlabel = 'GOOGLE_GMAIL_CONNECTOR' + ) THEN + ALTER TYPE documenttype ADD VALUE 'GOOGLE_GMAIL_CONNECTOR'; + END IF; + END + $$; + """ + ) + + +def downgrade() -> None: + """Remove 'GOOGLE_GMAIL_CONNECTOR' from enum types.""" + + # Note: PostgreSQL doesn't support removing enum values directly + # This would require recreating the enum type, which is complex + # For now, we'll leave the enum values in place + # In a production environment, you might want to implement a more sophisticated downgrade + pass From 464d4e3891db3dc4c88d707b51e74a3695d9fc9f Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Mon, 4 Aug 2025 01:02:35 +0200 Subject: [PATCH 08/14] update .env.example --- surfsense_backend/.env.example | 1 + surfsense_backend/app/app.py | 6 ++++++ surfsense_backend/app/config/__init__.py | 3 +++ 3 files changed, 10 insertions(+) diff --git a/surfsense_backend/.env.example b/surfsense_backend/.env.example index dfb7db8..5d4417f 100644 --- a/surfsense_backend/.env.example +++ b/surfsense_backend/.env.example @@ -9,6 +9,7 @@ AUTH_TYPE=GOOGLE or LOCAL GOOGLE_OAUTH_CLIENT_ID=924507538m GOOGLE_OAUTH_CLIENT_SECRET=GOCSV GOOGLE_CALENDAR_REDIRECT_URI=http://localhost:8000/api/v1/auth/google/calendar/connector/callback +GOOGLE_GMAIL_REDIRECT_URI=http://localhost:8000/api/v1/auth/google/gmail/connector/callback # Embedding Model EMBEDDING_MODEL=mixedbread-ai/mxbai-embed-large-v1 diff --git a/surfsense_backend/app/app.py b/surfsense_backend/app/app.py index 17f9082..1766c0c 100644 --- a/surfsense_backend/app/app.py +++ b/surfsense_backend/app/app.py @@ -1,3 +1,9 @@ +import os + +os.environ["OAUTHLIB_RELAX_TOKEN_SCOPE"] = ( + "1" # It is to disbale the error "Invalid token scope" when using Google OAuth with increemental scopes +) + from contextlib import asynccontextmanager from fastapi import Depends, FastAPI diff --git a/surfsense_backend/app/config/__init__.py b/surfsense_backend/app/config/__init__.py index 38a1f3e..38ae616 100644 --- a/surfsense_backend/app/config/__init__.py +++ b/surfsense_backend/app/config/__init__.py @@ -51,6 +51,9 @@ class Config: # Google Calendar redirect URI GOOGLE_CALENDAR_REDIRECT_URI = os.getenv("GOOGLE_CALENDAR_REDIRECT_URI") + # Google Gmail redirect URI + GOOGLE_GMAIL_REDIRECT_URI = os.getenv("GOOGLE_GMAIL_REDIRECT_URI") + # LLM instances are now managed per-user through the LLMConfig system # Legacy environment variables removed in favor of user-specific configurations From 2655692db361aba97a1b628e9311013c9e48c616 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Mon, 4 Aug 2025 01:05:12 +0200 Subject: [PATCH 09/14] add UI implementations --- .../add/google-gmail-connector/page.tsx | 199 ++++++++++++++++++ .../[search_space_id]/connectors/add/page.tsx | 6 +- 
.../components/chat/ConnectorComponents.tsx | 3 + 3 files changed, 205 insertions(+), 3 deletions(-) create mode 100644 surfsense_web/app/dashboard/[search_space_id]/connectors/add/google-gmail-connector/page.tsx diff --git a/surfsense_web/app/dashboard/[search_space_id]/connectors/add/google-gmail-connector/page.tsx b/surfsense_web/app/dashboard/[search_space_id]/connectors/add/google-gmail-connector/page.tsx new file mode 100644 index 0000000..6c130bf --- /dev/null +++ b/surfsense_web/app/dashboard/[search_space_id]/connectors/add/google-gmail-connector/page.tsx @@ -0,0 +1,199 @@ +"use client"; + +import { zodResolver } from "@hookform/resolvers/zod"; +import { IconMail } from "@tabler/icons-react"; +import { motion } from "framer-motion"; +import { ArrowLeft, Check, ExternalLink, Loader2 } from "lucide-react"; +import Link from "next/link"; +import { useParams, useRouter, useSearchParams } from "next/navigation"; +import { useEffect, useState } from "react"; +import { useForm } from "react-hook-form"; +import { toast } from "sonner"; +import { z } from "zod"; +import { Button } from "@/components/ui/button"; +import { + Card, + CardContent, + CardDescription, + CardFooter, + CardHeader, + CardTitle, +} from "@/components/ui/card"; +import { + type SearchSourceConnector, + useSearchSourceConnectors, +} from "@/hooks/useSearchSourceConnectors"; + +export default function GoogleGmailConnectorPage() { + const router = useRouter(); + const params = useParams(); + const searchSpaceId = params.search_space_id as string; + const [isConnecting, setIsConnecting] = useState(false); + const [doesConnectorExist, setDoesConnectorExist] = useState(false); + + const { fetchConnectors } = useSearchSourceConnectors(); + + useEffect(() => { + fetchConnectors().then((data) => { + const connector = data.find( + (c: SearchSourceConnector) => c.connector_type === "GOOGLE_GMAIL_CONNECTOR" + ); + if (connector) { + setDoesConnectorExist(true); + } + }); + }, []); + + // Handle Google OAuth connection + const handleConnectGoogle = async () => { + try { + setIsConnecting(true); + // Call backend to initiate authorization flow + const response = await fetch( + `${process.env.NEXT_PUBLIC_FASTAPI_BACKEND_URL}/api/v1/auth/google/gmail/connector/add/?space_id=${searchSpaceId}`, + { + method: "GET", + headers: { + Authorization: `Bearer ${localStorage.getItem("surfsense_bearer_token")}`, + }, + } + ); + + if (!response.ok) { + throw new Error("Failed to initiate Google OAuth"); + } + + const data = await response.json(); + + // Redirect to Google for authentication + window.location.href = data.auth_url; + } catch (error) { + console.error("Error connecting to Google:", error); + toast.error("Failed to connect to Google Gmail"); + } finally { + setIsConnecting(false); + } + }; + + return ( +
+		<motion.div initial={{ opacity: 0 }} animate={{ opacity: 1 }}>
+			{/* Header */}
+			<div>
+				<Link href={`/dashboard/${searchSpaceId}/connectors/add`}>
+					<ArrowLeft />
+					Back to connectors
+				</Link>
+				<div>
+					<IconMail />
+					<h1>Connect Google Gmail</h1>
+				</div>
+				<p>Connect your Gmail account to search through your emails</p>
+			</div>
+
+			{/* Connection Card */}
+			{!doesConnectorExist ? (
+				<Card>
+					<CardHeader>
+						<CardTitle>Connect Your Gmail Account</CardTitle>
+						<CardDescription>
+							Securely connect your Gmail account to enable email search within SurfSense. We'll
+							only access your emails with read-only permissions.
+						</CardDescription>
+					</CardHeader>
+					<CardContent>
+						<div>
+							<Check />
+							Read-only access to your emails
+						</div>
+						<div>
+							<Check />
+							Search through email content and metadata
+						</div>
+						<div>
+							<Check />
+							Secure OAuth 2.0 authentication
+						</div>
+						<div>
+							<Check />
+							You can disconnect anytime
+						</div>
+					</CardContent>
+					<CardFooter>
+						<Button onClick={handleConnectGoogle} disabled={isConnecting}>
+							{isConnecting ? <Loader2 /> : <ExternalLink />}
+							Connect Gmail
+						</Button>
+					</CardFooter>
+				</Card>
+			) : (
+				/* Configuration Form Card */
+				<Card>
+					<CardHeader>
+						<CardTitle>✅ Your Gmail is successfully connected!</CardTitle>
+					</CardHeader>
+				</Card>
+			)}
+
+			{/* Information Card */}
+			<Card>
+				<CardHeader>
+					<CardTitle>What data will be indexed?</CardTitle>
+				</CardHeader>
+				<CardContent>
+					<div>
+						<h4>Email Content</h4>
+						<p>
+							We'll index the content of your emails including subject lines, sender information,
+							and message body text to make them searchable.
+						</p>
+					</div>
+					<div>
+						<h4>Email Metadata</h4>
+						<p>
+							Information like sender, recipient, date, and labels will be indexed to provide
+							better search context and filtering options.
+						</p>
+					</div>
+					<div>
+						<h4>Privacy & Security</h4>
+						<p>
+							Your emails are processed securely and stored with encryption. We only access emails
+							with read-only permissions and never modify or send emails on your behalf.
+						</p>
+					</div>
+				</CardContent>
+			</Card>
+		</motion.div>
+ ); +} diff --git a/surfsense_web/app/dashboard/[search_space_id]/connectors/add/page.tsx b/surfsense_web/app/dashboard/[search_space_id]/connectors/add/page.tsx index c6ec629..2d4f2b9 100644 --- a/surfsense_web/app/dashboard/[search_space_id]/connectors/add/page.tsx +++ b/surfsense_web/app/dashboard/[search_space_id]/connectors/add/page.tsx @@ -157,11 +157,11 @@ const connectorCategories: ConnectorCategory[] = [ status: "available", }, { - id: "gmail", + id: "google-gmail-connector", title: "Gmail", - description: "Connect to your Gmail account to access emails.", + description: "Connect to your Gmail account to search through your emails.", icon: , - status: "coming-soon", + status: "available", }, { id: "zoom", diff --git a/surfsense_web/components/chat/ConnectorComponents.tsx b/surfsense_web/components/chat/ConnectorComponents.tsx index 2a3cdb4..d66227e 100644 --- a/surfsense_web/components/chat/ConnectorComponents.tsx +++ b/surfsense_web/components/chat/ConnectorComponents.tsx @@ -7,6 +7,7 @@ import { IconCalendar, IconLayoutKanban, IconLinkPlus, + IconMail, IconTicket, } from "@tabler/icons-react"; import { @@ -59,6 +60,8 @@ export const getConnectorIcon = (connectorType: string) => { return ; case "GOOGLE_CALENDAR_CONNECTOR": return ; + case "GOOGLE_GMAIL_CONNECTOR": + return ; case "DEEP": return ; case "DEEPER": From 69f6a0a2781442a33da014b5ad5db4f0592bd601 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Mon, 4 Aug 2025 20:26:04 +0200 Subject: [PATCH 10/14] fix scopes issues for google services --- .../routes/search_source_connectors_routes.py | 76 +++++++ .../app/tasks/connectors_indexing_tasks.py | 213 ++++++++++++------ 2 files changed, 225 insertions(+), 64 deletions(-) diff --git a/surfsense_backend/app/routes/search_source_connectors_routes.py b/surfsense_backend/app/routes/search_source_connectors_routes.py index 49f3128..d1f6108 100644 --- a/surfsense_backend/app/routes/search_source_connectors_routes.py +++ b/surfsense_backend/app/routes/search_source_connectors_routes.py @@ -41,6 +41,7 @@ from app.tasks.connector_indexers import ( index_discord_messages, index_github_repos, index_google_calendar_events, + index_google_gmail_messages, index_jira_issues, index_linear_issues, index_notion_pages, @@ -507,6 +508,22 @@ async def index_connector_content( indexing_to, ) response_message = "Google Calendar indexing started in the background." + elif ( + connector.connector_type == SearchSourceConnectorType.GOOGLE_GMAIL_CONNECTOR + ): + # Run indexing in background + logger.info( + f"Triggering Google Gmail indexing for connector {connector_id} into search space {search_space_id} from {indexing_from} to {indexing_to}" + ) + background_tasks.add_task( + run_google_gmail_indexing_with_new_session, + connector_id, + search_space_id, + str(user.id), + indexing_from, + indexing_to, + ) + response_message = "Google Gmail indexing started in the background." 
elif connector.connector_type == SearchSourceConnectorType.DISCORD_CONNECTOR: # Run indexing in background @@ -1113,3 +1130,62 @@ async def run_google_calendar_indexing( exc_info=True, ) # Optionally update status in DB to indicate failure + + +async def run_google_gmail_indexing_with_new_session( + connector_id: int, + search_space_id: int, + user_id: str, + max_messages: int, + days_back: int, +): + """Wrapper to run Google Gmail indexing with its own database session.""" + logger.info( + f"Background task started: Indexing Google Gmail connector {connector_id} into space {search_space_id} for {max_messages} messages from the last {days_back} days" + ) + async with async_session_maker() as session: + await run_google_gmail_indexing( + session, connector_id, search_space_id, user_id, max_messages, days_back + ) + logger.info( + f"Background task finished: Indexing Google Gmail connector {connector_id}" + ) + + +async def run_google_gmail_indexing( + session: AsyncSession, + connector_id: int, + search_space_id: int, + user_id: str, + max_messages: int, + days_back: int, +): + """Runs the Google Gmail indexing task and updates the timestamp.""" + try: + indexed_count, error_message = await index_google_gmail_messages( + session, + connector_id, + search_space_id, + user_id, + max_messages, + days_back, + update_last_indexed=False, + ) + if error_message: + logger.error( + f"Google Gmail indexing failed for connector {connector_id}: {error_message}" + ) + # Optionally update status in DB to indicate failure + else: + logger.info( + f"Google Gmail indexing successful for connector {connector_id}. Indexed {indexed_count} documents." + ) + # Update the last indexed timestamp only on success + await update_connector_last_indexed(session, connector_id) + await session.commit() # Commit timestamp update + except Exception as e: + logger.error( + f"Critical error in run_google_gmail_indexing for connector {connector_id}: {e}", + exc_info=True, + ) + # Optionally update status in DB to indicate failure diff --git a/surfsense_backend/app/tasks/connectors_indexing_tasks.py b/surfsense_backend/app/tasks/connectors_indexing_tasks.py index 0c678b3..85007fc 100644 --- a/surfsense_backend/app/tasks/connectors_indexing_tasks.py +++ b/surfsense_backend/app/tasks/connectors_indexing_tasks.py @@ -3381,8 +3381,10 @@ async def index_google_gmail_messages( connector_id: int, search_space_id: int, user_id: str, + start_date: str | None = None, + end_date: str | None = None, + update_last_indexed: bool = True, max_messages: int = 100, - days_back: int = 30, ) -> tuple[int, str]: """ Index Gmail messages for a specific connector. 
@@ -3392,14 +3394,24 @@ async def index_google_gmail_messages( connector_id: ID of the Gmail connector search_space_id: ID of the search space user_id: ID of the user + start_date: Start date for filtering messages (YYYY-MM-DD format) + end_date: End date for filtering messages (YYYY-MM-DD format) + update_last_indexed: Whether to update the last_indexed_at timestamp (default: True) max_messages: Maximum number of messages to fetch (default: 100) - days_back: Number of days to look back (default: 30) Returns: Tuple of (number_of_indexed_messages, status_message) """ task_logger = TaskLoggingService(session, search_space_id) + # Calculate days back based on start_date + if start_date: + try: + start_date_obj = datetime.strptime(start_date, "%Y-%m-%d") + days_back = (datetime.now() - start_date_obj).days + except ValueError: + days_back = 30 # Default to 30 days if start_date is invalid + # Log task start log_entry = await task_logger.log_task_start( task_name="google_gmail_messages_indexing", @@ -3426,8 +3438,8 @@ async def index_google_gmail_messages( if not connector: error_msg = f"Gmail connector with ID {connector_id} not found" - await task_logger.log_task_completion( - log_entry.id, "FAILED", error_msg, {"error_type": "ConnectorNotFound"} + await task_logger.log_task_failure( + log_entry, error_msg, {"error_type": "ConnectorNotFound"} ) return 0, error_msg @@ -3442,31 +3454,53 @@ async def index_google_gmail_messages( scopes=config_data.get("scopes", []), ) - # Initialize Gmail connector + if ( + not credentials.client_id + or not credentials.client_secret + or not credentials.refresh_token + ): + await task_logger.log_task_failure( + log_entry, + f"Google gmail credentials not found in connector config for connector {connector_id}", + "Missing Google gmail credentials", + {"error_type": "MissingCredentials"}, + ) + return 0, "Google gmail credentials not found in connector config" + + # Initialize Google gmail client + await task_logger.log_task_progress( + log_entry, + f"Initializing Google gmail client for connector {connector_id}", + {"stage": "client_initialization"}, + ) + + # Initialize Google gmail connector gmail_connector = GoogleGmailConnector(credentials) - # Fetch recent messages - logger.info(f"Fetching recent Gmail messages for connector {connector_id}") + # Fetch recent Google gmail messages + logger.info(f"Fetching recent emails for connector {connector_id}") messages, error = gmail_connector.get_recent_messages( max_results=max_messages, days_back=days_back ) if error: - await task_logger.log_task_completion( - log_entry.id, "FAILED", f"Failed to fetch messages: {error}", {} + await task_logger.log_task_failure( + log_entry, f"Failed to fetch messages: {error}", {} ) return 0, f"Failed to fetch Gmail messages: {error}" if not messages: - success_msg = "No Gmail messages found in the specified date range" - await task_logger.log_task_completion( - log_entry.id, "SUCCESS", success_msg, {"messages_count": 0} + success_msg = "No Google gmail messages found in the specified date range" + await task_logger.log_task_success( + log_entry, success_msg, {"messages_count": 0} ) return 0, success_msg - logger.info(f"Found {len(messages)} Gmail messages to index") + logger.info(f"Found {len(messages)} Google gmail messages to index") - indexed_count = 0 + documents_indexed = 0 + skipped_messages = [] + documents_skipped = 0 for message in messages: try: # Extract message information @@ -3491,23 +3525,58 @@ async def index_google_gmail_messages( elif name == "date": date_str = 
value - # Check if document already exists - existing_doc_result = await session.execute( - select(Document).filter( - Document.search_space_id == search_space_id, - Document.document_type == DocumentType.GOOGLE_GMAIL_CONNECTOR, - Document.document_metadata["message_id"].astext == message_id, - ) - ) - existing_doc = existing_doc_result.scalars().first() - - if existing_doc: - logger.info(f"Gmail message {message_id} already indexed, skipping") + if not message_id: + logger.warning(f"Skipping message with missing ID: {subject}") + skipped_messages.append(f"{subject} (missing ID)") + documents_skipped += 1 continue # Format message to markdown markdown_content = gmail_connector.format_message_to_markdown(message) + if not markdown_content.strip(): + logger.warning(f"Skipping message with no content: {subject}") + skipped_messages.append(f"{subject} (no content)") + documents_skipped += 1 + continue + + # Create a simple summary + summary_content = f"Google Gmail Message: {subject}\n\n" + summary_content += f"Sender: {sender}\n" + summary_content += f"Date: {date_str}\n" + + # Generate content hash + content_hash = generate_content_hash(markdown_content, search_space_id) + + # Check if document already exists + existing_doc_by_hash_result = await session.execute( + select(Document).where(Document.content_hash == content_hash) + ) + existing_document_by_hash = ( + existing_doc_by_hash_result.scalars().first() + ) + + if existing_document_by_hash: + logger.info( + f"Document with content hash {content_hash} already exists for message {message_id}. Skipping processing." + ) + documents_skipped += 1 + continue + + # Generate embedding for the summary + summary_embedding = config.embedding_model_instance.embed( + summary_content + ) + + # Process chunks + chunks = [ + Chunk( + content=chunk.text, + embedding=config.embedding_model_instance.embed(chunk.text), + ) + for chunk in config.chunker_instance.chunk(markdown_content) + ] + # Create and store new document logger.info(f"Creating new document for Gmail message: {subject}") document = Document( @@ -3523,56 +3592,72 @@ async def index_google_gmail_messages( "connector_id": connector_id, }, content=markdown_content, + content_hash=content_hash, + embedding=summary_embedding, + chunks=chunks, ) session.add(document) - await session.flush() - - # Create chunks for the document - chunks = config.chunker_instance.chunk(markdown_content) - for i, chunk_text in enumerate(chunks): - chunk = Chunk( - document_id=document.id, - content=chunk_text, - chunk_index=i, - embedding=config.embedding_model_instance.embed_query( - chunk_text - ), - ) - session.add(chunk) - - indexed_count += 1 - logger.info(f"Successfully indexed Gmail message: {subject}") + documents_indexed += 1 + logger.info(f"Successfully indexed new email {summary_content}") except Exception as e: logger.error( - f"Error indexing Gmail message {message_id}: {e!s}", exc_info=True + f"Error processing the email {message_id}: {e!s}", + exc_info=True, ) - continue + skipped_messages.append(f"{subject} (processing error)") + documents_skipped += 1 + continue # Skip this message and continue with others + + # Update the last_indexed_at timestamp for the connector only if requested + total_processed = documents_indexed + if update_last_indexed: + connector.last_indexed_at = datetime.now() + logger.info(f"Updated last_indexed_at to {connector.last_indexed_at}") # Commit all changes await session.commit() - - # Update connector's last_indexed_at timestamp - connector.last_indexed_at = 
datetime.now(UTC) - await session.commit() - - success_msg = f"Successfully indexed {indexed_count} Gmail messages" - await task_logger.log_task_completion( - log_entry.id, - "SUCCESS", - success_msg, - {"indexed_count": indexed_count, "total_messages": len(messages)}, + logger.info( + "Successfully committed all Google gmail document changes to database" ) - logger.info(success_msg) - return indexed_count, success_msg + # Log success + await task_logger.log_task_success( + log_entry, + f"Successfully completed Google gmail indexing for connector {connector_id}", + { + "events_processed": total_processed, + "documents_indexed": documents_indexed, + "documents_skipped": documents_skipped, + "skipped_messages_count": len(skipped_messages), + }, + ) + + logger.info( + f"Google gmail indexing completed: {documents_indexed} new emails, {documents_skipped} skipped" + ) + return ( + total_processed, + None, + ) # Return None as the error message to indicate success + + except SQLAlchemyError as db_error: + await session.rollback() + await task_logger.log_task_failure( + log_entry, + f"Database error during Google gmail indexing for connector {connector_id}", + str(db_error), + {"error_type": "SQLAlchemyError"}, + ) + logger.error(f"Database error: {db_error!s}", exc_info=True) + return 0, f"Database error: {db_error!s}" except Exception as e: - await task_logger.log_task_completion( - log_entry.id, - "FAILED", - f"Failed to index Gmail messages for connector {connector_id}", + await session.rollback() + await task_logger.log_task_failure( + log_entry, + f"Failed to index Google gmail emails for connector {connector_id}", str(e), {"error_type": type(e).__name__}, ) - logger.error(f"Failed to index Gmail messages: {e!s}", exc_info=True) - return 0, f"Failed to index Gmail messages: {e!s}" + logger.error(f"Failed to index Google gmail emails: {e!s}", exc_info=True) + return 0, f"Failed to index Google gmail emails: {e!s}" From 308b96af8bb277ca50efedd02151b6bd9a0c0bb5 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Fri, 8 Aug 2025 21:16:31 +0200 Subject: [PATCH 11/14] update the gmail connector add route --- surfsense_backend/app/app.py | 6 ------ .../app/routes/google_gmail_add_connector_route.py | 5 ++++- 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/surfsense_backend/app/app.py b/surfsense_backend/app/app.py index 1766c0c..17f9082 100644 --- a/surfsense_backend/app/app.py +++ b/surfsense_backend/app/app.py @@ -1,9 +1,3 @@ -import os - -os.environ["OAUTHLIB_RELAX_TOKEN_SCOPE"] = ( - "1" # It is to disbale the error "Invalid token scope" when using Google OAuth with increemental scopes -) - from contextlib import asynccontextmanager from fastapi import Depends, FastAPI diff --git a/surfsense_backend/app/routes/google_gmail_add_connector_route.py b/surfsense_backend/app/routes/google_gmail_add_connector_route.py index 678f43d..79b131b 100644 --- a/surfsense_backend/app/routes/google_gmail_add_connector_route.py +++ b/surfsense_backend/app/routes/google_gmail_add_connector_route.py @@ -1,4 +1,7 @@ -# app/routes/google_gmail.py +import os + +os.environ["OAUTHLIB_RELAX_TOKEN_SCOPE"] = "1" + import base64 import json import logging From d840113bffded53855f497be6141e4fdec3dc290 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Fri, 8 Aug 2025 21:57:34 +0200 Subject: [PATCH 12/14] add relelvant bot suggestions --- surfsense_backend/app/connectors/google_gmail_connector.py | 2 +- .../app/routes/google_gmail_add_connector_route.py | 4 ---- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git 
a/surfsense_backend/app/connectors/google_gmail_connector.py b/surfsense_backend/app/connectors/google_gmail_connector.py index 0beabf1..0e75080 100644 --- a/surfsense_backend/app/connectors/google_gmail_connector.py +++ b/surfsense_backend/app/connectors/google_gmail_connector.py @@ -5,6 +5,7 @@ Allows fetching emails from Gmail mailbox using Google OAuth credentials. """ import base64 +import re from typing import Any from google.auth.transport.requests import Request @@ -261,7 +262,6 @@ class GoogleGmailConnector: "utf-8", errors="ignore" ) # Basic HTML tag removal (you might want to use a proper HTML parser) - import re text_content = re.sub(r"<[^>]+>", "", decoded_data) diff --git a/surfsense_backend/app/routes/google_gmail_add_connector_route.py b/surfsense_backend/app/routes/google_gmail_add_connector_route.py index 79b131b..01c6e7f 100644 --- a/surfsense_backend/app/routes/google_gmail_add_connector_route.py +++ b/surfsense_backend/app/routes/google_gmail_add_connector_route.py @@ -157,7 +157,3 @@ async def gmail_callback( raise except Exception as e: logger.error(f"Unexpected error in Gmail callback: {e!s}", exc_info=True) - # Redirect to frontend with error - return RedirectResponse( - url=f"{config.NEXT_FRONTEND_URL}/dashboard/{space_id}/connectors/add/google-gmail-connector?error=auth_failed" - ) From 869f848179bbcdba424309f8a76dd20b953cef9b Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Mon, 11 Aug 2025 20:49:44 +0200 Subject: [PATCH 13/14] fix merge conflits --- .../documents/(manage)/components/DocumentTypeIcon.tsx | 2 ++ 1 file changed, 2 insertions(+) diff --git a/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/DocumentTypeIcon.tsx b/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/DocumentTypeIcon.tsx index e0cc12b..5273fa5 100644 --- a/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/DocumentTypeIcon.tsx +++ b/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/DocumentTypeIcon.tsx @@ -10,6 +10,7 @@ import { IconCalendar, IconChecklist, IconLayoutKanban, + IconMail, IconTicket, } from "@tabler/icons-react"; import { File, Globe, Webhook } from "lucide-react"; @@ -31,6 +32,7 @@ const documentTypeIcons: Record = { CONFLUENCE_CONNECTOR: IconBook, CLICKUP_CONNECTOR: IconChecklist, GOOGLE_CALENDAR_CONNECTOR: IconCalendar, + GOOGLE_GMAIL_CONNECTOR: IconMail, }; export function getDocumentTypeIcon(type: string): IconComponent { From 089c9d1625b04f058b44b7d205b9027492d580ad Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Fri, 15 Aug 2025 10:11:50 +0200 Subject: [PATCH 14/14] use new indexer files structureclear --- .../app/tasks/connector_indexers/__init__.py | 5 +- .../google_gmail_indexer.py | 299 ++ .../app/tasks/connectors_indexing_tasks.py | 3663 ----------------- 3 files changed, 303 insertions(+), 3664 deletions(-) create mode 100644 surfsense_backend/app/tasks/connector_indexers/google_gmail_indexer.py delete mode 100644 surfsense_backend/app/tasks/connectors_indexing_tasks.py diff --git a/surfsense_backend/app/tasks/connector_indexers/__init__.py b/surfsense_backend/app/tasks/connector_indexers/__init__.py index 048a136..7befa59 100644 --- a/surfsense_backend/app/tasks/connector_indexers/__init__.py +++ b/surfsense_backend/app/tasks/connector_indexers/__init__.py @@ -14,6 +14,7 @@ Available indexers: - Confluence: Index pages from Confluence spaces - Discord: Index messages from Discord servers - ClickUp: Index tasks from ClickUp workspaces +- Google Gmail: Index 
messages from Google Gmail - Google Calendar: Index events from Google Calendar """ @@ -27,6 +28,7 @@ from .github_indexer import index_github_repos # Calendar and scheduling from .google_calendar_indexer import index_google_calendar_events +from .google_gmail_indexer import index_google_gmail_messages from .jira_indexer import index_jira_issues # Issue tracking and project management @@ -36,7 +38,7 @@ from .linear_indexer import index_linear_issues from .notion_indexer import index_notion_pages from .slack_indexer import index_slack_messages -__all__ = [ +__all__ = [ # noqa: RUF022 "index_clickup_tasks", "index_confluence_pages", "index_discord_messages", @@ -51,4 +53,5 @@ __all__ = [ "index_notion_pages", # Communication platforms "index_slack_messages", + "index_google_gmail_messages", ] diff --git a/surfsense_backend/app/tasks/connector_indexers/google_gmail_indexer.py b/surfsense_backend/app/tasks/connector_indexers/google_gmail_indexer.py new file mode 100644 index 0000000..68e29e3 --- /dev/null +++ b/surfsense_backend/app/tasks/connector_indexers/google_gmail_indexer.py @@ -0,0 +1,299 @@ +""" +Google Gmail connector indexer. +""" + +from datetime import datetime + +from google.oauth2.credentials import Credentials +from sqlalchemy.exc import SQLAlchemyError +from sqlalchemy.ext.asyncio import AsyncSession + +from app.config import config +from app.connectors.google_gmail_connector import GoogleGmailConnector +from app.db import ( + Document, + DocumentType, + SearchSourceConnectorType, +) +from app.services.task_logging_service import TaskLoggingService +from app.utils.document_converters import generate_content_hash + +from .base import ( + check_duplicate_document_by_hash, + create_document_chunks, + get_connector_by_id, + logger, + update_connector_last_indexed, +) + + +async def index_google_gmail_messages( + session: AsyncSession, + connector_id: int, + search_space_id: int, + user_id: str, + start_date: str | None = None, + end_date: str | None = None, + update_last_indexed: bool = True, + max_messages: int = 100, +) -> tuple[int, str]: + """ + Index Gmail messages for a specific connector. 
+ + Args: + session: Database session + connector_id: ID of the Gmail connector + search_space_id: ID of the search space + user_id: ID of the user + start_date: Start date for filtering messages (YYYY-MM-DD format) + end_date: End date for filtering messages (YYYY-MM-DD format) + update_last_indexed: Whether to update the last_indexed_at timestamp (default: True) + max_messages: Maximum number of messages to fetch (default: 100) + + Returns: + Tuple of (number_of_indexed_messages, status_message) + """ + task_logger = TaskLoggingService(session, search_space_id) + + # Calculate days back based on start_date + if start_date: + try: + start_date_obj = datetime.strptime(start_date, "%Y-%m-%d") + days_back = (datetime.now() - start_date_obj).days + except ValueError: + days_back = 30 # Default to 30 days if start_date is invalid + + # Log task start + log_entry = await task_logger.log_task_start( + task_name="google_gmail_messages_indexing", + source="connector_indexing_task", + message=f"Starting Gmail messages indexing for connector {connector_id}", + metadata={ + "connector_id": connector_id, + "user_id": str(user_id), + "max_messages": max_messages, + "days_back": days_back, + }, + ) + + try: + # Get connector by id + connector = await get_connector_by_id( + session, connector_id, SearchSourceConnectorType.GOOGLE_GMAIL_CONNECTOR + ) + + if not connector: + error_msg = f"Gmail connector with ID {connector_id} not found" + await task_logger.log_task_failure( + log_entry, error_msg, {"error_type": "ConnectorNotFound"} + ) + return 0, error_msg + + # Create credentials from connector config + config_data = connector.config + credentials = Credentials( + token=config_data.get("token"), + refresh_token=config_data.get("refresh_token"), + token_uri=config_data.get("token_uri"), + client_id=config_data.get("client_id"), + client_secret=config_data.get("client_secret"), + scopes=config_data.get("scopes", []), + ) + + if ( + not credentials.client_id + or not credentials.client_secret + or not credentials.refresh_token + ): + await task_logger.log_task_failure( + log_entry, + f"Google gmail credentials not found in connector config for connector {connector_id}", + "Missing Google gmail credentials", + {"error_type": "MissingCredentials"}, + ) + return 0, "Google gmail credentials not found in connector config" + + # Initialize Google gmail client + await task_logger.log_task_progress( + log_entry, + f"Initializing Google gmail client for connector {connector_id}", + {"stage": "client_initialization"}, + ) + + # Initialize Google gmail connector + gmail_connector = GoogleGmailConnector(credentials) + + # Fetch recent Google gmail messages + logger.info(f"Fetching recent emails for connector {connector_id}") + messages, error = gmail_connector.get_recent_messages( + max_results=max_messages, days_back=days_back + ) + + if error: + await task_logger.log_task_failure( + log_entry, f"Failed to fetch messages: {error}", {} + ) + return 0, f"Failed to fetch Gmail messages: {error}" + + if not messages: + success_msg = "No Google gmail messages found in the specified date range" + await task_logger.log_task_success( + log_entry, success_msg, {"messages_count": 0} + ) + return 0, success_msg + + logger.info(f"Found {len(messages)} Google gmail messages to index") + + documents_indexed = 0 + skipped_messages = [] + documents_skipped = 0 + for message in messages: + try: + # Extract message information + message_id = message.get("id", "") + thread_id = message.get("threadId", "") + + # Extract headers for 
subject and sender + payload = message.get("payload", {}) + headers = payload.get("headers", []) + + subject = "No Subject" + sender = "Unknown Sender" + date_str = "Unknown Date" + + for header in headers: + name = header.get("name", "").lower() + value = header.get("value", "") + if name == "subject": + subject = value + elif name == "from": + sender = value + elif name == "date": + date_str = value + + if not message_id: + logger.warning(f"Skipping message with missing ID: {subject}") + skipped_messages.append(f"{subject} (missing ID)") + documents_skipped += 1 + continue + + # Format message to markdown + markdown_content = gmail_connector.format_message_to_markdown(message) + + if not markdown_content.strip(): + logger.warning(f"Skipping message with no content: {subject}") + skipped_messages.append(f"{subject} (no content)") + documents_skipped += 1 + continue + + # Create a simple summary + summary_content = f"Google Gmail Message: {subject}\n\n" + summary_content += f"Sender: {sender}\n" + summary_content += f"Date: {date_str}\n" + + # Generate content hash + content_hash = generate_content_hash(markdown_content, search_space_id) + + # Check if document already exists + existing_document_by_hash = await check_duplicate_document_by_hash( + session, content_hash + ) + + if existing_document_by_hash: + logger.info( + f"Document with content hash {content_hash} already exists for message {message_id}. Skipping processing." + ) + documents_skipped += 1 + continue + + # Generate embedding for the summary + summary_embedding = config.embedding_model_instance.embed( + summary_content + ) + + # Process chunks + chunks = await create_document_chunks(markdown_content) + + # Create and store new document + logger.info(f"Creating new document for Gmail message: {subject}") + document = Document( + search_space_id=search_space_id, + title=f"Gmail: {subject}", + document_type=DocumentType.GOOGLE_GMAIL_CONNECTOR, + document_metadata={ + "message_id": message_id, + "thread_id": thread_id, + "subject": subject, + "sender": sender, + "date": date_str, + "connector_id": connector_id, + }, + content=markdown_content, + content_hash=content_hash, + embedding=summary_embedding, + chunks=chunks, + ) + session.add(document) + documents_indexed += 1 + logger.info(f"Successfully indexed new email {summary_content}") + + except Exception as e: + logger.error( + f"Error processing the email {message_id}: {e!s}", + exc_info=True, + ) + skipped_messages.append(f"{subject} (processing error)") + documents_skipped += 1 + continue # Skip this message and continue with others + + # Update the last_indexed_at timestamp for the connector only if requested + total_processed = documents_indexed + if total_processed > 0: + await update_connector_last_indexed(session, connector, update_last_indexed) + + # Commit all changes + await session.commit() + logger.info( + "Successfully committed all Google gmail document changes to database" + ) + + # Log success + await task_logger.log_task_success( + log_entry, + f"Successfully completed Google gmail indexing for connector {connector_id}", + { + "events_processed": total_processed, + "documents_indexed": documents_indexed, + "documents_skipped": documents_skipped, + "skipped_messages_count": len(skipped_messages), + }, + ) + + logger.info( + f"Google gmail indexing completed: {documents_indexed} new emails, {documents_skipped} skipped" + ) + return ( + total_processed, + None, + ) # Return None as the error message to indicate success + + except SQLAlchemyError as db_error: + 
await session.rollback() + await task_logger.log_task_failure( + log_entry, + f"Database error during Google gmail indexing for connector {connector_id}", + str(db_error), + {"error_type": "SQLAlchemyError"}, + ) + logger.error(f"Database error: {db_error!s}", exc_info=True) + return 0, f"Database error: {db_error!s}" + except Exception as e: + await session.rollback() + await task_logger.log_task_failure( + log_entry, + f"Failed to index Google gmail emails for connector {connector_id}", + str(e), + {"error_type": type(e).__name__}, + ) + logger.error(f"Failed to index Google gmail emails: {e!s}", exc_info=True) + return 0, f"Failed to index Google gmail emails: {e!s}" diff --git a/surfsense_backend/app/tasks/connectors_indexing_tasks.py b/surfsense_backend/app/tasks/connectors_indexing_tasks.py deleted file mode 100644 index 85007fc..0000000 --- a/surfsense_backend/app/tasks/connectors_indexing_tasks.py +++ /dev/null @@ -1,3663 +0,0 @@ -import asyncio -import logging -from datetime import UTC, datetime, timedelta - -from google.oauth2.credentials import Credentials -from slack_sdk.errors import SlackApiError -from sqlalchemy.exc import SQLAlchemyError -from sqlalchemy.ext.asyncio import AsyncSession -from sqlalchemy.future import select - -from app.config import config -from app.connectors.clickup_connector import ClickUpConnector -from app.connectors.confluence_connector import ConfluenceConnector -from app.connectors.discord_connector import DiscordConnector -from app.connectors.github_connector import GitHubConnector -from app.connectors.google_calendar_connector import GoogleCalendarConnector -from app.connectors.google_gmail_connector import GoogleGmailConnector -from app.connectors.jira_connector import JiraConnector -from app.connectors.linear_connector import LinearConnector -from app.connectors.notion_history import NotionHistoryConnector -from app.connectors.slack_history import SlackHistory -from app.db import ( - Chunk, - Document, - DocumentType, - SearchSourceConnector, - SearchSourceConnectorType, -) -from app.prompts import SUMMARY_PROMPT_TEMPLATE -from app.services.llm_service import get_user_long_context_llm -from app.services.task_logging_service import TaskLoggingService -from app.utils.document_converters import generate_content_hash - -# Set up logging -logger = logging.getLogger(__name__) - - -async def index_slack_messages( - session: AsyncSession, - connector_id: int, - search_space_id: int, - user_id: str, - start_date: str | None = None, - end_date: str | None = None, - update_last_indexed: bool = True, -) -> tuple[int, str | None]: - """ - Index Slack messages from all accessible channels. 
- - Args: - session: Database session - connector_id: ID of the Slack connector - search_space_id: ID of the search space to store documents in - update_last_indexed: Whether to update the last_indexed_at timestamp (default: True) - - Returns: - Tuple containing (number of documents indexed, error message or None) - """ - task_logger = TaskLoggingService(session, search_space_id) - - # Log task start - log_entry = await task_logger.log_task_start( - task_name="slack_messages_indexing", - source="connector_indexing_task", - message=f"Starting Slack messages indexing for connector {connector_id}", - metadata={ - "connector_id": connector_id, - "user_id": str(user_id), - "start_date": start_date, - "end_date": end_date, - }, - ) - - try: - # Get the connector - await task_logger.log_task_progress( - log_entry, - f"Retrieving Slack connector {connector_id} from database", - {"stage": "connector_retrieval"}, - ) - - result = await session.execute( - select(SearchSourceConnector).filter( - SearchSourceConnector.id == connector_id, - SearchSourceConnector.connector_type - == SearchSourceConnectorType.SLACK_CONNECTOR, - ) - ) - connector = result.scalars().first() - - if not connector: - await task_logger.log_task_failure( - log_entry, - f"Connector with ID {connector_id} not found or is not a Slack connector", - "Connector not found", - {"error_type": "ConnectorNotFound"}, - ) - return ( - 0, - f"Connector with ID {connector_id} not found or is not a Slack connector", - ) - - # Get the Slack token from the connector config - slack_token = connector.config.get("SLACK_BOT_TOKEN") - if not slack_token: - await task_logger.log_task_failure( - log_entry, - f"Slack token not found in connector config for connector {connector_id}", - "Missing Slack token", - {"error_type": "MissingToken"}, - ) - return 0, "Slack token not found in connector config" - - # Initialize Slack client - await task_logger.log_task_progress( - log_entry, - f"Initializing Slack client for connector {connector_id}", - {"stage": "client_initialization"}, - ) - - slack_client = SlackHistory(token=slack_token) - - # Calculate date range - await task_logger.log_task_progress( - log_entry, - "Calculating date range for Slack indexing", - { - "stage": "date_calculation", - "provided_start_date": start_date, - "provided_end_date": end_date, - }, - ) - - if start_date is None or end_date is None: - # Fall back to calculating dates based on last_indexed_at - calculated_end_date = datetime.now() - - # Use last_indexed_at as start date if available, otherwise use 365 days ago - if connector.last_indexed_at: - # Convert dates to be comparable (both timezone-naive) - last_indexed_naive = ( - connector.last_indexed_at.replace(tzinfo=None) - if connector.last_indexed_at.tzinfo - else connector.last_indexed_at - ) - - # Check if last_indexed_at is in the future or after end_date - if last_indexed_naive > calculated_end_date: - logger.warning( - f"Last indexed date ({last_indexed_naive.strftime('%Y-%m-%d')}) is in the future. Using 365 days ago instead." 
- ) - calculated_start_date = calculated_end_date - timedelta(days=365) - else: - calculated_start_date = last_indexed_naive - logger.info( - f"Using last_indexed_at ({calculated_start_date.strftime('%Y-%m-%d')}) as start date" - ) - else: - calculated_start_date = calculated_end_date - timedelta( - days=365 - ) # Use 365 days as default - logger.info( - f"No last_indexed_at found, using {calculated_start_date.strftime('%Y-%m-%d')} (365 days ago) as start date" - ) - - # Use calculated dates if not provided - start_date_str = ( - start_date if start_date else calculated_start_date.strftime("%Y-%m-%d") - ) - end_date_str = ( - end_date if end_date else calculated_end_date.strftime("%Y-%m-%d") - ) - else: - # Use provided dates - start_date_str = start_date - end_date_str = end_date - - logger.info(f"Indexing Slack messages from {start_date_str} to {end_date_str}") - - await task_logger.log_task_progress( - log_entry, - f"Fetching Slack channels from {start_date_str} to {end_date_str}", - { - "stage": "fetch_channels", - "start_date": start_date_str, - "end_date": end_date_str, - }, - ) - - # Get all channels - try: - channels = slack_client.get_all_channels() - except Exception as e: - await task_logger.log_task_failure( - log_entry, - f"Failed to get Slack channels for connector {connector_id}", - str(e), - {"error_type": "ChannelFetchError"}, - ) - return 0, f"Failed to get Slack channels: {e!s}" - - if not channels: - await task_logger.log_task_success( - log_entry, - f"No Slack channels found for connector {connector_id}", - {"channels_found": 0}, - ) - return 0, "No Slack channels found" - - # Track the number of documents indexed - documents_indexed = 0 - documents_skipped = 0 - skipped_channels = [] - - await task_logger.log_task_progress( - log_entry, - f"Starting to process {len(channels)} Slack channels", - {"stage": "process_channels", "total_channels": len(channels)}, - ) - - # Process each channel - for ( - channel_obj - ) in channels: # Modified loop to iterate over list of channel objects - channel_id = channel_obj["id"] - channel_name = channel_obj["name"] - is_private = channel_obj["is_private"] - is_member = channel_obj[ - "is_member" - ] # This might be False for public channels too - - try: - # If it's a private channel and the bot is not a member, skip. - # For public channels, if they are listed by conversations.list, the bot can typically read history. - # The `not_in_channel` error in get_conversation_history will be the ultimate gatekeeper if history is inaccessible. - if is_private and not is_member: - logger.warning( - f"Bot is not a member of private channel {channel_name} ({channel_id}). Skipping." - ) - skipped_channels.append( - f"{channel_name} (private, bot not a member)" - ) - documents_skipped += 1 - continue - - # Get messages for this channel - # The get_history_by_date_range now uses get_conversation_history, - # which handles 'not_in_channel' by returning [] and logging. - messages, error = slack_client.get_history_by_date_range( - channel_id=channel_id, - start_date=start_date_str, - end_date=end_date_str, - limit=1000, # Limit to 1000 messages per channel - ) - - if error: - logger.warning( - f"Error getting messages from channel {channel_name}: {error}" - ) - skipped_channels.append(f"{channel_name} (error: {error})") - documents_skipped += 1 - continue # Skip this channel if there's an error - - if not messages: - logger.info( - f"No messages found in channel {channel_name} for the specified date range." 
- ) - documents_skipped += 1 - continue # Skip if no messages - - # Format messages with user info - formatted_messages = [] - for msg in messages: - # Skip bot messages and system messages - if msg.get("subtype") in [ - "bot_message", - "channel_join", - "channel_leave", - ]: - continue - - formatted_msg = slack_client.format_message( - msg, include_user_info=True - ) - formatted_messages.append(formatted_msg) - - if not formatted_messages: - logger.info( - f"No valid messages found in channel {channel_name} after filtering." - ) - documents_skipped += 1 - continue # Skip if no valid messages after filtering - - # Convert messages to markdown format - channel_content = f"# Slack Channel: {channel_name}\n\n" - - for msg in formatted_messages: - user_name = msg.get("user_name", "Unknown User") - timestamp = msg.get("datetime", "Unknown Time") - text = msg.get("text", "") - - channel_content += ( - f"## {user_name} ({timestamp})\n\n{text}\n\n---\n\n" - ) - - # Format document metadata - metadata_sections = [ - ( - "METADATA", - [ - f"CHANNEL_NAME: {channel_name}", - f"CHANNEL_ID: {channel_id}", - # f"START_DATE: {start_date_str}", - # f"END_DATE: {end_date_str}", - f"MESSAGE_COUNT: {len(formatted_messages)}", - ], - ), - ( - "CONTENT", - ["FORMAT: markdown", "TEXT_START", channel_content, "TEXT_END"], - ), - ] - - # Build the document string - document_parts = [] - document_parts.append("") - - for section_title, section_content in metadata_sections: - document_parts.append(f"<{section_title}>") - document_parts.extend(section_content) - document_parts.append(f"") - - document_parts.append("") - combined_document_string = "\n".join(document_parts) - content_hash = generate_content_hash( - combined_document_string, search_space_id - ) - - # Check if document with this content hash already exists - existing_doc_by_hash_result = await session.execute( - select(Document).where(Document.content_hash == content_hash) - ) - existing_document_by_hash = ( - existing_doc_by_hash_result.scalars().first() - ) - - if existing_document_by_hash: - logger.info( - f"Document with content hash {content_hash} already exists for channel {channel_name}. Skipping processing." 
- ) - documents_skipped += 1 - continue - - # Get user's long context LLM - user_llm = await get_user_long_context_llm(session, user_id) - if not user_llm: - logger.error(f"No long context LLM configured for user {user_id}") - skipped_channels.append(f"{channel_name} (no LLM configured)") - documents_skipped += 1 - continue - - # Generate summary - summary_chain = SUMMARY_PROMPT_TEMPLATE | user_llm - summary_result = await summary_chain.ainvoke( - {"document": combined_document_string} - ) - summary_content = summary_result.content - summary_embedding = config.embedding_model_instance.embed( - summary_content - ) - - # Process chunks - chunks = [ - Chunk( - content=chunk.text, - embedding=config.embedding_model_instance.embed(chunk.text), - ) - for chunk in config.chunker_instance.chunk(channel_content) - ] - - # Create and store new document - document = Document( - search_space_id=search_space_id, - title=f"Slack - {channel_name}", - document_type=DocumentType.SLACK_CONNECTOR, - document_metadata={ - "channel_name": channel_name, - "channel_id": channel_id, - "start_date": start_date_str, - "end_date": end_date_str, - "message_count": len(formatted_messages), - "indexed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"), - }, - content=summary_content, - embedding=summary_embedding, - chunks=chunks, - content_hash=content_hash, - ) - - session.add(document) - documents_indexed += 1 - logger.info( - f"Successfully indexed new channel {channel_name} with {len(formatted_messages)} messages" - ) - - except SlackApiError as slack_error: - logger.error( - f"Slack API error for channel {channel_name}: {slack_error!s}" - ) - skipped_channels.append(f"{channel_name} (Slack API error)") - documents_skipped += 1 - continue # Skip this channel and continue with others - except Exception as e: - logger.error(f"Error processing channel {channel_name}: {e!s}") - skipped_channels.append(f"{channel_name} (processing error)") - documents_skipped += 1 - continue # Skip this channel and continue with others - - # Update the last_indexed_at timestamp for the connector only if requested - # and if we successfully indexed at least one channel - total_processed = documents_indexed - if update_last_indexed and total_processed > 0: - connector.last_indexed_at = datetime.now() - - # Commit all changes - await session.commit() - - # Prepare result message - result_message = None - if skipped_channels: - result_message = f"Processed {total_processed} channels. Skipped {len(skipped_channels)} channels: {', '.join(skipped_channels)}" - else: - result_message = f"Processed {total_processed} channels." 
- - # Log success - await task_logger.log_task_success( - log_entry, - f"Successfully completed Slack indexing for connector {connector_id}", - { - "channels_processed": total_processed, - "documents_indexed": documents_indexed, - "documents_skipped": documents_skipped, - "skipped_channels_count": len(skipped_channels), - "result_message": result_message, - }, - ) - - logger.info( - f"Slack indexing completed: {documents_indexed} new channels, {documents_skipped} skipped" - ) - return total_processed, result_message - - except SQLAlchemyError as db_error: - await session.rollback() - await task_logger.log_task_failure( - log_entry, - f"Database error during Slack indexing for connector {connector_id}", - str(db_error), - {"error_type": "SQLAlchemyError"}, - ) - logger.error(f"Database error: {db_error!s}") - return 0, f"Database error: {db_error!s}" - except Exception as e: - await session.rollback() - await task_logger.log_task_failure( - log_entry, - f"Failed to index Slack messages for connector {connector_id}", - str(e), - {"error_type": type(e).__name__}, - ) - logger.error(f"Failed to index Slack messages: {e!s}") - return 0, f"Failed to index Slack messages: {e!s}" - - -async def index_notion_pages( - session: AsyncSession, - connector_id: int, - search_space_id: int, - user_id: str, - start_date: str | None = None, - end_date: str | None = None, - update_last_indexed: bool = True, -) -> tuple[int, str | None]: - """ - Index Notion pages from all accessible pages. - - Args: - session: Database session - connector_id: ID of the Notion connector - search_space_id: ID of the search space to store documents in - update_last_indexed: Whether to update the last_indexed_at timestamp (default: True) - - Returns: - Tuple containing (number of documents indexed, error message or None) - """ - task_logger = TaskLoggingService(session, search_space_id) - - # Log task start - log_entry = await task_logger.log_task_start( - task_name="notion_pages_indexing", - source="connector_indexing_task", - message=f"Starting Notion pages indexing for connector {connector_id}", - metadata={ - "connector_id": connector_id, - "user_id": str(user_id), - "start_date": start_date, - "end_date": end_date, - }, - ) - - try: - # Get the connector - await task_logger.log_task_progress( - log_entry, - f"Retrieving Notion connector {connector_id} from database", - {"stage": "connector_retrieval"}, - ) - - result = await session.execute( - select(SearchSourceConnector).filter( - SearchSourceConnector.id == connector_id, - SearchSourceConnector.connector_type - == SearchSourceConnectorType.NOTION_CONNECTOR, - ) - ) - connector = result.scalars().first() - - if not connector: - await task_logger.log_task_failure( - log_entry, - f"Connector with ID {connector_id} not found or is not a Notion connector", - "Connector not found", - {"error_type": "ConnectorNotFound"}, - ) - return ( - 0, - f"Connector with ID {connector_id} not found or is not a Notion connector", - ) - - # Get the Notion token from the connector config - notion_token = connector.config.get("NOTION_INTEGRATION_TOKEN") - if not notion_token: - await task_logger.log_task_failure( - log_entry, - f"Notion integration token not found in connector config for connector {connector_id}", - "Missing Notion token", - {"error_type": "MissingToken"}, - ) - return 0, "Notion integration token not found in connector config" - - # Initialize Notion client - await task_logger.log_task_progress( - log_entry, - f"Initializing Notion client for connector {connector_id}", - 
{"stage": "client_initialization"}, - ) - - logger.info(f"Initializing Notion client for connector {connector_id}") - notion_client = NotionHistoryConnector(token=notion_token) - - # Calculate date range - if start_date is None or end_date is None: - # Fall back to calculating dates - calculated_end_date = datetime.now() - calculated_start_date = calculated_end_date - timedelta( - days=365 - ) # Check for last 1 year of pages - - # Use calculated dates if not provided - if start_date is None: - start_date_iso = calculated_start_date.strftime("%Y-%m-%dT%H:%M:%SZ") - else: - # Convert YYYY-MM-DD to ISO format - start_date_iso = datetime.strptime(start_date, "%Y-%m-%d").strftime( - "%Y-%m-%dT%H:%M:%SZ" - ) - - if end_date is None: - end_date_iso = calculated_end_date.strftime("%Y-%m-%dT%H:%M:%SZ") - else: - # Convert YYYY-MM-DD to ISO format - end_date_iso = datetime.strptime(end_date, "%Y-%m-%d").strftime( - "%Y-%m-%dT%H:%M:%SZ" - ) - else: - # Convert provided dates to ISO format for Notion API - start_date_iso = datetime.strptime(start_date, "%Y-%m-%d").strftime( - "%Y-%m-%dT%H:%M:%SZ" - ) - end_date_iso = datetime.strptime(end_date, "%Y-%m-%d").strftime( - "%Y-%m-%dT%H:%M:%SZ" - ) - - logger.info(f"Fetching Notion pages from {start_date_iso} to {end_date_iso}") - - await task_logger.log_task_progress( - log_entry, - f"Fetching Notion pages from {start_date_iso} to {end_date_iso}", - { - "stage": "fetch_pages", - "start_date": start_date_iso, - "end_date": end_date_iso, - }, - ) - - # Get all pages - try: - pages = notion_client.get_all_pages( - start_date=start_date_iso, end_date=end_date_iso - ) - logger.info(f"Found {len(pages)} Notion pages") - except Exception as e: - await task_logger.log_task_failure( - log_entry, - f"Failed to get Notion pages for connector {connector_id}", - str(e), - {"error_type": "PageFetchError"}, - ) - logger.error(f"Error fetching Notion pages: {e!s}", exc_info=True) - return 0, f"Failed to get Notion pages: {e!s}" - - if not pages: - await task_logger.log_task_success( - log_entry, - f"No Notion pages found for connector {connector_id}", - {"pages_found": 0}, - ) - logger.info("No Notion pages found to index") - return 0, "No Notion pages found" - - # Track the number of documents indexed - documents_indexed = 0 - documents_skipped = 0 - skipped_pages = [] - - await task_logger.log_task_progress( - log_entry, - f"Starting to process {len(pages)} Notion pages", - {"stage": "process_pages", "total_pages": len(pages)}, - ) - - # Process each page - for page in pages: - try: - page_id = page.get("page_id") - page_title = page.get("title", f"Untitled page ({page_id})") - page_content = page.get("content", []) - - logger.info(f"Processing Notion page: {page_title} ({page_id})") - - if not page_content: - logger.info(f"No content found in page {page_title}. 
Skipping.") - skipped_pages.append(f"{page_title} (no content)") - documents_skipped += 1 - continue - - # Convert page content to markdown format - markdown_content = f"# Notion Page: {page_title}\n\n" - - # Process blocks recursively - def process_blocks(blocks, level=0): - result = "" - for block in blocks: - block_type = block.get("type") - block_content = block.get("content", "") - children = block.get("children", []) - - # Add indentation based on level - indent = " " * level - - # Format based on block type - if block_type in ["paragraph", "text"]: - result += f"{indent}{block_content}\n\n" - elif block_type in ["heading_1", "header"]: - result += f"{indent}# {block_content}\n\n" - elif block_type == "heading_2": - result += f"{indent}## {block_content}\n\n" - elif block_type == "heading_3": - result += f"{indent}### {block_content}\n\n" - elif block_type == "bulleted_list_item": - result += f"{indent}* {block_content}\n" - elif block_type == "numbered_list_item": - result += f"{indent}1. {block_content}\n" - elif block_type == "to_do": - result += f"{indent}- [ ] {block_content}\n" - elif block_type == "toggle": - result += f"{indent}> {block_content}\n" - elif block_type == "code": - result += f"{indent}```\n{block_content}\n```\n\n" - elif block_type == "quote": - result += f"{indent}> {block_content}\n\n" - elif block_type == "callout": - result += f"{indent}> **Note:** {block_content}\n\n" - elif block_type == "image": - result += f"{indent}![Image]({block_content})\n\n" - else: - # Default for other block types - if block_content: - result += f"{indent}{block_content}\n\n" - - # Process children recursively - if children: - result += process_blocks(children, level + 1) - - return result - - logger.debug( - f"Converting {len(page_content)} blocks to markdown for page {page_title}" - ) - markdown_content += process_blocks(page_content) - - # Format document metadata - metadata_sections = [ - ("METADATA", [f"PAGE_TITLE: {page_title}", f"PAGE_ID: {page_id}"]), - ( - "CONTENT", - [ - "FORMAT: markdown", - "TEXT_START", - markdown_content, - "TEXT_END", - ], - ), - ] - - # Build the document string - document_parts = [] - document_parts.append("") - - for section_title, section_content in metadata_sections: - document_parts.append(f"<{section_title}>") - document_parts.extend(section_content) - document_parts.append(f"") - - document_parts.append("") - combined_document_string = "\n".join(document_parts) - content_hash = generate_content_hash( - combined_document_string, search_space_id - ) - - # Check if document with this content hash already exists - existing_doc_by_hash_result = await session.execute( - select(Document).where(Document.content_hash == content_hash) - ) - existing_document_by_hash = ( - existing_doc_by_hash_result.scalars().first() - ) - - if existing_document_by_hash: - logger.info( - f"Document with content hash {content_hash} already exists for page {page_title}. Skipping processing." 
- ) - documents_skipped += 1 - continue - - # Get user's long context LLM - user_llm = await get_user_long_context_llm(session, user_id) - if not user_llm: - logger.error(f"No long context LLM configured for user {user_id}") - skipped_pages.append(f"{page_title} (no LLM configured)") - documents_skipped += 1 - continue - - # Generate summary - logger.debug(f"Generating summary for page {page_title}") - summary_chain = SUMMARY_PROMPT_TEMPLATE | user_llm - summary_result = await summary_chain.ainvoke( - {"document": combined_document_string} - ) - summary_content = summary_result.content - summary_embedding = config.embedding_model_instance.embed( - summary_content - ) - - # Process chunks - logger.debug(f"Chunking content for page {page_title}") - chunks = [ - Chunk( - content=chunk.text, - embedding=config.embedding_model_instance.embed(chunk.text), - ) - for chunk in config.chunker_instance.chunk(markdown_content) - ] - - # Create and store new document - document = Document( - search_space_id=search_space_id, - title=f"Notion - {page_title}", - document_type=DocumentType.NOTION_CONNECTOR, - document_metadata={ - "page_title": page_title, - "page_id": page_id, - "indexed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"), - }, - content=summary_content, - content_hash=content_hash, - embedding=summary_embedding, - chunks=chunks, - ) - - session.add(document) - documents_indexed += 1 - logger.info(f"Successfully indexed new Notion page: {page_title}") - - except Exception as e: - logger.error( - f"Error processing Notion page {page.get('title', 'Unknown')}: {e!s}", - exc_info=True, - ) - skipped_pages.append( - f"{page.get('title', 'Unknown')} (processing error)" - ) - documents_skipped += 1 - continue # Skip this page and continue with others - - # Update the last_indexed_at timestamp for the connector only if requested - # and if we successfully indexed at least one page - total_processed = documents_indexed - if update_last_indexed and total_processed > 0: - connector.last_indexed_at = datetime.now() - logger.info(f"Updated last_indexed_at for connector {connector_id}") - - # Commit all changes - await session.commit() - - # Prepare result message - result_message = None - if skipped_pages: - result_message = f"Processed {total_processed} pages. Skipped {len(skipped_pages)} pages: {', '.join(skipped_pages)}" - else: - result_message = f"Processed {total_processed} pages." 
- - # Log success - await task_logger.log_task_success( - log_entry, - f"Successfully completed Notion indexing for connector {connector_id}", - { - "pages_processed": total_processed, - "documents_indexed": documents_indexed, - "documents_skipped": documents_skipped, - "skipped_pages_count": len(skipped_pages), - "result_message": result_message, - }, - ) - - logger.info( - f"Notion indexing completed: {documents_indexed} new pages, {documents_skipped} skipped" - ) - return total_processed, result_message - - except SQLAlchemyError as db_error: - await session.rollback() - await task_logger.log_task_failure( - log_entry, - f"Database error during Notion indexing for connector {connector_id}", - str(db_error), - {"error_type": "SQLAlchemyError"}, - ) - logger.error( - f"Database error during Notion indexing: {db_error!s}", exc_info=True - ) - return 0, f"Database error: {db_error!s}" - except Exception as e: - await session.rollback() - await task_logger.log_task_failure( - log_entry, - f"Failed to index Notion pages for connector {connector_id}", - str(e), - {"error_type": type(e).__name__}, - ) - logger.error(f"Failed to index Notion pages: {e!s}", exc_info=True) - return 0, f"Failed to index Notion pages: {e!s}" - - -async def index_github_repos( - session: AsyncSession, - connector_id: int, - search_space_id: int, - user_id: str, - start_date: str | None = None, - end_date: str | None = None, - update_last_indexed: bool = True, -) -> tuple[int, str | None]: - """ - Index code and documentation files from accessible GitHub repositories. - - Args: - session: Database session - connector_id: ID of the GitHub connector - search_space_id: ID of the search space to store documents in - update_last_indexed: Whether to update the last_indexed_at timestamp (default: True) - - Returns: - Tuple containing (number of documents indexed, error message or None) - """ - task_logger = TaskLoggingService(session, search_space_id) - - # Log task start - log_entry = await task_logger.log_task_start( - task_name="github_repos_indexing", - source="connector_indexing_task", - message=f"Starting GitHub repositories indexing for connector {connector_id}", - metadata={ - "connector_id": connector_id, - "user_id": str(user_id), - "start_date": start_date, - "end_date": end_date, - }, - ) - - documents_processed = 0 - errors = [] - - try: - # 1. Get the GitHub connector from the database - await task_logger.log_task_progress( - log_entry, - f"Retrieving GitHub connector {connector_id} from database", - {"stage": "connector_retrieval"}, - ) - - result = await session.execute( - select(SearchSourceConnector).filter( - SearchSourceConnector.id == connector_id, - SearchSourceConnector.connector_type - == SearchSourceConnectorType.GITHUB_CONNECTOR, - ) - ) - connector = result.scalars().first() - - if not connector: - await task_logger.log_task_failure( - log_entry, - f"Connector with ID {connector_id} not found or is not a GitHub connector", - "Connector not found", - {"error_type": "ConnectorNotFound"}, - ) - return ( - 0, - f"Connector with ID {connector_id} not found or is not a GitHub connector", - ) - - # 2. 
Get the GitHub PAT and selected repositories from the connector config - github_pat = connector.config.get("GITHUB_PAT") - repo_full_names_to_index = connector.config.get("repo_full_names") - - if not github_pat: - await task_logger.log_task_failure( - log_entry, - f"GitHub Personal Access Token (PAT) not found in connector config for connector {connector_id}", - "Missing GitHub PAT", - {"error_type": "MissingToken"}, - ) - return 0, "GitHub Personal Access Token (PAT) not found in connector config" - - if not repo_full_names_to_index or not isinstance( - repo_full_names_to_index, list - ): - await task_logger.log_task_failure( - log_entry, - f"'repo_full_names' not found or is not a list in connector config for connector {connector_id}", - "Invalid repo configuration", - {"error_type": "InvalidConfiguration"}, - ) - return 0, "'repo_full_names' not found or is not a list in connector config" - - # 3. Initialize GitHub connector client - await task_logger.log_task_progress( - log_entry, - f"Initializing GitHub client for connector {connector_id}", - { - "stage": "client_initialization", - "repo_count": len(repo_full_names_to_index), - }, - ) - - try: - github_client = GitHubConnector(token=github_pat) - except ValueError as e: - await task_logger.log_task_failure( - log_entry, - f"Failed to initialize GitHub client for connector {connector_id}", - str(e), - {"error_type": "ClientInitializationError"}, - ) - return 0, f"Failed to initialize GitHub client: {e!s}" - - # 4. Validate selected repositories - # For simplicity, we'll proceed with the list provided. - # If a repo is inaccessible, get_repository_files will likely fail gracefully later. - await task_logger.log_task_progress( - log_entry, - f"Starting indexing for {len(repo_full_names_to_index)} selected repositories", - { - "stage": "repo_processing", - "repo_count": len(repo_full_names_to_index), - "start_date": start_date, - "end_date": end_date, - }, - ) - - logger.info( - f"Starting indexing for {len(repo_full_names_to_index)} selected repositories." - ) - if start_date and end_date: - logger.info( - f"Date range requested: {start_date} to {end_date} (Note: GitHub indexing processes all files regardless of dates)" - ) - - # 6. Iterate through selected repositories and index files - for repo_full_name in repo_full_names_to_index: - if not repo_full_name or not isinstance(repo_full_name, str): - logger.warning(f"Skipping invalid repository entry: {repo_full_name}") - continue - - logger.info(f"Processing repository: {repo_full_name}") - try: - files_to_index = github_client.get_repository_files(repo_full_name) - if not files_to_index: - logger.info( - f"No indexable files found in repository: {repo_full_name}" - ) - continue - - logger.info( - f"Found {len(files_to_index)} files to process in {repo_full_name}" - ) - - for file_info in files_to_index: - file_path = file_info.get("path") - file_url = file_info.get("url") - file_sha = file_info.get("sha") - file_type = file_info.get("type") # 'code' or 'doc' - full_path_key = f"{repo_full_name}/{file_path}" - - if not file_path or not file_url or not file_sha: - logger.warning( - f"Skipping file with missing info in {repo_full_name}: {file_info}" - ) - continue - - # Get file content - file_content = github_client.get_file_content( - repo_full_name, file_path - ) - - if file_content is None: - logger.warning( - f"Could not retrieve content for {full_path_key}. Skipping." 
- ) - continue # Skip if content fetch failed - - content_hash = generate_content_hash(file_content, search_space_id) - - # Check if document with this content hash already exists - existing_doc_by_hash_result = await session.execute( - select(Document).where(Document.content_hash == content_hash) - ) - existing_document_by_hash = ( - existing_doc_by_hash_result.scalars().first() - ) - - if existing_document_by_hash: - logger.info( - f"Document with content hash {content_hash} already exists for file {full_path_key}. Skipping processing." - ) - continue - - # Use file_content directly for chunking, maybe summary for main content? - # For now, let's use the full content for both, might need refinement - summary_content = f"GitHub file: {full_path_key}\n\n{file_content[:1000]}..." # Simple summary - summary_embedding = config.embedding_model_instance.embed( - summary_content - ) - - # Chunk the content - try: - chunks_data = [ - Chunk( - content=chunk.text, - embedding=config.embedding_model_instance.embed( - chunk.text - ), - ) - for chunk in config.code_chunker_instance.chunk( - file_content - ) - ] - except Exception as chunk_err: - logger.error( - f"Failed to chunk file {full_path_key}: {chunk_err}" - ) - errors.append( - f"Chunking failed for {full_path_key}: {chunk_err}" - ) - continue # Skip this file if chunking fails - - doc_metadata = { - "repository_full_name": repo_full_name, - "file_path": file_path, - "full_path": full_path_key, # For easier lookup - "url": file_url, - "sha": file_sha, - "type": file_type, - "indexed_at": datetime.now(UTC).isoformat(), - } - - # Create new document - logger.info(f"Creating new document for file: {full_path_key}") - document = Document( - title=f"GitHub - {file_path}", - document_type=DocumentType.GITHUB_CONNECTOR, - document_metadata=doc_metadata, - content=summary_content, # Store summary - content_hash=content_hash, - embedding=summary_embedding, - search_space_id=search_space_id, - chunks=chunks_data, # Associate chunks directly - ) - session.add(document) - documents_processed += 1 - - except Exception as repo_err: - logger.error( - f"Failed to process repository {repo_full_name}: {repo_err}" - ) - errors.append(f"Failed processing {repo_full_name}: {repo_err}") - - # Commit all changes at the end - await session.commit() - logger.info( - f"Finished GitHub indexing for connector {connector_id}. Processed {documents_processed} files." 
- ) - - # Log success - await task_logger.log_task_success( - log_entry, - f"Successfully completed GitHub indexing for connector {connector_id}", - { - "documents_processed": documents_processed, - "errors_count": len(errors), - "repo_count": len(repo_full_names_to_index), - }, - ) - - except SQLAlchemyError as db_err: - await session.rollback() - await task_logger.log_task_failure( - log_entry, - f"Database error during GitHub indexing for connector {connector_id}", - str(db_err), - {"error_type": "SQLAlchemyError"}, - ) - logger.error( - f"Database error during GitHub indexing for connector {connector_id}: {db_err}" - ) - errors.append(f"Database error: {db_err}") - return documents_processed, "; ".join(errors) if errors else str(db_err) - except Exception as e: - await session.rollback() - await task_logger.log_task_failure( - log_entry, - f"Unexpected error during GitHub indexing for connector {connector_id}", - str(e), - {"error_type": type(e).__name__}, - ) - logger.error( - f"Unexpected error during GitHub indexing for connector {connector_id}: {e}", - exc_info=True, - ) - errors.append(f"Unexpected error: {e}") - return documents_processed, "; ".join(errors) if errors else str(e) - - error_message = "; ".join(errors) if errors else None - return documents_processed, error_message - - -async def index_linear_issues( - session: AsyncSession, - connector_id: int, - search_space_id: int, - user_id: str, - start_date: str | None = None, - end_date: str | None = None, - update_last_indexed: bool = True, -) -> tuple[int, str | None]: - """ - Index Linear issues and comments. - - Args: - session: Database session - connector_id: ID of the Linear connector - search_space_id: ID of the search space to store documents in - update_last_indexed: Whether to update the last_indexed_at timestamp (default: True) - - Returns: - Tuple containing (number of documents indexed, error message or None) - """ - task_logger = TaskLoggingService(session, search_space_id) - - # Log task start - log_entry = await task_logger.log_task_start( - task_name="linear_issues_indexing", - source="connector_indexing_task", - message=f"Starting Linear issues indexing for connector {connector_id}", - metadata={ - "connector_id": connector_id, - "user_id": str(user_id), - "start_date": start_date, - "end_date": end_date, - }, - ) - - try: - # Get the connector - await task_logger.log_task_progress( - log_entry, - f"Retrieving Linear connector {connector_id} from database", - {"stage": "connector_retrieval"}, - ) - - result = await session.execute( - select(SearchSourceConnector).filter( - SearchSourceConnector.id == connector_id, - SearchSourceConnector.connector_type - == SearchSourceConnectorType.LINEAR_CONNECTOR, - ) - ) - connector = result.scalars().first() - - if not connector: - await task_logger.log_task_failure( - log_entry, - f"Connector with ID {connector_id} not found or is not a Linear connector", - "Connector not found", - {"error_type": "ConnectorNotFound"}, - ) - return ( - 0, - f"Connector with ID {connector_id} not found or is not a Linear connector", - ) - - # Get the Linear token from the connector config - linear_token = connector.config.get("LINEAR_API_KEY") - if not linear_token: - await task_logger.log_task_failure( - log_entry, - f"Linear API token not found in connector config for connector {connector_id}", - "Missing Linear token", - {"error_type": "MissingToken"}, - ) - return 0, "Linear API token not found in connector config" - - # Initialize Linear client - await 
task_logger.log_task_progress( - log_entry, - f"Initializing Linear client for connector {connector_id}", - {"stage": "client_initialization"}, - ) - - linear_client = LinearConnector(token=linear_token) - - # Calculate date range - if start_date is None or end_date is None: - # Fall back to calculating dates based on last_indexed_at - calculated_end_date = datetime.now() - - # Use last_indexed_at as start date if available, otherwise use 365 days ago - if connector.last_indexed_at: - # Convert dates to be comparable (both timezone-naive) - last_indexed_naive = ( - connector.last_indexed_at.replace(tzinfo=None) - if connector.last_indexed_at.tzinfo - else connector.last_indexed_at - ) - - # Check if last_indexed_at is in the future or after end_date - if last_indexed_naive > calculated_end_date: - logger.warning( - f"Last indexed date ({last_indexed_naive.strftime('%Y-%m-%d')}) is in the future. Using 365 days ago instead." - ) - calculated_start_date = calculated_end_date - timedelta(days=365) - else: - calculated_start_date = last_indexed_naive - logger.info( - f"Using last_indexed_at ({calculated_start_date.strftime('%Y-%m-%d')}) as start date" - ) - else: - calculated_start_date = calculated_end_date - timedelta( - days=365 - ) # Use 365 days as default - logger.info( - f"No last_indexed_at found, using {calculated_start_date.strftime('%Y-%m-%d')} (365 days ago) as start date" - ) - - # Use calculated dates if not provided - start_date_str = ( - start_date if start_date else calculated_start_date.strftime("%Y-%m-%d") - ) - end_date_str = ( - end_date if end_date else calculated_end_date.strftime("%Y-%m-%d") - ) - else: - # Use provided dates - start_date_str = start_date - end_date_str = end_date - - logger.info(f"Fetching Linear issues from {start_date_str} to {end_date_str}") - - await task_logger.log_task_progress( - log_entry, - f"Fetching Linear issues from {start_date_str} to {end_date_str}", - { - "stage": "fetch_issues", - "start_date": start_date_str, - "end_date": end_date_str, - }, - ) - - # Get issues within date range - try: - issues, error = linear_client.get_issues_by_date_range( - start_date=start_date_str, end_date=end_date_str, include_comments=True - ) - - if error: - logger.error(f"Failed to get Linear issues: {error}") - - # Don't treat "No issues found" as an error that should stop indexing - if "No issues found" in error: - logger.info( - "No issues found is not a critical error, continuing with update" - ) - if update_last_indexed: - connector.last_indexed_at = datetime.now() - await session.commit() - logger.info( - f"Updated last_indexed_at to {connector.last_indexed_at} despite no issues found" - ) - return 0, None - else: - return 0, f"Failed to get Linear issues: {error}" - - logger.info(f"Retrieved {len(issues)} issues from Linear API") - - except Exception as e: - logger.error(f"Exception when calling Linear API: {e!s}", exc_info=True) - return 0, f"Failed to get Linear issues: {e!s}" - - if not issues: - logger.info("No Linear issues found for the specified date range") - if update_last_indexed: - connector.last_indexed_at = datetime.now() - await session.commit() - logger.info( - f"Updated last_indexed_at to {connector.last_indexed_at} despite no issues found" - ) - return 0, None # Return None instead of error message when no issues found - - # Log issue IDs and titles for debugging - logger.info("Issues retrieved from Linear API:") - for idx, issue in enumerate(issues[:10]): # Log first 10 issues - logger.info( - f" {idx + 1}. 
{issue.get('identifier', 'Unknown')} - {issue.get('title', 'Unknown')} - Created: {issue.get('createdAt', 'Unknown')} - Updated: {issue.get('updatedAt', 'Unknown')}" - ) - if len(issues) > 10: - logger.info(f" ...and {len(issues) - 10} more issues") - - # Track the number of documents indexed - documents_indexed = 0 - documents_skipped = 0 - skipped_issues = [] - - await task_logger.log_task_progress( - log_entry, - f"Starting to process {len(issues)} Linear issues", - {"stage": "process_issues", "total_issues": len(issues)}, - ) - - # Process each issue - for issue in issues: - try: - issue_id = issue.get("key") - issue_identifier = issue.get("id", "") - issue_title = issue.get("key", "") - - if not issue_id or not issue_title: - logger.warning( - f"Skipping issue with missing ID or title: {issue_id or 'Unknown'}" - ) - skipped_issues.append( - f"{issue_identifier or 'Unknown'} (missing data)" - ) - documents_skipped += 1 - continue - - # Format the issue first to get well-structured data - formatted_issue = linear_client.format_issue(issue) - - # Convert issue to markdown format - issue_content = linear_client.format_issue_to_markdown(formatted_issue) - - if not issue_content: - logger.warning( - f"Skipping issue with no content: {issue_identifier} - {issue_title}" - ) - skipped_issues.append(f"{issue_identifier} (no content)") - documents_skipped += 1 - continue - - # Create a short summary for the embedding - # This avoids using the LLM and just uses the issue data directly - state = formatted_issue.get("state", "Unknown") - description = formatted_issue.get("description", "") - # Truncate description if it's too long for the summary - if description and len(description) > 500: - description = description[:497] + "..." - - # Create a simple summary from the issue data - summary_content = f"Linear Issue {issue_identifier}: {issue_title}\n\nStatus: {state}\n\n" - if description: - summary_content += f"Description: {description}\n\n" - - # Add comment count - comment_count = len(formatted_issue.get("comments", [])) - summary_content += f"Comments: {comment_count}" - - content_hash = generate_content_hash(issue_content, search_space_id) - - # Check if document with this content hash already exists - existing_doc_by_hash_result = await session.execute( - select(Document).where(Document.content_hash == content_hash) - ) - existing_document_by_hash = ( - existing_doc_by_hash_result.scalars().first() - ) - - if existing_document_by_hash: - logger.info( - f"Document with content hash {content_hash} already exists for issue {issue_identifier}. Skipping processing." 
- ) - documents_skipped += 1 - continue - - # Generate embedding for the summary - summary_embedding = config.embedding_model_instance.embed( - summary_content - ) - - # Process chunks - using the full issue content with comments - chunks = [ - Chunk( - content=chunk.text, - embedding=config.embedding_model_instance.embed(chunk.text), - ) - for chunk in config.chunker_instance.chunk(issue_content) - ] - - # Create and store new document - logger.info( - f"Creating new document for issue {issue_identifier} - {issue_title}" - ) - document = Document( - search_space_id=search_space_id, - title=f"Linear - {issue_identifier}: {issue_title}", - document_type=DocumentType.LINEAR_CONNECTOR, - document_metadata={ - "issue_id": issue_id, - "issue_identifier": issue_identifier, - "issue_title": issue_title, - "state": state, - "comment_count": comment_count, - "indexed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"), - }, - content=summary_content, - content_hash=content_hash, - embedding=summary_embedding, - chunks=chunks, - ) - - session.add(document) - documents_indexed += 1 - logger.info( - f"Successfully indexed new issue {issue_identifier} - {issue_title}" - ) - - except Exception as e: - logger.error( - f"Error processing issue {issue.get('identifier', 'Unknown')}: {e!s}", - exc_info=True, - ) - skipped_issues.append( - f"{issue.get('identifier', 'Unknown')} (processing error)" - ) - documents_skipped += 1 - continue # Skip this issue and continue with others - - # Update the last_indexed_at timestamp for the connector only if requested - total_processed = documents_indexed - if update_last_indexed: - connector.last_indexed_at = datetime.now() - logger.info(f"Updated last_indexed_at to {connector.last_indexed_at}") - - # Commit all changes - await session.commit() - logger.info("Successfully committed all Linear document changes to database") - - # Log success - await task_logger.log_task_success( - log_entry, - f"Successfully completed Linear indexing for connector {connector_id}", - { - "issues_processed": total_processed, - "documents_indexed": documents_indexed, - "documents_skipped": documents_skipped, - "skipped_issues_count": len(skipped_issues), - }, - ) - - logger.info( - f"Linear indexing completed: {documents_indexed} new issues, {documents_skipped} skipped" - ) - return ( - total_processed, - None, - ) # Return None as the error message to indicate success - - except SQLAlchemyError as db_error: - await session.rollback() - await task_logger.log_task_failure( - log_entry, - f"Database error during Linear indexing for connector {connector_id}", - str(db_error), - {"error_type": "SQLAlchemyError"}, - ) - logger.error(f"Database error: {db_error!s}", exc_info=True) - return 0, f"Database error: {db_error!s}" - except Exception as e: - await session.rollback() - await task_logger.log_task_failure( - log_entry, - f"Failed to index Linear issues for connector {connector_id}", - str(e), - {"error_type": type(e).__name__}, - ) - logger.error(f"Failed to index Linear issues: {e!s}", exc_info=True) - return 0, f"Failed to index Linear issues: {e!s}" - - -async def index_discord_messages( - session: AsyncSession, - connector_id: int, - search_space_id: int, - user_id: str, - start_date: str | None = None, - end_date: str | None = None, - update_last_indexed: bool = True, -) -> tuple[int, str | None]: - """ - Index Discord messages from all accessible channels. 
- - Args: - session: Database session - connector_id: ID of the Discord connector - search_space_id: ID of the search space to store documents in - update_last_indexed: Whether to update the last_indexed_at timestamp (default: True) - - Returns: - Tuple containing (number of documents indexed, error message or None) - """ - task_logger = TaskLoggingService(session, search_space_id) - - # Log task start - log_entry = await task_logger.log_task_start( - task_name="discord_messages_indexing", - source="connector_indexing_task", - message=f"Starting Discord messages indexing for connector {connector_id}", - metadata={ - "connector_id": connector_id, - "user_id": str(user_id), - "start_date": start_date, - "end_date": end_date, - }, - ) - - try: - # Get the connector - await task_logger.log_task_progress( - log_entry, - f"Retrieving Discord connector {connector_id} from database", - {"stage": "connector_retrieval"}, - ) - - result = await session.execute( - select(SearchSourceConnector).filter( - SearchSourceConnector.id == connector_id, - SearchSourceConnector.connector_type - == SearchSourceConnectorType.DISCORD_CONNECTOR, - ) - ) - connector = result.scalars().first() - - if not connector: - await task_logger.log_task_failure( - log_entry, - f"Connector with ID {connector_id} not found or is not a Discord connector", - "Connector not found", - {"error_type": "ConnectorNotFound"}, - ) - return ( - 0, - f"Connector with ID {connector_id} not found or is not a Discord connector", - ) - - # Get the Discord token from the connector config - discord_token = connector.config.get("DISCORD_BOT_TOKEN") - if not discord_token: - await task_logger.log_task_failure( - log_entry, - f"Discord token not found in connector config for connector {connector_id}", - "Missing Discord token", - {"error_type": "MissingToken"}, - ) - return 0, "Discord token not found in connector config" - - logger.info(f"Starting Discord indexing for connector {connector_id}") - - # Initialize Discord client - await task_logger.log_task_progress( - log_entry, - f"Initializing Discord client for connector {connector_id}", - {"stage": "client_initialization"}, - ) - - discord_client = DiscordConnector(token=discord_token) - - # Calculate date range - if start_date is None or end_date is None: - # Fall back to calculating dates based on last_indexed_at - calculated_end_date = datetime.now(UTC) - - # Use last_indexed_at as start date if available, otherwise use 365 days ago - if connector.last_indexed_at: - calculated_start_date = connector.last_indexed_at.replace(tzinfo=UTC) - logger.info( - f"Using last_indexed_at ({calculated_start_date.strftime('%Y-%m-%d')}) as start date" - ) - else: - calculated_start_date = calculated_end_date - timedelta(days=365) - logger.info( - f"No last_indexed_at found, using {calculated_start_date.strftime('%Y-%m-%d')} (365 days ago) as start date" - ) - - # Use calculated dates if not provided, convert to ISO format for Discord API - if start_date is None: - start_date_iso = calculated_start_date.isoformat() - else: - # Convert YYYY-MM-DD to ISO format - start_date_iso = ( - datetime.strptime(start_date, "%Y-%m-%d") - .replace(tzinfo=UTC) - .isoformat() - ) - - if end_date is None: - end_date_iso = calculated_end_date.isoformat() - else: - # Convert YYYY-MM-DD to ISO format - end_date_iso = ( - datetime.strptime(end_date, "%Y-%m-%d") - .replace(tzinfo=UTC) - .isoformat() - ) - else: - # Convert provided dates to ISO format for Discord API - start_date_iso = ( - datetime.strptime(start_date, "%Y-%m-%d") 
- .replace(tzinfo=UTC) - .isoformat() - ) - end_date_iso = ( - datetime.strptime(end_date, "%Y-%m-%d").replace(tzinfo=UTC).isoformat() - ) - - logger.info( - f"Indexing Discord messages from {start_date_iso} to {end_date_iso}" - ) - - documents_indexed = 0 - documents_skipped = 0 - skipped_channels = [] - - try: - await task_logger.log_task_progress( - log_entry, - f"Starting Discord bot and fetching guilds for connector {connector_id}", - {"stage": "fetch_guilds"}, - ) - - logger.info("Starting Discord bot to fetch guilds") - discord_client._bot_task = asyncio.create_task(discord_client.start_bot()) - await discord_client._wait_until_ready() - - logger.info("Fetching Discord guilds") - guilds = await discord_client.get_guilds() - logger.info(f"Found {len(guilds)} guilds") - except Exception as e: - await task_logger.log_task_failure( - log_entry, - f"Failed to get Discord guilds for connector {connector_id}", - str(e), - {"error_type": "GuildFetchError"}, - ) - logger.error(f"Failed to get Discord guilds: {e!s}", exc_info=True) - await discord_client.close_bot() - return 0, f"Failed to get Discord guilds: {e!s}" - if not guilds: - await task_logger.log_task_success( - log_entry, - f"No Discord guilds found for connector {connector_id}", - {"guilds_found": 0}, - ) - logger.info("No Discord guilds found to index") - await discord_client.close_bot() - return 0, "No Discord guilds found" - - # Process each guild and channel - await task_logger.log_task_progress( - log_entry, - f"Starting to process {len(guilds)} Discord guilds", - {"stage": "process_guilds", "total_guilds": len(guilds)}, - ) - - for guild in guilds: - guild_id = guild["id"] - guild_name = guild["name"] - logger.info(f"Processing guild: {guild_name} ({guild_id})") - try: - channels = await discord_client.get_text_channels(guild_id) - if not channels: - logger.info(f"No channels found in guild {guild_name}. Skipping.") - skipped_channels.append(f"{guild_name} (no channels)") - documents_skipped += 1 - continue - - for channel in channels: - channel_id = channel["id"] - channel_name = channel["name"] - - try: - messages = await discord_client.get_channel_history( - channel_id=channel_id, - start_date=start_date_iso, - end_date=end_date_iso, - ) - except Exception as e: - logger.error( - f"Failed to get messages for channel {channel_name}: {e!s}" - ) - skipped_channels.append( - f"{guild_name}#{channel_name} (fetch error)" - ) - documents_skipped += 1 - continue - - if not messages: - logger.info( - f"No messages found in channel {channel_name} for the specified date range." - ) - documents_skipped += 1 - continue - - # Format messages - formatted_messages = [] - for msg in messages: - # Skip system messages if needed (Discord has some types) - if msg.get("type") in ["system"]: - continue - formatted_messages.append(msg) - - if not formatted_messages: - logger.info( - f"No valid messages found in channel {channel_name} after filtering." 
- ) - documents_skipped += 1 - continue - - # Convert messages to markdown format - channel_content = ( - f"# Discord Channel: {guild_name} / {channel_name}\n\n" - ) - for msg in formatted_messages: - user_name = msg.get("author_name", "Unknown User") - timestamp = msg.get("created_at", "Unknown Time") - text = msg.get("content", "") - channel_content += ( - f"## {user_name} ({timestamp})\n\n{text}\n\n---\n\n" - ) - - # Format document metadata - metadata_sections = [ - ( - "METADATA", - [ - f"GUILD_NAME: {guild_name}", - f"GUILD_ID: {guild_id}", - f"CHANNEL_NAME: {channel_name}", - f"CHANNEL_ID: {channel_id}", - f"MESSAGE_COUNT: {len(formatted_messages)}", - ], - ), - ( - "CONTENT", - [ - "FORMAT: markdown", - "TEXT_START", - channel_content, - "TEXT_END", - ], - ), - ] - - # Build the document string - document_parts = [] - document_parts.append("") - for section_title, section_content in metadata_sections: - document_parts.append(f"<{section_title}>") - document_parts.extend(section_content) - document_parts.append(f"") - document_parts.append("") - combined_document_string = "\n".join(document_parts) - content_hash = generate_content_hash( - combined_document_string, search_space_id - ) - - # Check if document with this content hash already exists - existing_doc_by_hash_result = await session.execute( - select(Document).where(Document.content_hash == content_hash) - ) - existing_document_by_hash = ( - existing_doc_by_hash_result.scalars().first() - ) - - if existing_document_by_hash: - logger.info( - f"Document with content hash {content_hash} already exists for channel {guild_name}#{channel_name}. Skipping processing." - ) - documents_skipped += 1 - continue - - # Get user's long context LLM - user_llm = await get_user_long_context_llm(session, user_id) - if not user_llm: - logger.error( - f"No long context LLM configured for user {user_id}" - ) - skipped_channels.append( - f"{guild_name}#{channel_name} (no LLM configured)" - ) - documents_skipped += 1 - continue - - # Generate summary using summary_chain - summary_chain = SUMMARY_PROMPT_TEMPLATE | user_llm - summary_result = await summary_chain.ainvoke( - {"document": combined_document_string} - ) - summary_content = summary_result.content - summary_embedding = await asyncio.to_thread( - config.embedding_model_instance.embed, summary_content - ) - - # Process chunks - raw_chunks = await asyncio.to_thread( - config.chunker_instance.chunk, channel_content - ) - - chunk_texts = [ - chunk.text for chunk in raw_chunks if chunk.text.strip() - ] - chunk_embeddings = await asyncio.to_thread( - lambda texts: [ - config.embedding_model_instance.embed(t) for t in texts - ], - chunk_texts, - ) - - chunks = [ - Chunk(content=raw_chunk.text, embedding=embedding) - for raw_chunk, embedding in zip( - raw_chunks, chunk_embeddings, strict=False - ) - ] - - # Create and store new document - document = Document( - search_space_id=search_space_id, - title=f"Discord - {guild_name}#{channel_name}", - document_type=DocumentType.DISCORD_CONNECTOR, - document_metadata={ - "guild_name": guild_name, - "guild_id": guild_id, - "channel_name": channel_name, - "channel_id": channel_id, - "message_count": len(formatted_messages), - "start_date": start_date_iso, - "end_date": end_date_iso, - "indexed_at": datetime.now(UTC).strftime( - "%Y-%m-%d %H:%M:%S" - ), - }, - content=summary_content, - content_hash=content_hash, - embedding=summary_embedding, - chunks=chunks, - ) - - session.add(document) - documents_indexed += 1 - logger.info( - f"Successfully indexed new 
channel {guild_name}#{channel_name} with {len(formatted_messages)} messages" - ) - - except Exception as e: - logger.error( - f"Error processing guild {guild_name}: {e!s}", exc_info=True - ) - skipped_channels.append(f"{guild_name} (processing error)") - documents_skipped += 1 - continue - - if update_last_indexed and documents_indexed > 0: - connector.last_indexed_at = datetime.now(UTC) - logger.info(f"Updated last_indexed_at to {connector.last_indexed_at}") - - await session.commit() - await discord_client.close_bot() - - # Prepare result message - result_message = None - if skipped_channels: - result_message = f"Processed {documents_indexed} channels. Skipped {len(skipped_channels)} channels: {', '.join(skipped_channels)}" - else: - result_message = f"Processed {documents_indexed} channels." - - # Log success - await task_logger.log_task_success( - log_entry, - f"Successfully completed Discord indexing for connector {connector_id}", - { - "channels_processed": documents_indexed, - "documents_indexed": documents_indexed, - "documents_skipped": documents_skipped, - "skipped_channels_count": len(skipped_channels), - "guilds_processed": len(guilds), - "result_message": result_message, - }, - ) - - logger.info( - f"Discord indexing completed: {documents_indexed} new channels, {documents_skipped} skipped" - ) - return documents_indexed, result_message - - except SQLAlchemyError as db_error: - await session.rollback() - await task_logger.log_task_failure( - log_entry, - f"Database error during Discord indexing for connector {connector_id}", - str(db_error), - {"error_type": "SQLAlchemyError"}, - ) - logger.error( - f"Database error during Discord indexing: {db_error!s}", exc_info=True - ) - return 0, f"Database error: {db_error!s}" - except Exception as e: - await session.rollback() - await task_logger.log_task_failure( - log_entry, - f"Failed to index Discord messages for connector {connector_id}", - str(e), - {"error_type": type(e).__name__}, - ) - logger.error(f"Failed to index Discord messages: {e!s}", exc_info=True) - return 0, f"Failed to index Discord messages: {e!s}" - - -async def index_jira_issues( - session: AsyncSession, - connector_id: int, - search_space_id: int, - user_id: str, - start_date: str | None = None, - end_date: str | None = None, - update_last_indexed: bool = True, -) -> tuple[int, str | None]: - """ - Index Jira issues and comments. 
- - Args: - session: Database session - connector_id: ID of the Jira connector - search_space_id: ID of the search space to store documents in - user_id: User ID - start_date: Start date for indexing (YYYY-MM-DD format) - end_date: End date for indexing (YYYY-MM-DD format) - update_last_indexed: Whether to update the last_indexed_at timestamp (default: True) - - Returns: - Tuple containing (number of documents indexed, error message or None) - """ - task_logger = TaskLoggingService(session, search_space_id) - - # Log task start - log_entry = await task_logger.log_task_start( - task_name="jira_issues_indexing", - source="connector_indexing_task", - message=f"Starting Jira issues indexing for connector {connector_id}", - metadata={ - "connector_id": connector_id, - "user_id": str(user_id), - "start_date": start_date, - "end_date": end_date, - }, - ) - - try: - # Get the connector from the database - result = await session.execute( - select(SearchSourceConnector).filter( - SearchSourceConnector.id == connector_id, - SearchSourceConnector.connector_type - == SearchSourceConnectorType.JIRA_CONNECTOR, - ) - ) - connector = result.scalars().first() - - if not connector: - await task_logger.log_task_failure( - log_entry, - f"Connector with ID {connector_id} not found", - "Connector not found", - {"error_type": "ConnectorNotFound"}, - ) - return 0, f"Connector with ID {connector_id} not found" - - # Get the Jira credentials from the connector config - jira_email = connector.config.get("JIRA_EMAIL") - jira_api_token = connector.config.get("JIRA_API_TOKEN") - jira_base_url = connector.config.get("JIRA_BASE_URL") - - if not jira_email or not jira_api_token or not jira_base_url: - await task_logger.log_task_failure( - log_entry, - f"Jira credentials not found in connector config for connector {connector_id}", - "Missing Jira credentials", - {"error_type": "MissingCredentials"}, - ) - return 0, "Jira credentials not found in connector config" - - # Initialize Jira client - await task_logger.log_task_progress( - log_entry, - f"Initializing Jira client for connector {connector_id}", - {"stage": "client_initialization"}, - ) - - jira_client = JiraConnector( - base_url=jira_base_url, email=jira_email, api_token=jira_api_token - ) - - # Calculate date range - if start_date is None or end_date is None: - # Fall back to calculating dates based on last_indexed_at - calculated_end_date = datetime.now() - - # Use last_indexed_at as start date if available, otherwise use 365 days ago - if connector.last_indexed_at: - # Convert dates to be comparable (both timezone-naive) - last_indexed_naive = ( - connector.last_indexed_at.replace(tzinfo=None) - if connector.last_indexed_at.tzinfo - else connector.last_indexed_at - ) - - # Check if last_indexed_at is in the future or after end_date - if last_indexed_naive > calculated_end_date: - logger.warning( - f"Last indexed date ({last_indexed_naive.strftime('%Y-%m-%d')}) is in the future. Using 365 days ago instead." 
- ) - calculated_start_date = calculated_end_date - timedelta(days=365) - else: - calculated_start_date = last_indexed_naive - logger.info( - f"Using last_indexed_at ({calculated_start_date.strftime('%Y-%m-%d')}) as start date" - ) - else: - calculated_start_date = calculated_end_date - timedelta( - days=365 - ) # Use 365 days as default - logger.info( - f"No last_indexed_at found, using {calculated_start_date.strftime('%Y-%m-%d')} (365 days ago) as start date" - ) - - # Use calculated dates if not provided - start_date_str = ( - start_date if start_date else calculated_start_date.strftime("%Y-%m-%d") - ) - end_date_str = ( - end_date if end_date else calculated_end_date.strftime("%Y-%m-%d") - ) - else: - # Use provided dates - start_date_str = start_date - end_date_str = end_date - - await task_logger.log_task_progress( - log_entry, - f"Fetching Jira issues from {start_date_str} to {end_date_str}", - { - "stage": "fetching_issues", - "start_date": start_date_str, - "end_date": end_date_str, - }, - ) - - # Get issues within date range - try: - issues, error = jira_client.get_issues_by_date_range( - start_date=start_date_str, end_date=end_date_str, include_comments=True - ) - - if error: - logger.error(f"Failed to get Jira issues: {error}") - - # Don't treat "No issues found" as an error that should stop indexing - if "No issues found" in error: - logger.info( - "No issues found is not a critical error, continuing with update" - ) - if update_last_indexed: - connector.last_indexed_at = datetime.now() - await session.commit() - logger.info( - f"Updated last_indexed_at to {connector.last_indexed_at} despite no issues found" - ) - - await task_logger.log_task_success( - log_entry, - f"No Jira issues found in date range {start_date_str} to {end_date_str}", - {"issues_found": 0}, - ) - return 0, None - else: - await task_logger.log_task_failure( - log_entry, - f"Failed to get Jira issues: {error}", - "API Error", - {"error_type": "APIError"}, - ) - return 0, f"Failed to get Jira issues: {error}" - - logger.info(f"Retrieved {len(issues)} issues from Jira API") - - except Exception as e: - logger.error(f"Error fetching Jira issues: {e!s}", exc_info=True) - return 0, f"Error fetching Jira issues: {e!s}" - - # Process and index each issue - documents_indexed = 0 - skipped_issues = [] - documents_skipped = 0 - - for issue in issues: - try: - issue_id = issue.get("key") - issue_identifier = issue.get("key", "") - issue_title = issue.get("id", "") - - if not issue_id or not issue_title: - logger.warning( - f"Skipping issue with missing ID or title: {issue_id or 'Unknown'}" - ) - skipped_issues.append( - f"{issue_identifier or 'Unknown'} (missing data)" - ) - documents_skipped += 1 - continue - - # Format the issue for better readability - formatted_issue = jira_client.format_issue(issue) - - # Convert to markdown - issue_content = jira_client.format_issue_to_markdown(formatted_issue) - - if not issue_content: - logger.warning( - f"Skipping issue with no content: {issue_identifier} - {issue_title}" - ) - skipped_issues.append(f"{issue_identifier} (no content)") - documents_skipped += 1 - continue - - # Create a simple summary - summary_content = f"Jira Issue {issue_identifier}: {issue_title}\n\nStatus: {formatted_issue.get('status', 'Unknown')}\n\n" - if formatted_issue.get("description"): - summary_content += ( - f"Description: {formatted_issue.get('description')}\n\n" - ) - - # Add comment count - comment_count = len(formatted_issue.get("comments", [])) - summary_content += f"Comments: 
{comment_count}" - - # Generate content hash - content_hash = generate_content_hash(issue_content, search_space_id) - - # Check if document already exists - existing_doc_by_hash_result = await session.execute( - select(Document).where(Document.content_hash == content_hash) - ) - existing_document_by_hash = ( - existing_doc_by_hash_result.scalars().first() - ) - - if existing_document_by_hash: - logger.info( - f"Document with content hash {content_hash} already exists for issue {issue_identifier}. Skipping processing." - ) - documents_skipped += 1 - continue - - # Generate embedding for the summary - summary_embedding = config.embedding_model_instance.embed( - summary_content - ) - - # Process chunks - using the full issue content with comments - chunks = [ - Chunk( - content=chunk.text, - embedding=config.embedding_model_instance.embed(chunk.text), - ) - for chunk in config.chunker_instance.chunk(issue_content) - ] - - # Create and store new document - logger.info( - f"Creating new document for issue {issue_identifier} - {issue_title}" - ) - document = Document( - search_space_id=search_space_id, - title=f"Jira - {issue_identifier}: {issue_title}", - document_type=DocumentType.JIRA_CONNECTOR, - document_metadata={ - "issue_id": issue_id, - "issue_identifier": issue_identifier, - "issue_title": issue_title, - "state": formatted_issue.get("status", "Unknown"), - "comment_count": comment_count, - "indexed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"), - }, - content=summary_content, - content_hash=content_hash, - embedding=summary_embedding, - chunks=chunks, - ) - - session.add(document) - documents_indexed += 1 - logger.info( - f"Successfully indexed new issue {issue_identifier} - {issue_title}" - ) - - except Exception as e: - logger.error( - f"Error processing issue {issue.get('identifier', 'Unknown')}: {e!s}", - exc_info=True, - ) - skipped_issues.append( - f"{issue.get('identifier', 'Unknown')} (processing error)" - ) - documents_skipped += 1 - continue # Skip this issue and continue with others - - # Update the last_indexed_at timestamp for the connector only if requested - total_processed = documents_indexed - if update_last_indexed: - connector.last_indexed_at = datetime.now() - logger.info(f"Updated last_indexed_at to {connector.last_indexed_at}") - - # Commit all changes - await session.commit() - logger.info("Successfully committed all JIRA document changes to database") - - # Log success - await task_logger.log_task_success( - log_entry, - f"Successfully completed JIRA indexing for connector {connector_id}", - { - "issues_processed": total_processed, - "documents_indexed": documents_indexed, - "documents_skipped": documents_skipped, - "skipped_issues_count": len(skipped_issues), - }, - ) - - logger.info( - f"JIRA indexing completed: {documents_indexed} new issues, {documents_skipped} skipped" - ) - return ( - total_processed, - None, - ) # Return None as the error message to indicate success - - except SQLAlchemyError as db_error: - await session.rollback() - await task_logger.log_task_failure( - log_entry, - f"Database error during JIRA indexing for connector {connector_id}", - str(db_error), - {"error_type": "SQLAlchemyError"}, - ) - logger.error(f"Database error: {db_error!s}", exc_info=True) - return 0, f"Database error: {db_error!s}" - except Exception as e: - await session.rollback() - await task_logger.log_task_failure( - log_entry, - f"Failed to index JIRA issues for connector {connector_id}", - str(e), - {"error_type": type(e).__name__}, - ) - logger.error(f"Failed to 
index JIRA issues: {e!s}", exc_info=True) - return 0, f"Failed to index JIRA issues: {e!s}" - - -async def index_confluence_pages( - session: AsyncSession, - connector_id: int, - search_space_id: int, - user_id: str, - start_date: str | None = None, - end_date: str | None = None, - update_last_indexed: bool = True, -) -> tuple[int, str | None]: - """ - Index Confluence pages and comments. - - Args: - session: Database session - connector_id: ID of the Confluence connector - search_space_id: ID of the search space to store documents in - user_id: User ID - start_date: Start date for indexing (YYYY-MM-DD format) - end_date: End date for indexing (YYYY-MM-DD format) - update_last_indexed: Whether to update the last_indexed_at timestamp (default: True) - - Returns: - Tuple containing (number of documents indexed, error message or None) - """ - task_logger = TaskLoggingService(session, search_space_id) - - # Log task start - log_entry = await task_logger.log_task_start( - task_name="confluence_pages_indexing", - source="connector_indexing_task", - message=f"Starting Confluence pages indexing for connector {connector_id}", - metadata={ - "connector_id": connector_id, - "user_id": str(user_id), - "start_date": start_date, - "end_date": end_date, - }, - ) - - try: - # Get the connector from the database - result = await session.execute( - select(SearchSourceConnector).filter( - SearchSourceConnector.id == connector_id, - SearchSourceConnector.connector_type - == SearchSourceConnectorType.CONFLUENCE_CONNECTOR, - ) - ) - connector = result.scalars().first() - - if not connector: - await task_logger.log_task_failure( - log_entry, - f"Connector with ID {connector_id} not found", - "Connector not found", - {"error_type": "ConnectorNotFound"}, - ) - return 0, f"Connector with ID {connector_id} not found" - - # Get the Confluence credentials from the connector config - confluence_email = connector.config.get("CONFLUENCE_EMAIL") - confluence_api_token = connector.config.get("CONFLUENCE_API_TOKEN") - confluence_base_url = connector.config.get("CONFLUENCE_BASE_URL") - - if not confluence_email or not confluence_api_token or not confluence_base_url: - await task_logger.log_task_failure( - log_entry, - f"Confluence credentials not found in connector config for connector {connector_id}", - "Missing Confluence credentials", - {"error_type": "MissingCredentials"}, - ) - return 0, "Confluence credentials not found in connector config" - - # Initialize Confluence client - await task_logger.log_task_progress( - log_entry, - f"Initializing Confluence client for connector {connector_id}", - {"stage": "client_initialization"}, - ) - - confluence_client = ConfluenceConnector( - base_url=confluence_base_url, - email=confluence_email, - api_token=confluence_api_token, - ) - - # Calculate date range - if start_date is None or end_date is None: - # Fall back to calculating dates based on last_indexed_at - calculated_end_date = datetime.now() - - # Use last_indexed_at as start date if available, otherwise use 365 days ago - if connector.last_indexed_at: - # Convert dates to be comparable (both timezone-naive) - last_indexed_naive = ( - connector.last_indexed_at.replace(tzinfo=None) - if connector.last_indexed_at.tzinfo - else connector.last_indexed_at - ) - - # Check if last_indexed_at is in the future or after end_date - if last_indexed_naive > calculated_end_date: - logger.warning( - f"Last indexed date ({last_indexed_naive.strftime('%Y-%m-%d')}) is in the future. Using 365 days ago instead." 
- ) - calculated_start_date = calculated_end_date - timedelta(days=365) - else: - calculated_start_date = last_indexed_naive - logger.info( - f"Using last_indexed_at ({calculated_start_date.strftime('%Y-%m-%d')}) as start date" - ) - else: - calculated_start_date = calculated_end_date - timedelta( - days=365 - ) # Use 365 days as default - logger.info( - f"No last_indexed_at found, using {calculated_start_date.strftime('%Y-%m-%d')} (365 days ago) as start date" - ) - - # Use calculated dates if not provided - start_date_str = ( - start_date if start_date else calculated_start_date.strftime("%Y-%m-%d") - ) - end_date_str = ( - end_date if end_date else calculated_end_date.strftime("%Y-%m-%d") - ) - else: - # Use provided dates - start_date_str = start_date - end_date_str = end_date - - await task_logger.log_task_progress( - log_entry, - f"Fetching Confluence pages from {start_date_str} to {end_date_str}", - { - "stage": "fetching_pages", - "start_date": start_date_str, - "end_date": end_date_str, - }, - ) - - # Get pages within date range - try: - pages, error = confluence_client.get_pages_by_date_range( - start_date=start_date_str, end_date=end_date_str, include_comments=True - ) - - if error: - logger.error(f"Failed to get Confluence pages: {error}") - - # Don't treat "No pages found" as an error that should stop indexing - if "No pages found" in error: - logger.info( - "No pages found is not a critical error, continuing with update" - ) - if update_last_indexed: - connector.last_indexed_at = datetime.now() - await session.commit() - logger.info( - f"Updated last_indexed_at to {connector.last_indexed_at} despite no pages found" - ) - - await task_logger.log_task_success( - log_entry, - f"No Confluence pages found in date range {start_date_str} to {end_date_str}", - {"pages_found": 0}, - ) - return 0, None - else: - await task_logger.log_task_failure( - log_entry, - f"Failed to get Confluence pages: {error}", - "API Error", - {"error_type": "APIError"}, - ) - return 0, f"Failed to get Confluence pages: {error}" - - logger.info(f"Retrieved {len(pages)} pages from Confluence API") - - except Exception as e: - logger.error(f"Error fetching Confluence pages: {e!s}", exc_info=True) - return 0, f"Error fetching Confluence pages: {e!s}" - - # Process and index each page - documents_indexed = 0 - skipped_pages = [] - documents_skipped = 0 - - for page in pages: - try: - page_id = page.get("id") - page_title = page.get("title", "") - space_id = page.get("spaceId", "") - - if not page_id or not page_title: - logger.warning( - f"Skipping page with missing ID or title: {page_id or 'Unknown'}" - ) - skipped_pages.append(f"{page_title or 'Unknown'} (missing data)") - documents_skipped += 1 - continue - - # Extract page content - page_content = "" - if page.get("body") and page["body"].get("storage"): - page_content = page["body"]["storage"].get("value", "") - - # Add comments to content - comments = page.get("comments", []) - comments_content = "" - if comments: - comments_content = "\n\n## Comments\n\n" - for comment in comments: - comment_body = "" - if comment.get("body") and comment["body"].get("storage"): - comment_body = comment["body"]["storage"].get("value", "") - - comment_author = comment.get("version", {}).get( - "authorId", "Unknown" - ) - comment_date = comment.get("version", {}).get("createdAt", "") - - comments_content += f"**Comment by {comment_author}** ({comment_date}):\n{comment_body}\n\n" - - # Combine page content with comments - full_content = f"# 
{page_title}\n\n{page_content}{comments_content}" - - if not full_content.strip(): - logger.warning(f"Skipping page with no content: {page_title}") - skipped_pages.append(f"{page_title} (no content)") - documents_skipped += 1 - continue - - # Create a simple summary - summary_content = ( - f"Confluence Page: {page_title}\n\nSpace ID: {space_id}\n\n" - ) - if page_content: - # Take first 500 characters of content for summary - content_preview = page_content[:500] - if len(page_content) > 500: - content_preview += "..." - summary_content += f"Content Preview: {content_preview}\n\n" - - # Add comment count - comment_count = len(comments) - summary_content += f"Comments: {comment_count}" - - # Generate content hash - content_hash = generate_content_hash(full_content, search_space_id) - - # Check if document already exists - existing_doc_by_hash_result = await session.execute( - select(Document).where(Document.content_hash == content_hash) - ) - existing_document_by_hash = ( - existing_doc_by_hash_result.scalars().first() - ) - - if existing_document_by_hash: - logger.info( - f"Document with content hash {content_hash} already exists for page {page_title}. Skipping processing." - ) - documents_skipped += 1 - continue - - # Generate embedding for the summary - summary_embedding = config.embedding_model_instance.embed( - summary_content - ) - - # Process chunks - using the full page content with comments - chunks = [ - Chunk( - content=chunk.text, - embedding=config.embedding_model_instance.embed(chunk.text), - ) - for chunk in config.chunker_instance.chunk(full_content) - ] - - # Create and store new document - logger.info(f"Creating new document for page {page_title}") - document = Document( - search_space_id=search_space_id, - title=f"Confluence - {page_title}", - document_type=DocumentType.CONFLUENCE_CONNECTOR, - document_metadata={ - "page_id": page_id, - "page_title": page_title, - "space_id": space_id, - "comment_count": comment_count, - "indexed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"), - }, - content=summary_content, - content_hash=content_hash, - embedding=summary_embedding, - chunks=chunks, - ) - - session.add(document) - documents_indexed += 1 - logger.info(f"Successfully indexed new page {page_title}") - - except Exception as e: - logger.error( - f"Error processing page {page.get('title', 'Unknown')}: {e!s}", - exc_info=True, - ) - skipped_pages.append( - f"{page.get('title', 'Unknown')} (processing error)" - ) - documents_skipped += 1 - continue # Skip this page and continue with others - - # Update the last_indexed_at timestamp for the connector only if requested - total_processed = documents_indexed - if update_last_indexed: - connector.last_indexed_at = datetime.now() - logger.info(f"Updated last_indexed_at to {connector.last_indexed_at}") - - # Commit all changes - await session.commit() - logger.info( - "Successfully committed all Confluence document changes to database" - ) - - # Log success - await task_logger.log_task_success( - log_entry, - f"Successfully completed Confluence indexing for connector {connector_id}", - { - "pages_processed": total_processed, - "documents_indexed": documents_indexed, - "documents_skipped": documents_skipped, - "skipped_pages_count": len(skipped_pages), - }, - ) - - logger.info( - f"Confluence indexing completed: {documents_indexed} new pages, {documents_skipped} skipped" - ) - return ( - total_processed, - None, - ) # Return None as the error message to indicate success - - except SQLAlchemyError as db_error: - await 
session.rollback() - await task_logger.log_task_failure( - log_entry, - f"Database error during Confluence indexing for connector {connector_id}", - str(db_error), - {"error_type": "SQLAlchemyError"}, - ) - logger.error(f"Database error: {db_error!s}", exc_info=True) - return 0, f"Database error: {db_error!s}" - except Exception as e: - await session.rollback() - await task_logger.log_task_failure( - log_entry, - f"Failed to index Confluence pages for connector {connector_id}", - str(e), - {"error_type": type(e).__name__}, - ) - logger.error(f"Failed to index Confluence pages: {e!s}", exc_info=True) - return 0, f"Failed to index Confluence pages: {e!s}" - - -async def index_clickup_tasks( - session: AsyncSession, - connector_id: int, - search_space_id: int, - user_id: str, - start_date: str | None = None, - end_date: str | None = None, - update_last_indexed: bool = True, -) -> tuple[int, str | None]: - """ - Index tasks from ClickUp workspace. - - Args: - session: Database session - connector_id: ID of the ClickUp connector - search_space_id: ID of the search space - user_id: ID of the user - start_date: Start date for filtering tasks (YYYY-MM-DD format) - end_date: End date for filtering tasks (YYYY-MM-DD format) - update_last_indexed: Whether to update the last_indexed_at timestamp - - Returns: - Tuple of (number of indexed tasks, error message if any) - """ - task_logger = TaskLoggingService(session, search_space_id) - - # Log task start - log_entry = await task_logger.log_task_start( - task_name="clickup_tasks_indexing", - source="connector_indexing_task", - message=f"Starting ClickUp tasks indexing for connector {connector_id}", - metadata={ - "connector_id": connector_id, - "start_date": start_date, - "end_date": end_date, - }, - ) - - try: - # Get connector configuration - result = await session.execute( - select(SearchSourceConnector).filter( - SearchSourceConnector.id == connector_id - ) - ) - connector = result.scalars().first() - - if not connector: - error_msg = f"ClickUp connector with ID {connector_id} not found" - await task_logger.log_task_failure( - log_entry, - f"Connector with ID {connector_id} not found or is not a ClickUp connector", - "Connector not found", - {"error_type": "ConnectorNotFound"}, - ) - return 0, error_msg - - # Extract ClickUp configuration - clickup_api_token = connector.config.get("CLICKUP_API_TOKEN") - - if not clickup_api_token: - error_msg = "ClickUp API token not found in connector configuration" - await task_logger.log_task_failure( - log_entry, - f"ClickUp API token not found in connector config for connector {connector_id}", - "Missing ClickUp token", - {"error_type": "MissingToken"}, - ) - return 0, error_msg - - await task_logger.log_task_progress( - log_entry, - f"Initializing ClickUp client for connector {connector_id}", - {"stage": "client_initialization"}, - ) - - clickup_client = ClickUpConnector(api_token=clickup_api_token) - - # Get authorized workspaces - await task_logger.log_task_progress( - log_entry, - "Fetching authorized ClickUp workspaces", - {"stage": "workspace_fetching"}, - ) - - workspaces_response = clickup_client.get_authorized_workspaces() - workspaces = workspaces_response.get("teams", []) - - if not workspaces: - error_msg = "No authorized ClickUp workspaces found" - await task_logger.log_task_failure( - log_entry, - f"No authorized ClickUp workspaces found for connector {connector_id}", - "No workspaces found", - {"error_type": "NoWorkspacesFound"}, - ) - return 0, error_msg - - # Process and index each task - 
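Each per-item loop in these indexers applies the same dedupe guard before doing any embedding work: hash the assembled content, look for an existing Document with that hash, and skip on a match. A minimal sketch of that guard as a helper; is_duplicate is hypothetical, and generate_content_hash and Document are the surrounding module's own names, assumed to be in scope:

from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession


async def is_duplicate(
    session: AsyncSession, content: str, search_space_id: int
) -> bool:
    """Return True if a Document with the same content hash already exists."""
    # generate_content_hash and Document come from the surrounding module.
    content_hash = generate_content_hash(content, search_space_id)
    result = await session.execute(
        select(Document).where(Document.content_hash == content_hash)
    )
    return result.scalars().first() is not None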
documents_indexed = 0 - documents_skipped = 0 - - for workspace in workspaces: - workspace_id = workspace.get("id") - workspace_name = workspace.get("name", "Unknown Workspace") - - if not workspace_id: - continue - - await task_logger.log_task_progress( - log_entry, - f"Processing workspace: {workspace_name}", - {"stage": "workspace_processing", "workspace_id": workspace_id}, - ) - - # Fetch tasks from workspace - if start_date and end_date: - tasks, error = clickup_client.get_tasks_in_date_range( - workspace_id=workspace_id, - start_date=start_date, - end_date=end_date, - include_closed=True, - ) - if error: - logger.warning( - f"Error fetching tasks from workspace {workspace_name}: {error}" - ) - continue - else: - tasks = clickup_client.get_workspace_tasks( - workspace_id=workspace_id, include_closed=True - ) - - await task_logger.log_task_progress( - log_entry, - f"Found {len(tasks)} tasks in workspace {workspace_name}", - {"stage": "tasks_found", "task_count": len(tasks)}, - ) - - # Process each task - for task in tasks: - try: - task_id = task.get("id") - task_name = task.get("name", "Untitled Task") - task_description = task.get("description", "") - task_status = task.get("status", {}).get("status", "Unknown") - task_priority = ( - task.get("priority", {}).get("priority", "Unknown") - if task.get("priority") - else "None" - ) - task_assignees = task.get("assignees", []) - task_due_date = task.get("due_date") - task_created = task.get("date_created") - task_updated = task.get("date_updated") - - # Get list and space information - task_list = task.get("list", {}) - task_list_name = task_list.get("name", "Unknown List") - task_space = task.get("space", {}) - task_space_name = task_space.get("name", "Unknown Space") - - # Create task content - content_parts = [f"Task: {task_name}"] - - if task_description: - content_parts.append(f"Description: {task_description}") - - content_parts.extend( - [ - f"Status: {task_status}", - f"Priority: {task_priority}", - f"List: {task_list_name}", - f"Space: {task_space_name}", - ] - ) - - if task_assignees: - assignee_names = [ - assignee.get("username", "Unknown") - for assignee in task_assignees - ] - content_parts.append(f"Assignees: {', '.join(assignee_names)}") - - if task_due_date: - content_parts.append(f"Due Date: {task_due_date}") - - task_content = "\n".join(content_parts) - - if not task_content.strip(): - logger.warning(f"Skipping task with no content: {task_name}") - continue - - # Generate content hash - content_hash = generate_content_hash(task_content, search_space_id) - - # Check if document already exists - existing_doc_by_hash_result = await session.execute( - select(Document).where(Document.content_hash == content_hash) - ) - existing_document_by_hash = ( - existing_doc_by_hash_result.scalars().first() - ) - - if existing_document_by_hash: - logger.info( - f"Document with content hash {content_hash} already exists for task {task_name}. Skipping processing." 
- ) - documents_skipped += 1 - continue - - # Generate embedding for the summary - summary_embedding = config.embedding_model_instance.embed( - task_content - ) - - # Process chunks - using the full page content with comments - chunks = [ - Chunk( - content=chunk.text, - embedding=config.embedding_model_instance.embed(chunk.text), - ) - for chunk in config.chunker_instance.chunk(task_content) - ] - - # Create and store new document - logger.info(f"Creating new document for task {task_name}") - - document = Document( - search_space_id=search_space_id, - title=f"Task - {task_name}", - document_type=DocumentType.CLICKUP_CONNECTOR, - document_metadata={ - "task_id": task_id, - "task_name": task_name, - "task_status": task_status, - "task_priority": task_priority, - "task_assignees": task_assignees, - "task_due_date": task_due_date, - "task_created": task_created, - "task_updated": task_updated, - "indexed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"), - }, - content=task_content, - content_hash=content_hash, - embedding=summary_embedding, - chunks=chunks, - ) - - session.add(document) - documents_indexed += 1 - logger.info(f"Successfully indexed new task {task_name}") - - except Exception as e: - logger.error( - f"Error processing task {task.get('name', 'Unknown')}: {e!s}", - exc_info=True, - ) - documents_skipped += 1 - - # Update the last_indexed_at timestamp for the connector only if requested - total_processed = documents_indexed - if update_last_indexed: - connector.last_indexed_at = datetime.now() - logger.info(f"Updated last_indexed_at to {connector.last_indexed_at}") - - # Commit all changes - await session.commit() - logger.info( - "Successfully committed all clickup document changes to database" - ) - - # Log success - await task_logger.log_task_success( - log_entry, - f"Successfully completed clickup indexing for connector {connector_id}", - { - "pages_processed": total_processed, - "documents_indexed": documents_indexed, - "documents_skipped": documents_skipped, - }, - ) - - logger.info( - f"clickup indexing completed: {documents_indexed} new tasks, {documents_skipped} skipped" - ) - return ( - total_processed, - None, - ) # Return None as the error message to indicate success - - except SQLAlchemyError as db_error: - await session.rollback() - await task_logger.log_task_failure( - log_entry, - f"Database error during Cickup indexing for connector {connector_id}", - str(db_error), - {"error_type": "SQLAlchemyError"}, - ) - logger.error(f"Database error: {db_error!s}", exc_info=True) - return 0, f"Database error: {db_error!s}" - except Exception as e: - await session.rollback() - await task_logger.log_task_failure( - log_entry, - f"Failed to index ClickUp tasks for connector {connector_id}", - str(e), - {"error_type": type(e).__name__}, - ) - logger.error(f"Failed to index ClickUp tasks: {e!s}", exc_info=True) - return 0, f"Failed to index ClickUp tasks: {e!s}" - - -async def index_google_calendar_events( - session: AsyncSession, - connector_id: int, - search_space_id: int, - user_id: str, - start_date: str | None = None, - end_date: str | None = None, - update_last_indexed: bool = True, -) -> tuple[int, str | None]: - """ - Index Google Calendar events. 
- - Args: - session: Database session - connector_id: ID of the Google Calendar connector - search_space_id: ID of the search space to store documents in - user_id: User ID - start_date: Start date for indexing (YYYY-MM-DD format) - end_date: End date for indexing (YYYY-MM-DD format) - update_last_indexed: Whether to update the last_indexed_at timestamp (default: True) - - Returns: - Tuple containing (number of documents indexed, error message or None) - """ - task_logger = TaskLoggingService(session, search_space_id) - - # Log task start - log_entry = await task_logger.log_task_start( - task_name="google_calendar_events_indexing", - source="connector_indexing_task", - message=f"Starting Google Calendar events indexing for connector {connector_id}", - metadata={ - "connector_id": connector_id, - "user_id": str(user_id), - "start_date": start_date, - "end_date": end_date, - }, - ) - - try: - # Get the connector from the database - result = await session.execute( - select(SearchSourceConnector).filter( - SearchSourceConnector.id == connector_id, - SearchSourceConnector.connector_type - == SearchSourceConnectorType.GOOGLE_CALENDAR_CONNECTOR, - ) - ) - connector = result.scalars().first() - - if not connector: - await task_logger.log_task_failure( - log_entry, - f"Connector with ID {connector_id} not found", - "Connector not found", - {"error_type": "ConnectorNotFound"}, - ) - return 0, f"Connector with ID {connector_id} not found" - - # Get the Google Calendar credentials from the connector config - credentials = Credentials( - token=connector.config.get("token"), - refresh_token=connector.config.get("refresh_token"), - token_uri=connector.config.get("token_uri"), - client_id=connector.config.get("client_id"), - client_secret=connector.config.get("client_secret"), - scopes=connector.config.get("scopes"), - ) - - if ( - not credentials.client_id - or not credentials.client_secret - or not credentials.refresh_token - ): - await task_logger.log_task_failure( - log_entry, - f"Google Calendar credentials not found in connector config for connector {connector_id}", - "Missing Google Calendar credentials", - {"error_type": "MissingCredentials"}, - ) - return 0, "Google Calendar credentials not found in connector config" - - # Initialize Google Calendar client - await task_logger.log_task_progress( - log_entry, - f"Initializing Google Calendar client for connector {connector_id}", - {"stage": "client_initialization"}, - ) - - calendar_client = GoogleCalendarConnector(credentials=credentials) - - # Calculate date range - if start_date is None or end_date is None: - # Fall back to calculating dates based on last_indexed_at - calculated_end_date = datetime.now() - - # Use last_indexed_at as start date if available, otherwise use 30 days ago - if connector.last_indexed_at: - # Convert dates to be comparable (both timezone-naive) - last_indexed_naive = ( - connector.last_indexed_at.replace(tzinfo=None) - if connector.last_indexed_at.tzinfo - else connector.last_indexed_at - ) - - # Check if last_indexed_at is in the future or after end_date - if last_indexed_naive > calculated_end_date: - logger.warning( - f"Last indexed date ({last_indexed_naive.strftime('%Y-%m-%d')}) is in the future. Using 30 days ago instead." 
- ) - calculated_start_date = calculated_end_date - timedelta(days=30) - else: - calculated_start_date = last_indexed_naive - logger.info( - f"Using last_indexed_at ({calculated_start_date.strftime('%Y-%m-%d')}) as start date" - ) - else: - calculated_start_date = calculated_end_date - timedelta( - days=30 - ) # Use 30 days as default for calendar events - logger.info( - f"No last_indexed_at found, using {calculated_start_date.strftime('%Y-%m-%d')} (30 days ago) as start date" - ) - - # Use calculated dates if not provided - start_date_str = ( - start_date if start_date else calculated_start_date.strftime("%Y-%m-%d") - ) - end_date_str = ( - end_date if end_date else calculated_end_date.strftime("%Y-%m-%d") - ) - else: - # Use provided dates - start_date_str = start_date - end_date_str = end_date - - await task_logger.log_task_progress( - log_entry, - f"Fetching Google Calendar events from {start_date_str} to {end_date_str}", - { - "stage": "fetching_events", - "start_date": start_date_str, - "end_date": end_date_str, - }, - ) - - # Get events within date range from primary calendar - try: - events, error = calendar_client.get_all_primary_calendar_events( - start_date=start_date_str, end_date=end_date_str - ) - - if error: - logger.error(f"Failed to get Google Calendar events: {error}") - - # Don't treat "No events found" as an error that should stop indexing - if "No events found" in error: - logger.info( - "No events found is not a critical error, continuing with update" - ) - if update_last_indexed: - connector.last_indexed_at = datetime.now() - await session.commit() - logger.info( - f"Updated last_indexed_at to {connector.last_indexed_at} despite no events found" - ) - - await task_logger.log_task_success( - log_entry, - f"No Google Calendar events found in date range {start_date_str} to {end_date_str}", - {"events_found": 0}, - ) - return 0, None - else: - await task_logger.log_task_failure( - log_entry, - f"Failed to get Google Calendar events: {error}", - "API Error", - {"error_type": "APIError"}, - ) - return 0, f"Failed to get Google Calendar events: {error}" - - logger.info(f"Retrieved {len(events)} events from Google Calendar API") - - except Exception as e: - logger.error(f"Error fetching Google Calendar events: {e!s}", exc_info=True) - return 0, f"Error fetching Google Calendar events: {e!s}" - - # Process and index each event - documents_indexed = 0 - skipped_events = [] - documents_skipped = 0 - - for event in events: - try: - event_id = event.get("id") - event_summary = event.get("summary", "No Title") - calendar_id = event.get("calendarId", "") - - if not event_id: - logger.warning(f"Skipping event with missing ID: {event_summary}") - skipped_events.append(f"{event_summary} (missing ID)") - documents_skipped += 1 - continue - - # Format event as markdown - event_markdown = calendar_client.format_event_to_markdown(event) - - if not event_markdown.strip(): - logger.warning(f"Skipping event with no content: {event_summary}") - skipped_events.append(f"{event_summary} (no content)") - documents_skipped += 1 - continue - - # Create a simple summary for the document - start = event.get("start", {}) - end = event.get("end", {}) - start_time = start.get("dateTime") or start.get("date", "") - end_time = end.get("dateTime") or end.get("date", "") - location = event.get("location", "") - description = event.get("description", "") - - summary_content = f"Google Calendar Event: {event_summary}\n\n" - summary_content += f"Calendar: {calendar_id}\n" - summary_content += f"Start: 
{start_time}\n" - summary_content += f"End: {end_time}\n" - - if location: - summary_content += f"Location: {location}\n" - - if description: - # Take first 300 characters of description for summary - desc_preview = description[:300] - if len(description) > 300: - desc_preview += "..." - summary_content += f"Description: {desc_preview}\n" - - # Generate content hash - content_hash = generate_content_hash(event_markdown, search_space_id) - - # Check if document already exists - existing_doc_by_hash_result = await session.execute( - select(Document).where(Document.content_hash == content_hash) - ) - existing_document_by_hash = ( - existing_doc_by_hash_result.scalars().first() - ) - - if existing_document_by_hash: - logger.info( - f"Document with content hash {content_hash} already exists for event {event_summary}. Skipping processing." - ) - documents_skipped += 1 - continue - - # Generate embedding for the summary - summary_embedding = config.embedding_model_instance.embed( - summary_content - ) - - # Process chunks - using the full event markdown - chunks = [ - Chunk( - content=chunk.text, - embedding=config.embedding_model_instance.embed(chunk.text), - ) - for chunk in config.chunker_instance.chunk(event_markdown) - ] - - # Create and store new document - logger.info(f"Creating new document for event {event_summary}") - document = Document( - search_space_id=search_space_id, - title=f"Calendar Event - {event_summary}", - document_type=DocumentType.GOOGLE_CALENDAR_CONNECTOR, - document_metadata={ - "event_id": event_id, - "event_summary": event_summary, - "calendar_id": calendar_id, - "start_time": start_time, - "end_time": end_time, - "location": location, - "indexed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"), - }, - content=summary_content, - content_hash=content_hash, - embedding=summary_embedding, - chunks=chunks, - ) - - session.add(document) - documents_indexed += 1 - logger.info(f"Successfully indexed new event {event_summary}") - - except Exception as e: - logger.error( - f"Error processing event {event.get('summary', 'Unknown')}: {e!s}", - exc_info=True, - ) - skipped_events.append( - f"{event.get('summary', 'Unknown')} (processing error)" - ) - documents_skipped += 1 - continue # Skip this event and continue with others - - # Update the last_indexed_at timestamp for the connector only if requested - total_processed = documents_indexed - if update_last_indexed: - connector.last_indexed_at = datetime.now() - logger.info(f"Updated last_indexed_at to {connector.last_indexed_at}") - - # Commit all changes - await session.commit() - logger.info( - "Successfully committed all Google Calendar document changes to database" - ) - - # Log success - await task_logger.log_task_success( - log_entry, - f"Successfully completed Google Calendar indexing for connector {connector_id}", - { - "events_processed": total_processed, - "documents_indexed": documents_indexed, - "documents_skipped": documents_skipped, - "skipped_events_count": len(skipped_events), - }, - ) - - logger.info( - f"Google Calendar indexing completed: {documents_indexed} new events, {documents_skipped} skipped" - ) - return ( - total_processed, - None, - ) # Return None as the error message to indicate success - - except SQLAlchemyError as db_error: - await session.rollback() - await task_logger.log_task_failure( - log_entry, - f"Database error during Google Calendar indexing for connector {connector_id}", - str(db_error), - {"error_type": "SQLAlchemyError"}, - ) - logger.error(f"Database error: {db_error!s}", 
-            exc_info=True)
-        return 0, f"Database error: {db_error!s}"
-    except Exception as e:
-        await session.rollback()
-        await task_logger.log_task_failure(
-            log_entry,
-            f"Failed to index Google Calendar events for connector {connector_id}",
-            str(e),
-            {"error_type": type(e).__name__},
-        )
-        logger.error(f"Failed to index Google Calendar events: {e!s}", exc_info=True)
-        return 0, f"Failed to index Google Calendar events: {e!s}"
-
-
-async def index_google_gmail_messages(
-    session: AsyncSession,
-    connector_id: int,
-    search_space_id: int,
-    user_id: str,
-    start_date: str | None = None,
-    end_date: str | None = None,
-    update_last_indexed: bool = True,
-    max_messages: int = 100,
-) -> tuple[int, str | None]:
-    """
-    Index Gmail messages for a specific connector.
-
-    Args:
-        session: Database session
-        connector_id: ID of the Gmail connector
-        search_space_id: ID of the search space
-        user_id: ID of the user
-        start_date: Start date for filtering messages (YYYY-MM-DD format)
-        end_date: End date for filtering messages (YYYY-MM-DD format)
-        update_last_indexed: Whether to update the last_indexed_at timestamp (default: True)
-        max_messages: Maximum number of messages to fetch (default: 100)
-
-    Returns:
-        Tuple containing (number of documents indexed, error message or None)
-    """
-    task_logger = TaskLoggingService(session, search_space_id)
-
-    # Calculate days back based on start_date (default to 30 days if absent or invalid)
-    if start_date:
-        try:
-            start_date_obj = datetime.strptime(start_date, "%Y-%m-%d")
-            days_back = (datetime.now() - start_date_obj).days
-        except ValueError:
-            days_back = 30  # Default to 30 days if start_date is invalid
-    else:
-        days_back = 30  # Default to 30 days when no start_date is provided
-
-    # Log task start
-    log_entry = await task_logger.log_task_start(
-        task_name="google_gmail_messages_indexing",
-        source="connector_indexing_task",
-        message=f"Starting Gmail messages indexing for connector {connector_id}",
-        metadata={
-            "connector_id": connector_id,
-            "user_id": str(user_id),
-            "max_messages": max_messages,
-            "days_back": days_back,
-        },
-    )
-
-    try:
-        # Get the connector from the database
-        result = await session.execute(
-            select(SearchSourceConnector).filter(
-                SearchSourceConnector.id == connector_id,
-                SearchSourceConnector.connector_type
-                == SearchSourceConnectorType.GOOGLE_GMAIL_CONNECTOR,
-            )
-        )
-        connector = result.scalars().first()
-
-        if not connector:
-            error_msg = f"Gmail connector with ID {connector_id} not found"
-            await task_logger.log_task_failure(
-                log_entry,
-                error_msg,
-                "Connector not found",
-                {"error_type": "ConnectorNotFound"},
-            )
-            return 0, error_msg
-
-        # Create credentials from connector config
-        config_data = connector.config
-        credentials = Credentials(
-            token=config_data.get("token"),
-            refresh_token=config_data.get("refresh_token"),
-            token_uri=config_data.get("token_uri"),
-            client_id=config_data.get("client_id"),
-            client_secret=config_data.get("client_secret"),
-            scopes=config_data.get("scopes", []),
-        )
-
-        if (
-            not credentials.client_id
-            or not credentials.client_secret
-            or not credentials.refresh_token
-        ):
-            await task_logger.log_task_failure(
-                log_entry,
-                f"Google Gmail credentials not found in connector config for connector {connector_id}",
-                "Missing Google Gmail credentials",
-                {"error_type": "MissingCredentials"},
-            )
-            return 0, "Google Gmail credentials not found in connector config"
-
-        # Initialize Google Gmail client
-        await task_logger.log_task_progress(
-            log_entry,
-            f"Initializing Google Gmail client for connector {connector_id}",
-            {"stage": "client_initialization"},
-        )
-
-        # Initialize Google Gmail connector
-        gmail_connector = GoogleGmailConnector(credentials)
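GoogleGmailConnector.get_recent_messages is implemented elsewhere in this patch series, so its internals are not shown in this hunk. A rough sketch of what such a helper might do with the stored OAuth credentials, using the public Gmail API through google-api-python-client; the actual connector may differ:

from google.oauth2.credentials import Credentials
from googleapiclient.discovery import build


def fetch_recent_gmail_messages(
    credentials: Credentials, max_results: int = 100, days_back: int = 30
) -> tuple[list[dict], str | None]:
    """Return (full message payloads, error or None) for mail newer than days_back days."""
    try:
        service = build("gmail", "v1", credentials=credentials)
        # List message IDs newer than the cutoff, then fetch each full payload
        # so downstream code can read payload["headers"].
        listing = (
            service.users()
            .messages()
            .list(userId="me", q=f"newer_than:{days_back}d", maxResults=max_results)
            .execute()
        )
        messages = []
        for ref in listing.get("messages", []):
            messages.append(
                service.users()
                .messages()
                .get(userId="me", id=ref["id"], format="full")
                .execute()
            )
        return messages, None
    except Exception as e:  # surface the error as a string, as the caller expects
        return [], str(e)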
-
-        # Fetch recent Google Gmail messages
-        logger.info(f"Fetching recent emails for connector {connector_id}")
-        messages, error = gmail_connector.get_recent_messages(
-            max_results=max_messages, days_back=days_back
-        )
-
-        if error:
-            await task_logger.log_task_failure(
-                log_entry,
-                f"Failed to fetch Gmail messages: {error}",
-                "API Error",
-                {"error_type": "APIError"},
-            )
-            return 0, f"Failed to fetch Gmail messages: {error}"
-
-        if not messages:
-            success_msg = "No Google Gmail messages found in the specified date range"
-            await task_logger.log_task_success(
-                log_entry, success_msg, {"messages_count": 0}
-            )
-            return 0, success_msg
-
-        logger.info(f"Found {len(messages)} Google Gmail messages to index")
-
-        documents_indexed = 0
-        skipped_messages = []
-        documents_skipped = 0
-        for message in messages:
-            try:
-                # Extract message information
-                message_id = message.get("id", "")
-                thread_id = message.get("threadId", "")
-
-                # Extract headers for subject and sender
-                payload = message.get("payload", {})
-                headers = payload.get("headers", [])
-
-                subject = "No Subject"
-                sender = "Unknown Sender"
-                date_str = "Unknown Date"
-
-                for header in headers:
-                    name = header.get("name", "").lower()
-                    value = header.get("value", "")
-                    if name == "subject":
-                        subject = value
-                    elif name == "from":
-                        sender = value
-                    elif name == "date":
-                        date_str = value
-
-                if not message_id:
-                    logger.warning(f"Skipping message with missing ID: {subject}")
-                    skipped_messages.append(f"{subject} (missing ID)")
-                    documents_skipped += 1
-                    continue
-
-                # Format message to markdown
-                markdown_content = gmail_connector.format_message_to_markdown(message)
-
-                if not markdown_content.strip():
-                    logger.warning(f"Skipping message with no content: {subject}")
-                    skipped_messages.append(f"{subject} (no content)")
-                    documents_skipped += 1
-                    continue
-
-                # Create a simple summary
-                summary_content = f"Google Gmail Message: {subject}\n\n"
-                summary_content += f"Sender: {sender}\n"
-                summary_content += f"Date: {date_str}\n"
-
-                # Generate content hash
-                content_hash = generate_content_hash(markdown_content, search_space_id)
-
-                # Check if document already exists
-                existing_doc_by_hash_result = await session.execute(
-                    select(Document).where(Document.content_hash == content_hash)
-                )
-                existing_document_by_hash = (
-                    existing_doc_by_hash_result.scalars().first()
-                )
-
-                if existing_document_by_hash:
-                    logger.info(
-                        f"Document with content hash {content_hash} already exists for message {message_id}. Skipping processing."
-                    )
-                    documents_skipped += 1
-                    continue
-
-                # Generate embedding for the summary
-                summary_embedding = config.embedding_model_instance.embed(
-                    summary_content
-                )
-
-                # Process chunks
-                chunks = [
-                    Chunk(
-                        content=chunk.text,
-                        embedding=config.embedding_model_instance.embed(chunk.text),
-                    )
-                    for chunk in config.chunker_instance.chunk(markdown_content)
-                ]
-
-                # Create and store new document
-                logger.info(f"Creating new document for Gmail message: {subject}")
-                document = Document(
-                    search_space_id=search_space_id,
-                    title=f"Gmail: {subject}",
-                    document_type=DocumentType.GOOGLE_GMAIL_CONNECTOR,
-                    document_metadata={
-                        "message_id": message_id,
-                        "thread_id": thread_id,
-                        "subject": subject,
-                        "sender": sender,
-                        "date": date_str,
-                        "connector_id": connector_id,
-                    },
-                    content=markdown_content,
-                    content_hash=content_hash,
-                    embedding=summary_embedding,
-                    chunks=chunks,
-                )
-                session.add(document)
-                documents_indexed += 1
-                logger.info(f"Successfully indexed new email: {subject}")
-
-            except Exception as e:
-                logger.error(
-                    f"Error processing email {message.get('id', 'Unknown')}: {e!s}",
-                    exc_info=True,
-                )
-                skipped_messages.append(f"{subject} (processing error)")
-                documents_skipped += 1
-                continue  # Skip this message and continue with others
-
-        # Update the last_indexed_at timestamp for the connector only if requested
-        total_processed = documents_indexed
-        if update_last_indexed:
-            connector.last_indexed_at = datetime.now()
-            logger.info(f"Updated last_indexed_at to {connector.last_indexed_at}")
-
-        # Commit all changes
-        await session.commit()
-        logger.info(
-            "Successfully committed all Google Gmail document changes to database"
-        )
-
-        # Log success
-        await task_logger.log_task_success(
-            log_entry,
-            f"Successfully completed Google Gmail indexing for connector {connector_id}",
-            {
-                "messages_processed": total_processed,
-                "documents_indexed": documents_indexed,
-                "documents_skipped": documents_skipped,
-                "skipped_messages_count": len(skipped_messages),
-            },
-        )
-
-        logger.info(
-            f"Google Gmail indexing completed: {documents_indexed} new emails, {documents_skipped} skipped"
-        )
-        return (
-            total_processed,
-            None,
-        )  # Return None as the error message to indicate success
-
-    except SQLAlchemyError as db_error:
-        await session.rollback()
-        await task_logger.log_task_failure(
-            log_entry,
-            f"Database error during Google Gmail indexing for connector {connector_id}",
-            str(db_error),
-            {"error_type": "SQLAlchemyError"},
-        )
-        logger.error(f"Database error: {db_error!s}", exc_info=True)
-        return 0, f"Database error: {db_error!s}"
-    except Exception as e:
-        await session.rollback()
-        await task_logger.log_task_failure(
-            log_entry,
-            f"Failed to index Google Gmail emails for connector {connector_id}",
-            str(e),
-            {"error_type": type(e).__name__},
-        )
-        logger.error(f"Failed to index Google Gmail emails: {e!s}", exc_info=True)
-        return 0, f"Failed to index Google Gmail emails: {e!s}"
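A minimal usage sketch for the new task, assuming an async session factory named async_session_maker; the real caller in this codebase may wire the session, IDs, and date range differently:

import asyncio


async def run_gmail_indexing(connector_id: int, search_space_id: int, user_id: str):
    # Open a session, run the indexer, and report the (count, error) result.
    async with async_session_maker() as session:
        indexed, error = await index_google_gmail_messages(
            session=session,
            connector_id=connector_id,
            search_space_id=search_space_id,
            user_id=user_id,
            start_date="2025-07-01",
            end_date="2025-08-01",
            max_messages=100,
        )
        if error:
            print(f"Gmail indexing failed: {error}")
        else:
            print(f"Indexed {indexed} Gmail messages")


# Example invocation (IDs are placeholders):
# asyncio.run(run_gmail_indexing(connector_id=1, search_space_id=1, user_id="00000000-0000-0000-0000-000000000000"))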