diff --git a/.recurseml.yaml b/.recurseml.yaml
new file mode 100644
index 0000000..225cc5b
--- /dev/null
+++ b/.recurseml.yaml
@@ -0,0 +1 @@
+rules: .rules/
\ No newline at end of file
diff --git a/.rules/avoid_source_deduplication.mdc b/.rules/avoid_source_deduplication.mdc
new file mode 100644
index 0000000..e5fa19b
--- /dev/null
+++ b/.rules/avoid_source_deduplication.mdc
@@ -0,0 +1,28 @@
+```yaml
+name: avoid-source-deduplication
+description: Preserve unique source entries in search results to maintain proper citation tracking
+globs: ['**/connector_service.py', '**/search_service.py']
+alwaysApply: true
+```
+
+Search result processing should preserve all source entries to maintain accurate citation tracking, rather than deduplicating sources.
+
+❌ Bad - Deduplicating sources:
+```python
+mapped_sources = {}
+for chunk in chunks:
+ source_key = chunk.get('url') or chunk.get('title')
+ if source_key not in mapped_sources:
+ mapped_sources[source_key] = create_source(chunk)
+sources_list = list(mapped_sources.values())
+```
+
+✅ Good - Preserving all sources:
+```python
+sources_list = []
+for chunk in chunks:
+ source = create_source(chunk)
+ sources_list.append(source)
+```
+
+Each chunk should maintain its unique source reference for proper citation tracking.
\ No newline at end of file
diff --git a/.rules/consistent_container_image_sources.mdc b/.rules/consistent_container_image_sources.mdc
new file mode 100644
index 0000000..ff3115b
--- /dev/null
+++ b/.rules/consistent_container_image_sources.mdc
@@ -0,0 +1,28 @@
+```yaml
+name: consistent-container-image-sources
+description: Maintain consistent image sources in Docker compose files using authorized registries
+globs: ['**/docker-compose.yml', '**/docker-compose.*.yml']
+alwaysApply: true
+```
+
+Docker compose files should use consistent image sources from authorized registries rather than local builds in production configurations.
+
+❌ Bad - Mixing build and image sources:
+```yaml
+services:
+ frontend:
+ build: ./frontend
+ backend:
+ image: ghcr.io/org/backend:latest
+```
+
+✅ Good - Consistent image sources:
+```yaml
+services:
+ frontend:
+ image: ghcr.io/org/frontend:latest
+ backend:
+ image: ghcr.io/org/backend:latest
+```
+
+Use build contexts only in development compose files.
\ No newline at end of file
diff --git a/.rules/no_env_files_in_repo.mdc b/.rules/no_env_files_in_repo.mdc
new file mode 100644
index 0000000..baac119
--- /dev/null
+++ b/.rules/no_env_files_in_repo.mdc
@@ -0,0 +1,22 @@
+```yaml
+name: no-env-files-in-repo
+description: Prevent committing environment and configuration files containing sensitive credentials
+globs: ['**/.env', '**/.env.*', '**/config/*.yml', '**/config/*.yaml']
+alwaysApply: true
+```
+
+Configuration files like `.env` should never be committed to version control as they often contain sensitive information like API keys, passwords, and tokens.
+
+❌ Bad - Committing .env files:
+```
+POSTGRES_DATABASE_URL=postgresql+psycopg2://user:password@localhost:5432/db
+API_KEY=sk-1234567890abcdef
+```
+
+✅ Good - Use .env.example instead:
+```
+POSTGRES_DATABASE_URL=postgresql+psycopg2://user:password@host:5432/dbname
+API_KEY=your-api-key-here
+```
+
+Add `.env` and similar config files to .gitignore and provide example templates instead.
\ No newline at end of file
diff --git a/.rules/require_unique_id_props.mdc b/.rules/require_unique_id_props.mdc
new file mode 100644
index 0000000..33adbc5
--- /dev/null
+++ b/.rules/require_unique_id_props.mdc
@@ -0,0 +1,28 @@
+```yaml
+name: require-unique-id-props
+description: Ensure unique key props are provided when mapping arrays to React elements
+globs: ['**/*.tsx', '**/*.jsx']
+alwaysApply: true
+```
+
+When mapping arrays to React elements, each element must have a unique key prop to help React efficiently update the DOM.
+
+❌ Bad - Missing key prop:
+```jsx
+{items.map((item) => (
+  <li>
+    {item.name}
+  </li>
+))}
+```
+
+✅ Good - With key prop:
+```jsx
+{items.map((item) => (
+  <li key={item.id}>
+    {item.name}
+  </li>
+))}
+```
+
+Keys should be stable, predictable, and unique among siblings.
\ No newline at end of file
diff --git a/surfsense_backend/.env.example b/surfsense_backend/.env.example
index 0b4e2cf..58e3f73 100644
--- a/surfsense_backend/.env.example
+++ b/surfsense_backend/.env.example
@@ -8,6 +8,7 @@ AUTH_TYPE=GOOGLE or LOCAL
# For Google Auth Only
GOOGLE_OAUTH_CLIENT_ID=924507538m
GOOGLE_OAUTH_CLIENT_SECRET=GOCSV
+GOOGLE_CALENDAR_REDIRECT_URI=http://localhost:8000/api/v1/auth/google/calendar/connector/callback
# Embedding Model
EMBEDDING_MODEL=mixedbread-ai/mxbai-embed-large-v1
diff --git a/surfsense_backend/alembic/versions/17_add_google_calendar_connector_enums.py b/surfsense_backend/alembic/versions/17_add_google_calendar_connector_enums.py
new file mode 100644
index 0000000..5fe5bf6
--- /dev/null
+++ b/surfsense_backend/alembic/versions/17_add_google_calendar_connector_enums.py
@@ -0,0 +1,65 @@
+"""Add Google Calendar connector enums
+
+Revision ID: 17
+Revises: 16
+Create Date: 2024-02-01 12:00:00.000000
+
+"""
+
+from collections.abc import Sequence
+
+from alembic import op
+
+# revision identifiers, used by Alembic.
+revision: str = "17"
+down_revision: str | None = "16"
+branch_labels: str | Sequence[str] | None = None
+depends_on: str | Sequence[str] | None = None
+
+
+def upgrade() -> None:
+ """Safely add 'GOOGLE_CALENDAR_CONNECTOR' to enum types if missing."""
+
+ # Add to searchsourceconnectortype enum
+ op.execute(
+ """
+ DO $$
+ BEGIN
+ IF NOT EXISTS (
+ SELECT 1 FROM pg_type t
+ JOIN pg_enum e ON t.oid = e.enumtypid
+ WHERE t.typname = 'searchsourceconnectortype' AND e.enumlabel = 'GOOGLE_CALENDAR_CONNECTOR'
+ ) THEN
+ ALTER TYPE searchsourceconnectortype ADD VALUE 'GOOGLE_CALENDAR_CONNECTOR';
+ END IF;
+ END
+ $$;
+ """
+ )
+
+ # Add to documenttype enum
+ op.execute(
+ """
+ DO $$
+ BEGIN
+ IF NOT EXISTS (
+ SELECT 1 FROM pg_type t
+ JOIN pg_enum e ON t.oid = e.enumtypid
+ WHERE t.typname = 'documenttype' AND e.enumlabel = 'GOOGLE_CALENDAR_CONNECTOR'
+ ) THEN
+ ALTER TYPE documenttype ADD VALUE 'GOOGLE_CALENDAR_CONNECTOR';
+ END IF;
+ END
+ $$;
+ """
+ )
+
+
+def downgrade() -> None:
+ """Remove 'GOOGLE_CALENDAR_CONNECTOR' from enum types."""
+
+ # Note: PostgreSQL doesn't support removing enum values directly
+    # This would require recreating the enum type, which is complex
+ # For now, we'll leave the enum values in place
+ # In a production environment, you might want to implement a more sophisticated downgrade
+ pass
diff --git a/surfsense_backend/app/agents/researcher/nodes.py b/surfsense_backend/app/agents/researcher/nodes.py
index 7882fed..6e27aa3 100644
--- a/surfsense_backend/app/agents/researcher/nodes.py
+++ b/surfsense_backend/app/agents/researcher/nodes.py
@@ -271,6 +271,49 @@ async def fetch_documents_by_ids(
else:
url = ""
+ elif doc_type == "GOOGLE_CALENDAR_CONNECTOR":
+ # Extract Google Calendar-specific metadata
+            event_id = metadata.get("event_id", "")  # empty default keeps the URL guard below meaningful
+ event_summary = metadata.get("event_summary", "Untitled Event")
+ calendar_id = metadata.get("calendar_id", "")
+ start_time = metadata.get("start_time", "")
+ location = metadata.get("location", "")
+
+ title = f"Calendar: {event_summary}"
+ if start_time:
+ # Format the start time for display
+ try:
+ if "T" in start_time:
+ from datetime import datetime
+
+ start_dt = datetime.fromisoformat(
+ start_time.replace("Z", "+00:00")
+ )
+ formatted_time = start_dt.strftime("%Y-%m-%d %H:%M")
+ title += f" ({formatted_time})"
+ else:
+ title += f" ({start_time})"
+ except Exception:
+ title += f" ({start_time})"
+
+ description = (
+ doc.content[:100] + "..."
+ if len(doc.content) > 100
+ else doc.content
+ )
+ if location:
+ description += f" | Location: {location}"
+ if calendar_id and calendar_id != "primary":
+ description += f" | Calendar: {calendar_id}"
+
+ # Construct Google Calendar URL
+ if event_id:
+ url = (
+ f"https://calendar.google.com/calendar/event?eid={event_id}"
+ )
+ else:
+ url = ""
+
elif doc_type == "EXTENSION":
# Extract Extension-specific metadata
webpage_title = metadata.get("VisitedWebPageTitle", doc.title)
@@ -919,6 +962,32 @@ async def fetch_relevant_documents(
)
}
)
+ elif connector == "GOOGLE_CALENDAR_CONNECTOR":
+ (
+ source_object,
+ calendar_chunks,
+ ) = await connector_service.search_google_calendar(
+ user_query=reformulated_query,
+ user_id=user_id,
+ search_space_id=search_space_id,
+ top_k=top_k,
+ search_mode=search_mode,
+ )
+
+ # Add to sources and raw documents
+ if source_object:
+ all_sources.append(source_object)
+ all_raw_documents.extend(calendar_chunks)
+
+ # Stream found document count
+ if streaming_service and writer:
+ writer(
+ {
+ "yield_value": streaming_service.format_terminal_info_delta(
+ f"📅 Found {len(calendar_chunks)} calendar events related to your query"
+ )
+ }
+ )
elif connector == "CONFLUENCE_CONNECTOR":
(
source_object,
diff --git a/surfsense_backend/app/agents/researcher/qna_agent/prompts.py b/surfsense_backend/app/agents/researcher/qna_agent/prompts.py
index 608c165..a7554aa 100644
--- a/surfsense_backend/app/agents/researcher/qna_agent/prompts.py
+++ b/surfsense_backend/app/agents/researcher/qna_agent/prompts.py
@@ -18,6 +18,7 @@ You are SurfSense, an advanced AI research assistant that provides detailed, wel
- JIRA_CONNECTOR: "Jira project issues, tickets, and comments" (personal project tracking)
- CONFLUENCE_CONNECTOR: "Confluence pages and comments" (personal project documentation)
- CLICKUP_CONNECTOR: "ClickUp tasks and project data" (personal task management)
+- GOOGLE_CALENDAR_CONNECTOR: "Google Calendar events, meetings, and schedules" (personal calendar and time management)
- DISCORD_CONNECTOR: "Discord server conversations and shared content" (personal community communications)
- TAVILY_API: "Tavily search API results" (personalized search results)
- LINKUP_API: "Linkup search API results" (personalized search results)
diff --git a/surfsense_backend/app/agents/researcher/sub_section_writer/prompts.py b/surfsense_backend/app/agents/researcher/sub_section_writer/prompts.py
index 83125e6..5080c1b 100644
--- a/surfsense_backend/app/agents/researcher/sub_section_writer/prompts.py
+++ b/surfsense_backend/app/agents/researcher/sub_section_writer/prompts.py
@@ -15,6 +15,10 @@ You are SurfSense, an advanced AI research assistant that synthesizes informatio
- YOUTUBE_VIDEO: "YouTube video transcripts and metadata" (personally saved videos)
- GITHUB_CONNECTOR: "GitHub repository content and issues" (personal repositories and interactions)
- LINEAR_CONNECTOR: "Linear project issues and discussions" (personal project management)
+- JIRA_CONNECTOR: "Jira project issues, tickets, and comments" (personal project tracking)
+- CONFLUENCE_CONNECTOR: "Confluence pages and comments" (personal project documentation)
+- CLICKUP_CONNECTOR: "ClickUp tasks and project data" (personal task management)
+- GOOGLE_CALENDAR_CONNECTOR: "Google Calendar events, meetings, and schedules" (personal calendar and time management)
- DISCORD_CONNECTOR: "Discord server messages and channels" (personal community interactions)
- TAVILY_API: "Tavily search API results" (personalized search results)
- LINKUP_API: "Linkup search API results" (personalized search results)
diff --git a/surfsense_backend/app/agents/researcher/utils.py b/surfsense_backend/app/agents/researcher/utils.py
index e26788c..21969e6 100644
--- a/surfsense_backend/app/agents/researcher/utils.py
+++ b/surfsense_backend/app/agents/researcher/utils.py
@@ -47,6 +47,7 @@ def get_connector_emoji(connector_name: str) -> str:
"DISCORD_CONNECTOR": "🗨️",
"TAVILY_API": "🔍",
"LINKUP_API": "🔗",
+ "GOOGLE_CALENDAR_CONNECTOR": "📅",
}
return connector_emojis.get(connector_name, "🔎")
@@ -63,6 +64,8 @@ def get_connector_friendly_name(connector_name: str) -> str:
"GITHUB_CONNECTOR": "GitHub",
"LINEAR_CONNECTOR": "Linear",
"JIRA_CONNECTOR": "Jira",
+ "CONFLUENCE_CONNECTOR": "Confluence",
+ "GOOGLE_CALENDAR_CONNECTOR": "Google Calendar",
"DISCORD_CONNECTOR": "Discord",
"TAVILY_API": "Tavily Search",
"LINKUP_API": "Linkup Search",
diff --git a/surfsense_backend/app/config/__init__.py b/surfsense_backend/app/config/__init__.py
index 48de86d..38a1f3e 100644
--- a/surfsense_backend/app/config/__init__.py
+++ b/surfsense_backend/app/config/__init__.py
@@ -41,11 +41,15 @@ class Config:
NEXT_FRONTEND_URL = os.getenv("NEXT_FRONTEND_URL")
- # AUTH: Google OAuth
+ # Auth
AUTH_TYPE = os.getenv("AUTH_TYPE")
- if AUTH_TYPE == "GOOGLE":
- GOOGLE_OAUTH_CLIENT_ID = os.getenv("GOOGLE_OAUTH_CLIENT_ID")
- GOOGLE_OAUTH_CLIENT_SECRET = os.getenv("GOOGLE_OAUTH_CLIENT_SECRET")
+
+ # Google OAuth
+ GOOGLE_OAUTH_CLIENT_ID = os.getenv("GOOGLE_OAUTH_CLIENT_ID")
+ GOOGLE_OAUTH_CLIENT_SECRET = os.getenv("GOOGLE_OAUTH_CLIENT_SECRET")
+
+ # Google Calendar redirect URI
+ GOOGLE_CALENDAR_REDIRECT_URI = os.getenv("GOOGLE_CALENDAR_REDIRECT_URI")
# LLM instances are now managed per-user through the LLMConfig system
# Legacy environment variables removed in favor of user-specific configurations
diff --git a/surfsense_backend/app/connectors/google_calendar_connector.py b/surfsense_backend/app/connectors/google_calendar_connector.py
new file mode 100644
index 0000000..3d7ca2e
--- /dev/null
+++ b/surfsense_backend/app/connectors/google_calendar_connector.py
@@ -0,0 +1,280 @@
+"""
+Google Calendar Connector Module
+A module for retrieving calendar events from Google Calendar using Google OAuth credentials.
+Allows fetching events from specified calendars within date ranges.
+"""
+
+from datetime import datetime
+from typing import Any
+
+import pytz
+from dateutil.parser import isoparse
+from google.auth.transport.requests import Request
+from google.oauth2.credentials import Credentials
+from googleapiclient.discovery import build
+
+
+class GoogleCalendarConnector:
+ """Class for retrieving data from Google Calendar using Google OAuth credentials."""
+
+ def __init__(
+ self,
+ credentials: Credentials,
+ ):
+ """
+ Initialize the GoogleCalendarConnector class.
+ Args:
+ credentials: Google OAuth Credentials object
+ """
+ self._credentials = credentials
+ self.service = None
+
+ def _get_credentials(self) -> Credentials:
+ """
+ Get valid Google OAuth credentials.
+ Returns:
+ Google OAuth credentials
+ Raises:
+ ValueError: If credentials have not been set
+ Exception: If credential refresh fails
+ """
+ if not all(
+ [
+ self._credentials.client_id,
+ self._credentials.client_secret,
+ self._credentials.refresh_token,
+ ]
+ ):
+ raise ValueError(
+ "Google OAuth credentials (client_id, client_secret, refresh_token) must be set"
+ )
+
+ if self._credentials and not self._credentials.expired:
+ return self._credentials
+
+ # Create credentials from refresh token
+ self._credentials = Credentials(
+ token=self._credentials.token,
+ refresh_token=self._credentials.refresh_token,
+ token_uri=self._credentials.token_uri,
+ client_id=self._credentials.client_id,
+ client_secret=self._credentials.client_secret,
+ scopes=self._credentials.scopes,
+ )
+
+ # Refresh the token if needed
+ if self._credentials.expired or not self._credentials.valid:
+ try:
+ self._credentials.refresh(Request())
+ except Exception as e:
+ raise Exception(
+ f"Failed to refresh Google OAuth credentials: {e!s}"
+ ) from e
+
+ return self._credentials
+
+ def _get_service(self):
+ """
+ Get the Google Calendar service instance using Google OAuth credentials.
+ Returns:
+ Google Calendar service instance
+ Raises:
+ ValueError: If credentials have not been set
+ Exception: If service creation fails
+ """
+ if self.service:
+ return self.service
+
+ try:
+ credentials = self._get_credentials()
+ self.service = build("calendar", "v3", credentials=credentials)
+ return self.service
+ except Exception as e:
+ raise Exception(f"Failed to create Google Calendar service: {e!s}") from e
+
+ def get_calendars(self) -> tuple[list[dict[str, Any]], str | None]:
+ """
+ Fetch list of user's calendars using Google OAuth credentials.
+ Returns:
+ Tuple containing (calendars list, error message or None)
+ """
+ try:
+ service = self._get_service()
+ calendars_result = service.calendarList().list().execute()
+ calendars = calendars_result.get("items", [])
+
+ # Format calendar data
+ formatted_calendars = []
+ for calendar in calendars:
+ formatted_calendars.append(
+ {
+ "id": calendar.get("id"),
+ "summary": calendar.get("summary"),
+ "description": calendar.get("description", ""),
+ "primary": calendar.get("primary", False),
+ "accessRole": calendar.get("accessRole"),
+ "timeZone": calendar.get("timeZone"),
+ }
+ )
+
+ return formatted_calendars, None
+
+ except Exception as e:
+ return [], f"Error fetching calendars: {e!s}"
+
+ def get_all_primary_calendar_events(
+ self,
+ start_date: str,
+ end_date: str,
+ max_results: int = 2500,
+ ) -> tuple[list[dict[str, Any]], str | None]:
+ """
+ Fetch events from the primary calendar using Google OAuth credentials.
+ Args:
+            start_date / end_date: ISO-8601 range bounds (start must be before end); max_results: Maximum number of events to fetch (default: 2500)
+ Returns:
+ Tuple containing (events list, error message or None)
+ """
+ try:
+ service = self._get_service()
+
+ # Parse both dates
+ dt_start = isoparse(start_date)
+ dt_end = isoparse(end_date)
+
+ if dt_start.tzinfo is None:
+ dt_start = dt_start.replace(tzinfo=pytz.UTC)
+ else:
+ dt_start = dt_start.astimezone(pytz.UTC)
+
+ if dt_end.tzinfo is None:
+ dt_end = dt_end.replace(tzinfo=pytz.UTC)
+ else:
+ dt_end = dt_end.astimezone(pytz.UTC)
+
+ if dt_start >= dt_end:
+ return [], (
+ f"start_date ({dt_start.isoformat()}) must be strictly before "
+ f"end_date ({dt_end.isoformat()})."
+ )
+
+ # RFC3339 with 'Z' for UTC
+ time_min = dt_start.isoformat().replace("+00:00", "Z")
+ time_max = dt_end.isoformat().replace("+00:00", "Z")
+
+ # Fetch events
+ events_result = (
+ service.events()
+ .list(
+ calendarId="primary",
+ maxResults=max_results,
+ singleEvents=True,
+ orderBy="startTime",
+ timeMin=time_min,
+ timeMax=time_max,
+ )
+ .execute()
+ )
+
+ events = events_result.get("items", [])
+
+ if not events:
+ return [], "No events found in the specified date range."
+
+ return events, None
+
+ except Exception as e:
+ return [], f"Error fetching events: {e!s}"
+
+ def format_event_to_markdown(self, event: dict[str, Any]) -> str:
+ """
+ Format a Google Calendar event to markdown.
+ Args:
+ event: Event object from Google Calendar API
+ Returns:
+ Formatted markdown string
+ """
+ # Extract basic event information
+ summary = event.get("summary", "No Title")
+ description = event.get("description", "")
+ location = event.get("location", "")
+ calendar_id = event.get("calendarId", "")
+
+ # Extract start and end times
+ start = event.get("start", {})
+ end = event.get("end", {})
+
+ start_time = start.get("dateTime") or start.get("date", "")
+ end_time = end.get("dateTime") or end.get("date", "")
+
+ # Format times for display
+ if start_time:
+ try:
+ if "T" in start_time: # DateTime format
+ start_dt = datetime.fromisoformat(start_time.replace("Z", "+00:00"))
+ start_formatted = start_dt.strftime("%Y-%m-%d %H:%M")
+ else: # Date format (all-day event)
+ start_formatted = start_time
+ except Exception:
+ start_formatted = start_time
+ else:
+ start_formatted = "Unknown"
+
+ if end_time:
+ try:
+ if "T" in end_time: # DateTime format
+ end_dt = datetime.fromisoformat(end_time.replace("Z", "+00:00"))
+ end_formatted = end_dt.strftime("%Y-%m-%d %H:%M")
+ else: # Date format (all-day event)
+ end_formatted = end_time
+ except Exception:
+ end_formatted = end_time
+ else:
+ end_formatted = "Unknown"
+
+ # Extract attendees
+ attendees = event.get("attendees", [])
+ attendee_list = []
+ for attendee in attendees:
+ email = attendee.get("email", "")
+ display_name = attendee.get("displayName", email)
+ response_status = attendee.get("responseStatus", "")
+ attendee_list.append(f"- {display_name} ({response_status})")
+
+ # Build markdown content
+ markdown_content = f"# {summary}\n\n"
+
+ # Add event details
+ markdown_content += f"**Start:** {start_formatted}\n"
+ markdown_content += f"**End:** {end_formatted}\n"
+
+ if location:
+ markdown_content += f"**Location:** {location}\n"
+
+ if calendar_id:
+ markdown_content += f"**Calendar:** {calendar_id}\n"
+
+ markdown_content += "\n"
+
+ # Add description if available
+ if description:
+ markdown_content += f"## Description\n\n{description}\n\n"
+
+ # Add attendees if available
+ if attendee_list:
+ markdown_content += "## Attendees\n\n"
+ markdown_content += "\n".join(attendee_list)
+ markdown_content += "\n\n"
+
+ # Add event metadata
+ markdown_content += "## Event Details\n\n"
+ markdown_content += f"- **Event ID:** {event.get('id', 'Unknown')}\n"
+ markdown_content += f"- **Created:** {event.get('created', 'Unknown')}\n"
+ markdown_content += f"- **Updated:** {event.get('updated', 'Unknown')}\n"
+
+ if event.get("recurringEventId"):
+ markdown_content += (
+ f"- **Recurring Event ID:** {event.get('recurringEventId')}\n"
+ )
+
+ return markdown_content
diff --git a/surfsense_backend/app/db.py b/surfsense_backend/app/db.py
index d749b3b..f572438 100644
--- a/surfsense_backend/app/db.py
+++ b/surfsense_backend/app/db.py
@@ -46,6 +46,7 @@ class DocumentType(str, Enum):
JIRA_CONNECTOR = "JIRA_CONNECTOR"
CONFLUENCE_CONNECTOR = "CONFLUENCE_CONNECTOR"
CLICKUP_CONNECTOR = "CLICKUP_CONNECTOR"
+ GOOGLE_CALENDAR_CONNECTOR = "GOOGLE_CALENDAR_CONNECTOR"
class SearchSourceConnectorType(str, Enum):
@@ -60,6 +61,7 @@ class SearchSourceConnectorType(str, Enum):
JIRA_CONNECTOR = "JIRA_CONNECTOR"
CONFLUENCE_CONNECTOR = "CONFLUENCE_CONNECTOR"
CLICKUP_CONNECTOR = "CLICKUP_CONNECTOR"
+ GOOGLE_CALENDAR_CONNECTOR = "GOOGLE_CALENDAR_CONNECTOR"
class ChatType(str, Enum):
diff --git a/surfsense_backend/app/routes/__init__.py b/surfsense_backend/app/routes/__init__.py
index 91c41ee..3e9c6ba 100644
--- a/surfsense_backend/app/routes/__init__.py
+++ b/surfsense_backend/app/routes/__init__.py
@@ -2,6 +2,9 @@ from fastapi import APIRouter
from .chats_routes import router as chats_router
from .documents_routes import router as documents_router
+from .google_calendar_add_connector_route import (
+ router as google_calendar_add_connector_router,
+)
from .llm_config_routes import router as llm_config_router
from .logs_routes import router as logs_router
from .podcasts_routes import router as podcasts_router
@@ -15,5 +18,6 @@ router.include_router(documents_router)
router.include_router(podcasts_router)
router.include_router(chats_router)
router.include_router(search_source_connectors_router)
+router.include_router(google_calendar_add_connector_router)
router.include_router(llm_config_router)
router.include_router(logs_router)
diff --git a/surfsense_backend/app/routes/google_calendar_add_connector_route.py b/surfsense_backend/app/routes/google_calendar_add_connector_route.py
new file mode 100644
index 0000000..a319c87
--- /dev/null
+++ b/surfsense_backend/app/routes/google_calendar_add_connector_route.py
@@ -0,0 +1,160 @@
+import os
+
+os.environ["OAUTHLIB_RELAX_TOKEN_SCOPE"] = "1"
+
+import base64
+import json
+import logging
+from uuid import UUID
+
+from fastapi import APIRouter, Depends, HTTPException, Request
+from fastapi.responses import RedirectResponse
+from google_auth_oauthlib.flow import Flow
+from pydantic import ValidationError
+from sqlalchemy.exc import IntegrityError
+from sqlalchemy.ext.asyncio import AsyncSession
+from sqlalchemy.future import select
+
+from app.config import config
+from app.db import (
+ SearchSourceConnector,
+ SearchSourceConnectorType,
+ User,
+ get_async_session,
+)
+from app.users import current_active_user
+
+logger = logging.getLogger(__name__)
+
+router = APIRouter()
+
+SCOPES = ["https://www.googleapis.com/auth/calendar.readonly"]
+REDIRECT_URI = config.GOOGLE_CALENDAR_REDIRECT_URI
+
+
+def get_google_flow():
+ try:
+ return Flow.from_client_config(
+ {
+ "web": {
+ "client_id": config.GOOGLE_OAUTH_CLIENT_ID,
+ "client_secret": config.GOOGLE_OAUTH_CLIENT_SECRET,
+ "auth_uri": "https://accounts.google.com/o/oauth2/auth",
+ "token_uri": "https://oauth2.googleapis.com/token",
+ "redirect_uris": [REDIRECT_URI],
+ }
+ },
+ scopes=SCOPES,
+ redirect_uri=REDIRECT_URI,
+ )
+ except Exception as e:
+ raise HTTPException(
+ status_code=500, detail=f"Failed to create Google flow: {e!s}"
+ ) from e
+
+
+@router.get("/auth/google/calendar/connector/add/")
+async def connect_calendar(space_id: int, user: User = Depends(current_active_user)):
+ try:
+ if not space_id:
+ raise HTTPException(status_code=400, detail="space_id is required")
+
+ flow = get_google_flow()
+
+ # Encode space_id and user_id in state
+ state_payload = json.dumps(
+ {
+ "space_id": space_id,
+ "user_id": str(user.id),
+ }
+ )
+ state_encoded = base64.urlsafe_b64encode(state_payload.encode()).decode()
+
+ auth_url, _ = flow.authorization_url(
+ access_type="offline",
+ prompt="consent",
+ include_granted_scopes="true",
+ state=state_encoded,
+ )
+ return {"auth_url": auth_url}
+    except HTTPException:
+        raise  # preserve intended status codes (e.g. the 400 above) instead of rewrapping as 500
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=f"Failed to initiate Google OAuth: {e!s}") from e
+
+
+@router.get("/auth/google/calendar/connector/callback/")
+async def calendar_callback(
+ request: Request,
+ code: str,
+ state: str,
+ session: AsyncSession = Depends(get_async_session),
+):
+ try:
+ # Decode and parse the state
+ decoded_state = base64.urlsafe_b64decode(state.encode()).decode()
+ data = json.loads(decoded_state)
+
+ user_id = UUID(data["user_id"])
+ space_id = data["space_id"]
+
+ flow = get_google_flow()
+ flow.fetch_token(code=code)
+
+ creds = flow.credentials
+ creds_dict = json.loads(creds.to_json())
+
+ try:
+ # Check if a connector with the same type already exists for this user
+ result = await session.execute(
+ select(SearchSourceConnector).filter(
+ SearchSourceConnector.user_id == user_id,
+ SearchSourceConnector.connector_type
+ == SearchSourceConnectorType.GOOGLE_CALENDAR_CONNECTOR,
+ )
+ )
+ existing_connector = result.scalars().first()
+ if existing_connector:
+ raise HTTPException(
+ status_code=409,
+ detail="A GOOGLE_CALENDAR_CONNECTOR connector already exists. Each user can have only one connector of each type.",
+ )
+ db_connector = SearchSourceConnector(
+ name="Google Calendar Connector",
+ connector_type=SearchSourceConnectorType.GOOGLE_CALENDAR_CONNECTOR,
+ config=creds_dict,
+ user_id=user_id,
+ is_indexable=True,
+ )
+ session.add(db_connector)
+ await session.commit()
+ await session.refresh(db_connector)
+ return RedirectResponse(
+ f"{config.NEXT_FRONTEND_URL}/dashboard/{space_id}/connectors/add/google-calendar-connector?success=true"
+ )
+ except ValidationError as e:
+ await session.rollback()
+ raise HTTPException(
+ status_code=422, detail=f"Validation error: {e!s}"
+ ) from e
+ except IntegrityError as e:
+ await session.rollback()
+ raise HTTPException(
+ status_code=409,
+ detail=f"Integrity error: A connector with this type already exists. {e!s}",
+ ) from e
+ except HTTPException:
+ await session.rollback()
+ raise
+ except Exception as e:
+ logger.error(f"Failed to create search source connector: {e!s}")
+ await session.rollback()
+ raise HTTPException(
+ status_code=500,
+ detail=f"Failed to create search source connector: {e!s}",
+ ) from e
+
+    except HTTPException:
+        raise  # preserve intended status codes (e.g. the 409 for duplicate connectors) instead of rewrapping as 500
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=f"Failed to complete Google OAuth: {e!s}") from e
diff --git a/surfsense_backend/app/routes/search_source_connectors_routes.py b/surfsense_backend/app/routes/search_source_connectors_routes.py
index f91bd9f..a65f1cf 100644
--- a/surfsense_backend/app/routes/search_source_connectors_routes.py
+++ b/surfsense_backend/app/routes/search_source_connectors_routes.py
@@ -40,6 +40,7 @@ from app.tasks.connectors_indexing_tasks import (
index_confluence_pages,
index_discord_messages,
index_github_repos,
+ index_google_calendar_events,
index_jira_issues,
index_linear_issues,
index_notion_pages,
@@ -489,6 +490,24 @@ async def index_connector_content(
)
response_message = "ClickUp indexing started in the background."
+ elif (
+ connector.connector_type
+ == SearchSourceConnectorType.GOOGLE_CALENDAR_CONNECTOR
+ ):
+ # Run indexing in background
+ logger.info(
+ f"Triggering Google Calendar indexing for connector {connector_id} into search space {search_space_id} from {indexing_from} to {indexing_to}"
+ )
+ background_tasks.add_task(
+ run_google_calendar_indexing_with_new_session,
+ connector_id,
+ search_space_id,
+ str(user.id),
+ indexing_from,
+ indexing_to,
+ )
+ response_message = "Google Calendar indexing started in the background."
+
elif connector.connector_type == SearchSourceConnectorType.DISCORD_CONNECTOR:
# Run indexing in background
logger.info(
@@ -1034,3 +1053,63 @@ async def run_clickup_indexing(
exc_info=True,
)
# Optionally update status in DB to indicate failure
+
+
+# Add new helper functions for Google Calendar indexing
+async def run_google_calendar_indexing_with_new_session(
+ connector_id: int,
+ search_space_id: int,
+ user_id: str,
+ start_date: str,
+ end_date: str,
+):
+ """Wrapper to run Google Calendar indexing with its own database session."""
+ logger.info(
+ f"Background task started: Indexing Google Calendar connector {connector_id} into space {search_space_id} from {start_date} to {end_date}"
+ )
+ async with async_session_maker() as session:
+ await run_google_calendar_indexing(
+ session, connector_id, search_space_id, user_id, start_date, end_date
+ )
+ logger.info(
+ f"Background task finished: Indexing Google Calendar connector {connector_id}"
+ )
+
+
+async def run_google_calendar_indexing(
+ session: AsyncSession,
+ connector_id: int,
+ search_space_id: int,
+ user_id: str,
+ start_date: str,
+ end_date: str,
+):
+ """Runs the Google Calendar indexing task and updates the timestamp."""
+ try:
+ indexed_count, error_message = await index_google_calendar_events(
+ session,
+ connector_id,
+ search_space_id,
+ user_id,
+ start_date,
+ end_date,
+ update_last_indexed=False,
+ )
+ if error_message:
+ logger.error(
+ f"Google Calendar indexing failed for connector {connector_id}: {error_message}"
+ )
+ # Optionally update status in DB to indicate failure
+ else:
+ logger.info(
+ f"Google Calendar indexing successful for connector {connector_id}. Indexed {indexed_count} documents."
+ )
+ # Update the last indexed timestamp only on success
+ await update_connector_last_indexed(session, connector_id)
+ await session.commit() # Commit timestamp update
+ except Exception as e:
+ logger.error(
+ f"Critical error in run_google_calendar_indexing for connector {connector_id}: {e}",
+ exc_info=True,
+ )
+ # Optionally update status in DB to indicate failure
diff --git a/surfsense_backend/app/schemas/google_auth_credentials.py b/surfsense_backend/app/schemas/google_auth_credentials.py
new file mode 100644
index 0000000..16e112e
--- /dev/null
+++ b/surfsense_backend/app/schemas/google_auth_credentials.py
@@ -0,0 +1,18 @@
+from datetime import UTC, datetime
+
+from pydantic import BaseModel
+
+
+class GoogleAuthCredentialsBase(BaseModel):
+ token: str
+ refresh_token: str
+ token_uri: str
+ client_id: str
+ expiry: datetime
+ scopes: list[str]
+ client_secret: str
+
+ @property
+ def is_expired(self) -> bool:
+ """Check if the credentials have expired."""
+ return self.expiry <= datetime.now(UTC)
diff --git a/surfsense_backend/app/schemas/search_source_connector.py b/surfsense_backend/app/schemas/search_source_connector.py
index 28d5425..4c36893 100644
--- a/surfsense_backend/app/schemas/search_source_connector.py
+++ b/surfsense_backend/app/schemas/search_source_connector.py
@@ -5,6 +5,7 @@ from typing import Any
from pydantic import BaseModel, ConfigDict, field_validator
from app.db import SearchSourceConnectorType
+from app.schemas.google_auth_credentials import GoogleAuthCredentialsBase
from .base import IDModel, TimestampModel
@@ -179,6 +180,14 @@ class SearchSourceConnectorBase(BaseModel):
if not config.get("CLICKUP_API_TOKEN"):
raise ValueError("CLICKUP_API_TOKEN cannot be empty")
+ elif connector_type == SearchSourceConnectorType.GOOGLE_CALENDAR_CONNECTOR:
+ # Required fields
+ required_keys = list(GoogleAuthCredentialsBase.model_fields.keys())
+
+ for key in required_keys:
+ if key not in config or config[key] in (None, ""):
+ raise ValueError(f"{key} is required and cannot be empty")
+
return config
diff --git a/surfsense_backend/app/services/connector_service.py b/surfsense_backend/app/services/connector_service.py
index 3b3cce7..d063a86 100644
--- a/surfsense_backend/app/services/connector_service.py
+++ b/surfsense_backend/app/services/connector_service.py
@@ -1073,6 +1073,141 @@ class ConnectorService:
return result_object, jira_chunks
+ async def search_google_calendar(
+ self,
+ user_query: str,
+ user_id: str,
+ search_space_id: int,
+ top_k: int = 20,
+ search_mode: SearchMode = SearchMode.CHUNKS,
+ ) -> tuple:
+ """
+ Search for Google Calendar events and return both the source information and langchain documents
+
+ Args:
+ user_query: The user's query
+ user_id: The user's ID
+ search_space_id: The search space ID to search in
+ top_k: Maximum number of results to return
+ search_mode: Search mode (CHUNKS or DOCUMENTS)
+
+ Returns:
+ tuple: (sources_info, langchain_documents)
+ """
+ if search_mode == SearchMode.CHUNKS:
+ calendar_chunks = await self.chunk_retriever.hybrid_search(
+ query_text=user_query,
+ top_k=top_k,
+ user_id=user_id,
+ search_space_id=search_space_id,
+ document_type="GOOGLE_CALENDAR_CONNECTOR",
+ )
+ elif search_mode == SearchMode.DOCUMENTS:
+ calendar_chunks = await self.document_retriever.hybrid_search(
+ query_text=user_query,
+ top_k=top_k,
+ user_id=user_id,
+ search_space_id=search_space_id,
+ document_type="GOOGLE_CALENDAR_CONNECTOR",
+ )
+ # Transform document retriever results to match expected format
+ calendar_chunks = self._transform_document_results(calendar_chunks)
+
+ # Early return if no results
+ if not calendar_chunks:
+ return {
+ "id": 31,
+ "name": "Google Calendar Events",
+ "type": "GOOGLE_CALENDAR_CONNECTOR",
+ "sources": [],
+ }, []
+
+ # Process each chunk and create sources directly without deduplication
+ sources_list = []
+ async with self.counter_lock:
+ for _i, chunk in enumerate(calendar_chunks):
+ # Extract document metadata
+ document = chunk.get("document", {})
+ metadata = document.get("metadata", {})
+
+ # Extract Google Calendar-specific metadata
+ event_id = metadata.get("event_id", "")
+ event_summary = metadata.get("event_summary", "Untitled Event")
+ calendar_id = metadata.get("calendar_id", "")
+ start_time = metadata.get("start_time", "")
+ end_time = metadata.get("end_time", "")
+ location = metadata.get("location", "")
+
+ # Create a more descriptive title for calendar events
+ title = f"Calendar: {event_summary}"
+ if start_time:
+ # Format the start time for display
+ try:
+ if "T" in start_time:
+ from datetime import datetime
+
+ start_dt = datetime.fromisoformat(
+ start_time.replace("Z", "+00:00")
+ )
+ formatted_time = start_dt.strftime("%Y-%m-%d %H:%M")
+ title += f" ({formatted_time})"
+ else:
+ title += f" ({start_time})"
+ except Exception:
+ title += f" ({start_time})"
+
+ # Create a more descriptive description for calendar events
+ description = chunk.get("content", "")[:100]
+ if len(description) == 100:
+ description += "..."
+
+ # Add event info to description
+ info_parts = []
+ if location:
+ info_parts.append(f"Location: {location}")
+ if calendar_id and calendar_id != "primary":
+ info_parts.append(f"Calendar: {calendar_id}")
+ if end_time:
+ info_parts.append(f"End: {end_time}")
+
+ if info_parts:
+ if description:
+ description += f" | {' | '.join(info_parts)}"
+ else:
+ description = " | ".join(info_parts)
+
+ # For URL, we could construct a URL to the Google Calendar event
+ url = ""
+ if event_id and calendar_id:
+ # Google Calendar event URL format
+ url = f"https://calendar.google.com/calendar/event?eid={event_id}"
+
+ source = {
+ "id": document.get("id", self.source_id_counter),
+ "title": title,
+ "description": description,
+ "url": url,
+ "event_id": event_id,
+ "event_summary": event_summary,
+ "calendar_id": calendar_id,
+ "start_time": start_time,
+ "end_time": end_time,
+ "location": location,
+ }
+
+ self.source_id_counter += 1
+ sources_list.append(source)
+
+ # Create result object
+ result_object = {
+ "id": 31, # Assign a unique ID for the Google Calendar connector
+ "name": "Google Calendar Events",
+ "type": "GOOGLE_CALENDAR_CONNECTOR",
+ "sources": sources_list,
+ }
+
+ return result_object, calendar_chunks
+
async def search_confluence(
self,
user_query: str,
diff --git a/surfsense_backend/app/tasks/connectors_indexing_tasks.py b/surfsense_backend/app/tasks/connectors_indexing_tasks.py
index 5f257fb..72bc18c 100644
--- a/surfsense_backend/app/tasks/connectors_indexing_tasks.py
+++ b/surfsense_backend/app/tasks/connectors_indexing_tasks.py
@@ -2,6 +2,7 @@ import asyncio
import logging
from datetime import UTC, datetime, timedelta
+from google.oauth2.credentials import Credentials
from slack_sdk.errors import SlackApiError
from sqlalchemy.exc import SQLAlchemyError
from sqlalchemy.ext.asyncio import AsyncSession
@@ -12,6 +13,7 @@ from app.connectors.clickup_connector import ClickUpConnector
from app.connectors.confluence_connector import ConfluenceConnector
from app.connectors.discord_connector import DiscordConnector
from app.connectors.github_connector import GitHubConnector
+from app.connectors.google_calendar_connector import GoogleCalendarConnector
from app.connectors.jira_connector import JiraConnector
from app.connectors.linear_connector import LinearConnector
from app.connectors.notion_history import NotionHistoryConnector
@@ -3012,3 +3014,362 @@ async def index_clickup_tasks(
)
logger.error(f"Failed to index ClickUp tasks: {e!s}", exc_info=True)
return 0, f"Failed to index ClickUp tasks: {e!s}"
+
+
+async def index_google_calendar_events(
+ session: AsyncSession,
+ connector_id: int,
+ search_space_id: int,
+ user_id: str,
+ start_date: str | None = None,
+ end_date: str | None = None,
+ update_last_indexed: bool = True,
+) -> tuple[int, str | None]:
+ """
+ Index Google Calendar events.
+
+ Args:
+ session: Database session
+ connector_id: ID of the Google Calendar connector
+ search_space_id: ID of the search space to store documents in
+ user_id: User ID
+ start_date: Start date for indexing (YYYY-MM-DD format)
+ end_date: End date for indexing (YYYY-MM-DD format)
+ update_last_indexed: Whether to update the last_indexed_at timestamp (default: True)
+
+ Returns:
+ Tuple containing (number of documents indexed, error message or None)
+ """
+ task_logger = TaskLoggingService(session, search_space_id)
+
+ # Log task start
+ log_entry = await task_logger.log_task_start(
+ task_name="google_calendar_events_indexing",
+ source="connector_indexing_task",
+ message=f"Starting Google Calendar events indexing for connector {connector_id}",
+ metadata={
+ "connector_id": connector_id,
+ "user_id": str(user_id),
+ "start_date": start_date,
+ "end_date": end_date,
+ },
+ )
+
+ try:
+ # Get the connector from the database
+ result = await session.execute(
+ select(SearchSourceConnector).filter(
+ SearchSourceConnector.id == connector_id,
+ SearchSourceConnector.connector_type
+ == SearchSourceConnectorType.GOOGLE_CALENDAR_CONNECTOR,
+ )
+ )
+ connector = result.scalars().first()
+
+ if not connector:
+ await task_logger.log_task_failure(
+ log_entry,
+ f"Connector with ID {connector_id} not found",
+ "Connector not found",
+ {"error_type": "ConnectorNotFound"},
+ )
+ return 0, f"Connector with ID {connector_id} not found"
+
+ # Get the Google Calendar credentials from the connector config
+ credentials = Credentials(
+ token=connector.config.get("token"),
+ refresh_token=connector.config.get("refresh_token"),
+ token_uri=connector.config.get("token_uri"),
+ client_id=connector.config.get("client_id"),
+ client_secret=connector.config.get("client_secret"),
+ scopes=connector.config.get("scopes"),
+ )
+
+ if (
+ not credentials.client_id
+ or not credentials.client_secret
+ or not credentials.refresh_token
+ ):
+ await task_logger.log_task_failure(
+ log_entry,
+ f"Google Calendar credentials not found in connector config for connector {connector_id}",
+ "Missing Google Calendar credentials",
+ {"error_type": "MissingCredentials"},
+ )
+ return 0, "Google Calendar credentials not found in connector config"
+
+ # Initialize Google Calendar client
+ await task_logger.log_task_progress(
+ log_entry,
+ f"Initializing Google Calendar client for connector {connector_id}",
+ {"stage": "client_initialization"},
+ )
+
+ calendar_client = GoogleCalendarConnector(credentials=credentials)
+
+ # Calculate date range
+ if start_date is None or end_date is None:
+ # Fall back to calculating dates based on last_indexed_at
+ calculated_end_date = datetime.now()
+
+ # Use last_indexed_at as start date if available, otherwise use 30 days ago
+ if connector.last_indexed_at:
+ # Convert dates to be comparable (both timezone-naive)
+ last_indexed_naive = (
+ connector.last_indexed_at.replace(tzinfo=None)
+ if connector.last_indexed_at.tzinfo
+ else connector.last_indexed_at
+ )
+
+ # Check if last_indexed_at is in the future or after end_date
+ if last_indexed_naive > calculated_end_date:
+ logger.warning(
+ f"Last indexed date ({last_indexed_naive.strftime('%Y-%m-%d')}) is in the future. Using 30 days ago instead."
+ )
+ calculated_start_date = calculated_end_date - timedelta(days=30)
+ else:
+ calculated_start_date = last_indexed_naive
+ logger.info(
+ f"Using last_indexed_at ({calculated_start_date.strftime('%Y-%m-%d')}) as start date"
+ )
+ else:
+ calculated_start_date = calculated_end_date - timedelta(
+ days=30
+ ) # Use 30 days as default for calendar events
+ logger.info(
+ f"No last_indexed_at found, using {calculated_start_date.strftime('%Y-%m-%d')} (30 days ago) as start date"
+ )
+
+ # Use calculated dates if not provided
+ start_date_str = (
+ start_date if start_date else calculated_start_date.strftime("%Y-%m-%d")
+ )
+ end_date_str = (
+ end_date if end_date else calculated_end_date.strftime("%Y-%m-%d")
+ )
+ else:
+ # Use provided dates
+ start_date_str = start_date
+ end_date_str = end_date
+
+ await task_logger.log_task_progress(
+ log_entry,
+ f"Fetching Google Calendar events from {start_date_str} to {end_date_str}",
+ {
+ "stage": "fetching_events",
+ "start_date": start_date_str,
+ "end_date": end_date_str,
+ },
+ )
+
+ # Get events within date range from primary calendar
+ try:
+ events, error = calendar_client.get_all_primary_calendar_events(
+ start_date=start_date_str, end_date=end_date_str
+ )
+
+ if error:
+ logger.error(f"Failed to get Google Calendar events: {error}")
+
+ # Don't treat "No events found" as an error that should stop indexing
+ if "No events found" in error:
+ logger.info(
+ "No events found is not a critical error, continuing with update"
+ )
+ if update_last_indexed:
+ connector.last_indexed_at = datetime.now()
+ await session.commit()
+ logger.info(
+ f"Updated last_indexed_at to {connector.last_indexed_at} despite no events found"
+ )
+
+ await task_logger.log_task_success(
+ log_entry,
+ f"No Google Calendar events found in date range {start_date_str} to {end_date_str}",
+ {"events_found": 0},
+ )
+ return 0, None
+ else:
+ await task_logger.log_task_failure(
+ log_entry,
+ f"Failed to get Google Calendar events: {error}",
+ "API Error",
+ {"error_type": "APIError"},
+ )
+ return 0, f"Failed to get Google Calendar events: {error}"
+
+ logger.info(f"Retrieved {len(events)} events from Google Calendar API")
+
+ except Exception as e:
+ logger.error(f"Error fetching Google Calendar events: {e!s}", exc_info=True)
+ return 0, f"Error fetching Google Calendar events: {e!s}"
+
+ # Process and index each event
+ documents_indexed = 0
+ skipped_events = []
+ documents_skipped = 0
+
+ for event in events:
+ try:
+ event_id = event.get("id")
+ event_summary = event.get("summary", "No Title")
+ calendar_id = event.get("calendarId", "")
+
+ if not event_id:
+ logger.warning(f"Skipping event with missing ID: {event_summary}")
+ skipped_events.append(f"{event_summary} (missing ID)")
+ documents_skipped += 1
+ continue
+
+ # Format event as markdown
+ event_markdown = calendar_client.format_event_to_markdown(event)
+
+ if not event_markdown.strip():
+ logger.warning(f"Skipping event with no content: {event_summary}")
+ skipped_events.append(f"{event_summary} (no content)")
+ documents_skipped += 1
+ continue
+
+ # Create a simple summary for the document
+ start = event.get("start", {})
+ end = event.get("end", {})
+ start_time = start.get("dateTime") or start.get("date", "")
+ end_time = end.get("dateTime") or end.get("date", "")
+ location = event.get("location", "")
+ description = event.get("description", "")
+
+ summary_content = f"Google Calendar Event: {event_summary}\n\n"
+ summary_content += f"Calendar: {calendar_id}\n"
+ summary_content += f"Start: {start_time}\n"
+ summary_content += f"End: {end_time}\n"
+
+ if location:
+ summary_content += f"Location: {location}\n"
+
+ if description:
+ # Take first 300 characters of description for summary
+ desc_preview = description[:300]
+ if len(description) > 300:
+ desc_preview += "..."
+ summary_content += f"Description: {desc_preview}\n"
+
+ # Generate content hash
+ content_hash = generate_content_hash(event_markdown, search_space_id)
+
+ # Check if document already exists
+ existing_doc_by_hash_result = await session.execute(
+ select(Document).where(Document.content_hash == content_hash)
+ )
+ existing_document_by_hash = (
+ existing_doc_by_hash_result.scalars().first()
+ )
+
+ if existing_document_by_hash:
+ logger.info(
+ f"Document with content hash {content_hash} already exists for event {event_summary}. Skipping processing."
+ )
+ documents_skipped += 1
+ continue
+
+ # Generate embedding for the summary
+ summary_embedding = config.embedding_model_instance.embed(
+ summary_content
+ )
+
+ # Process chunks - using the full event markdown
+ chunks = [
+ Chunk(
+ content=chunk.text,
+ embedding=config.embedding_model_instance.embed(chunk.text),
+ )
+ for chunk in config.chunker_instance.chunk(event_markdown)
+ ]
+
+ # Create and store new document
+ logger.info(f"Creating new document for event {event_summary}")
+ document = Document(
+ search_space_id=search_space_id,
+ title=f"Calendar Event - {event_summary}",
+ document_type=DocumentType.GOOGLE_CALENDAR_CONNECTOR,
+ document_metadata={
+ "event_id": event_id,
+ "event_summary": event_summary,
+ "calendar_id": calendar_id,
+ "start_time": start_time,
+ "end_time": end_time,
+ "location": location,
+ "indexed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
+ },
+ content=summary_content,
+ content_hash=content_hash,
+ embedding=summary_embedding,
+ chunks=chunks,
+ )
+
+ session.add(document)
+ documents_indexed += 1
+ logger.info(f"Successfully indexed new event {event_summary}")
+
+ except Exception as e:
+ logger.error(
+ f"Error processing event {event.get('summary', 'Unknown')}: {e!s}",
+ exc_info=True,
+ )
+ skipped_events.append(
+ f"{event.get('summary', 'Unknown')} (processing error)"
+ )
+ documents_skipped += 1
+ continue # Skip this event and continue with others
+
+ # Update the last_indexed_at timestamp for the connector only if requested
+ total_processed = documents_indexed
+ if update_last_indexed:
+ connector.last_indexed_at = datetime.now()
+ logger.info(f"Updated last_indexed_at to {connector.last_indexed_at}")
+
+ # Commit all changes
+ await session.commit()
+ logger.info(
+ "Successfully committed all Google Calendar document changes to database"
+ )
+
+ # Log success
+ await task_logger.log_task_success(
+ log_entry,
+ f"Successfully completed Google Calendar indexing for connector {connector_id}",
+ {
+ "events_processed": total_processed,
+ "documents_indexed": documents_indexed,
+ "documents_skipped": documents_skipped,
+ "skipped_events_count": len(skipped_events),
+ },
+ )
+
+ logger.info(
+ f"Google Calendar indexing completed: {documents_indexed} new events, {documents_skipped} skipped"
+ )
+ return (
+ total_processed,
+ None,
+ ) # Return None as the error message to indicate success
+
+ except SQLAlchemyError as db_error:
+ await session.rollback()
+ await task_logger.log_task_failure(
+ log_entry,
+ f"Database error during Google Calendar indexing for connector {connector_id}",
+ str(db_error),
+ {"error_type": "SQLAlchemyError"},
+ )
+ logger.error(f"Database error: {db_error!s}", exc_info=True)
+ return 0, f"Database error: {db_error!s}"
+ except Exception as e:
+ await session.rollback()
+ await task_logger.log_task_failure(
+ log_entry,
+ f"Failed to index Google Calendar events for connector {connector_id}",
+ str(e),
+ {"error_type": type(e).__name__},
+ )
+ logger.error(f"Failed to index Google Calendar events: {e!s}", exc_info=True)
+ return 0, f"Failed to index Google Calendar events: {e!s}"
diff --git a/surfsense_backend/pyproject.toml b/surfsense_backend/pyproject.toml
index 9ab56b7..3d1701e 100644
--- a/surfsense_backend/pyproject.toml
+++ b/surfsense_backend/pyproject.toml
@@ -14,6 +14,8 @@ dependencies = [
"fastapi-users[oauth,sqlalchemy]>=14.0.1",
"firecrawl-py>=1.12.0",
"github3.py==4.0.1",
+ "google-api-python-client>=2.156.0",
+ "google-auth-oauthlib>=1.2.1",
"langchain-community>=0.3.17",
"langchain-unstructured>=0.1.6",
"langgraph>=0.3.29",
diff --git a/surfsense_backend/uv.lock b/surfsense_backend/uv.lock
index f3bc42c..3386961 100644
--- a/surfsense_backend/uv.lock
+++ b/surfsense_backend/uv.lock
@@ -1252,6 +1252,22 @@ grpc = [
{ name = "grpcio-status" },
]
+[[package]]
+name = "google-api-python-client"
+version = "2.177.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "google-api-core" },
+ { name = "google-auth" },
+ { name = "google-auth-httplib2" },
+ { name = "httplib2" },
+ { name = "uritemplate" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/7a/75/a89cad519fa8910132e3b08571d0e682ae1163643da6f963f1930f3dc788/google_api_python_client-2.177.0.tar.gz", hash = "sha256:9ffd2b57d68f5afa7e6ac64e2c440534eaa056cbb394812a62ff94723c31b50e", size = 13184405, upload-time = "2025-07-23T16:22:46.321Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/47/f5/121248e18ca605a11720c81ae1b52a5a8cb690af9f01887c56de23cd9a5a/google_api_python_client-2.177.0-py3-none-any.whl", hash = "sha256:f2f50f11105ab883eb9b6cf38ec54ea5fd4b429249f76444bec90deba5be79b3", size = 13709470, upload-time = "2025-07-23T16:22:44.081Z" },
+]
+
[[package]]
name = "google-auth"
version = "2.40.3"
@@ -1266,6 +1282,32 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/17/63/b19553b658a1692443c62bd07e5868adaa0ad746a0751ba62c59568cd45b/google_auth-2.40.3-py2.py3-none-any.whl", hash = "sha256:1370d4593e86213563547f97a92752fc658456fe4514c809544f330fed45a7ca", size = 216137, upload-time = "2025-06-04T18:04:55.573Z" },
]
+[[package]]
+name = "google-auth-httplib2"
+version = "0.2.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "google-auth" },
+ { name = "httplib2" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/56/be/217a598a818567b28e859ff087f347475c807a5649296fb5a817c58dacef/google-auth-httplib2-0.2.0.tar.gz", hash = "sha256:38aa7badf48f974f1eb9861794e9c0cb2a0511a4ec0679b1f886d108f5640e05", size = 10842, upload-time = "2023-12-12T17:40:30.722Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/be/8a/fe34d2f3f9470a27b01c9e76226965863f153d5fbe276f83608562e49c04/google_auth_httplib2-0.2.0-py2.py3-none-any.whl", hash = "sha256:b65a0a2123300dd71281a7bf6e64d65a0759287df52729bdd1ae2e47dc311a3d", size = 9253, upload-time = "2023-12-12T17:40:13.055Z" },
+]
+
+[[package]]
+name = "google-auth-oauthlib"
+version = "1.2.2"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "google-auth" },
+ { name = "requests-oauthlib" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/fb/87/e10bf24f7bcffc1421b84d6f9c3377c30ec305d082cd737ddaa6d8f77f7c/google_auth_oauthlib-1.2.2.tar.gz", hash = "sha256:11046fb8d3348b296302dd939ace8af0a724042e8029c1b872d87fabc9f41684", size = 20955, upload-time = "2025-04-22T16:40:29.172Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/ac/84/40ee070be95771acd2f4418981edb834979424565c3eec3cd88b6aa09d24/google_auth_oauthlib-1.2.2-py3-none-any.whl", hash = "sha256:fd619506f4b3908b5df17b65f39ca8d66ea56986e5472eb5978fd8f3786f00a2", size = 19072, upload-time = "2025-04-22T16:40:28.174Z" },
+]
+
[[package]]
name = "google-cloud-vision"
version = "3.10.2"
@@ -1470,6 +1512,18 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/7e/f5/f66802a942d491edb555dd61e3a9961140fd64c90bce1eafd741609d334d/httpcore-1.0.9-py3-none-any.whl", hash = "sha256:2d400746a40668fc9dec9810239072b40b4484b640a8c38fd654a024c7a1bf55", size = 78784, upload-time = "2025-04-24T22:06:20.566Z" },
]
+[[package]]
+name = "httplib2"
+version = "0.22.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "pyparsing" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/3d/ad/2371116b22d616c194aa25ec410c9c6c37f23599dcd590502b74db197584/httplib2-0.22.0.tar.gz", hash = "sha256:d7a10bc5ef5ab08322488bde8c726eeee5c8618723fdb399597ec58f3d82df81", size = 351116, upload-time = "2023-03-21T22:29:37.214Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/a8/6c/d2fbdaaa5959339d53ba38e94c123e4e84b8fbc4b84beb0e70d7c1608486/httplib2-0.22.0-py3-none-any.whl", hash = "sha256:14ae0a53c1ba8f3d37e9e27cf37eabb0fb9980f435ba405d546948b009dd64dc", size = 96854, upload-time = "2023-03-21T22:29:35.683Z" },
+]
+
[[package]]
name = "httptools"
version = "0.6.4"
@@ -4724,6 +4778,8 @@ dependencies = [
{ name = "fastapi-users", extra = ["oauth", "sqlalchemy"] },
{ name = "firecrawl-py" },
{ name = "github3-py" },
+ { name = "google-api-python-client" },
+ { name = "google-auth-oauthlib" },
{ name = "langchain-community" },
{ name = "langchain-unstructured" },
{ name = "langgraph" },
@@ -4763,6 +4819,8 @@ requires-dist = [
{ name = "fastapi-users", extras = ["oauth", "sqlalchemy"], specifier = ">=14.0.1" },
{ name = "firecrawl-py", specifier = ">=1.12.0" },
{ name = "github3-py", specifier = "==4.0.1" },
+ { name = "google-api-python-client", specifier = ">=2.156.0" },
+ { name = "google-auth-oauthlib", specifier = ">=1.2.1" },
{ name = "langchain-community", specifier = ">=0.3.17" },
{ name = "langchain-unstructured", specifier = ">=0.1.6" },
{ name = "langgraph", specifier = ">=0.3.29" },
diff --git a/surfsense_web/app/dashboard/[search_space_id]/connectors/add/google-calendar-connector/page.tsx b/surfsense_web/app/dashboard/[search_space_id]/connectors/add/google-calendar-connector/page.tsx
new file mode 100644
index 0000000..fb04a27
--- /dev/null
+++ b/surfsense_web/app/dashboard/[search_space_id]/connectors/add/google-calendar-connector/page.tsx
@@ -0,0 +1,190 @@
+"use client";
+
+import { zodResolver } from "@hookform/resolvers/zod";
+import { IconCalendar } from "@tabler/icons-react";
+import { motion } from "framer-motion";
+import { ArrowLeft, Check, ExternalLink, Loader2 } from "lucide-react";
+import Link from "next/link";
+import { useParams, useRouter, useSearchParams } from "next/navigation";
+import { useEffect, useState } from "react";
+import { useForm } from "react-hook-form";
+import { toast } from "sonner";
+import { z } from "zod";
+import { Button } from "@/components/ui/button";
+import {
+ Card,
+ CardContent,
+ CardDescription,
+ CardFooter,
+ CardHeader,
+ CardTitle,
+} from "@/components/ui/card";
+import {
+ type SearchSourceConnector,
+ useSearchSourceConnectors,
+} from "@/hooks/useSearchSourceConnectors";
+
+export default function GoogleCalendarConnectorPage() {
+ const router = useRouter();
+ const params = useParams();
+ const searchSpaceId = params.search_space_id as string;
+ const [isConnecting, setIsConnecting] = useState(false);
+ const [doesConnectorExist, setDoesConnectorExist] = useState(false);
+
+ const { fetchConnectors } = useSearchSourceConnectors();
+
+ useEffect(() => {
+ fetchConnectors().then((data) => {
+ const connector = data.find(
+ (c: SearchSourceConnector) => c.connector_type === "GOOGLE_CALENDAR_CONNECTOR"
+ );
+ if (connector) {
+ setDoesConnectorExist(true);
+ }
+ });
+ }, []);
+
+ // Handle Google OAuth connection
+ const handleConnectGoogle = async () => {
+ try {
+ setIsConnecting(true);
+ // Call backend to initiate authorization flow
+ const response = await fetch(
+ `${process.env.NEXT_PUBLIC_FASTAPI_BACKEND_URL}/api/v1/auth/google/calendar/connector/add/?space_id=${searchSpaceId}`,
+ {
+ method: "GET",
+ headers: {
+ Authorization: `Bearer ${localStorage.getItem("surfsense_bearer_token")}`,
+ },
+ }
+ );
+
+ if (!response.ok) {
+ throw new Error("Failed to initiate Google OAuth");
+ }
+
+ const data = await response.json();
+
+ // Redirect to Google for authentication
+ window.location.href = data.auth_url;
+ } catch (error) {
+ console.error("Error connecting to Google:", error);
+ toast.error("Failed to connect to Google Calendar");
+ } finally {
+ setIsConnecting(false);
+ }
+ };
+
+ return (
+
+
+ {/* Header */}
+
+
+
+ Back to connectors
+
+
+
+
+
+
+
Connect Google Calendar
+
+ Connect your Google Calendar to search events.
+
+
+
+
+
+ {/* OAuth Connection Card */}
+ {!doesConnectorExist ? (
+
+
+ Connect Your Google Account
+
+ Connect your Google account to access your calendar events. We'll only request
+ read-only access to your calendars.
+
+
+
+
+
+ Read-only access to your calendar events
+
+
+
+ Access works even when you're offline
+
+
+
+ You can disconnect anytime
+
+
+
+
+
+
+
+ ) : (
+ /* Configuration Form Card */
+
+
+ ✅ Your Google calendar is successfully connected!
+
+
+ )}
+
+ {/* Help Section */}
+ {!doesConnectorExist && (
+
+
+ How It Works
+
+
+
+
1. Connect Your Account
+
+ Click "Connect Your Google Account" to start the secure OAuth process. You'll be
+ redirected to Google to sign in.
+
+
+
+
2. Grant Permissions
+
+ Google will ask for permission to read your calendar events. We only request
+ read-only access to keep your data safe.
+