Update search source connector schema

This commit is contained in:
CREDO23 2025-08-02 04:39:48 +02:00
parent 44d2338663
commit edf46e4de1
21 changed files with 1213 additions and 19 deletions

View file

@ -8,6 +8,7 @@ AUTH_TYPE=GOOGLE or LOCAL
# For Google Auth Only
GOOGLE_OAUTH_CLIENT_ID=924507538m
GOOGLE_OAUTH_CLIENT_SECRET=GOCSV
GOOGLE_CALENDAR_REDIRECT_URI=http://localhost:8000/api/v1/auth/google/calendar/connector/callback
# Embedding Model
EMBEDDING_MODEL=mixedbread-ai/mxbai-embed-large-v1

View file

@ -0,0 +1,65 @@
"""Add Google Calendar connector enums
Revision ID: 15
Revises: 14
Create Date: 2024-02-01 12:00:00.000000
"""
from collections.abc import Sequence
from alembic import op
# revision identifiers, used by Alembic.
revision: str = "15"
down_revision: str | None = "14"
branch_labels: str | Sequence[str] | None = None
depends_on: str | Sequence[str] | None = None
def upgrade() -> None:
    """Add the 'GOOGLE_CALENDAR_CONNECTOR' label to both enum types when absent.

    The same guarded statement is issued once per enum: a DO block first looks
    the label up in pg_enum and only runs ALTER TYPE ... ADD VALUE when the
    lookup finds nothing, so re-running the migration is harmless.
    """
    # searchsourceconnectortype first, then documenttype.
    for enum_type in ("searchsourceconnectortype", "documenttype"):
        guarded_add = f"""
        DO $$
        BEGIN
            IF NOT EXISTS (
                SELECT 1 FROM pg_type t
                JOIN pg_enum e ON t.oid = e.enumtypid
                WHERE t.typname = '{enum_type}' AND e.enumlabel = 'GOOGLE_CALENDAR_CONNECTOR'
            ) THEN
                ALTER TYPE {enum_type} ADD VALUE 'GOOGLE_CALENDAR_CONNECTOR';
            END IF;
        END
        $$;
        """
        op.execute(guarded_add)
def downgrade() -> None:
    """Deliberate no-op: the 'GOOGLE_CALENDAR_CONNECTOR' enum labels stay in place.

    PostgreSQL cannot drop a single enum value directly; undoing the upgrade
    would mean recreating the enum types, which this migration does not
    attempt. A production deployment may want a more thorough downgrade.
    """
    return None

View file

@ -271,6 +271,49 @@ async def fetch_documents_by_ids(
else:
url = ""
elif doc_type == "GOOGLE_CALENDAR_CONNECTOR":
# Extract Google Calendar-specific metadata
event_id = metadata.get("event_id", "Unknown Event")
event_summary = metadata.get("event_summary", "Untitled Event")
calendar_id = metadata.get("calendar_id", "")
start_time = metadata.get("start_time", "")
location = metadata.get("location", "")
title = f"Calendar: {event_summary}"
if start_time:
# Format the start time for display
try:
if "T" in start_time:
from datetime import datetime
start_dt = datetime.fromisoformat(
start_time.replace("Z", "+00:00")
)
formatted_time = start_dt.strftime("%Y-%m-%d %H:%M")
title += f" ({formatted_time})"
else:
title += f" ({start_time})"
except Exception:
title += f" ({start_time})"
description = (
doc.content[:100] + "..."
if len(doc.content) > 100
else doc.content
)
if location:
description += f" | Location: {location}"
if calendar_id and calendar_id != "primary":
description += f" | Calendar: {calendar_id}"
# Construct Google Calendar URL
if event_id:
url = (
f"https://calendar.google.com/calendar/event?eid={event_id}"
)
else:
url = ""
elif doc_type == "EXTENSION":
# Extract Extension-specific metadata
webpage_title = metadata.get("VisitedWebPageTitle", doc.title)
@ -919,6 +962,32 @@ async def fetch_relevant_documents(
)
}
)
elif connector == "GOOGLE_CALENDAR_CONNECTOR":
(
source_object,
calendar_chunks,
) = await connector_service.search_google_calendar(
user_query=reformulated_query,
user_id=user_id,
search_space_id=search_space_id,
top_k=top_k,
search_mode=search_mode,
)
# Add to sources and raw documents
if source_object:
all_sources.append(source_object)
all_raw_documents.extend(calendar_chunks)
# Stream found document count
if streaming_service and writer:
writer(
{
"yield_value": streaming_service.format_terminal_info_delta(
f"📅 Found {len(calendar_chunks)} calendar events related to your query"
)
}
)
elif connector == "CONFLUENCE_CONNECTOR":
(
source_object,

View file

@ -18,6 +18,7 @@ You are SurfSense, an advanced AI research assistant that provides detailed, wel
- JIRA_CONNECTOR: "Jira project issues, tickets, and comments" (personal project tracking)
- CONFLUENCE_CONNECTOR: "Confluence pages and comments" (personal project documentation)
- CLICKUP_CONNECTOR: "ClickUp tasks and project data" (personal task management)
- GOOGLE_CALENDAR_CONNECTOR: "Google Calendar events, meetings, and schedules" (personal calendar and time management)
- DISCORD_CONNECTOR: "Discord server conversations and shared content" (personal community communications)
- TAVILY_API: "Tavily search API results" (personalized search results)
- LINKUP_API: "Linkup search API results" (personalized search results)

View file

@ -15,6 +15,9 @@ You are SurfSense, an advanced AI research assistant that synthesizes informatio
- YOUTUBE_VIDEO: "YouTube video transcripts and metadata" (personally saved videos)
- GITHUB_CONNECTOR: "GitHub repository content and issues" (personal repositories and interactions)
- LINEAR_CONNECTOR: "Linear project issues and discussions" (personal project management)
- JIRA_CONNECTOR: "Jira project issues, tickets, and comments" (personal project tracking)
- CONFLUENCE_CONNECTOR: "Confluence pages and comments" (personal project documentation)
- GOOGLE_CALENDAR_CONNECTOR: "Google Calendar events, meetings, and schedules" (personal calendar and time management)
- DISCORD_CONNECTOR: "Discord server messages and channels" (personal community interactions)
- TAVILY_API: "Tavily search API results" (personalized search results)
- LINKUP_API: "Linkup search API results" (personalized search results)

View file

@ -63,6 +63,8 @@ def get_connector_friendly_name(connector_name: str) -> str:
"GITHUB_CONNECTOR": "GitHub",
"LINEAR_CONNECTOR": "Linear",
"JIRA_CONNECTOR": "Jira",
"CONFLUENCE_CONNECTOR": "Confluence",
"GOOGLE_CALENDAR_CONNECTOR": "Google Calendar",
"DISCORD_CONNECTOR": "Discord",
"TAVILY_API": "Tavily Search",
"LINKUP_API": "Linkup Search",

View file

@ -43,9 +43,10 @@ class Config:
# AUTH: Google OAuth
AUTH_TYPE = os.getenv("AUTH_TYPE")
if AUTH_TYPE == "GOOGLE":
GOOGLE_OAUTH_CLIENT_ID = os.getenv("GOOGLE_OAUTH_CLIENT_ID")
GOOGLE_OAUTH_CLIENT_SECRET = os.getenv("GOOGLE_OAUTH_CLIENT_SECRET")
GOOGLE_OAUTH_CLIENT_ID = os.getenv("GOOGLE_OAUTH_CLIENT_ID")
GOOGLE_OAUTH_CLIENT_SECRET = os.getenv("GOOGLE_OAUTH_CLIENT_SECRET")
GOOGLE_CALENDAR_REDIRECT_URI = os.getenv("GOOGLE_CALENDAR_REDIRECT_URI")
# LLM instances are now managed per-user through the LLMConfig system
# Legacy environment variables removed in favor of user-specific configurations

View file

@ -1,6 +1,5 @@
"""
Google Calendar Connector Module | Google OAuth Credentials | Google Calendar API
A module for retrieving calendar events from Google Calendar using Google OAuth credentials.
Allows fetching events from specified calendars within date ranges using Google OAuth credentials.
"""
@ -8,6 +7,8 @@ Allows fetching events from specified calendars within date ranges using Google
from datetime import datetime
from typing import Any
import pytz
from dateutil.parser import isoparse
from google.auth.transport.requests import Request
from google.oauth2.credentials import Credentials
from googleapiclient.discovery import build
@ -22,7 +23,6 @@ class GoogleCalendarConnector:
):
"""
Initialize the GoogleCalendarConnector class.
Args:
credentials: Google OAuth Credentials object
"""
@ -32,10 +32,8 @@ class GoogleCalendarConnector:
def _get_credentials(self) -> Credentials:
"""
Get valid Google OAuth credentials.
Returns:
Google OAuth credentials
Raises:
ValueError: If credentials have not been set
Exception: If credential refresh fails
@ -78,10 +76,8 @@ class GoogleCalendarConnector:
def _get_service(self):
"""
Get the Google Calendar service instance using Google OAuth credentials.
Returns:
Google Calendar service instance
Raises:
ValueError: If credentials have not been set
Exception: If service creation fails
@ -99,7 +95,6 @@ class GoogleCalendarConnector:
def get_calendars(self) -> tuple[list[dict[str, Any]], str | None]:
"""
Fetch list of user's calendars using Google OAuth credentials.
Returns:
Tuple containing (calendars list, error message or None)
"""
@ -129,20 +124,44 @@ class GoogleCalendarConnector:
def get_all_primary_calendar_events(
self,
start_date: str,
end_date: str,
max_results: int = 2500,
) -> tuple[list[dict[str, Any]], str | None]:
"""
Fetch events from the primary calendar using Google OAuth credentials.
Args:
max_results: Maximum number of events to fetch (default: 2500)
Returns:
Tuple containing (events list, error message or None)
"""
try:
service = self._get_service()
# Parse both dates
dt_start = isoparse(start_date)
dt_end = isoparse(end_date)
if dt_start.tzinfo is None:
dt_start = dt_start.replace(tzinfo=pytz.UTC)
else:
dt_start = dt_start.astimezone(pytz.UTC)
if dt_end.tzinfo is None:
dt_end = dt_end.replace(tzinfo=pytz.UTC)
else:
dt_end = dt_end.astimezone(pytz.UTC)
if dt_start >= dt_end:
return [], (
f"start_date ({dt_start.isoformat()}) must be strictly before "
f"end_date ({dt_end.isoformat()})."
)
# RFC3339 with 'Z' for UTC
time_min = dt_start.isoformat().replace("+00:00", "Z")
time_max = dt_end.isoformat().replace("+00:00", "Z")
# Fetch events
events_result = (
service.events()
@ -151,6 +170,8 @@ class GoogleCalendarConnector:
maxResults=max_results,
singleEvents=True,
orderBy="startTime",
timeMin=time_min,
timeMax=time_max,
)
.execute()
)
@ -168,10 +189,8 @@ class GoogleCalendarConnector:
def format_event_to_markdown(self, event: dict[str, Any]) -> str:
"""
Format a Google Calendar event to markdown.
Args:
event: Event object from Google Calendar API
Returns:
Formatted markdown string
"""

View file

@ -257,7 +257,7 @@ class GoogleCalendarAccount(BaseModel):
)
access_token = Column(String, nullable=False)
refresh_token = Column(String, nullable=False)
refresh_token = Column(String, nullable=True)
user = relationship("User", back_populates="calendar_account")

View file

@ -2,7 +2,9 @@ from fastapi import APIRouter
from .chats_routes import router as chats_router
from .documents_routes import router as documents_router
from .google_calendar_add_connector_route import router as google_oauth_router
from .google_calendar_add_connector_route import (
router as google_calendar_add_connector_router,
)
from .llm_config_routes import router as llm_config_router
from .logs_routes import router as logs_router
from .podcasts_routes import router as podcasts_router
@ -16,6 +18,6 @@ router.include_router(documents_router)
router.include_router(podcasts_router)
router.include_router(chats_router)
router.include_router(search_source_connectors_router)
router.include_router(google_oauth_router)
router.include_router(google_calendar_add_connector_router)
router.include_router(llm_config_router)
router.include_router(logs_router)

View file

@ -1,4 +1,4 @@
from datetime import datetime
from datetime import UTC, datetime
from pydantic import BaseModel
@ -11,3 +11,8 @@ class GoogleAuthCredentialsBase(BaseModel):
expiry: datetime
scopes: list[str]
client_secret: str
@property
def is_expired(self) -> bool:
"""Check if the credentials have expired."""
return self.expiry <= datetime.now(UTC)

View file

@ -1073,6 +1073,141 @@ class ConnectorService:
return result_object, jira_chunks
async def search_google_calendar(
    self,
    user_query: str,
    user_id: str,
    search_space_id: int,
    top_k: int = 20,
    search_mode: SearchMode = SearchMode.CHUNKS,
) -> tuple:
    """
    Search for Google Calendar events and return both the source information and langchain documents

    Args:
        user_query: The user's query
        user_id: The user's ID
        search_space_id: The search space ID to search in
        top_k: Maximum number of results to return
        search_mode: Search mode (CHUNKS or DOCUMENTS)

    Returns:
        tuple: (sources_info, langchain_documents). ``sources_info`` is a dict
        with ``id``, ``name``, ``type`` and a ``sources`` list holding one
        entry per hit (no deduplication); the second element is the list of
        retrieved chunks, or ``[]`` when nothing matched.
    """
    # Imported once here rather than on every loop iteration.
    from datetime import datetime

    # Default to "no results" so an unrecognised search mode falls through to
    # the empty-result return instead of raising UnboundLocalError.
    calendar_chunks = []
    if search_mode == SearchMode.CHUNKS:
        calendar_chunks = await self.chunk_retriever.hybrid_search(
            query_text=user_query,
            top_k=top_k,
            user_id=user_id,
            search_space_id=search_space_id,
            document_type="GOOGLE_CALENDAR_CONNECTOR",
        )
    elif search_mode == SearchMode.DOCUMENTS:
        calendar_chunks = await self.document_retriever.hybrid_search(
            query_text=user_query,
            top_k=top_k,
            user_id=user_id,
            search_space_id=search_space_id,
            document_type="GOOGLE_CALENDAR_CONNECTOR",
        )
        # Transform document retriever results to match expected format
        calendar_chunks = self._transform_document_results(calendar_chunks)

    # Early return if no results
    if not calendar_chunks:
        return {
            "id": 31,
            "name": "Google Calendar Events",
            "type": "GOOGLE_CALENDAR_CONNECTOR",
            "sources": [],
        }, []

    # Process each chunk and create sources directly without deduplication
    sources_list = []
    async with self.counter_lock:
        for chunk in calendar_chunks:
            # Extract document metadata
            document = chunk.get("document", {})
            metadata = document.get("metadata", {})

            # Extract Google Calendar-specific metadata
            event_id = metadata.get("event_id", "")
            event_summary = metadata.get("event_summary", "Untitled Event")
            calendar_id = metadata.get("calendar_id", "")
            start_time = metadata.get("start_time", "")
            end_time = metadata.get("end_time", "")
            location = metadata.get("location", "")

            # Title: event summary plus a readable start time when available.
            title = f"Calendar: {event_summary}"
            if start_time:
                try:
                    if "T" in start_time:
                        # Timed event: normalise a trailing "Z" so
                        # fromisoformat accepts it, then shorten to minutes.
                        start_dt = datetime.fromisoformat(
                            start_time.replace("Z", "+00:00")
                        )
                        formatted_time = start_dt.strftime("%Y-%m-%d %H:%M")
                        title += f" ({formatted_time})"
                    else:
                        # Date-only value (no "T"): show it verbatim.
                        title += f" ({start_time})"
                except Exception:
                    # Unparseable timestamp: fall back to the raw string
                    # rather than dropping the source.
                    title += f" ({start_time})"

            # Description: first 100 characters of the chunk. The ellipsis is
            # added only when text was actually cut off, so content that is
            # exactly 100 characters long is no longer marked as truncated.
            content = chunk.get("content", "")
            description = content[:100]
            if len(content) > 100:
                description += "..."

            # Add event info to description
            info_parts = []
            if location:
                info_parts.append(f"Location: {location}")
            if calendar_id and calendar_id != "primary":
                info_parts.append(f"Calendar: {calendar_id}")
            if end_time:
                info_parts.append(f"End: {end_time}")

            if info_parts:
                if description:
                    description += f" | {' | '.join(info_parts)}"
                else:
                    description = " | ".join(info_parts)

            # Link to the event only when both identifiers are known.
            url = ""
            if event_id and calendar_id:
                # Google Calendar event URL format
                url = f"https://calendar.google.com/calendar/event?eid={event_id}"

            source = {
                # Fall back to the running counter when the document has no id.
                "id": document.get("id", self.source_id_counter),
                "title": title,
                "description": description,
                "url": url,
                "event_id": event_id,
                "event_summary": event_summary,
                "calendar_id": calendar_id,
                "start_time": start_time,
                "end_time": end_time,
                "location": location,
            }

            self.source_id_counter += 1
            sources_list.append(source)

    # Create result object
    result_object = {
        "id": 31,  # Assign a unique ID for the Google Calendar connector
        "name": "Google Calendar Events",
        "type": "GOOGLE_CALENDAR_CONNECTOR",
        "sources": sources_list,
    }

    return result_object, calendar_chunks
async def search_confluence(
self,
user_query: str,

View file

@ -2,6 +2,7 @@ import asyncio
import logging
from datetime import UTC, datetime, timedelta
from google.oauth2.credentials import Credentials
from slack_sdk.errors import SlackApiError
from sqlalchemy.exc import SQLAlchemyError
from sqlalchemy.ext.asyncio import AsyncSession
@ -12,6 +13,7 @@ from app.connectors.clickup_connector import ClickUpConnector
from app.connectors.confluence_connector import ConfluenceConnector
from app.connectors.discord_connector import DiscordConnector
from app.connectors.github_connector import GitHubConnector
from app.connectors.google_calendar_connector import GoogleCalendarConnector
from app.connectors.jira_connector import JiraConnector
from app.connectors.linear_connector import LinearConnector
from app.connectors.notion_history import NotionHistoryConnector
@ -3012,3 +3014,362 @@ async def index_clickup_tasks(
)
logger.error(f"Failed to index ClickUp tasks: {e!s}", exc_info=True)
return 0, f"Failed to index ClickUp tasks: {e!s}"
async def index_google_calendar_events(
    session: AsyncSession,
    connector_id: int,
    search_space_id: int,
    user_id: str,
    start_date: str | None = None,
    end_date: str | None = None,
    update_last_indexed: bool = True,
) -> tuple[int, str | None]:
    """
    Index Google Calendar events.

    Loads the connector row, builds Google OAuth credentials from its config,
    fetches primary-calendar events for the date window and stores one
    Document (with chunks and embeddings) per event whose content hash is not
    already present. Every exit path closes the task log entry with either a
    success or a failure record.

    Args:
        session: Database session
        connector_id: ID of the Google Calendar connector
        search_space_id: ID of the search space to store documents in
        user_id: User ID
        start_date: Start date for indexing (YYYY-MM-DD format)
        end_date: End date for indexing (YYYY-MM-DD format)
        update_last_indexed: Whether to update the last_indexed_at timestamp (default: True)

    Returns:
        Tuple containing (number of documents indexed, error message or None)
    """
    task_logger = TaskLoggingService(session, search_space_id)

    # Log task start
    log_entry = await task_logger.log_task_start(
        task_name="google_calendar_events_indexing",
        source="connector_indexing_task",
        message=f"Starting Google Calendar events indexing for connector {connector_id}",
        metadata={
            "connector_id": connector_id,
            "user_id": str(user_id),
            "start_date": start_date,
            "end_date": end_date,
        },
    )

    try:
        # Get the connector from the database
        result = await session.execute(
            select(SearchSourceConnector).filter(
                SearchSourceConnector.id == connector_id,
                SearchSourceConnector.connector_type
                == SearchSourceConnectorType.GOOGLE_CALENDAR_CONNECTOR,
            )
        )
        connector = result.scalars().first()

        if not connector:
            await task_logger.log_task_failure(
                log_entry,
                f"Connector with ID {connector_id} not found",
                "Connector not found",
                {"error_type": "ConnectorNotFound"},
            )
            return 0, f"Connector with ID {connector_id} not found"

        # Get the Google Calendar credentials from the connector config
        credentials = Credentials(
            token=connector.config.get("token"),
            refresh_token=connector.config.get("refresh_token"),
            token_uri=connector.config.get("token_uri"),
            client_id=connector.config.get("client_id"),
            client_secret=connector.config.get("client_secret"),
            scopes=connector.config.get("scopes"),
        )

        # The access token may be absent, but the client id/secret and the
        # refresh token are required before any API call is attempted.
        if (
            not credentials.client_id
            or not credentials.client_secret
            or not credentials.refresh_token
        ):
            await task_logger.log_task_failure(
                log_entry,
                f"Google Calendar credentials not found in connector config for connector {connector_id}",
                "Missing Google Calendar credentials",
                {"error_type": "MissingCredentials"},
            )
            return 0, "Google Calendar credentials not found in connector config"

        # Initialize Google Calendar client
        await task_logger.log_task_progress(
            log_entry,
            f"Initializing Google Calendar client for connector {connector_id}",
            {"stage": "client_initialization"},
        )

        calendar_client = GoogleCalendarConnector(credentials=credentials)

        # Calculate date range
        if start_date is None or end_date is None:
            # Fall back to calculating dates based on last_indexed_at
            calculated_end_date = datetime.now()

            # Use last_indexed_at as start date if available, otherwise use 30 days ago
            if connector.last_indexed_at:
                # Convert dates to be comparable (both timezone-naive)
                last_indexed_naive = (
                    connector.last_indexed_at.replace(tzinfo=None)
                    if connector.last_indexed_at.tzinfo
                    else connector.last_indexed_at
                )

                # Check if last_indexed_at is in the future or after end_date
                if last_indexed_naive > calculated_end_date:
                    logger.warning(
                        f"Last indexed date ({last_indexed_naive.strftime('%Y-%m-%d')}) is in the future. Using 30 days ago instead."
                    )
                    calculated_start_date = calculated_end_date - timedelta(days=30)
                else:
                    calculated_start_date = last_indexed_naive
                    logger.info(
                        f"Using last_indexed_at ({calculated_start_date.strftime('%Y-%m-%d')}) as start date"
                    )
            else:
                calculated_start_date = calculated_end_date - timedelta(
                    days=30
                )  # Use 30 days as default for calendar events
                logger.info(
                    f"No last_indexed_at found, using {calculated_start_date.strftime('%Y-%m-%d')} (30 days ago) as start date"
                )

            # Use calculated dates if not provided
            start_date_str = (
                start_date if start_date else calculated_start_date.strftime("%Y-%m-%d")
            )
            end_date_str = (
                end_date if end_date else calculated_end_date.strftime("%Y-%m-%d")
            )
        else:
            # Use provided dates
            start_date_str = start_date
            end_date_str = end_date

        # GoogleCalendarConnector.get_all_primary_calendar_events rejects any
        # window whose start is not strictly before its end. Two date-only
        # strings for the same day (the normal case when re-indexing on the
        # day of last_indexed_at) both parse to midnight and were therefore
        # refused as an API error. Extending a date-only end to the last
        # second of that day keeps the window non-empty and makes the end
        # date inclusive. Values that already carry a time are left untouched.
        fetch_end_date = end_date_str
        if len(end_date_str) == 10 and "T" not in end_date_str:
            fetch_end_date = f"{end_date_str}T23:59:59"

        await task_logger.log_task_progress(
            log_entry,
            f"Fetching Google Calendar events from {start_date_str} to {end_date_str}",
            {
                "stage": "fetching_events",
                "start_date": start_date_str,
                "end_date": end_date_str,
            },
        )

        # Get events within date range from primary calendar
        try:
            events, error = calendar_client.get_all_primary_calendar_events(
                start_date=start_date_str, end_date=fetch_end_date
            )

            if error:
                logger.error(f"Failed to get Google Calendar events: {error}")

                # Don't treat "No events found" as an error that should stop indexing
                if "No events found" in error:
                    logger.info(
                        "No events found is not a critical error, continuing with update"
                    )
                    if update_last_indexed:
                        connector.last_indexed_at = datetime.now()
                        await session.commit()
                        logger.info(
                            f"Updated last_indexed_at to {connector.last_indexed_at} despite no events found"
                        )

                    await task_logger.log_task_success(
                        log_entry,
                        f"No Google Calendar events found in date range {start_date_str} to {end_date_str}",
                        {"events_found": 0},
                    )
                    return 0, None
                else:
                    await task_logger.log_task_failure(
                        log_entry,
                        f"Failed to get Google Calendar events: {error}",
                        "API Error",
                        {"error_type": "APIError"},
                    )
                    return 0, f"Failed to get Google Calendar events: {error}"

            logger.info(f"Retrieved {len(events)} events from Google Calendar API")

        except Exception as e:
            logger.error(f"Error fetching Google Calendar events: {e!s}", exc_info=True)
            # Close the task log on this path as well; previously the entry
            # opened by log_task_start was never marked as failed here.
            await task_logger.log_task_failure(
                log_entry,
                f"Error fetching Google Calendar events for connector {connector_id}",
                str(e),
                {"error_type": type(e).__name__},
            )
            return 0, f"Error fetching Google Calendar events: {e!s}"

        # Process and index each event
        documents_indexed = 0
        skipped_events = []
        documents_skipped = 0

        for event in events:
            try:
                event_id = event.get("id")
                event_summary = event.get("summary", "No Title")
                calendar_id = event.get("calendarId", "")

                if not event_id:
                    logger.warning(f"Skipping event with missing ID: {event_summary}")
                    skipped_events.append(f"{event_summary} (missing ID)")
                    documents_skipped += 1
                    continue

                # Format event as markdown
                event_markdown = calendar_client.format_event_to_markdown(event)

                if not event_markdown.strip():
                    logger.warning(f"Skipping event with no content: {event_summary}")
                    skipped_events.append(f"{event_summary} (no content)")
                    documents_skipped += 1
                    continue

                # Create a simple summary for the document.
                # Prefer "dateTime" and fall back to "date" when it is absent.
                start = event.get("start", {})
                end = event.get("end", {})
                start_time = start.get("dateTime") or start.get("date", "")
                end_time = end.get("dateTime") or end.get("date", "")
                location = event.get("location", "")
                description = event.get("description", "")

                summary_content = f"Google Calendar Event: {event_summary}\n\n"
                summary_content += f"Calendar: {calendar_id}\n"
                summary_content += f"Start: {start_time}\n"
                summary_content += f"End: {end_time}\n"

                if location:
                    summary_content += f"Location: {location}\n"

                if description:
                    # Take first 300 characters of description for summary
                    desc_preview = description[:300]
                    if len(description) > 300:
                        desc_preview += "..."
                    summary_content += f"Description: {desc_preview}\n"

                # Generate content hash (over the full markdown, not the summary)
                content_hash = generate_content_hash(event_markdown, search_space_id)

                # Check if document already exists
                existing_doc_by_hash_result = await session.execute(
                    select(Document).where(Document.content_hash == content_hash)
                )
                existing_document_by_hash = (
                    existing_doc_by_hash_result.scalars().first()
                )

                if existing_document_by_hash:
                    logger.info(
                        f"Document with content hash {content_hash} already exists for event {event_summary}. Skipping processing."
                    )
                    documents_skipped += 1
                    continue

                # Generate embedding for the summary
                summary_embedding = config.embedding_model_instance.embed(
                    summary_content
                )

                # Process chunks - using the full event markdown
                chunks = [
                    Chunk(
                        content=chunk.text,
                        embedding=config.embedding_model_instance.embed(chunk.text),
                    )
                    for chunk in config.chunker_instance.chunk(event_markdown)
                ]

                # Create and store new document
                logger.info(f"Creating new document for event {event_summary}")
                document = Document(
                    search_space_id=search_space_id,
                    title=f"Calendar Event - {event_summary}",
                    document_type=DocumentType.GOOGLE_CALENDAR_CONNECTOR,
                    document_metadata={
                        "event_id": event_id,
                        "event_summary": event_summary,
                        "calendar_id": calendar_id,
                        "start_time": start_time,
                        "end_time": end_time,
                        "location": location,
                        "indexed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
                    },
                    content=summary_content,
                    content_hash=content_hash,
                    embedding=summary_embedding,
                    chunks=chunks,
                )

                session.add(document)
                documents_indexed += 1
                logger.info(f"Successfully indexed new event {event_summary}")

            except Exception as e:
                # One bad event must not abort the whole run: record it and move on.
                logger.error(
                    f"Error processing event {event.get('summary', 'Unknown')}: {e!s}",
                    exc_info=True,
                )
                skipped_events.append(
                    f"{event.get('summary', 'Unknown')} (processing error)"
                )
                documents_skipped += 1
                continue  # Skip this event and continue with others

        # Update the last_indexed_at timestamp for the connector only if requested
        total_processed = documents_indexed
        if update_last_indexed:
            connector.last_indexed_at = datetime.now()
            logger.info(f"Updated last_indexed_at to {connector.last_indexed_at}")

        # Commit all changes
        await session.commit()
        logger.info(
            "Successfully committed all Google Calendar document changes to database"
        )

        # Log success
        await task_logger.log_task_success(
            log_entry,
            f"Successfully completed Google Calendar indexing for connector {connector_id}",
            {
                "events_processed": total_processed,
                "documents_indexed": documents_indexed,
                "documents_skipped": documents_skipped,
                "skipped_events_count": len(skipped_events),
            },
        )

        logger.info(
            f"Google Calendar indexing completed: {documents_indexed} new events, {documents_skipped} skipped"
        )
        return (
            total_processed,
            None,
        )  # Return None as the error message to indicate success

    except SQLAlchemyError as db_error:
        await session.rollback()
        await task_logger.log_task_failure(
            log_entry,
            f"Database error during Google Calendar indexing for connector {connector_id}",
            str(db_error),
            {"error_type": "SQLAlchemyError"},
        )
        logger.error(f"Database error: {db_error!s}", exc_info=True)
        return 0, f"Database error: {db_error!s}"
    except Exception as e:
        await session.rollback()
        await task_logger.log_task_failure(
            log_entry,
            f"Failed to index Google Calendar events for connector {connector_id}",
            str(e),
            {"error_type": type(e).__name__},
        )
        logger.error(f"Failed to index Google Calendar events: {e!s}", exc_info=True)
        return 0, f"Failed to index Google Calendar events: {e!s}"

View file

@ -14,6 +14,8 @@ dependencies = [
"fastapi-users[oauth,sqlalchemy]>=14.0.1",
"firecrawl-py>=1.12.0",
"github3.py==4.0.1",
"google-api-python-client>=2.156.0",
"google-auth-oauthlib>=1.2.1",
"langchain-community>=0.3.17",
"langchain-unstructured>=0.1.6",
"langgraph>=0.3.29",

View file

@ -1252,6 +1252,22 @@ grpc = [
{ name = "grpcio-status" },
]
[[package]]
name = "google-api-python-client"
version = "2.177.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "google-api-core" },
{ name = "google-auth" },
{ name = "google-auth-httplib2" },
{ name = "httplib2" },
{ name = "uritemplate" },
]
sdist = { url = "https://files.pythonhosted.org/packages/7a/75/a89cad519fa8910132e3b08571d0e682ae1163643da6f963f1930f3dc788/google_api_python_client-2.177.0.tar.gz", hash = "sha256:9ffd2b57d68f5afa7e6ac64e2c440534eaa056cbb394812a62ff94723c31b50e", size = 13184405, upload-time = "2025-07-23T16:22:46.321Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/47/f5/121248e18ca605a11720c81ae1b52a5a8cb690af9f01887c56de23cd9a5a/google_api_python_client-2.177.0-py3-none-any.whl", hash = "sha256:f2f50f11105ab883eb9b6cf38ec54ea5fd4b429249f76444bec90deba5be79b3", size = 13709470, upload-time = "2025-07-23T16:22:44.081Z" },
]
[[package]]
name = "google-auth"
version = "2.40.3"
@ -1266,6 +1282,32 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/17/63/b19553b658a1692443c62bd07e5868adaa0ad746a0751ba62c59568cd45b/google_auth-2.40.3-py2.py3-none-any.whl", hash = "sha256:1370d4593e86213563547f97a92752fc658456fe4514c809544f330fed45a7ca", size = 216137, upload-time = "2025-06-04T18:04:55.573Z" },
]
[[package]]
name = "google-auth-httplib2"
version = "0.2.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "google-auth" },
{ name = "httplib2" },
]
sdist = { url = "https://files.pythonhosted.org/packages/56/be/217a598a818567b28e859ff087f347475c807a5649296fb5a817c58dacef/google-auth-httplib2-0.2.0.tar.gz", hash = "sha256:38aa7badf48f974f1eb9861794e9c0cb2a0511a4ec0679b1f886d108f5640e05", size = 10842, upload-time = "2023-12-12T17:40:30.722Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/be/8a/fe34d2f3f9470a27b01c9e76226965863f153d5fbe276f83608562e49c04/google_auth_httplib2-0.2.0-py2.py3-none-any.whl", hash = "sha256:b65a0a2123300dd71281a7bf6e64d65a0759287df52729bdd1ae2e47dc311a3d", size = 9253, upload-time = "2023-12-12T17:40:13.055Z" },
]
[[package]]
name = "google-auth-oauthlib"
version = "1.2.2"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "google-auth" },
{ name = "requests-oauthlib" },
]
sdist = { url = "https://files.pythonhosted.org/packages/fb/87/e10bf24f7bcffc1421b84d6f9c3377c30ec305d082cd737ddaa6d8f77f7c/google_auth_oauthlib-1.2.2.tar.gz", hash = "sha256:11046fb8d3348b296302dd939ace8af0a724042e8029c1b872d87fabc9f41684", size = 20955, upload-time = "2025-04-22T16:40:29.172Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/ac/84/40ee070be95771acd2f4418981edb834979424565c3eec3cd88b6aa09d24/google_auth_oauthlib-1.2.2-py3-none-any.whl", hash = "sha256:fd619506f4b3908b5df17b65f39ca8d66ea56986e5472eb5978fd8f3786f00a2", size = 19072, upload-time = "2025-04-22T16:40:28.174Z" },
]
[[package]]
name = "google-cloud-vision"
version = "3.10.2"
@ -1470,6 +1512,18 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/7e/f5/f66802a942d491edb555dd61e3a9961140fd64c90bce1eafd741609d334d/httpcore-1.0.9-py3-none-any.whl", hash = "sha256:2d400746a40668fc9dec9810239072b40b4484b640a8c38fd654a024c7a1bf55", size = 78784, upload-time = "2025-04-24T22:06:20.566Z" },
]
[[package]]
name = "httplib2"
version = "0.22.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "pyparsing" },
]
sdist = { url = "https://files.pythonhosted.org/packages/3d/ad/2371116b22d616c194aa25ec410c9c6c37f23599dcd590502b74db197584/httplib2-0.22.0.tar.gz", hash = "sha256:d7a10bc5ef5ab08322488bde8c726eeee5c8618723fdb399597ec58f3d82df81", size = 351116, upload-time = "2023-03-21T22:29:37.214Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/a8/6c/d2fbdaaa5959339d53ba38e94c123e4e84b8fbc4b84beb0e70d7c1608486/httplib2-0.22.0-py3-none-any.whl", hash = "sha256:14ae0a53c1ba8f3d37e9e27cf37eabb0fb9980f435ba405d546948b009dd64dc", size = 96854, upload-time = "2023-03-21T22:29:35.683Z" },
]
[[package]]
name = "httptools"
version = "0.6.4"
@ -4724,6 +4778,8 @@ dependencies = [
{ name = "fastapi-users", extra = ["oauth", "sqlalchemy"] },
{ name = "firecrawl-py" },
{ name = "github3-py" },
{ name = "google-api-python-client" },
{ name = "google-auth-oauthlib" },
{ name = "langchain-community" },
{ name = "langchain-unstructured" },
{ name = "langgraph" },
@ -4763,6 +4819,8 @@ requires-dist = [
{ name = "fastapi-users", extras = ["oauth", "sqlalchemy"], specifier = ">=14.0.1" },
{ name = "firecrawl-py", specifier = ">=1.12.0" },
{ name = "github3-py", specifier = "==4.0.1" },
{ name = "google-api-python-client", specifier = ">=2.156.0" },
{ name = "google-auth-oauthlib", specifier = ">=1.2.1" },
{ name = "langchain-community", specifier = ">=0.3.17" },
{ name = "langchain-unstructured", specifier = ">=0.1.6" },
{ name = "langgraph", specifier = ">=0.3.29" },

View file

@ -0,0 +1,452 @@
"use client";
import { zodResolver } from "@hookform/resolvers/zod";
import { IconCalendar } from "@tabler/icons-react";
import { motion } from "framer-motion";
import { ArrowLeft, Check, ExternalLink, Loader2 } from "lucide-react";
import Link from "next/link";
import { useParams, useRouter, useSearchParams } from "next/navigation";
import { useEffect, useState } from "react";
import { useForm } from "react-hook-form";
import { toast } from "sonner";
import { z } from "zod";
import { Button } from "@/components/ui/button";
import {
Card,
CardContent,
CardDescription,
CardFooter,
CardHeader,
CardTitle,
} from "@/components/ui/card";
import { Checkbox } from "@/components/ui/checkbox";
import {
Form,
FormControl,
FormDescription,
FormField,
FormItem,
FormLabel,
FormMessage,
} from "@/components/ui/form";
import { Input } from "@/components/ui/input";
import { useSearchSourceConnectors } from "@/hooks/useSearchSourceConnectors";
/**
 * Zod validation schema for the connector setup form.
 *
 * - `name`: connector display name, at least 3 characters long.
 * - `calendar_ids`: ids of the calendars to include; at least one is required.
 */
const googleCalendarConnectorFormSchema = z.object({
	name: z.string().min(3, { message: "Connector name must be at least 3 characters." }),
	calendar_ids: z
		.array(z.string())
		.min(1, { message: "At least one calendar must be selected." }),
});
/**
 * Shape of the values handled by the setup form, derived from
 * `googleCalendarConnectorFormSchema` so the type and the validation rules
 * cannot drift apart.
 */
type GoogleCalendarConnectorFormValues = z.infer<typeof googleCalendarConnectorFormSchema>;
/**
 * One calendar entry as returned in the `calendars` array of the OAuth
 * session response and listed in the calendar-selection step.
 */
interface Calendar {
// Value stored in the form's `calendar_ids` when the calendar is ticked.
id: string;
// Rendered as the checkbox label.
summary: string;
// Optional; rendered as small helper text under the label when present.
description?: string;
// When truthy, a "Primary" badge is shown next to the label.
primary?: boolean;
// Not read by this page. NOTE(review): presumably the caller's permission
// level on the calendar as reported by the backend — confirm.
access_role: string;
// Not read by this page. NOTE(review): presumably the calendar's time zone
// identifier — confirm.
time_zone?: string;
}
/**
 * OAuth credentials returned in the `credentials` field of the OAuth session
 * response. `client_id`, `client_secret` and `refresh_token` are copied into
 * the connector config when the connector is created.
 *
 * NOTE(review): this means the OAuth client secret and refresh token pass
 * through the browser — confirm this is intended.
 */
interface OAuthCredentials {
client_id: string;
client_secret: string;
refresh_token: string;
// Not read anywhere on this page.
access_token: string;
}
/**
 * Setup page for the Google Calendar connector.
 *
 * The page has two steps:
 *  1. "Connect" — asks the backend for a Google OAuth URL and redirects the
 *     browser to it.
 *  2. "Configure" — after the OAuth callback lands back here with
 *     `success=true&session_key=...`, the session key is exchanged with the
 *     backend for credentials and the calendar list; the user then names the
 *     connector, picks calendars and the connector is created.
 *
 * Query parameters read: `success`, `error`, `message`, `session_key`.
 * Route parameter read: `search_space_id`.
 */
export default function GoogleCalendarConnectorPage() {
	const router = useRouter();
	const params = useParams();
	const searchParams = useSearchParams();
	const searchSpaceId = params.search_space_id as string;

	const { createConnector } = useSearchSourceConnectors();
	const [isSubmitting, setIsSubmitting] = useState(false);
	const [isConnecting, setIsConnecting] = useState(false);
	const [isConnected, setIsConnected] = useState(false);
	const [calendars, setCalendars] = useState<Calendar[]>([]);
	const [credentials, setCredentials] = useState<OAuthCredentials | null>(null);

	// Initialize the form
	const form = useForm<GoogleCalendarConnectorFormValues>({
		resolver: zodResolver(googleCalendarConnectorFormSchema),
		defaultValues: {
			name: "",
			calendar_ids: [],
		},
	});

	/**
	 * Exchanges the one-off session key from the OAuth callback for the
	 * credentials and calendar list, then switches the page to the
	 * configuration step. Any failure (network, non-2xx, malformed payload)
	 * is reported with a toast and leaves the page on the "Connect" step.
	 */
	const fetchOAuthData = async (sessionKey: string) => {
		try {
			const response = await fetch(
				// The key comes straight from the URL, so it must be re-encoded
				// before being placed in another query string.
				`${process.env.NEXT_PUBLIC_FASTAPI_BACKEND_URL}/api/v1/auth/google/session?session_key=${encodeURIComponent(sessionKey)}`,
				{
					method: "GET",
					headers: {
						Authorization: `Bearer ${localStorage.getItem("surfsense_bearer_token")}`,
					},
				}
			);
			if (!response.ok) {
				throw new Error("Failed to fetch OAuth data");
			}
			const data: { credentials?: OAuthCredentials | null; calendars?: Calendar[] | null } =
				await response.json();
			// Do not enter the configuration step with a payload the form cannot
			// use: a missing calendar list would crash the render, and missing
			// credentials would make submission impossible.
			if (!data.credentials || !Array.isArray(data.calendars)) {
				throw new Error("Malformed OAuth session response");
			}
			setCredentials(data.credentials);
			setCalendars(data.calendars);
			setIsConnected(true);
			toast.success("Successfully connected to Google Calendar!");
		} catch (error) {
			console.error("Error fetching OAuth data:", error);
			toast.error("Failed to retrieve Google Calendar data");
		}
	};

	// React to the OAuth callback parameters.
	// `fetchOAuthData` is intentionally not listed as a dependency: it is
	// re-created on every render and the effect should only re-run when the
	// query string changes.
	useEffect(() => {
		const success = searchParams.get("success");
		const error = searchParams.get("error");
		const message = searchParams.get("message");
		const sessionKey = searchParams.get("session_key");
		if (success === "true" && sessionKey) {
			// OAuth succeeded but no connector exists yet — only the account is
			// linked. The "created" toast is therefore NOT shown here; it is
			// shown by onSubmit once the connector has actually been created.
			void fetchOAuthData(sessionKey);
		} else if (error) {
			toast.error(message || "Failed to connect to Google Calendar");
		} else if (success === "true") {
			// NOTE(review): kept for a redirect carrying `success=true` without a
			// session key — confirm whether the backend ever produces one;
			// otherwise this branch can be removed.
			toast.success("Google Calendar connector created successfully!");
		}
	}, [searchParams]);

	/**
	 * Starts the OAuth flow: asks the backend for the Google authorization URL
	 * for this search space and sends the browser there. On failure the button
	 * is re-enabled and an error toast is shown.
	 */
	const handleConnectGoogle = async () => {
		setIsConnecting(true);
		try {
			// Call backend to initiate OAuth flow
			const response = await fetch(
				`${process.env.NEXT_PUBLIC_FASTAPI_BACKEND_URL}/api/v1/auth/google/calendar/connector/add/?space_id=${encodeURIComponent(searchSpaceId)}`,
				{
					method: "GET",
					headers: {
						Authorization: `Bearer ${localStorage.getItem("surfsense_bearer_token")}`,
					},
				}
			);
			if (!response.ok) {
				throw new Error("Failed to initiate Google OAuth");
			}
			const data: { auth_url?: unknown } = await response.json();
			// Assigning a non-string to location.href would navigate to
			// "/undefined"; treat a missing URL as a failed initiation instead.
			if (typeof data.auth_url !== "string" || data.auth_url.length === 0) {
				throw new Error("OAuth initiation response did not include an auth_url");
			}
			// Redirect to Google for authentication
			window.location.href = data.auth_url;
		} catch (error) {
			console.error("Error connecting to Google:", error);
			toast.error("Failed to connect to Google Calendar");
			setIsConnecting(false);
		}
	};

	/**
	 * Creates the connector from the validated form values plus the OAuth
	 * credentials obtained earlier, then navigates to the connectors list.
	 */
	const onSubmit = async (values: GoogleCalendarConnectorFormValues) => {
		if (!isConnected || !credentials) {
			toast.error("Please connect your Google account first");
			return;
		}
		// Defensive: the schema already enforces a non-empty selection.
		if (values.calendar_ids.length === 0) {
			toast.error("Please select at least one calendar");
			return;
		}
		setIsSubmitting(true);
		try {
			await createConnector({
				name: values.name,
				connector_type: "GOOGLE_CALENDAR_CONNECTOR",
				config: {
					GOOGLE_CALENDAR_CLIENT_ID: credentials.client_id,
					GOOGLE_CALENDAR_CLIENT_SECRET: credentials.client_secret,
					GOOGLE_CALENDAR_REFRESH_TOKEN: credentials.refresh_token,
					GOOGLE_CALENDAR_CALENDAR_IDS: values.calendar_ids,
				},
				is_indexable: true,
				last_indexed_at: null,
			});
			toast.success("Google Calendar connector created successfully!");
			// Navigate back to connectors page
			router.push(`/dashboard/${searchSpaceId}/connectors`);
		} catch (error) {
			console.error("Error creating connector:", error);
			toast.error(error instanceof Error ? error.message : "Failed to create connector");
		} finally {
			setIsSubmitting(false);
		}
	};

	return (
		<div className="container mx-auto py-8 max-w-2xl">
			<motion.div
				initial={{ opacity: 0, y: 20 }}
				animate={{ opacity: 1, y: 0 }}
				transition={{ duration: 0.5 }}
			>
				{/* Header */}
				<div className="mb-8">
					<Link
						href={`/dashboard/${searchSpaceId}/connectors/add`}
						className="inline-flex items-center text-sm text-muted-foreground hover:text-foreground mb-4"
					>
						<ArrowLeft className="mr-2 h-4 w-4" />
						Back to connectors
					</Link>
					<div className="flex items-center gap-4">
						<div className="flex h-12 w-12 items-center justify-center rounded-lg bg-blue-100 dark:bg-blue-900">
							<IconCalendar className="h-6 w-6 text-blue-600 dark:text-blue-400" />
						</div>
						<div>
							<h1 className="text-3xl font-bold tracking-tight">Connect Google Calendar</h1>
							<p className="text-muted-foreground">
								Connect your Google Calendar to search events, meetings and schedules.
							</p>
						</div>
					</div>
				</div>
				{/* OAuth Connection Card */}
				{!isConnected ? (
					<Card>
						<CardHeader>
							<CardTitle>Connect Your Google Account</CardTitle>
							<CardDescription>
								Connect your Google account to access your calendar events. We'll only request
								read-only access to your calendars.
							</CardDescription>
						</CardHeader>
						<CardContent className="space-y-4">
							<div className="flex items-center space-x-2 text-sm text-muted-foreground">
								<Check className="h-4 w-4 text-green-500" />
								<span>Read-only access to your calendar events</span>
							</div>
							<div className="flex items-center space-x-2 text-sm text-muted-foreground">
								<Check className="h-4 w-4 text-green-500" />
								<span>Access works even when you're offline</span>
							</div>
							<div className="flex items-center space-x-2 text-sm text-muted-foreground">
								<Check className="h-4 w-4 text-green-500" />
								<span>You can disconnect anytime</span>
							</div>
						</CardContent>
						<CardFooter className="flex justify-between">
							<Button
								type="button"
								variant="outline"
								onClick={() => router.push(`/dashboard/${searchSpaceId}/connectors/add`)}
							>
								Cancel
							</Button>
							<Button onClick={handleConnectGoogle} disabled={isConnecting}>
								{isConnecting ? (
									<>
										<Loader2 className="mr-2 h-4 w-4 animate-spin" />
										Connecting...
									</>
								) : (
									<>
										<ExternalLink className="mr-2 h-4 w-4" />
										Connect Your Google Account
									</>
								)}
							</Button>
						</CardFooter>
					</Card>
				) : (
					/* Configuration Form Card */
					<Card>
						<CardHeader>
							<CardTitle>Configure Google Calendar Connector</CardTitle>
							<CardDescription>
								Your Google account is connected! Now select which calendars to include and give
								your connector a name.
							</CardDescription>
						</CardHeader>
						<Form {...form}>
							<form onSubmit={form.handleSubmit(onSubmit)}>
								<CardContent className="space-y-6">
									{/* Connector Name */}
									<FormField
										control={form.control}
										name="name"
										render={({ field }) => (
											<FormItem>
												<FormLabel>Connector Name</FormLabel>
												<FormControl>
													<Input placeholder="My Google Calendar" {...field} />
												</FormControl>
												<FormDescription>
													A friendly name to identify this connector.
												</FormDescription>
												<FormMessage />
											</FormItem>
										)}
									/>
									{/* Calendar Selection */}
									<FormField
										control={form.control}
										name="calendar_ids"
										render={() => (
											<FormItem>
												<div className="mb-4">
													<FormLabel className="text-base">Select Calendars</FormLabel>
													<FormDescription>
														Choose which calendars you want to include in your search results.
													</FormDescription>
												</div>
												{calendars.map((calendar) => (
													<FormField
														key={calendar.id}
														control={form.control}
														name="calendar_ids"
														render={({ field }) => {
															return (
																<FormItem className="flex flex-row items-start space-x-3 space-y-0">
																	<FormControl>
																		<Checkbox
																			checked={field.value?.includes(calendar.id)}
																			onCheckedChange={(checked) => {
																				// Normalise once so both branches treat a
																				// missing value as an empty selection.
																				const selected = field.value ?? [];
																				field.onChange(
																					checked
																						? [...selected, calendar.id]
																						: selected.filter((value) => value !== calendar.id)
																				);
																			}}
																		/>
																	</FormControl>
																	<div className="space-y-1 leading-none">
																		<FormLabel className="font-normal">
																			{calendar.summary}
																			{calendar.primary && (
																				<span className="ml-2 text-xs bg-blue-100 text-blue-800 px-2 py-1 rounded">
																					Primary
																				</span>
																			)}
																		</FormLabel>
																		{calendar.description && (
																			<FormDescription className="text-xs">
																				{calendar.description}
																			</FormDescription>
																		)}
																	</div>
																</FormItem>
															);
														}}
													/>
												))}
												<FormMessage />
											</FormItem>
										)}
									/>
								</CardContent>
								<CardFooter className="flex justify-between">
									<Button
										type="button"
										variant="outline"
										onClick={() => router.push(`/dashboard/${searchSpaceId}/connectors/add`)}
									>
										Cancel
									</Button>
									<Button
										type="submit"
										disabled={isSubmitting || form.watch("calendar_ids").length === 0}
									>
										{isSubmitting ? (
											<>
												<Loader2 className="mr-2 h-4 w-4 animate-spin" />
												Creating...
											</>
										) : (
											<>
												<Check className="mr-2 h-4 w-4" />
												Create Connector
											</>
										)}
									</Button>
								</CardFooter>
							</form>
						</Form>
					</Card>
				)}
				{/* Help Section */}
				<Card className="mt-6">
					<CardHeader>
						<CardTitle className="text-lg">How It Works</CardTitle>
					</CardHeader>
					<CardContent className="space-y-4">
						<div>
							<h4 className="font-medium mb-2">1. Connect Your Account</h4>
							<p className="text-sm text-muted-foreground">
								Click "Connect Your Google Account" to start the secure OAuth process. You'll be
								redirected to Google to sign in.
							</p>
						</div>
						<div>
							<h4 className="font-medium mb-2">2. Grant Permissions</h4>
							<p className="text-sm text-muted-foreground">
								Google will ask for permission to read your calendar events. We only request
								read-only access to keep your data safe.
							</p>
						</div>
						<div>
							<h4 className="font-medium mb-2">3. Select Calendars</h4>
							<p className="text-sm text-muted-foreground">
								Choose which calendars you want to include in your search results. You can select
								multiple calendars.
							</p>
						</div>
						<div>
							<h4 className="font-medium mb-2">4. Start Searching</h4>
							<p className="text-sm text-muted-foreground">
								Once connected, your calendar events will be indexed and searchable alongside your
								other content.
							</p>
						</div>
						{isConnected && (
							<div className="mt-4 p-3 bg-green-50 border border-green-200 rounded-md">
								<p className="text-sm text-green-800">
									Your Google account is successfully connected! You can now configure your
									connector above.
								</p>
							</div>
						)}
					</CardContent>
				</Card>
			</motion.div>
		</div>
	);
}

View file

@ -9,6 +9,7 @@ import {
IconBrandWindows,
IconBrandZoom,
IconChecklist,
IconCalendar,
IconChevronDown,
IconChevronRight,
IconLayoutKanban,
@ -148,6 +149,13 @@ const connectorCategories: ConnectorCategory[] = [
id: "communication",
title: "Communication",
connectors: [
{
id: "google-calendar-connector",
title: "Google Calendar",
description: "Connect to Google Calendar to search events, meetings and schedules.",
icon: <IconCalendar className="h-6 w-6" />,
status: "available",
},
{
id: "gmail",
title: "Gmail",
@ -212,6 +220,7 @@ export default function ConnectorsPage() {
"knowledge-bases",
"project-management",
"team-chats",
"communication",
]);
const toggleCategory = (categoryId: string) => {

View file

@ -8,6 +8,7 @@ import {
IconBrandSlack,
IconBrandYoutube,
IconChecklist,
IconCalendar,
IconLayoutKanban,
IconTicket,
} from "@tabler/icons-react";
@ -148,6 +149,7 @@ const documentTypeIcons = {
DISCORD_CONNECTOR: IconBrandDiscord,
CONFLUENCE_CONNECTOR: IconBook,
CLICKUP_CONNECTOR: IconChecklist,
GOOGLE_CALENDAR_CONNECTOR: IconCalendar,
} as const;
const columns: ColumnDef<Document>[] = [

View file

@ -196,7 +196,7 @@ const ConnectorSelector = React.memo(
type="button"
>
{getConnectorIcon(connector.type)}
<span className="flex-1 text-sm font-medium">{connector.name}</span>
<span className="flex-1 text-sm truncate font-medium">{connector.name}</span>
</Button>
);
})

View file

@ -4,6 +4,7 @@ import {
IconBrandNotion,
IconBrandSlack,
IconBrandYoutube,
IconCalendar,
IconLayoutKanban,
IconLinkPlus,
IconTicket,
@ -56,6 +57,8 @@ export const getConnectorIcon = (connectorType: string) => {
return <IconBrandDiscord {...iconProps} />;
case "JIRA_CONNECTOR":
return <IconTicket {...iconProps} />;
case "GOOGLE_CALENDAR_CONNECTOR":
return <IconCalendar {...iconProps} />;
case "DEEP":
return <Sparkles {...iconProps} />;
case "DEEPER":

View file

@ -39,5 +39,9 @@ export const editConnectorSchema = z.object({
JIRA_BASE_URL: z.string().optional(),
JIRA_EMAIL: z.string().optional(),
JIRA_API_TOKEN: z.string().optional(),
GOOGLE_CALENDAR_CLIENT_ID: z.string().optional(),
GOOGLE_CALENDAR_CLIENT_SECRET: z.string().optional(),
GOOGLE_CALENDAR_REFRESH_TOKEN: z.string().optional(),
GOOGLE_CALENDAR_CALENDAR_IDS: z.string().optional(),
});
export type EditConnectorFormValues = z.infer<typeof editConnectorSchema>;