diff --git a/surfsense_backend/app/routes/search_source_connectors_routes.py b/surfsense_backend/app/routes/search_source_connectors_routes.py
index 95c9e08..ab69639 100644
--- a/surfsense_backend/app/routes/search_source_connectors_routes.py
+++ b/surfsense_backend/app/routes/search_source_connectors_routes.py
@@ -270,6 +270,8 @@ async def delete_search_source_connector(
 async def index_connector_content(
     connector_id: int,
     search_space_id: int = Query(..., description="ID of the search space to store indexed content"),
+    start_date: str = Query(None, description="Start date for indexing (YYYY-MM-DD format). If not provided, uses last_indexed_at or defaults to 365 days ago"),
+    end_date: str = Query(None, description="End date for indexing (YYYY-MM-DD format). If not provided, uses today's date"),
     session: AsyncSession = Depends(get_async_session),
     user: User = Depends(current_active_user),
     background_tasks: BackgroundTasks = None
@@ -301,105 +303,59 @@ async def index_connector_content(
     # Handle different connector types
     response_message = ""
-    indexing_from = None
-    indexing_to = None
     today_str = datetime.now().strftime("%Y-%m-%d")
-
-    if connector.connector_type == SearchSourceConnectorType.SLACK_CONNECTOR:
-        # Determine the time range that will be indexed
-        if not connector.last_indexed_at:
-            start_date = "365 days ago" # Or perhaps set a specific date if needed
-        else:
-            # Check if last_indexed_at is today
+
+    # Determine the actual date range to use
+    if start_date is None:
+        # Use last_indexed_at or default to 365 days ago
+        if connector.last_indexed_at:
             today = datetime.now().date()
             if connector.last_indexed_at.date() == today:
                 # If last indexed today, go back 1 day to ensure we don't miss anything
-                start_date = (today - timedelta(days=1)).strftime("%Y-%m-%d")
+                indexing_from = (today - timedelta(days=1)).strftime("%Y-%m-%d")
             else:
-                start_date = connector.last_indexed_at.strftime("%Y-%m-%d")
-
+                indexing_from = connector.last_indexed_at.strftime("%Y-%m-%d")
+        else:
+            indexing_from = (datetime.now() - timedelta(days=365)).strftime("%Y-%m-%d")
+    else:
         indexing_from = start_date
-        indexing_to = today_str
+    if end_date is None:
+        indexing_to = today_str
+    else:
+        indexing_to = end_date
+
+    if connector.connector_type == SearchSourceConnectorType.SLACK_CONNECTOR:
         # Run indexing in background
-        logger.info(f"Triggering Slack indexing for connector {connector_id} into search space {search_space_id}")
-        background_tasks.add_task(run_slack_indexing_with_new_session, connector_id, search_space_id)
+        logger.info(f"Triggering Slack indexing for connector {connector_id} into search space {search_space_id} from {indexing_from} to {indexing_to}")
+        background_tasks.add_task(run_slack_indexing_with_new_session, connector_id, search_space_id, indexing_from, indexing_to)
         response_message = "Slack indexing started in the background."
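With the two new query parameters in place, a caller can scope an indexing run explicitly. Below is a minimal client-side sketch; the POST method, the `/search-source-connectors/{connector_id}/index` path, and the bearer-token header are assumptions, since the route decorator and auth scheme are outside this hunk:

```python
import requests  # hypothetical smoke test, not part of this PR

BASE_URL = "http://localhost:8000"  # assumed backend address
CONNECTOR_ID = 12                   # hypothetical connector
SEARCH_SPACE_ID = 3                 # hypothetical search space

# Index only Q1 2024. Omitting start_date/end_date falls back to
# last_indexed_at (or 365 days ago) and today, per the route logic above.
resp = requests.post(
    f"{BASE_URL}/search-source-connectors/{CONNECTOR_ID}/index",
    params={
        "search_space_id": SEARCH_SPACE_ID,
        "start_date": "2024-01-01",
        "end_date": "2024-03-31",
    },
    headers={"Authorization": "Bearer <token>"},  # assumed auth scheme
    timeout=30,
)
resp.raise_for_status()
print(resp.json())  # e.g. {"message": "Slack indexing started in the background."}
```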
 
     elif connector.connector_type == SearchSourceConnectorType.NOTION_CONNECTOR:
-        # Determine the time range that will be indexed
-        if not connector.last_indexed_at:
-            start_date = "365 days ago" # Or perhaps set a specific date
-        else:
-            # Check if last_indexed_at is today
-            today = datetime.now().date()
-            if connector.last_indexed_at.date() == today:
-                # If last indexed today, go back 1 day to ensure we don't miss anything
-                start_date = (today - timedelta(days=1)).strftime("%Y-%m-%d")
-            else:
-                start_date = connector.last_indexed_at.strftime("%Y-%m-%d")
-
-        indexing_from = start_date
-        indexing_to = today_str
-
         # Run indexing in background
-        logger.info(f"Triggering Notion indexing for connector {connector_id} into search space {search_space_id}")
-        background_tasks.add_task(run_notion_indexing_with_new_session, connector_id, search_space_id)
+        logger.info(f"Triggering Notion indexing for connector {connector_id} into search space {search_space_id} from {indexing_from} to {indexing_to}")
+        background_tasks.add_task(run_notion_indexing_with_new_session, connector_id, search_space_id, indexing_from, indexing_to)
         response_message = "Notion indexing started in the background."
 
     elif connector.connector_type == SearchSourceConnectorType.GITHUB_CONNECTOR:
-        # GitHub connector likely indexes everything relevant, or uses internal logic
-        # Setting indexing_from to None and indexing_to to today
-        indexing_from = None
-        indexing_to = today_str
-
         # Run indexing in background
-        logger.info(f"Triggering GitHub indexing for connector {connector_id} into search space {search_space_id}")
-        background_tasks.add_task(run_github_indexing_with_new_session, connector_id, search_space_id)
+        logger.info(f"Triggering GitHub indexing for connector {connector_id} into search space {search_space_id} from {indexing_from} to {indexing_to}")
+        background_tasks.add_task(run_github_indexing_with_new_session, connector_id, search_space_id, indexing_from, indexing_to)
         response_message = "GitHub indexing started in the background."
 
     elif connector.connector_type == SearchSourceConnectorType.LINEAR_CONNECTOR:
-        # Determine the time range that will be indexed
-        if not connector.last_indexed_at:
-            start_date = "365 days ago"
-        else:
-            # Check if last_indexed_at is today
-            today = datetime.now().date()
-            if connector.last_indexed_at.date() == today:
-                # If last indexed today, go back 1 day to ensure we don't miss anything
-                start_date = (today - timedelta(days=1)).strftime("%Y-%m-%d")
-            else:
-                start_date = connector.last_indexed_at.strftime("%Y-%m-%d")
-
-        indexing_from = start_date
-        indexing_to = today_str
-
         # Run indexing in background
-        logger.info(f"Triggering Linear indexing for connector {connector_id} into search space {search_space_id}")
-        background_tasks.add_task(run_linear_indexing_with_new_session, connector_id, search_space_id)
+        logger.info(f"Triggering Linear indexing for connector {connector_id} into search space {search_space_id} from {indexing_from} to {indexing_to}")
+        background_tasks.add_task(run_linear_indexing_with_new_session, connector_id, search_space_id, indexing_from, indexing_to)
         response_message = "Linear indexing started in the background."
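Because the date-range resolution is now hoisted above the connector branches, every connector type shares the same fallback rules. As a standalone sketch (the helper name is mine, not the PR's), the resolution reduces to a small pure function, which makes the "indexed today, back up one day" edge case easy to unit-test:

```python
from datetime import datetime, timedelta
from typing import Optional, Tuple

def resolve_window(
    last_indexed_at: Optional[datetime],
    start_date: Optional[str],
    end_date: Optional[str],
) -> Tuple[str, str]:
    """Mirror of the route's fallback logic (illustrative helper, not in the PR)."""
    today = datetime.now().date()
    if start_date is None:
        if last_indexed_at:
            if last_indexed_at.date() == today:
                # Re-index yesterday too so nothing falls in the gap.
                indexing_from = (today - timedelta(days=1)).strftime("%Y-%m-%d")
            else:
                indexing_from = last_indexed_at.strftime("%Y-%m-%d")
        else:
            indexing_from = (datetime.now() - timedelta(days=365)).strftime("%Y-%m-%d")
    else:
        indexing_from = start_date
    indexing_to = end_date if end_date is not None else today.strftime("%Y-%m-%d")
    return indexing_from, indexing_to

# e.g. resolve_window(None, "2024-01-01", None) -> ("2024-01-01", <today>)
```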
 
     elif connector.connector_type == SearchSourceConnectorType.DISCORD_CONNECTOR:
-        # Determine the time range that will be indexed
-        if not connector.last_indexed_at:
-            start_date = "365 days ago"
-        else:
-            today = datetime.now().date()
-            if connector.last_indexed_at.date() == today:
-                # If last indexed today, go back 1 day to ensure we don't miss anything
-                start_date = (today - timedelta(days=1)).strftime("%Y-%m-%d")
-            else:
-                start_date = connector.last_indexed_at.strftime("%Y-%m-%d")
-
-        indexing_from = start_date
-        indexing_to = today_str
-
         # Run indexing in background
         logger.info(
-            f"Triggering Discord indexing for connector {connector_id} into search space {search_space_id}"
+            f"Triggering Discord indexing for connector {connector_id} into search space {search_space_id} from {indexing_from} to {indexing_to}"
         )
         background_tasks.add_task(
-            run_discord_indexing_with_new_session, connector_id, search_space_id
+            run_discord_indexing_with_new_session, connector_id, search_space_id, indexing_from, indexing_to
         )
         response_message = "Discord indexing started in the background."
 
@@ -453,19 +409,23 @@ async def update_connector_last_indexed(
 async def run_slack_indexing_with_new_session(
     connector_id: int,
-    search_space_id: int
+    search_space_id: int,
+    start_date: str,
+    end_date: str
 ):
     """
     Create a new session and run the Slack indexing task.
     This prevents session leaks by creating a dedicated session for the background task.
     """
     async with async_session_maker() as session:
-        await run_slack_indexing(session, connector_id, search_space_id)
+        await run_slack_indexing(session, connector_id, search_space_id, start_date, end_date)
 
 async def run_slack_indexing(
     session: AsyncSession,
     connector_id: int,
-    search_space_id: int
+    search_space_id: int,
+    start_date: str,
+    end_date: str
 ):
     """
     Background task to run Slack indexing.
@@ -474,6 +434,8 @@ async def run_slack_indexing(
         session: Database session
         connector_id: ID of the Slack connector
         search_space_id: ID of the search space
+        start_date: Start date for indexing
+        end_date: End date for indexing
     """
     try:
         # Index Slack messages without updating last_indexed_at (we'll do it separately)
@@ -481,6 +443,8 @@ async def run_slack_indexing(
             session=session,
             connector_id=connector_id,
             search_space_id=search_space_id,
+            start_date=start_date,
+            end_date=end_date,
             update_last_indexed=False  # Don't update timestamp in the indexing function
         )
 
@@ -495,19 +459,23 @@ async def run_slack_indexing(
 async def run_notion_indexing_with_new_session(
     connector_id: int,
-    search_space_id: int
+    search_space_id: int,
+    start_date: str,
+    end_date: str
 ):
     """
     Create a new session and run the Notion indexing task.
     This prevents session leaks by creating a dedicated session for the background task.
     """
     async with async_session_maker() as session:
-        await run_notion_indexing(session, connector_id, search_space_id)
+        await run_notion_indexing(session, connector_id, search_space_id, start_date, end_date)
 
 async def run_notion_indexing(
     session: AsyncSession,
     connector_id: int,
-    search_space_id: int
+    search_space_id: int,
+    start_date: str,
+    end_date: str
 ):
     """
     Background task to run Notion indexing.
@@ -516,6 +484,8 @@ async def run_notion_indexing(
         session: Database session
         connector_id: ID of the Notion connector
         search_space_id: ID of the search space
+        start_date: Start date for indexing
+        end_date: End date for indexing
     """
     try:
         # Index Notion pages without updating last_indexed_at (we'll do it separately)
@@ -523,6 +493,8 @@ async def run_notion_indexing(
             session=session,
             connector_id=connector_id,
             search_space_id=search_space_id,
+            start_date=start_date,
+            end_date=end_date,
             update_last_indexed=False  # Don't update timestamp in the indexing function
         )
 
@@ -538,23 +510,27 @@ async def run_notion_indexing(
 # Add new helper functions for GitHub indexing
 async def run_github_indexing_with_new_session(
     connector_id: int,
-    search_space_id: int
+    search_space_id: int,
+    start_date: str,
+    end_date: str
 ):
     """Wrapper to run GitHub indexing with its own database session."""
-    logger.info(f"Background task started: Indexing GitHub connector {connector_id} into space {search_space_id}")
+    logger.info(f"Background task started: Indexing GitHub connector {connector_id} into space {search_space_id} from {start_date} to {end_date}")
     async with async_session_maker() as session:
-        await run_github_indexing(session, connector_id, search_space_id)
+        await run_github_indexing(session, connector_id, search_space_id, start_date, end_date)
     logger.info(f"Background task finished: Indexing GitHub connector {connector_id}")
 
 async def run_github_indexing(
     session: AsyncSession,
     connector_id: int,
-    search_space_id: int
+    search_space_id: int,
+    start_date: str,
+    end_date: str
 ):
     """Runs the GitHub indexing task and updates the timestamp."""
     try:
         indexed_count, error_message = await index_github_repos(
-            session, connector_id, search_space_id, update_last_indexed=False
+            session, connector_id, search_space_id, start_date, end_date, update_last_indexed=False
         )
         if error_message:
             logger.error(f"GitHub indexing failed for connector {connector_id}: {error_message}")
@@ -572,23 +548,27 @@ async def run_github_indexing(
 # Add new helper functions for Linear indexing
 async def run_linear_indexing_with_new_session(
     connector_id: int,
-    search_space_id: int
+    search_space_id: int,
+    start_date: str,
+    end_date: str
 ):
     """Wrapper to run Linear indexing with its own database session."""
-    logger.info(f"Background task started: Indexing Linear connector {connector_id} into space {search_space_id}")
+    logger.info(f"Background task started: Indexing Linear connector {connector_id} into space {search_space_id} from {start_date} to {end_date}")
     async with async_session_maker() as session:
-        await run_linear_indexing(session, connector_id, search_space_id)
+        await run_linear_indexing(session, connector_id, search_space_id, start_date, end_date)
     logger.info(f"Background task finished: Indexing Linear connector {connector_id}")
 
 async def run_linear_indexing(
     session: AsyncSession,
     connector_id: int,
-    search_space_id: int
+    search_space_id: int,
+    start_date: str,
+    end_date: str
 ):
     """Runs the Linear indexing task and updates the timestamp."""
     try:
         indexed_count, error_message = await index_linear_issues(
-            session, connector_id, search_space_id, update_last_indexed=False
+            session, connector_id, search_space_id, start_date, end_date, update_last_indexed=False
         )
         if error_message:
             logger.error(f"Linear indexing failed for connector {connector_id}: {error_message}")
@@ -606,19 +586,23 @@ async def run_linear_indexing(
 # Add new helper functions for discord indexing
 async def run_discord_indexing_with_new_session(
     connector_id: int,
-    search_space_id: int
+    search_space_id: int,
+    start_date: str,
+    end_date: str
 ):
     """
     Create a new session and run the Discord indexing task.
     This prevents session leaks by creating a dedicated session for the background task.
     """
     async with async_session_maker() as session:
-        await run_discord_indexing(session, connector_id, search_space_id)
+        await run_discord_indexing(session, connector_id, search_space_id, start_date, end_date)
 
 async def run_discord_indexing(
     session: AsyncSession,
     connector_id: int,
-    search_space_id: int
+    search_space_id: int,
+    start_date: str,
+    end_date: str
 ):
     """
     Background task to run Discord indexing.
@@ -626,6 +610,8 @@ async def run_discord_indexing(
         session: Database session
         connector_id: ID of the Discord connector
         search_space_id: ID of the search space
+        start_date: Start date for indexing
+        end_date: End date for indexing
     """
     try:
         # Index Discord messages without updating last_indexed_at (we'll do it separately)
@@ -633,6 +619,8 @@ async def run_discord_indexing(
             session=session,
             connector_id=connector_id,
             search_space_id=search_space_id,
+            start_date=start_date,
+            end_date=end_date,
             update_last_indexed=False  # Don't update timestamp in the indexing function
         )
 
diff --git a/surfsense_backend/app/tasks/connectors_indexing_tasks.py b/surfsense_backend/app/tasks/connectors_indexing_tasks.py
index 8317ac0..2572c7b 100644
--- a/surfsense_backend/app/tasks/connectors_indexing_tasks.py
+++ b/surfsense_backend/app/tasks/connectors_indexing_tasks.py
@@ -24,6 +24,8 @@ async def index_slack_messages(
     session: AsyncSession,
     connector_id: int,
     search_space_id: int,
+    start_date: str = None,
+    end_date: str = None,
     update_last_indexed: bool = True
 ) -> Tuple[int, Optional[str]]:
     """
@@ -61,27 +63,35 @@ async def index_slack_messages(
         slack_client = SlackHistory(token=slack_token)
 
         # Calculate date range
-        end_date = datetime.now()
-
-        # Use last_indexed_at as start date if available, otherwise use 365 days ago
-        if connector.last_indexed_at:
-            # Convert dates to be comparable (both timezone-naive)
-            last_indexed_naive = connector.last_indexed_at.replace(tzinfo=None) if connector.last_indexed_at.tzinfo else connector.last_indexed_at
+        if start_date is None or end_date is None:
+            # Fall back to calculating dates based on last_indexed_at
+            calculated_end_date = datetime.now()
 
-            # Check if last_indexed_at is in the future or after end_date
-            if last_indexed_naive > end_date:
-                logger.warning(f"Last indexed date ({last_indexed_naive.strftime('%Y-%m-%d')}) is in the future. Using 365 days ago instead.")
-                start_date = end_date - timedelta(days=365)
+            # Use last_indexed_at as start date if available, otherwise use 365 days ago
+            if connector.last_indexed_at:
+                # Convert dates to be comparable (both timezone-naive)
+                last_indexed_naive = connector.last_indexed_at.replace(tzinfo=None) if connector.last_indexed_at.tzinfo else connector.last_indexed_at
+
+                # Check if last_indexed_at is in the future or after end_date
+                if last_indexed_naive > calculated_end_date:
+                    logger.warning(f"Last indexed date ({last_indexed_naive.strftime('%Y-%m-%d')}) is in the future. Using 365 days ago instead.")
+                    calculated_start_date = calculated_end_date - timedelta(days=365)
+                else:
+                    calculated_start_date = last_indexed_naive
+                    logger.info(f"Using last_indexed_at ({calculated_start_date.strftime('%Y-%m-%d')}) as start date")
             else:
-                start_date = last_indexed_naive
-                logger.info(f"Using last_indexed_at ({start_date.strftime('%Y-%m-%d')}) as start date")
+                calculated_start_date = calculated_end_date - timedelta(days=365)  # Use 365 days as default
+                logger.info(f"No last_indexed_at found, using {calculated_start_date.strftime('%Y-%m-%d')} (365 days ago) as start date")
+
+            # Use calculated dates if not provided
+            start_date_str = start_date if start_date else calculated_start_date.strftime("%Y-%m-%d")
+            end_date_str = end_date if end_date else calculated_end_date.strftime("%Y-%m-%d")
         else:
-            start_date = end_date - timedelta(days=365)  # Use 365 days as default
-            logger.info(f"No last_indexed_at found, using {start_date.strftime('%Y-%m-%d')} (30 days ago) as start date")
-
-        # Format dates for Slack API
-        start_date_str = start_date.strftime("%Y-%m-%d")
-        end_date_str = end_date.strftime("%Y-%m-%d")
+            # Use provided dates
+            start_date_str = start_date
+            end_date_str = end_date
+
+        logger.info(f"Indexing Slack messages from {start_date_str} to {end_date_str}")
 
         # Get all channels
         try:
@@ -279,6 +289,8 @@ async def index_notion_pages(
     session: AsyncSession,
     connector_id: int,
     search_space_id: int,
+    start_date: str = None,
+    end_date: str = None,
     update_last_indexed: bool = True
 ) -> Tuple[int, Optional[str]]:
     """
@@ -317,20 +329,33 @@ async def index_notion_pages(
         notion_client = NotionHistoryConnector(token=notion_token)
 
         # Calculate date range
-        end_date = datetime.now()
+        if start_date is None or end_date is None:
+            # Fall back to calculating dates
+            calculated_end_date = datetime.now()
+            calculated_start_date = calculated_end_date - timedelta(days=365)  # Check for last 1 year of pages
+
+            # Use calculated dates if not provided
+            if start_date is None:
+                start_date_iso = calculated_start_date.strftime("%Y-%m-%dT%H:%M:%SZ")
+            else:
+                # Convert YYYY-MM-DD to ISO format
+                start_date_iso = datetime.strptime(start_date, "%Y-%m-%d").strftime("%Y-%m-%dT%H:%M:%SZ")
+
+            if end_date is None:
+                end_date_iso = calculated_end_date.strftime("%Y-%m-%dT%H:%M:%SZ")
+            else:
+                # Convert YYYY-MM-DD to ISO format
+                end_date_iso = datetime.strptime(end_date, "%Y-%m-%d").strftime("%Y-%m-%dT%H:%M:%SZ")
+        else:
+            # Convert provided dates to ISO format for Notion API
+            start_date_iso = datetime.strptime(start_date, "%Y-%m-%d").strftime("%Y-%m-%dT%H:%M:%SZ")
+            end_date_iso = datetime.strptime(end_date, "%Y-%m-%d").strftime("%Y-%m-%dT%H:%M:%SZ")
 
-        # Check for last 1 year of pages
-        start_date = end_date - timedelta(days=365)
-
-        # Format dates for Notion API (ISO format)
-        start_date_str = start_date.strftime("%Y-%m-%dT%H:%M:%SZ")
-        end_date_str = end_date.strftime("%Y-%m-%dT%H:%M:%SZ")
-
-        logger.info(f"Fetching Notion pages from {start_date_str} to {end_date_str}")
+        logger.info(f"Fetching Notion pages from {start_date_iso} to {end_date_iso}")
 
         # Get all pages
         try:
-            pages = notion_client.get_all_pages(start_date=start_date_str, end_date=end_date_str)
+            pages = notion_client.get_all_pages(start_date=start_date_iso, end_date=end_date_iso)
             logger.info(f"Found {len(pages)} Notion pages")
         except Exception as e:
             logger.error(f"Error fetching Notion pages: {str(e)}", exc_info=True)
@@ -524,6 +549,8 @@ async def index_github_repos(
     session: AsyncSession,
     connector_id: int,
     search_space_id: int,
+    start_date: str = None,
+    end_date: str = None,
     update_last_indexed: bool = True
 ) -> Tuple[int, Optional[str]]:
     """
@@ -575,6 +602,8 @@ async def index_github_repos(
         # For simplicity, we'll proceed with the list provided.
         # If a repo is inaccessible, get_repository_files will likely fail gracefully later.
         logger.info(f"Starting indexing for {len(repo_full_names_to_index)} selected repositories.")
+        if start_date and end_date:
+            logger.info(f"Date range requested: {start_date} to {end_date} (Note: GitHub indexing processes all files regardless of dates)")
 
         # 6. Iterate through selected repositories and index files
         for repo_full_name in repo_full_names_to_index:
@@ -688,6 +717,8 @@ async def index_linear_issues(
     session: AsyncSession,
     connector_id: int,
     search_space_id: int,
+    start_date: str = None,
+    end_date: str = None,
     update_last_indexed: bool = True
 ) -> Tuple[int, Optional[str]]:
     """
@@ -725,27 +756,33 @@ async def index_linear_issues(
         linear_client = LinearConnector(token=linear_token)
 
         # Calculate date range
-        end_date = datetime.now()
-
-        # Use last_indexed_at as start date if available, otherwise use 365 days ago
-        if connector.last_indexed_at:
-            # Convert dates to be comparable (both timezone-naive)
-            last_indexed_naive = connector.last_indexed_at.replace(tzinfo=None) if connector.last_indexed_at.tzinfo else connector.last_indexed_at
+        if start_date is None or end_date is None:
+            # Fall back to calculating dates based on last_indexed_at
+            calculated_end_date = datetime.now()
 
-            # Check if last_indexed_at is in the future or after end_date
-            if last_indexed_naive > end_date:
-                logger.warning(f"Last indexed date ({last_indexed_naive.strftime('%Y-%m-%d')}) is in the future. Using 365 days ago instead.")
-                start_date = end_date - timedelta(days=365)
+            # Use last_indexed_at as start date if available, otherwise use 365 days ago
+            if connector.last_indexed_at:
+                # Convert dates to be comparable (both timezone-naive)
+                last_indexed_naive = connector.last_indexed_at.replace(tzinfo=None) if connector.last_indexed_at.tzinfo else connector.last_indexed_at
+
+                # Check if last_indexed_at is in the future or after end_date
+                if last_indexed_naive > calculated_end_date:
+                    logger.warning(f"Last indexed date ({last_indexed_naive.strftime('%Y-%m-%d')}) is in the future. Using 365 days ago instead.")
+                    calculated_start_date = calculated_end_date - timedelta(days=365)
+                else:
+                    calculated_start_date = last_indexed_naive
+                    logger.info(f"Using last_indexed_at ({calculated_start_date.strftime('%Y-%m-%d')}) as start date")
             else:
-                start_date = last_indexed_naive
-                logger.info(f"Using last_indexed_at ({start_date.strftime('%Y-%m-%d')}) as start date")
+                calculated_start_date = calculated_end_date - timedelta(days=365)  # Use 365 days as default
+                logger.info(f"No last_indexed_at found, using {calculated_start_date.strftime('%Y-%m-%d')} (365 days ago) as start date")
+
+            # Use calculated dates if not provided
+            start_date_str = start_date if start_date else calculated_start_date.strftime("%Y-%m-%d")
+            end_date_str = end_date if end_date else calculated_end_date.strftime("%Y-%m-%d")
         else:
-            start_date = end_date - timedelta(days=365)  # Use 365 days as default
-            logger.info(f"No last_indexed_at found, using {start_date.strftime('%Y-%m-%d')} (365 days ago) as start date")
-
-        # Format dates for Linear API
-        start_date_str = start_date.strftime("%Y-%m-%d")
-        end_date_str = end_date.strftime("%Y-%m-%d")
+            # Use provided dates
+            start_date_str = start_date
+            end_date_str = end_date
 
         logger.info(f"Fetching Linear issues from {start_date_str} to {end_date_str}")
 
@@ -918,6 +955,8 @@ async def index_discord_messages(
     session: AsyncSession,
     connector_id: int,
     search_space_id: int,
+    start_date: str = None,
+    end_date: str = None,
     update_last_indexed: bool = True
 ) -> Tuple[int, Optional[str]]:
     """
@@ -957,19 +996,36 @@ async def index_discord_messages(
         discord_client = DiscordConnector(token=discord_token)
 
         # Calculate date range
-        end_date = datetime.now(timezone.utc)
+        if start_date is None or end_date is None:
+            # Fall back to calculating dates based on last_indexed_at
+            calculated_end_date = datetime.now(timezone.utc)
 
-        # Use last_indexed_at as start date if available, otherwise use 365 days ago
-        if connector.last_indexed_at:
-            start_date = connector.last_indexed_at.replace(tzinfo=timezone.utc)
-            logger.info(f"Using last_indexed_at ({start_date.strftime('%Y-%m-%d')}) as start date")
+            # Use last_indexed_at as start date if available, otherwise use 365 days ago
+            if connector.last_indexed_at:
+                calculated_start_date = connector.last_indexed_at.replace(tzinfo=timezone.utc)
+                logger.info(f"Using last_indexed_at ({calculated_start_date.strftime('%Y-%m-%d')}) as start date")
+            else:
+                calculated_start_date = calculated_end_date - timedelta(days=365)
+                logger.info(f"No last_indexed_at found, using {calculated_start_date.strftime('%Y-%m-%d')} (365 days ago) as start date")
+
+            # Use calculated dates if not provided, convert to ISO format for Discord API
+            if start_date is None:
+                start_date_iso = calculated_start_date.isoformat()
+            else:
+                # Convert YYYY-MM-DD to ISO format
+                start_date_iso = datetime.strptime(start_date, "%Y-%m-%d").replace(tzinfo=timezone.utc).isoformat()
+
+            if end_date is None:
+                end_date_iso = calculated_end_date.isoformat()
+            else:
+                # Convert YYYY-MM-DD to ISO format
+                end_date_iso = datetime.strptime(end_date, "%Y-%m-%d").replace(tzinfo=timezone.utc).isoformat()
         else:
-            start_date = end_date - timedelta(days=365)
-            logger.info(f"No last_indexed_at found, using {start_date.strftime('%Y-%m-%d')} (365 days ago) as start date")
-
-        # Format dates for Discord API
-        start_date_str = start_date.isoformat()
-        end_date_str = end_date.isoformat()
+            # Convert provided dates to ISO format for Discord API
+            start_date_iso = datetime.strptime(start_date, "%Y-%m-%d").replace(tzinfo=timezone.utc).isoformat()
+            end_date_iso = datetime.strptime(end_date, "%Y-%m-%d").replace(tzinfo=timezone.utc).isoformat()
+
+        logger.info(f"Indexing Discord messages from {start_date_iso} to {end_date_iso}")
 
         documents_indexed = 0
         documents_skipped = 0
@@ -1012,8 +1068,8 @@ async def index_discord_messages(
             try:
                 messages = await discord_client.get_channel_history(
                     channel_id=channel_id,
-                    start_date=start_date_str,
-                    end_date=end_date_str,
+                    start_date=start_date_iso,
+                    end_date=end_date_iso,
                 )
             except Exception as e:
                 logger.error(f"Failed to get messages for channel {channel_name}: {str(e)}")
@@ -1122,8 +1178,8 @@ async def index_discord_messages(
                         "channel_name": channel_name,
                         "channel_id": channel_id,
                         "message_count": len(formatted_messages),
-                        "start_date": start_date_str,
-                        "end_date": end_date_str,
+                        "start_date": start_date_iso,
+                        "end_date": end_date_iso,
                         "indexed_at": datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S")
                     },
                     content=summary_content,
diff --git a/surfsense_web/app/dashboard/[search_space_id]/connectors/(manage)/page.tsx b/surfsense_web/app/dashboard/[search_space_id]/connectors/(manage)/page.tsx
index e238e9d..d192fe5 100644
--- a/surfsense_web/app/dashboard/[search_space_id]/connectors/(manage)/page.tsx
+++ b/surfsense_web/app/dashboard/[search_space_id]/connectors/(manage)/page.tsx
@@ -9,6 +9,7 @@ import {
   Plus,
   Trash2,
   RefreshCw,
+  Calendar as CalendarIcon,
 } from "lucide-react";
 
 import { useSearchSourceConnectors } from "@/hooks/useSearchSourceConnectors";
@@ -45,7 +46,21 @@ import {
   TooltipProvider,
   TooltipTrigger,
 } from "@/components/ui/tooltip";
+import {
+  Dialog,
+  DialogContent,
+  DialogDescription,
+  DialogFooter,
+  DialogHeader,
+  DialogTitle,
+  DialogTrigger,
+} from "@/components/ui/dialog";
+import { Calendar } from "@/components/ui/calendar";
+import { Popover, PopoverContent, PopoverTrigger } from "@/components/ui/popover";
+import { Label } from "@/components/ui/label";
 import { getConnectorIcon } from "@/components/chat";
+import { cn } from "@/lib/utils";
+import { format } from "date-fns";
 
 // Helper function to get connector type display name
 const getConnectorTypeDisplay = (type: string): string => {
@@ -89,6 +104,10 @@ export default function ConnectorsPage() {
   const [indexingConnectorId, setIndexingConnectorId] = useState<number | null>(
     null,
   );
+  const [datePickerOpen, setDatePickerOpen] = useState(false);
+  const [selectedConnectorForIndexing, setSelectedConnectorForIndexing] = useState<number | null>(null);
+  const [startDate, setStartDate] = useState<Date | undefined>(undefined);
+  const [endDate, setEndDate] = useState<Date | undefined>(undefined);
 
   useEffect(() => {
     if (error) {
@@ -112,8 +131,42 @@ export default function ConnectorsPage() {
     }
   };
 
-  // Handle connector indexing
-  const handleIndexConnector = async (connectorId: number) => {
+  // Handle opening date picker for indexing
+  const handleOpenDatePicker = (connectorId: number) => {
+    setSelectedConnectorForIndexing(connectorId);
+    setDatePickerOpen(true);
+  };
+
+  // Handle connector indexing with dates
+  const handleIndexConnector = async () => {
+    if (selectedConnectorForIndexing === null) return;
+
+    setIndexingConnectorId(selectedConnectorForIndexing);
+    setDatePickerOpen(false);
+
+    try {
+      const startDateStr = startDate ? format(startDate, "yyyy-MM-dd") : undefined;
+      const endDateStr = endDate ? format(endDate, "yyyy-MM-dd") : undefined;
+
+      await indexConnector(selectedConnectorForIndexing, searchSpaceId, startDateStr, endDateStr);
+      toast.success("Connector content indexed successfully");
+    } catch (error) {
+      console.error("Error indexing connector content:", error);
+      toast.error(
+        error instanceof Error
+          ? error.message
+          : "Failed to index connector content",
+      );
+    } finally {
+      setIndexingConnectorId(null);
+      setSelectedConnectorForIndexing(null);
+      setStartDate(undefined);
+      setEndDate(undefined);
+    }
+  };
+
+  // Handle indexing without date picker (for quick indexing)
+  const handleQuickIndexConnector = async (connectorId: number) => {
     setIndexingConnectorId(connectorId);
     try {
       await indexConnector(connectorId, searchSpaceId);
@@ -213,34 +266,64 @@ export default function ConnectorsPage() {
[The JSX in this hunk was garbled in extraction (markup stripped); only text fragments survive. Recoverable intent: inside `{connector.is_indexable && (...)}`, the old tooltip-wrapped "Index Content" refresh button calling `handleIndexConnector(connector.id)` is removed and replaced by two tooltip-wrapped buttons: "Index with Date Range" (CalendarIcon, `onClick={() => handleOpenDatePicker(connector.id)}`) and "Quick Index (Auto Date Range)" (RefreshCw, `onClick={() => handleQuickIndexConnector(connector.id)}`). A Dialog bound to `datePickerOpen` follows, containing two labeled Popover/Calendar pickers: the start-date calendar disables `date > new Date() || (endDate ? date > endDate : false)`, the end-date calendar disables `date > new Date() || (startDate ? date < startDate : false)`, both with `initialFocus`; the dialog footer offers a Cancel button and a confirm button that calls `handleIndexConnector()`.]
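The calendars described above enforce start <= end and block future dates in the browser, but the backend accepts the raw query strings and only parses them later inside the indexing tasks, where a malformed value would surface as a strptime ValueError. A hedged sketch of a guard the route could apply up front; the helper and its HTTPException wiring are illustrative, not part of this diff:

```python
from datetime import date, datetime
from fastapi import HTTPException

def validate_date_range(start_date: str | None, end_date: str | None) -> None:
    """Illustrative server-side guard for the new query params; not in this PR."""
    parsed: dict[str, date] = {}
    for name, value in (("start_date", start_date), ("end_date", end_date)):
        if value is None:
            continue  # omitted params fall back to the connector's defaults
        try:
            parsed[name] = datetime.strptime(value, "%Y-%m-%d").date()
        except ValueError:
            raise HTTPException(status_code=422, detail=f"{name} must be YYYY-MM-DD, got {value!r}")
    if "start_date" in parsed and "end_date" in parsed and parsed["start_date"] > parsed["end_date"]:
        raise HTTPException(status_code=422, detail="start_date must not be after end_date")
```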
 	);
 }
diff --git a/surfsense_web/components/ui/calendar.tsx b/surfsense_web/components/ui/calendar.tsx
new file mode 100644
index 0000000..1d98dcb
--- /dev/null
+++ b/surfsense_web/components/ui/calendar.tsx
@@ -0,0 +1,210 @@
+"use client"
+
+import * as React from "react"
+import {
+  ChevronDownIcon,
+  ChevronLeftIcon,
+  ChevronRightIcon,
+} from "lucide-react"
+import { DayButton, DayPicker, getDefaultClassNames } from "react-day-picker"
+
+import { cn } from "@/lib/utils"
+import { Button, buttonVariants } from "@/components/ui/button"
+
+function Calendar({
+  className,
+  classNames,
+  showOutsideDays = true,
+  captionLayout = "label",
+  buttonVariant = "ghost",
+  formatters,
+  components,
+  ...props
+}: React.ComponentProps<typeof DayPicker> & {
+  buttonVariant?: React.ComponentProps<typeof Button>["variant"]
+}) {
+  const defaultClassNames = getDefaultClassNames()
+
+  return (
+    <DayPicker
+      showOutsideDays={showOutsideDays}
+      className={cn(
+        "bg-background group/calendar p-3 [--cell-size:--spacing(8)] [[data-slot=card-content]_&]:bg-transparent [[data-slot=popover-content]_&]:bg-transparent",
+        String.raw`rtl:**:[.rdp-button\_next>svg]:rotate-180`,
+        String.raw`rtl:**:[.rdp-button\_previous>svg]:rotate-180`,
+        className
+      )}
+      captionLayout={captionLayout}
+      formatters={{
+        formatMonthDropdown: (date) =>
+          date.toLocaleString("default", { month: "short" }),
+        ...formatters,
+      }}
+      classNames={{
+        root: cn("w-fit", defaultClassNames.root),
+        months: cn(
+          "flex gap-4 flex-col md:flex-row relative",
+          defaultClassNames.months
+        ),
+        month: cn("flex flex-col w-full gap-4", defaultClassNames.month),
+        nav: cn(
+          "flex items-center gap-1 w-full absolute top-0 inset-x-0 justify-between",
+          defaultClassNames.nav
+        ),
+        button_previous: cn(
+          buttonVariants({ variant: buttonVariant }),
+          "size-(--cell-size) aria-disabled:opacity-50 p-0 select-none",
+          defaultClassNames.button_previous
+        ),
+        button_next: cn(
+          buttonVariants({ variant: buttonVariant }),
+          "size-(--cell-size) aria-disabled:opacity-50 p-0 select-none",
+          defaultClassNames.button_next
+        ),
+        month_caption: cn(
+          "flex items-center justify-center h-(--cell-size) w-full px-(--cell-size)",
+          defaultClassNames.month_caption
+        ),
+        dropdowns: cn(
+          "w-full flex items-center text-sm font-medium justify-center h-(--cell-size) gap-1.5",
+          defaultClassNames.dropdowns
+        ),
+        dropdown_root: cn(
+          "relative has-focus:border-ring border border-input shadow-xs has-focus:ring-ring/50 has-focus:ring-[3px] rounded-md",
+          defaultClassNames.dropdown_root
+        ),
+        dropdown: cn("absolute inset-0 opacity-0", defaultClassNames.dropdown),
+        caption_label: cn(
+          "select-none font-medium",
+          captionLayout === "label"
+            ? "text-sm"
+            : "rounded-md pl-2 pr-1 flex items-center gap-1 text-sm h-8 [&>svg]:text-muted-foreground [&>svg]:size-3.5",
+          defaultClassNames.caption_label
+        ),
+        table: "w-full border-collapse",
+        weekdays: cn("flex", defaultClassNames.weekdays),
+        weekday: cn(
+          "text-muted-foreground rounded-md flex-1 font-normal text-[0.8rem] select-none",
+          defaultClassNames.weekday
+        ),
+        week: cn("flex w-full mt-2", defaultClassNames.week),
+        week_number_header: cn(
+          "select-none w-(--cell-size)",
+          defaultClassNames.week_number_header
+        ),
+        week_number: cn(
+          "text-[0.8rem] select-none text-muted-foreground",
+          defaultClassNames.week_number
+        ),
+        day: cn(
+          "relative w-full h-full p-0 text-center [&:first-child[data-selected=true]_button]:rounded-l-md [&:last-child[data-selected=true]_button]:rounded-r-md group/day aspect-square select-none",
+          defaultClassNames.day
+        ),
+        range_start: cn(
+          "rounded-l-md bg-accent",
+          defaultClassNames.range_start
+        ),
+        range_middle: cn("rounded-none", defaultClassNames.range_middle),
+        range_end: cn("rounded-r-md bg-accent", defaultClassNames.range_end),
+        today: cn(
+          "bg-accent text-accent-foreground rounded-md data-[selected=true]:rounded-none",
+          defaultClassNames.today
+        ),
+        outside: cn(
+          "text-muted-foreground aria-selected:text-muted-foreground",
+          defaultClassNames.outside
+        ),
+        disabled: cn(
+          "text-muted-foreground opacity-50",
+          defaultClassNames.disabled
+        ),
+        hidden: cn("invisible", defaultClassNames.hidden),
+        ...classNames,
+      }}
+      components={{
+        Root: ({ className, rootRef, ...props }) => {
+          return (
+            <div
+              data-slot="calendar"
+              ref={rootRef}
+              className={cn(className)}
+              {...props}
+            />
+          )
+        },
+        Chevron: ({ className, orientation, ...props }) => {
+          if (orientation === "left") {
+            return (
+              <ChevronLeftIcon className={cn("size-4", className)} {...props} />
+            )
+          }
+
+          if (orientation === "right") {
+            return (
+              <ChevronRightIcon className={cn("size-4", className)} {...props} />
+            )
+          }
+
+          return (
+            <ChevronDownIcon className={cn("size-4", className)} {...props} />
+          )
+        },
+        DayButton: CalendarDayButton,
+        WeekNumber: ({ children, ...props }) => {
+          return (
+            <td {...props}>
+              <div className="flex size-(--cell-size) items-center justify-center text-center">
+                {children}
+              </div>
+            </td>
+          )
+        },
+        ...components,
+      }}
+      {...props}
+    />
+  )
+}
+
+function CalendarDayButton({
+  className,
+  day,
+  modifiers,
+  ...props
+}: React.ComponentProps<typeof DayButton>) {
+  const defaultClassNames = getDefaultClassNames()
+
+  const ref = React.useRef<HTMLButtonElement>(null)
+  React.useEffect(() => {
+    if (modifiers.focused) ref.current?.focus()
+  }, [modifiers.focused])
+
+  return (