diff --git a/surfsense_backend/app/connectors/github_connector.py b/surfsense_backend/app/connectors/github_connector.py index 265f89b..a25bc98 100644 --- a/surfsense_backend/app/connectors/github_connector.py +++ b/surfsense_backend/app/connectors/github_connector.py @@ -80,7 +80,7 @@ class GitHubConnector: # type='owner' fetches repos owned by the user # type='member' fetches repos the user is a collaborator on (including orgs) # type='all' fetches both - for repo in self.gh.repositories(type='owner', sort='updated'): + for repo in self.gh.repositories(type='all', sort='updated'): repos_data.append({ "id": repo.id, "name": repo.name, diff --git a/surfsense_backend/app/connectors/slack_history.py b/surfsense_backend/app/connectors/slack_history.py index 67e5403..0bb90ec 100644 --- a/surfsense_backend/app/connectors/slack_history.py +++ b/surfsense_backend/app/connectors/slack_history.py @@ -6,11 +6,15 @@ Allows fetching channel lists and message history with date range filtering. """ import os +import time # Added import +import logging # Added import from slack_sdk import WebClient from slack_sdk.errors import SlackApiError from datetime import datetime, timedelta from typing import Dict, List, Optional, Tuple, Any, Union +logger = logging.getLogger(__name__) # Added logger + class SlackHistory: """Class for retrieving conversation history from Slack channels.""" @@ -33,56 +37,88 @@ class SlackHistory: """ self.client = WebClient(token=token) - def get_all_channels(self, include_private: bool = True) -> Dict[str, str]: + def get_all_channels(self, include_private: bool = True) -> List[Dict[str, Any]]: """ - Fetch all channels that the bot has access to. + Fetch all channels that the bot has access to, with rate limit handling. Args: include_private: Whether to include private channels Returns: - Dictionary mapping channel names to channel IDs + List of dictionaries, each representing a channel with id, name, is_private, is_member. Raises: ValueError: If no Slack client has been initialized - SlackApiError: If there's an error calling the Slack API + SlackApiError: If there's an unrecoverable error calling the Slack API + RuntimeError: For unexpected errors during channel fetching. """ if not self.client: raise ValueError("Slack client not initialized. Call set_token() first.") - channels_dict = {} + channels_list = [] # Changed from dict to list types = "public_channel" if include_private: types += ",private_channel" - - try: - # Call the conversations.list method - result = self.client.conversations_list( - types=types, - limit=1000 # Maximum allowed by API - ) - channels = result["channels"] - - # Handle pagination for workspaces with many channels - while result.get("response_metadata", {}).get("next_cursor"): - next_cursor = result["response_metadata"]["next_cursor"] - - # Get the next batch of channels - result = self.client.conversations_list( + + next_cursor = None + is_first_request = True + + while is_first_request or next_cursor: + try: + if not is_first_request: # Add delay only for paginated requests + logger.info(f"Paginating for channels, waiting 3 seconds before next call. Cursor: {next_cursor}") + time.sleep(3) + + current_limit = 1000 # Max limit + api_result = self.client.conversations_list( types=types, cursor=next_cursor, - limit=1000 + limit=current_limit ) - channels.extend(result["channels"]) - - # Create a dictionary mapping channel names to IDs - for channel in channels: - channels_dict[channel["name"]] = channel["id"] - - return channels_dict + + channels_on_page = api_result["channels"] + for channel in channels_on_page: + if "name" in channel and "id" in channel: + channel_data = { + "id": channel.get("id"), + "name": channel.get("name"), + "is_private": channel.get("is_private", False), + # is_member is often part of the channel object from conversations.list + # It indicates if the authenticated user (bot) is a member. + # For public channels, this might be true or the API might not focus on it + # if the bot can read it anyway. For private, it's crucial. + "is_member": channel.get("is_member", False) + } + channels_list.append(channel_data) + else: + logger.warning(f"Channel found with missing name or id. Data: {channel}") + + + next_cursor = api_result.get("response_metadata", {}).get("next_cursor") + is_first_request = False # Subsequent requests are not the first + + if not next_cursor: # All pages processed + break + + except SlackApiError as e: + if e.response is not None and e.response.status_code == 429: + retry_after_header = e.response.headers.get('Retry-After') + wait_duration = 60 # Default wait time + if retry_after_header and retry_after_header.isdigit(): + wait_duration = int(retry_after_header) + + logger.warning(f"Slack API rate limit hit while fetching channels. Waiting for {wait_duration} seconds. Cursor: {next_cursor}") + time.sleep(wait_duration) + # The loop will continue, retrying with the same cursor + else: + # Not a 429 error, or no response object, re-raise + raise SlackApiError(f"Error retrieving channels: {e}", e.response) + except Exception as general_error: + # Handle other potential errors like network issues if necessary, or re-raise + logger.error(f"An unexpected error occurred during channel fetching: {general_error}") + raise RuntimeError(f"An unexpected error occurred during channel fetching: {general_error}") - except SlackApiError as e: - raise SlackApiError(f"Error retrieving channels: {e}", e.response) + return channels_list def get_conversation_history( self, @@ -110,17 +146,18 @@ class SlackHistory: if not self.client: raise ValueError("Slack client not initialized. Call set_token() first.") - try: - # Call the conversations.history method - messages = [] - next_cursor = None - - while True: + messages = [] + next_cursor = None + + while True: + try: + # Proactive delay for conversations.history (Tier 3) + time.sleep(1.2) # Wait 1.2 seconds before each history call. + kwargs = { "channel": channel_id, "limit": min(limit, 1000), # API max is 1000 } - if oldest: kwargs["oldest"] = oldest if latest: @@ -128,22 +165,57 @@ class SlackHistory: if next_cursor: kwargs["cursor"] = next_cursor - result = self.client.conversations_history(**kwargs) + current_api_call_successful = False + result = None # Ensure result is defined + try: + result = self.client.conversations_history(**kwargs) + current_api_call_successful = True + except SlackApiError as e_history: + if e_history.response is not None and e_history.response.status_code == 429: + retry_after_str = e_history.response.headers.get('Retry-After') + wait_time = 60 # Default + if retry_after_str and retry_after_str.isdigit(): + wait_time = int(retry_after_str) + logger.warning( + f"Rate limited by Slack on conversations.history for channel {channel_id}. " + f"Retrying after {wait_time} seconds. Cursor: {next_cursor}" + ) + time.sleep(wait_time) + # current_api_call_successful remains False, loop will retry this page + else: + raise # Re-raise to outer handler for not_in_channel or other SlackApiErrors + + if not current_api_call_successful: + continue # Retry the current page fetch due to handled rate limit + + # Process result if successful batch = result["messages"] messages.extend(batch) - # Check if we need to paginate if result.get("has_more", False) and len(messages) < limit: next_cursor = result["response_metadata"]["next_cursor"] else: - break + break # Exit pagination loop - # Respect the overall limit parameter - return messages[:limit] + except SlackApiError as e: # Outer catch for not_in_channel or unhandled SlackApiErrors from inner try + if (e.response is not None and + hasattr(e.response, 'data') and + isinstance(e.response.data, dict) and + e.response.data.get('error') == 'not_in_channel'): + logger.warning( + f"Bot is not in channel '{channel_id}'. Cannot fetch history. " + "Please add the bot to this channel." + ) + return [] + # For other SlackApiErrors from inner block or this level + raise SlackApiError(f"Error retrieving history for channel {channel_id}: {e}", e.response) + except Exception as general_error: # Catch any other unexpected errors + logger.error(f"Unexpected error in get_conversation_history for channel {channel_id}: {general_error}") + # Re-raise the general error to allow higher-level handling or visibility + raise - except SlackApiError as e: - raise SlackApiError(f"Error retrieving history for channel {channel_id}: {e}", e.response) - + return messages[:limit] + @staticmethod def convert_date_to_timestamp(date_str: str) -> Optional[int]: """ @@ -220,12 +292,31 @@ class SlackHistory: """ if not self.client: raise ValueError("Slack client not initialized. Call set_token() first.") - - try: - result = self.client.users_info(user=user_id) - return result["user"] - except SlackApiError as e: - raise SlackApiError(f"Error retrieving user info for {user_id}: {e}", e.response) + + while True: + try: + # Proactive delay for users.info (Tier 4) - generally not needed unless called extremely rapidly. + # For now, we are only adding Retry-After as per plan. + # time.sleep(0.6) # Optional: ~100 req/min if ever needed. + + result = self.client.users_info(user=user_id) + return result["user"] # Success, return and exit loop implicitly + + except SlackApiError as e_user_info: + if e_user_info.response is not None and e_user_info.response.status_code == 429: + retry_after_str = e_user_info.response.headers.get('Retry-After') + wait_time = 30 # Default for Tier 4, can be adjusted + if retry_after_str and retry_after_str.isdigit(): + wait_time = int(retry_after_str) + logger.warning(f"Rate limited by Slack on users.info for user {user_id}. Retrying after {wait_time} seconds.") + time.sleep(wait_time) + continue # Retry the API call + else: + # Not a 429 error, or no response object, re-raise + raise SlackApiError(f"Error retrieving user info for {user_id}: {e_user_info}", e_user_info.response) + except Exception as general_error: # Catch any other unexpected errors + logger.error(f"Unexpected error in get_user_info for user {user_id}: {general_error}") + raise # Re-raise unexpected errors def format_message(self, msg: Dict[str, Any], include_user_info: bool = False) -> Dict[str, Any]: """ diff --git a/surfsense_backend/app/connectors/test_github_connector.py b/surfsense_backend/app/connectors/test_github_connector.py new file mode 100644 index 0000000..ad8a068 --- /dev/null +++ b/surfsense_backend/app/connectors/test_github_connector.py @@ -0,0 +1,154 @@ +import unittest +from unittest.mock import patch, Mock, call +from datetime import datetime + +# Adjust the import path based on the actual location if test_github_connector.py +# is not in the same directory as github_connector.py or if paths are set up differently. +# Assuming surfsend_backend/app/connectors/test_github_connector.py +from surfsense_backend.app.connectors.github_connector import GitHubConnector +from github3.exceptions import ForbiddenError # Import the specific exception + +class TestGitHubConnector(unittest.TestCase): + + @patch('surfsense_backend.app.connectors.github_connector.github_login') + def test_get_user_repositories_uses_type_all(self, mock_github_login): + # Mock the GitHub client object and its methods + mock_gh_instance = Mock() + mock_github_login.return_value = mock_gh_instance + + # Mock the self.gh.me() call in __init__ to prevent an actual API call + mock_gh_instance.me.return_value = Mock() # Simple mock to pass initialization + + # Prepare mock repository data + mock_repo1_data = Mock() + mock_repo1_data.id = 1 + mock_repo1_data.name = "repo1" + mock_repo1_data.full_name = "user/repo1" + mock_repo1_data.private = False + mock_repo1_data.html_url = "http://example.com/user/repo1" + mock_repo1_data.description = "Test repo 1" + mock_repo1_data.updated_at = datetime(2023, 1, 1, 10, 30, 0) # Added time component + + mock_repo2_data = Mock() + mock_repo2_data.id = 2 + mock_repo2_data.name = "org-repo" + mock_repo2_data.full_name = "org/org-repo" + mock_repo2_data.private = True + mock_repo2_data.html_url = "http://example.com/org/org-repo" + mock_repo2_data.description = "Org repo" + mock_repo2_data.updated_at = datetime(2023, 1, 2, 12, 0, 0) # Added time component + + # Configure the mock for gh.repositories() call + # This method is an iterator, so it should return an iterable (e.g., a list) + mock_gh_instance.repositories.return_value = [mock_repo1_data, mock_repo2_data] + + connector = GitHubConnector(token="fake_token") + repositories = connector.get_user_repositories() + + # Assert that gh.repositories was called correctly + mock_gh_instance.repositories.assert_called_once_with(type='all', sort='updated') + + # Assert the structure and content of the returned data + expected_repositories = [ + { + "id": 1, "name": "repo1", "full_name": "user/repo1", "private": False, + "url": "http://example.com/user/repo1", "description": "Test repo 1", + "last_updated": datetime(2023, 1, 1, 10, 30, 0) + }, + { + "id": 2, "name": "org-repo", "full_name": "org/org-repo", "private": True, + "url": "http://example.com/org/org-repo", "description": "Org repo", + "last_updated": datetime(2023, 1, 2, 12, 0, 0) + } + ] + self.assertEqual(repositories, expected_repositories) + self.assertEqual(len(repositories), 2) + + @patch('surfsense_backend.app.connectors.github_connector.github_login') + def test_get_user_repositories_handles_empty_description_and_none_updated_at(self, mock_github_login): + # Mock the GitHub client object and its methods + mock_gh_instance = Mock() + mock_github_login.return_value = mock_gh_instance + mock_gh_instance.me.return_value = Mock() + + mock_repo_data = Mock() + mock_repo_data.id = 1 + mock_repo_data.name = "repo_no_desc" + mock_repo_data.full_name = "user/repo_no_desc" + mock_repo_data.private = False + mock_repo_data.html_url = "http://example.com/user/repo_no_desc" + mock_repo_data.description = None # Test None description + mock_repo_data.updated_at = None # Test None updated_at + + mock_gh_instance.repositories.return_value = [mock_repo_data] + connector = GitHubConnector(token="fake_token") + repositories = connector.get_user_repositories() + + mock_gh_instance.repositories.assert_called_once_with(type='all', sort='updated') + expected_repositories = [ + { + "id": 1, "name": "repo_no_desc", "full_name": "user/repo_no_desc", "private": False, + "url": "http://example.com/user/repo_no_desc", "description": "", # Expect empty string + "last_updated": None # Expect None + } + ] + self.assertEqual(repositories, expected_repositories) + + @patch('surfsense_backend.app.connectors.github_connector.github_login') + def test_github_connector_initialization_failure_forbidden(self, mock_github_login): + # Test that __init__ raises ValueError on auth failure (ForbiddenError) + mock_gh_instance = Mock() + mock_github_login.return_value = mock_gh_instance + + # Create a mock response object for the ForbiddenError + # The actual response structure might vary, but github3.py's ForbiddenError + # can be instantiated with just a response object that has a status_code. + mock_response = Mock() + mock_response.status_code = 403 # Typically Forbidden + + # Setup the side_effect for self.gh.me() + mock_gh_instance.me.side_effect = ForbiddenError(mock_response) + + with self.assertRaises(ValueError) as context: + GitHubConnector(token="invalid_token_forbidden") + self.assertIn("Invalid GitHub token or insufficient permissions.", str(context.exception)) + + @patch('surfsense_backend.app.connectors.github_connector.github_login') + def test_github_connector_initialization_failure_authentication_failed(self, mock_github_login): + # Test that __init__ raises ValueError on auth failure (AuthenticationFailed, which is a subclass of ForbiddenError) + # For github3.py, AuthenticationFailed is more specific for token issues. + from github3.exceptions import AuthenticationFailed + + mock_gh_instance = Mock() + mock_github_login.return_value = mock_gh_instance + + mock_response = Mock() + mock_response.status_code = 401 # Typically Unauthorized + + mock_gh_instance.me.side_effect = AuthenticationFailed(mock_response) + + with self.assertRaises(ValueError) as context: + GitHubConnector(token="invalid_token_authfailed") + self.assertIn("Invalid GitHub token or insufficient permissions.", str(context.exception)) + + @patch('surfsense_backend.app.connectors.github_connector.github_login') + def test_get_user_repositories_handles_api_exception(self, mock_github_login): + mock_gh_instance = Mock() + mock_github_login.return_value = mock_gh_instance + mock_gh_instance.me.return_value = Mock() + + # Simulate an exception when calling repositories + mock_gh_instance.repositories.side_effect = Exception("API Error") + + connector = GitHubConnector(token="fake_token") + # We expect it to log an error and return an empty list + with patch('surfsense_backend.app.connectors.github_connector.logger') as mock_logger: + repositories = connector.get_user_repositories() + + self.assertEqual(repositories, []) + mock_logger.error.assert_called_once() + self.assertIn("Failed to fetch GitHub repositories: API Error", mock_logger.error.call_args[0][0]) + + +if __name__ == '__main__': + unittest.main() diff --git a/surfsense_backend/app/connectors/test_slack_history.py b/surfsense_backend/app/connectors/test_slack_history.py new file mode 100644 index 0000000..ecff2c5 --- /dev/null +++ b/surfsense_backend/app/connectors/test_slack_history.py @@ -0,0 +1,420 @@ +import unittest +import time # Imported to be available for patching target module +from unittest.mock import patch, Mock, call +from slack_sdk.errors import SlackApiError + +# Since test_slack_history.py is in the same directory as slack_history.py +from .slack_history import SlackHistory + +class TestSlackHistoryGetAllChannels(unittest.TestCase): + + @patch('surfsense_backend.app.connectors.slack_history.logger') + @patch('surfsense_backend.app.connectors.slack_history.time.sleep') + @patch('slack_sdk.WebClient') + def test_get_all_channels_pagination_with_delay(self, MockWebClient, mock_sleep, mock_logger): + mock_client_instance = MockWebClient.return_value + + # Mock API responses now include is_private and is_member + page1_response = { + "channels": [ + {"name": "general", "id": "C1", "is_private": False, "is_member": True}, + {"name": "dev", "id": "C0", "is_private": False, "is_member": True} + ], + "response_metadata": {"next_cursor": "cursor123"} + } + page2_response = { + "channels": [{"name": "random", "id": "C2", "is_private": True, "is_member": True}], + "response_metadata": {"next_cursor": ""} + } + + mock_client_instance.conversations_list.side_effect = [ + page1_response, + page2_response + ] + + slack_history = SlackHistory(token="fake_token") + channels_list = slack_history.get_all_channels(include_private=True) + + expected_channels_list = [ + {"id": "C1", "name": "general", "is_private": False, "is_member": True}, + {"id": "C0", "name": "dev", "is_private": False, "is_member": True}, + {"id": "C2", "name": "random", "is_private": True, "is_member": True} + ] + + self.assertEqual(len(channels_list), 3) + self.assertListEqual(channels_list, expected_channels_list) # Assert list equality + + expected_calls = [ + call(types="public_channel,private_channel", cursor=None, limit=1000), + call(types="public_channel,private_channel", cursor="cursor123", limit=1000) + ] + mock_client_instance.conversations_list.assert_has_calls(expected_calls) + self.assertEqual(mock_client_instance.conversations_list.call_count, 2) + + mock_sleep.assert_called_once_with(3) + mock_logger.info.assert_called_once_with("Paginating for channels, waiting 3 seconds before next call. Cursor: cursor123") + + @patch('surfsense_backend.app.connectors.slack_history.logger') + @patch('surfsense_backend.app.connectors.slack_history.time.sleep') + @patch('slack_sdk.WebClient') + def test_get_all_channels_rate_limit_with_retry_after(self, MockWebClient, mock_sleep, mock_logger): + mock_client_instance = MockWebClient.return_value + + mock_error_response = Mock() + mock_error_response.status_code = 429 + mock_error_response.headers = {'Retry-After': '5'} + + successful_response = { + "channels": [{"name": "general", "id": "C1", "is_private": False, "is_member": True}], + "response_metadata": {"next_cursor": ""} + } + + mock_client_instance.conversations_list.side_effect = [ + SlackApiError(message="ratelimited", response=mock_error_response), + successful_response + ] + + slack_history = SlackHistory(token="fake_token") + channels_list = slack_history.get_all_channels(include_private=True) + + expected_channels_list = [{"id": "C1", "name": "general", "is_private": False, "is_member": True}] + self.assertEqual(len(channels_list), 1) + self.assertListEqual(channels_list, expected_channels_list) + + mock_sleep.assert_called_once_with(5) + mock_logger.warning.assert_called_once_with("Slack API rate limit hit while fetching channels. Waiting for 5 seconds. Cursor: None") + + expected_calls = [ + call(types="public_channel,private_channel", cursor=None, limit=1000), + call(types="public_channel,private_channel", cursor=None, limit=1000) + ] + mock_client_instance.conversations_list.assert_has_calls(expected_calls) + self.assertEqual(mock_client_instance.conversations_list.call_count, 2) + + @patch('surfsense_backend.app.connectors.slack_history.logger') + @patch('surfsense_backend.app.connectors.slack_history.time.sleep') + @patch('slack_sdk.WebClient') + def test_get_all_channels_rate_limit_no_retry_after_valid_header(self, MockWebClient, mock_sleep, mock_logger): + mock_client_instance = MockWebClient.return_value + + mock_error_response = Mock() + mock_error_response.status_code = 429 + mock_error_response.headers = {'Retry-After': 'invalid_value'} + + successful_response = { + "channels": [{"name": "general", "id": "C1", "is_private": False, "is_member": True}], + "response_metadata": {"next_cursor": ""} + } + + mock_client_instance.conversations_list.side_effect = [ + SlackApiError(message="ratelimited", response=mock_error_response), + successful_response + ] + + slack_history = SlackHistory(token="fake_token") + channels_list = slack_history.get_all_channels(include_private=True) + + expected_channels_list = [{"id": "C1", "name": "general", "is_private": False, "is_member": True}] + self.assertListEqual(channels_list, expected_channels_list) + mock_sleep.assert_called_once_with(60) # Default fallback + mock_logger.warning.assert_called_once_with("Slack API rate limit hit while fetching channels. Waiting for 60 seconds. Cursor: None") + self.assertEqual(mock_client_instance.conversations_list.call_count, 2) + + @patch('surfsense_backend.app.connectors.slack_history.logger') + @patch('surfsense_backend.app.connectors.slack_history.time.sleep') + @patch('slack_sdk.WebClient') + def test_get_all_channels_rate_limit_no_retry_after_header(self, MockWebClient, mock_sleep, mock_logger): + mock_client_instance = MockWebClient.return_value + + mock_error_response = Mock() + mock_error_response.status_code = 429 + mock_error_response.headers = {} + + successful_response = { + "channels": [{"name": "general", "id": "C1", "is_private": False, "is_member": True}], + "response_metadata": {"next_cursor": ""} + } + + mock_client_instance.conversations_list.side_effect = [ + SlackApiError(message="ratelimited", response=mock_error_response), + successful_response + ] + + slack_history = SlackHistory(token="fake_token") + channels_list = slack_history.get_all_channels(include_private=True) + + expected_channels_list = [{"id": "C1", "name": "general", "is_private": False, "is_member": True}] + self.assertListEqual(channels_list, expected_channels_list) + mock_sleep.assert_called_once_with(60) # Default fallback + mock_logger.warning.assert_called_once_with("Slack API rate limit hit while fetching channels. Waiting for 60 seconds. Cursor: None") + self.assertEqual(mock_client_instance.conversations_list.call_count, 2) + + @patch('surfsense_backend.app.connectors.slack_history.logger') + @patch('surfsense_backend.app.connectors.slack_history.time.sleep') + @patch('slack_sdk.WebClient') + def test_get_all_channels_other_slack_api_error(self, MockWebClient, mock_sleep, mock_logger): + mock_client_instance = MockWebClient.return_value + + mock_error_response = Mock() + mock_error_response.status_code = 500 + mock_error_response.headers = {} + mock_error_response.data = {"ok": False, "error": "internal_error"} + + original_error = SlackApiError(message="server error", response=mock_error_response) + mock_client_instance.conversations_list.side_effect = original_error + + slack_history = SlackHistory(token="fake_token") + + with self.assertRaises(SlackApiError) as context: + slack_history.get_all_channels(include_private=True) + + self.assertEqual(context.exception.response.status_code, 500) + self.assertIn("server error", str(context.exception)) + mock_sleep.assert_not_called() + mock_logger.warning.assert_not_called() # Ensure no rate limit log + mock_client_instance.conversations_list.assert_called_once_with( + types="public_channel,private_channel", cursor=None, limit=1000 + ) + + @patch('surfsense_backend.app.connectors.slack_history.logger') + @patch('surfsense_backend.app.connectors.slack_history.time.sleep') + @patch('slack_sdk.WebClient') + def test_get_all_channels_handles_missing_name_id_gracefully(self, MockWebClient, mock_sleep, mock_logger): + mock_client_instance = MockWebClient.return_value + + response_with_malformed_data = { + "channels": [ + {"id": "C1_missing_name", "is_private": False, "is_member": True}, + {"name": "channel_missing_id", "is_private": False, "is_member": True}, + {"name": "general", "id": "C2_valid", "is_private": False, "is_member": True} + ], + "response_metadata": {"next_cursor": ""} + } + + mock_client_instance.conversations_list.return_value = response_with_malformed_data + + slack_history = SlackHistory(token="fake_token") + channels_list = slack_history.get_all_channels(include_private=True) + + expected_channels_list = [ + {"id": "C2_valid", "name": "general", "is_private": False, "is_member": True} + ] + self.assertEqual(len(channels_list), 1) + self.assertListEqual(channels_list, expected_channels_list) + + self.assertEqual(mock_logger.warning.call_count, 2) + mock_logger.warning.assert_any_call("Channel found with missing name or id. Data: {'id': 'C1_missing_name', 'is_private': False, 'is_member': True}") + mock_logger.warning.assert_any_call("Channel found with missing name or id. Data: {'name': 'channel_missing_id', 'is_private': False, 'is_member': True}") + + mock_sleep.assert_not_called() + mock_client_instance.conversations_list.assert_called_once_with( + types="public_channel,private_channel", cursor=None, limit=1000 + ) + +if __name__ == '__main__': + unittest.main() + +class TestSlackHistoryGetConversationHistory(unittest.TestCase): + + @patch('surfsense_backend.app.connectors.slack_history.logger') + @patch('surfsense_backend.app.connectors.slack_history.time.sleep') + @patch('slack_sdk.WebClient') + def test_proactive_delay_single_page(self, MockWebClient, mock_time_sleep, mock_logger): + mock_client_instance = MockWebClient.return_value + mock_client_instance.conversations_history.return_value = { + "messages": [{"text": "msg1"}], + "has_more": False + } + + slack_history = SlackHistory(token="fake_token") + slack_history.get_conversation_history(channel_id="C123") + + mock_time_sleep.assert_called_once_with(1.2) # Proactive delay + + @patch('surfsense_backend.app.connectors.slack_history.logger') + @patch('surfsense_backend.app.connectors.slack_history.time.sleep') + @patch('slack_sdk.WebClient') + def test_proactive_delay_multiple_pages(self, MockWebClient, mock_time_sleep, mock_logger): + mock_client_instance = MockWebClient.return_value + mock_client_instance.conversations_history.side_effect = [ + { + "messages": [{"text": "msg1"}], + "has_more": True, + "response_metadata": {"next_cursor": "cursor1"} + }, + { + "messages": [{"text": "msg2"}], + "has_more": False + } + ] + + slack_history = SlackHistory(token="fake_token") + slack_history.get_conversation_history(channel_id="C123") + + # Expected calls: 1.2 (page1), 1.2 (page2) + self.assertEqual(mock_time_sleep.call_count, 2) + mock_time_sleep.assert_has_calls([call(1.2), call(1.2)]) + + @patch('surfsense_backend.app.connectors.slack_history.logger') + @patch('surfsense_backend.app.connectors.slack_history.time.sleep') + @patch('slack_sdk.WebClient') + def test_retry_after_logic(self, MockWebClient, mock_time_sleep, mock_logger): + mock_client_instance = MockWebClient.return_value + + mock_error_response = Mock() + mock_error_response.status_code = 429 + mock_error_response.headers = {'Retry-After': '5'} + + mock_client_instance.conversations_history.side_effect = [ + SlackApiError(message="ratelimited", response=mock_error_response), + {"messages": [{"text": "msg1"}], "has_more": False} + ] + + slack_history = SlackHistory(token="fake_token") + messages = slack_history.get_conversation_history(channel_id="C123") + + self.assertEqual(len(messages), 1) + self.assertEqual(messages[0]["text"], "msg1") + + # Expected sleep calls: 1.2 (proactive for 1st attempt), 5 (rate limit), 1.2 (proactive for 2nd attempt) + mock_time_sleep.assert_has_calls([call(1.2), call(5), call(1.2)], any_order=False) + mock_logger.warning.assert_called_once() # Check that a warning was logged for rate limiting + + @patch('surfsense_backend.app.connectors.slack_history.logger') + @patch('surfsense_backend.app.connectors.slack_history.time.sleep') + @patch('slack_sdk.WebClient') + def test_not_in_channel_error(self, MockWebClient, mock_time_sleep, mock_logger): + mock_client_instance = MockWebClient.return_value + + mock_error_response = Mock() + mock_error_response.status_code = 403 # Typical for not_in_channel, but data matters more + mock_error_response.data = {'ok': False, 'error': 'not_in_channel'} + + # This error is now raised by the inner try-except, then caught by the outer one + mock_client_instance.conversations_history.side_effect = SlackApiError( + message="not_in_channel error", + response=mock_error_response + ) + + slack_history = SlackHistory(token="fake_token") + messages = slack_history.get_conversation_history(channel_id="C123") + + self.assertEqual(messages, []) + mock_logger.warning.assert_called_with( + "Bot is not in channel 'C123'. Cannot fetch history. Please add the bot to this channel." + ) + mock_time_sleep.assert_called_once_with(1.2) # Proactive delay before the API call + + @patch('surfsense_backend.app.connectors.slack_history.logger') + @patch('surfsense_backend.app.connectors.slack_history.time.sleep') + @patch('slack_sdk.WebClient') + def test_other_slack_api_error_propagates(self, MockWebClient, mock_time_sleep, mock_logger): + mock_client_instance = MockWebClient.return_value + + mock_error_response = Mock() + mock_error_response.status_code = 500 + mock_error_response.data = {'ok': False, 'error': 'internal_error'} + original_error = SlackApiError(message="server error", response=mock_error_response) + + mock_client_instance.conversations_history.side_effect = original_error + + slack_history = SlackHistory(token="fake_token") + + with self.assertRaises(SlackApiError) as context: + slack_history.get_conversation_history(channel_id="C123") + + self.assertIn("Error retrieving history for channel C123", str(context.exception)) + self.assertIs(context.exception.response, mock_error_response) + mock_time_sleep.assert_called_once_with(1.2) # Proactive delay + + @patch('surfsense_backend.app.connectors.slack_history.logger') + @patch('surfsense_backend.app.connectors.slack_history.time.sleep') + @patch('slack_sdk.WebClient') + def test_general_exception_propagates(self, MockWebClient, mock_time_sleep, mock_logger): + mock_client_instance = MockWebClient.return_value + original_error = Exception("Something broke") + mock_client_instance.conversations_history.side_effect = original_error + + slack_history = SlackHistory(token="fake_token") + + with self.assertRaises(Exception) as context: # Check for generic Exception + slack_history.get_conversation_history(channel_id="C123") + + self.assertIs(context.exception, original_error) # Should re-raise the original error + mock_logger.error.assert_called_once_with("Unexpected error in get_conversation_history for channel C123: Something broke") + mock_time_sleep.assert_called_once_with(1.2) # Proactive delay + +class TestSlackHistoryGetUserInfo(unittest.TestCase): + + @patch('surfsense_backend.app.connectors.slack_history.logger') + @patch('surfsense_backend.app.connectors.slack_history.time.sleep') + @patch('slack_sdk.WebClient') + def test_retry_after_logic(self, MockWebClient, mock_time_sleep, mock_logger): + mock_client_instance = MockWebClient.return_value + + mock_error_response = Mock() + mock_error_response.status_code = 429 + mock_error_response.headers = {'Retry-After': '3'} # Using 3 seconds for test + + successful_user_data = {"id": "U123", "name": "testuser"} + + mock_client_instance.users_info.side_effect = [ + SlackApiError(message="ratelimited_userinfo", response=mock_error_response), + {"user": successful_user_data} + ] + + slack_history = SlackHistory(token="fake_token") + user_info = slack_history.get_user_info(user_id="U123") + + self.assertEqual(user_info, successful_user_data) + + # Assert that time.sleep was called for the rate limit + mock_time_sleep.assert_called_once_with(3) + mock_logger.warning.assert_called_once_with( + "Rate limited by Slack on users.info for user U123. Retrying after 3 seconds." + ) + # Assert users_info was called twice (original + retry) + self.assertEqual(mock_client_instance.users_info.call_count, 2) + mock_client_instance.users_info.assert_has_calls([call(user="U123"), call(user="U123")]) + + @patch('surfsense_backend.app.connectors.slack_history.logger') + @patch('surfsense_backend.app.connectors.slack_history.time.sleep') # time.sleep might be called by other logic, but not expected here + @patch('slack_sdk.WebClient') + def test_other_slack_api_error_propagates(self, MockWebClient, mock_time_sleep, mock_logger): + mock_client_instance = MockWebClient.return_value + + mock_error_response = Mock() + mock_error_response.status_code = 500 # Some other error + mock_error_response.data = {'ok': False, 'error': 'internal_server_error'} + original_error = SlackApiError(message="internal server error", response=mock_error_response) + + mock_client_instance.users_info.side_effect = original_error + + slack_history = SlackHistory(token="fake_token") + + with self.assertRaises(SlackApiError) as context: + slack_history.get_user_info(user_id="U123") + + # Check that the raised error is the one we expect + self.assertIn("Error retrieving user info for U123", str(context.exception)) + self.assertIs(context.exception.response, mock_error_response) + mock_time_sleep.assert_not_called() # No rate limit sleep + + @patch('surfsense_backend.app.connectors.slack_history.logger') + @patch('surfsense_backend.app.connectors.slack_history.time.sleep') + @patch('slack_sdk.WebClient') + def test_general_exception_propagates(self, MockWebClient, mock_time_sleep, mock_logger): + mock_client_instance = MockWebClient.return_value + original_error = Exception("A very generic problem") + mock_client_instance.users_info.side_effect = original_error + + slack_history = SlackHistory(token="fake_token") + + with self.assertRaises(Exception) as context: + slack_history.get_user_info(user_id="U123") + + self.assertIs(context.exception, original_error) # Check it's the exact same exception + mock_logger.error.assert_called_once_with( + "Unexpected error in get_user_info for user U123: A very generic problem" + ) + mock_time_sleep.assert_not_called() # No rate limit sleep diff --git a/surfsense_backend/app/tasks/connectors_indexing_tasks.py b/surfsense_backend/app/tasks/connectors_indexing_tasks.py index 94643a4..6af36ad 100644 --- a/surfsense_backend/app/tasks/connectors_indexing_tasks.py +++ b/surfsense_backend/app/tasks/connectors_indexing_tasks.py @@ -114,33 +114,25 @@ async def index_slack_messages( skipped_channels = [] # Process each channel - for channel_name, channel_id in channels.items(): + for channel_obj in channels: # Modified loop to iterate over list of channel objects + channel_id = channel_obj["id"] + channel_name = channel_obj["name"] + is_private = channel_obj["is_private"] + is_member = channel_obj["is_member"] # This might be False for public channels too + try: - # Check if the bot is a member of the channel - try: - # First try to get channel info to check if bot is a member - channel_info = slack_client.client.conversations_info(channel=channel_id) - - # For private channels, the bot needs to be a member - if channel_info.get("channel", {}).get("is_private", False): - # Check if bot is a member - is_member = channel_info.get("channel", {}).get("is_member", False) - if not is_member: - logger.warning(f"Bot is not a member of private channel {channel_name} ({channel_id}). Skipping.") - skipped_channels.append(f"{channel_name} (private, bot not a member)") - documents_skipped += 1 - continue - except SlackApiError as e: - if "not_in_channel" in str(e) or "channel_not_found" in str(e): - logger.warning(f"Bot cannot access channel {channel_name} ({channel_id}). Skipping.") - skipped_channels.append(f"{channel_name} (access error)") - documents_skipped += 1 - continue - else: - # Re-raise if it's a different error - raise + # If it's a private channel and the bot is not a member, skip. + # For public channels, if they are listed by conversations.list, the bot can typically read history. + # The `not_in_channel` error in get_conversation_history will be the ultimate gatekeeper if history is inaccessible. + if is_private and not is_member: + logger.warning(f"Bot is not a member of private channel {channel_name} ({channel_id}). Skipping.") + skipped_channels.append(f"{channel_name} (private, bot not a member)") + documents_skipped += 1 + continue # Get messages for this channel + # The get_history_by_date_range now uses get_conversation_history, + # which handles 'not_in_channel' by returning [] and logging. messages, error = slack_client.get_history_by_date_range( channel_id=channel_id, start_date=start_date_str,