Merge pull request #117 from fblgit/fix/slack-rate-limiting

Fix/slack rate limiting & Github Repos ORG Filtering
This commit is contained in:
Rohan Verma 2025-05-27 18:39:15 -07:00 committed by GitHub
commit fd6da4c472
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 733 additions and 76 deletions

View file

@ -80,7 +80,7 @@ class GitHubConnector:
# type='owner' fetches repos owned by the user # type='owner' fetches repos owned by the user
# type='member' fetches repos the user is a collaborator on (including orgs) # type='member' fetches repos the user is a collaborator on (including orgs)
# type='all' fetches both # type='all' fetches both
for repo in self.gh.repositories(type='owner', sort='updated'): for repo in self.gh.repositories(type='all', sort='updated'):
repos_data.append({ repos_data.append({
"id": repo.id, "id": repo.id,
"name": repo.name, "name": repo.name,

View file

@ -6,11 +6,15 @@ Allows fetching channel lists and message history with date range filtering.
""" """
import os import os
import time # Added import
import logging # Added import
from slack_sdk import WebClient from slack_sdk import WebClient
from slack_sdk.errors import SlackApiError from slack_sdk.errors import SlackApiError
from datetime import datetime, timedelta from datetime import datetime, timedelta
from typing import Dict, List, Optional, Tuple, Any, Union from typing import Dict, List, Optional, Tuple, Any, Union
logger = logging.getLogger(__name__) # Added logger
class SlackHistory: class SlackHistory:
"""Class for retrieving conversation history from Slack channels.""" """Class for retrieving conversation history from Slack channels."""
@ -33,56 +37,88 @@ class SlackHistory:
""" """
self.client = WebClient(token=token) self.client = WebClient(token=token)
def get_all_channels(self, include_private: bool = True) -> Dict[str, str]: def get_all_channels(self, include_private: bool = True) -> List[Dict[str, Any]]:
""" """
Fetch all channels that the bot has access to. Fetch all channels that the bot has access to, with rate limit handling.
Args: Args:
include_private: Whether to include private channels include_private: Whether to include private channels
Returns: Returns:
Dictionary mapping channel names to channel IDs List of dictionaries, each representing a channel with id, name, is_private, is_member.
Raises: Raises:
ValueError: If no Slack client has been initialized ValueError: If no Slack client has been initialized
SlackApiError: If there's an error calling the Slack API SlackApiError: If there's an unrecoverable error calling the Slack API
RuntimeError: For unexpected errors during channel fetching.
""" """
if not self.client: if not self.client:
raise ValueError("Slack client not initialized. Call set_token() first.") raise ValueError("Slack client not initialized. Call set_token() first.")
channels_dict = {} channels_list = [] # Changed from dict to list
types = "public_channel" types = "public_channel"
if include_private: if include_private:
types += ",private_channel" types += ",private_channel"
try: next_cursor = None
# Call the conversations.list method is_first_request = True
result = self.client.conversations_list(
types=types, while is_first_request or next_cursor:
limit=1000 # Maximum allowed by API try:
) if not is_first_request: # Add delay only for paginated requests
channels = result["channels"] logger.info(f"Paginating for channels, waiting 3 seconds before next call. Cursor: {next_cursor}")
time.sleep(3)
# Handle pagination for workspaces with many channels
while result.get("response_metadata", {}).get("next_cursor"): current_limit = 1000 # Max limit
next_cursor = result["response_metadata"]["next_cursor"] api_result = self.client.conversations_list(
# Get the next batch of channels
result = self.client.conversations_list(
types=types, types=types,
cursor=next_cursor, cursor=next_cursor,
limit=1000 limit=current_limit
) )
channels.extend(result["channels"])
channels_on_page = api_result["channels"]
# Create a dictionary mapping channel names to IDs for channel in channels_on_page:
for channel in channels: if "name" in channel and "id" in channel:
channels_dict[channel["name"]] = channel["id"] channel_data = {
"id": channel.get("id"),
return channels_dict "name": channel.get("name"),
"is_private": channel.get("is_private", False),
# is_member is often part of the channel object from conversations.list
# It indicates if the authenticated user (bot) is a member.
# For public channels, this might be true or the API might not focus on it
# if the bot can read it anyway. For private, it's crucial.
"is_member": channel.get("is_member", False)
}
channels_list.append(channel_data)
else:
logger.warning(f"Channel found with missing name or id. Data: {channel}")
next_cursor = api_result.get("response_metadata", {}).get("next_cursor")
is_first_request = False # Subsequent requests are not the first
if not next_cursor: # All pages processed
break
except SlackApiError as e:
if e.response is not None and e.response.status_code == 429:
retry_after_header = e.response.headers.get('Retry-After')
wait_duration = 60 # Default wait time
if retry_after_header and retry_after_header.isdigit():
wait_duration = int(retry_after_header)
logger.warning(f"Slack API rate limit hit while fetching channels. Waiting for {wait_duration} seconds. Cursor: {next_cursor}")
time.sleep(wait_duration)
# The loop will continue, retrying with the same cursor
else:
# Not a 429 error, or no response object, re-raise
raise SlackApiError(f"Error retrieving channels: {e}", e.response)
except Exception as general_error:
# Handle other potential errors like network issues if necessary, or re-raise
logger.error(f"An unexpected error occurred during channel fetching: {general_error}")
raise RuntimeError(f"An unexpected error occurred during channel fetching: {general_error}")
except SlackApiError as e: return channels_list
raise SlackApiError(f"Error retrieving channels: {e}", e.response)
def get_conversation_history( def get_conversation_history(
self, self,
@ -110,17 +146,18 @@ class SlackHistory:
if not self.client: if not self.client:
raise ValueError("Slack client not initialized. Call set_token() first.") raise ValueError("Slack client not initialized. Call set_token() first.")
try: messages = []
# Call the conversations.history method next_cursor = None
messages = []
next_cursor = None while True:
try:
while True: # Proactive delay for conversations.history (Tier 3)
time.sleep(1.2) # Wait 1.2 seconds before each history call.
kwargs = { kwargs = {
"channel": channel_id, "channel": channel_id,
"limit": min(limit, 1000), # API max is 1000 "limit": min(limit, 1000), # API max is 1000
} }
if oldest: if oldest:
kwargs["oldest"] = oldest kwargs["oldest"] = oldest
if latest: if latest:
@ -128,22 +165,57 @@ class SlackHistory:
if next_cursor: if next_cursor:
kwargs["cursor"] = next_cursor kwargs["cursor"] = next_cursor
result = self.client.conversations_history(**kwargs) current_api_call_successful = False
result = None # Ensure result is defined
try:
result = self.client.conversations_history(**kwargs)
current_api_call_successful = True
except SlackApiError as e_history:
if e_history.response is not None and e_history.response.status_code == 429:
retry_after_str = e_history.response.headers.get('Retry-After')
wait_time = 60 # Default
if retry_after_str and retry_after_str.isdigit():
wait_time = int(retry_after_str)
logger.warning(
f"Rate limited by Slack on conversations.history for channel {channel_id}. "
f"Retrying after {wait_time} seconds. Cursor: {next_cursor}"
)
time.sleep(wait_time)
# current_api_call_successful remains False, loop will retry this page
else:
raise # Re-raise to outer handler for not_in_channel or other SlackApiErrors
if not current_api_call_successful:
continue # Retry the current page fetch due to handled rate limit
# Process result if successful
batch = result["messages"] batch = result["messages"]
messages.extend(batch) messages.extend(batch)
# Check if we need to paginate
if result.get("has_more", False) and len(messages) < limit: if result.get("has_more", False) and len(messages) < limit:
next_cursor = result["response_metadata"]["next_cursor"] next_cursor = result["response_metadata"]["next_cursor"]
else: else:
break break # Exit pagination loop
# Respect the overall limit parameter except SlackApiError as e: # Outer catch for not_in_channel or unhandled SlackApiErrors from inner try
return messages[:limit] if (e.response is not None and
hasattr(e.response, 'data') and
isinstance(e.response.data, dict) and
e.response.data.get('error') == 'not_in_channel'):
logger.warning(
f"Bot is not in channel '{channel_id}'. Cannot fetch history. "
"Please add the bot to this channel."
)
return []
# For other SlackApiErrors from inner block or this level
raise SlackApiError(f"Error retrieving history for channel {channel_id}: {e}", e.response)
except Exception as general_error: # Catch any other unexpected errors
logger.error(f"Unexpected error in get_conversation_history for channel {channel_id}: {general_error}")
# Re-raise the general error to allow higher-level handling or visibility
raise
except SlackApiError as e: return messages[:limit]
raise SlackApiError(f"Error retrieving history for channel {channel_id}: {e}", e.response)
@staticmethod @staticmethod
def convert_date_to_timestamp(date_str: str) -> Optional[int]: def convert_date_to_timestamp(date_str: str) -> Optional[int]:
""" """
@ -220,12 +292,31 @@ class SlackHistory:
""" """
if not self.client: if not self.client:
raise ValueError("Slack client not initialized. Call set_token() first.") raise ValueError("Slack client not initialized. Call set_token() first.")
try: while True:
result = self.client.users_info(user=user_id) try:
return result["user"] # Proactive delay for users.info (Tier 4) - generally not needed unless called extremely rapidly.
except SlackApiError as e: # For now, we are only adding Retry-After as per plan.
raise SlackApiError(f"Error retrieving user info for {user_id}: {e}", e.response) # time.sleep(0.6) # Optional: ~100 req/min if ever needed.
result = self.client.users_info(user=user_id)
return result["user"] # Success, return and exit loop implicitly
except SlackApiError as e_user_info:
if e_user_info.response is not None and e_user_info.response.status_code == 429:
retry_after_str = e_user_info.response.headers.get('Retry-After')
wait_time = 30 # Default for Tier 4, can be adjusted
if retry_after_str and retry_after_str.isdigit():
wait_time = int(retry_after_str)
logger.warning(f"Rate limited by Slack on users.info for user {user_id}. Retrying after {wait_time} seconds.")
time.sleep(wait_time)
continue # Retry the API call
else:
# Not a 429 error, or no response object, re-raise
raise SlackApiError(f"Error retrieving user info for {user_id}: {e_user_info}", e_user_info.response)
except Exception as general_error: # Catch any other unexpected errors
logger.error(f"Unexpected error in get_user_info for user {user_id}: {general_error}")
raise # Re-raise unexpected errors
def format_message(self, msg: Dict[str, Any], include_user_info: bool = False) -> Dict[str, Any]: def format_message(self, msg: Dict[str, Any], include_user_info: bool = False) -> Dict[str, Any]:
""" """

View file

@ -0,0 +1,154 @@
import unittest
from unittest.mock import patch, Mock, call
from datetime import datetime
# Adjust the import path based on the actual location if test_github_connector.py
# is not in the same directory as github_connector.py or if paths are set up differently.
# Assuming surfsend_backend/app/connectors/test_github_connector.py
from surfsense_backend.app.connectors.github_connector import GitHubConnector
from github3.exceptions import ForbiddenError # Import the specific exception
class TestGitHubConnector(unittest.TestCase):
@patch('surfsense_backend.app.connectors.github_connector.github_login')
def test_get_user_repositories_uses_type_all(self, mock_github_login):
# Mock the GitHub client object and its methods
mock_gh_instance = Mock()
mock_github_login.return_value = mock_gh_instance
# Mock the self.gh.me() call in __init__ to prevent an actual API call
mock_gh_instance.me.return_value = Mock() # Simple mock to pass initialization
# Prepare mock repository data
mock_repo1_data = Mock()
mock_repo1_data.id = 1
mock_repo1_data.name = "repo1"
mock_repo1_data.full_name = "user/repo1"
mock_repo1_data.private = False
mock_repo1_data.html_url = "http://example.com/user/repo1"
mock_repo1_data.description = "Test repo 1"
mock_repo1_data.updated_at = datetime(2023, 1, 1, 10, 30, 0) # Added time component
mock_repo2_data = Mock()
mock_repo2_data.id = 2
mock_repo2_data.name = "org-repo"
mock_repo2_data.full_name = "org/org-repo"
mock_repo2_data.private = True
mock_repo2_data.html_url = "http://example.com/org/org-repo"
mock_repo2_data.description = "Org repo"
mock_repo2_data.updated_at = datetime(2023, 1, 2, 12, 0, 0) # Added time component
# Configure the mock for gh.repositories() call
# This method is an iterator, so it should return an iterable (e.g., a list)
mock_gh_instance.repositories.return_value = [mock_repo1_data, mock_repo2_data]
connector = GitHubConnector(token="fake_token")
repositories = connector.get_user_repositories()
# Assert that gh.repositories was called correctly
mock_gh_instance.repositories.assert_called_once_with(type='all', sort='updated')
# Assert the structure and content of the returned data
expected_repositories = [
{
"id": 1, "name": "repo1", "full_name": "user/repo1", "private": False,
"url": "http://example.com/user/repo1", "description": "Test repo 1",
"last_updated": datetime(2023, 1, 1, 10, 30, 0)
},
{
"id": 2, "name": "org-repo", "full_name": "org/org-repo", "private": True,
"url": "http://example.com/org/org-repo", "description": "Org repo",
"last_updated": datetime(2023, 1, 2, 12, 0, 0)
}
]
self.assertEqual(repositories, expected_repositories)
self.assertEqual(len(repositories), 2)
@patch('surfsense_backend.app.connectors.github_connector.github_login')
def test_get_user_repositories_handles_empty_description_and_none_updated_at(self, mock_github_login):
# Mock the GitHub client object and its methods
mock_gh_instance = Mock()
mock_github_login.return_value = mock_gh_instance
mock_gh_instance.me.return_value = Mock()
mock_repo_data = Mock()
mock_repo_data.id = 1
mock_repo_data.name = "repo_no_desc"
mock_repo_data.full_name = "user/repo_no_desc"
mock_repo_data.private = False
mock_repo_data.html_url = "http://example.com/user/repo_no_desc"
mock_repo_data.description = None # Test None description
mock_repo_data.updated_at = None # Test None updated_at
mock_gh_instance.repositories.return_value = [mock_repo_data]
connector = GitHubConnector(token="fake_token")
repositories = connector.get_user_repositories()
mock_gh_instance.repositories.assert_called_once_with(type='all', sort='updated')
expected_repositories = [
{
"id": 1, "name": "repo_no_desc", "full_name": "user/repo_no_desc", "private": False,
"url": "http://example.com/user/repo_no_desc", "description": "", # Expect empty string
"last_updated": None # Expect None
}
]
self.assertEqual(repositories, expected_repositories)
@patch('surfsense_backend.app.connectors.github_connector.github_login')
def test_github_connector_initialization_failure_forbidden(self, mock_github_login):
# Test that __init__ raises ValueError on auth failure (ForbiddenError)
mock_gh_instance = Mock()
mock_github_login.return_value = mock_gh_instance
# Create a mock response object for the ForbiddenError
# The actual response structure might vary, but github3.py's ForbiddenError
# can be instantiated with just a response object that has a status_code.
mock_response = Mock()
mock_response.status_code = 403 # Typically Forbidden
# Setup the side_effect for self.gh.me()
mock_gh_instance.me.side_effect = ForbiddenError(mock_response)
with self.assertRaises(ValueError) as context:
GitHubConnector(token="invalid_token_forbidden")
self.assertIn("Invalid GitHub token or insufficient permissions.", str(context.exception))
@patch('surfsense_backend.app.connectors.github_connector.github_login')
def test_github_connector_initialization_failure_authentication_failed(self, mock_github_login):
# Test that __init__ raises ValueError on auth failure (AuthenticationFailed, which is a subclass of ForbiddenError)
# For github3.py, AuthenticationFailed is more specific for token issues.
from github3.exceptions import AuthenticationFailed
mock_gh_instance = Mock()
mock_github_login.return_value = mock_gh_instance
mock_response = Mock()
mock_response.status_code = 401 # Typically Unauthorized
mock_gh_instance.me.side_effect = AuthenticationFailed(mock_response)
with self.assertRaises(ValueError) as context:
GitHubConnector(token="invalid_token_authfailed")
self.assertIn("Invalid GitHub token or insufficient permissions.", str(context.exception))
@patch('surfsense_backend.app.connectors.github_connector.github_login')
def test_get_user_repositories_handles_api_exception(self, mock_github_login):
mock_gh_instance = Mock()
mock_github_login.return_value = mock_gh_instance
mock_gh_instance.me.return_value = Mock()
# Simulate an exception when calling repositories
mock_gh_instance.repositories.side_effect = Exception("API Error")
connector = GitHubConnector(token="fake_token")
# We expect it to log an error and return an empty list
with patch('surfsense_backend.app.connectors.github_connector.logger') as mock_logger:
repositories = connector.get_user_repositories()
self.assertEqual(repositories, [])
mock_logger.error.assert_called_once()
self.assertIn("Failed to fetch GitHub repositories: API Error", mock_logger.error.call_args[0][0])
if __name__ == '__main__':
unittest.main()

View file

@ -0,0 +1,420 @@
import unittest
import time # Imported to be available for patching target module
from unittest.mock import patch, Mock, call
from slack_sdk.errors import SlackApiError
# Since test_slack_history.py is in the same directory as slack_history.py
from .slack_history import SlackHistory
class TestSlackHistoryGetAllChannels(unittest.TestCase):
@patch('surfsense_backend.app.connectors.slack_history.logger')
@patch('surfsense_backend.app.connectors.slack_history.time.sleep')
@patch('slack_sdk.WebClient')
def test_get_all_channels_pagination_with_delay(self, MockWebClient, mock_sleep, mock_logger):
mock_client_instance = MockWebClient.return_value
# Mock API responses now include is_private and is_member
page1_response = {
"channels": [
{"name": "general", "id": "C1", "is_private": False, "is_member": True},
{"name": "dev", "id": "C0", "is_private": False, "is_member": True}
],
"response_metadata": {"next_cursor": "cursor123"}
}
page2_response = {
"channels": [{"name": "random", "id": "C2", "is_private": True, "is_member": True}],
"response_metadata": {"next_cursor": ""}
}
mock_client_instance.conversations_list.side_effect = [
page1_response,
page2_response
]
slack_history = SlackHistory(token="fake_token")
channels_list = slack_history.get_all_channels(include_private=True)
expected_channels_list = [
{"id": "C1", "name": "general", "is_private": False, "is_member": True},
{"id": "C0", "name": "dev", "is_private": False, "is_member": True},
{"id": "C2", "name": "random", "is_private": True, "is_member": True}
]
self.assertEqual(len(channels_list), 3)
self.assertListEqual(channels_list, expected_channels_list) # Assert list equality
expected_calls = [
call(types="public_channel,private_channel", cursor=None, limit=1000),
call(types="public_channel,private_channel", cursor="cursor123", limit=1000)
]
mock_client_instance.conversations_list.assert_has_calls(expected_calls)
self.assertEqual(mock_client_instance.conversations_list.call_count, 2)
mock_sleep.assert_called_once_with(3)
mock_logger.info.assert_called_once_with("Paginating for channels, waiting 3 seconds before next call. Cursor: cursor123")
@patch('surfsense_backend.app.connectors.slack_history.logger')
@patch('surfsense_backend.app.connectors.slack_history.time.sleep')
@patch('slack_sdk.WebClient')
def test_get_all_channels_rate_limit_with_retry_after(self, MockWebClient, mock_sleep, mock_logger):
mock_client_instance = MockWebClient.return_value
mock_error_response = Mock()
mock_error_response.status_code = 429
mock_error_response.headers = {'Retry-After': '5'}
successful_response = {
"channels": [{"name": "general", "id": "C1", "is_private": False, "is_member": True}],
"response_metadata": {"next_cursor": ""}
}
mock_client_instance.conversations_list.side_effect = [
SlackApiError(message="ratelimited", response=mock_error_response),
successful_response
]
slack_history = SlackHistory(token="fake_token")
channels_list = slack_history.get_all_channels(include_private=True)
expected_channels_list = [{"id": "C1", "name": "general", "is_private": False, "is_member": True}]
self.assertEqual(len(channels_list), 1)
self.assertListEqual(channels_list, expected_channels_list)
mock_sleep.assert_called_once_with(5)
mock_logger.warning.assert_called_once_with("Slack API rate limit hit while fetching channels. Waiting for 5 seconds. Cursor: None")
expected_calls = [
call(types="public_channel,private_channel", cursor=None, limit=1000),
call(types="public_channel,private_channel", cursor=None, limit=1000)
]
mock_client_instance.conversations_list.assert_has_calls(expected_calls)
self.assertEqual(mock_client_instance.conversations_list.call_count, 2)
@patch('surfsense_backend.app.connectors.slack_history.logger')
@patch('surfsense_backend.app.connectors.slack_history.time.sleep')
@patch('slack_sdk.WebClient')
def test_get_all_channels_rate_limit_no_retry_after_valid_header(self, MockWebClient, mock_sleep, mock_logger):
mock_client_instance = MockWebClient.return_value
mock_error_response = Mock()
mock_error_response.status_code = 429
mock_error_response.headers = {'Retry-After': 'invalid_value'}
successful_response = {
"channels": [{"name": "general", "id": "C1", "is_private": False, "is_member": True}],
"response_metadata": {"next_cursor": ""}
}
mock_client_instance.conversations_list.side_effect = [
SlackApiError(message="ratelimited", response=mock_error_response),
successful_response
]
slack_history = SlackHistory(token="fake_token")
channels_list = slack_history.get_all_channels(include_private=True)
expected_channels_list = [{"id": "C1", "name": "general", "is_private": False, "is_member": True}]
self.assertListEqual(channels_list, expected_channels_list)
mock_sleep.assert_called_once_with(60) # Default fallback
mock_logger.warning.assert_called_once_with("Slack API rate limit hit while fetching channels. Waiting for 60 seconds. Cursor: None")
self.assertEqual(mock_client_instance.conversations_list.call_count, 2)
@patch('surfsense_backend.app.connectors.slack_history.logger')
@patch('surfsense_backend.app.connectors.slack_history.time.sleep')
@patch('slack_sdk.WebClient')
def test_get_all_channels_rate_limit_no_retry_after_header(self, MockWebClient, mock_sleep, mock_logger):
mock_client_instance = MockWebClient.return_value
mock_error_response = Mock()
mock_error_response.status_code = 429
mock_error_response.headers = {}
successful_response = {
"channels": [{"name": "general", "id": "C1", "is_private": False, "is_member": True}],
"response_metadata": {"next_cursor": ""}
}
mock_client_instance.conversations_list.side_effect = [
SlackApiError(message="ratelimited", response=mock_error_response),
successful_response
]
slack_history = SlackHistory(token="fake_token")
channels_list = slack_history.get_all_channels(include_private=True)
expected_channels_list = [{"id": "C1", "name": "general", "is_private": False, "is_member": True}]
self.assertListEqual(channels_list, expected_channels_list)
mock_sleep.assert_called_once_with(60) # Default fallback
mock_logger.warning.assert_called_once_with("Slack API rate limit hit while fetching channels. Waiting for 60 seconds. Cursor: None")
self.assertEqual(mock_client_instance.conversations_list.call_count, 2)
@patch('surfsense_backend.app.connectors.slack_history.logger')
@patch('surfsense_backend.app.connectors.slack_history.time.sleep')
@patch('slack_sdk.WebClient')
def test_get_all_channels_other_slack_api_error(self, MockWebClient, mock_sleep, mock_logger):
mock_client_instance = MockWebClient.return_value
mock_error_response = Mock()
mock_error_response.status_code = 500
mock_error_response.headers = {}
mock_error_response.data = {"ok": False, "error": "internal_error"}
original_error = SlackApiError(message="server error", response=mock_error_response)
mock_client_instance.conversations_list.side_effect = original_error
slack_history = SlackHistory(token="fake_token")
with self.assertRaises(SlackApiError) as context:
slack_history.get_all_channels(include_private=True)
self.assertEqual(context.exception.response.status_code, 500)
self.assertIn("server error", str(context.exception))
mock_sleep.assert_not_called()
mock_logger.warning.assert_not_called() # Ensure no rate limit log
mock_client_instance.conversations_list.assert_called_once_with(
types="public_channel,private_channel", cursor=None, limit=1000
)
@patch('surfsense_backend.app.connectors.slack_history.logger')
@patch('surfsense_backend.app.connectors.slack_history.time.sleep')
@patch('slack_sdk.WebClient')
def test_get_all_channels_handles_missing_name_id_gracefully(self, MockWebClient, mock_sleep, mock_logger):
mock_client_instance = MockWebClient.return_value
response_with_malformed_data = {
"channels": [
{"id": "C1_missing_name", "is_private": False, "is_member": True},
{"name": "channel_missing_id", "is_private": False, "is_member": True},
{"name": "general", "id": "C2_valid", "is_private": False, "is_member": True}
],
"response_metadata": {"next_cursor": ""}
}
mock_client_instance.conversations_list.return_value = response_with_malformed_data
slack_history = SlackHistory(token="fake_token")
channels_list = slack_history.get_all_channels(include_private=True)
expected_channels_list = [
{"id": "C2_valid", "name": "general", "is_private": False, "is_member": True}
]
self.assertEqual(len(channels_list), 1)
self.assertListEqual(channels_list, expected_channels_list)
self.assertEqual(mock_logger.warning.call_count, 2)
mock_logger.warning.assert_any_call("Channel found with missing name or id. Data: {'id': 'C1_missing_name', 'is_private': False, 'is_member': True}")
mock_logger.warning.assert_any_call("Channel found with missing name or id. Data: {'name': 'channel_missing_id', 'is_private': False, 'is_member': True}")
mock_sleep.assert_not_called()
mock_client_instance.conversations_list.assert_called_once_with(
types="public_channel,private_channel", cursor=None, limit=1000
)
if __name__ == '__main__':
unittest.main()
class TestSlackHistoryGetConversationHistory(unittest.TestCase):
@patch('surfsense_backend.app.connectors.slack_history.logger')
@patch('surfsense_backend.app.connectors.slack_history.time.sleep')
@patch('slack_sdk.WebClient')
def test_proactive_delay_single_page(self, MockWebClient, mock_time_sleep, mock_logger):
mock_client_instance = MockWebClient.return_value
mock_client_instance.conversations_history.return_value = {
"messages": [{"text": "msg1"}],
"has_more": False
}
slack_history = SlackHistory(token="fake_token")
slack_history.get_conversation_history(channel_id="C123")
mock_time_sleep.assert_called_once_with(1.2) # Proactive delay
@patch('surfsense_backend.app.connectors.slack_history.logger')
@patch('surfsense_backend.app.connectors.slack_history.time.sleep')
@patch('slack_sdk.WebClient')
def test_proactive_delay_multiple_pages(self, MockWebClient, mock_time_sleep, mock_logger):
mock_client_instance = MockWebClient.return_value
mock_client_instance.conversations_history.side_effect = [
{
"messages": [{"text": "msg1"}],
"has_more": True,
"response_metadata": {"next_cursor": "cursor1"}
},
{
"messages": [{"text": "msg2"}],
"has_more": False
}
]
slack_history = SlackHistory(token="fake_token")
slack_history.get_conversation_history(channel_id="C123")
# Expected calls: 1.2 (page1), 1.2 (page2)
self.assertEqual(mock_time_sleep.call_count, 2)
mock_time_sleep.assert_has_calls([call(1.2), call(1.2)])
@patch('surfsense_backend.app.connectors.slack_history.logger')
@patch('surfsense_backend.app.connectors.slack_history.time.sleep')
@patch('slack_sdk.WebClient')
def test_retry_after_logic(self, MockWebClient, mock_time_sleep, mock_logger):
mock_client_instance = MockWebClient.return_value
mock_error_response = Mock()
mock_error_response.status_code = 429
mock_error_response.headers = {'Retry-After': '5'}
mock_client_instance.conversations_history.side_effect = [
SlackApiError(message="ratelimited", response=mock_error_response),
{"messages": [{"text": "msg1"}], "has_more": False}
]
slack_history = SlackHistory(token="fake_token")
messages = slack_history.get_conversation_history(channel_id="C123")
self.assertEqual(len(messages), 1)
self.assertEqual(messages[0]["text"], "msg1")
# Expected sleep calls: 1.2 (proactive for 1st attempt), 5 (rate limit), 1.2 (proactive for 2nd attempt)
mock_time_sleep.assert_has_calls([call(1.2), call(5), call(1.2)], any_order=False)
mock_logger.warning.assert_called_once() # Check that a warning was logged for rate limiting
@patch('surfsense_backend.app.connectors.slack_history.logger')
@patch('surfsense_backend.app.connectors.slack_history.time.sleep')
@patch('slack_sdk.WebClient')
def test_not_in_channel_error(self, MockWebClient, mock_time_sleep, mock_logger):
mock_client_instance = MockWebClient.return_value
mock_error_response = Mock()
mock_error_response.status_code = 403 # Typical for not_in_channel, but data matters more
mock_error_response.data = {'ok': False, 'error': 'not_in_channel'}
# This error is now raised by the inner try-except, then caught by the outer one
mock_client_instance.conversations_history.side_effect = SlackApiError(
message="not_in_channel error",
response=mock_error_response
)
slack_history = SlackHistory(token="fake_token")
messages = slack_history.get_conversation_history(channel_id="C123")
self.assertEqual(messages, [])
mock_logger.warning.assert_called_with(
"Bot is not in channel 'C123'. Cannot fetch history. Please add the bot to this channel."
)
mock_time_sleep.assert_called_once_with(1.2) # Proactive delay before the API call
@patch('surfsense_backend.app.connectors.slack_history.logger')
@patch('surfsense_backend.app.connectors.slack_history.time.sleep')
@patch('slack_sdk.WebClient')
def test_other_slack_api_error_propagates(self, MockWebClient, mock_time_sleep, mock_logger):
mock_client_instance = MockWebClient.return_value
mock_error_response = Mock()
mock_error_response.status_code = 500
mock_error_response.data = {'ok': False, 'error': 'internal_error'}
original_error = SlackApiError(message="server error", response=mock_error_response)
mock_client_instance.conversations_history.side_effect = original_error
slack_history = SlackHistory(token="fake_token")
with self.assertRaises(SlackApiError) as context:
slack_history.get_conversation_history(channel_id="C123")
self.assertIn("Error retrieving history for channel C123", str(context.exception))
self.assertIs(context.exception.response, mock_error_response)
mock_time_sleep.assert_called_once_with(1.2) # Proactive delay
@patch('surfsense_backend.app.connectors.slack_history.logger')
@patch('surfsense_backend.app.connectors.slack_history.time.sleep')
@patch('slack_sdk.WebClient')
def test_general_exception_propagates(self, MockWebClient, mock_time_sleep, mock_logger):
mock_client_instance = MockWebClient.return_value
original_error = Exception("Something broke")
mock_client_instance.conversations_history.side_effect = original_error
slack_history = SlackHistory(token="fake_token")
with self.assertRaises(Exception) as context: # Check for generic Exception
slack_history.get_conversation_history(channel_id="C123")
self.assertIs(context.exception, original_error) # Should re-raise the original error
mock_logger.error.assert_called_once_with("Unexpected error in get_conversation_history for channel C123: Something broke")
mock_time_sleep.assert_called_once_with(1.2) # Proactive delay
class TestSlackHistoryGetUserInfo(unittest.TestCase):
@patch('surfsense_backend.app.connectors.slack_history.logger')
@patch('surfsense_backend.app.connectors.slack_history.time.sleep')
@patch('slack_sdk.WebClient')
def test_retry_after_logic(self, MockWebClient, mock_time_sleep, mock_logger):
mock_client_instance = MockWebClient.return_value
mock_error_response = Mock()
mock_error_response.status_code = 429
mock_error_response.headers = {'Retry-After': '3'} # Using 3 seconds for test
successful_user_data = {"id": "U123", "name": "testuser"}
mock_client_instance.users_info.side_effect = [
SlackApiError(message="ratelimited_userinfo", response=mock_error_response),
{"user": successful_user_data}
]
slack_history = SlackHistory(token="fake_token")
user_info = slack_history.get_user_info(user_id="U123")
self.assertEqual(user_info, successful_user_data)
# Assert that time.sleep was called for the rate limit
mock_time_sleep.assert_called_once_with(3)
mock_logger.warning.assert_called_once_with(
"Rate limited by Slack on users.info for user U123. Retrying after 3 seconds."
)
# Assert users_info was called twice (original + retry)
self.assertEqual(mock_client_instance.users_info.call_count, 2)
mock_client_instance.users_info.assert_has_calls([call(user="U123"), call(user="U123")])
@patch('surfsense_backend.app.connectors.slack_history.logger')
@patch('surfsense_backend.app.connectors.slack_history.time.sleep') # time.sleep might be called by other logic, but not expected here
@patch('slack_sdk.WebClient')
def test_other_slack_api_error_propagates(self, MockWebClient, mock_time_sleep, mock_logger):
mock_client_instance = MockWebClient.return_value
mock_error_response = Mock()
mock_error_response.status_code = 500 # Some other error
mock_error_response.data = {'ok': False, 'error': 'internal_server_error'}
original_error = SlackApiError(message="internal server error", response=mock_error_response)
mock_client_instance.users_info.side_effect = original_error
slack_history = SlackHistory(token="fake_token")
with self.assertRaises(SlackApiError) as context:
slack_history.get_user_info(user_id="U123")
# Check that the raised error is the one we expect
self.assertIn("Error retrieving user info for U123", str(context.exception))
self.assertIs(context.exception.response, mock_error_response)
mock_time_sleep.assert_not_called() # No rate limit sleep
@patch('surfsense_backend.app.connectors.slack_history.logger')
@patch('surfsense_backend.app.connectors.slack_history.time.sleep')
@patch('slack_sdk.WebClient')
def test_general_exception_propagates(self, MockWebClient, mock_time_sleep, mock_logger):
mock_client_instance = MockWebClient.return_value
original_error = Exception("A very generic problem")
mock_client_instance.users_info.side_effect = original_error
slack_history = SlackHistory(token="fake_token")
with self.assertRaises(Exception) as context:
slack_history.get_user_info(user_id="U123")
self.assertIs(context.exception, original_error) # Check it's the exact same exception
mock_logger.error.assert_called_once_with(
"Unexpected error in get_user_info for user U123: A very generic problem"
)
mock_time_sleep.assert_not_called() # No rate limit sleep

View file

@ -114,33 +114,25 @@ async def index_slack_messages(
skipped_channels = [] skipped_channels = []
# Process each channel # Process each channel
for channel_name, channel_id in channels.items(): for channel_obj in channels: # Modified loop to iterate over list of channel objects
channel_id = channel_obj["id"]
channel_name = channel_obj["name"]
is_private = channel_obj["is_private"]
is_member = channel_obj["is_member"] # This might be False for public channels too
try: try:
# Check if the bot is a member of the channel # If it's a private channel and the bot is not a member, skip.
try: # For public channels, if they are listed by conversations.list, the bot can typically read history.
# First try to get channel info to check if bot is a member # The `not_in_channel` error in get_conversation_history will be the ultimate gatekeeper if history is inaccessible.
channel_info = slack_client.client.conversations_info(channel=channel_id) if is_private and not is_member:
logger.warning(f"Bot is not a member of private channel {channel_name} ({channel_id}). Skipping.")
# For private channels, the bot needs to be a member skipped_channels.append(f"{channel_name} (private, bot not a member)")
if channel_info.get("channel", {}).get("is_private", False): documents_skipped += 1
# Check if bot is a member continue
is_member = channel_info.get("channel", {}).get("is_member", False)
if not is_member:
logger.warning(f"Bot is not a member of private channel {channel_name} ({channel_id}). Skipping.")
skipped_channels.append(f"{channel_name} (private, bot not a member)")
documents_skipped += 1
continue
except SlackApiError as e:
if "not_in_channel" in str(e) or "channel_not_found" in str(e):
logger.warning(f"Bot cannot access channel {channel_name} ({channel_id}). Skipping.")
skipped_channels.append(f"{channel_name} (access error)")
documents_skipped += 1
continue
else:
# Re-raise if it's a different error
raise
# Get messages for this channel # Get messages for this channel
# The get_history_by_date_range now uses get_conversation_history,
# which handles 'not_in_channel' by returning [] and logging.
messages, error = slack_client.get_history_by_date_range( messages, error = slack_client.get_history_by_date_range(
channel_id=channel_id, channel_id=channel_id,
start_date=start_date_str, start_date=start_date_str,