mirror of
https://github.com/MODSetter/SurfSense.git
synced 2025-09-01 10:09:08 +00:00
Merge commit 'fd6da4c4723ab28fc40605d109a9ba66cbd364e3' into dev
This commit is contained in:
commit
a870e07cbf
5 changed files with 733 additions and 76 deletions
|
@ -80,7 +80,7 @@ class GitHubConnector:
|
|||
# type='owner' fetches repos owned by the user
|
||||
# type='member' fetches repos the user is a collaborator on (including orgs)
|
||||
# type='all' fetches both
|
||||
for repo in self.gh.repositories(type='owner', sort='updated'):
|
||||
for repo in self.gh.repositories(type='all', sort='updated'):
|
||||
repos_data.append({
|
||||
"id": repo.id,
|
||||
"name": repo.name,
|
||||
|
|
|
@ -6,11 +6,15 @@ Allows fetching channel lists and message history with date range filtering.
|
|||
"""
|
||||
|
||||
import os
|
||||
import time # Added import
|
||||
import logging # Added import
|
||||
from slack_sdk import WebClient
|
||||
from slack_sdk.errors import SlackApiError
|
||||
from datetime import datetime
|
||||
from typing import Dict, List, Optional, Tuple, Any, Union
|
||||
|
||||
logger = logging.getLogger(__name__) # Added logger
|
||||
|
||||
|
||||
class SlackHistory:
|
||||
"""Class for retrieving conversation history from Slack channels."""
|
||||
|
@ -33,56 +37,88 @@ class SlackHistory:
|
|||
"""
|
||||
self.client = WebClient(token=token)
|
||||
|
||||
def get_all_channels(self, include_private: bool = True) -> Dict[str, str]:
|
||||
def get_all_channels(self, include_private: bool = True) -> List[Dict[str, Any]]:
|
||||
"""
|
||||
Fetch all channels that the bot has access to.
|
||||
Fetch all channels that the bot has access to, with rate limit handling.
|
||||
|
||||
Args:
|
||||
include_private: Whether to include private channels
|
||||
|
||||
Returns:
|
||||
Dictionary mapping channel names to channel IDs
|
||||
List of dictionaries, each representing a channel with id, name, is_private, is_member.
|
||||
|
||||
Raises:
|
||||
ValueError: If no Slack client has been initialized
|
||||
SlackApiError: If there's an error calling the Slack API
|
||||
SlackApiError: If there's an unrecoverable error calling the Slack API
|
||||
RuntimeError: For unexpected errors during channel fetching.
|
||||
"""
|
||||
if not self.client:
|
||||
raise ValueError("Slack client not initialized. Call set_token() first.")
|
||||
|
||||
channels_dict = {}
|
||||
channels_list = [] # Changed from dict to list
|
||||
types = "public_channel"
|
||||
if include_private:
|
||||
types += ",private_channel"
|
||||
|
||||
try:
|
||||
# Call the conversations.list method
|
||||
result = self.client.conversations_list(
|
||||
types=types,
|
||||
limit=1000 # Maximum allowed by API
|
||||
)
|
||||
channels = result["channels"]
|
||||
|
||||
# Handle pagination for workspaces with many channels
|
||||
while result.get("response_metadata", {}).get("next_cursor"):
|
||||
next_cursor = result["response_metadata"]["next_cursor"]
|
||||
|
||||
# Get the next batch of channels
|
||||
result = self.client.conversations_list(
|
||||
|
||||
next_cursor = None
|
||||
is_first_request = True
|
||||
|
||||
while is_first_request or next_cursor:
|
||||
try:
|
||||
if not is_first_request: # Add delay only for paginated requests
|
||||
logger.info(f"Paginating for channels, waiting 3 seconds before next call. Cursor: {next_cursor}")
|
||||
time.sleep(3)
|
||||
|
||||
current_limit = 1000 # Max limit
|
||||
api_result = self.client.conversations_list(
|
||||
types=types,
|
||||
cursor=next_cursor,
|
||||
limit=1000
|
||||
limit=current_limit
|
||||
)
|
||||
channels.extend(result["channels"])
|
||||
|
||||
# Create a dictionary mapping channel names to IDs
|
||||
for channel in channels:
|
||||
channels_dict[channel["name"]] = channel["id"]
|
||||
|
||||
return channels_dict
|
||||
|
||||
channels_on_page = api_result["channels"]
|
||||
for channel in channels_on_page:
|
||||
if "name" in channel and "id" in channel:
|
||||
channel_data = {
|
||||
"id": channel.get("id"),
|
||||
"name": channel.get("name"),
|
||||
"is_private": channel.get("is_private", False),
|
||||
# is_member is often part of the channel object from conversations.list
|
||||
# It indicates if the authenticated user (bot) is a member.
|
||||
# For public channels, this might be true or the API might not focus on it
|
||||
# if the bot can read it anyway. For private, it's crucial.
|
||||
"is_member": channel.get("is_member", False)
|
||||
}
|
||||
channels_list.append(channel_data)
|
||||
else:
|
||||
logger.warning(f"Channel found with missing name or id. Data: {channel}")
|
||||
|
||||
|
||||
next_cursor = api_result.get("response_metadata", {}).get("next_cursor")
|
||||
is_first_request = False # Subsequent requests are not the first
|
||||
|
||||
if not next_cursor: # All pages processed
|
||||
break
|
||||
|
||||
except SlackApiError as e:
|
||||
if e.response is not None and e.response.status_code == 429:
|
||||
retry_after_header = e.response.headers.get('Retry-After')
|
||||
wait_duration = 60 # Default wait time
|
||||
if retry_after_header and retry_after_header.isdigit():
|
||||
wait_duration = int(retry_after_header)
|
||||
|
||||
logger.warning(f"Slack API rate limit hit while fetching channels. Waiting for {wait_duration} seconds. Cursor: {next_cursor}")
|
||||
time.sleep(wait_duration)
|
||||
# The loop will continue, retrying with the same cursor
|
||||
else:
|
||||
# Not a 429 error, or no response object, re-raise
|
||||
raise SlackApiError(f"Error retrieving channels: {e}", e.response)
|
||||
except Exception as general_error:
|
||||
# Handle other potential errors like network issues if necessary, or re-raise
|
||||
logger.error(f"An unexpected error occurred during channel fetching: {general_error}")
|
||||
raise RuntimeError(f"An unexpected error occurred during channel fetching: {general_error}")
|
||||
|
||||
except SlackApiError as e:
|
||||
raise SlackApiError(f"Error retrieving channels: {e}", e.response)
|
||||
return channels_list
|
||||
|
||||
def get_conversation_history(
|
||||
self,
|
||||
|
@ -110,17 +146,18 @@ class SlackHistory:
|
|||
if not self.client:
|
||||
raise ValueError("Slack client not initialized. Call set_token() first.")
|
||||
|
||||
try:
|
||||
# Call the conversations.history method
|
||||
messages = []
|
||||
next_cursor = None
|
||||
|
||||
while True:
|
||||
messages = []
|
||||
next_cursor = None
|
||||
|
||||
while True:
|
||||
try:
|
||||
# Proactive delay for conversations.history (Tier 3)
|
||||
time.sleep(1.2) # Wait 1.2 seconds before each history call.
|
||||
|
||||
kwargs = {
|
||||
"channel": channel_id,
|
||||
"limit": min(limit, 1000), # API max is 1000
|
||||
}
|
||||
|
||||
if oldest:
|
||||
kwargs["oldest"] = oldest
|
||||
if latest:
|
||||
|
@ -128,22 +165,57 @@ class SlackHistory:
|
|||
if next_cursor:
|
||||
kwargs["cursor"] = next_cursor
|
||||
|
||||
result = self.client.conversations_history(**kwargs)
|
||||
current_api_call_successful = False
|
||||
result = None # Ensure result is defined
|
||||
try:
|
||||
result = self.client.conversations_history(**kwargs)
|
||||
current_api_call_successful = True
|
||||
except SlackApiError as e_history:
|
||||
if e_history.response is not None and e_history.response.status_code == 429:
|
||||
retry_after_str = e_history.response.headers.get('Retry-After')
|
||||
wait_time = 60 # Default
|
||||
if retry_after_str and retry_after_str.isdigit():
|
||||
wait_time = int(retry_after_str)
|
||||
logger.warning(
|
||||
f"Rate limited by Slack on conversations.history for channel {channel_id}. "
|
||||
f"Retrying after {wait_time} seconds. Cursor: {next_cursor}"
|
||||
)
|
||||
time.sleep(wait_time)
|
||||
# current_api_call_successful remains False, loop will retry this page
|
||||
else:
|
||||
raise # Re-raise to outer handler for not_in_channel or other SlackApiErrors
|
||||
|
||||
if not current_api_call_successful:
|
||||
continue # Retry the current page fetch due to handled rate limit
|
||||
|
||||
# Process result if successful
|
||||
batch = result["messages"]
|
||||
messages.extend(batch)
|
||||
|
||||
# Check if we need to paginate
|
||||
if result.get("has_more", False) and len(messages) < limit:
|
||||
next_cursor = result["response_metadata"]["next_cursor"]
|
||||
else:
|
||||
break
|
||||
break # Exit pagination loop
|
||||
|
||||
# Respect the overall limit parameter
|
||||
return messages[:limit]
|
||||
except SlackApiError as e: # Outer catch for not_in_channel or unhandled SlackApiErrors from inner try
|
||||
if (e.response is not None and
|
||||
hasattr(e.response, 'data') and
|
||||
isinstance(e.response.data, dict) and
|
||||
e.response.data.get('error') == 'not_in_channel'):
|
||||
logger.warning(
|
||||
f"Bot is not in channel '{channel_id}'. Cannot fetch history. "
|
||||
"Please add the bot to this channel."
|
||||
)
|
||||
return []
|
||||
# For other SlackApiErrors from inner block or this level
|
||||
raise SlackApiError(f"Error retrieving history for channel {channel_id}: {e}", e.response)
|
||||
except Exception as general_error: # Catch any other unexpected errors
|
||||
logger.error(f"Unexpected error in get_conversation_history for channel {channel_id}: {general_error}")
|
||||
# Re-raise the general error to allow higher-level handling or visibility
|
||||
raise
|
||||
|
||||
except SlackApiError as e:
|
||||
raise SlackApiError(f"Error retrieving history for channel {channel_id}: {e}", e.response)
|
||||
|
||||
return messages[:limit]
|
||||
|
||||
@staticmethod
|
||||
def convert_date_to_timestamp(date_str: str) -> Optional[int]:
|
||||
"""
|
||||
|
@ -220,12 +292,31 @@ class SlackHistory:
|
|||
"""
|
||||
if not self.client:
|
||||
raise ValueError("Slack client not initialized. Call set_token() first.")
|
||||
|
||||
try:
|
||||
result = self.client.users_info(user=user_id)
|
||||
return result["user"]
|
||||
except SlackApiError as e:
|
||||
raise SlackApiError(f"Error retrieving user info for {user_id}: {e}", e.response)
|
||||
|
||||
while True:
|
||||
try:
|
||||
# Proactive delay for users.info (Tier 4) - generally not needed unless called extremely rapidly.
|
||||
# For now, we are only adding Retry-After as per plan.
|
||||
# time.sleep(0.6) # Optional: ~100 req/min if ever needed.
|
||||
|
||||
result = self.client.users_info(user=user_id)
|
||||
return result["user"] # Success, return and exit loop implicitly
|
||||
|
||||
except SlackApiError as e_user_info:
|
||||
if e_user_info.response is not None and e_user_info.response.status_code == 429:
|
||||
retry_after_str = e_user_info.response.headers.get('Retry-After')
|
||||
wait_time = 30 # Default for Tier 4, can be adjusted
|
||||
if retry_after_str and retry_after_str.isdigit():
|
||||
wait_time = int(retry_after_str)
|
||||
logger.warning(f"Rate limited by Slack on users.info for user {user_id}. Retrying after {wait_time} seconds.")
|
||||
time.sleep(wait_time)
|
||||
continue # Retry the API call
|
||||
else:
|
||||
# Not a 429 error, or no response object, re-raise
|
||||
raise SlackApiError(f"Error retrieving user info for {user_id}: {e_user_info}", e_user_info.response)
|
||||
except Exception as general_error: # Catch any other unexpected errors
|
||||
logger.error(f"Unexpected error in get_user_info for user {user_id}: {general_error}")
|
||||
raise # Re-raise unexpected errors
|
||||
|
||||
def format_message(self, msg: Dict[str, Any], include_user_info: bool = False) -> Dict[str, Any]:
|
||||
"""
|
||||
|
|
154
surfsense_backend/app/connectors/test_github_connector.py
Normal file
154
surfsense_backend/app/connectors/test_github_connector.py
Normal file
|
@ -0,0 +1,154 @@
|
|||
import unittest
|
||||
from unittest.mock import patch, Mock, call
|
||||
from datetime import datetime
|
||||
|
||||
# Adjust the import path based on the actual location if test_github_connector.py
|
||||
# is not in the same directory as github_connector.py or if paths are set up differently.
|
||||
# Assuming surfsend_backend/app/connectors/test_github_connector.py
|
||||
from surfsense_backend.app.connectors.github_connector import GitHubConnector
|
||||
from github3.exceptions import ForbiddenError # Import the specific exception
|
||||
|
||||
class TestGitHubConnector(unittest.TestCase):
|
||||
|
||||
@patch('surfsense_backend.app.connectors.github_connector.github_login')
|
||||
def test_get_user_repositories_uses_type_all(self, mock_github_login):
|
||||
# Mock the GitHub client object and its methods
|
||||
mock_gh_instance = Mock()
|
||||
mock_github_login.return_value = mock_gh_instance
|
||||
|
||||
# Mock the self.gh.me() call in __init__ to prevent an actual API call
|
||||
mock_gh_instance.me.return_value = Mock() # Simple mock to pass initialization
|
||||
|
||||
# Prepare mock repository data
|
||||
mock_repo1_data = Mock()
|
||||
mock_repo1_data.id = 1
|
||||
mock_repo1_data.name = "repo1"
|
||||
mock_repo1_data.full_name = "user/repo1"
|
||||
mock_repo1_data.private = False
|
||||
mock_repo1_data.html_url = "http://example.com/user/repo1"
|
||||
mock_repo1_data.description = "Test repo 1"
|
||||
mock_repo1_data.updated_at = datetime(2023, 1, 1, 10, 30, 0) # Added time component
|
||||
|
||||
mock_repo2_data = Mock()
|
||||
mock_repo2_data.id = 2
|
||||
mock_repo2_data.name = "org-repo"
|
||||
mock_repo2_data.full_name = "org/org-repo"
|
||||
mock_repo2_data.private = True
|
||||
mock_repo2_data.html_url = "http://example.com/org/org-repo"
|
||||
mock_repo2_data.description = "Org repo"
|
||||
mock_repo2_data.updated_at = datetime(2023, 1, 2, 12, 0, 0) # Added time component
|
||||
|
||||
# Configure the mock for gh.repositories() call
|
||||
# This method is an iterator, so it should return an iterable (e.g., a list)
|
||||
mock_gh_instance.repositories.return_value = [mock_repo1_data, mock_repo2_data]
|
||||
|
||||
connector = GitHubConnector(token="fake_token")
|
||||
repositories = connector.get_user_repositories()
|
||||
|
||||
# Assert that gh.repositories was called correctly
|
||||
mock_gh_instance.repositories.assert_called_once_with(type='all', sort='updated')
|
||||
|
||||
# Assert the structure and content of the returned data
|
||||
expected_repositories = [
|
||||
{
|
||||
"id": 1, "name": "repo1", "full_name": "user/repo1", "private": False,
|
||||
"url": "http://example.com/user/repo1", "description": "Test repo 1",
|
||||
"last_updated": datetime(2023, 1, 1, 10, 30, 0)
|
||||
},
|
||||
{
|
||||
"id": 2, "name": "org-repo", "full_name": "org/org-repo", "private": True,
|
||||
"url": "http://example.com/org/org-repo", "description": "Org repo",
|
||||
"last_updated": datetime(2023, 1, 2, 12, 0, 0)
|
||||
}
|
||||
]
|
||||
self.assertEqual(repositories, expected_repositories)
|
||||
self.assertEqual(len(repositories), 2)
|
||||
|
||||
@patch('surfsense_backend.app.connectors.github_connector.github_login')
|
||||
def test_get_user_repositories_handles_empty_description_and_none_updated_at(self, mock_github_login):
|
||||
# Mock the GitHub client object and its methods
|
||||
mock_gh_instance = Mock()
|
||||
mock_github_login.return_value = mock_gh_instance
|
||||
mock_gh_instance.me.return_value = Mock()
|
||||
|
||||
mock_repo_data = Mock()
|
||||
mock_repo_data.id = 1
|
||||
mock_repo_data.name = "repo_no_desc"
|
||||
mock_repo_data.full_name = "user/repo_no_desc"
|
||||
mock_repo_data.private = False
|
||||
mock_repo_data.html_url = "http://example.com/user/repo_no_desc"
|
||||
mock_repo_data.description = None # Test None description
|
||||
mock_repo_data.updated_at = None # Test None updated_at
|
||||
|
||||
mock_gh_instance.repositories.return_value = [mock_repo_data]
|
||||
connector = GitHubConnector(token="fake_token")
|
||||
repositories = connector.get_user_repositories()
|
||||
|
||||
mock_gh_instance.repositories.assert_called_once_with(type='all', sort='updated')
|
||||
expected_repositories = [
|
||||
{
|
||||
"id": 1, "name": "repo_no_desc", "full_name": "user/repo_no_desc", "private": False,
|
||||
"url": "http://example.com/user/repo_no_desc", "description": "", # Expect empty string
|
||||
"last_updated": None # Expect None
|
||||
}
|
||||
]
|
||||
self.assertEqual(repositories, expected_repositories)
|
||||
|
||||
@patch('surfsense_backend.app.connectors.github_connector.github_login')
|
||||
def test_github_connector_initialization_failure_forbidden(self, mock_github_login):
|
||||
# Test that __init__ raises ValueError on auth failure (ForbiddenError)
|
||||
mock_gh_instance = Mock()
|
||||
mock_github_login.return_value = mock_gh_instance
|
||||
|
||||
# Create a mock response object for the ForbiddenError
|
||||
# The actual response structure might vary, but github3.py's ForbiddenError
|
||||
# can be instantiated with just a response object that has a status_code.
|
||||
mock_response = Mock()
|
||||
mock_response.status_code = 403 # Typically Forbidden
|
||||
|
||||
# Setup the side_effect for self.gh.me()
|
||||
mock_gh_instance.me.side_effect = ForbiddenError(mock_response)
|
||||
|
||||
with self.assertRaises(ValueError) as context:
|
||||
GitHubConnector(token="invalid_token_forbidden")
|
||||
self.assertIn("Invalid GitHub token or insufficient permissions.", str(context.exception))
|
||||
|
||||
@patch('surfsense_backend.app.connectors.github_connector.github_login')
|
||||
def test_github_connector_initialization_failure_authentication_failed(self, mock_github_login):
|
||||
# Test that __init__ raises ValueError on auth failure (AuthenticationFailed, which is a subclass of ForbiddenError)
|
||||
# For github3.py, AuthenticationFailed is more specific for token issues.
|
||||
from github3.exceptions import AuthenticationFailed
|
||||
|
||||
mock_gh_instance = Mock()
|
||||
mock_github_login.return_value = mock_gh_instance
|
||||
|
||||
mock_response = Mock()
|
||||
mock_response.status_code = 401 # Typically Unauthorized
|
||||
|
||||
mock_gh_instance.me.side_effect = AuthenticationFailed(mock_response)
|
||||
|
||||
with self.assertRaises(ValueError) as context:
|
||||
GitHubConnector(token="invalid_token_authfailed")
|
||||
self.assertIn("Invalid GitHub token or insufficient permissions.", str(context.exception))
|
||||
|
||||
@patch('surfsense_backend.app.connectors.github_connector.github_login')
|
||||
def test_get_user_repositories_handles_api_exception(self, mock_github_login):
|
||||
mock_gh_instance = Mock()
|
||||
mock_github_login.return_value = mock_gh_instance
|
||||
mock_gh_instance.me.return_value = Mock()
|
||||
|
||||
# Simulate an exception when calling repositories
|
||||
mock_gh_instance.repositories.side_effect = Exception("API Error")
|
||||
|
||||
connector = GitHubConnector(token="fake_token")
|
||||
# We expect it to log an error and return an empty list
|
||||
with patch('surfsense_backend.app.connectors.github_connector.logger') as mock_logger:
|
||||
repositories = connector.get_user_repositories()
|
||||
|
||||
self.assertEqual(repositories, [])
|
||||
mock_logger.error.assert_called_once()
|
||||
self.assertIn("Failed to fetch GitHub repositories: API Error", mock_logger.error.call_args[0][0])
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
420
surfsense_backend/app/connectors/test_slack_history.py
Normal file
420
surfsense_backend/app/connectors/test_slack_history.py
Normal file
|
@ -0,0 +1,420 @@
|
|||
import unittest
|
||||
import time # Imported to be available for patching target module
|
||||
from unittest.mock import patch, Mock, call
|
||||
from slack_sdk.errors import SlackApiError
|
||||
|
||||
# Since test_slack_history.py is in the same directory as slack_history.py
|
||||
from .slack_history import SlackHistory
|
||||
|
||||
class TestSlackHistoryGetAllChannels(unittest.TestCase):
|
||||
|
||||
@patch('surfsense_backend.app.connectors.slack_history.logger')
|
||||
@patch('surfsense_backend.app.connectors.slack_history.time.sleep')
|
||||
@patch('slack_sdk.WebClient')
|
||||
def test_get_all_channels_pagination_with_delay(self, MockWebClient, mock_sleep, mock_logger):
|
||||
mock_client_instance = MockWebClient.return_value
|
||||
|
||||
# Mock API responses now include is_private and is_member
|
||||
page1_response = {
|
||||
"channels": [
|
||||
{"name": "general", "id": "C1", "is_private": False, "is_member": True},
|
||||
{"name": "dev", "id": "C0", "is_private": False, "is_member": True}
|
||||
],
|
||||
"response_metadata": {"next_cursor": "cursor123"}
|
||||
}
|
||||
page2_response = {
|
||||
"channels": [{"name": "random", "id": "C2", "is_private": True, "is_member": True}],
|
||||
"response_metadata": {"next_cursor": ""}
|
||||
}
|
||||
|
||||
mock_client_instance.conversations_list.side_effect = [
|
||||
page1_response,
|
||||
page2_response
|
||||
]
|
||||
|
||||
slack_history = SlackHistory(token="fake_token")
|
||||
channels_list = slack_history.get_all_channels(include_private=True)
|
||||
|
||||
expected_channels_list = [
|
||||
{"id": "C1", "name": "general", "is_private": False, "is_member": True},
|
||||
{"id": "C0", "name": "dev", "is_private": False, "is_member": True},
|
||||
{"id": "C2", "name": "random", "is_private": True, "is_member": True}
|
||||
]
|
||||
|
||||
self.assertEqual(len(channels_list), 3)
|
||||
self.assertListEqual(channels_list, expected_channels_list) # Assert list equality
|
||||
|
||||
expected_calls = [
|
||||
call(types="public_channel,private_channel", cursor=None, limit=1000),
|
||||
call(types="public_channel,private_channel", cursor="cursor123", limit=1000)
|
||||
]
|
||||
mock_client_instance.conversations_list.assert_has_calls(expected_calls)
|
||||
self.assertEqual(mock_client_instance.conversations_list.call_count, 2)
|
||||
|
||||
mock_sleep.assert_called_once_with(3)
|
||||
mock_logger.info.assert_called_once_with("Paginating for channels, waiting 3 seconds before next call. Cursor: cursor123")
|
||||
|
||||
@patch('surfsense_backend.app.connectors.slack_history.logger')
|
||||
@patch('surfsense_backend.app.connectors.slack_history.time.sleep')
|
||||
@patch('slack_sdk.WebClient')
|
||||
def test_get_all_channels_rate_limit_with_retry_after(self, MockWebClient, mock_sleep, mock_logger):
|
||||
mock_client_instance = MockWebClient.return_value
|
||||
|
||||
mock_error_response = Mock()
|
||||
mock_error_response.status_code = 429
|
||||
mock_error_response.headers = {'Retry-After': '5'}
|
||||
|
||||
successful_response = {
|
||||
"channels": [{"name": "general", "id": "C1", "is_private": False, "is_member": True}],
|
||||
"response_metadata": {"next_cursor": ""}
|
||||
}
|
||||
|
||||
mock_client_instance.conversations_list.side_effect = [
|
||||
SlackApiError(message="ratelimited", response=mock_error_response),
|
||||
successful_response
|
||||
]
|
||||
|
||||
slack_history = SlackHistory(token="fake_token")
|
||||
channels_list = slack_history.get_all_channels(include_private=True)
|
||||
|
||||
expected_channels_list = [{"id": "C1", "name": "general", "is_private": False, "is_member": True}]
|
||||
self.assertEqual(len(channels_list), 1)
|
||||
self.assertListEqual(channels_list, expected_channels_list)
|
||||
|
||||
mock_sleep.assert_called_once_with(5)
|
||||
mock_logger.warning.assert_called_once_with("Slack API rate limit hit while fetching channels. Waiting for 5 seconds. Cursor: None")
|
||||
|
||||
expected_calls = [
|
||||
call(types="public_channel,private_channel", cursor=None, limit=1000),
|
||||
call(types="public_channel,private_channel", cursor=None, limit=1000)
|
||||
]
|
||||
mock_client_instance.conversations_list.assert_has_calls(expected_calls)
|
||||
self.assertEqual(mock_client_instance.conversations_list.call_count, 2)
|
||||
|
||||
@patch('surfsense_backend.app.connectors.slack_history.logger')
|
||||
@patch('surfsense_backend.app.connectors.slack_history.time.sleep')
|
||||
@patch('slack_sdk.WebClient')
|
||||
def test_get_all_channels_rate_limit_no_retry_after_valid_header(self, MockWebClient, mock_sleep, mock_logger):
|
||||
mock_client_instance = MockWebClient.return_value
|
||||
|
||||
mock_error_response = Mock()
|
||||
mock_error_response.status_code = 429
|
||||
mock_error_response.headers = {'Retry-After': 'invalid_value'}
|
||||
|
||||
successful_response = {
|
||||
"channels": [{"name": "general", "id": "C1", "is_private": False, "is_member": True}],
|
||||
"response_metadata": {"next_cursor": ""}
|
||||
}
|
||||
|
||||
mock_client_instance.conversations_list.side_effect = [
|
||||
SlackApiError(message="ratelimited", response=mock_error_response),
|
||||
successful_response
|
||||
]
|
||||
|
||||
slack_history = SlackHistory(token="fake_token")
|
||||
channels_list = slack_history.get_all_channels(include_private=True)
|
||||
|
||||
expected_channels_list = [{"id": "C1", "name": "general", "is_private": False, "is_member": True}]
|
||||
self.assertListEqual(channels_list, expected_channels_list)
|
||||
mock_sleep.assert_called_once_with(60) # Default fallback
|
||||
mock_logger.warning.assert_called_once_with("Slack API rate limit hit while fetching channels. Waiting for 60 seconds. Cursor: None")
|
||||
self.assertEqual(mock_client_instance.conversations_list.call_count, 2)
|
||||
|
||||
@patch('surfsense_backend.app.connectors.slack_history.logger')
|
||||
@patch('surfsense_backend.app.connectors.slack_history.time.sleep')
|
||||
@patch('slack_sdk.WebClient')
|
||||
def test_get_all_channels_rate_limit_no_retry_after_header(self, MockWebClient, mock_sleep, mock_logger):
|
||||
mock_client_instance = MockWebClient.return_value
|
||||
|
||||
mock_error_response = Mock()
|
||||
mock_error_response.status_code = 429
|
||||
mock_error_response.headers = {}
|
||||
|
||||
successful_response = {
|
||||
"channels": [{"name": "general", "id": "C1", "is_private": False, "is_member": True}],
|
||||
"response_metadata": {"next_cursor": ""}
|
||||
}
|
||||
|
||||
mock_client_instance.conversations_list.side_effect = [
|
||||
SlackApiError(message="ratelimited", response=mock_error_response),
|
||||
successful_response
|
||||
]
|
||||
|
||||
slack_history = SlackHistory(token="fake_token")
|
||||
channels_list = slack_history.get_all_channels(include_private=True)
|
||||
|
||||
expected_channels_list = [{"id": "C1", "name": "general", "is_private": False, "is_member": True}]
|
||||
self.assertListEqual(channels_list, expected_channels_list)
|
||||
mock_sleep.assert_called_once_with(60) # Default fallback
|
||||
mock_logger.warning.assert_called_once_with("Slack API rate limit hit while fetching channels. Waiting for 60 seconds. Cursor: None")
|
||||
self.assertEqual(mock_client_instance.conversations_list.call_count, 2)
|
||||
|
||||
@patch('surfsense_backend.app.connectors.slack_history.logger')
|
||||
@patch('surfsense_backend.app.connectors.slack_history.time.sleep')
|
||||
@patch('slack_sdk.WebClient')
|
||||
def test_get_all_channels_other_slack_api_error(self, MockWebClient, mock_sleep, mock_logger):
|
||||
mock_client_instance = MockWebClient.return_value
|
||||
|
||||
mock_error_response = Mock()
|
||||
mock_error_response.status_code = 500
|
||||
mock_error_response.headers = {}
|
||||
mock_error_response.data = {"ok": False, "error": "internal_error"}
|
||||
|
||||
original_error = SlackApiError(message="server error", response=mock_error_response)
|
||||
mock_client_instance.conversations_list.side_effect = original_error
|
||||
|
||||
slack_history = SlackHistory(token="fake_token")
|
||||
|
||||
with self.assertRaises(SlackApiError) as context:
|
||||
slack_history.get_all_channels(include_private=True)
|
||||
|
||||
self.assertEqual(context.exception.response.status_code, 500)
|
||||
self.assertIn("server error", str(context.exception))
|
||||
mock_sleep.assert_not_called()
|
||||
mock_logger.warning.assert_not_called() # Ensure no rate limit log
|
||||
mock_client_instance.conversations_list.assert_called_once_with(
|
||||
types="public_channel,private_channel", cursor=None, limit=1000
|
||||
)
|
||||
|
||||
@patch('surfsense_backend.app.connectors.slack_history.logger')
|
||||
@patch('surfsense_backend.app.connectors.slack_history.time.sleep')
|
||||
@patch('slack_sdk.WebClient')
|
||||
def test_get_all_channels_handles_missing_name_id_gracefully(self, MockWebClient, mock_sleep, mock_logger):
|
||||
mock_client_instance = MockWebClient.return_value
|
||||
|
||||
response_with_malformed_data = {
|
||||
"channels": [
|
||||
{"id": "C1_missing_name", "is_private": False, "is_member": True},
|
||||
{"name": "channel_missing_id", "is_private": False, "is_member": True},
|
||||
{"name": "general", "id": "C2_valid", "is_private": False, "is_member": True}
|
||||
],
|
||||
"response_metadata": {"next_cursor": ""}
|
||||
}
|
||||
|
||||
mock_client_instance.conversations_list.return_value = response_with_malformed_data
|
||||
|
||||
slack_history = SlackHistory(token="fake_token")
|
||||
channels_list = slack_history.get_all_channels(include_private=True)
|
||||
|
||||
expected_channels_list = [
|
||||
{"id": "C2_valid", "name": "general", "is_private": False, "is_member": True}
|
||||
]
|
||||
self.assertEqual(len(channels_list), 1)
|
||||
self.assertListEqual(channels_list, expected_channels_list)
|
||||
|
||||
self.assertEqual(mock_logger.warning.call_count, 2)
|
||||
mock_logger.warning.assert_any_call("Channel found with missing name or id. Data: {'id': 'C1_missing_name', 'is_private': False, 'is_member': True}")
|
||||
mock_logger.warning.assert_any_call("Channel found with missing name or id. Data: {'name': 'channel_missing_id', 'is_private': False, 'is_member': True}")
|
||||
|
||||
mock_sleep.assert_not_called()
|
||||
mock_client_instance.conversations_list.assert_called_once_with(
|
||||
types="public_channel,private_channel", cursor=None, limit=1000
|
||||
)
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
||||
class TestSlackHistoryGetConversationHistory(unittest.TestCase):
|
||||
|
||||
@patch('surfsense_backend.app.connectors.slack_history.logger')
|
||||
@patch('surfsense_backend.app.connectors.slack_history.time.sleep')
|
||||
@patch('slack_sdk.WebClient')
|
||||
def test_proactive_delay_single_page(self, MockWebClient, mock_time_sleep, mock_logger):
|
||||
mock_client_instance = MockWebClient.return_value
|
||||
mock_client_instance.conversations_history.return_value = {
|
||||
"messages": [{"text": "msg1"}],
|
||||
"has_more": False
|
||||
}
|
||||
|
||||
slack_history = SlackHistory(token="fake_token")
|
||||
slack_history.get_conversation_history(channel_id="C123")
|
||||
|
||||
mock_time_sleep.assert_called_once_with(1.2) # Proactive delay
|
||||
|
||||
@patch('surfsense_backend.app.connectors.slack_history.logger')
|
||||
@patch('surfsense_backend.app.connectors.slack_history.time.sleep')
|
||||
@patch('slack_sdk.WebClient')
|
||||
def test_proactive_delay_multiple_pages(self, MockWebClient, mock_time_sleep, mock_logger):
|
||||
mock_client_instance = MockWebClient.return_value
|
||||
mock_client_instance.conversations_history.side_effect = [
|
||||
{
|
||||
"messages": [{"text": "msg1"}],
|
||||
"has_more": True,
|
||||
"response_metadata": {"next_cursor": "cursor1"}
|
||||
},
|
||||
{
|
||||
"messages": [{"text": "msg2"}],
|
||||
"has_more": False
|
||||
}
|
||||
]
|
||||
|
||||
slack_history = SlackHistory(token="fake_token")
|
||||
slack_history.get_conversation_history(channel_id="C123")
|
||||
|
||||
# Expected calls: 1.2 (page1), 1.2 (page2)
|
||||
self.assertEqual(mock_time_sleep.call_count, 2)
|
||||
mock_time_sleep.assert_has_calls([call(1.2), call(1.2)])
|
||||
|
||||
@patch('surfsense_backend.app.connectors.slack_history.logger')
|
||||
@patch('surfsense_backend.app.connectors.slack_history.time.sleep')
|
||||
@patch('slack_sdk.WebClient')
|
||||
def test_retry_after_logic(self, MockWebClient, mock_time_sleep, mock_logger):
|
||||
mock_client_instance = MockWebClient.return_value
|
||||
|
||||
mock_error_response = Mock()
|
||||
mock_error_response.status_code = 429
|
||||
mock_error_response.headers = {'Retry-After': '5'}
|
||||
|
||||
mock_client_instance.conversations_history.side_effect = [
|
||||
SlackApiError(message="ratelimited", response=mock_error_response),
|
||||
{"messages": [{"text": "msg1"}], "has_more": False}
|
||||
]
|
||||
|
||||
slack_history = SlackHistory(token="fake_token")
|
||||
messages = slack_history.get_conversation_history(channel_id="C123")
|
||||
|
||||
self.assertEqual(len(messages), 1)
|
||||
self.assertEqual(messages[0]["text"], "msg1")
|
||||
|
||||
# Expected sleep calls: 1.2 (proactive for 1st attempt), 5 (rate limit), 1.2 (proactive for 2nd attempt)
|
||||
mock_time_sleep.assert_has_calls([call(1.2), call(5), call(1.2)], any_order=False)
|
||||
mock_logger.warning.assert_called_once() # Check that a warning was logged for rate limiting
|
||||
|
||||
@patch('surfsense_backend.app.connectors.slack_history.logger')
|
||||
@patch('surfsense_backend.app.connectors.slack_history.time.sleep')
|
||||
@patch('slack_sdk.WebClient')
|
||||
def test_not_in_channel_error(self, MockWebClient, mock_time_sleep, mock_logger):
|
||||
mock_client_instance = MockWebClient.return_value
|
||||
|
||||
mock_error_response = Mock()
|
||||
mock_error_response.status_code = 403 # Typical for not_in_channel, but data matters more
|
||||
mock_error_response.data = {'ok': False, 'error': 'not_in_channel'}
|
||||
|
||||
# This error is now raised by the inner try-except, then caught by the outer one
|
||||
mock_client_instance.conversations_history.side_effect = SlackApiError(
|
||||
message="not_in_channel error",
|
||||
response=mock_error_response
|
||||
)
|
||||
|
||||
slack_history = SlackHistory(token="fake_token")
|
||||
messages = slack_history.get_conversation_history(channel_id="C123")
|
||||
|
||||
self.assertEqual(messages, [])
|
||||
mock_logger.warning.assert_called_with(
|
||||
"Bot is not in channel 'C123'. Cannot fetch history. Please add the bot to this channel."
|
||||
)
|
||||
mock_time_sleep.assert_called_once_with(1.2) # Proactive delay before the API call
|
||||
|
||||
@patch('surfsense_backend.app.connectors.slack_history.logger')
|
||||
@patch('surfsense_backend.app.connectors.slack_history.time.sleep')
|
||||
@patch('slack_sdk.WebClient')
|
||||
def test_other_slack_api_error_propagates(self, MockWebClient, mock_time_sleep, mock_logger):
|
||||
mock_client_instance = MockWebClient.return_value
|
||||
|
||||
mock_error_response = Mock()
|
||||
mock_error_response.status_code = 500
|
||||
mock_error_response.data = {'ok': False, 'error': 'internal_error'}
|
||||
original_error = SlackApiError(message="server error", response=mock_error_response)
|
||||
|
||||
mock_client_instance.conversations_history.side_effect = original_error
|
||||
|
||||
slack_history = SlackHistory(token="fake_token")
|
||||
|
||||
with self.assertRaises(SlackApiError) as context:
|
||||
slack_history.get_conversation_history(channel_id="C123")
|
||||
|
||||
self.assertIn("Error retrieving history for channel C123", str(context.exception))
|
||||
self.assertIs(context.exception.response, mock_error_response)
|
||||
mock_time_sleep.assert_called_once_with(1.2) # Proactive delay
|
||||
|
||||
@patch('surfsense_backend.app.connectors.slack_history.logger')
|
||||
@patch('surfsense_backend.app.connectors.slack_history.time.sleep')
|
||||
@patch('slack_sdk.WebClient')
|
||||
def test_general_exception_propagates(self, MockWebClient, mock_time_sleep, mock_logger):
|
||||
mock_client_instance = MockWebClient.return_value
|
||||
original_error = Exception("Something broke")
|
||||
mock_client_instance.conversations_history.side_effect = original_error
|
||||
|
||||
slack_history = SlackHistory(token="fake_token")
|
||||
|
||||
with self.assertRaises(Exception) as context: # Check for generic Exception
|
||||
slack_history.get_conversation_history(channel_id="C123")
|
||||
|
||||
self.assertIs(context.exception, original_error) # Should re-raise the original error
|
||||
mock_logger.error.assert_called_once_with("Unexpected error in get_conversation_history for channel C123: Something broke")
|
||||
mock_time_sleep.assert_called_once_with(1.2) # Proactive delay
|
||||
|
||||
class TestSlackHistoryGetUserInfo(unittest.TestCase):
|
||||
|
||||
@patch('surfsense_backend.app.connectors.slack_history.logger')
|
||||
@patch('surfsense_backend.app.connectors.slack_history.time.sleep')
|
||||
@patch('slack_sdk.WebClient')
|
||||
def test_retry_after_logic(self, MockWebClient, mock_time_sleep, mock_logger):
|
||||
mock_client_instance = MockWebClient.return_value
|
||||
|
||||
mock_error_response = Mock()
|
||||
mock_error_response.status_code = 429
|
||||
mock_error_response.headers = {'Retry-After': '3'} # Using 3 seconds for test
|
||||
|
||||
successful_user_data = {"id": "U123", "name": "testuser"}
|
||||
|
||||
mock_client_instance.users_info.side_effect = [
|
||||
SlackApiError(message="ratelimited_userinfo", response=mock_error_response),
|
||||
{"user": successful_user_data}
|
||||
]
|
||||
|
||||
slack_history = SlackHistory(token="fake_token")
|
||||
user_info = slack_history.get_user_info(user_id="U123")
|
||||
|
||||
self.assertEqual(user_info, successful_user_data)
|
||||
|
||||
# Assert that time.sleep was called for the rate limit
|
||||
mock_time_sleep.assert_called_once_with(3)
|
||||
mock_logger.warning.assert_called_once_with(
|
||||
"Rate limited by Slack on users.info for user U123. Retrying after 3 seconds."
|
||||
)
|
||||
# Assert users_info was called twice (original + retry)
|
||||
self.assertEqual(mock_client_instance.users_info.call_count, 2)
|
||||
mock_client_instance.users_info.assert_has_calls([call(user="U123"), call(user="U123")])
|
||||
|
||||
@patch('surfsense_backend.app.connectors.slack_history.logger')
|
||||
@patch('surfsense_backend.app.connectors.slack_history.time.sleep') # time.sleep might be called by other logic, but not expected here
|
||||
@patch('slack_sdk.WebClient')
|
||||
def test_other_slack_api_error_propagates(self, MockWebClient, mock_time_sleep, mock_logger):
|
||||
mock_client_instance = MockWebClient.return_value
|
||||
|
||||
mock_error_response = Mock()
|
||||
mock_error_response.status_code = 500 # Some other error
|
||||
mock_error_response.data = {'ok': False, 'error': 'internal_server_error'}
|
||||
original_error = SlackApiError(message="internal server error", response=mock_error_response)
|
||||
|
||||
mock_client_instance.users_info.side_effect = original_error
|
||||
|
||||
slack_history = SlackHistory(token="fake_token")
|
||||
|
||||
with self.assertRaises(SlackApiError) as context:
|
||||
slack_history.get_user_info(user_id="U123")
|
||||
|
||||
# Check that the raised error is the one we expect
|
||||
self.assertIn("Error retrieving user info for U123", str(context.exception))
|
||||
self.assertIs(context.exception.response, mock_error_response)
|
||||
mock_time_sleep.assert_not_called() # No rate limit sleep
|
||||
|
||||
@patch('surfsense_backend.app.connectors.slack_history.logger')
|
||||
@patch('surfsense_backend.app.connectors.slack_history.time.sleep')
|
||||
@patch('slack_sdk.WebClient')
|
||||
def test_general_exception_propagates(self, MockWebClient, mock_time_sleep, mock_logger):
|
||||
mock_client_instance = MockWebClient.return_value
|
||||
original_error = Exception("A very generic problem")
|
||||
mock_client_instance.users_info.side_effect = original_error
|
||||
|
||||
slack_history = SlackHistory(token="fake_token")
|
||||
|
||||
with self.assertRaises(Exception) as context:
|
||||
slack_history.get_user_info(user_id="U123")
|
||||
|
||||
self.assertIs(context.exception, original_error) # Check it's the exact same exception
|
||||
mock_logger.error.assert_called_once_with(
|
||||
"Unexpected error in get_user_info for user U123: A very generic problem"
|
||||
)
|
||||
mock_time_sleep.assert_not_called() # No rate limit sleep
|
|
@ -97,33 +97,25 @@ async def index_slack_messages(
|
|||
skipped_channels = []
|
||||
|
||||
# Process each channel
|
||||
for channel_name, channel_id in channels.items():
|
||||
for channel_obj in channels: # Modified loop to iterate over list of channel objects
|
||||
channel_id = channel_obj["id"]
|
||||
channel_name = channel_obj["name"]
|
||||
is_private = channel_obj["is_private"]
|
||||
is_member = channel_obj["is_member"] # This might be False for public channels too
|
||||
|
||||
try:
|
||||
# Check if the bot is a member of the channel
|
||||
try:
|
||||
# First try to get channel info to check if bot is a member
|
||||
channel_info = slack_client.client.conversations_info(channel=channel_id)
|
||||
|
||||
# For private channels, the bot needs to be a member
|
||||
if channel_info.get("channel", {}).get("is_private", False):
|
||||
# Check if bot is a member
|
||||
is_member = channel_info.get("channel", {}).get("is_member", False)
|
||||
if not is_member:
|
||||
logger.warning(f"Bot is not a member of private channel {channel_name} ({channel_id}). Skipping.")
|
||||
skipped_channels.append(f"{channel_name} (private, bot not a member)")
|
||||
documents_skipped += 1
|
||||
continue
|
||||
except SlackApiError as e:
|
||||
if "not_in_channel" in str(e) or "channel_not_found" in str(e):
|
||||
logger.warning(f"Bot cannot access channel {channel_name} ({channel_id}). Skipping.")
|
||||
skipped_channels.append(f"{channel_name} (access error)")
|
||||
documents_skipped += 1
|
||||
continue
|
||||
else:
|
||||
# Re-raise if it's a different error
|
||||
raise
|
||||
# If it's a private channel and the bot is not a member, skip.
|
||||
# For public channels, if they are listed by conversations.list, the bot can typically read history.
|
||||
# The `not_in_channel` error in get_conversation_history will be the ultimate gatekeeper if history is inaccessible.
|
||||
if is_private and not is_member:
|
||||
logger.warning(f"Bot is not a member of private channel {channel_name} ({channel_id}). Skipping.")
|
||||
skipped_channels.append(f"{channel_name} (private, bot not a member)")
|
||||
documents_skipped += 1
|
||||
continue
|
||||
|
||||
# Get messages for this channel
|
||||
# The get_history_by_date_range now uses get_conversation_history,
|
||||
# which handles 'not_in_channel' by returning [] and logging.
|
||||
messages, error = slack_client.get_history_by_date_range(
|
||||
channel_id=channel_id,
|
||||
start_date=start_date_str,
|
||||
|
|
Loading…
Add table
Reference in a new issue