From 52c10288313b2629486fd723bb6f4fdafe716005 Mon Sep 17 00:00:00 2001 From: James Date: Tue, 26 Nov 2024 12:17:00 +1000 Subject: [PATCH] Delete search_manager.py --- search_manager.py | 219 ---------------------------------------------- 1 file changed, 219 deletions(-) delete mode 100644 search_manager.py diff --git a/search_manager.py b/search_manager.py deleted file mode 100644 index 507a525..0000000 --- a/search_manager.py +++ /dev/null @@ -1,219 +0,0 @@ -""" -SearchManager handles search provider selection, fallback, and result normalization. -""" -import logging -from typing import Dict, List, Any, Optional -from time import sleep - -from system_config import get_search_config -from search_providers.factory import SearchProviderFactory - -logger = logging.getLogger(__name__) - -class SearchManager: - """ - Manages multiple search providers with fallback support and result normalization. - """ - - def __init__(self, tavily_api_key=None, brave_api_key=None, bing_api_key=None, exa_api_key=None): - """Initialize SearchManager with configuration and providers.""" - self.config = get_search_config() - self.factory = SearchProviderFactory() - self.providers = self._initialize_providers(tavily_api_key, brave_api_key, bing_api_key, exa_api_key) - self.current_provider = self.config["default_provider"] - - def _initialize_providers(self, tavily_api_key=None, brave_api_key=None, bing_api_key=None, exa_api_key=None) -> Dict[str, Any]: - """Initialize all configured search providers.""" - providers = {} - for provider_name in self.config["fallback_order"]: - try: - if provider_name == 'tavily': - provider = self.factory.get_provider(provider_name, api_key=tavily_api_key) - elif provider_name == 'brave': - provider = self.factory.get_provider(provider_name, api_key=brave_api_key) - elif provider_name == 'bing': - provider = self.factory.get_provider(provider_name, api_key=bing_api_key) - elif provider_name == 'exa': - provider = self.factory.get_provider(provider_name, api_key=exa_api_key) - else: - provider = self.factory.get_provider(provider_name) - - if provider.is_configured(): - providers[provider_name] = provider - logger.info(f"Successfully initialized {provider_name} provider") - else: - logger.warning(f"Provider {provider_name} not properly configured") - except Exception as e: - logger.error(f"Failed to initialize {provider_name} provider: {str(e)}") - return providers - - def _normalize_results(self, results: Dict[str, Any], provider: str) -> Dict[str, Any]: - """ - Normalize search results to a standard format regardless of provider. - - Standard format: - { - 'success': bool, - 'error': Optional[str], - 'results': List[{ - 'title': str, - 'url': str, - 'content': str, - 'score': float, - 'published_date': Optional[str] - }], - 'answer': Optional[str], # For providers that support AI-generated answers - 'provider': str - } - """ - if not isinstance(results, dict): - return { - 'success': False, - 'error': f'Invalid results format from {provider}', - 'results': [], - 'provider': provider - } - - if 'error' in results: - return { - 'success': False, - 'error': results['error'], - 'results': [], - 'provider': provider - } - - normalized = { - 'success': True, - 'error': None, - 'provider': provider, - 'results': [] - } - - # Handle Tavily's AI answer if present - if 'answer' in results: - normalized['answer'] = results['answer'] - - # Normalize results based on provider - if provider == 'tavily': - # Handle both general and news results from Tavily - if 'articles' in results: - normalized['results'] = [{ - 'title': r.get('title', ''), - 'url': r.get('url', ''), - 'content': r.get('content', '')[:500], - 'score': float(r.get('score', 0.0)), - 'published_date': r.get('published_date') - } for r in results.get('articles', [])] - else: - normalized['results'] = results.get('results', []) - elif provider == 'brave': - normalized['results'] = [{ - 'title': r.get('title', ''), - 'url': r.get('url', ''), - 'content': r.get('description', '')[:500], - 'score': float(r.get('relevance_score', 0.0)), - 'published_date': r.get('published_date') - } for r in results.get('results', [])] - elif provider == 'bing': - normalized['results'] = [{ - 'title': r.get('title', ''), - 'url': r.get('url', ''), - 'content': r.get('content', '')[:500], - 'score': 1.0, # Bing doesn't provide relevance scores - 'published_date': None - } for r in results.get('results', [])] - elif provider == 'exa': - normalized['results'] = [{ - 'title': r.get('title', ''), - 'url': r.get('url', ''), - 'content': r.get('text', '')[:500], - 'score': float(r.get('relevance_score', 0.0)), - 'published_date': r.get('published_date') - } for r in results.get('results', [])] - elif provider == 'duckduckgo': - if not isinstance(results, list): - results = [] - normalized['results'] = [{ - 'title': r.get('title', ''), - 'url': r.get('link', ''), - 'content': r.get('snippet', '')[:500], - 'score': 1.0, # DuckDuckGo doesn't provide relevance scores - 'published_date': None - } for r in results] - - return normalized - - def search(self, query: str, **kwargs) -> Dict[str, Any]: - """ - Perform a search using configured providers with fallback support. - """ - tried_providers = set() - - # First try the default provider - if self.current_provider in self.providers: - try: - provider = self.providers[self.current_provider] - provider_settings = self.config["provider_settings"].get(self.current_provider, {}) - search_params = {**provider_settings, **kwargs} - - results = provider.search(query, **search_params) - normalized_results = self._normalize_results(results, self.current_provider) - - if normalized_results['success']: - return normalized_results - - logger.warning( - f"Search with default provider {self.current_provider} failed: {normalized_results.get('error')}" - ) - except Exception as e: - logger.error(f"Error using default provider {self.current_provider}: {str(e)}") - - tried_providers.add(self.current_provider) - - # Then try providers in fallback order - for provider_name in self.config["fallback_order"]: - if provider_name not in self.providers or provider_name in tried_providers: - continue - - tried_providers.add(provider_name) - provider = self.providers[provider_name] - - try: - # Get provider-specific settings - provider_settings = self.config["provider_settings"].get(provider_name, {}) - search_params = {**provider_settings, **kwargs} - - # Perform search - results = provider.search(query, **search_params) - normalized_results = self._normalize_results(results, provider_name) - - # If search was successful, update current provider and return results - if normalized_results['success']: - self.current_provider = provider_name - return normalized_results - - logger.warning( - f"Search with {provider_name} failed: {normalized_results.get('error')}" - ) - - except Exception as e: - logger.error(f"Error using {provider_name} provider: {str(e)}") - - # Apply rate limiting before trying next provider - sleep(self.config["rate_limiting"]["cooldown_period"] / len(self.providers)) - - # If all providers failed, return error - return { - 'success': False, - 'error': 'All search providers failed', - 'results': [], - 'provider': None - } - - def get_current_provider(self) -> str: - """Get the name of the currently active search provider.""" - return self.current_provider - - def get_available_providers(self) -> List[str]: - """Get list of available (properly configured) search providers.""" - return list(self.providers.keys())