From 148dc3db02ae7fe4221b7299efe31b8f4ab04a7d Mon Sep 17 00:00:00 2001 From: James Date: Tue, 26 Nov 2024 12:15:09 +1000 Subject: [PATCH] Delete search_providers directory --- search_providers/__init__.py | 5 - search_providers/base_provider.py | 42 --- search_providers/bing_provider.py | 200 ------------- search_providers/brave_provider.py | 308 --------------------- search_providers/exa_provider.py | 231 ---------------- search_providers/factory.py | 50 ---- search_providers/tavily_provider.py | 160 ----------- search_providers/trusted_news_sources.json | 71 ----- 8 files changed, 1067 deletions(-) delete mode 100644 search_providers/__init__.py delete mode 100644 search_providers/base_provider.py delete mode 100644 search_providers/bing_provider.py delete mode 100644 search_providers/brave_provider.py delete mode 100644 search_providers/exa_provider.py delete mode 100644 search_providers/factory.py delete mode 100644 search_providers/tavily_provider.py delete mode 100644 search_providers/trusted_news_sources.json diff --git a/search_providers/__init__.py b/search_providers/__init__.py deleted file mode 100644 index a9450b3..0000000 --- a/search_providers/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -from .base_provider import BaseSearchProvider -from .tavily_provider import TavilySearchProvider -from .factory import SearchProviderFactory - -__all__ = ['BaseSearchProvider', 'TavilySearchProvider', 'SearchProviderFactory'] diff --git a/search_providers/base_provider.py b/search_providers/base_provider.py deleted file mode 100644 index e98942e..0000000 --- a/search_providers/base_provider.py +++ /dev/null @@ -1,42 +0,0 @@ -from abc import ABC, abstractmethod -from typing import Dict, Any, Optional - -class BaseSearchProvider(ABC): - """ - Abstract base class for search providers. - All search providers must implement these methods. - """ - - @abstractmethod - def __init__(self, api_key: Optional[str] = None): - """ - Initialize the search provider. - - Args: - api_key: Optional API key for the search provider - """ - pass - - @abstractmethod - def search(self, query: str, **kwargs) -> Dict[str, Any]: - """ - Perform a search using the provider. - - Args: - query: The search query string - **kwargs: Additional search parameters specific to the provider - - Returns: - Dict containing the search results or error information - """ - pass - - @abstractmethod - def is_configured(self) -> bool: - """ - Check if the provider is properly configured (e.g., has valid API key). - - Returns: - bool indicating if the provider is ready to use - """ - pass diff --git a/search_providers/bing_provider.py b/search_providers/bing_provider.py deleted file mode 100644 index 2fe76ad..0000000 --- a/search_providers/bing_provider.py +++ /dev/null @@ -1,200 +0,0 @@ -from typing import Dict, Any, Optional -import os -import sys -from pathlib import Path -import requests -from datetime import datetime, timedelta -import json - -# Add parent directory to path for imports when running as script -if __name__ == "__main__": - sys.path.append(str(Path(__file__).parent.parent)) - from search_providers.base_provider import BaseSearchProvider -else: - from .base_provider import BaseSearchProvider - -class BingSearchProvider(BaseSearchProvider): - """ - Bing implementation of the search provider interface. - Handles both web and news-specific searches using Bing's APIs. - """ - - WEB_SEARCH_ENDPOINT = "https://api.bing.microsoft.com/v7.0/search" - NEWS_SEARCH_ENDPOINT = "https://api.bing.microsoft.com/v7.0/news/search" - - def __init__(self, api_key: Optional[str] = None): - """ - Initialize the Bing search provider. - - Args: - api_key: Optional Bing API key. If not provided, will try to get from environment. - """ - self.api_key = api_key or os.getenv("BING_API_KEY") - self.headers = { - 'Ocp-Apim-Subscription-Key': self.api_key, - 'Accept': 'application/json' - } if self.api_key else None - - # Load trusted news sources - self.trusted_sources = self._load_trusted_sources() - - def _load_trusted_sources(self) -> list: - """Load first 5 trusted news sources from JSON file.""" - try: - json_path = Path(__file__).parent / "trusted_news_sources.json" - with open(json_path) as f: - data = json.load(f) - # Only load the first 16 sources as per MSFT limits - return data.get("trusted_sources", [])[:16] - except Exception as e: - print(f"Warning: Could not load trusted news sources: {e}") - return [] - - def is_configured(self) -> bool: - """Check if Bing API is properly configured.""" - return self.headers is not None - - def search(self, query: str, **kwargs) -> Dict[str, Any]: - """ - Perform a search using Bing API. - - Args: - query: The search query string - **kwargs: Additional search parameters: - - topic: Optional search topic (e.g., "news") - - max_results: Maximum number of results (default: 10) - - market: Market code (default: "en-US") - - days: Number of days to look back (for news searches) - - Returns: - Dict containing search results or error information - """ - if not self.is_configured(): - return {'error': 'Bing API key not configured'} - - try: - # Set default search parameters - search_params = { - 'count': str(kwargs.get('max_results', 10)), # Changed default from 5 to 10 - 'mkt': kwargs.get('market', 'en-US'), - 'textFormat': 'Raw' - } - - # Determine if this is a news search - if kwargs.get('topic') == 'news': - # Add freshness parameter for news if days specified - if 'days' in kwargs: - # Bing API expects 'day', 'week', or 'month' - search_params['freshness'] = 'week' if kwargs['days'] >1 else 'day' - - # Add site: operators for trusted sources - if self.trusted_sources: - site_operators = " OR ".join(f'site:{source}' for source in self.trusted_sources) - search_params['q'] = f"({query}) ({site_operators})" - else: - search_params['q'] = f"latest headlines about the topic: {query}" - - response = requests.get( - self.NEWS_SEARCH_ENDPOINT, - headers=self.headers, - params=search_params - ) - else: - search_params['q'] = query - response = requests.get( - self.WEB_SEARCH_ENDPOINT, - headers=self.headers, - params=search_params - ) - - if response.status_code != 200: - return {'error': f'API request failed with status {response.status_code}: {response.text}'} - - response_data = response.json() - - # Process results based on search type - if kwargs.get('topic') == 'news': - return self._process_news_results( - response_data, - days=kwargs.get('days', 3), - topic=query - ) - else: - return self._process_general_results(response_data) - - except requests.exceptions.RequestException as e: - return {'error': f'API request failed: {str(e)}'} - except Exception as e: - return {'error': f'An unexpected error occurred: {str(e)}'} - - def _process_general_results(self, response: Dict[str, Any]) -> Dict[str, Any]: - """Process results for general web searches.""" - webpages = response.get('webPages', {}).get('value', []) - return { - 'results': [{ - 'title': result.get('name', ''), - 'url': result.get('url', ''), - 'content': result.get('snippet', ''), - 'score': 1.0 # Bing doesn't provide relevance scores - } for result in webpages[:10]] # Changed from 3 to 10 - } - - def _process_news_results(self, response: Dict[str, Any], days: int, topic: str) -> Dict[str, Any]: - """Process results for news-specific searches.""" - articles = response.get('value', []) - return { - 'articles': [{ - 'title': article.get('name', ''), - 'url': article.get('url', ''), - 'published_date': article.get('datePublished', ''), - 'content': article.get('description', ''), - 'score': 1.0 # Bing doesn't provide relevance scores - } for article in articles], - 'time_period': f"Past {days} days", - 'topic': topic - } - -if __name__ == "__main__": - # Test code using actual API - provider = BingSearchProvider() - if not provider.is_configured(): - print("Error: Bing API key not configured") - exit(1) - - # Print loaded trusted sources - print("\n=== Loaded Trusted Sources ===") - print(provider.trusted_sources) - - # Test general search - print("\n=== Testing General Search ===") - general_result = provider.search( - "What is artificial intelligence?", - max_results=10 # Changed from 3 to 10 - ) - - if 'error' in general_result: - print(f"Error in general search: {general_result['error']}") - else: - print("\nTop Results:") - for idx, result in enumerate(general_result['results'], 1): - print(f"\n{idx}. {result['title']}") - print(f" URL: {result['url']}") - print(f" Preview: {result['content'][:400]}...") - - # Test news search - print("\n\n=== Testing News Search ===") - news_result = provider.search( - "mike tyson fight", - topic="news", - days=3 - ) - - if 'error' in news_result: - print(f"Error in news search: {news_result['error']}") - else: - print("\nRecent Articles:") - for idx, article in enumerate(news_result['articles'], 1): - print(f"\n{idx}. {article['title']}") - print(f" Published: {article['published_date']}") - print(f" URL: {article['url']}") - print(f" Preview: {article['content'][:400]}...") diff --git a/search_providers/brave_provider.py b/search_providers/brave_provider.py deleted file mode 100644 index 0533880..0000000 --- a/search_providers/brave_provider.py +++ /dev/null @@ -1,308 +0,0 @@ -from typing import Dict, Any, Optional -import os -import sys -from pathlib import Path -import requests -from datetime import datetime, timedelta -import json -from concurrent.futures import ThreadPoolExecutor - -# Add parent directory to path for imports when running as script -if __name__ == "__main__": - sys.path.append(str(Path(__file__).parent.parent)) - from search_providers.base_provider import BaseSearchProvider -else: - from .base_provider import BaseSearchProvider - -class BraveSearchProvider(BaseSearchProvider): - """ - Brave implementation of the search provider interface. - Handles both web and news-specific searches using Brave's APIs. - """ - - WEB_SEARCH_ENDPOINT = "https://api.search.brave.com/res/v1/web/search" - NEWS_SEARCH_ENDPOINT = "https://api.search.brave.com/res/v1/news/search" - SUMMARIZER_ENDPOINT = "https://api.search.brave.com/res/v1/summarizer/search" - - def __init__(self, api_key: Optional[str] = None): - """ - Initialize the Brave search provider. - - Args: - api_key: Optional Brave API key. If not provided, will try to get from environment. - """ - self.api_key = api_key or os.getenv("BRAVE_API_KEY") - self.pro_api_key = os.getenv("BRAVE_AI_PRO_API_KEY") #Optional, used for AI summary requests - self.headers = { - 'X-Subscription-Token': self.api_key, - 'Accept': 'application/json' - } if self.api_key else None - self.proheaders = { - 'X-Subscription-Token': self.pro_api_key, - 'Accept': 'application/json' - } if self.pro_api_key else None - def is_configured(self) -> bool: - """Check if Brave API is properly configured.""" - return self.headers is not None - - def get_brave_summary(self, query): - # Query parameters - params = { - "q": query, - "summary": 1 - } - - # Make the initial web search request to get summarizer key - search_response = requests.get(self.WEB_SEARCH_ENDPOINT, headers=self.proheaders, params=params) - - if search_response.status_code == 200: - data = search_response.json() - - if "summarizer" in data and "key" in data["summarizer"]: - summarizer_key = data["summarizer"]["key"] - - # Make request to summarizer endpoint - summarizer_params = { - "key": summarizer_key, - "entity_info": 1 - } - - summary_response = requests.get( - self.SUMMARIZER_ENDPOINT, - headers=self.proheaders, - params=summarizer_params - ) - - if summary_response.status_code == 200: - summary_data = summary_response.json() - try: - return summary_data['summary'][0]['data'] - except (KeyError, IndexError): - return None - - return None - - def search(self, query: str, **kwargs) -> Dict[str, Any]: - """ - Perform a search using Brave API. - - Args: - query: The search query string - **kwargs: Additional search parameters: - - topic: Optional search topic (e.g., "news") - - max_results: Maximum number of results (default: 10) - - market: Market code (default: "en-US") - - days: Number of days to look back (for news searches) - - Returns: - Dict containing search results or error information - """ - if not self.is_configured(): - return {'error': 'Brave API key not configured'} - - try: - # Set default search parameters - search_params = { - 'count': str(kwargs.get('max_results', 10)), - 'country': kwargs.get('market', 'us'), # Brave uses country code - 'q': query - } - - # Determine if this is a news search - if kwargs.get('topic') == 'news': - # Add freshness parameter for news if days specified - if 'days' in kwargs: - days = kwargs['days'] - if days <= 1: - search_params['freshness'] = 'pd' # past day - elif days <= 7: - search_params['freshness'] = 'pw' # past week - else: - search_params['freshness'] = 'pm' # past month - - response = requests.get( - self.NEWS_SEARCH_ENDPOINT, - headers=self.headers, - params=search_params - ) - - response_data = response.json() - result = self._process_news_results(response_data, days=kwargs.get('days', 3), topic=query) - else: - response = requests.get( - self.WEB_SEARCH_ENDPOINT, - headers=self.headers, - params=search_params - ) - response_data = response.json() - result = self._process_general_results(response_data) - - # Include summarizer response if it exists - summary_response = self.get_brave_summary(query) - if summary_response: - result['summarizer'] = summary_response - - return result - - except requests.exceptions.RequestException as e: - return {'error': f'API request failed: {str(e)}'} - except Exception as e: - return {'error': f'An unexpected error occurred: {str(e)}'} - - def _process_general_results(self, response: Dict[str, Any]) -> Dict[str, Any]: - """Process results for general web searches.""" - web_results = response.get('web', {}).get('results', []) - with ThreadPoolExecutor() as executor: - # Use index as key instead of the result dictionary - futures = {i: executor.submit(self.get_brave_summary, result.get('title', '')) - for i, result in enumerate(web_results[:2])} - - results = [] - for i, result in enumerate(web_results): - summary = None - if i < 2: - try: - summary = futures[i].result() - except Exception as e: - print(f"Error getting summary: {e}") - - processed_result = { - 'title': result.get('title', ''), - 'url': result.get('url', ''), - 'content': result.get('description', ''), - 'score': result.get('score', 1.0), - 'extra_snippets': None, - 'summary': None - } - if summary: - processed_result['summary'] = summary - else: - processed_result['extra_snippets'] = result.get('extra_snippets', []) - results.append(processed_result) - return {'results': results} - - def _process_news_results(self, response: Dict[str, Any], days: int, topic: str) -> Dict[str, Any]: - """Process results for news-specific searches.""" - news_results = response.get('results', []) - def convert_age_to_minutes(age_str: str) -> int: - """ - Convert age string to minutes. - - Args: - age_str: Age string in the format of "X minutes", "X hours", "X days" - - Returns: - Age in minutes - """ - age_value = int(age_str.split()[0]) - age_unit = age_str.split()[1] - if age_unit == 'minutes': - return age_value - elif age_unit == 'hours': - return age_value * 60 - elif age_unit == 'days': - return age_value * 1440 # 24 hours * 60 minutes - else: - return 0 # Default to 0 if unknown unit - - # Sort news results based on the age field - news_results.sort(key=lambda x: convert_age_to_minutes(x.get('age', '0 minutes'))) - - with ThreadPoolExecutor() as executor: - # Use enumerate to create futures with index as key - futures = {i: executor.submit(self.get_brave_summary, article_data.get('title', '')) - for i, article_data in enumerate(news_results)} - - articles = [] - for i, article_data in enumerate(news_results): - try: - summary = futures[i].result() - except Exception as e: - print(f"Error getting summary: {e}") - summary = None - - article = { - 'title': article_data.get('title', ''), - 'url': article_data.get('url', ''), - 'published_date': article_data.get('age', ''), - 'breaking' : article_data.get('breaking', False), - 'content': article_data.get('description', ''), - 'extra_snippets': None, - 'summary': None, - 'score': article_data.get('score', 1.0) - } - if summary: - article['summary'] = summary - else: - article['extra_snippets'] = article_data.get('extra_snippets', []) - articles.append(article) - - return { - 'articles': articles, - 'time_period': f"Past {days} days", - 'topic': topic - } - -if __name__ == "__main__": - # Test code using actual API - provider = BraveSearchProvider() - if not provider.is_configured(): - print("Error: Brave API key not configured") - exit(1) - - # Test general search - print("\n=== Testing General Search ===") - general_result = provider.search( - "What is artificial intelligence?", - max_results=1 # Increased max_results to test summary limiting - ) - - if 'error' in general_result: - print(f"Error in general search: {general_result['error']}") - else: - print("\nTop Results:") - for idx, result in enumerate(general_result['results'], 1): - print(f"\n{idx}. {result['title']}") - print(f" URL: {result['url']}") - print(f" Preview: {result['content']}...") - print(f" Score: {result['score']}") - if result['extra_snippets']: - print(" Extra Snippets:") - for snippet in result['extra_snippets']: - print(f" - {snippet}") - if result['summary']: # Check if summary exists before printing - print(f" Summary: {result.get('summary', '')}...") - import time - time.sleep(1) - - # Test news search - print("\n\n=== Testing News Search ===") - import time - start_time = time.time() - news_result = provider.search( - "mike tyson fight", - topic="news", - days=3, - max_results=1 - ) - end_time = time.time() - - - if 'error' in news_result: - print(f"Error in news search: {news_result['error']}") - else: - print("\nRecent Articles:") - for idx, article in enumerate(news_result['articles'], 1): - print(f"\n{idx}. {article['title']}") - print(f" Published: {article['published_date']}") - print(f" Breaking: {article['breaking']}") - print(f" URL: {article['url']}") - print(f" Preview: {article['content'][:400]}...") - if article['extra_snippets']: - print(" Extra Snippets:") - for snippet in article['extra_snippets']: - print(f" - {snippet}") - if article['summary']: - print(f" Summary: {article.get('summary', '')}...") - - print(f"Execution time: {round(end_time - start_time, 1)} seconds") diff --git a/search_providers/exa_provider.py b/search_providers/exa_provider.py deleted file mode 100644 index a20404b..0000000 --- a/search_providers/exa_provider.py +++ /dev/null @@ -1,231 +0,0 @@ -from typing import Dict, Any, Optional -import os -import sys -import json -from pathlib import Path -import requests -from datetime import datetime, timedelta - -# Add parent directory to path for imports when running as script -if __name__ == "__main__": - sys.path.append(str(Path(__file__).parent.parent)) - from search_providers.base_provider import BaseSearchProvider -else: - from .base_provider import BaseSearchProvider - -class ExaSearchProvider(BaseSearchProvider): - """ - Exa.ai implementation of the search provider interface. - Handles web searches with optional full page content retrieval. - """ - - def __init__(self, api_key: Optional[str] = None): - """ - Initialize the Exa search provider. - - Args: - api_key: Optional Exa API key. If not provided, will try to get from environment. - """ - self.api_key = api_key or os.getenv("EXA_API_KEY") - self.base_url = "https://api.exa.ai/search" - self.trusted_sources = self._load_trusted_sources() - - def _load_trusted_sources(self) -> list: - """Load trusted news sources from JSON file.""" - try: - json_path = Path(__file__).parent / 'trusted_news_sources.json' - with open(json_path) as f: - data = json.load(f) - return data.get('trusted_sources', []) - except Exception as e: - print(f"Warning: Could not load trusted sources: {e}") - return [] - - def is_configured(self) -> bool: - """Check if Exa client is properly configured.""" - return bool(self.api_key) - - def search(self, query: str, **kwargs) -> Dict[str, Any]: - """ - Perform a search using Exa API. - - Args: - query: The search query string - **kwargs: Additional search parameters: - - include_content: Whether to retrieve full page contents (default: False) - - max_results: Maximum number of results (default: 3) - - days: Number of days to look back (for news searches) - - Returns: - Dict containing search results or error information - """ - if not self.is_configured(): - return {'error': 'Exa API key not configured'} - - try: - # Set default search parameters - search_params = { - 'query': query, - 'type': 'neural', - 'useAutoprompt': True, - 'numResults': kwargs.get('max_results', 3), - } - - # Add optional parameters - if kwargs.get('include_content'): - search_params['contents'] = { - "highlights": True, - "summary": True - } - - if kwargs.get('days'): - # Convert days to timestamp for time-based filtering - date_limit = datetime.now() - timedelta(days=kwargs['days']) - search_params['startPublishedTime'] = date_limit.isoformat() - - # Add trusted domains for news searches - if kwargs.get('topic') == 'news' and self.trusted_sources: - search_params['includeDomains'] = self.trusted_sources - - # Make API request - headers = { - 'x-api-key': self.api_key, - 'Content-Type': 'application/json', - 'accept': 'application/json' - } - - response = requests.post( - self.base_url, - headers=headers, - json=search_params - ) - response.raise_for_status() - data = response.json() - - # Process results based on whether it's a news search - if kwargs.get('topic') == 'news': - return self._process_news_results( - data, - days=kwargs.get('days', 3), - topic=query - ) - else: - return self._process_general_results(data) - - except requests.exceptions.RequestException as e: - if e.response and e.response.status_code == 401: - return {'error': 'Invalid Exa API key'} - elif e.response and e.response.status_code == 429: - return {'error': 'Exa API rate limit exceeded'} - else: - return {'error': f'An error occurred while making the request: {str(e)}'} - except Exception as e: - return {'error': f'An unexpected error occurred: {str(e)}'} - - def _process_general_results(self, response: Dict[str, Any]) -> Dict[str, Any]: - """Process results for general searches.""" - results = [] - for result in response.get('results', []): - processed_result = { - 'title': result.get('title', ''), - 'url': result.get('url', ''), - 'highlights': result.get('highlights', []), - 'summary': result.get('summary', ''), - 'score': result.get('score', 0.0) - } - results.append(processed_result) - - return { - 'results': results, - 'autoprompt': response.get('autopromptString', '') - } - - def _process_news_results(self, response: Dict[str, Any], days: int, topic: str) -> Dict[str, Any]: - """Process results for news-specific searches.""" - articles = [] - for article in response.get('results', []): - processed_article = { - 'title': article.get('title', ''), - 'url': article.get('url', ''), - 'published_date': article.get('publishedDate', ''), - 'highlights': article.get('highlights', []), - 'summary': article.get('summary', ''), - 'score': article.get('score', 0.0) - } - articles.append(processed_article) - - return { - 'articles': articles, - 'time_period': f"Past {days} days", - 'topic': topic, - 'autoprompt': response.get('autopromptString', '') - } - -if __name__ == "__main__": - # Test code for the Exa provider - provider = ExaSearchProvider() - if not provider.is_configured(): - print("Error: Exa API key not configured") - exit(1) - - # Test general search - print("\n=== Testing General Search ===") - import time - start_time = time.time() - general_result = provider.search( - "What is artificial intelligence?", - max_results=3, - include_content=True - ) - end_time = time.time() - - if 'error' in general_result: - print("Error:", general_result['error']) - else: - print("\nTop Results:") - print(f"Autoprompt: {general_result.get('autoprompt', '')}") - for idx, result in enumerate(general_result['results'], 1): - print(f"\n{idx}. {result['title']}") - print(f" URL: {result['url']}") - print(f" Score: {result['score']}") - print(f" Summary: {result['summary']}") - if result['highlights']: - print(" Highlights:") - for highlight in result['highlights']: - print(f" - {highlight}") - print(f"\n\nTime taken for general search: {end_time - start_time} seconds") - - # Test news search - print("\n\n=== Testing News Search ===") - start_time = time.time() - news_result = provider.search( - "Latest developments in AI", - topic="news", - days=3, - max_results=3, - include_content=True - ) - end_time = time.time() - - if 'error' in news_result: - print("Error:", news_result['error']) - else: - print("\nRecent Articles:") - print(f"Autoprompt: {news_result.get('autoprompt', '')}") - for idx, article in enumerate(news_result['articles'], 1): - print(f"\n{idx}. {article['title']}") - print(f" Published: {article['published_date']}") - print(f" URL: {article['url']}") - print(f" Score: {article['score']}") - print(f" Summary: {article['summary']}") - if article['highlights']: - print(" Highlights:") - for highlight in article['highlights']: - print(f" - {highlight}") - print(f"\n\nTime taken for news search: {end_time - start_time} seconds") - - # Test error handling - print("\n\n=== Testing Error Handling ===") - bad_provider = ExaSearchProvider(api_key="invalid_key") - error_result = bad_provider.search("test query") - print("\nExpected error with invalid API key:", error_result['error']) diff --git a/search_providers/factory.py b/search_providers/factory.py deleted file mode 100644 index 12a0bdc..0000000 --- a/search_providers/factory.py +++ /dev/null @@ -1,50 +0,0 @@ -"""Factory for creating search providers based on configuration.""" - -from typing import Type, Dict, Any -from search_providers.base_provider import BaseSearchProvider -from search_providers.bing_provider import BingSearchProvider -from search_providers.brave_provider import BraveSearchProvider -from search_providers.exa_provider import ExaSearchProvider -from search_providers.tavily_provider import TavilySearchProvider -from system_config import get_search_config - -class SearchProviderFactory: - """ - Factory class for creating instances of search providers. - """ - - _providers: Dict[str, Type[BaseSearchProvider]] = { - "bing": BingSearchProvider, - "brave": BraveSearchProvider, - "exa": ExaSearchProvider, - "tavily": TavilySearchProvider, - } - - @classmethod - def get_provider(cls, provider_type: str, **kwargs) -> BaseSearchProvider: - """ - Get an instance of the specified search provider. - - Args: - provider_type: The type of search provider to create (e.g., "bing", "google"). - **kwargs: Additional keyword arguments to pass to the provider's constructor. - - Returns: - An instance of the requested search provider, or None if the provider type is invalid. - """ - provider_class = cls._providers.get(provider_type.lower()) - if not provider_class: - raise ValueError(f"Invalid search provider type: {provider_type}") - - return provider_class(**kwargs) - - @classmethod - def get_available_providers(cls) -> Dict[str, Type[BaseSearchProvider]]: - """ - Get a dictionary of available search provider types and their corresponding classes. - - Returns: - A dictionary where keys are provider types (e.g., "bing", "google") and values are - the corresponding search provider classes. - """ - return cls._providers diff --git a/search_providers/tavily_provider.py b/search_providers/tavily_provider.py deleted file mode 100644 index ecd4b60..0000000 --- a/search_providers/tavily_provider.py +++ /dev/null @@ -1,160 +0,0 @@ -from typing import Dict, Any, Optional -import os -import sys -from pathlib import Path - -# Add parent directory to path for imports when running as script -if __name__ == "__main__": - sys.path.append(str(Path(__file__).parent.parent)) - from search_providers.base_provider import BaseSearchProvider -else: - from .base_provider import BaseSearchProvider - -from tavily import TavilyClient, MissingAPIKeyError, InvalidAPIKeyError, UsageLimitExceededError - -class TavilySearchProvider(BaseSearchProvider): - """ - Tavily implementation of the search provider interface. - Handles both general and news-specific searches. - """ - - def __init__(self, api_key: Optional[str] = None): - """ - Initialize the Tavily search provider. - - Args: - api_key: Optional Tavily API key. If not provided, will try to get from environment. - """ - self.api_key = api_key or os.getenv("TAVILY_API_KEY") - try: - self.client = TavilyClient(api_key=self.api_key) if self.api_key else None - except MissingAPIKeyError: - self.client = None - - def is_configured(self) -> bool: - """Check if Tavily client is properly configured.""" - return self.client is not None - - def search(self, query: str, **kwargs) -> Dict[str, Any]: - """ - Perform a search using Tavily API. - - Args: - query: The search query string - **kwargs: Additional search parameters: - - search_depth: "basic" or "advanced" (default: "basic") - - topic: Optional search topic (e.g., "news") - - max_results: Maximum number of results (default: 5) - - include_answer: Whether to include AI-generated answer (default: True) - - include_images: Whether to include images (default: False) - - days: Number of days to look back (for news searches) - - Returns: - Dict containing search results or error information - """ - if not self.is_configured(): - return {'error': 'Tavily API key not configured'} - - try: - # Set default search parameters - search_params = { - 'search_depth': "basic", - 'max_results': 5, - 'include_answer': True, - 'include_images': False - } - - # Update with any provided parameters - search_params.update(kwargs) - - # Execute search - response = self.client.search(query, **search_params) - - # Process results based on whether it's a news search - if kwargs.get('topic') == 'news': - return self._process_news_results( - response, - days=kwargs.get('days', 3), - topic=query - ) - else: - return self._process_general_results(response) - - except InvalidAPIKeyError: - return {'error': 'Invalid Tavily API key'} - except UsageLimitExceededError: - return {'error': 'Tavily API usage limit exceeded'} - except Exception as e: - return {'error': f'An unexpected error occurred: {e}'} - - def _process_general_results(self, response: Dict[str, Any]) -> Dict[str, Any]: - """Process results for general searches.""" - return { - 'answer': response.get('answer', ''), - 'results': [{ - 'title': result.get('title', ''), - 'url': result.get('url', ''), - 'content': result.get('content', '')[:500] + '...' if result.get('content') else '', - 'score': result.get('score', 0.0) - } for result in response.get('results', [])] - } - - def _process_news_results(self, response: Dict[str, Any], days: int, topic: str) -> Dict[str, Any]: - """Process results for news-specific searches.""" - return { - 'answer': response.get('answer', ''), - 'articles': [{ - 'title': article.get('title', ''), - 'url': article.get('url', ''), - 'published_date': article.get('published_date', ''), - 'content': article.get('content', '')[:500] + '...' if article.get('content') else '', - 'score': article.get('score', 0.0) - } for article in response.get('results', [])], - 'time_period': f"Past {days} days", - 'topic': topic - } - -if __name__ == "__main__": - # Test code for the Tavily provider - provider = TavilySearchProvider() - if not provider.is_configured(): - print("Error: Tavily API key not configured") - exit(1) - - # Test general search - print("\n=== Testing General Search ===") - general_result = provider.search( - "What is artificial intelligence?", - search_depth="advanced", - max_results=3 - ) - print("\nQuery Answer:", general_result['answer']) - print("\nTop Results:") - for idx, result in enumerate(general_result['results'], 1): - print(f"\n{idx}. {result['title']}") - print(f" URL: {result['url']}") - print(f" Score: {result['score']}") - print(f" Preview: {result['content'][:200]}...") - - # Test news search - print("\n\n=== Testing News Search ===") - news_result = provider.search( - "Latest developments in AI", - topic="news", - days=3, - search_depth="advanced" - ) - print("\nNews Summary:", news_result['answer']) - print("\nRecent Articles:") - for idx, article in enumerate(news_result['articles'], 1): - print(f"\n{idx}. {article['title']}") - print(f" Published: {article['published_date']}") - print(f" URL: {article['url']}") - print(f" Score: {article['score']}") - print(f" Preview: {article['content'][:400]}...") - - # Test error handling - print("\n\n=== Testing Error Handling ===") - bad_provider = TavilySearchProvider(api_key="invalid_key") - error_result = bad_provider.search("test query") - print("\nExpected error with invalid API key:", error_result['error']) diff --git a/search_providers/trusted_news_sources.json b/search_providers/trusted_news_sources.json deleted file mode 100644 index b5e3c77..0000000 --- a/search_providers/trusted_news_sources.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "trusted_sources": [ - "apnews.com", - "reuters.com", - "bbc.com", - "wsj.com", - "nytimes.com", - "economist.com", - "bloomberg.com", - "ft.com", - "aljazeera.com", - "afp.com", - "techcrunch.com", - "wired.com", - "arstechnica.com", - "theverge.com", - "cnet.com", - "theguardian.com", - "businessinsider.com", - "dw.com", - "time.com", - "afp.com", - "pbs.org", - "npr.org", - "cnbc.com", - "forbes.com", - "thehill.com", - "politico.com", - "axios.com", - "euronews.com", - "japantimes.co.jp", - "scmp.com", - "straitstimes.com", - "themoscowtimes.com", - "haaretz.com", - "timesofindia.com", - "globeandmail.com", - "abc.net.au", - "rte.ie", - "swissinfo.ch", - "thelocal.fr", - "thelocal.de", - "thelocal.se", - "kyivpost.com", - "arabnews.com", - "koreatimes.co.kr", - "bangkokpost.com", - "zdnet.com", - "cnet.com", - "engadget.com", - "gizmodo.com", - "thenextweb.com", - "venturebeat.com", - "techradar.com", - "tomshardware.com", - "anandtech.com", - "slashdot.org", - "techspot.com", - "phoronix.com", - "404media.co", - "theregister.com", - "techdirt.com", - "techrepublic.com", - "mit.edu", - "protocol.com", - "theinformation.com", - "restofworld.org", - "news.ycombinator.com" - ] - } - \ No newline at end of file