Delete search_providers directory

James 2024-11-26 12:15:09 +10:00 committed by GitHub
parent a6899814ff
commit 148dc3db02
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
8 changed files with 0 additions and 1067 deletions

search_providers/__init__.py

@@ -1,5 +0,0 @@
from .base_provider import BaseSearchProvider
from .tavily_provider import TavilySearchProvider
from .factory import SearchProviderFactory
__all__ = ['BaseSearchProvider', 'TavilySearchProvider', 'SearchProviderFactory']
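For orientation, a minimal sketch (not part of the deleted files) of how these package exports were typically combined, assuming a TAVILY_API_KEY environment variable and the factory and provider classes shown later in this diff:

# Illustrative usage sketch only; assumes TAVILY_API_KEY is set in the environment.
from search_providers import SearchProviderFactory

provider = SearchProviderFactory.get_provider("tavily")
if provider.is_configured():
    results = provider.search("What is artificial intelligence?", max_results=3)
    for item in results.get("results", []):
        print(item["title"], item["url"])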

search_providers/base_provider.py

@@ -1,42 +0,0 @@
from abc import ABC, abstractmethod
from typing import Dict, Any, Optional
class BaseSearchProvider(ABC):
"""
Abstract base class for search providers.
All search providers must implement these methods.
"""
@abstractmethod
def __init__(self, api_key: Optional[str] = None):
"""
Initialize the search provider.
Args:
api_key: Optional API key for the search provider
"""
pass
@abstractmethod
def search(self, query: str, **kwargs) -> Dict[str, Any]:
"""
Perform a search using the provider.
Args:
query: The search query string
**kwargs: Additional search parameters specific to the provider
Returns:
Dict containing the search results or error information
"""
pass
@abstractmethod
def is_configured(self) -> bool:
"""
Check if the provider is properly configured (e.g., has valid API key).
Returns:
bool indicating if the provider is ready to use
"""
pass
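To make the abstract contract concrete, here is a hypothetical minimal subclass (illustration only, not part of the deleted code) implementing the three required methods:

# Hypothetical no-op provider showing what a concrete implementation must supply.
from typing import Dict, Any, Optional

class DummySearchProvider(BaseSearchProvider):
    def __init__(self, api_key: Optional[str] = None):
        self.api_key = api_key

    def search(self, query: str, **kwargs) -> Dict[str, Any]:
        # A real provider would call an external API here.
        return {"results": [], "query": query}

    def is_configured(self) -> bool:
        return self.api_key is not None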

search_providers/bing_provider.py

@@ -1,200 +0,0 @@
from typing import Dict, Any, Optional
import os
import sys
from pathlib import Path
import requests
from datetime import datetime, timedelta
import json
# Add parent directory to path for imports when running as script
if __name__ == "__main__":
sys.path.append(str(Path(__file__).parent.parent))
from search_providers.base_provider import BaseSearchProvider
else:
from .base_provider import BaseSearchProvider
class BingSearchProvider(BaseSearchProvider):
"""
Bing implementation of the search provider interface.
Handles both web and news-specific searches using Bing's APIs.
"""
WEB_SEARCH_ENDPOINT = "https://api.bing.microsoft.com/v7.0/search"
NEWS_SEARCH_ENDPOINT = "https://api.bing.microsoft.com/v7.0/news/search"
def __init__(self, api_key: Optional[str] = None):
"""
Initialize the Bing search provider.
Args:
api_key: Optional Bing API key. If not provided, will try to get from environment.
"""
self.api_key = api_key or os.getenv("BING_API_KEY")
self.headers = {
'Ocp-Apim-Subscription-Key': self.api_key,
'Accept': 'application/json'
} if self.api_key else None
# Load trusted news sources
self.trusted_sources = self._load_trusted_sources()
def _load_trusted_sources(self) -> list:
"""Load first 5 trusted news sources from JSON file."""
try:
json_path = Path(__file__).parent / "trusted_news_sources.json"
with open(json_path) as f:
data = json.load(f)
# Only load the first 16 sources as per MSFT limits
return data.get("trusted_sources", [])[:16]
except Exception as e:
print(f"Warning: Could not load trusted news sources: {e}")
return []
def is_configured(self) -> bool:
"""Check if Bing API is properly configured."""
return self.headers is not None
def search(self, query: str, **kwargs) -> Dict[str, Any]:
"""
Perform a search using Bing API.
Args:
query: The search query string
**kwargs: Additional search parameters:
- topic: Optional search topic (e.g., "news")
- max_results: Maximum number of results (default: 10)
- market: Market code (default: "en-US")
- days: Number of days to look back (for news searches)
Returns:
Dict containing search results or error information
"""
if not self.is_configured():
return {'error': 'Bing API key not configured'}
try:
# Set default search parameters
search_params = {
'count': str(kwargs.get('max_results', 10)), # Changed default from 5 to 10
'mkt': kwargs.get('market', 'en-US'),
'textFormat': 'Raw'
}
# Determine if this is a news search
if kwargs.get('topic') == 'news':
# Add freshness parameter for news if days specified
if 'days' in kwargs:
# Bing API expects 'day', 'week', or 'month'
search_params['freshness'] = 'week' if kwargs['days'] >1 else 'day'
# Add site: operators for trusted sources
if self.trusted_sources:
site_operators = " OR ".join(f'site:{source}' for source in self.trusted_sources)
search_params['q'] = f"({query}) ({site_operators})"
else:
search_params['q'] = f"latest headlines about the topic: {query}"
response = requests.get(
self.NEWS_SEARCH_ENDPOINT,
headers=self.headers,
params=search_params
)
else:
search_params['q'] = query
response = requests.get(
self.WEB_SEARCH_ENDPOINT,
headers=self.headers,
params=search_params
)
if response.status_code != 200:
return {'error': f'API request failed with status {response.status_code}: {response.text}'}
response_data = response.json()
# Process results based on search type
if kwargs.get('topic') == 'news':
return self._process_news_results(
response_data,
days=kwargs.get('days', 3),
topic=query
)
else:
return self._process_general_results(response_data)
except requests.exceptions.RequestException as e:
return {'error': f'API request failed: {str(e)}'}
except Exception as e:
return {'error': f'An unexpected error occurred: {str(e)}'}
def _process_general_results(self, response: Dict[str, Any]) -> Dict[str, Any]:
"""Process results for general web searches."""
webpages = response.get('webPages', {}).get('value', [])
return {
'results': [{
'title': result.get('name', ''),
'url': result.get('url', ''),
'content': result.get('snippet', ''),
'score': 1.0 # Bing doesn't provide relevance scores
} for result in webpages[:10]] # Changed from 3 to 10
}
def _process_news_results(self, response: Dict[str, Any], days: int, topic: str) -> Dict[str, Any]:
"""Process results for news-specific searches."""
articles = response.get('value', [])
return {
'articles': [{
'title': article.get('name', ''),
'url': article.get('url', ''),
'published_date': article.get('datePublished', ''),
'content': article.get('description', ''),
'score': 1.0 # Bing doesn't provide relevance scores
} for article in articles],
'time_period': f"Past {days} days",
'topic': topic
}
if __name__ == "__main__":
# Test code using actual API
provider = BingSearchProvider()
if not provider.is_configured():
print("Error: Bing API key not configured")
exit(1)
# Print loaded trusted sources
print("\n=== Loaded Trusted Sources ===")
print(provider.trusted_sources)
# Test general search
print("\n=== Testing General Search ===")
general_result = provider.search(
"What is artificial intelligence?",
max_results=10 # Changed from 3 to 10
)
if 'error' in general_result:
print(f"Error in general search: {general_result['error']}")
else:
print("\nTop Results:")
for idx, result in enumerate(general_result['results'], 1):
print(f"\n{idx}. {result['title']}")
print(f" URL: {result['url']}")
print(f" Preview: {result['content'][:400]}...")
# Test news search
print("\n\n=== Testing News Search ===")
news_result = provider.search(
"mike tyson fight",
topic="news",
days=3
)
if 'error' in news_result:
print(f"Error in news search: {news_result['error']}")
else:
print("\nRecent Articles:")
for idx, article in enumerate(news_result['articles'], 1):
print(f"\n{idx}. {article['title']}")
print(f" Published: {article['published_date']}")
print(f" URL: {article['url']}")
print(f" Preview: {article['content'][:400]}...")

search_providers/brave_provider.py

@@ -1,308 +0,0 @@
from typing import Dict, Any, Optional
import os
import sys
from pathlib import Path
import requests
from datetime import datetime, timedelta
import json
from concurrent.futures import ThreadPoolExecutor
# Add parent directory to path for imports when running as script
if __name__ == "__main__":
sys.path.append(str(Path(__file__).parent.parent))
from search_providers.base_provider import BaseSearchProvider
else:
from .base_provider import BaseSearchProvider
class BraveSearchProvider(BaseSearchProvider):
"""
Brave implementation of the search provider interface.
Handles both web and news-specific searches using Brave's APIs.
"""
WEB_SEARCH_ENDPOINT = "https://api.search.brave.com/res/v1/web/search"
NEWS_SEARCH_ENDPOINT = "https://api.search.brave.com/res/v1/news/search"
SUMMARIZER_ENDPOINT = "https://api.search.brave.com/res/v1/summarizer/search"
def __init__(self, api_key: Optional[str] = None):
"""
Initialize the Brave search provider.
Args:
api_key: Optional Brave API key. If not provided, will try to get from environment.
"""
self.api_key = api_key or os.getenv("BRAVE_API_KEY")
self.pro_api_key = os.getenv("BRAVE_AI_PRO_API_KEY") #Optional, used for AI summary requests
self.headers = {
'X-Subscription-Token': self.api_key,
'Accept': 'application/json'
} if self.api_key else None
self.proheaders = {
'X-Subscription-Token': self.pro_api_key,
'Accept': 'application/json'
} if self.pro_api_key else None
def is_configured(self) -> bool:
"""Check if Brave API is properly configured."""
return self.headers is not None
def get_brave_summary(self, query):
# Query parameters
params = {
"q": query,
"summary": 1
}
# Make the initial web search request to get summarizer key
search_response = requests.get(self.WEB_SEARCH_ENDPOINT, headers=self.proheaders, params=params)
if search_response.status_code == 200:
data = search_response.json()
if "summarizer" in data and "key" in data["summarizer"]:
summarizer_key = data["summarizer"]["key"]
# Make request to summarizer endpoint
summarizer_params = {
"key": summarizer_key,
"entity_info": 1
}
summary_response = requests.get(
self.SUMMARIZER_ENDPOINT,
headers=self.proheaders,
params=summarizer_params
)
if summary_response.status_code == 200:
summary_data = summary_response.json()
try:
return summary_data['summary'][0]['data']
except (KeyError, IndexError):
return None
return None
def search(self, query: str, **kwargs) -> Dict[str, Any]:
"""
Perform a search using Brave API.
Args:
query: The search query string
**kwargs: Additional search parameters:
- topic: Optional search topic (e.g., "news")
- max_results: Maximum number of results (default: 10)
                - market: Country code used by Brave (default: "us")
- days: Number of days to look back (for news searches)
Returns:
Dict containing search results or error information
"""
if not self.is_configured():
return {'error': 'Brave API key not configured'}
try:
# Set default search parameters
search_params = {
'count': str(kwargs.get('max_results', 10)),
'country': kwargs.get('market', 'us'), # Brave uses country code
'q': query
}
# Determine if this is a news search
if kwargs.get('topic') == 'news':
# Add freshness parameter for news if days specified
if 'days' in kwargs:
days = kwargs['days']
if days <= 1:
search_params['freshness'] = 'pd' # past day
elif days <= 7:
search_params['freshness'] = 'pw' # past week
else:
search_params['freshness'] = 'pm' # past month
response = requests.get(
self.NEWS_SEARCH_ENDPOINT,
headers=self.headers,
params=search_params
)
response_data = response.json()
result = self._process_news_results(response_data, days=kwargs.get('days', 3), topic=query)
else:
response = requests.get(
self.WEB_SEARCH_ENDPOINT,
headers=self.headers,
params=search_params
)
response_data = response.json()
result = self._process_general_results(response_data)
# Include summarizer response if it exists
summary_response = self.get_brave_summary(query)
if summary_response:
result['summarizer'] = summary_response
return result
except requests.exceptions.RequestException as e:
return {'error': f'API request failed: {str(e)}'}
except Exception as e:
return {'error': f'An unexpected error occurred: {str(e)}'}
def _process_general_results(self, response: Dict[str, Any]) -> Dict[str, Any]:
"""Process results for general web searches."""
web_results = response.get('web', {}).get('results', [])
with ThreadPoolExecutor() as executor:
# Use index as key instead of the result dictionary
futures = {i: executor.submit(self.get_brave_summary, result.get('title', ''))
for i, result in enumerate(web_results[:2])}
results = []
for i, result in enumerate(web_results):
summary = None
if i < 2:
try:
summary = futures[i].result()
except Exception as e:
print(f"Error getting summary: {e}")
processed_result = {
'title': result.get('title', ''),
'url': result.get('url', ''),
'content': result.get('description', ''),
'score': result.get('score', 1.0),
'extra_snippets': None,
'summary': None
}
if summary:
processed_result['summary'] = summary
else:
processed_result['extra_snippets'] = result.get('extra_snippets', [])
results.append(processed_result)
return {'results': results}
def _process_news_results(self, response: Dict[str, Any], days: int, topic: str) -> Dict[str, Any]:
"""Process results for news-specific searches."""
news_results = response.get('results', [])
def convert_age_to_minutes(age_str: str) -> int:
"""
Convert age string to minutes.
Args:
age_str: Age string in the format of "X minutes", "X hours", "X days"
Returns:
Age in minutes
"""
age_value = int(age_str.split()[0])
age_unit = age_str.split()[1]
if age_unit == 'minutes':
return age_value
elif age_unit == 'hours':
return age_value * 60
elif age_unit == 'days':
return age_value * 1440 # 24 hours * 60 minutes
else:
return 0 # Default to 0 if unknown unit
# Sort news results based on the age field
news_results.sort(key=lambda x: convert_age_to_minutes(x.get('age', '0 minutes')))
with ThreadPoolExecutor() as executor:
# Use enumerate to create futures with index as key
futures = {i: executor.submit(self.get_brave_summary, article_data.get('title', ''))
for i, article_data in enumerate(news_results)}
articles = []
for i, article_data in enumerate(news_results):
try:
summary = futures[i].result()
except Exception as e:
print(f"Error getting summary: {e}")
summary = None
article = {
'title': article_data.get('title', ''),
'url': article_data.get('url', ''),
'published_date': article_data.get('age', ''),
'breaking' : article_data.get('breaking', False),
'content': article_data.get('description', ''),
'extra_snippets': None,
'summary': None,
'score': article_data.get('score', 1.0)
}
if summary:
article['summary'] = summary
else:
article['extra_snippets'] = article_data.get('extra_snippets', [])
articles.append(article)
return {
'articles': articles,
'time_period': f"Past {days} days",
'topic': topic
}
if __name__ == "__main__":
# Test code using actual API
provider = BraveSearchProvider()
if not provider.is_configured():
print("Error: Brave API key not configured")
exit(1)
# Test general search
print("\n=== Testing General Search ===")
general_result = provider.search(
"What is artificial intelligence?",
max_results=1 # Increased max_results to test summary limiting
)
if 'error' in general_result:
print(f"Error in general search: {general_result['error']}")
else:
print("\nTop Results:")
for idx, result in enumerate(general_result['results'], 1):
print(f"\n{idx}. {result['title']}")
print(f" URL: {result['url']}")
print(f" Preview: {result['content']}...")
print(f" Score: {result['score']}")
if result['extra_snippets']:
print(" Extra Snippets:")
for snippet in result['extra_snippets']:
print(f" - {snippet}")
if result['summary']: # Check if summary exists before printing
print(f" Summary: {result.get('summary', '')}...")
import time
time.sleep(1)
# Test news search
print("\n\n=== Testing News Search ===")
import time
start_time = time.time()
news_result = provider.search(
"mike tyson fight",
topic="news",
days=3,
max_results=1
)
end_time = time.time()
if 'error' in news_result:
print(f"Error in news search: {news_result['error']}")
else:
print("\nRecent Articles:")
for idx, article in enumerate(news_result['articles'], 1):
print(f"\n{idx}. {article['title']}")
print(f" Published: {article['published_date']}")
print(f" Breaking: {article['breaking']}")
print(f" URL: {article['url']}")
print(f" Preview: {article['content'][:400]}...")
if article['extra_snippets']:
print(" Extra Snippets:")
for snippet in article['extra_snippets']:
print(f" - {snippet}")
if article['summary']:
print(f" Summary: {article.get('summary', '')}...")
print(f"Execution time: {round(end_time - start_time, 1)} seconds")

search_providers/exa_provider.py

@@ -1,231 +0,0 @@
from typing import Dict, Any, Optional
import os
import sys
import json
from pathlib import Path
import requests
from datetime import datetime, timedelta
# Add parent directory to path for imports when running as script
if __name__ == "__main__":
sys.path.append(str(Path(__file__).parent.parent))
from search_providers.base_provider import BaseSearchProvider
else:
from .base_provider import BaseSearchProvider
class ExaSearchProvider(BaseSearchProvider):
"""
Exa.ai implementation of the search provider interface.
Handles web searches with optional full page content retrieval.
"""
def __init__(self, api_key: Optional[str] = None):
"""
Initialize the Exa search provider.
Args:
api_key: Optional Exa API key. If not provided, will try to get from environment.
"""
self.api_key = api_key or os.getenv("EXA_API_KEY")
self.base_url = "https://api.exa.ai/search"
self.trusted_sources = self._load_trusted_sources()
def _load_trusted_sources(self) -> list:
"""Load trusted news sources from JSON file."""
try:
json_path = Path(__file__).parent / 'trusted_news_sources.json'
with open(json_path) as f:
data = json.load(f)
return data.get('trusted_sources', [])
except Exception as e:
print(f"Warning: Could not load trusted sources: {e}")
return []
def is_configured(self) -> bool:
"""Check if Exa client is properly configured."""
return bool(self.api_key)
def search(self, query: str, **kwargs) -> Dict[str, Any]:
"""
Perform a search using Exa API.
Args:
query: The search query string
**kwargs: Additional search parameters:
- include_content: Whether to retrieve full page contents (default: False)
- max_results: Maximum number of results (default: 3)
- days: Number of days to look back (for news searches)
Returns:
Dict containing search results or error information
"""
if not self.is_configured():
return {'error': 'Exa API key not configured'}
try:
# Set default search parameters
search_params = {
'query': query,
'type': 'neural',
'useAutoprompt': True,
'numResults': kwargs.get('max_results', 3),
}
# Add optional parameters
if kwargs.get('include_content'):
search_params['contents'] = {
"highlights": True,
"summary": True
}
if kwargs.get('days'):
# Convert days to timestamp for time-based filtering
date_limit = datetime.now() - timedelta(days=kwargs['days'])
search_params['startPublishedTime'] = date_limit.isoformat()
# Add trusted domains for news searches
if kwargs.get('topic') == 'news' and self.trusted_sources:
search_params['includeDomains'] = self.trusted_sources
# Make API request
headers = {
'x-api-key': self.api_key,
'Content-Type': 'application/json',
'accept': 'application/json'
}
response = requests.post(
self.base_url,
headers=headers,
json=search_params
)
response.raise_for_status()
data = response.json()
# Process results based on whether it's a news search
if kwargs.get('topic') == 'news':
return self._process_news_results(
data,
days=kwargs.get('days', 3),
topic=query
)
else:
return self._process_general_results(data)
except requests.exceptions.RequestException as e:
if e.response and e.response.status_code == 401:
return {'error': 'Invalid Exa API key'}
elif e.response and e.response.status_code == 429:
return {'error': 'Exa API rate limit exceeded'}
else:
return {'error': f'An error occurred while making the request: {str(e)}'}
except Exception as e:
return {'error': f'An unexpected error occurred: {str(e)}'}
def _process_general_results(self, response: Dict[str, Any]) -> Dict[str, Any]:
"""Process results for general searches."""
results = []
for result in response.get('results', []):
processed_result = {
'title': result.get('title', ''),
'url': result.get('url', ''),
'highlights': result.get('highlights', []),
'summary': result.get('summary', ''),
'score': result.get('score', 0.0)
}
results.append(processed_result)
return {
'results': results,
'autoprompt': response.get('autopromptString', '')
}
def _process_news_results(self, response: Dict[str, Any], days: int, topic: str) -> Dict[str, Any]:
"""Process results for news-specific searches."""
articles = []
for article in response.get('results', []):
processed_article = {
'title': article.get('title', ''),
'url': article.get('url', ''),
'published_date': article.get('publishedDate', ''),
'highlights': article.get('highlights', []),
'summary': article.get('summary', ''),
'score': article.get('score', 0.0)
}
articles.append(processed_article)
return {
'articles': articles,
'time_period': f"Past {days} days",
'topic': topic,
'autoprompt': response.get('autopromptString', '')
}
if __name__ == "__main__":
# Test code for the Exa provider
provider = ExaSearchProvider()
if not provider.is_configured():
print("Error: Exa API key not configured")
exit(1)
# Test general search
print("\n=== Testing General Search ===")
import time
start_time = time.time()
general_result = provider.search(
"What is artificial intelligence?",
max_results=3,
include_content=True
)
end_time = time.time()
if 'error' in general_result:
print("Error:", general_result['error'])
else:
print("\nTop Results:")
print(f"Autoprompt: {general_result.get('autoprompt', '')}")
for idx, result in enumerate(general_result['results'], 1):
print(f"\n{idx}. {result['title']}")
print(f" URL: {result['url']}")
print(f" Score: {result['score']}")
print(f" Summary: {result['summary']}")
if result['highlights']:
print(" Highlights:")
for highlight in result['highlights']:
print(f" - {highlight}")
print(f"\n\nTime taken for general search: {end_time - start_time} seconds")
# Test news search
print("\n\n=== Testing News Search ===")
start_time = time.time()
news_result = provider.search(
"Latest developments in AI",
topic="news",
days=3,
max_results=3,
include_content=True
)
end_time = time.time()
if 'error' in news_result:
print("Error:", news_result['error'])
else:
print("\nRecent Articles:")
print(f"Autoprompt: {news_result.get('autoprompt', '')}")
for idx, article in enumerate(news_result['articles'], 1):
print(f"\n{idx}. {article['title']}")
print(f" Published: {article['published_date']}")
print(f" URL: {article['url']}")
print(f" Score: {article['score']}")
print(f" Summary: {article['summary']}")
if article['highlights']:
print(" Highlights:")
for highlight in article['highlights']:
print(f" - {highlight}")
print(f"\n\nTime taken for news search: {end_time - start_time} seconds")
# Test error handling
print("\n\n=== Testing Error Handling ===")
bad_provider = ExaSearchProvider(api_key="invalid_key")
error_result = bad_provider.search("test query")
print("\nExpected error with invalid API key:", error_result['error'])

search_providers/factory.py

@@ -1,50 +0,0 @@
"""Factory for creating search providers based on configuration."""
from typing import Type, Dict, Any
from search_providers.base_provider import BaseSearchProvider
from search_providers.bing_provider import BingSearchProvider
from search_providers.brave_provider import BraveSearchProvider
from search_providers.exa_provider import ExaSearchProvider
from search_providers.tavily_provider import TavilySearchProvider
from system_config import get_search_config
class SearchProviderFactory:
"""
Factory class for creating instances of search providers.
"""
_providers: Dict[str, Type[BaseSearchProvider]] = {
"bing": BingSearchProvider,
"brave": BraveSearchProvider,
"exa": ExaSearchProvider,
"tavily": TavilySearchProvider,
}
@classmethod
def get_provider(cls, provider_type: str, **kwargs) -> BaseSearchProvider:
"""
Get an instance of the specified search provider.
Args:
            provider_type: The type of search provider to create (e.g., "bing", "tavily").
**kwargs: Additional keyword arguments to pass to the provider's constructor.
Returns:
            An instance of the requested search provider. Raises ValueError if the provider type is invalid.
"""
provider_class = cls._providers.get(provider_type.lower())
if not provider_class:
raise ValueError(f"Invalid search provider type: {provider_type}")
return provider_class(**kwargs)
@classmethod
def get_available_providers(cls) -> Dict[str, Type[BaseSearchProvider]]:
"""
Get a dictionary of available search provider types and their corresponding classes.
Returns:
            A dictionary where keys are provider types (e.g., "bing", "tavily") and values are
the corresponding search provider classes.
"""
return cls._providers
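A minimal usage sketch for the factory (illustrative only; in the application the provider name presumably comes from get_search_config rather than being hard-coded):

# Dispatch on a provider name and handle the ValueError path for unknown types.
try:
    provider = SearchProviderFactory.get_provider("brave")
except ValueError as exc:
    print(f"Unsupported provider: {exc}")
else:
    print("Available providers:", list(SearchProviderFactory.get_available_providers()))
    if provider.is_configured():
        print(provider.search("quantum computing", max_results=5))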

search_providers/tavily_provider.py

@@ -1,160 +0,0 @@
from typing import Dict, Any, Optional
import os
import sys
from pathlib import Path
# Add parent directory to path for imports when running as script
if __name__ == "__main__":
sys.path.append(str(Path(__file__).parent.parent))
from search_providers.base_provider import BaseSearchProvider
else:
from .base_provider import BaseSearchProvider
from tavily import TavilyClient, MissingAPIKeyError, InvalidAPIKeyError, UsageLimitExceededError
class TavilySearchProvider(BaseSearchProvider):
"""
Tavily implementation of the search provider interface.
Handles both general and news-specific searches.
"""
def __init__(self, api_key: Optional[str] = None):
"""
Initialize the Tavily search provider.
Args:
api_key: Optional Tavily API key. If not provided, will try to get from environment.
"""
self.api_key = api_key or os.getenv("TAVILY_API_KEY")
try:
self.client = TavilyClient(api_key=self.api_key) if self.api_key else None
except MissingAPIKeyError:
self.client = None
def is_configured(self) -> bool:
"""Check if Tavily client is properly configured."""
return self.client is not None
def search(self, query: str, **kwargs) -> Dict[str, Any]:
"""
Perform a search using Tavily API.
Args:
query: The search query string
**kwargs: Additional search parameters:
- search_depth: "basic" or "advanced" (default: "basic")
- topic: Optional search topic (e.g., "news")
- max_results: Maximum number of results (default: 5)
- include_answer: Whether to include AI-generated answer (default: True)
- include_images: Whether to include images (default: False)
- days: Number of days to look back (for news searches)
Returns:
Dict containing search results or error information
"""
if not self.is_configured():
return {'error': 'Tavily API key not configured'}
try:
# Set default search parameters
search_params = {
'search_depth': "basic",
'max_results': 5,
'include_answer': True,
'include_images': False
}
# Update with any provided parameters
search_params.update(kwargs)
# Execute search
response = self.client.search(query, **search_params)
# Process results based on whether it's a news search
if kwargs.get('topic') == 'news':
return self._process_news_results(
response,
days=kwargs.get('days', 3),
topic=query
)
else:
return self._process_general_results(response)
except InvalidAPIKeyError:
return {'error': 'Invalid Tavily API key'}
except UsageLimitExceededError:
return {'error': 'Tavily API usage limit exceeded'}
except Exception as e:
return {'error': f'An unexpected error occurred: {e}'}
def _process_general_results(self, response: Dict[str, Any]) -> Dict[str, Any]:
"""Process results for general searches."""
return {
'answer': response.get('answer', ''),
'results': [{
'title': result.get('title', ''),
'url': result.get('url', ''),
'content': result.get('content', '')[:500] + '...' if result.get('content') else '',
'score': result.get('score', 0.0)
} for result in response.get('results', [])]
}
def _process_news_results(self, response: Dict[str, Any], days: int, topic: str) -> Dict[str, Any]:
"""Process results for news-specific searches."""
return {
'answer': response.get('answer', ''),
'articles': [{
'title': article.get('title', ''),
'url': article.get('url', ''),
'published_date': article.get('published_date', ''),
'content': article.get('content', '')[:500] + '...' if article.get('content') else '',
'score': article.get('score', 0.0)
} for article in response.get('results', [])],
'time_period': f"Past {days} days",
'topic': topic
}
if __name__ == "__main__":
# Test code for the Tavily provider
provider = TavilySearchProvider()
if not provider.is_configured():
print("Error: Tavily API key not configured")
exit(1)
# Test general search
print("\n=== Testing General Search ===")
general_result = provider.search(
"What is artificial intelligence?",
search_depth="advanced",
max_results=3
)
print("\nQuery Answer:", general_result['answer'])
print("\nTop Results:")
for idx, result in enumerate(general_result['results'], 1):
print(f"\n{idx}. {result['title']}")
print(f" URL: {result['url']}")
print(f" Score: {result['score']}")
print(f" Preview: {result['content'][:200]}...")
# Test news search
print("\n\n=== Testing News Search ===")
news_result = provider.search(
"Latest developments in AI",
topic="news",
days=3,
search_depth="advanced"
)
print("\nNews Summary:", news_result['answer'])
print("\nRecent Articles:")
for idx, article in enumerate(news_result['articles'], 1):
print(f"\n{idx}. {article['title']}")
print(f" Published: {article['published_date']}")
print(f" URL: {article['url']}")
print(f" Score: {article['score']}")
print(f" Preview: {article['content'][:400]}...")
# Test error handling
print("\n\n=== Testing Error Handling ===")
bad_provider = TavilySearchProvider(api_key="invalid_key")
error_result = bad_provider.search("test query")
print("\nExpected error with invalid API key:", error_result['error'])

search_providers/trusted_news_sources.json

@@ -1,71 +0,0 @@
{
"trusted_sources": [
"apnews.com",
"reuters.com",
"bbc.com",
"wsj.com",
"nytimes.com",
"economist.com",
"bloomberg.com",
"ft.com",
"aljazeera.com",
"afp.com",
"techcrunch.com",
"wired.com",
"arstechnica.com",
"theverge.com",
"cnet.com",
"theguardian.com",
"businessinsider.com",
"dw.com",
"time.com",
"afp.com",
"pbs.org",
"npr.org",
"cnbc.com",
"forbes.com",
"thehill.com",
"politico.com",
"axios.com",
"euronews.com",
"japantimes.co.jp",
"scmp.com",
"straitstimes.com",
"themoscowtimes.com",
"haaretz.com",
"timesofindia.com",
"globeandmail.com",
"abc.net.au",
"rte.ie",
"swissinfo.ch",
"thelocal.fr",
"thelocal.de",
"thelocal.se",
"kyivpost.com",
"arabnews.com",
"koreatimes.co.kr",
"bangkokpost.com",
"zdnet.com",
"cnet.com",
"engadget.com",
"gizmodo.com",
"thenextweb.com",
"venturebeat.com",
"techradar.com",
"tomshardware.com",
"anandtech.com",
"slashdot.org",
"techspot.com",
"phoronix.com",
"404media.co",
"theregister.com",
"techdirt.com",
"techrepublic.com",
"mit.edu",
"protocol.com",
"theinformation.com",
"restofworld.org",
"news.ycombinator.com"
]
}