mirror of
https://github.com/TheBlewish/Automated-AI-Web-Researcher-Ollama.git
synced 2025-01-18 16:37:47 +00:00
Delete search_providers directory
This commit is contained in:
parent
a6899814ff
commit
148dc3db02
|
@ -1,5 +0,0 @@
|
|||
from .base_provider import BaseSearchProvider
|
||||
from .tavily_provider import TavilySearchProvider
|
||||
from .factory import SearchProviderFactory
|
||||
|
||||
__all__ = ['BaseSearchProvider', 'TavilySearchProvider', 'SearchProviderFactory']
|
|
@ -1,42 +0,0 @@
|
|||
from abc import ABC, abstractmethod
|
||||
from typing import Dict, Any, Optional
|
||||
|
||||
class BaseSearchProvider(ABC):
    """
    Abstract base class that every search provider must implement.

    A concrete provider supplies construction from an optional API key,
    a ``search()`` entry point, and a readiness check.
    """

    @abstractmethod
    def __init__(self, api_key: Optional[str] = None):
        """
        Set up the provider.

        Args:
            api_key: Optional API key for the underlying search service.
        """
        ...

    @abstractmethod
    def search(self, query: str, **kwargs) -> Dict[str, Any]:
        """
        Run a search and return its results.

        Args:
            query: The search query string.
            **kwargs: Provider-specific search options.

        Returns:
            Dict with the search results, or error information on failure.
        """
        ...

    @abstractmethod
    def is_configured(self) -> bool:
        """
        Report whether the provider is ready to use (e.g. has a valid API key).

        Returns:
            True when the provider is properly configured.
        """
        ...
|
|
@ -1,200 +0,0 @@
|
|||
from typing import Dict, Any, Optional
|
||||
import os
|
||||
import sys
|
||||
from pathlib import Path
|
||||
import requests
|
||||
from datetime import datetime, timedelta
|
||||
import json
|
||||
|
||||
# Add parent directory to path for imports when running as script
|
||||
if __name__ == "__main__":
|
||||
sys.path.append(str(Path(__file__).parent.parent))
|
||||
from search_providers.base_provider import BaseSearchProvider
|
||||
else:
|
||||
from .base_provider import BaseSearchProvider
|
||||
|
||||
class BingSearchProvider(BaseSearchProvider):
    """
    Bing implementation of the search provider interface.
    Handles both web and news-specific searches using Bing's v7 APIs.
    """

    WEB_SEARCH_ENDPOINT = "https://api.bing.microsoft.com/v7.0/search"
    NEWS_SEARCH_ENDPOINT = "https://api.bing.microsoft.com/v7.0/news/search"

    def __init__(self, api_key: Optional[str] = None):
        """
        Initialize the Bing search provider.

        Args:
            api_key: Optional Bing API key. If not provided, will try to get
                it from the BING_API_KEY environment variable.
        """
        self.api_key = api_key or os.getenv("BING_API_KEY")
        # Headers are only built when a key is present; is_configured()
        # relies on this being None to signal a missing key.
        self.headers = {
            'Ocp-Apim-Subscription-Key': self.api_key,
            'Accept': 'application/json'
        } if self.api_key else None

        # Load trusted news sources
        self.trusted_sources = self._load_trusted_sources()

    def _load_trusted_sources(self) -> list:
        """Load the first 16 trusted news sources from the bundled JSON file.

        Only 16 are kept (per MSFT limits on the number of OR'd site:
        operators in one query). Fix: the old docstring said "first 5",
        contradicting the code, which slices 16.
        """
        try:
            json_path = Path(__file__).parent / "trusted_news_sources.json"
            with open(json_path) as f:
                data = json.load(f)
            # Only load the first 16 sources as per MSFT limits
            return data.get("trusted_sources", [])[:16]
        except Exception as e:
            # Best-effort: a missing/corrupt file degrades to an
            # unrestricted news search instead of failing the provider.
            print(f"Warning: Could not load trusted news sources: {e}")
            return []

    def is_configured(self) -> bool:
        """Check if Bing API is properly configured."""
        return self.headers is not None

    def search(self, query: str, **kwargs) -> Dict[str, Any]:
        """
        Perform a search using Bing API.

        Args:
            query: The search query string
            **kwargs: Additional search parameters:
                - topic: Optional search topic (e.g., "news")
                - max_results: Maximum number of results (default: 10)
                - market: Market code (default: "en-US")
                - days: Number of days to look back (for news searches)

        Returns:
            Dict containing search results or error information
        """
        if not self.is_configured():
            return {'error': 'Bing API key not configured'}

        try:
            # Set default search parameters
            search_params = {
                'count': str(kwargs.get('max_results', 10)),
                'mkt': kwargs.get('market', 'en-US'),
                'textFormat': 'Raw'
            }

            # Determine if this is a news search
            if kwargs.get('topic') == 'news':
                if 'days' in kwargs:
                    # Bing's freshness parameter accepts 'day', 'week' or
                    # 'month'. Fix: the old code mapped everything over one
                    # day to 'week', silently narrowing lookbacks of more
                    # than 7 days; map those to 'month' instead.
                    days = kwargs['days']
                    if days <= 1:
                        search_params['freshness'] = 'day'
                    elif days <= 7:
                        search_params['freshness'] = 'week'
                    else:
                        search_params['freshness'] = 'month'

                # Restrict news results to trusted outlets when available.
                if self.trusted_sources:
                    site_operators = " OR ".join(f'site:{source}' for source in self.trusted_sources)
                    search_params['q'] = f"({query}) ({site_operators})"
                else:
                    search_params['q'] = f"latest headlines about the topic: {query}"

                response = requests.get(
                    self.NEWS_SEARCH_ENDPOINT,
                    headers=self.headers,
                    params=search_params
                )
            else:
                search_params['q'] = query
                response = requests.get(
                    self.WEB_SEARCH_ENDPOINT,
                    headers=self.headers,
                    params=search_params
                )

            if response.status_code != 200:
                return {'error': f'API request failed with status {response.status_code}: {response.text}'}

            response_data = response.json()

            # Process results based on search type
            if kwargs.get('topic') == 'news':
                return self._process_news_results(
                    response_data,
                    days=kwargs.get('days', 3),
                    topic=query
                )
            else:
                return self._process_general_results(response_data)

        except requests.exceptions.RequestException as e:
            return {'error': f'API request failed: {str(e)}'}
        except Exception as e:
            return {'error': f'An unexpected error occurred: {str(e)}'}

    def _process_general_results(self, response: Dict[str, Any]) -> Dict[str, Any]:
        """Process results for general web searches.

        Returns a dict with a 'results' list of title/url/content/score
        entries, capped at 10 items.
        """
        webpages = response.get('webPages', {}).get('value', [])
        return {
            'results': [{
                'title': result.get('name', ''),
                'url': result.get('url', ''),
                'content': result.get('snippet', ''),
                'score': 1.0  # Bing doesn't provide relevance scores
            } for result in webpages[:10]]
        }

    def _process_news_results(self, response: Dict[str, Any], days: int, topic: str) -> Dict[str, Any]:
        """Process results for news-specific searches.

        Returns a dict with an 'articles' list plus the time period and
        topic echoed back for display.
        """
        articles = response.get('value', [])
        return {
            'articles': [{
                'title': article.get('name', ''),
                'url': article.get('url', ''),
                'published_date': article.get('datePublished', ''),
                'content': article.get('description', ''),
                'score': 1.0  # Bing doesn't provide relevance scores
            } for article in articles],
            'time_period': f"Past {days} days",
            'topic': topic
        }
|
||||
|
||||
if __name__ == "__main__":
    # Manual smoke test against the live Bing API (requires BING_API_KEY).
    provider = BingSearchProvider()
    if not provider.is_configured():
        print("Error: Bing API key not configured")
        exit(1)

    # Print loaded trusted sources
    print("\n=== Loaded Trusted Sources ===")
    print(provider.trusted_sources)

    # Test general search
    print("\n=== Testing General Search ===")
    general_result = provider.search(
        "What is artificial intelligence?",
        max_results=10  # Changed from 3 to 10
    )

    if 'error' in general_result:
        print(f"Error in general search: {general_result['error']}")
    else:
        print("\nTop Results:")
        for idx, result in enumerate(general_result['results'], 1):
            print(f"\n{idx}. {result['title']}")
            print(f" URL: {result['url']}")
            # Preview is truncated to keep console output readable.
            print(f" Preview: {result['content'][:400]}...")

    # Test news search
    print("\n\n=== Testing News Search ===")
    news_result = provider.search(
        "mike tyson fight",
        topic="news",
        days=3
    )

    if 'error' in news_result:
        print(f"Error in news search: {news_result['error']}")
    else:
        print("\nRecent Articles:")
        for idx, article in enumerate(news_result['articles'], 1):
            print(f"\n{idx}. {article['title']}")
            print(f" Published: {article['published_date']}")
            print(f" URL: {article['url']}")
            print(f" Preview: {article['content'][:400]}...")
|
|
@ -1,308 +0,0 @@
|
|||
from typing import Dict, Any, Optional
|
||||
import os
|
||||
import sys
|
||||
from pathlib import Path
|
||||
import requests
|
||||
from datetime import datetime, timedelta
|
||||
import json
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
|
||||
# Add parent directory to path for imports when running as script
|
||||
if __name__ == "__main__":
|
||||
sys.path.append(str(Path(__file__).parent.parent))
|
||||
from search_providers.base_provider import BaseSearchProvider
|
||||
else:
|
||||
from .base_provider import BaseSearchProvider
|
||||
|
||||
class BraveSearchProvider(BaseSearchProvider):
    """
    Brave implementation of the search provider interface.
    Handles both web and news-specific searches using Brave's APIs.

    Uses two keys: BRAVE_API_KEY for ordinary searches and the optional
    BRAVE_AI_PRO_API_KEY for AI-summarizer requests.
    """

    WEB_SEARCH_ENDPOINT = "https://api.search.brave.com/res/v1/web/search"
    NEWS_SEARCH_ENDPOINT = "https://api.search.brave.com/res/v1/news/search"
    SUMMARIZER_ENDPOINT = "https://api.search.brave.com/res/v1/summarizer/search"

    def __init__(self, api_key: Optional[str] = None):
        """
        Initialize the Brave search provider.

        Args:
            api_key: Optional Brave API key. If not provided, will try to get from environment.
        """
        self.api_key = api_key or os.getenv("BRAVE_API_KEY")
        self.pro_api_key = os.getenv("BRAVE_AI_PRO_API_KEY")  # Optional, used for AI summary requests
        # headers is None when no key is set; is_configured() keys off that.
        self.headers = {
            'X-Subscription-Token': self.api_key,
            'Accept': 'application/json'
        } if self.api_key else None
        # Separate header set for the Pro (summarizer) endpoint.
        # NOTE(review): get_brave_summary is still called when this is None;
        # the request then goes out unauthenticated and the non-200 response
        # makes it return None — confirm this fallback is intended.
        self.proheaders = {
            'X-Subscription-Token': self.pro_api_key,
            'Accept': 'application/json'
        } if self.pro_api_key else None

    def is_configured(self) -> bool:
        """Check if Brave API is properly configured."""
        return self.headers is not None

    def get_brave_summary(self, query):
        """Fetch an AI summary for *query*, or None on any failure.

        Two-step protocol: a web search with summary=1 yields a summarizer
        key, which is then exchanged at the summarizer endpoint.
        """
        # Query parameters
        params = {
            "q": query,
            "summary": 1
        }

        # Make the initial web search request to get summarizer key
        search_response = requests.get(self.WEB_SEARCH_ENDPOINT, headers=self.proheaders, params=params)

        if search_response.status_code == 200:
            data = search_response.json()

            if "summarizer" in data and "key" in data["summarizer"]:
                summarizer_key = data["summarizer"]["key"]

                # Make request to summarizer endpoint
                summarizer_params = {
                    "key": summarizer_key,
                    "entity_info": 1
                }

                summary_response = requests.get(
                    self.SUMMARIZER_ENDPOINT,
                    headers=self.proheaders,
                    params=summarizer_params
                )

                if summary_response.status_code == 200:
                    summary_data = summary_response.json()
                    try:
                        return summary_data['summary'][0]['data']
                    except (KeyError, IndexError):
                        # Summary payload absent or shaped differently.
                        return None

        return None

    def search(self, query: str, **kwargs) -> Dict[str, Any]:
        """
        Perform a search using Brave API.

        Args:
            query: The search query string
            **kwargs: Additional search parameters:
                - topic: Optional search topic (e.g., "news")
                - max_results: Maximum number of results (default: 10)
                - market: Market code (default: "en-US")
                - days: Number of days to look back (for news searches)

        Returns:
            Dict containing search results or error information
        """
        if not self.is_configured():
            return {'error': 'Brave API key not configured'}

        try:
            # Set default search parameters
            search_params = {
                'count': str(kwargs.get('max_results', 10)),
                'country': kwargs.get('market', 'us'),  # Brave uses country code
                'q': query
            }

            # Determine if this is a news search
            if kwargs.get('topic') == 'news':
                # Add freshness parameter for news if days specified.
                # Brave freshness codes: pd = past day, pw = past week,
                # pm = past month.
                if 'days' in kwargs:
                    days = kwargs['days']
                    if days <= 1:
                        search_params['freshness'] = 'pd'  # past day
                    elif days <= 7:
                        search_params['freshness'] = 'pw'  # past week
                    else:
                        search_params['freshness'] = 'pm'  # past month

                response = requests.get(
                    self.NEWS_SEARCH_ENDPOINT,
                    headers=self.headers,
                    params=search_params
                )

                # NOTE(review): unlike the Bing provider, the HTTP status is
                # not checked here before .json() — confirm error responses
                # are acceptable to surface via the except clauses below.
                response_data = response.json()
                result = self._process_news_results(response_data, days=kwargs.get('days', 3), topic=query)
            else:
                response = requests.get(
                    self.WEB_SEARCH_ENDPOINT,
                    headers=self.headers,
                    params=search_params
                )
                response_data = response.json()
                result = self._process_general_results(response_data)

            # Include summarizer response if it exists
            summary_response = self.get_brave_summary(query)
            if summary_response:
                result['summarizer'] = summary_response

            return result

        except requests.exceptions.RequestException as e:
            return {'error': f'API request failed: {str(e)}'}
        except Exception as e:
            return {'error': f'An unexpected error occurred: {str(e)}'}

    def _process_general_results(self, response: Dict[str, Any]) -> Dict[str, Any]:
        """Process results for general web searches.

        Fetches AI summaries concurrently for the first two results only
        (summarizer calls are slow/rate-limited); remaining results carry
        extra_snippets instead.
        """
        web_results = response.get('web', {}).get('results', [])
        with ThreadPoolExecutor() as executor:
            # Use index as key instead of the result dictionary
            # (dicts are unhashable, so they cannot key the futures map).
            futures = {i: executor.submit(self.get_brave_summary, result.get('title', ''))
                       for i, result in enumerate(web_results[:2])}

            results = []
            for i, result in enumerate(web_results):
                summary = None
                if i < 2:
                    try:
                        summary = futures[i].result()
                    except Exception as e:
                        print(f"Error getting summary: {e}")

                processed_result = {
                    'title': result.get('title', ''),
                    'url': result.get('url', ''),
                    'content': result.get('description', ''),
                    'score': result.get('score', 1.0),
                    'extra_snippets': None,
                    'summary': None
                }
                # A result carries either a summary or extra snippets,
                # never both.
                if summary:
                    processed_result['summary'] = summary
                else:
                    processed_result['extra_snippets'] = result.get('extra_snippets', [])
                results.append(processed_result)
        return {'results': results}

    def _process_news_results(self, response: Dict[str, Any], days: int, topic: str) -> Dict[str, Any]:
        """Process results for news-specific searches.

        Sorts articles newest-first by their 'age' string, then attaches an
        AI summary (fetched concurrently) or extra snippets to each.
        """
        news_results = response.get('results', [])

        def convert_age_to_minutes(age_str: str) -> int:
            """
            Convert age string to minutes.

            Args:
                age_str: Age string in the format of "X minutes", "X hours", "X days"

            Returns:
                Age in minutes
            """
            # NOTE(review): assumes age_str always starts with an integer
            # token — a malformed value would raise here; confirm upstream
            # format is guaranteed.
            age_value = int(age_str.split()[0])
            age_unit = age_str.split()[1]
            if age_unit == 'minutes':
                return age_value
            elif age_unit == 'hours':
                return age_value * 60
            elif age_unit == 'days':
                return age_value * 1440  # 24 hours * 60 minutes
            else:
                return 0  # Default to 0 if unknown unit

        # Sort news results based on the age field (most recent first).
        news_results.sort(key=lambda x: convert_age_to_minutes(x.get('age', '0 minutes')))

        with ThreadPoolExecutor() as executor:
            # Use enumerate to create futures with index as key
            futures = {i: executor.submit(self.get_brave_summary, article_data.get('title', ''))
                       for i, article_data in enumerate(news_results)}

            articles = []
            for i, article_data in enumerate(news_results):
                try:
                    summary = futures[i].result()
                except Exception as e:
                    print(f"Error getting summary: {e}")
                    summary = None

                article = {
                    'title': article_data.get('title', ''),
                    'url': article_data.get('url', ''),
                    'published_date': article_data.get('age', ''),
                    'breaking': article_data.get('breaking', False),
                    'content': article_data.get('description', ''),
                    'extra_snippets': None,
                    'summary': None,
                    'score': article_data.get('score', 1.0)
                }
                # Same either/or convention as _process_general_results.
                if summary:
                    article['summary'] = summary
                else:
                    article['extra_snippets'] = article_data.get('extra_snippets', [])
                articles.append(article)

        return {
            'articles': articles,
            'time_period': f"Past {days} days",
            'topic': topic
        }
|
||||
|
||||
if __name__ == "__main__":
    # Manual smoke test against the live Brave API (requires BRAVE_API_KEY).
    provider = BraveSearchProvider()
    if not provider.is_configured():
        print("Error: Brave API key not configured")
        exit(1)

    # Test general search
    print("\n=== Testing General Search ===")
    general_result = provider.search(
        "What is artificial intelligence?",
        max_results=1  # Increased max_results to test summary limiting
    )

    if 'error' in general_result:
        print(f"Error in general search: {general_result['error']}")
    else:
        print("\nTop Results:")
        for idx, result in enumerate(general_result['results'], 1):
            print(f"\n{idx}. {result['title']}")
            print(f" URL: {result['url']}")
            print(f" Preview: {result['content']}...")
            print(f" Score: {result['score']}")
            if result['extra_snippets']:
                print(" Extra Snippets:")
                for snippet in result['extra_snippets']:
                    print(f" - {snippet}")
            if result['summary']:  # Check if summary exists before printing
                print(f" Summary: {result.get('summary', '')}...")
            # Throttle between results to avoid hammering the API.
            import time
            time.sleep(1)

    # Test news search (timed to gauge summarizer overhead)
    print("\n\n=== Testing News Search ===")
    import time
    start_time = time.time()
    news_result = provider.search(
        "mike tyson fight",
        topic="news",
        days=3,
        max_results=1
    )
    end_time = time.time()

    if 'error' in news_result:
        print(f"Error in news search: {news_result['error']}")
    else:
        print("\nRecent Articles:")
        for idx, article in enumerate(news_result['articles'], 1):
            print(f"\n{idx}. {article['title']}")
            print(f" Published: {article['published_date']}")
            print(f" Breaking: {article['breaking']}")
            print(f" URL: {article['url']}")
            print(f" Preview: {article['content'][:400]}...")
            if article['extra_snippets']:
                print(" Extra Snippets:")
                for snippet in article['extra_snippets']:
                    print(f" - {snippet}")
            if article['summary']:
                print(f" Summary: {article.get('summary', '')}...")

    print(f"Execution time: {round(end_time - start_time, 1)} seconds")
|
|
@ -1,231 +0,0 @@
|
|||
from typing import Dict, Any, Optional
|
||||
import os
|
||||
import sys
|
||||
import json
|
||||
from pathlib import Path
|
||||
import requests
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
# Add parent directory to path for imports when running as script
|
||||
if __name__ == "__main__":
|
||||
sys.path.append(str(Path(__file__).parent.parent))
|
||||
from search_providers.base_provider import BaseSearchProvider
|
||||
else:
|
||||
from .base_provider import BaseSearchProvider
|
||||
|
||||
class ExaSearchProvider(BaseSearchProvider):
    """
    Exa.ai implementation of the search provider interface.
    Handles web searches with optional full page content retrieval.
    """

    def __init__(self, api_key: Optional[str] = None):
        """
        Initialize the Exa search provider.

        Args:
            api_key: Optional Exa API key. If not provided, will try to get
                it from the EXA_API_KEY environment variable.
        """
        self.api_key = api_key or os.getenv("EXA_API_KEY")
        self.base_url = "https://api.exa.ai/search"
        self.trusted_sources = self._load_trusted_sources()

    def _load_trusted_sources(self) -> list:
        """Load trusted news sources from JSON file."""
        try:
            json_path = Path(__file__).parent / 'trusted_news_sources.json'
            with open(json_path) as f:
                data = json.load(f)
            return data.get('trusted_sources', [])
        except Exception as e:
            # Best-effort: a missing file degrades to an unrestricted search.
            print(f"Warning: Could not load trusted sources: {e}")
            return []

    def is_configured(self) -> bool:
        """Check if Exa client is properly configured."""
        return bool(self.api_key)

    def search(self, query: str, **kwargs) -> Dict[str, Any]:
        """
        Perform a search using Exa API.

        Args:
            query: The search query string
            **kwargs: Additional search parameters:
                - include_content: Whether to retrieve full page contents (default: False)
                - max_results: Maximum number of results (default: 3)
                - days: Number of days to look back (for news searches)
                - topic: Optional search topic (e.g., "news")

        Returns:
            Dict containing search results or error information
        """
        if not self.is_configured():
            return {'error': 'Exa API key not configured'}

        try:
            # Set default search parameters
            search_params = {
                'query': query,
                'type': 'neural',
                'useAutoprompt': True,
                'numResults': kwargs.get('max_results', 3),
            }

            # Add optional parameters
            if kwargs.get('include_content'):
                search_params['contents'] = {
                    "highlights": True,
                    "summary": True
                }

            if kwargs.get('days'):
                # Convert days to timestamp for time-based filtering
                date_limit = datetime.now() - timedelta(days=kwargs['days'])
                search_params['startPublishedTime'] = date_limit.isoformat()

            # Add trusted domains for news searches
            if kwargs.get('topic') == 'news' and self.trusted_sources:
                search_params['includeDomains'] = self.trusted_sources

            # Make API request
            headers = {
                'x-api-key': self.api_key,
                'Content-Type': 'application/json',
                'accept': 'application/json'
            }

            response = requests.post(
                self.base_url,
                headers=headers,
                json=search_params
            )
            response.raise_for_status()
            data = response.json()

            # Process results based on whether it's a news search
            if kwargs.get('topic') == 'news':
                return self._process_news_results(
                    data,
                    days=kwargs.get('days', 3),
                    topic=query
                )
            else:
                return self._process_general_results(data)

        except requests.exceptions.RequestException as e:
            # FIX: requests.Response defines __bool__ as "status is OK", so
            # an error response object (401, 429, ...) is FALSY. The original
            # `if e.response and ...` could therefore never match, and every
            # HTTP error fell through to the generic message. Compare against
            # None explicitly instead.
            if e.response is not None and e.response.status_code == 401:
                return {'error': 'Invalid Exa API key'}
            elif e.response is not None and e.response.status_code == 429:
                return {'error': 'Exa API rate limit exceeded'}
            else:
                return {'error': f'An error occurred while making the request: {str(e)}'}
        except Exception as e:
            return {'error': f'An unexpected error occurred: {str(e)}'}

    def _process_general_results(self, response: Dict[str, Any]) -> Dict[str, Any]:
        """Process results for general searches.

        Returns a dict with a 'results' list plus the autoprompt string Exa
        rewrote the query into.
        """
        results = []
        for result in response.get('results', []):
            processed_result = {
                'title': result.get('title', ''),
                'url': result.get('url', ''),
                'highlights': result.get('highlights', []),
                'summary': result.get('summary', ''),
                'score': result.get('score', 0.0)
            }
            results.append(processed_result)

        return {
            'results': results,
            'autoprompt': response.get('autopromptString', '')
        }

    def _process_news_results(self, response: Dict[str, Any], days: int, topic: str) -> Dict[str, Any]:
        """Process results for news-specific searches.

        Returns a dict with an 'articles' list plus the echoed time period,
        topic, and autoprompt string.
        """
        articles = []
        for article in response.get('results', []):
            processed_article = {
                'title': article.get('title', ''),
                'url': article.get('url', ''),
                'published_date': article.get('publishedDate', ''),
                'highlights': article.get('highlights', []),
                'summary': article.get('summary', ''),
                'score': article.get('score', 0.0)
            }
            articles.append(processed_article)

        return {
            'articles': articles,
            'time_period': f"Past {days} days",
            'topic': topic,
            'autoprompt': response.get('autopromptString', '')
        }
|
||||
|
||||
if __name__ == "__main__":
    # Manual smoke test against the live Exa API (requires EXA_API_KEY).
    provider = ExaSearchProvider()
    if not provider.is_configured():
        print("Error: Exa API key not configured")
        exit(1)

    # Test general search (timed)
    print("\n=== Testing General Search ===")
    import time
    start_time = time.time()
    general_result = provider.search(
        "What is artificial intelligence?",
        max_results=3,
        include_content=True
    )
    end_time = time.time()

    if 'error' in general_result:
        print("Error:", general_result['error'])
    else:
        print("\nTop Results:")
        print(f"Autoprompt: {general_result.get('autoprompt', '')}")
        for idx, result in enumerate(general_result['results'], 1):
            print(f"\n{idx}. {result['title']}")
            print(f" URL: {result['url']}")
            print(f" Score: {result['score']}")
            print(f" Summary: {result['summary']}")
            if result['highlights']:
                print(" Highlights:")
                for highlight in result['highlights']:
                    print(f" - {highlight}")
    print(f"\n\nTime taken for general search: {end_time - start_time} seconds")

    # Test news search (timed)
    print("\n\n=== Testing News Search ===")
    start_time = time.time()
    news_result = provider.search(
        "Latest developments in AI",
        topic="news",
        days=3,
        max_results=3,
        include_content=True
    )
    end_time = time.time()

    if 'error' in news_result:
        print("Error:", news_result['error'])
    else:
        print("\nRecent Articles:")
        print(f"Autoprompt: {news_result.get('autoprompt', '')}")
        for idx, article in enumerate(news_result['articles'], 1):
            print(f"\n{idx}. {article['title']}")
            print(f" Published: {article['published_date']}")
            print(f" URL: {article['url']}")
            print(f" Score: {article['score']}")
            print(f" Summary: {article['summary']}")
            if article['highlights']:
                print(" Highlights:")
                for highlight in article['highlights']:
                    print(f" - {highlight}")
    print(f"\n\nTime taken for news search: {end_time - start_time} seconds")

    # Test error handling: a bad key should surface a clean error dict.
    print("\n\n=== Testing Error Handling ===")
    bad_provider = ExaSearchProvider(api_key="invalid_key")
    error_result = bad_provider.search("test query")
    print("\nExpected error with invalid API key:", error_result['error'])
|
|
@ -1,50 +0,0 @@
|
|||
"""Factory for creating search providers based on configuration."""
|
||||
|
||||
from typing import Type, Dict, Any
|
||||
from search_providers.base_provider import BaseSearchProvider
|
||||
from search_providers.bing_provider import BingSearchProvider
|
||||
from search_providers.brave_provider import BraveSearchProvider
|
||||
from search_providers.exa_provider import ExaSearchProvider
|
||||
from search_providers.tavily_provider import TavilySearchProvider
|
||||
from system_config import get_search_config
|
||||
|
||||
class SearchProviderFactory:
    """
    Factory class for creating instances of search providers.
    """

    # Registry mapping provider-type name -> implementing class.
    _providers: Dict[str, Type[BaseSearchProvider]] = {
        "bing": BingSearchProvider,
        "brave": BraveSearchProvider,
        "exa": ExaSearchProvider,
        "tavily": TavilySearchProvider,
    }

    @classmethod
    def get_provider(cls, provider_type: str, **kwargs) -> BaseSearchProvider:
        """
        Get an instance of the specified search provider.

        Args:
            provider_type: The type of search provider to create
                (case-insensitive, e.g., "bing", "brave").
            **kwargs: Additional keyword arguments to pass to the provider's
                constructor.

        Returns:
            An instance of the requested search provider.

        Raises:
            ValueError: If the provider type is not registered.
                (Fix: the old docstring claimed None was returned for an
                invalid type, contradicting the actual raise below.)
        """
        provider_class = cls._providers.get(provider_type.lower())
        if not provider_class:
            raise ValueError(f"Invalid search provider type: {provider_type}")

        return provider_class(**kwargs)

    @classmethod
    def get_available_providers(cls) -> Dict[str, Type[BaseSearchProvider]]:
        """
        Get a dictionary of available search provider types and their corresponding classes.

        Returns:
            A shallow copy of the registry mapping provider type names (e.g.,
            "bing", "brave") to provider classes. A copy is returned so that
            callers cannot mutate the factory's internal registry.
        """
        return dict(cls._providers)
|
|
@ -1,160 +0,0 @@
|
|||
from typing import Dict, Any, Optional
|
||||
import os
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
# Add parent directory to path for imports when running as script
|
||||
if __name__ == "__main__":
|
||||
sys.path.append(str(Path(__file__).parent.parent))
|
||||
from search_providers.base_provider import BaseSearchProvider
|
||||
else:
|
||||
from .base_provider import BaseSearchProvider
|
||||
|
||||
from tavily import TavilyClient, MissingAPIKeyError, InvalidAPIKeyError, UsageLimitExceededError
|
||||
|
||||
class TavilySearchProvider(BaseSearchProvider):
    """
    Tavily implementation of the search provider interface.

    Wraps ``TavilyClient`` and normalizes responses for both general web
    searches and news-topic searches.
    """

    def __init__(self, api_key: Optional[str] = None):
        """
        Initialize the Tavily search provider.

        Args:
            api_key: Optional Tavily API key. Falls back to the
                TAVILY_API_KEY environment variable when not supplied.
        """
        self.api_key = api_key or os.getenv("TAVILY_API_KEY")
        try:
            # Without a key the client stays None; is_configured() reflects that.
            self.client = TavilyClient(api_key=self.api_key) if self.api_key else None
        except MissingAPIKeyError:
            self.client = None

    def is_configured(self) -> bool:
        """Return True when a Tavily client was created successfully."""
        return self.client is not None

    @staticmethod
    def _snippet(text: Optional[str]) -> str:
        """Return the first 500 chars of *text* plus an ellipsis, or '' when empty."""
        return text[:500] + '...' if text else ''

    def search(self, query: str, **kwargs) -> Dict[str, Any]:
        """
        Perform a search using the Tavily API.

        Args:
            query: The search query string.
            **kwargs: Additional search parameters:
                - search_depth: "basic" or "advanced" (default: "basic")
                - topic: Optional search topic (e.g. "news")
                - max_results: Maximum number of results (default: 5)
                - include_answer: Include AI-generated answer (default: True)
                - include_images: Include images (default: False)
                - days: Look-back window in days (for news searches)

        Returns:
            Dict with normalized search results, or an 'error' key on failure.
        """
        if not self.is_configured():
            return {'error': 'Tavily API key not configured'}

        try:
            # Defaults first; caller-supplied kwargs win on conflict.
            params: Dict[str, Any] = {
                'search_depth': "basic",
                'max_results': 5,
                'include_answer': True,
                'include_images': False,
            }
            params.update(kwargs)

            response = self.client.search(query, **params)

            # News searches get article-shaped output; everything else is general.
            if kwargs.get('topic') == 'news':
                return self._process_news_results(
                    response, days=kwargs.get('days', 3), topic=query
                )
            return self._process_general_results(response)
        except InvalidAPIKeyError:
            return {'error': 'Invalid Tavily API key'}
        except UsageLimitExceededError:
            return {'error': 'Tavily API usage limit exceeded'}
        except Exception as e:
            return {'error': f'An unexpected error occurred: {e}'}

    def _process_general_results(self, response: Dict[str, Any]) -> Dict[str, Any]:
        """Normalize a general-search response into an answer plus scored results."""
        results = []
        for hit in response.get('results', []):
            results.append({
                'title': hit.get('title', ''),
                'url': hit.get('url', ''),
                'content': self._snippet(hit.get('content')),
                'score': hit.get('score', 0.0),
            })
        return {'answer': response.get('answer', ''), 'results': results}

    def _process_news_results(self, response: Dict[str, Any], days: int, topic: str) -> Dict[str, Any]:
        """Normalize a news-search response into dated articles plus query metadata."""
        articles = []
        for item in response.get('results', []):
            articles.append({
                'title': item.get('title', ''),
                'url': item.get('url', ''),
                'published_date': item.get('published_date', ''),
                'content': self._snippet(item.get('content')),
                'score': item.get('score', 0.0),
            })
        return {
            'answer': response.get('answer', ''),
            'articles': articles,
            'time_period': f"Past {days} days",
            'topic': topic,
        }
||||
if __name__ == "__main__":
    # Command-line smoke test for the Tavily provider (requires TAVILY_API_KEY).
    provider = TavilySearchProvider()
    if not provider.is_configured():
        print("Error: Tavily API key not configured")
        exit(1)

    # --- General search ---
    print("\n=== Testing General Search ===")
    general = provider.search(
        "What is artificial intelligence?",
        search_depth="advanced",
        max_results=3,
    )
    print("\nQuery Answer:", general['answer'])
    print("\nTop Results:")
    for rank, hit in enumerate(general['results'], 1):
        print(f"\n{rank}. {hit['title']}")
        print(f"   URL: {hit['url']}")
        print(f"   Score: {hit['score']}")
        print(f"   Preview: {hit['content'][:200]}...")

    # --- News search ---
    print("\n\n=== Testing News Search ===")
    news = provider.search(
        "Latest developments in AI",
        topic="news",
        days=3,
        search_depth="advanced",
    )
    print("\nNews Summary:", news['answer'])
    print("\nRecent Articles:")
    for rank, story in enumerate(news['articles'], 1):
        print(f"\n{rank}. {story['title']}")
        print(f"   Published: {story['published_date']}")
        print(f"   URL: {story['url']}")
        print(f"   Score: {story['score']}")
        print(f"   Preview: {story['content'][:400]}...")

    # --- Error handling ---
    print("\n\n=== Testing Error Handling ===")
    broken = TavilySearchProvider(api_key="invalid_key")
    error_result = broken.search("test query")
    print("\nExpected error with invalid API key:", error_result['error'])
|
@ -1,71 +0,0 @@
|
|||
{
|
||||
"trusted_sources": [
|
||||
"apnews.com",
|
||||
"reuters.com",
|
||||
"bbc.com",
|
||||
"wsj.com",
|
||||
"nytimes.com",
|
||||
"economist.com",
|
||||
"bloomberg.com",
|
||||
"ft.com",
|
||||
"aljazeera.com",
|
||||
"afp.com",
|
||||
"techcrunch.com",
|
||||
"wired.com",
|
||||
"arstechnica.com",
|
||||
"theverge.com",
|
||||
"cnet.com",
|
||||
"theguardian.com",
|
||||
"businessinsider.com",
|
||||
"dw.com",
|
||||
"time.com",
|
||||
"afp.com",
|
||||
"pbs.org",
|
||||
"npr.org",
|
||||
"cnbc.com",
|
||||
"forbes.com",
|
||||
"thehill.com",
|
||||
"politico.com",
|
||||
"axios.com",
|
||||
"euronews.com",
|
||||
"japantimes.co.jp",
|
||||
"scmp.com",
|
||||
"straitstimes.com",
|
||||
"themoscowtimes.com",
|
||||
"haaretz.com",
|
||||
"timesofindia.com",
|
||||
"globeandmail.com",
|
||||
"abc.net.au",
|
||||
"rte.ie",
|
||||
"swissinfo.ch",
|
||||
"thelocal.fr",
|
||||
"thelocal.de",
|
||||
"thelocal.se",
|
||||
"kyivpost.com",
|
||||
"arabnews.com",
|
||||
"koreatimes.co.kr",
|
||||
"bangkokpost.com",
|
||||
"zdnet.com",
|
||||
"cnet.com",
|
||||
"engadget.com",
|
||||
"gizmodo.com",
|
||||
"thenextweb.com",
|
||||
"venturebeat.com",
|
||||
"techradar.com",
|
||||
"tomshardware.com",
|
||||
"anandtech.com",
|
||||
"slashdot.org",
|
||||
"techspot.com",
|
||||
"phoronix.com",
|
||||
"404media.co",
|
||||
"theregister.com",
|
||||
"techdirt.com",
|
||||
"techrepublic.com",
|
||||
"mit.edu",
|
||||
"protocol.com",
|
||||
"theinformation.com",
|
||||
"restofworld.org",
|
||||
"news.ycombinator.com"
|
||||
]
|
||||
}
|
||||
|
Loading…
Reference in a new issue