mirror of
https://github.com/TheBlewish/Automated-AI-Web-Researcher-Ollama.git
synced 2025-01-18 16:37:47 +00:00
148 lines
4.6 KiB
Python
148 lines
4.6 KiB
Python
"""
|
|
System-wide configuration settings for Web Scraper, Logging, and Research components
|
|
"""
|
|
import logging
|
|
import logging.handlers
|
|
|
|
# Web Scraper Configuration
|
|
SCRAPER_CONFIG = {
|
|
"user_agent": "WebLLMAssistant/1.0 (+https://github.com/YourUsername/Web-LLM-Assistant-Llama-cpp)",
|
|
"rate_limit": 1, # Seconds between requests to same domain
|
|
"timeout": 10, # Request timeout in seconds
|
|
"max_retries": 3, # Number of retry attempts for failed requests
|
|
"max_workers": 5, # Maximum number of concurrent scraping threads
|
|
"content_limits": {
|
|
"max_content_length": 2400, # Maximum characters to extract from content
|
|
"max_links": 10 # Maximum number of links to extract
|
|
},
|
|
"respect_robots_txt": False # Whether to respect robots.txt
|
|
}
|
|
|
|
# Search Provider Configuration
|
|
SEARCH_CONFIG = {
|
|
"default_provider": "duckduckgo", # Default search provider to use
|
|
"fallback_order": [ # Order of providers to try if default fails
|
|
"exa",
|
|
"bing",
|
|
"brave",
|
|
"tavily",
|
|
"duckduckgo" # Keep DuckDuckGo as final fallback
|
|
],
|
|
"provider_settings": {
|
|
"tavily": {
|
|
"search_depth": "basic",
|
|
"max_results": 5,
|
|
"include_answer": True,
|
|
"include_images": False
|
|
},
|
|
"brave": {
|
|
"max_results": 10
|
|
},
|
|
"bing": {
|
|
"max_results": 10,
|
|
"freshness": "Month" # Time range for results
|
|
},
|
|
"exa": {
|
|
"max_results": 10,
|
|
"use_highlights": True
|
|
},
|
|
"duckduckgo": {
|
|
"max_results": 10,
|
|
"region": "wt-wt", # Worldwide results
|
|
"safesearch": "off"
|
|
}
|
|
},
|
|
"rate_limiting": {
|
|
"requests_per_minute": 10,
|
|
"cooldown_period": 60 # Seconds to wait after hitting rate limit
|
|
}
|
|
}
|
|
|
|
# System-wide Logging Configuration
|
|
LOGGING_CONFIG = {
|
|
"level": logging.INFO,
|
|
"format": "%(asctime)s - %(levelname)s - %(message)s",
|
|
"handlers": {
|
|
"console": {
|
|
"enabled": True,
|
|
"level": logging.INFO
|
|
},
|
|
"file": {
|
|
"enabled": True,
|
|
"level": logging.DEBUG,
|
|
"filename": "web_llm.log",
|
|
"max_bytes": 1024 * 1024, # 1MB
|
|
"backup_count": 3
|
|
}
|
|
}
|
|
}
|
|
|
|
# Research Configuration
|
|
RESEARCH_CONFIG = {
|
|
"search": {
|
|
"max_searches_per_cycle": 5,
|
|
"max_results_per_search": 10,
|
|
"min_relevance_score": 0.6
|
|
},
|
|
"content": {
|
|
"max_document_size": 12000, # Maximum size of research document in characters
|
|
"max_chunk_size": 2000, # Maximum size of content chunks for processing
|
|
"min_chunk_size": 100 # Minimum size of content chunks to process
|
|
},
|
|
"storage": {
|
|
"auto_save": True,
|
|
"auto_save_interval": 150, # Auto-save interval in seconds
|
|
"backup_enabled": True,
|
|
"max_backups": 2
|
|
},
|
|
"rate_limiting": {
|
|
"requests_per_minute": 60,
|
|
"concurrent_requests": 5,
|
|
"cooldown_period": 60 # Seconds to wait after hitting rate limit
|
|
}
|
|
}
|
|
|
|
def setup_logging():
|
|
"""Configure logging based on LOGGING_CONFIG settings"""
|
|
logging.basicConfig(
|
|
level=LOGGING_CONFIG["level"],
|
|
format=LOGGING_CONFIG["format"]
|
|
)
|
|
|
|
logger = logging.getLogger()
|
|
|
|
# Clear existing handlers
|
|
logger.handlers.clear()
|
|
|
|
# Console handler
|
|
if LOGGING_CONFIG["handlers"]["console"]["enabled"]:
|
|
console_handler = logging.StreamHandler()
|
|
console_handler.setLevel(LOGGING_CONFIG["handlers"]["console"]["level"])
|
|
console_handler.setFormatter(logging.Formatter(LOGGING_CONFIG["format"]))
|
|
logger.addHandler(console_handler)
|
|
|
|
# File handler
|
|
if LOGGING_CONFIG["handlers"]["file"]["enabled"]:
|
|
file_handler = logging.handlers.RotatingFileHandler(
|
|
LOGGING_CONFIG["handlers"]["file"]["filename"],
|
|
maxBytes=LOGGING_CONFIG["handlers"]["file"]["max_bytes"],
|
|
backupCount=LOGGING_CONFIG["handlers"]["file"]["backup_count"]
|
|
)
|
|
file_handler.setLevel(LOGGING_CONFIG["handlers"]["file"]["level"])
|
|
file_handler.setFormatter(logging.Formatter(LOGGING_CONFIG["format"]))
|
|
logger.addHandler(file_handler)
|
|
|
|
return logger
|
|
|
|
def get_scraper_config():
|
|
"""Get the web scraper configuration"""
|
|
return SCRAPER_CONFIG
|
|
|
|
def get_research_config():
|
|
"""Get the research configuration"""
|
|
return RESEARCH_CONFIG
|
|
|
|
def get_search_config():
|
|
"""Get the search provider configuration"""
|
|
return SEARCH_CONFIG
|