Delete system_config.py

James authored 2024-11-26 12:17:46 +10:00 · committed by GitHub
parent 6201fa7ca1
commit 9b8778dc3e

@@ -1,147 +0,0 @@
"""
System-wide configuration settings for Web Scraper, Logging, and Research components
"""
import logging
import logging.handlers


# Web Scraper Configuration
SCRAPER_CONFIG = {
    "user_agent": "WebLLMAssistant/1.0 (+https://github.com/YourUsername/Web-LLM-Assistant-Llama-cpp)",
    "rate_limit": 1,    # Seconds between requests to same domain
    "timeout": 10,      # Request timeout in seconds
    "max_retries": 3,   # Number of retry attempts for failed requests
    "max_workers": 5,   # Maximum number of concurrent scraping threads
    "content_limits": {
        "max_content_length": 2400,  # Maximum characters to extract from content
        "max_links": 10              # Maximum number of links to extract
    },
    "respect_robots_txt": False  # Whether to respect robots.txt
}

# Search Provider Configuration
SEARCH_CONFIG = {
    "default_provider": "duckduckgo",  # Default search provider to use
    "fallback_order": [  # Order of providers to try if default fails
        "exa",
        "bing",
        "brave",
        "tavily",
        "duckduckgo"  # Keep DuckDuckGo as final fallback
    ],
    "provider_settings": {
        "tavily": {
            "search_depth": "basic",
            "max_results": 5,
            "include_answer": True,
            "include_images": False
        },
        "brave": {
            "max_results": 10
        },
        "bing": {
            "max_results": 10,
            "freshness": "Month"  # Time range for results
        },
        "exa": {
            "max_results": 10,
            "use_highlights": True
        },
        "duckduckgo": {
            "max_results": 10,
            "region": "wt-wt",  # Worldwide results
            "safesearch": "off"
        }
    },
    "rate_limiting": {
        "requests_per_minute": 10,
        "cooldown_period": 60  # Seconds to wait after hitting rate limit
    }
}

# System-wide Logging Configuration
LOGGING_CONFIG = {
    "level": logging.INFO,
    "format": "%(asctime)s - %(levelname)s - %(message)s",
    "handlers": {
        "console": {
            "enabled": True,
            "level": logging.INFO
        },
        "file": {
            "enabled": True,
            "level": logging.DEBUG,
            "filename": "web_llm.log",
            "max_bytes": 1024 * 1024,  # 1MB
            "backup_count": 3
        }
    }
}

# Research Configuration
RESEARCH_CONFIG = {
    "search": {
        "max_searches_per_cycle": 5,
        "max_results_per_search": 10,
        "min_relevance_score": 0.6
    },
    "content": {
        "max_document_size": 12000,  # Maximum size of research document in characters
        "max_chunk_size": 2000,      # Maximum size of content chunks for processing
        "min_chunk_size": 100        # Minimum size of content chunks to process
    },
    "storage": {
        "auto_save": True,
        "auto_save_interval": 150,  # Auto-save interval in seconds
        "backup_enabled": True,
        "max_backups": 2
    },
    "rate_limiting": {
        "requests_per_minute": 60,
        "concurrent_requests": 5,
        "cooldown_period": 60  # Seconds to wait after hitting rate limit
    }
}

def setup_logging():
    """Configure logging based on LOGGING_CONFIG settings"""
    logging.basicConfig(
        level=LOGGING_CONFIG["level"],
        format=LOGGING_CONFIG["format"]
    )
    logger = logging.getLogger()

    # Clear existing handlers
    logger.handlers.clear()

    # Console handler
    if LOGGING_CONFIG["handlers"]["console"]["enabled"]:
        console_handler = logging.StreamHandler()
        console_handler.setLevel(LOGGING_CONFIG["handlers"]["console"]["level"])
        console_handler.setFormatter(logging.Formatter(LOGGING_CONFIG["format"]))
        logger.addHandler(console_handler)

    # File handler
    if LOGGING_CONFIG["handlers"]["file"]["enabled"]:
        file_handler = logging.handlers.RotatingFileHandler(
            LOGGING_CONFIG["handlers"]["file"]["filename"],
            maxBytes=LOGGING_CONFIG["handlers"]["file"]["max_bytes"],
            backupCount=LOGGING_CONFIG["handlers"]["file"]["backup_count"]
        )
        file_handler.setLevel(LOGGING_CONFIG["handlers"]["file"]["level"])
        file_handler.setFormatter(logging.Formatter(LOGGING_CONFIG["format"]))
        logger.addHandler(file_handler)

    return logger


def get_scraper_config():
    """Get the web scraper configuration"""
    return SCRAPER_CONFIG


def get_research_config():
    """Get the research configuration"""
    return RESEARCH_CONFIG


def get_search_config():
    """Get the search provider configuration"""
    return SEARCH_CONFIG
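
Note: the module removed here was self-contained, so callers only needed setup_logging() and the three getters. A minimal sketch of how a consumer might have wired it up follows; the script name, the fetch helper, and the use of requests are illustrative assumptions, not code from this repository.

# consumer_sketch.py -- hypothetical example, not part of the repository
import requests  # assumed available; any HTTP client would do

from system_config import get_scraper_config, setup_logging

logger = setup_logging()            # installs console + rotating-file handlers
scraper_cfg = get_scraper_config()  # SCRAPER_CONFIG dict from the deleted module


def fetch(url: str) -> str:
    """Fetch a page using the configured user agent and timeout."""
    response = requests.get(
        url,
        headers={"User-Agent": scraper_cfg["user_agent"]},
        timeout=scraper_cfg["timeout"],
    )
    response.raise_for_status()
    logger.info("Fetched %s (%d characters)", url, len(response.text))
    return response.text


if __name__ == "__main__":
    fetch("https://example.com")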