"""
System-wide configuration settings for Web Scraper, Logging, and Research components
"""
import logging
import logging.handlers

# Web Scraper Configuration
SCRAPER_CONFIG = {
    "user_agent": "WebLLMAssistant/1.0 (+https://github.com/YourUsername/Web-LLM-Assistant-Llama-cpp)",
    "rate_limit": 1,  # Seconds between requests to same domain
    "timeout": 10,  # Request timeout in seconds
    "max_retries": 3,  # Number of retry attempts for failed requests
    "max_workers": 5,  # Maximum number of concurrent scraping threads
    "content_limits": {
        "max_content_length": 2400,  # Maximum characters to extract from content
        "max_links": 10  # Maximum number of links to extract
    },
    "respect_robots_txt": False  # Whether to respect robots.txt
}

# Search Provider Configuration
SEARCH_CONFIG = {
    "default_provider": "duckduckgo",  # Default search provider to use
    "fallback_order": [  # Order of providers to try if default fails
        "exa",
        "bing",
        "brave",
        "tavily",
        "duckduckgo"  # Keep DuckDuckGo as final fallback
    ],
    "provider_settings": {
        "tavily": {
            "search_depth": "basic",
            "max_results": 5,
            "include_answer": True,
            "include_images": False
        },
        "brave": {
            "max_results": 10
        },
        "bing": {
            "max_results": 10,
            "freshness": "Month"  # Time range for results
        },
        "exa": {
            "max_results": 10,
            "use_highlights": True
        },
        "duckduckgo": {
            "max_results": 10,
            "region": "wt-wt",  # Worldwide results
            "safesearch": "off"
        }
    },
    "rate_limiting": {
        "requests_per_minute": 10,
        "cooldown_period": 60  # Seconds to wait after hitting rate limit
    }
}

# System-wide Logging Configuration
# Levels are the numeric ``logging`` constants; setup_logging() compares them.
LOGGING_CONFIG = {
    "level": logging.INFO,
    "format": "%(asctime)s - %(levelname)s - %(message)s",
    "handlers": {
        "console": {
            "enabled": True,
            "level": logging.INFO
        },
        "file": {
            "enabled": True,
            "level": logging.DEBUG,
            "filename": "web_llm.log",
            "max_bytes": 1024 * 1024,  # 1MB
            "backup_count": 3
        }
    }
}

# Research Configuration
RESEARCH_CONFIG = {
    "search": {
        "max_searches_per_cycle": 5,
        "max_results_per_search": 10,
        "min_relevance_score": 0.6
    },
    "content": {
        "max_document_size": 12000,  # Maximum size of research document in characters
        "max_chunk_size": 2000,  # Maximum size of content chunks for processing
        "min_chunk_size": 100  # Minimum size of content chunks to process
    },
    "storage": {
        "auto_save": True,
        "auto_save_interval": 150,  # Auto-save interval in seconds
        "backup_enabled": True,
        "max_backups": 2
    },
    "rate_limiting": {
        "requests_per_minute": 60,
        "concurrent_requests": 5,
        "cooldown_period": 60  # Seconds to wait after hitting rate limit
    }
}


def setup_logging():
    """Configure the root logger from LOGGING_CONFIG and return it.

    Every handler currently attached to the root logger is removed and
    closed, then a console handler and/or a rotating file handler is
    attached according to the ``enabled`` flags in
    ``LOGGING_CONFIG["handlers"]``. Both handlers use
    ``LOGGING_CONFIG["format"]``.

    The root logger level is set to the lowest of ``LOGGING_CONFIG["level"]``
    and the levels of the enabled handlers, so that a handler configured
    with a more verbose level than the global one (the file handler at
    DEBUG by default) actually receives those records. Each handler still
    applies its own level as a filter.

    Returns:
        logging.Logger: the configured root logger.
    """
    handler_settings = LOGGING_CONFIG["handlers"]
    console_settings = handler_settings["console"]
    file_settings = handler_settings["file"]

    logger = logging.getLogger()

    # Remove AND close the previous handlers. Merely clearing the list would
    # leave the old log file open every time this function is called again.
    for old_handler in list(logger.handlers):
        logger.removeHandler(old_handler)
        old_handler.close()

    formatter = logging.Formatter(LOGGING_CONFIG["format"])
    # Collect every level that must pass through the root logger.
    required_levels = [LOGGING_CONFIG["level"]]

    # Console handler
    if console_settings["enabled"]:
        console_handler = logging.StreamHandler()
        console_handler.setLevel(console_settings["level"])
        console_handler.setFormatter(formatter)
        logger.addHandler(console_handler)
        required_levels.append(console_settings["level"])

    # File handler
    if file_settings["enabled"]:
        file_handler = logging.handlers.RotatingFileHandler(
            file_settings["filename"],
            maxBytes=file_settings["max_bytes"],
            backupCount=file_settings["backup_count"]
        )
        file_handler.setLevel(file_settings["level"])
        file_handler.setFormatter(formatter)
        logger.addHandler(file_handler)
        required_levels.append(file_settings["level"])

    # Set the level explicitly: logging.basicConfig() silently does nothing
    # when the root logger already has handlers, so it cannot be relied on.
    logger.setLevel(min(required_levels))

    return logger


def get_scraper_config():
    """Get the web scraper configuration (the shared SCRAPER_CONFIG dict, not a copy)"""
    return SCRAPER_CONFIG


def get_research_config():
    """Get the research configuration (the shared RESEARCH_CONFIG dict, not a copy)"""
    return RESEARCH_CONFIG


def get_search_config():
    """Get the search provider configuration (the shared SEARCH_CONFIG dict, not a copy)"""
    return SEARCH_CONFIG