Update Self_Improving_Search.py for Windows

2025-01-19 00:47:46 +00:00 · 2024-11-20 22:29:43 +05:30 · 2024-11-20 22:29:43 +05:30 · 1029885bd0
parent ab194f62a0
commit 1029885bd0
1 changed files with 280 additions and 401 deletions
--- a/Self_Improving_Search.py
+++ b/Self_Improving_Search.py
@ -1,26 +1,32 @@
 import time
 import re
 import os
 from typing import List, Dict, Tuple, Union
 from colorama import Fore, Style
 import logging
 import sys
 import msvcrt
 import os
 from colorama import init, Fore, Style
 import logging
 import time
 from io import StringIO
-from web_scraper import get_web_content, can_fetch
+from Self_Improving_Search import EnhancedSelfImprovingSearch
 from llm_config import get_llm_config
 from llm_response_parser import UltimateLLMResponseParser
 from llm_wrapper import LLMWrapper
-from urllib.parse import urlparse
+from strategic_analysis_parser import StrategicAnalysisParser
 from research_manager import ResearchManager
 # Initialize colorama
 if os.name != 'nt':
  print("This version is Windows-specific. Please use the Unix version for other operating systems.")
  sys.exit(1)
 init()  # Initialize colorama
 # Set up logging
 log_directory = 'logs'
 if not os.path.exists(log_directory):
-    os.makedirs(log_directory)
+  os.makedirs(log_directory)
 # Configure logger
 logger = logging.getLogger(__name__)
 logger.setLevel(logging.INFO)
-log_file = os.path.join(log_directory, 'llama_output.log')
+log_file = os.path.join(log_directory, 'web_llm.log')
 file_handler = logging.FileHandler(log_file)
 formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
 file_handler.setFormatter(formatter)
@ -28,19 +34,17 @@ logger.handlers = []
 logger.addHandler(file_handler)
 logger.propagate = False
-# Suppress other loggers
+# Disable other loggers
-for name in ['root', 'duckduckgo_search', 'requests', 'urllib3']:
+for name in logging.root.manager.loggerDict:
-    logging.getLogger(name).setLevel(logging.WARNING)
+  if name != __name__:
-    logging.getLogger(name).handlers = []
+      logging.getLogger(name).disabled = True
    logging.getLogger(name).propagate = False
 class OutputRedirector:
  """Windows-compatible output redirection"""
  def __init__(self, stream=None):
      self.stream = stream or StringIO()
      self.original_stdout = sys.stdout
      self.original_stderr = sys.stderr
-      
+
  def __enter__(self):
      sys.stdout = self.stream
      sys.stderr = self.stream
@ -50,386 +54,261 @@ class OutputRedirector:
      sys.stdout = self.original_stdout
      sys.stderr = self.original_stderr
-class EnhancedSelfImprovingSearch:
+def print_header():
-    def __init__(self, llm: LLMWrapper, parser: UltimateLLMResponseParser, max_attempts: int = 5):
+  print(Fore.CYAN + Style.BRIGHT + """
-        self.llm = llm
+  ╔══════════════════════════════════════════════════════════╗
-        self.parser = parser
+  ║             🌐 Advanced Research Assistant 🤖             ║
-        self.max_attempts = max_attempts
+  ╚══════════════════════════════════════════════════════════╝
-        self.llm_config = get_llm_config()
+  """ + Style.RESET_ALL)
-
+  print(Fore.YELLOW + """
-    @staticmethod
+  Welcome to the Advanced Research Assistant!
-    def initialize_llm():
+
-        llm_wrapper = LLMWrapper()
+  Commands:
-        return llm_wrapper
+  - For web search: start message with '/'
-
+    Example: "/latest news on AI advancements"
-    def print_thinking(self):
+
-        print(Fore.MAGENTA + "🧠 Thinking..." + Style.RESET_ALL)
+  - For research mode: start message with '@'
-
+    Example: "@analyze the impact of AI on healthcare"
-    def print_searching(self):
+
-        print(Fore.MAGENTA + "📝 Searching..." + Style.RESET_ALL)
+  Press CTRL+Z to submit input.
-
+  """ + Style.RESET_ALL)
-    def search_and_improve(self, user_query: str) -> str:
+
-        attempt = 0
+def get_multiline_input() -> str:
-        while attempt < self.max_attempts:
+    """Windows-compatible multiline input handler with improved reliability"""
-            print(f"\n{Fore.CYAN}Search attempt {attempt + 1}:{Style.RESET_ALL}")
+    print(f"{Fore.GREEN}📝 Enter your message (Press CTRL+Z to submit):{Style.RESET_ALL}")
-            self.print_searching()
+    lines = []
-
+    current_line = ""
-            try:
+
-                formulated_query, time_range = self.formulate_query(user_query, attempt)
+    try:
-
+        while True:
-                print(f"{Fore.YELLOW}Original query: {user_query}{Style.RESET_ALL}")
+            if msvcrt.kbhit():
-                print(f"{Fore.YELLOW}Formulated query: {formulated_query}{Style.RESET_ALL}")
+                char = msvcrt.getch()
-                print(f"{Fore.YELLOW}Time range: {time_range}{Style.RESET_ALL}")
+                
-
+                # Convert bytes to string for comparison
-                if not formulated_query:
+                char_code = ord(char)
-                    print(f"{Fore.RED}Error: Empty search query. Retrying...{Style.RESET_ALL}")
+                
-                    attempt += 1
+                # CTRL+Z detection (Windows EOF)
-                    continue
+                if char_code == 26:  # ASCII code for CTRL+Z
-
+                    print()  # New line
-                search_results = self.perform_search(formulated_query, time_range)
+                    if current_line:
-
+                        lines.append(current_line)
-                if not search_results:
+                    return ' '.join(lines).strip() or "q"
-                    print(f"{Fore.RED}No results found. Retrying with a different query...{Style.RESET_ALL}")
+                
-                    attempt += 1
+                # Enter key
-                    continue
+                elif char in [b'\r', b'\n']:
-
+                    print()  # New line
-                self.display_search_results(search_results)
+                    lines.append(current_line)
-
+                    current_line = ""
-                selected_urls = self.select_relevant_pages(search_results, user_query)
+                
-
+                # Backspace
-                if not selected_urls:
+                elif char_code == 8:  # ASCII code for backspace
-                    print(f"{Fore.RED}No relevant URLs found. Retrying...{Style.RESET_ALL}")
+                    if current_line:
-                    attempt += 1
+                        current_line = current_line[:-1]
-                    continue
+                        print('\b \b', end='', flush=True)
-
+                
-                print(Fore.MAGENTA + "⚙️ Scraping selected pages..." + Style.RESET_ALL)
+                # Regular character input
-                # Scraping is done without OutputRedirector to ensure messages are visible
+                elif 32 <= char_code <= 126:  # Printable ASCII range
-                scraped_content = self.scrape_content(selected_urls)
+                    try:
-
+                        char_str = char.decode('utf-8')
-                if not scraped_content:
+                        current_line += char_str
-                    print(f"{Fore.RED}Failed to scrape content. Retrying...{Style.RESET_ALL}")
+                        print(char_str, end='', flush=True)
-                    attempt += 1
+                    except UnicodeDecodeError:
-                    continue
+                        continue
-
+
-                self.display_scraped_content(scraped_content)
+            time.sleep(0.01)  # Prevent high CPU usage
-
+
-                self.print_thinking()
+    except KeyboardInterrupt:
-
+        print("\nInput interrupted")
-                with OutputRedirector() as output:
+        return "q"
-                    evaluation, decision = self.evaluate_scraped_content(user_query, scraped_content)
+    except Exception as e:
-                llm_output = output.getvalue()
+        logger.error(f"Input error: {str(e)}")
-                logger.info(f"LLM Output in evaluate_scraped_content:\n{llm_output}")
+        return "q"
-
+
-                print(f"{Fore.MAGENTA}Evaluation: {evaluation}{Style.RESET_ALL}")
+def initialize_system():
-                print(f"{Fore.MAGENTA}Decision: {decision}{Style.RESET_ALL}")
+  """Initialize system with enhanced error checking and recovery"""
-
+  try:
-                if decision == "answer":
+      print(Fore.YELLOW + "Initializing system..." + Style.RESET_ALL)
-                    return self.generate_final_answer(user_query, scraped_content)
+
-                elif decision == "refine":
+      # Load configuration
-                    print(f"{Fore.YELLOW}Refining search...{Style.RESET_ALL}")
+      llm_config = get_llm_config()
-                    attempt += 1
+      
-                else:
+      # Validate Ollama connection
-                    print(f"{Fore.RED}Unexpected decision. Proceeding to answer.{Style.RESET_ALL}")
+      if llm_config['llm_type'] == 'ollama':
-                    return self.generate_final_answer(user_query, scraped_content)
+          import requests
-
+          max_retries = 3
-            except Exception as e:
+          retry_delay = 2
-                print(f"{Fore.RED}An error occurred during search attempt. Check the log file for details.{Style.RESET_ALL}")
+          
-                logger.error(f"An error occurred during search: {str(e)}", exc_info=True)
+          for attempt in range(max_retries):
-                attempt += 1
+              try:
-
+                  response = requests.get(llm_config['base_url'], timeout=5)
-        return self.synthesize_final_answer(user_query)
+                  if response.status_code == 200:
-
+                      break
-    def evaluate_scraped_content(self, user_query: str, scraped_content: Dict[str, str]) -> Tuple[str, str]:
+                  elif attempt < max_retries - 1:
-        user_query_short = user_query[:200]
+                      print(f"{Fore.YELLOW}Retrying Ollama connection ({attempt + 1}/{max_retries})...{Style.RESET_ALL}")
-        prompt = f"""
+                      time.sleep(retry_delay)
-Evaluate if the following scraped content contains sufficient information to answer the user's question comprehensively:
+                  else:
-
+                      raise ConnectionError("Cannot connect to Ollama server")
-User's question: "{user_query_short}"
+              except requests.exceptions.RequestException as e:
-
+                  if attempt == max_retries - 1:
-Scraped Content:
+                      raise ConnectionError(
-{self.format_scraped_content(scraped_content)}
+                          "\nCannot connect to Ollama server!"
-
+                          "\nPlease ensure:"
-Your task:
+                          "\n1. Ollama is installed"
-1. Determine if the scraped content provides enough relevant and detailed information to answer the user's question thoroughly.
+                          "\n2. Ollama server is running (try 'ollama serve')"
-2. If the information is sufficient, decide to 'answer'. If more information or clarification is needed, decide to 'refine' the search.
+                          "\n3. The model specified in llm_config.py is pulled"
-
+                      )
-Respond using EXACTLY this format:
+                  time.sleep(retry_delay)
-Evaluation: [Your evaluation of the scraped content]
+
-Decision: [ONLY 'answer' if content is sufficient, or 'refine' if more information is needed]
+      # Initialize components with output redirection
-"""
+      with OutputRedirector() as output:
-        max_retries = 3
+          llm_wrapper = LLMWrapper()
-        for attempt in range(max_retries):
+          parser = UltimateLLMResponseParser()
-            try:
+          search_engine = EnhancedSelfImprovingSearch(llm_wrapper, parser)
-                response_text = self.llm.generate(prompt, max_tokens=200, stop=None)
+          research_manager = ResearchManager(llm_wrapper, parser, search_engine)
-                evaluation, decision = self.parse_evaluation_response(response_text)
+
-                if decision in ['answer', 'refine']:
+          # Validate LLM
-                    return evaluation, decision
+          test_response = llm_wrapper.generate("Test", max_tokens=10)
-            except Exception as e:
+          if not test_response:
-                logger.warning(f"Error in evaluate_scraped_content (attempt {attempt + 1}): {str(e)}")
+              raise ConnectionError("LLM failed to generate response")
-
+
-        logger.warning("Failed to get a valid decision in evaluate_scraped_content. Defaulting to 'refine'.")
+      print(Fore.GREEN + "System initialized successfully." + Style.RESET_ALL)
-        return "Failed to evaluate content.", "refine"
+      return llm_wrapper, parser, search_engine, research_manager
-
+
-    def parse_evaluation_response(self, response: str) -> Tuple[str, str]:
+  except Exception as e:
-        evaluation = ""
+      logger.error(f"Error initializing system: {str(e)}", exc_info=True)
-        decision = ""
+      print(Fore.RED + f"System initialization failed: {str(e)}" + Style.RESET_ALL)
-        for line in response.strip().split('\n'):
+      return None, None, None, None
-            if line.startswith('Evaluation:'):
+
-                evaluation = line.split(':', 1)[1].strip()
+def handle_search_mode(search_engine, query):
-            elif line.startswith('Decision:'):
+  """Handles web search operations"""
-                decision = line.split(':', 1)[1].strip().lower()
+  print(f"{Fore.CYAN}Initiating web search...{Style.RESET_ALL}")
-        return evaluation, decision
+  try:
-
+      # Change search() to search_and_improve() which is the correct method name
-    def formulate_query(self, user_query: str, attempt: int) -> Tuple[str, str]:
+      results = search_engine.search_and_improve(query)
-        user_query_short = user_query[:200]
+      print(f"\n{Fore.GREEN}Search Results:{Style.RESET_ALL}")
-        prompt = f"""
+      print(results)
-Based on the following user question, formulate a concise and effective search query:
+  except Exception as e:
-"{user_query_short}"
+      logger.error(f"Search error: {str(e)}")
-Your task:
+      print(f"{Fore.RED}Search failed: {str(e)}{Style.RESET_ALL}")
-1. Create a search query of 2-5 words that will yield relevant results.
+
-2. Determine if a specific time range is needed for the search.
+def handle_research_mode(research_manager, query):
-Time range options:
+  """Handles research mode operations"""
- 'd': Limit results to the past day. Use for very recent events or rapidly changing information.
+  print(f"{Fore.CYAN}Initiating research mode...{Style.RESET_ALL}")
- 'w': Limit results to the past week. Use for recent events or topics with frequent updates.
+
- 'm': Limit results to the past month. Use for relatively recent information or ongoing events.
+  try:
- 'y': Limit results to the past year. Use for annual events or information that changes yearly.
+      # Start the research
- 'none': No time limit. Use for historical information or topics not tied to a specific time frame.
+      research_manager.start_research(query)
-Respond in the following format:
+
-Search query: [Your 2-5 word query]
+      submit_key = "CTRL+Z" if os.name == 'nt' else "CTRL+D"
-Time range: [d/w/m/y/none]
+      print(f"\n{Fore.YELLOW}Research Running. Available Commands:{Style.RESET_ALL}")
-Do not provide any additional information or explanation.
+      print(f"Type command and press {submit_key}:")
-"""
+      print("'s' = Show status")
-        max_retries = 3
+      print("'f' = Show focus")
-        for retry in range(max_retries):
+      print("'q' = Quit research")
-            with OutputRedirector() as output:
+
-                response_text = self.llm.generate(prompt, max_tokens=50, stop=None)
+      while research_manager.is_active():
-            llm_output = output.getvalue()
+          try:
-            logger.info(f"LLM Output in formulate_query:\n{llm_output}")
+              command = get_multiline_input().strip().lower()
-            query, time_range = self.parse_query_response(response_text)
+              if command == 's':
-            if query and time_range:
+                  print("\n" + research_manager.get_progress())
-                return query, time_range
+              elif command == 'f':
-        return self.fallback_query(user_query), "none"
+                  if research_manager.current_focus:
-
+                      print(f"\n{Fore.CYAN}Current Focus:{Style.RESET_ALL}")
-    def parse_query_response(self, response: str) -> Tuple[str, str]:
+                      print(f"Area: {research_manager.current_focus.area}")
-        query = ""
+                      print(f"Priority: {research_manager.current_focus.priority}")
-        time_range = "none"
+                      print(f"Reasoning: {research_manager.current_focus.reasoning}")
-        for line in response.strip().split('\n'):
+                  else:
-            if ":" in line:
+                      print(f"\n{Fore.YELLOW}No current focus area{Style.RESET_ALL}")
-                key, value = line.split(":", 1)
+              elif command == 'q':
-                key = key.strip().lower()
+                  break
-                value = value.strip()
+          except KeyboardInterrupt:
-                if "query" in key:
+              break
-                    query = self.clean_query(value)
+
-                elif "time" in key or "range" in key:
+      # Get final summary first
-                    time_range = self.validate_time_range(value)
+      summary = research_manager.terminate_research()
-        return query, time_range
+
-
+      # Ensure research UI is fully cleaned up
-    def clean_query(self, query: str) -> str:
+      research_manager._cleanup_research_ui()
-        query = re.sub(r'["\'\[\]]', '', query)
+
-        query = re.sub(r'\s+', ' ', query)
+      # Now in main terminal, show summary
-        return query.strip()[:100]
+      print(f"\n{Fore.GREEN}Research Summary:{Style.RESET_ALL}")
-
+      print(summary)
-    def validate_time_range(self, time_range: str) -> str:
+
-        valid_ranges = ['d', 'w', 'm', 'y', 'none']
+      # Only NOW start conversation mode if we have a valid summary
-        time_range = time_range.lower()
+      if hasattr(research_manager, 'research_complete') and \
-        return time_range if time_range in valid_ranges else 'none'
+         hasattr(research_manager, 'research_summary') and \
-
+         research_manager.research_complete and \
-    def fallback_query(self, user_query: str) -> str:
+         research_manager.research_summary:
-        words = user_query.split()
+          time.sleep(0.5)  # Small delay to ensure clean transition
-        return " ".join(words[:5])
+          research_manager.start_conversation_mode()
-
+
-    def perform_search(self, query: str, time_range: str) -> List[Dict]:
+      return
-        if not query:
+
-            return []
+  except KeyboardInterrupt:
-
+      print(f"\n{Fore.YELLOW}Research interrupted.{Style.RESET_ALL}")
-        from duckduckgo_search import DDGS
+      research_manager.terminate_research()
-
+  except Exception as e:
-        with DDGS() as ddgs:
+      logger.error(f"Research error: {str(e)}")
-            try:
+      print(f"\n{Fore.RED}Research error: {str(e)}{Style.RESET_ALL}")
-                with OutputRedirector() as output:
+      research_manager.terminate_research()
-                    if time_range and time_range != 'none':
+
-                        results = list(ddgs.text(query, timelimit=time_range, max_results=10))
+def main():
-                    else:
+  init()  # Initialize colorama
-                        results = list(ddgs.text(query, max_results=10))
+  print_header()
-                ddg_output = output.getvalue()
+  
-                logger.info(f"DDG Output in perform_search:\n{ddg_output}")
+  try:
-                return [{'number': i+1, **result} for i, result in enumerate(results)]
+      components = initialize_system()
-            except Exception as e:
+      if not all(components):
-                print(f"{Fore.RED}Search error: {str(e)}{Style.RESET_ALL}")
+          sys.exit(1)
-                return []
+          
-
+      llm, parser, search_engine, research_manager = components
-    def display_search_results(self, results: List[Dict]) -> None:
+
-        """Display search results with minimal output"""
+      while True:
-        try:
+          try:
-            if not results:
+              user_input = get_multiline_input()
-                return
+              
-
+              # Skip empty inputs
-            # Only show search success status
+              if not user_input:
-            print(f"\nSearch query sent to DuckDuckGo: {self.last_query}")
+                  continue
-            print(f"Time range sent to DuckDuckGo: {self.last_time_range}")
+                  
-            print(f"Number of results: {len(results)}")
+              # Handle exit commands
-
+              if user_input.lower() in ["@quit", "quit", "q"]:
-        except Exception as e:
+                  break
-            logger.error(f"Error displaying search results: {str(e)}")
+
-
+              # Handle help command
-    def select_relevant_pages(self, search_results: List[Dict], user_query: str) -> List[str]:
+              if user_input.lower() == 'help':
-        prompt = f"""
+                  print_header()
-Given the following search results for the user's question: "{user_query}"
+                  continue
-Select the 2 most relevant results to scrape and analyze. Explain your reasoning for each selection.
+
-
+              # Process commands
-Search Results:
+              if user_input.startswith('/'):
-{self.format_results(search_results)}
+                  handle_search_mode(search_engine, user_input[1:].strip())
-
+              elif user_input.startswith('@'):
-Instructions:
+                  handle_research_mode(research_manager, user_input[1:].strip())
-1. You MUST select exactly 2 result numbers from the search results.
+              else:
-2. Choose the results that are most likely to contain comprehensive and relevant information to answer the user's question.
+                  print(f"{Fore.YELLOW}Please start with '/' for search or '@' for research.{Style.RESET_ALL}")
-3. Provide a brief reason for each selection.
+
-
+          except KeyboardInterrupt:
-You MUST respond using EXACTLY this format and nothing else:
+              print(f"\n{Fore.YELLOW}Use 'q' to quit or continue with new input.{Style.RESET_ALL}")
-
+              continue
-Selected Results: [Two numbers corresponding to the selected results]
+          except Exception as e:
-Reasoning: [Your reasoning for the selections]
+              logger.error(f"Error processing input: {str(e)}")
-"""
+              print(f"{Fore.RED}Error: {str(e)}{Style.RESET_ALL}")
-
+              continue
-        max_retries = 3
+
-        for retry in range(max_retries):
+  except KeyboardInterrupt:
-            with OutputRedirector() as output:
+      print(f"\n{Fore.YELLOW}Program terminated by user.{Style.RESET_ALL}")
-                response_text = self.llm.generate(prompt, max_tokens=200, stop=None)
+  except Exception as e:
-            llm_output = output.getvalue()
+      logger.critical(f"Critical error: {str(e)}")
-            logger.info(f"LLM Output in select_relevant_pages:\n{llm_output}")
+      print(f"{Fore.RED}Critical error: {str(e)}{Style.RESET_ALL}")
-
+  finally:
-            parsed_response = self.parse_page_selection_response(response_text)
+      try:
-            if parsed_response and self.validate_page_selection_response(parsed_response, len(search_results)):
+          if 'research_manager' in locals() and research_manager:
-                selected_urls = [result['href'] for result in search_results if result['number'] in parsed_response['selected_results']]
+              research_manager.cleanup()
-
+      except Exception as e:
-                allowed_urls = [url for url in selected_urls if can_fetch(url)]
+          logger.error(f"Cleanup error: {str(e)}")
-                if allowed_urls:
+      print(Fore.YELLOW + "\nGoodbye!" + Style.RESET_ALL)
-                    return allowed_urls
+      sys.exit(0)
-                else:
+
-                    print(f"{Fore.YELLOW}Warning: All selected URLs are disallowed by robots.txt. Retrying selection.{Style.RESET_ALL}")
+if __name__ == "__main__":
-            else:
+  main()
                print(f"{Fore.YELLOW}Warning: Invalid page selection. Retrying.{Style.RESET_ALL}")
        print(f"{Fore.YELLOW}Warning: All attempts to select relevant pages failed. Falling back to top allowed results.{Style.RESET_ALL}")
        allowed_urls = [result['href'] for result in search_results if can_fetch(result['href'])][:2]
        return allowed_urls
    def parse_page_selection_response(self, response: str) -> Union[Dict[str, Union[List[int], str]], None]:
        """Parse the LLM's page-selection reply into a dictionary.

        Looks for one line starting with 'Selected Results:' and one starting
        with 'Reasoning:'. Whitespace around each line is ignored so that
        indented replies (or replies with Windows line endings) still parse.

        Args:
            response: Raw text returned by the LLM; may be empty or None.

        Returns:
            A dict with 'selected_results' (every integer found on the
            selection line) and 'reasoning' (the text after the first colon),
            or None when the reply is empty or either field is missing.
        """
        if not response:
            # Nothing to parse; report the reply as unusable instead of raising.
            return None

        parsed = {}
        for raw_line in response.strip().split('\n'):
            line = raw_line.strip()
            if line.startswith('Selected Results:'):
                parsed['selected_results'] = [int(num) for num in re.findall(r'\d+', line)]
            elif line.startswith('Reasoning:'):
                parsed['reasoning'] = line.split(':', 1)[1].strip()
        return parsed if 'selected_results' in parsed and 'reasoning' in parsed else None
    def validate_page_selection_response(self, parsed_response: Dict[str, Union[List[int], str]], num_results: int) -> bool:
        """Check that a parsed selection names two distinct, in-range results.

        Args:
            parsed_response: Dict containing a 'selected_results' list of ints.
            num_results: Number of search results offered; valid result
                numbers are 1..num_results inclusive.

        Returns:
            True only if exactly two different result numbers were chosen and
            both fall within range; False otherwise.
        """
        selected = parsed_response['selected_results']
        if len(selected) != 2:
            return False
        # The same number twice would match only a single search result.
        if len(set(selected)) != 2:
            return False
        return all(1 <= num <= num_results for num in selected)
    def format_results(self, results: List[Dict]) -> str:
        """Render search results as numbered title/snippet/URL entries.

        Each entry shows the result's 'number', its title, the first 200
        characters of its body followed by '...', and its URL. 'N/A' stands in
        for a missing 'title', 'body' or 'href' key. Entries are separated by
        a blank line.
        """
        entries = [
            (
                f"{item['number']}. Title: {item.get('title', 'N/A')}\n"
                f"   Snippet: {item.get('body', 'N/A')[:200]}...\n"
                f"   URL: {item.get('href', 'N/A')}\n"
            )
            for item in results
        ]
        return "\n".join(entries)
    def scrape_content(self, urls: List[str]) -> Dict[str, str]:
        """Scrape every URL that robots.txt permits and collect the results.

        For each URL, can_fetch() decides whether scraping is allowed. Allowed
        URLs are passed one at a time to get_web_content(); whatever it
        returns is merged into the result (assumed to be a mapping, since it
        is merged with dict.update -- confirm against web_scraper). Progress,
        failures and robots.txt blocks are printed and logged.

        Args:
            urls: URLs to attempt.

        Returns:
            The merged scraped content; empty if nothing could be scraped.
        """
        scraped_content = {}
        blocked_urls = []
        for url in urls:
            if not can_fetch(url):
                blocked_urls.append(url)
                print(Fore.RED + f"Warning: Robots.txt disallows scraping of {url}" + Style.RESET_ALL)
                logger.warning(f"Robots.txt disallows scraping of {url}")
                continue

            content = get_web_content([url])
            if content:
                scraped_content.update(content)
                print(Fore.YELLOW + f"Successfully scraped: {url}" + Style.RESET_ALL)
                logger.info(f"Successfully scraped: {url}")
            else:
                # robots.txt already allowed this URL, so an empty result is a
                # fetch failure; do not report it as a robots.txt block.
                print(Fore.RED + f"Failed to scrape content from {url}" + Style.RESET_ALL)
                logger.warning(f"Failed to scrape content from {url}")

        print(Fore.CYAN + f"Scraped content received for {len(scraped_content)} URLs" + Style.RESET_ALL)
        logger.info(f"Scraped content received for {len(scraped_content)} URLs")
        if blocked_urls:
            print(Fore.RED + f"Warning: {len(blocked_urls)} URL(s) were not scraped due to robots.txt restrictions." + Style.RESET_ALL)
            logger.warning(f"{len(blocked_urls)} URL(s) were not scraped due to robots.txt restrictions: {', '.join(blocked_urls)}")
        return scraped_content
    def display_scraped_content(self, scraped_content: Dict[str, str]):
        """Print each scraped URL with at most the first 4000 characters of its content."""
        print(f"\n{Fore.CYAN}Scraped Content:{Style.RESET_ALL}")
        for source_url in scraped_content:
            preview = scraped_content[source_url][:4000]
            print(f"{Fore.GREEN}URL: {source_url}{Style.RESET_ALL}")
            print(f"Content: {preview}...\n")
    def generate_final_answer(self, user_query: str, scraped_content: Dict[str, str]) -> str:
        user_query_short = user_query[:200]
        prompt = f"""
 You are an AI assistant. Provide a comprehensive and detailed answer to the following question using ONLY the information provided in the scraped content. Do not include any references or mention any sources. Answer directly and thoroughly.
 Question: "{user_query_short}"
 Scraped Content:
 {self.format_scraped_content(scraped_content)}
 Important Instructions:
 1. Do not use phrases like "Based on the absence of selected results" or similar.
 2. If the scraped content does not contain enough information to answer the question, say so explicitly and explain what information is missing.
 3. Provide as much relevant detail as possible from the scraped content.
 Answer:
 """
        max_retries = 3
        for attempt in range(max_retries):
            with OutputRedirector() as output:
                response_text = self.llm.generate(prompt, max_tokens=1024, stop=None)
            llm_output = output.getvalue()
            logger.info(f"LLM Output in generate_final_answer:\n{llm_output}")
            if response_text:
                logger.info(f"LLM Response:\n{response_text}")
                return response_text
        error_message = "I apologize, but I couldn't generate a satisfactory answer based on the available information."
        logger.warning(f"Failed to generate a response after {max_retries} attempts. Returning error message.")
        return error_message
    def format_scraped_content(self, scraped_content: Dict[str, str]) -> str:
        """Flatten scraped pages into one text block for use in a prompt.

        Every run of whitespace in a page's content is collapsed to a single
        space, each page is labelled with its URL, and the pages are joined
        with a blank line between them.
        """
        whitespace_run = re.compile(r'\s+')
        return "\n".join(
            f"Content from {url}:\n{whitespace_run.sub(' ', text)}\n"
            for url, text in scraped_content.items()
        )
    def synthesize_final_answer(self, user_query: str) -> str:
        prompt = f"""
 After multiple search attempts, we couldn't find a fully satisfactory answer to the user's question: "{user_query}"
 Please provide the best possible answer you can, acknowledging any limitations or uncertainties.
 If appropriate, suggest ways the user might refine their question or where they might find more information.
 Respond in a clear, concise, and informative manner.
 """
        try:
            with OutputRedirector() as output:
                response_text = self.llm.generate(prompt, max_tokens=self.llm_config.get('max_tokens', 1024), stop=self.llm_config.get('stop', None))
            llm_output = output.getvalue()
            logger.info(f"LLM Output in synthesize_final_answer:\n{llm_output}")
            if response_text:
                return response_text.strip()
        except Exception as e:
            logger.error(f"Error in synthesize_final_answer: {str(e)}", exc_info=True)
        return "I apologize, but after multiple attempts, I wasn't able to find a satisfactory answer to your question. Please try rephrasing your question or breaking it down into smaller, more specific queries."
 # End of EnhancedSelfImprovingSearch class