mirror of https://github.com/TheBlewish/Automated-AI-Web-Researcher-Ollama.git
Update Self_Improving_Search.py for Windows
parent b63eb97037
commit df2c6ac39b
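As the diff below shows, this commit replaces the interactive, Windows-specific entry-point code that had ended up in Self_Improving_Search.py (the msvcrt-based CTRL+Z input handler, banner printing, Ollama connection checks, and main() loop) with a self-contained search module: a SearchResult container plus an EnhancedSelfImprovingSearch class that queries DuckDuckGo's HTML endpoint with retries, scores and ranks results, fetches page content in parallel, caches queries and content, and asks the LLM for a structured summary.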
--- a/Self_Improving_Search.py
+++ b/Self_Improving_Search.py
@@ -1,22 +1,26 @@
-import sys
-import msvcrt
-import os
-from colorama import init, Fore, Style
-import logging
 import time
+import re
+import os
+from typing import List, Dict, Tuple, Union, Optional
+from colorama import Fore, Style, init
+import logging
+import sys
 from io import StringIO
+from web_scraper import get_web_content, can_fetch
 from llm_config import get_llm_config
 from llm_response_parser import UltimateLLMResponseParser
 from llm_wrapper import LLMWrapper
-from strategic_analysis_parser import StrategicAnalysisParser
-from research_manager import ResearchManager
+from urllib.parse import urlparse, quote_plus
+import requests
+from bs4 import BeautifulSoup
+import json
+from datetime import datetime, timedelta
+import threading
+from queue import Queue
+import concurrent.futures
 
 # Initialize colorama
-if os.name != 'nt':
-    print("This version is Windows-specific. Please use the Unix version for other operating systems.")
-    sys.exit(1)
-
-init()  # Initialize colorama
+init()
 
 # Set up logging
 log_directory = 'logs'
@@ -25,289 +29,347 @@ if not os.path.exists(log_directory):
 
 logger = logging.getLogger(__name__)
 logger.setLevel(logging.INFO)
-log_file = os.path.join(log_directory, 'web_llm.log')
+log_file = os.path.join(log_directory, 'search.log')
 file_handler = logging.FileHandler(log_file)
 formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
 file_handler.setFormatter(formatter)
-logger.handlers = []
 logger.addHandler(file_handler)
-logger.propagate = False
 
-# Disable other loggers
-for name in logging.root.manager.loggerDict:
-    if name != __name__:
-        logging.getLogger(name).disabled = True
-
-class OutputRedirector:
-    def __init__(self, stream=None):
-        self.stream = stream or StringIO()
-        self.original_stdout = sys.stdout
-        self.original_stderr = sys.stderr
-
-    def __enter__(self):
-        sys.stdout = self.stream
-        sys.stderr = self.stream
-        return self.stream
-
-    def __exit__(self, exc_type, exc_val, exc_tb):
-        sys.stdout = self.original_stdout
-        sys.stderr = self.original_stderr
+class SearchResult:
+    def __init__(self, title: str, url: str, snippet: str, score: float = 0.0):
+        self.title = title
+        self.url = url
+        self.snippet = snippet
+        self.score = score
+        self.content: Optional[str] = None
+        self.processed = False
+        self.error = None
+
+    def to_dict(self) -> Dict:
+        return {
+            'title': self.title,
+            'url': self.url,
+            'snippet': self.snippet,
+            'score': self.score,
+            'has_content': bool(self.content),
+            'processed': self.processed,
+            'error': str(self.error) if self.error else None
+        }
 
-def print_header():
-    print(Fore.CYAN + Style.BRIGHT + """
-    ╔══════════════════════════════════════════════════════════╗
-    ║           🌐 Advanced Research Assistant 🤖              ║
-    ╚══════════════════════════════════════════════════════════╝
-    """ + Style.RESET_ALL)
-    print(Fore.YELLOW + """
-    Welcome to the Advanced Research Assistant!
-
-    Commands:
-    - For web search: start message with '/'
-      Example: "/latest news on AI advancements"
-
-    - For research mode: start message with '@'
-      Example: "@analyze the impact of AI on healthcare"
-
-    Press CTRL+Z to submit input.
-    """ + Style.RESET_ALL)
-
-def get_multiline_input() -> str:
-    """Windows-compatible multiline input handler with improved reliability"""
-    print(f"{Fore.GREEN}📝 Enter your message (Press CTRL+Z to submit):{Style.RESET_ALL}")
-    lines = []
-    current_line = ""
-
-    try:
-        while True:
-            if msvcrt.kbhit():
-                char = msvcrt.getch()
-
-                # Convert bytes to string for comparison
-                char_code = ord(char)
-
-                # CTRL+Z detection (Windows EOF)
-                if char_code == 26:  # ASCII code for CTRL+Z
-                    print()  # New line
-                    if current_line:
-                        lines.append(current_line)
-                    return ' '.join(lines).strip() or "q"
-
-                # Enter key
-                elif char in [b'\r', b'\n']:
-                    print()  # New line
-                    lines.append(current_line)
-                    current_line = ""
-
-                # Backspace
-                elif char_code == 8:  # ASCII code for backspace
-                    if current_line:
-                        current_line = current_line[:-1]
-                        print('\b \b', end='', flush=True)
-
-                # Regular character input
-                elif 32 <= char_code <= 126:  # Printable ASCII range
-                    try:
-                        char_str = char.decode('utf-8')
-                        current_line += char_str
-                        print(char_str, end='', flush=True)
-                    except UnicodeDecodeError:
-                        continue
-
-            time.sleep(0.01)  # Prevent high CPU usage
-
-    except KeyboardInterrupt:
-        print("\nInput interrupted")
-        return "q"
-    except Exception as e:
-        logger.error(f"Input error: {str(e)}")
-        return "q"
+class EnhancedSelfImprovingSearch:
+    def __init__(self, llm: LLMWrapper, parser: UltimateLLMResponseParser, max_attempts: int = 5):
+        self.llm = llm
+        self.parser = parser
+        self.max_attempts = max_attempts
+        self.llm_config = get_llm_config()
+        self.last_query = ""
+        self.last_time_range = ""
+        self.search_cache = {}
+        self.content_cache = {}
+        self.max_cache_size = 100
+        self.max_concurrent_requests = 5
+        self.request_timeout = 15
+        self.headers = {
+            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
+        }
+
+    def search_and_improve(self, query: str, time_range: str = "auto") -> str:
+        """Main search method that includes self-improvement"""
+        try:
+            logger.info(f"Starting search for query: {query}")
+            self.last_query = query
+            self.last_time_range = time_range
+
+            # Check cache first
+            cache_key = f"{query}_{time_range}"
+            if cache_key in self.search_cache:
+                logger.info("Returning cached results")
+                return self.search_cache[cache_key]
+
+            # Perform initial search
+            results = self.perform_search(query, time_range)
+            if not results:
+                return "No results found."
+
+            # Enhance results with content fetching
+            enhanced_results = self.enhance_search_results(results)
+
+            # Generate improved summary
+            summary = self.generate_enhanced_summary(enhanced_results, query)
+
+            # Cache the results
+            self.cache_results(cache_key, summary)
+
+            return summary
+
+        except Exception as e:
+            logger.error(f"Search and improve error: {str(e)}", exc_info=True)
+            return f"Error during search: {str(e)}"
 
-def initialize_system():
-    """Initialize system with enhanced error checking and recovery"""
-    try:
-        print(Fore.YELLOW + "Initializing system..." + Style.RESET_ALL)
-
-        # Load configuration
-        llm_config = get_llm_config()
-
-        # Validate Ollama connection
-        if llm_config['llm_type'] == 'ollama':
-            import requests
-            max_retries = 3
-            retry_delay = 2
-
-            for attempt in range(max_retries):
-                try:
-                    response = requests.get(llm_config['base_url'], timeout=5)
-                    if response.status_code == 200:
-                        break
-                    elif attempt < max_retries - 1:
-                        print(f"{Fore.YELLOW}Retrying Ollama connection ({attempt + 1}/{max_retries})...{Style.RESET_ALL}")
-                        time.sleep(retry_delay)
-                    else:
-                        raise ConnectionError("Cannot connect to Ollama server")
-                except requests.exceptions.RequestException as e:
-                    if attempt == max_retries - 1:
-                        raise ConnectionError(
-                            "\nCannot connect to Ollama server!"
-                            "\nPlease ensure:"
-                            "\n1. Ollama is installed"
-                            "\n2. Ollama server is running (try 'ollama serve')"
-                            "\n3. The model specified in llm_config.py is pulled"
-                        )
-                    time.sleep(retry_delay)
-
-        # Initialize components with output redirection
-        with OutputRedirector() as output:
-            llm_wrapper = LLMWrapper()
-            parser = UltimateLLMResponseParser()
-            search_engine = EnhancedSelfImprovingSearch(llm_wrapper, parser)
-            research_manager = ResearchManager(llm_wrapper, parser, search_engine)
-
-        # Validate LLM
-        test_response = llm_wrapper.generate("Test", max_tokens=10)
-        if not test_response:
-            raise ConnectionError("LLM failed to generate response")
-
-        print(Fore.GREEN + "System initialized successfully." + Style.RESET_ALL)
-        return llm_wrapper, parser, search_engine, research_manager
-
-    except Exception as e:
-        logger.error(f"Error initializing system: {str(e)}", exc_info=True)
-        print(Fore.RED + f"System initialization failed: {str(e)}" + Style.RESET_ALL)
-        return None, None, None, None
+    def perform_search(self, query: str, time_range: str) -> List[SearchResult]:
+        """Performs web search with improved error handling and retry logic"""
+        if not query:
+            return []
+
+        results = []
+        retries = 3
+        delay = 2
+
+        for attempt in range(retries):
+            try:
+                encoded_query = quote_plus(query)
+                search_url = f"https://html.duckduckgo.com/html/?q={encoded_query}"
+
+                response = requests.get(search_url, headers=self.headers, timeout=self.request_timeout)
+                response.raise_for_status()
+
+                soup = BeautifulSoup(response.text, 'html.parser')
+
+                for i, result in enumerate(soup.select('.result'), 1):
+                    if i > 15:  # Increased limit for better coverage
+                        break
+
+                    title_elem = result.select_one('.result__title')
+                    snippet_elem = result.select_one('.result__snippet')
+                    link_elem = result.select_one('.result__url')
+
+                    if title_elem and link_elem:
+                        title = title_elem.get_text(strip=True)
+                        snippet = snippet_elem.get_text(strip=True) if snippet_elem else ""
+                        url = link_elem.get('href', '')
+
+                        # Basic result scoring
+                        score = self.calculate_result_score(title, snippet, query)
+
+                        results.append(SearchResult(title, url, snippet, score))
+
+                if results:
+                    # Sort results by score
+                    results.sort(key=lambda x: x.score, reverse=True)
+                    return results
+
+                if attempt < retries - 1:
+                    logger.warning(f"No results found, retrying ({attempt + 1}/{retries})...")
+                    time.sleep(delay)
+
+            except Exception as e:
+                logger.error(f"Search attempt {attempt + 1} failed: {str(e)}")
+                if attempt < retries - 1:
+                    time.sleep(delay)
+                else:
+                    raise
+
+        return results
+
+    def calculate_result_score(self, title: str, snippet: str, query: str) -> float:
+        """Calculate relevance score for search result"""
+        score = 0.0
+        query_terms = query.lower().split()
+
+        # Title matching
+        title_lower = title.lower()
+        for term in query_terms:
+            if term in title_lower:
+                score += 2.0
+
+        # Snippet matching
+        snippet_lower = snippet.lower()
+        for term in query_terms:
+            if term in snippet_lower:
+                score += 1.0
+
+        # Exact phrase matching
+        if query.lower() in title_lower:
+            score += 3.0
+        if query.lower() in snippet_lower:
+            score += 1.5
+
+        return score
+
+    def enhance_search_results(self, results: List[SearchResult]) -> List[SearchResult]:
+        """Enhance search results with parallel content fetching"""
+        enhanced_results = []
+
+        with concurrent.futures.ThreadPoolExecutor(max_workers=self.max_concurrent_requests) as executor:
+            future_to_result = {
+                executor.submit(self.fetch_and_process_content, result): result
+                for result in results[:10]  # Limit to top 10 results
+            }
+
+            for future in concurrent.futures.as_completed(future_to_result):
+                result = future_to_result[future]
+                try:
+                    content = future.result()
+                    if content:
+                        result.content = content
+                        result.processed = True
+                        enhanced_results.append(result)
+                except Exception as e:
+                    logger.error(f"Error processing {result.url}: {str(e)}")
+                    result.error = e
+
+        return enhanced_results
+
+    def fetch_and_process_content(self, result: SearchResult) -> Optional[str]:
+        """Fetch and process content for a search result"""
+        try:
+            # Check cache first
+            if result.url in self.content_cache:
+                return self.content_cache[result.url]
+
+            # Check if we can fetch the content
+            if not can_fetch(result.url):
+                logger.warning(f"Cannot fetch content from {result.url}")
+                return None
+
+            content = get_web_content(result.url)
+            if content:
+                # Process and clean content
+                cleaned_content = self.clean_content(content)
+
+                # Cache the content
+                self.cache_content(result.url, cleaned_content)
+
+                return cleaned_content
+
+        except Exception as e:
+            logger.error(f"Error fetching content from {result.url}: {str(e)}")
+            return None
 
-def handle_search_mode(search_engine, query):
-    """Handles web search operations"""
-    print(f"{Fore.CYAN}Initiating web search...{Style.RESET_ALL}")
-    try:
-        # Change search() to search_and_improve() which is the correct method name
-        results = search_engine.search_and_improve(query)
-        print(f"\n{Fore.GREEN}Search Results:{Style.RESET_ALL}")
-        print(results)
-    except Exception as e:
-        logger.error(f"Search error: {str(e)}")
-        print(f"{Fore.RED}Search failed: {str(e)}{Style.RESET_ALL}")
+    def clean_content(self, content: str) -> str:
+        """Clean and normalize web content"""
+        # Remove HTML tags if any remained
+        content = re.sub(r'<[^>]+>', '', content)
+
+        # Remove extra whitespace
+        content = re.sub(r'\s+', ' ', content)
+
+        # Remove special characters
+        content = re.sub(r'[^\w\s.,!?-]', '', content)
+
+        # Truncate if too long
+        max_length = 5000
+        if len(content) > max_length:
+            content = content[:max_length] + "..."
+
+        return content.strip()
+
+    def generate_enhanced_summary(self, results: List[SearchResult], query: str) -> str:
+        """Generate an enhanced summary using LLM with improved context"""
+        try:
+            # Prepare context from enhanced results
+            context = self.prepare_summary_context(results, query)
+
+            prompt = f"""
+Based on the following comprehensive search results for "{query}",
+provide a detailed analysis that:
+1. Synthesizes key information from multiple sources
+2. Highlights important findings and patterns
+3. Maintains factual accuracy and cites sources
+4. Presents a balanced view of different perspectives
+5. Identifies any gaps or limitations in the available information
+
+Context:
+{context}
+
+Please provide a well-structured analysis:
+"""
+
+            summary = self.llm.generate(prompt, max_tokens=1500)
+            return self.format_summary(summary)
+
+        except Exception as e:
+            logger.error(f"Summary generation error: {str(e)}")
+            return f"Error generating summary: {str(e)}"
 
-def handle_research_mode(research_manager, query):
-    """Handles research mode operations"""
-    print(f"{Fore.CYAN}Initiating research mode...{Style.RESET_ALL}")
-
-    try:
-        # Start the research
-        research_manager.start_research(query)
-
-        submit_key = "CTRL+Z" if os.name == 'nt' else "CTRL+D"
-        print(f"\n{Fore.YELLOW}Research Running. Available Commands:{Style.RESET_ALL}")
-        print(f"Type command and press {submit_key}:")
-        print("'s' = Show status")
-        print("'f' = Show focus")
-        print("'q' = Quit research")
-
-        while research_manager.is_active():
-            try:
-                command = get_multiline_input().strip().lower()
-                if command == 's':
-                    print("\n" + research_manager.get_progress())
-                elif command == 'f':
-                    if research_manager.current_focus:
-                        print(f"\n{Fore.CYAN}Current Focus:{Style.RESET_ALL}")
-                        print(f"Area: {research_manager.current_focus.area}")
-                        print(f"Priority: {research_manager.current_focus.priority}")
-                        print(f"Reasoning: {research_manager.current_focus.reasoning}")
-                    else:
-                        print(f"\n{Fore.YELLOW}No current focus area{Style.RESET_ALL}")
-                elif command == 'q':
-                    break
-            except KeyboardInterrupt:
-                break
-
-        # Get final summary first
-        summary = research_manager.terminate_research()
-
-        # Ensure research UI is fully cleaned up
-        research_manager._cleanup_research_ui()
-
-        # Now in main terminal, show summary
-        print(f"\n{Fore.GREEN}Research Summary:{Style.RESET_ALL}")
-        print(summary)
-
-        # Only NOW start conversation mode if we have a valid summary
-        if hasattr(research_manager, 'research_complete') and \
-           hasattr(research_manager, 'research_summary') and \
-           research_manager.research_complete and \
-           research_manager.research_summary:
-            time.sleep(0.5)  # Small delay to ensure clean transition
-            research_manager.start_conversation_mode()
-
-        return
-
-    except KeyboardInterrupt:
-        print(f"\n{Fore.YELLOW}Research interrupted.{Style.RESET_ALL}")
-        research_manager.terminate_research()
-    except Exception as e:
-        logger.error(f"Research error: {str(e)}")
-        print(f"\n{Fore.RED}Research error: {str(e)}{Style.RESET_ALL}")
-        research_manager.terminate_research()
+    def prepare_summary_context(self, results: List[SearchResult], query: str) -> str:
+        """Prepare context for summary generation"""
+        context = f"Query: {query}\n\n"
+
+        for i, result in enumerate(results, 1):
+            context += f"Source {i}:\n"
+            context += f"Title: {result.title}\n"
+            context += f"URL: {result.url}\n"
+
+            if result.content:
+                # Include relevant excerpts from content
+                excerpts = self.extract_relevant_excerpts(result.content, query)
+                context += f"Key Excerpts:\n{excerpts}\n"
+            else:
+                context += f"Summary: {result.snippet}\n"
+
+            context += "\n"
+
+        return context
+
+    def extract_relevant_excerpts(self, content: str, query: str, max_excerpts: int = 3) -> str:
+        """Extract relevant excerpts from content"""
+        sentences = re.split(r'[.!?]+', content)
+        scored_sentences = []
+
+        query_terms = set(query.lower().split())
+
+        for sentence in sentences:
+            sentence = sentence.strip()
+            if not sentence:
+                continue
+
+            score = sum(1 for term in query_terms if term in sentence.lower())
+            if score > 0:
+                scored_sentences.append((sentence, score))
+
+        # Sort by relevance score and take top excerpts
+        scored_sentences.sort(key=lambda x: x[1], reverse=True)
+        excerpts = [sentence for sentence, _ in scored_sentences[:max_excerpts]]
+
+        return "\n".join(f"- {excerpt}" for excerpt in excerpts)
 
-def main():
-    init()  # Initialize colorama
-    print_header()
-
-    try:
-        components = initialize_system()
-        if not all(components):
-            sys.exit(1)
-
-        llm, parser, search_engine, research_manager = components
-
-        while True:
-            try:
-                user_input = get_multiline_input()
-
-                # Skip empty inputs
-                if not user_input:
-                    continue
-
-                # Handle exit commands
-                if user_input.lower() in ["@quit", "quit", "q"]:
-                    break
-
-                # Handle help command
-                if user_input.lower() == 'help':
-                    print_header()
-                    continue
-
-                # Process commands
-                if user_input.startswith('/'):
-                    handle_search_mode(search_engine, user_input[1:].strip())
-                elif user_input.startswith('@'):
-                    handle_research_mode(research_manager, user_input[1:].strip())
-                else:
-                    print(f"{Fore.YELLOW}Please start with '/' for search or '@' for research.{Style.RESET_ALL}")
-
-            except KeyboardInterrupt:
-                print(f"\n{Fore.YELLOW}Use 'q' to quit or continue with new input.{Style.RESET_ALL}")
-                continue
-            except Exception as e:
-                logger.error(f"Error processing input: {str(e)}")
-                print(f"{Fore.RED}Error: {str(e)}{Style.RESET_ALL}")
-                continue
-
-    except KeyboardInterrupt:
-        print(f"\n{Fore.YELLOW}Program terminated by user.{Style.RESET_ALL}")
-    except Exception as e:
-        logger.critical(f"Critical error: {str(e)}")
-        print(f"{Fore.RED}Critical error: {str(e)}{Style.RESET_ALL}")
-    finally:
-        try:
-            if 'research_manager' in locals() and research_manager:
-                research_manager.cleanup()
-        except Exception as e:
-            logger.error(f"Cleanup error: {str(e)}")
-        print(Fore.YELLOW + "\nGoodbye!" + Style.RESET_ALL)
-        sys.exit(0)
+    def format_summary(self, summary: str) -> str:
+        """Format the final summary for better readability"""
+        # Add section headers if not present
+        if not re.search(r'^Key Findings:', summary, re.MULTILINE):
+            summary = "Key Findings:\n" + summary
+
+        # Add source attribution if not present
+        if not re.search(r'^Sources:', summary, re.MULTILINE):
+            summary += "\n\nSources: Based on analysis of search results"
+
+        # Add formatting
+        summary = summary.replace('Key Findings:', f"{Fore.CYAN}Key Findings:{Style.RESET_ALL}")
+        summary = summary.replace('Sources:', f"\n{Fore.CYAN}Sources:{Style.RESET_ALL}")
+
+        return summary
+
+    def cache_results(self, key: str, value: str) -> None:
+        """Cache search results with size limit"""
+        if len(self.search_cache) >= self.max_cache_size:
+            # Remove oldest entry
+            oldest_key = next(iter(self.search_cache))
+            del self.search_cache[oldest_key]
+
+        self.search_cache[key] = value
+
+    def cache_content(self, url: str, content: str) -> None:
+        """Cache web content with size limit"""
+        if len(self.content_cache) >= self.max_cache_size:
+            # Remove oldest entry
+            oldest_key = next(iter(self.content_cache))
+            del self.content_cache[oldest_key]
+
+        self.content_cache[url] = content
+
+    def clear_cache(self) -> None:
+        """Clear all caches"""
+        self.search_cache.clear()
+        self.content_cache.clear()
+
+    def get_last_query(self) -> str:
+        """Returns the last executed query"""
+        return self.last_query
+
+    def get_last_time_range(self) -> str:
+        """Returns the last used time range"""
+        return self.last_time_range
 
 if __name__ == "__main__":
-    main()
+    pass
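For orientation, here is a minimal driver sketch (not part of the commit) showing how the new class is wired together. LLMWrapper and UltimateLLMResponseParser come from the modules imported at the top of the new file, and llm_config.py is assumed to point at a reachable backend such as a local Ollama server:

    # Hypothetical usage, assuming the module layout shown in the diff.
    from llm_wrapper import LLMWrapper
    from llm_response_parser import UltimateLLMResponseParser
    from Self_Improving_Search import EnhancedSelfImprovingSearch

    llm = LLMWrapper()                      # model/endpoint resolved via llm_config.py
    parser = UltimateLLMResponseParser()
    searcher = EnhancedSelfImprovingSearch(llm, parser, max_attempts=5)

    # Checks the query cache, searches DuckDuckGo's HTML endpoint, fetches and
    # cleans page content in parallel, then returns an LLM-written summary string.
    print(searcher.search_and_improve("latest news on AI advancements"))

search_and_improve() is the single entry point the removed handle_search_mode() caller used, which matches the comment preserved in the deleted code ("Change search() to search_and_improve() which is the correct method name").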
Loading…
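The ranking in perform_search() comes entirely from calculate_result_score(). A worked example of the weighting, re-implemented here purely for illustration (the real method lives on the class in the diff):

    def score_like_the_diff(title: str, snippet: str, query: str) -> float:
        # Same weights as calculate_result_score() in the diff.
        score = 0.0
        t, s, q = title.lower(), snippet.lower(), query.lower()
        for term in q.split():
            if term in t:
                score += 2.0   # substring match, not word-boundary: "ai" also hits "said"
            if term in s:
                score += 1.0
        if q in t:
            score += 3.0       # exact phrase in title
        if q in s:
            score += 1.5       # exact phrase in snippet
        return score

    print(score_like_the_diff("AI in Healthcare Today",
                              "How AI is reshaping healthcare delivery",
                              "ai healthcare"))  # 2.0 + 2.0 + 1.0 + 1.0 = 6.0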
Reference in a new issue
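cache_results() and cache_content() cap both caches at max_cache_size = 100 by deleting next(iter(cache)). A standalone sketch of that eviction rule (not from the commit): because Python dicts preserve insertion order since 3.7, next(iter(d)) is the oldest inserted key, so the cap behaves as FIFO eviction rather than LRU; lookups never reorder entries.

    cache, max_cache_size = {}, 3
    for key in ["q1", "q2", "q3", "q4"]:
        if len(cache) >= max_cache_size:
            del cache[next(iter(cache))]  # drops "q1" when "q4" arrives
        cache[key] = f"summary for {key}"
    print(list(cache))  # ['q2', 'q3', 'q4']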