diff --git a/backend/app/utils/agent.py b/backend/app/utils/agent.py index 64ac7486..c48de3f5 100644 --- a/backend/app/utils/agent.py +++ b/backend/app/utils/agent.py @@ -823,13 +823,19 @@ Your capabilities include: Your approach depends on available search tools: -**If Google Search is Available:** -- Initial Search: Start with `search_google` to get a list of relevant URLs +**If Search Tools are Available:** +- Initial Search: Start with `search_google` or `search_exa` to get a list of relevant URLs - Browser-Based Exploration: Use the browser tools to investigate the URLs -**If Google Search is NOT Available:** -- **MUST start with direct website search**: Use `browser_visit_page` to go - directly to popular search engines and informational websites such as: +**If Search Tools are NOT Available:** +- **RECOMMENDED: Use Brave Search**: Navigate to `https://search.brave.com/` + using `browser_visit_page`. Brave Search is highly recommended as it: + * Works well without requiring login + * Has fewer CAPTCHA challenges compared to other search engines + * Provides clean, relevant search results + * Supports advanced search queries +- **Alternative Search Engines**: If Brave Search is unavailable, use + `browser_visit_page` to go directly to other popular search engines: * General search: google.com, bing.com, duckduckgo.com * Academic: scholar.google.com, pubmed.ncbi.nlm.nih.gov * News: news.google.com, bbc.com/news, reuters.com @@ -839,8 +845,6 @@ Your approach depends on available search tools: sites using `browser_type` and submit with `browser_enter` - **Extract URLs from results**: Only use URLs that appear in the search results on these websites -- **Alternative Search**: If available, use `search_exa` for additional - results **Common Browser Operations (both scenarios):** - **Navigation and Exploration**: Use `browser_visit_page` to open URLs. diff --git a/backend/app/utils/toolkit/search_toolkit.py b/backend/app/utils/toolkit/search_toolkit.py index 2da2fe13..bc4d9915 100644 --- a/backend/app/utils/toolkit/search_toolkit.py +++ b/backend/app/utils/toolkit/search_toolkit.py @@ -51,19 +51,19 @@ class SearchToolkit(BaseSearchToolkit, AbstractToolkit): @listen_toolkit( BaseSearchToolkit.search_google, - lambda _, query, search_type="web": f"with query '{query}' and {search_type} result pages", + lambda _, query, search_type="web", number_of_result_pages=10, start_page=1: f"with query '{query}', search_type '{search_type}', {number_of_result_pages} result pages, starting from page {start_page}", ) - def search_google(self, query: str, search_type: str = "web") -> list[dict[str, Any]]: + def search_google(self, query: str, search_type: str = "web", number_of_result_pages: int = 10, start_page: int = 1) -> list[dict[str, Any]]: if env("GOOGLE_API_KEY") and env("SEARCH_ENGINE_ID"): - return super().search_google(query, search_type) + return super().search_google(query, search_type, number_of_result_pages, start_page) else: - return self.cloud_search_google(query, search_type) + return self.cloud_search_google(query, search_type, number_of_result_pages, start_page) - def cloud_search_google(self, query: str, search_type): + def cloud_search_google(self, query: str, search_type: str, number_of_result_pages: int = 10, start_page: int = 1): url = env_not_empty("SERVER_URL") res = httpx.get( url + "/proxy/google", - params={"query": query, "search_type": search_type}, + params={"query": query, "search_type": search_type, "number_of_result_pages": number_of_result_pages, "start_page": start_page}, headers={"api-key": env_not_empty("cloud_api_key")}, ) return res.json() @@ -185,12 +185,13 @@ class SearchToolkit(BaseSearchToolkit, AbstractToolkit): exclude_text: List[str] | None = None, use_autoprompt: bool = True, text: bool = False, + number_of_result_pages: int = 10, ) -> Dict[str, Any]: if env("EXA_API_KEY"): - res = super().search_exa(query, search_type, category, include_text, exclude_text, use_autoprompt, text) + res = super().search_exa(query, search_type, category, include_text, exclude_text, use_autoprompt, text, number_of_result_pages) return res else: - return self.cloud_search_exa(query, search_type, category, include_text, exclude_text, use_autoprompt, text) + return self.cloud_search_exa(query, search_type, category, include_text, exclude_text, use_autoprompt, text, number_of_result_pages) def cloud_search_exa( self, @@ -212,6 +213,7 @@ class SearchToolkit(BaseSearchToolkit, AbstractToolkit): exclude_text: List[str] | None = None, use_autoprompt: bool = True, text: bool = False, + number_of_result_pages: int = 10, ): url = env_not_empty("SERVER_URL") logger.debug(f">>>>>>>>>>>>>>>>{url}<<<<") @@ -225,6 +227,7 @@ class SearchToolkit(BaseSearchToolkit, AbstractToolkit): "exclude_text": exclude_text, "use_autoprompt": use_autoprompt, "text": text, + "number_of_result_pages": number_of_result_pages, }, headers={"api-key": env_not_empty("cloud_api_key")}, )