update

2026-05-30 03:35:54 +00:00 · 2025-10-12 14:43:19 +01:00 · 2025-10-12 14:43:19 +01:00 · 5ee5020bbe
commit 5ee5020bbe
parent 70f7bb035b
2 changed files with 22 additions and 15 deletions
--- a/backend/app/utils/agent.py
+++ b/backend/app/utils/agent.py
@@ -823,13 +823,19 @@ Your capabilities include:
 <web_search_workflow>
 Your approach depends on available search tools:

-**If Google Search is Available:**
-- Initial Search: Start with `search_google` to get a list of relevant URLs
+**If Search Tools are Available:**
+- Initial Search: Start with `search_google` or `search_exa` to get a list of relevant URLs
 - Browser-Based Exploration: Use the browser tools to investigate the URLs

-**If Google Search is NOT Available:**
-- **MUST start with direct website search**: Use `browser_visit_page` to go
-  directly to popular search engines and informational websites such as:
+**If Search Tools are NOT Available:**
+- **RECOMMENDED: Use Brave Search**: Navigate to `https://search.brave.com/`
+  using `browser_visit_page`. Brave Search is highly recommended as it:
+  * Works well without requiring login
+  * Has fewer CAPTCHA challenges compared to other search engines
+  * Provides clean, relevant search results
+  * Supports advanced search queries
+- **Alternative Search Engines**: If Brave Search is unavailable, use
+  `browser_visit_page` to go directly to other popular search engines:
  * General search: google.com, bing.com, duckduckgo.com
  * Academic: scholar.google.com, pubmed.ncbi.nlm.nih.gov
  * News: news.google.com, bbc.com/news, reuters.com
@@ -839,8 +845,6 @@ Your approach depends on available search tools:
  sites using `browser_type` and submit with `browser_enter`
 - **Extract URLs from results**: Only use URLs that appear in the search
  results on these websites
-- **Alternative Search**: If available, use `search_exa` for additional
-  results

 **Common Browser Operations (both scenarios):**
 - **Navigation and Exploration**: Use `browser_visit_page` to open URLs.
--- a/backend/app/utils/toolkit/search_toolkit.py
+++ b/backend/app/utils/toolkit/search_toolkit.py
@@ -51,19 +51,19 @@ class SearchToolkit(BaseSearchToolkit, AbstractToolkit):

    @listen_toolkit(
        BaseSearchToolkit.search_google,
-        lambda _, query, search_type="web": f"with query '{query}' and {search_type} result pages",
+        lambda _, query, search_type="web", number_of_result_pages=10, start_page=1: f"with query '{query}', search_type '{search_type}', {number_of_result_pages} result pages, starting from page {start_page}",
    )
-    def search_google(self, query: str, search_type: str = "web") -> list[dict[str, Any]]:
+    def search_google(self, query: str, search_type: str = "web", number_of_result_pages: int = 10, start_page: int = 1) -> list[dict[str, Any]]:
        if env("GOOGLE_API_KEY") and env("SEARCH_ENGINE_ID"):
-            return super().search_google(query, search_type)
+            return super().search_google(query, search_type, number_of_result_pages, start_page)
        else:
-            return self.cloud_search_google(query, search_type)
+            return self.cloud_search_google(query, search_type, number_of_result_pages, start_page)

-    def cloud_search_google(self, query: str, search_type):
+    def cloud_search_google(self, query: str, search_type: str, number_of_result_pages: int = 10, start_page: int = 1):
        url = env_not_empty("SERVER_URL")
        res = httpx.get(
            url + "/proxy/google",
-            params={"query": query, "search_type": search_type},
+            params={"query": query, "search_type": search_type, "number_of_result_pages": number_of_result_pages, "start_page": start_page},
            headers={"api-key": env_not_empty("cloud_api_key")},
        )
        return res.json()
@@ -185,12 +185,13 @@ class SearchToolkit(BaseSearchToolkit, AbstractToolkit):
        exclude_text: List[str] | None = None,
        use_autoprompt: bool = True,
        text: bool = False,
+        number_of_result_pages: int = 10,
    ) -> Dict[str, Any]:
        if env("EXA_API_KEY"):
-            res = super().search_exa(query, search_type, category, include_text, exclude_text, use_autoprompt, text)
+            res = super().search_exa(query, search_type, category, include_text, exclude_text, use_autoprompt, text, number_of_result_pages)
            return res
        else:
-            return self.cloud_search_exa(query, search_type, category, include_text, exclude_text, use_autoprompt, text)
+            return self.cloud_search_exa(query, search_type, category, include_text, exclude_text, use_autoprompt, text, number_of_result_pages)

    def cloud_search_exa(
        self,
@@ -212,6 +213,7 @@ class SearchToolkit(BaseSearchToolkit, AbstractToolkit):
        exclude_text: List[str] | None = None,
        use_autoprompt: bool = True,
        text: bool = False,
+        number_of_result_pages: int = 10,
    ):
        url = env_not_empty("SERVER_URL")
        logger.debug(f">>>>>>>>>>>>>>>>{url}<<<<")
@@ -225,6 +227,7 @@ class SearchToolkit(BaseSearchToolkit, AbstractToolkit):
                "exclude_text": exclude_text,
                "use_autoprompt": use_autoprompt,
                "text": text,
+                "number_of_result_pages": number_of_result_pages,
            },
            headers={"api-key": env_not_empty("cloud_api_key")},
        )