This commit is contained in:
puzhen 2025-10-12 14:43:19 +01:00
parent 70f7bb035b
commit 5ee5020bbe
2 changed files with 22 additions and 15 deletions

View file

@@ -823,13 +823,19 @@ Your capabilities include:
<web_search_workflow>
Your approach depends on available search tools:
**If Google Search is Available:**
- Initial Search: Start with `search_google` to get a list of relevant URLs
**If Search Tools are Available:**
- Initial Search: Start with `search_google` or `search_exa` to get a list of relevant URLs
- Browser-Based Exploration: Use the browser tools to investigate the URLs
**If Google Search is NOT Available:**
- **MUST start with direct website search**: Use `browser_visit_page` to go
directly to popular search engines and informational websites such as:
**If Search Tools are NOT Available:**
- **RECOMMENDED: Use Brave Search**: Navigate to `https://search.brave.com/`
using `browser_visit_page`. Brave Search is highly recommended as it:
* Works well without requiring login
* Has fewer CAPTCHA challenges compared to other search engines
* Provides clean, relevant search results
* Supports advanced search queries
- **Alternative Search Engines**: If Brave Search is unavailable, use
`browser_visit_page` to go directly to other popular search engines:
* General search: google.com, bing.com, duckduckgo.com
* Academic: scholar.google.com, pubmed.ncbi.nlm.nih.gov
* News: news.google.com, bbc.com/news, reuters.com
@@ -839,8 +845,6 @@ Your approach depends on available search tools:
sites using `browser_type` and submit with `browser_enter`
- **Extract URLs from results**: Only use URLs that appear in the search
results on these websites
- **Alternative Search**: If available, use `search_exa` for additional
results
**Common Browser Operations (both scenarios):**
- **Navigation and Exploration**: Use `browser_visit_page` to open URLs.

View file

@@ -51,19 +51,19 @@ class SearchToolkit(BaseSearchToolkit, AbstractToolkit):
@listen_toolkit(
    BaseSearchToolkit.search_google,
    lambda _, query, search_type="web", number_of_result_pages=10, start_page=1: (
        f"with query '{query}', search_type '{search_type}', "
        f"{number_of_result_pages} result pages, starting from page {start_page}"
    ),
)
def search_google(
    self,
    query: str,
    search_type: str = "web",
    number_of_result_pages: int = 10,
    start_page: int = 1,
) -> list[dict[str, Any]]:
    """Run a Google search, locally if configured, otherwise via the cloud proxy.

    When ``env("GOOGLE_API_KEY")`` and ``env("SEARCH_ENGINE_ID")`` are both
    truthy the call is delegated to the base toolkit's ``search_google``;
    otherwise it is forwarded to ``cloud_search_google``. All four arguments
    are passed through unchanged in either case.

    Args:
        query: The search query string.
        search_type: Kind of search to run; defaults to ``"web"``.
        number_of_result_pages: Forwarded as-is to the chosen backend;
            defaults to 10.
        start_page: Forwarded as-is to the chosen backend; defaults to 1.

    Returns:
        Whatever the selected backend returns (annotated as a list of
        result dictionaries).
    """
    # Prefer the user's own Google credentials when both are present.
    if env("GOOGLE_API_KEY") and env("SEARCH_ENGINE_ID"):
        return super().search_google(query, search_type, number_of_result_pages, start_page)
    # No local credentials: fall back to the hosted proxy endpoint.
    return self.cloud_search_google(query, search_type, number_of_result_pages, start_page)
def cloud_search_google(
    self,
    query: str,
    search_type: str,
    number_of_result_pages: int = 10,
    start_page: int = 1,
):
    """Run a Google search through the cloud proxy endpoint.

    Issues a GET request to ``<SERVER_URL>/proxy/google`` with the search
    arguments as query parameters and the ``cloud_api_key`` setting sent in
    the ``api-key`` header.

    Args:
        query: The search query string.
        search_type: Kind of search to run; sent to the proxy unchanged.
        number_of_result_pages: Sent to the proxy unchanged; defaults to 10.
        start_page: Sent to the proxy unchanged; defaults to 1.

    Returns:
        The decoded JSON body of the proxy response.
    """
    url = env_not_empty("SERVER_URL")
    res = httpx.get(
        url + "/proxy/google",
        params={
            "query": query,
            "search_type": search_type,
            "number_of_result_pages": number_of_result_pages,
            "start_page": start_page,
        },
        headers={"api-key": env_not_empty("cloud_api_key")},
    )
    # NOTE(review): the HTTP status is not checked here, so an error response
    # is returned to the caller as parsed JSON - confirm this is intended.
    return res.json()
@@ -185,12 +185,13 @@ class SearchToolkit(BaseSearchToolkit, AbstractToolkit):
exclude_text: List[str] | None = None,
use_autoprompt: bool = True,
text: bool = False,
number_of_result_pages: int = 10,
) -> Dict[str, Any]:
if env("EXA_API_KEY"):
res = super().search_exa(query, search_type, category, include_text, exclude_text, use_autoprompt, text)
res = super().search_exa(query, search_type, category, include_text, exclude_text, use_autoprompt, text, number_of_result_pages)
return res
else:
return self.cloud_search_exa(query, search_type, category, include_text, exclude_text, use_autoprompt, text)
return self.cloud_search_exa(query, search_type, category, include_text, exclude_text, use_autoprompt, text, number_of_result_pages)
def cloud_search_exa(
self,
@@ -212,6 +213,7 @@ class SearchToolkit(BaseSearchToolkit, AbstractToolkit):
exclude_text: List[str] | None = None,
use_autoprompt: bool = True,
text: bool = False,
number_of_result_pages: int = 10,
):
url = env_not_empty("SERVER_URL")
logger.debug(f">>>>>>>>>>>>>>>>{url}<<<<")
@@ -225,6 +227,7 @@ class SearchToolkit(BaseSearchToolkit, AbstractToolkit):
"exclude_text": exclude_text,
"use_autoprompt": use_autoprompt,
"text": text,
"number_of_result_pages": number_of_result_pages,
},
headers={"api-key": env_not_empty("cloud_api_key")},
)