# eigent/backend/app/utils/toolkit/search_toolkit.py
# 2025-08-12 01:16:39 +02:00

# 300 lines
# 11 KiB
# Python

from typing import Any, Dict, List, Literal
from camel.toolkits import SearchToolkit as BaseSearchToolkit
from camel.toolkits.function_tool import FunctionTool
import httpx
from loguru import logger
from app.component.environment import env, env_not_empty
from app.service.task import Agents
from app.utils.listen.toolkit_listen import listen_toolkit
from app.utils.toolkit.abstract_toolkit import AbstractToolkit
class SearchToolkit(BaseSearchToolkit, AbstractToolkit):
    """Search toolkit with a cloud-proxy fallback for Google and Exa.

    ``search_google`` and ``search_exa`` delegate to the base-class
    implementation when the provider's own credentials are present in the
    environment; otherwise the request is forwarded to the proxy endpoints
    under ``SERVER_URL`` and authenticated with ``cloud_api_key``.

    The wrappers for the remaining base-class providers are currently
    disabled and kept below as commented-out code.
    """

    agent_name: str = Agents.search_agent

    def __init__(
        self,
        api_task_id: str,
        agent_name: str | None = None,
        timeout: float | None = None,
        exclude_domains: List[str] | None = None,
    ):
        """Create a toolkit bound to one API task.

        Args:
            api_task_id: Identifier of the task; stored on the instance.
            agent_name: Overrides the class-level ``agent_name`` when it is
                not ``None``.
            timeout: Forwarded unchanged to the base toolkit initializer.
            exclude_domains: Forwarded unchanged to the base toolkit
                initializer.
        """
        self.api_task_id = api_task_id
        if agent_name is not None:
            self.agent_name = agent_name
        super().__init__(timeout=timeout, exclude_domains=exclude_domains)

    # @listen_toolkit(BaseSearchToolkit.search_wiki)
    # def search_wiki(self, entity: str) -> str:
    #     return super().search_wiki(entity)

    # @listen_toolkit(
    #     BaseSearchToolkit.search_linkup,
    #     lambda _,
    #     query,
    #     depth="standard",
    #     output_type="searchResults",
    #     structured_output_schema=None: f"Search linkup with query '{query}', depth '{depth}', output type '{output_type}', structured output schema '{structured_output_schema}'",
    #     lambda result: f"Search linkup returned {len(result)} results",
    # )
    # def search_linkup(
    #     self,
    #     query: str,
    #     depth: Literal["standard", "deep"] = "standard",
    #     output_type: Literal["searchResults", "sourcedAnswer", "structured"] = "searchResults",
    #     structured_output_schema: str | None = None,
    # ) -> dict[str, Any]:
    #     return super().search_linkup(query, depth, output_type, structured_output_schema)

    # Google search tool entry point: uses the base implementation when both
    # GOOGLE_API_KEY and SEARCH_ENGINE_ID are set, otherwise the cloud proxy.
    # NOTE(review): documented with comments instead of a docstring so the
    # function's ``__doc__`` is left untouched; tool schemas may be derived
    # from function metadata -- confirm before adding a docstring here.
    @listen_toolkit(
        BaseSearchToolkit.search_google,
        lambda _, query, search_type="web": f"with query '{query}' and {search_type} result pages",
    )
    def search_google(self, query: str, search_type: str = "web") -> list[dict[str, Any]]:
        if env("GOOGLE_API_KEY") and env("SEARCH_ENGINE_ID"):
            return super().search_google(query, search_type)
        return self.cloud_search_google(query, search_type)

    def cloud_search_google(self, query: str, search_type: str) -> Any:
        """Run a Google search through the cloud proxy.

        Sends a GET request to ``<SERVER_URL>/proxy/google`` with the
        ``cloud_api_key`` value in the ``api-key`` header.

        Args:
            query: Search query, sent as the ``query`` parameter.
            search_type: Sent as the ``search_type`` parameter.

        Returns:
            The decoded JSON body of the proxy response, whatever its status
            code. An error status is logged as a warning but not raised.
        """
        # A trailing slash in SERVER_URL would otherwise yield "//proxy/google".
        base_url = env_not_empty("SERVER_URL").rstrip("/")
        res = httpx.get(
            f"{base_url}/proxy/google",
            params={"query": query, "search_type": search_type},
            headers={"api-key": env_not_empty("cloud_api_key")},
        )
        if res.status_code >= 400:
            # The body is still returned to the caller; make the failure visible.
            logger.warning(f"Cloud Google search proxy returned HTTP {res.status_code}")
        return res.json()

    # @listen_toolkit(
    #     BaseSearchToolkit.search_duckduckgo,
    #     lambda _,
    #     query,
    #     source="text",
    #     max_results=5: f"Search DuckDuckGo with query '{query}', source '{source}', and max results {max_results}",
    #     lambda result: f"Search DuckDuckGo returned {len(result)} results",
    # )
    # def search_duckduckgo(self, query: str, source: str = "text", max_results: int = 5) -> list[dict[str, Any]]:
    #     return super().search_duckduckgo(query, source, max_results)

    # @listen_toolkit(
    #     BaseSearchToolkit.tavily_search,
    #     lambda _, query, num_results=5, **kwargs: f"Search Tavily with query '{query}' and {num_results} results",
    #     lambda result: f"Search Tavily returned {len(result)} results",
    # )
    # def tavily_search(self, query: str, num_results: int = 5, **kwargs) -> list[dict[str, Any]]:
    #     return super().tavily_search(query, num_results, **kwargs)

    # @listen_toolkit(
    #     BaseSearchToolkit.search_brave,
    #     lambda _, query, *args, **kwargs: f"Search Brave with query '{query}'",
    #     lambda result: f"Search Brave returned {len(result)} results",
    # )
    # def search_brave(
    #     self,
    #     q: str,
    #     country: str = "US",
    #     search_lang: str = "en",
    #     ui_lang: str = "en-US",
    #     count: int = 20,
    #     offset: int = 0,
    #     safesearch: str = "moderate",
    #     freshness: str | None = None,
    #     text_decorations: bool = True,
    #     spellcheck: bool = True,
    #     result_filter: str | None = None,
    #     goggles_id: str | None = None,
    #     units: str | None = None,
    #     extra_snippets: bool | None = None,
    #     summary: bool | None = None,
    # ) -> dict[str, Any]:
    #     return super().search_brave(
    #         q,
    #         country,
    #         search_lang,
    #         ui_lang,
    #         count,
    #         offset,
    #         safesearch,
    #         freshness,
    #         text_decorations,
    #         spellcheck,
    #         result_filter,
    #         goggles_id,
    #         units,
    #         extra_snippets,
    #         summary,
    #     )

    # @listen_toolkit(
    #     BaseSearchToolkit.search_bocha,
    #     lambda _,
    #     query,
    #     freshness="noLimit",
    #     summary=False,
    #     count=10,
    #     page=1: f"Search Bocha with query '{query}', freshness '{freshness}', summary '{summary}', count {count}, and page {page}",
    #     lambda result: f"Search Bocha returned {len(result)} results",
    # )
    # def search_bocha(
    #     self,
    #     query: str,
    #     freshness: str = "noLimit",
    #     summary: bool = False,
    #     count: int = 10,
    #     page: int = 1,
    # ) -> dict[str, Any]:
    #     return super().search_bocha(query, freshness, summary, count, page)

    # @listen_toolkit(
    #     BaseSearchToolkit.search_baidu,
    #     lambda _, query, max_results=5: f"Search Baidu with query '{query}' and max results {max_results}",
    #     lambda result: f"Search Baidu returned {len(result)} results",
    # )
    # def search_baidu(self, query: str, max_results: int = 5) -> dict[str, Any]:
    #     return super().search_baidu(query, max_results)

    # @listen_toolkit(
    #     BaseSearchToolkit.search_bing,
    #     lambda _, query: f"with query '{query}'",
    #     lambda result: f"Search Bing returned {len(result)} results",
    # )
    # def search_bing(self, query: str) -> dict[str, Any]:
    #     return super().search_bing(query)

    # Exa search tool entry point: uses the base implementation when
    # EXA_API_KEY is set, otherwise the cloud proxy. All arguments are passed
    # through unchanged in both cases.
    # NOTE(review): documented with comments instead of a docstring so the
    # function's ``__doc__`` is left untouched; tool schemas may be derived
    # from function metadata -- confirm before adding a docstring here.
    @listen_toolkit(BaseSearchToolkit.search_exa, lambda _, query, *args, **kwargs: f"{query}, {args}, {kwargs}")
    def search_exa(
        self,
        query: str,
        search_type: Literal["auto", "neural", "keyword"] = "auto",
        category: None
        | Literal[
            "company",
            "research paper",
            "news",
            "pdf",
            "github",
            "tweet",
            "personal site",
            "linkedin profile",
            "financial report",
        ] = None,
        include_text: List[str] | None = None,
        exclude_text: List[str] | None = None,
        use_autoprompt: bool = True,
        text: bool = False,
    ) -> Dict[str, Any]:
        if env("EXA_API_KEY"):
            return super().search_exa(query, search_type, category, include_text, exclude_text, use_autoprompt, text)
        return self.cloud_search_exa(query, search_type, category, include_text, exclude_text, use_autoprompt, text)

    def cloud_search_exa(
        self,
        query: str,
        search_type: Literal["auto", "neural", "keyword"] = "auto",
        category: None
        | Literal[
            "company",
            "research paper",
            "news",
            "pdf",
            "github",
            "tweet",
            "personal site",
            "linkedin profile",
            "financial report",
        ] = None,
        include_text: List[str] | None = None,
        exclude_text: List[str] | None = None,
        use_autoprompt: bool = True,
        text: bool = False,
    ) -> Any:
        """Run an Exa search through the cloud proxy.

        Sends a POST request to ``<SERVER_URL>/proxy/exa`` whose JSON body
        carries every argument under its own name, with the ``cloud_api_key``
        value in the ``api-key`` header.

        Returns:
            The decoded JSON body of the proxy response, whatever its status
            code. An error status is logged as a warning but not raised.
        """
        # A trailing slash in SERVER_URL would otherwise yield "//proxy/exa".
        endpoint = env_not_empty("SERVER_URL").rstrip("/") + "/proxy/exa"
        logger.debug(f"Cloud Exa search request: POST {endpoint}")
        payload = {
            "query": query,
            "search_type": search_type,
            "category": category,
            "include_text": include_text,
            "exclude_text": exclude_text,
            "use_autoprompt": use_autoprompt,
            "text": text,
        }
        res = httpx.post(
            endpoint,
            json=payload,
            headers={"api-key": env_not_empty("cloud_api_key")},
        )
        logger.debug(f"Cloud Exa search response: {res}")
        if res.status_code >= 400:
            # The body is still returned to the caller; make the failure visible.
            logger.warning(f"Cloud Exa search proxy returned HTTP {res.status_code}")
        return res.json()

    # @listen_toolkit(
    #     BaseSearchToolkit.search_alibaba_tongxiao,
    #     lambda _, *args, **kwargs: f"Search Alibaba Tongxiao with args {args} and kwargs {kwargs}",
    #     lambda result: f"Search Alibaba Tongxiao returned {len(result)} results",
    # )
    # def search_alibaba_tongxiao(
    #     self,
    #     query: str,
    #     time_range: Literal["OneDay", "OneWeek", "OneMonth", "OneYear", "NoLimit"] = "NoLimit",
    #     industry: Literal[
    #         "finance",
    #         "law",
    #         "medical",
    #         "internet",
    #         "tax",
    #         "news_province",
    #         "news_center",
    #     ]
    #     | None = None,
    #     page: int = 1,
    #     return_main_text: bool = False,
    #     return_markdown_text: bool = True,
    #     enable_rerank: bool = True,
    # ) -> Dict[str, Any]:
    #     return super().search_alibaba_tongxiao(
    #         query,
    #         time_range,
    #         industry,
    #         page,
    #         return_main_text,
    #         return_markdown_text,
    #         enable_rerank,
    #     )

    @classmethod
    def get_can_use_tools(cls, api_task_id: str) -> list[FunctionTool]:
        """Return the search tools usable with the current environment.

        A tool is included when either the provider's own credentials or the
        ``cloud_api_key`` fallback is configured.

        Args:
            api_task_id: Task identifier used to construct the toolkit
                instance that backs the returned tools.

        Returns:
            ``FunctionTool`` wrappers for ``search_google`` and/or
            ``search_exa``; empty when nothing is configured.
        """
        # Build via ``cls`` so subclasses get tools bound to their own type.
        search_toolkit = cls(api_task_id)
        tools: list[FunctionTool] = [
            # FunctionTool(search_toolkit.search_wiki),
            # FunctionTool(search_toolkit.search_duckduckgo),
            # FunctionTool(search_toolkit.search_baidu),
            # FunctionTool(search_toolkit.search_bing),
        ]
        # if env("LINKUP_API_KEY"):
        #     tools.append(FunctionTool(search_toolkit.search_linkup))
        # if env("BRAVE_API_KEY"):
        #     tools.append(FunctionTool(search_toolkit.search_brave))
        if (env("GOOGLE_API_KEY") and env("SEARCH_ENGINE_ID")) or env("cloud_api_key"):
            tools.append(FunctionTool(search_toolkit.search_google))
        # if env("TAVILY_API_KEY"):
        #     tools.append(FunctionTool(search_toolkit.tavily_search))
        # if env("BOCHA_API_KEY"):
        #     tools.append(FunctionTool(search_toolkit.search_bocha))
        if env("EXA_API_KEY") or env("cloud_api_key"):
            tools.append(FunctionTool(search_toolkit.search_exa))
        # if env("TONGXIAO_API_KEY"):
        #     tools.append(FunctionTool(search_toolkit.search_alibaba_tongxiao))
        return tools

    def get_tools(self) -> List[FunctionTool]:
        """Return the tools of this instance: only ``search_exa``.

        NOTE(review): unlike ``get_can_use_tools`` this neither checks the
        environment nor exposes ``search_google`` -- confirm this is intended.
        """
        return [FunctionTool(self.search_exa)]