# Mirror of https://github.com/eigent-ai/eigent.git
# Synced 2026-05-02 21:50:16 +00:00
# 536 lines, 23 KiB, Python
import os
|
|
import subprocess
|
|
import time
|
|
import asyncio
|
|
import json
|
|
from typing import Any, Dict, List, Optional
|
|
from loguru import logger
|
|
import websockets
|
|
import websockets.exceptions
|
|
|
|
from camel.models import BaseModelBackend
|
|
from camel.toolkits.hybrid_browser_toolkit.hybrid_browser_toolkit_ts import (
|
|
HybridBrowserToolkit as BaseHybridBrowserToolkit,
|
|
)
|
|
from camel.toolkits.hybrid_browser_toolkit.ws_wrapper import \
|
|
WebSocketBrowserWrapper as BaseWebSocketBrowserWrapper
|
|
from app.component.command import bun, uv
|
|
from app.service.task import Agents
|
|
from app.utils.listen.toolkit_listen import listen_toolkit
|
|
from app.utils.toolkit.abstract_toolkit import AbstractToolkit
|
|
|
|
|
|
class WebSocketBrowserWrapper(BaseWebSocketBrowserWrapper):
    """WebSocket browser wrapper with extra logging and connection checks.

    Overrides the base wrapper's receive loop, startup sequence and command
    sending. The Node.js WebSocket server found in ``self.ts_dir`` is
    installed, built and launched through ``uv run``.
    """

    def __init__(self, config: Optional[Dict[str, Any]] = None):
        """Initialize wrapper.

        Args:
            config: Browser configuration forwarded unchanged to the base
                wrapper; it is later sent to the server with the ``init``
                command.
        """
        super().__init__(config)
        logger.info(f"WebSocketBrowserWrapper using ts_dir: {self.ts_dir}")

    async def _receive_loop(self):
        """Background task to receive messages from WebSocket with enhanced logging.

        Each received JSON message is matched by its ``id`` against
        ``self._pending_responses`` and resolves the corresponding future.
        The loop ends on cancellation, on a closed connection or on any
        WebSocket/unexpected error; undecodable messages are skipped.
        ``self.websocket`` is always reset to ``None`` when the loop exits.
        """
        logger.debug("WebSocket receive loop started")
        disconnect_reason = None

        try:
            while self.websocket:
                try:
                    response_data = await self.websocket.recv()
                    response = json.loads(response_data)

                    message_id = response.get('id')
                    if message_id and message_id in self._pending_responses:
                        # Set the result for the waiting coroutine
                        future = self._pending_responses.pop(message_id)
                        if not future.done():
                            future.set_result(response)
                            logger.debug(
                                f"Processed response for message {message_id}")
                    else:
                        # Log unexpected messages
                        logger.warning(
                            f"Received unexpected message: {response}")

                except asyncio.CancelledError:
                    disconnect_reason = "Receive loop cancelled"
                    logger.info(f"WebSocket disconnect: {disconnect_reason}")
                    break
                except websockets.exceptions.ConnectionClosed as e:
                    # NOTE(review): futures still pending here are not failed
                    # and are left to whatever timeout the sender applies -
                    # confirm that this is intended.
                    disconnect_reason = f"WebSocket closed: code={e.code}, reason={e.reason}"
                    logger.warning(
                        f"WebSocket disconnect: {disconnect_reason}")
                    break
                except websockets.exceptions.WebSocketException as e:
                    disconnect_reason = f"WebSocket error: {type(e).__name__}: {e}"
                    logger.error(
                        f"WebSocket disconnect: {disconnect_reason}")
                    break
                except json.JSONDecodeError as e:
                    logger.error(f"Failed to decode WebSocket message: {e}")
                    continue  # Try to continue on JSON errors
                except Exception as e:
                    disconnect_reason = f"Unexpected error: {type(e).__name__}: {e}"
                    # loguru has no ``exc_info`` keyword: passing it makes
                    # loguru run ``str.format`` on the message (which breaks
                    # on braces in the exception text) and attaches no
                    # traceback. ``opt(exception=True)`` records the traceback.
                    logger.opt(exception=True).error(
                        f"WebSocket disconnect: {disconnect_reason}")
                    # Notify all pending futures of the error
                    for future in self._pending_responses.values():
                        if not future.done():
                            future.set_exception(e)
                    self._pending_responses.clear()
                    break
        finally:
            logger.info(
                f"WebSocket receive loop terminated. Reason: {disconnect_reason or 'Normal shutdown'}")
            # Mark the websocket as None to indicate disconnection
            self.websocket = None

    async def _run_npm(self, *npm_args: str):
        """Run ``uv run npm <npm_args>`` inside ``ts_dir``.

        The blocking ``subprocess.run`` call is executed in a worker thread
        so the event loop keeps running while npm works.

        Returns:
            The ``subprocess.CompletedProcess`` with captured text output.
        """
        return await asyncio.to_thread(
            subprocess.run,
            [uv(), "run", "npm", *npm_args],
            cwd=self.ts_dir,
            capture_output=True,
            text=True,
        )

    async def _ensure_server_built(self):
        """Install npm dependencies if missing, then build the TypeScript code.

        Raises:
            RuntimeError: If ``npm install`` or ``npm run build`` exits with
                a non-zero status.
        """
        # Check if node_modules exists (dependencies installed)
        node_modules_path = os.path.join(self.ts_dir, "node_modules")
        if not os.path.exists(node_modules_path):
            logger.warning("Node modules not found. Running npm install...")
            install_result = await self._run_npm("install")
            if install_result.returncode != 0:
                logger.error(f"npm install failed: {install_result.stderr}")
                raise RuntimeError(
                    f"Failed to install npm dependencies: {install_result.stderr}\n"  # noqa:E501
                    f"Please run 'npm install' in {self.ts_dir} manually."
                )
            logger.info("npm dependencies installed successfully")

        # Ensure the TypeScript code is built
        build_result = await self._run_npm("run", "build")
        if build_result.returncode != 0:
            logger.error(f"TypeScript build failed: {build_result.stderr}")
            raise RuntimeError(
                f"TypeScript build failed: {build_result.stderr}")

        # Log warnings but don't fail on them
        if build_result.stderr:
            logger.warning(
                f"TypeScript build warnings: {build_result.stderr}")
        logger.info("TypeScript build completed successfully")

    async def _wait_for_server_ready(self, timeout: float = 10) -> None:
        """Wait until the server prints ``SERVER_READY:<port>`` on stdout.

        Sets ``self.server_port`` from that line. ``readline`` runs in a
        worker thread and is bounded by the remaining time, so the timeout
        holds even when the server prints nothing at all.

        Args:
            timeout: Maximum number of seconds to wait for the ready line.

        Raises:
            RuntimeError: If the process exits before becoming ready, or if
                no ready line arrives in time (the process is then killed).
        """
        deadline = time.monotonic() + timeout

        while True:
            if self.process.poll() is not None:
                # Process died
                stderr = self.process.stderr.read()  # type: ignore
                raise RuntimeError(
                    f"WebSocket server failed to start: {stderr}")

            remaining = deadline - time.monotonic()
            if remaining <= 0:
                break

            try:
                line = await asyncio.wait_for(
                    asyncio.to_thread(
                        self.process.stdout.readline),  # type: ignore
                    timeout=remaining,
                )
            except asyncio.TimeoutError:
                break

            if not line:
                # EOF on stdout: yield briefly, then let poll() report the
                # exit instead of spinning on empty reads.
                await asyncio.sleep(0.05)
                continue

            logger.debug(f"WebSocket server output: {line}")
            if not line.startswith("SERVER_READY:"):
                continue
            try:
                self.server_port = int(line.split(":")[1].strip())
            except (ValueError, IndexError):
                continue
            logger.info(
                f"WebSocket server ready on port {self.server_port}")
            return

        # Killing the process also unblocks a reader thread still waiting in
        # readline(), because its pipe reaches EOF.
        self.process.kill()
        raise RuntimeError(
            "WebSocket server failed to start within timeout")

    async def start(self):
        """Build, launch and connect to the TypeScript WebSocket server.

        Steps: install/build the server, spawn it, wait for its port,
        open the WebSocket connection, start the receive loop and send the
        ``init`` command with ``self.config``. An ``init`` timeout is logged
        and tolerated; every other failure is raised.

        Raises:
            RuntimeError: If install, build, server startup, the WebSocket
                connection or a non-timeout ``init`` error fails.
        """
        await self._ensure_server_built()

        # Start the WebSocket server
        self.process = subprocess.Popen(
            [uv(), "run", "node", "websocket-server.js"],  # bun not support playwright, use uv nodejs-bin
            cwd=self.ts_dir,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
        )

        # Wait for server to output the port
        await self._wait_for_server_ready(timeout=10)

        # Connect to the WebSocket server
        try:
            self.websocket = await websockets.connect(
                f"ws://localhost:{self.server_port}",
                ping_interval=30,
                ping_timeout=10,
                max_size=50 * 1024 * 1024,  # 50MB limit to match server
            )
            logger.info("Connected to WebSocket server")
        except Exception as e:
            self.process.kill()
            raise RuntimeError(
                f"Failed to connect to WebSocket server: {e}") from e

        # Start the background receiver task; without it no response is
        # ever delivered to a waiting command.
        self._receive_task = asyncio.create_task(self._receive_loop())
        logger.debug("Started WebSocket receiver task")

        # Initialize the browser toolkit
        logger.debug(f"send init {self.config}")
        try:
            await self._send_command("init", self.config)
            logger.debug("WebSocket server initialized successfully")
        except RuntimeError as e:
            if "Timeout waiting for response to command: init" not in str(e):
                raise
            # Continue without error - the WebSocket server is likely still
            # initializing
            logger.warning(
                "Init timeout - continuing anyway (CDP connection may be slow)")

    async def _send_command(self, command: str, params: Dict[str, Any]) -> \
            Dict[str, Any]:
        """Send a command to the WebSocket server with enhanced error handling.

        Verifies that a connection exists and, when the connection object
        exposes ``state``, that it is OPEN, then delegates to the base
        implementation.

        Args:
            command: Name of the command to send.
            params: Parameters passed along with the command.

        Returns:
            Whatever the base ``_send_command`` returns.

        Raises:
            RuntimeError: If there is no open connection, or if the base
                implementation raises one. When the message mentions
                "WebSocket" or "connection", ``self.websocket`` is reset to
                ``None`` before re-raising.
            Exception: Any other error from the base implementation is
                logged and re-raised unchanged.
        """
        try:
            # First ensure we have a valid connection
            if self.websocket is None:
                raise RuntimeError("WebSocket connection not established")

            # Check connection state before sending
            if hasattr(self.websocket, 'state'):
                from websockets.protocol import State
                if self.websocket.state != State.OPEN:
                    raise RuntimeError(
                        f"WebSocket is in {self.websocket.state} state, not OPEN")

            logger.debug(
                f"Sending command '{command}' with params: {params}")

            # Call parent's _send_command
            result = await super()._send_command(command, params)

            logger.debug(f"Command '{command}' completed successfully")
            return result

        except RuntimeError as e:
            logger.error(f"Failed to send command '{command}': {e}")
            # Check if it's a connection issue
            message = str(e)
            if "WebSocket" in message or "connection" in message.lower():
                # Mark connection as dead
                self.websocket = None
            raise
        except Exception as e:
            logger.error(
                f"Unexpected error sending command '{command}': {type(e).__name__}: {e}")
            raise
|
|
|
|
|
|
# WebSocket connection pool
|
|
class WebSocketConnectionPool:
    """Manage WebSocket browser connections with session-based pooling.

    One wrapper is kept per session ID. All public methods serialize on a
    single ``asyncio.Lock``, so creating a connection (which starts a
    server process) blocks other pool operations until it finishes.
    """

    def __init__(self):
        """Create an empty pool."""
        self._connections: Dict[str, "WebSocketBrowserWrapper"] = {}
        self._lock = asyncio.Lock()

    @staticmethod
    async def _is_healthy(wrapper: "WebSocketBrowserWrapper",
                          session_id: str) -> bool:
        """Report whether ``wrapper`` still has a usable WebSocket.

        Uses the connection's ``state`` attribute when present, otherwise
        its ``open`` attribute, and as a last resort a ping that must be
        answered within one second. Any error counts as unhealthy.
        """
        websocket = wrapper.websocket
        if not websocket:
            return False

        try:
            # Check WebSocket state based on available attributes
            if hasattr(websocket, 'state'):
                from websockets.protocol import State
                is_open = websocket.state == State.OPEN
                if not is_open:
                    logger.debug(
                        f"Session {session_id} WebSocket state: {websocket.state}")
                return is_open

            if hasattr(websocket, 'open'):
                return bool(websocket.open)

            # Try ping as last resort. ``ping()`` only sends the frame and
            # returns a waiter; the waiter must be awaited too, otherwise
            # the check passes without the peer ever answering.
            async def _ping_and_wait_for_pong():
                pong_waiter = await websocket.ping()
                await pong_waiter

            await asyncio.wait_for(_ping_and_wait_for_pong(), timeout=1.0)
            return True
        except Exception as e:
            # ``Exception`` rather than a bare ``except`` so that task
            # cancellation and KeyboardInterrupt still propagate.
            logger.debug(
                f"Health check failed for session {session_id}: {e}")
            return False

    async def get_connection(self, session_id: str, config: Dict[
            str, Any]) -> "WebSocketBrowserWrapper":
        """Get or create a connection for the given session ID.

        A pooled wrapper is reused when its health check passes; otherwise
        it is stopped (errors are only logged) and replaced by a freshly
        started one.

        Args:
            session_id: Key identifying the pooled connection.
            config: Configuration used only when a new wrapper is created.

        Returns:
            The healthy or newly started wrapper for ``session_id``.

        Raises:
            Exception: Whatever ``WebSocketBrowserWrapper.start`` raises; in
                that case nothing is stored for ``session_id``.
        """
        async with self._lock:
            # Check if we have an existing connection for this session
            if session_id in self._connections:
                wrapper = self._connections[session_id]

                if await self._is_healthy(wrapper, session_id):
                    logger.debug(
                        f"Reusing healthy WebSocket connection for session {session_id}")
                    return wrapper

                # Connection is unhealthy, clean it up
                logger.info(
                    f"Removing unhealthy WebSocket connection for session {session_id}")
                try:
                    await wrapper.stop()
                except Exception as e:
                    logger.debug(
                        f"Error stopping unhealthy wrapper: {e}")
                del self._connections[session_id]

            # Create a new connection
            logger.info(
                f"Creating new WebSocket connection for session {session_id}")
            wrapper = WebSocketBrowserWrapper(config)
            await wrapper.start()
            self._connections[session_id] = wrapper
            logger.info(
                f"Successfully created WebSocket connection for session {session_id}")
            return wrapper

    async def close_connection(self, session_id: str):
        """Close and remove a connection for the given session ID.

        Does nothing when no connection is pooled under ``session_id``.
        """
        async with self._lock:
            await self._close_connection_unlocked(session_id)

    async def _close_connection_unlocked(self, session_id: str):
        """Close connection without acquiring lock (for internal use).

        The caller must already hold ``self._lock``. Errors raised while
        stopping the wrapper are logged, not propagated.
        """
        # Remove the entry first so that an interrupted stop() cannot leave
        # a half-closed wrapper behind in the pool.
        wrapper = self._connections.pop(session_id, None)
        if wrapper is None:
            return

        try:
            await wrapper.stop()
        except Exception as e:
            logger.error(
                f"Error closing WebSocket connection for session {session_id}: {e}")
        logger.info(
            f"Closed WebSocket connection for session {session_id}")

    async def close_all(self):
        """Close all connections in the pool."""
        async with self._lock:
            # Iterate over a snapshot: closing removes entries from the dict.
            for session_id in list(self._connections):
                await self._close_connection_unlocked(session_id)
            logger.info("Closed all WebSocket connections")
|
|
|
|
|
|
# Global connection pool instance
|
|
# Single module-level pool: HybridBrowserToolkit obtains and releases its
# WebSocket connection through it, keyed by the toolkit's session ID.
websocket_connection_pool = WebSocketConnectionPool()
|
|
|
|
|
|
class HybridBrowserToolkit(BaseHybridBrowserToolkit, AbstractToolkit):
    """Browser toolkit bound to an API task and backed by the shared pool.

    Every public ``browser_*`` tool delegates to the base toolkit and is
    wrapped with ``listen_toolkit``. The WebSocket connection is obtained
    from the module-level ``websocket_connection_pool``.
    """

    agent_name: str = Agents.search_agent

    def __init__(
        self,
        api_task_id: str,
        *,
        headless: bool = False,
        user_data_dir: str | None = None,
        stealth: bool = True,
        web_agent_model: BaseModelBackend | None = None,
        cache_dir: str = "tmp/",
        enabled_tools: List[str] | None = None,
        browser_log_to_file: bool = False,
        session_id: str | None = None,
        default_start_url: str = "https://google.com/",
        default_timeout: int | None = None,
        short_timeout: int | None = None,
        navigation_timeout: int | None = None,
        network_idle_timeout: int | None = None,
        screenshot_timeout: int | None = None,
        page_stability_timeout: int | None = None,
        dom_content_loaded_timeout: int | None = None,
        viewport_limit: bool = False,
        connect_over_cdp: bool = True,
        cdp_url: str | None = "http://localhost:9222",
    ) -> None:
        """Store the task ID and forward all options to the base toolkit.

        Args:
            api_task_id: Identifier of the API task this toolkit belongs to.

        All keyword-only arguments are passed through to the base class
        constructor unchanged.
        """
        self.api_task_id = api_task_id
        super().__init__(
            headless=headless,
            user_data_dir=user_data_dir,
            stealth=stealth,
            web_agent_model=web_agent_model,
            cache_dir=cache_dir,
            enabled_tools=enabled_tools,
            browser_log_to_file=browser_log_to_file,
            session_id=session_id,
            default_start_url=default_start_url,
            default_timeout=default_timeout,
            short_timeout=short_timeout,
            navigation_timeout=navigation_timeout,
            network_idle_timeout=network_idle_timeout,
            screenshot_timeout=screenshot_timeout,
            page_stability_timeout=page_stability_timeout,
            dom_content_loaded_timeout=dom_content_loaded_timeout,
            viewport_limit=viewport_limit,
            connect_over_cdp=connect_over_cdp,
            cdp_url=cdp_url,
        )

    def _pool_session_id(self) -> str:
        """Return the pool key: ``session_id`` from the config or 'default'."""
        return self._ws_config.get('session_id', 'default')

    async def _ensure_ws_wrapper(self):
        """Ensure WebSocket wrapper is initialized using connection pool.

        If the wrapper returned by the pool has no live WebSocket, the
        pooled connection is closed and requested once more.
        """
        # Get session ID from config or use default
        session_id = self._pool_session_id()

        # Get or create connection from pool
        self._ws_wrapper = await websocket_connection_pool.get_connection(
            session_id, self._ws_config)

        # Additional health check
        if self._ws_wrapper.websocket is None:
            logger.warning(
                f"WebSocket connection for session {session_id} is None after pool retrieval, recreating...")
            await websocket_connection_pool.close_connection(session_id)
            self._ws_wrapper = await websocket_connection_pool.get_connection(
                session_id, self._ws_config)

    def clone_for_new_session(self,
                              new_session_id: str | None = None) -> "HybridBrowserToolkit":
        """Create a toolkit with the same settings for another session.

        Args:
            new_session_id: Session ID for the clone; when omitted, the
                first 8 characters of a random UUID4 are used.

        Returns:
            A new ``HybridBrowserToolkit`` for the same ``api_task_id`` whose
            cache directory is a ``_clone_<session id>`` subdirectory of
            this toolkit's cache directory.
        """
        import uuid

        if new_session_id is None:
            new_session_id = str(uuid.uuid4())[:8]

        # Read the browser config once instead of once per field.
        browser_config = self.config_loader.get_browser_config()

        return HybridBrowserToolkit(
            self.api_task_id,
            headless=self._headless,
            user_data_dir=self._user_data_dir,
            stealth=self._stealth,
            web_agent_model=self._web_agent_model,
            cache_dir=f"{self._cache_dir.rstrip('/')}/_clone_{new_session_id}/",
            enabled_tools=self.enabled_tools.copy(),
            browser_log_to_file=self._browser_log_to_file,
            session_id=new_session_id,
            default_start_url=self._default_start_url,
            default_timeout=self._default_timeout,
            short_timeout=self._short_timeout,
            navigation_timeout=self._navigation_timeout,
            network_idle_timeout=self._network_idle_timeout,
            screenshot_timeout=self._screenshot_timeout,
            page_stability_timeout=self._page_stability_timeout,
            dom_content_loaded_timeout=self._dom_content_loaded_timeout,
            viewport_limit=self._viewport_limit,
            connect_over_cdp=browser_config.connect_over_cdp,
            cdp_url=browser_config.cdp_url,
        )

    @classmethod
    def toolkit_name(cls) -> str:
        """Return the display name of this toolkit."""
        return "Browser Toolkit"

    async def close(self):
        """Close the browser toolkit and release WebSocket connection.

        A failure while closing the browser is logged and does not prevent
        the pooled connection from being released.
        """
        try:
            # Close browser if needed. The base implementation is called
            # directly, so the ``listen_toolkit`` wrapper is not involved.
            if self._ws_wrapper:
                await super().browser_close()
        except Exception as e:
            logger.error(f"Error closing browser: {e}")

        # Release connection from pool
        session_id = self._pool_session_id()
        await websocket_connection_pool.close_connection(session_id)
        logger.info(
            f"Released WebSocket connection for session {session_id}")

    def __del__(self):
        """Cleanup when object is garbage collected.

        Only logs. Attributes are read defensively because the finalizer
        also runs for instances whose ``__init__`` did not complete.
        """
        if getattr(self, '_ws_wrapper', None):
            ws_config = getattr(self, '_ws_config', None) or {}
            session_id = ws_config.get('session_id', 'default')
            logger.debug(
                f"HybridBrowserToolkit for session {session_id} is being garbage collected")

    @listen_toolkit(BaseHybridBrowserToolkit.browser_open)
    async def browser_open(self) -> Dict[str, Any]:
        """Delegate to the base ``browser_open``."""
        return await super().browser_open()

    @listen_toolkit(BaseHybridBrowserToolkit.browser_close)
    async def browser_close(self) -> str:
        """Delegate to the base ``browser_close``."""
        return await super().browser_close()

    @listen_toolkit(BaseHybridBrowserToolkit.browser_visit_page)
    async def browser_visit_page(self, url: str) -> Dict[str, Any]:
        """Delegate to the base ``browser_visit_page`` with debug logging.

        Failures are logged with the URL and re-raised unchanged.
        """
        logger.debug(f"browser_visit_page called with URL: {url}")
        try:
            result = await super().browser_visit_page(url)
        except Exception as e:
            logger.error(
                f"browser_visit_page failed for URL {url}: {type(e).__name__}: {e}")
            raise
        logger.debug(f"browser_visit_page succeeded for URL: {url}")
        return result

    @listen_toolkit(BaseHybridBrowserToolkit.browser_back)
    async def browser_back(self) -> Dict[str, Any]:
        """Delegate to the base ``browser_back``."""
        return await super().browser_back()

    @listen_toolkit(BaseHybridBrowserToolkit.browser_forward)
    async def browser_forward(self) -> Dict[str, Any]:
        """Delegate to the base ``browser_forward``."""
        return await super().browser_forward()

    @listen_toolkit(BaseHybridBrowserToolkit.browser_get_page_snapshot)
    async def browser_get_page_snapshot(self) -> str:
        """Delegate to the base ``browser_get_page_snapshot``."""
        return await super().browser_get_page_snapshot()

    @listen_toolkit(BaseHybridBrowserToolkit.browser_get_som_screenshot)
    async def browser_get_som_screenshot(self, read_image: bool = False,
                                         instruction: str | None = None) -> str:
        """Delegate to the base ``browser_get_som_screenshot``."""
        return await super().browser_get_som_screenshot(read_image,
                                                        instruction)

    @listen_toolkit(BaseHybridBrowserToolkit.browser_click)
    async def browser_click(self, *, ref: str) -> Dict[str, Any]:
        """Delegate to the base ``browser_click``."""
        return await super().browser_click(ref=ref)

    @listen_toolkit(BaseHybridBrowserToolkit.browser_type)
    async def browser_type(self, *, ref: str, text: str) -> Dict[str, Any]:
        """Delegate to the base ``browser_type``."""
        return await super().browser_type(ref=ref, text=text)

    @listen_toolkit(BaseHybridBrowserToolkit.browser_select)
    async def browser_select(self, *, ref: str, value: str) -> Dict[
            str, Any]:
        """Delegate to the base ``browser_select``."""
        return await super().browser_select(ref=ref, value=value)

    @listen_toolkit(BaseHybridBrowserToolkit.browser_scroll)
    async def browser_scroll(self, *, direction: str, amount: int = 500) -> \
            Dict[str, Any]:
        """Delegate to the base ``browser_scroll``."""
        return await super().browser_scroll(direction=direction,
                                            amount=amount)

    @listen_toolkit(BaseHybridBrowserToolkit.browser_enter)
    async def browser_enter(self) -> Dict[str, Any]:
        """Delegate to the base ``browser_enter``."""
        return await super().browser_enter()

    @listen_toolkit(BaseHybridBrowserToolkit.browser_wait_user)
    async def browser_wait_user(self, timeout_sec: float | None = None) -> \
            Dict[str, Any]:
        """Delegate to the base ``browser_wait_user``."""
        return await super().browser_wait_user(timeout_sec)

    @listen_toolkit(BaseHybridBrowserToolkit.browser_switch_tab)
    async def browser_switch_tab(self, *, tab_id: str) -> Dict[str, Any]:
        """Delegate to the base ``browser_switch_tab``."""
        return await super().browser_switch_tab(tab_id=tab_id)

    @listen_toolkit(BaseHybridBrowserToolkit.browser_close_tab)
    async def browser_close_tab(self, *, tab_id: str) -> Dict[str, Any]:
        """Delegate to the base ``browser_close_tab``."""
        return await super().browser_close_tab(tab_id=tab_id)

    @listen_toolkit(BaseHybridBrowserToolkit.browser_get_tab_info)
    async def browser_get_tab_info(self) -> Dict[str, Any]:
        """Delegate to the base ``browser_get_tab_info``."""
        return await super().browser_get_tab_info()
|