mirror of
https://github.com/Skyvern-AI/skyvern.git
synced 2026-04-29 04:00:13 +00:00
347 lines
11 KiB
Python
347 lines
11 KiB
Python
"""Chrome browser launcher for CDP-based browser serve command."""
|
|
|
|
from __future__ import annotations
|
|
|
|
import asyncio
|
|
import platform
|
|
import shutil
|
|
import subprocess
|
|
import uuid
|
|
from dataclasses import dataclass
|
|
from pathlib import Path
|
|
from typing import TYPE_CHECKING
|
|
|
|
import httpx
|
|
import psutil
|
|
|
|
if TYPE_CHECKING:
|
|
from subprocess import Popen
|
|
|
|
# Default Skyvern directory for browser data
|
|
SKYVERN_DATA_DIR = Path.home() / ".skyvern"
|
|
|
|
|
|
@dataclass
|
|
class LocalBrowserInfo:
|
|
"""Information about a locally launched browser instance."""
|
|
|
|
cdp_ws_url: str
|
|
profile_dir: str
|
|
process: Popen[bytes]
|
|
|
|
|
|
def generate_browser_id() -> str:
|
|
"""Generate a unique browser ID for this browser instance."""
|
|
return f"br_{uuid.uuid4().hex[:12]}"
|
|
|
|
|
|
def get_default_chrome_path() -> str:
|
|
"""Get the default Chrome executable path based on the platform."""
|
|
system = platform.system()
|
|
|
|
paths: list[Path]
|
|
if system == "Darwin":
|
|
# macOS
|
|
paths = [
|
|
Path("/Applications/Google Chrome.app/Contents/MacOS/Google Chrome"),
|
|
Path("/Applications/Chromium.app/Contents/MacOS/Chromium"),
|
|
Path.home() / "Applications/Google Chrome.app/Contents/MacOS/Google Chrome",
|
|
]
|
|
elif system == "Windows":
|
|
# Windows
|
|
paths = [
|
|
Path.home() / "AppData/Local/Google/Chrome/Application/chrome.exe",
|
|
Path("C:/Program Files/Google/Chrome/Application/chrome.exe"),
|
|
Path("C:/Program Files (x86)/Google/Chrome/Application/chrome.exe"),
|
|
]
|
|
else:
|
|
# Linux
|
|
paths = [
|
|
Path("/usr/bin/google-chrome"),
|
|
Path("/usr/bin/google-chrome-stable"),
|
|
Path("/usr/bin/chromium"),
|
|
Path("/usr/bin/chromium-browser"),
|
|
Path("/snap/bin/chromium"),
|
|
]
|
|
|
|
for path in paths:
|
|
if path.exists():
|
|
return str(path)
|
|
|
|
raise FileNotFoundError(
|
|
f"Chrome executable not found. Searched paths: {paths}. Install Chrome or specify the path with --chrome-path."
|
|
)
|
|
|
|
|
|
def get_default_profile_dir() -> str:
|
|
"""Get the default Chrome profile directory."""
|
|
profile_dir = SKYVERN_DATA_DIR / "chrome-profile"
|
|
profile_dir.mkdir(parents=True, exist_ok=True)
|
|
return str(profile_dir)
|
|
|
|
|
|
def get_default_download_dir(browser_id: str) -> str:
|
|
"""Get the default download directory for a browser instance."""
|
|
download_dir = SKYVERN_DATA_DIR / "downloads" / browser_id
|
|
download_dir.mkdir(parents=True, exist_ok=True)
|
|
return str(download_dir)
|
|
|
|
|
|
def get_local_chrome_profile_dir() -> Path:
|
|
"""Return the platform-specific Chrome user data directory."""
|
|
system = platform.system()
|
|
if system == "Darwin":
|
|
return Path.home() / "Library/Application Support/Google/Chrome"
|
|
elif system == "Windows":
|
|
local_app = Path.home() / "AppData" / "Local"
|
|
return local_app / "Google" / "Chrome" / "User Data"
|
|
else:
|
|
return Path.home() / ".config" / "google-chrome"
|
|
|
|
|
|
_CHROME_MAIN_NAMES = {"chrome", "google-chrome", "google-chrome-stable", "google chrome", "chromium"}
|
|
_CHROME_SKIP_NAMES = {"chrome_crashpad_handler", "chromedriver", "chrome helper", "google chrome helper"}
|
|
|
|
|
|
def is_chrome_running() -> bool:
|
|
"""Check if Chrome is already running (main browser process only).
|
|
|
|
Uses exact name matching to avoid false positives from helper processes
|
|
(renderer, GPU, utility) and unrelated tools like chromedriver.
|
|
"""
|
|
for proc in psutil.process_iter(["name"]):
|
|
try:
|
|
name = (proc.info["name"] or "").lower()
|
|
if name in _CHROME_SKIP_NAMES:
|
|
continue
|
|
if name in _CHROME_MAIN_NAMES:
|
|
return True
|
|
except (psutil.NoSuchProcess, psutil.AccessDenied, psutil.ZombieProcess):
|
|
pass
|
|
return False
|
|
|
|
|
|
_PROFILE_COPY_IGNORE = {
|
|
"Cache",
|
|
"Code Cache",
|
|
"GPUCache",
|
|
"Service Worker",
|
|
"blob_storage",
|
|
"BudgetDatabase",
|
|
"coupon_db",
|
|
"Download Service",
|
|
"GCM Store",
|
|
"optimization_guide_model_metadata",
|
|
"optimization_guide_prediction_model_downloads",
|
|
"Extensions",
|
|
"IndexedDB",
|
|
"File System",
|
|
"Session Storage",
|
|
}
|
|
|
|
_LOCK_FILES = {"SingletonLock", "SingletonSocket", "SingletonCookie"}
|
|
|
|
|
|
def clone_local_chrome_profile(chrome_profile_name: str, dest_user_data_dir: Path, *, full: bool = False) -> None:
|
|
"""Copy the user's local Chrome profile into *dest_user_data_dir*.
|
|
|
|
Args:
|
|
chrome_profile_name: Profile subdirectory name (e.g. ``"Default"``).
|
|
dest_user_data_dir: Destination path that will be used as ``--user-data-dir``.
|
|
full: When ``True``, copy the **entire** Chrome user-data directory
|
|
(requires Chrome to be closed). When ``False`` (default), copy
|
|
only the target profile subdir (skipping caches) plus ``Local State``,
|
|
which is much faster (~50-200 MB vs 2-10+ GB).
|
|
"""
|
|
source_user_data_dir = get_local_chrome_profile_dir()
|
|
if not source_user_data_dir.is_dir():
|
|
raise FileNotFoundError(
|
|
f"Chrome user data directory not found at {source_user_data_dir}. Is Google Chrome installed?"
|
|
)
|
|
|
|
source_profile = source_user_data_dir / chrome_profile_name
|
|
if not source_profile.resolve().is_relative_to(source_user_data_dir.resolve()):
|
|
raise ValueError(f"Profile name '{chrome_profile_name}' resolves outside the Chrome data directory.")
|
|
|
|
if not source_profile.is_dir():
|
|
available = [d.name for d in source_user_data_dir.iterdir() if d.is_dir() and not d.name.startswith(".")]
|
|
raise FileNotFoundError(
|
|
f"Chrome profile '{chrome_profile_name}' not found in {source_user_data_dir}. "
|
|
f"Available profiles: {', '.join(sorted(available)[:10])}"
|
|
)
|
|
|
|
resolved_dest = dest_user_data_dir.resolve()
|
|
skyvern_data = SKYVERN_DATA_DIR.resolve()
|
|
if not resolved_dest.is_relative_to(skyvern_data):
|
|
raise ValueError(
|
|
f"Refusing to overwrite {dest_user_data_dir} — it is outside Skyvern's data directory ({SKYVERN_DATA_DIR}). "
|
|
"Remove it manually or use the default profile directory."
|
|
)
|
|
|
|
if dest_user_data_dir.exists():
|
|
shutil.rmtree(dest_user_data_dir)
|
|
|
|
if full:
|
|
shutil.copytree(source_user_data_dir, dest_user_data_dir, ignore_dangling_symlinks=True)
|
|
else:
|
|
_selective_copy(source_user_data_dir, source_profile, chrome_profile_name, dest_user_data_dir)
|
|
|
|
# Remove lock files so Chrome can open the copied profile
|
|
for lock_name in _LOCK_FILES:
|
|
lock_file = dest_user_data_dir / lock_name
|
|
if lock_file.exists():
|
|
lock_file.unlink()
|
|
|
|
|
|
def _selective_copy(
|
|
source_user_data_dir: Path,
|
|
source_profile: Path,
|
|
chrome_profile_name: str,
|
|
dest_user_data_dir: Path,
|
|
) -> None:
|
|
"""Copy only auth-relevant files from the Chrome profile."""
|
|
dest_user_data_dir.mkdir(parents=True, exist_ok=True)
|
|
|
|
# 1. Copy the profile subdir, skipping cache directories
|
|
def _ignore(directory: str, contents: list[str]) -> set[str]:
|
|
return {c for c in contents if c in _PROFILE_COPY_IGNORE}
|
|
|
|
shutil.copytree(
|
|
source_profile,
|
|
dest_user_data_dir / chrome_profile_name,
|
|
ignore=_ignore,
|
|
ignore_dangling_symlinks=True,
|
|
)
|
|
|
|
# 2. Copy Local State (needed for cookie decryption on macOS/Linux)
|
|
local_state = source_user_data_dir / "Local State"
|
|
if local_state.is_file():
|
|
shutil.copy2(local_state, dest_user_data_dir / "Local State")
|
|
|
|
|
|
async def launch_chrome_with_cdp(
|
|
port: int,
|
|
profile_dir: str | None = None,
|
|
headless: bool = False,
|
|
chrome_path: str | None = None,
|
|
download_dir: str | None = None,
|
|
profile_name: str | None = None,
|
|
) -> LocalBrowserInfo:
|
|
"""Launch Chrome with CDP enabled and wait for it to be ready.
|
|
|
|
Args:
|
|
port: The port for Chrome's CDP server.
|
|
profile_dir: Chrome user data directory. Uses default if not specified.
|
|
headless: Whether to run in headless mode.
|
|
chrome_path: Path to Chrome executable. Auto-detects if not specified.
|
|
download_dir: Directory for downloads. Uses default if not specified.
|
|
profile_name: Chrome profile subdirectory name (e.g. "Profile 1").
|
|
When set, passes ``--profile-directory`` so Chrome loads the
|
|
correct profile instead of defaulting to ``Default/``.
|
|
|
|
Returns:
|
|
LocalBrowserInfo with connection details.
|
|
|
|
Raises:
|
|
FileNotFoundError: If Chrome executable is not found.
|
|
TimeoutError: If Chrome fails to start within timeout.
|
|
"""
|
|
resolved_chrome_path = chrome_path or get_default_chrome_path()
|
|
resolved_profile_dir = profile_dir or get_default_profile_dir()
|
|
|
|
# Build Chrome args
|
|
args = [
|
|
resolved_chrome_path,
|
|
f"--remote-debugging-port={port}",
|
|
f"--user-data-dir={resolved_profile_dir}",
|
|
"--no-first-run",
|
|
"--no-default-browser-check",
|
|
"--disable-background-networking",
|
|
"--disable-sync",
|
|
"--disable-translate",
|
|
"--metrics-recording-only",
|
|
"--hide-crash-restore-bubble",
|
|
]
|
|
|
|
if profile_name:
|
|
args.append(f"--profile-directory={profile_name}")
|
|
|
|
if headless:
|
|
args.append("--headless=new")
|
|
|
|
if download_dir:
|
|
# Ensure download directory exists
|
|
Path(download_dir).mkdir(parents=True, exist_ok=True)
|
|
|
|
# Launch Chrome process
|
|
process = subprocess.Popen(
|
|
args,
|
|
stdout=subprocess.DEVNULL,
|
|
stderr=subprocess.DEVNULL,
|
|
)
|
|
|
|
# Wait for CDP to be ready
|
|
cdp_ws_url = await _wait_for_cdp_ready(port, timeout=30)
|
|
|
|
return LocalBrowserInfo(
|
|
cdp_ws_url=cdp_ws_url,
|
|
profile_dir=resolved_profile_dir,
|
|
process=process,
|
|
)
|
|
|
|
|
|
async def _wait_for_cdp_ready(port: int, timeout: int = 30) -> str:
|
|
"""Wait for Chrome's CDP server to be ready and return the WebSocket URL.
|
|
|
|
Args:
|
|
port: CDP port to poll.
|
|
timeout: Max seconds to wait.
|
|
|
|
Returns:
|
|
CDP WebSocket URL for browser connection.
|
|
|
|
Raises:
|
|
TimeoutError: If CDP doesn't become ready within timeout.
|
|
"""
|
|
url = f"http://127.0.0.1:{port}/json/version"
|
|
deadline = asyncio.get_event_loop().time() + timeout
|
|
|
|
async with httpx.AsyncClient() as client:
|
|
while asyncio.get_event_loop().time() < deadline:
|
|
try:
|
|
response = await client.get(url, timeout=2)
|
|
if response.status_code == 200:
|
|
data = response.json()
|
|
ws_url = data.get("webSocketDebuggerUrl")
|
|
if ws_url:
|
|
return ws_url
|
|
except (httpx.RequestError, httpx.TimeoutException):
|
|
pass
|
|
|
|
await asyncio.sleep(0.5)
|
|
|
|
raise TimeoutError(f"Chrome CDP did not become ready on port {port} within {timeout}s")
|
|
|
|
|
|
def terminate_browser(browser_info: LocalBrowserInfo) -> None:
|
|
"""Terminate the browser process gracefully.
|
|
|
|
Args:
|
|
browser_info: The browser info containing the process to terminate.
|
|
"""
|
|
process = browser_info.process
|
|
|
|
if process.poll() is not None:
|
|
# Process already exited
|
|
return
|
|
|
|
# Try graceful termination first
|
|
process.terminate()
|
|
|
|
try:
|
|
# Wait up to 5 seconds for graceful shutdown
|
|
process.wait(timeout=5)
|
|
except subprocess.TimeoutExpired:
|
|
# Force kill if it doesn't respond
|
|
process.kill()
|
|
process.wait(timeout=2)
|