mirror of
https://github.com/Skyvern-AI/skyvern.git
synced 2026-04-28 11:40:32 +00:00
440 lines
16 KiB
Python
440 lines
16 KiB
Python
"""MCP tools for browser tab management.

Provides tools to list, create, switch, close, and wait for browser tabs.
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import asyncio
|
|
from collections import deque
|
|
from typing import Annotated, Any
|
|
|
|
import structlog
|
|
from pydantic import BaseModel, Field
|
|
|
|
from ._common import ErrorCode, Timer, make_error, make_result
|
|
from ._session import BrowserNotAvailableError, get_current_session, get_page, no_browser_error
|
|
|
|
# Module-level structlog logger, named after this module.
LOG = structlog.get_logger(__name__)
|
|
|
|
# Error message returned by the tab tools that refuse to run in stateless HTTP mode
# (switch, close, wait_for_new).
_STATELESS_TAB_MSG = (
    "Tab management tools that rely on persisted state "
    "(switch, close, wait_for_new) are not supported in stateless HTTP mode. "
    "Use stdio transport (Claude Code, gstack)."
)
# Remediation hint passed alongside the message above.
_STATELESS_TAB_HINT = "Connect via stdio transport: `skyvern mcp` (default)."
|
|
|
|
|
|
class TabInfo(BaseModel):
    """Serializable snapshot describing one browser tab.

    NOTE: tab_id is derived from id(page), and that value can be reused once
    the page object has been garbage-collected. A UUID-based tab ID scheme is
    planned as a follow-up to eliminate this class of issue.
    """

    # Stringified id() of the underlying page object.
    tab_id: str
    # Position of the page in the browser context's page list; callers in this
    # module pass -1 when the page is no longer in that list.
    index: int
    # URL reported by the page when the snapshot was built.
    url: str
    # Page title; remains "" when the snapshot is built without a title lookup.
    title: str = ""
    # Whether this tab is the one the session currently treats as active.
    is_active: bool
|
|
|
|
|
|
def _tab_info(page: Any, *, index: int, is_active: bool) -> TabInfo:
    """Create a TabInfo synchronously from a raw Playwright Page.

    No awaiting happens here, so ``title`` keeps its default empty string.
    """
    fields = {
        "tab_id": str(id(page)),
        "index": index,
        "url": page.url,
        "is_active": is_active,
    }
    return TabInfo(**fields)
|
|
|
|
|
|
async def _tab_info_with_title(page: Any, *, index: int, is_active: bool) -> TabInfo:
    """Create a TabInfo for ``page`` and try to fill in its title.

    The title lookup is best-effort: any exception raised while fetching or
    assigning it is swallowed and the title stays at its default of "".
    """
    descriptor = _tab_info(page, index=index, is_active=is_active)
    try:
        fetched_title = await page.title()
        descriptor.title = fetched_title
    except Exception:
        # Deliberately ignored — the descriptor is still useful without a title.
        pass
    return descriptor
|
|
|
|
|
|
def _resolve_tab(
    pages: list[Any],
    *,
    tab_id: str | None = None,
    index: int | None = None,
) -> Any | None:
    """Look up a page by tab_id (``str(id(page))``) or by list position.

    ``tab_id`` wins when both selectors are given: if it matches nothing the
    result is None even when ``index`` is valid. Returns None when no selector
    is given, when nothing matches, when ``index`` is out of range (negative
    values included), or when the matched page is already closed.
    """
    if tab_id is not None:
        candidates = [pg for pg in pages if str(id(pg)) == tab_id]
    elif index is not None and 0 <= index < len(pages):
        candidates = [pages[index]]
    else:
        candidates = []

    if not candidates:
        return None

    # Only the first match counts; a closed page is reported as "not found".
    match = candidates[0]
    return None if match.is_closed() else match
|
|
|
|
|
|
async def skyvern_tab_list(
    session_id: Annotated[str | None, Field(description="Browser session ID (pbs_...)")] = None,
    cdp_url: Annotated[str | None, Field(description="CDP WebSocket URL")] = None,
) -> dict[str, Any]:
    """List all open browser tabs with their URLs, titles, and active status.

    Returns an array of tabs, each with tab_id (session-scoped identifier for switching),
    index (position), url, title, and is_active flag.
    """
    # NOTE(review): docstring text kept verbatim — presumably surfaced to MCP
    # clients as the tool description; confirm before rewording.
    tool = "skyvern_tab_list"

    try:
        page, ctx = await get_page(session_id=session_id, cdp_url=cdp_url)
    except BrowserNotAvailableError:
        return make_result(tool, ok=False, error=no_browser_error())

    browser = get_current_session().browser
    if browser is None:
        return make_result(tool, ok=False, error=no_browser_error())

    # Snapshot the page list once so indices stay consistent across the awaits below.
    context_pages = browser._browser_context.pages
    current = page.page  # raw Playwright Page wrapped by the active page object

    # Titles are fetched one tab at a time, in list order.
    descriptors = [
        (await _tab_info_with_title(raw, index=position, is_active=(raw is current))).model_dump()
        for position, raw in enumerate(context_pages)
    ]

    payload = {
        "tabs": descriptors,
        "count": len(descriptors),
        "active_tab_id": str(id(current)),
    }
    return make_result(tool, browser_context=ctx, data=payload)
|
|
|
|
|
|
async def skyvern_tab_new(
    session_id: Annotated[str | None, Field(description="Browser session ID (pbs_...)")] = None,
    cdp_url: Annotated[str | None, Field(description="CDP WebSocket URL")] = None,
    url: Annotated[
        str | None, Field(description="URL to navigate to in the new tab. Opens about:blank if omitted.")
    ] = None,
) -> dict[str, Any]:
    """Open a new browser tab. Optionally navigate to a URL. The new tab becomes the active tab.

    Use skyvern_tab_switch to go back to a previous tab.
    """
    # NOTE(review): docstring text kept verbatim — presumably surfaced to MCP
    # clients as the tool description; confirm before rewording.
    #
    # Failure contract: if creating the page or navigating raises, the new tab
    # is closed (best effort) and BOTH the previous active page and the previous
    # working frame are restored, so the session is left where it was.
    try:
        _, ctx = await get_page(session_id=session_id, cdp_url=cdp_url)
    except BrowserNotAvailableError:
        return make_result("skyvern_tab_new", ok=False, error=no_browser_error())

    state = get_current_session()
    browser = state.browser
    if browser is None:
        return make_result("skyvern_tab_new", ok=False, error=no_browser_error())

    # Snapshot everything the success path overwrites so it can be rolled back.
    prev_active = state._active_page
    prev_frame = getattr(state, "_working_frame", None)
    new_page = None
    with Timer() as timer:
        try:
            new_page = await browser._browser_context.new_page()
            state._active_page = new_page
            # New tab has no iframes yet — clear stale frame reference
            state._working_frame = None
            # Drain the event that _on_new_page() buffered for this explicitly
            # created page, so tab_wait_for_new doesn't return it as a popup.
            state._page_events = deque(
                (e for e in state._page_events if e["page"] is not new_page),
                maxlen=state._page_events.maxlen,
            )
            timer.mark("new_page")

            if url:
                await new_page.goto(url, wait_until="domcontentloaded", timeout=30000)
                timer.mark("navigate")
        except Exception as e:
            # Roll back session state first so it is consistent even if closing
            # the orphan tab fails. The working frame belongs to the previous
            # page, so it becomes valid again once that page is active.
            state._active_page = prev_active
            state._working_frame = prev_frame
            if new_page is not None:
                try:
                    await new_page.close()
                except Exception:
                    # Best effort: the original error is what the caller needs,
                    # but a leaked tab should at least be visible in the logs.
                    LOG.warning("Failed to close orphan tab after skyvern_tab_new error", exc_info=True)
            return make_result(
                "skyvern_tab_new",
                ok=False,
                browser_context=ctx,
                timing_ms=timer.timing_ms,
                error=make_error(ErrorCode.ACTION_FAILED, str(e), "Check URL or browser state"),
            )

    pages = browser._browser_context.pages
    # Fall back to the last position if the new page is not in the list.
    index = pages.index(new_page) if new_page in pages else len(pages) - 1
    tab = await _tab_info_with_title(new_page, index=index, is_active=True)

    return make_result(
        "skyvern_tab_new",
        browser_context=ctx,
        data=tab.model_dump(),
        timing_ms=timer.timing_ms,
    )
|
|
|
|
|
|
async def skyvern_tab_switch(
    session_id: Annotated[str | None, Field(description="Browser session ID (pbs_...)")] = None,
    cdp_url: Annotated[str | None, Field(description="CDP WebSocket URL")] = None,
    tab_id: Annotated[str | None, Field(description="Tab ID from skyvern_tab_list to switch to")] = None,
    index: Annotated[int | None, Field(description="Tab index (0-based) to switch to")] = None,
) -> dict[str, Any]:
    """Switch the active browser tab. All subsequent browser tools will operate on this tab.

    Provide either tab_id (from skyvern_tab_list) or index (0-based position).
    Use skyvern_tab_list first to see available tabs and their IDs.
    """
    # NOTE(review): docstring text kept verbatim — presumably surfaced to MCP
    # clients as the tool description; confirm before rewording.
    from skyvern.cli.core.session_manager import is_stateless_http_mode

    tool = "skyvern_tab_switch"

    # Guard 1: this tool mutates per-session state, which stateless HTTP mode rejects.
    if is_stateless_http_mode():
        stateless_error = make_error(ErrorCode.ACTION_FAILED, _STATELESS_TAB_MSG, _STATELESS_TAB_HINT)
        return make_result(tool, ok=False, error=stateless_error)

    # Guard 2: at least one selector is required.
    if tab_id is None and index is None:
        missing_selector = make_error(
            ErrorCode.INVALID_INPUT,
            "Must provide tab_id or index",
            "Use skyvern_tab_list to see available tabs, then pass tab_id or index",
        )
        return make_result(tool, ok=False, error=missing_selector)

    try:
        _, ctx = await get_page(session_id=session_id, cdp_url=cdp_url)
    except BrowserNotAvailableError:
        return make_result(tool, ok=False, error=no_browser_error())

    state = get_current_session()
    browser = state.browser
    if browser is None:
        return make_result(tool, ok=False, error=no_browser_error())

    open_pages = browser._browser_context.pages
    destination = _resolve_tab(open_pages, tab_id=tab_id, index=index)
    if destination is None:
        not_found = make_error(
            ErrorCode.INVALID_INPUT,
            f"Tab not found: tab_id={tab_id}, index={index}",
            "Use skyvern_tab_list to see available tabs",
        )
        return make_result(tool, ok=False, browser_context=ctx, error=not_found)

    state._active_page = destination
    # A frame reference from the previous tab is meaningless on the new one.
    state._working_frame = None

    # Best effort: bring_to_front is a no-op in headless but helps in headed mode.
    try:
        await destination.bring_to_front()
    except Exception:
        pass

    position = open_pages.index(destination) if destination in open_pages else 0
    descriptor = await _tab_info_with_title(destination, index=position, is_active=True)

    return make_result(tool, browser_context=ctx, data=descriptor.model_dump())
|
|
|
|
|
|
async def skyvern_tab_close(
    session_id: Annotated[str | None, Field(description="Browser session ID (pbs_...)")] = None,
    cdp_url: Annotated[str | None, Field(description="CDP WebSocket URL")] = None,
    tab_id: Annotated[str | None, Field(description="Tab ID to close. Closes active tab if omitted.")] = None,
    index: Annotated[int | None, Field(description="Tab index (0-based) to close.")] = None,
) -> dict[str, Any]:
    """Close a browser tab. Closes the active tab if no tab_id or index is given.

    If the last tab is closed, a new blank tab is created automatically.
    If the active tab is closed, the most recent remaining tab becomes active.
    """
    # NOTE(review): docstring text kept verbatim — presumably surfaced to MCP
    # clients as the tool description; confirm before rewording.
    # NOTE(review): nothing in this function creates a blank tab or picks a new
    # active tab; it only clears the active-page pointer. Presumably the session
    # layer does the rest lazily — confirm.
    from skyvern.cli.core.session_manager import is_stateless_http_mode

    tool = "skyvern_tab_close"

    if is_stateless_http_mode():
        stateless_error = make_error(ErrorCode.ACTION_FAILED, _STATELESS_TAB_MSG, _STATELESS_TAB_HINT)
        return make_result(tool, ok=False, error=stateless_error)

    try:
        page, ctx = await get_page(session_id=session_id, cdp_url=cdp_url)
    except BrowserNotAvailableError:
        return make_result(tool, ok=False, error=no_browser_error())

    state = get_current_session()
    browser = state.browser
    if browser is None:
        return make_result(tool, ok=False, error=no_browser_error())

    open_pages = browser._browser_context.pages

    if tab_id is None and index is None:
        # No selector: close whatever tab is currently active.
        victim = page.page
    else:
        victim = _resolve_tab(open_pages, tab_id=tab_id, index=index)
        if victim is None:
            not_found = make_error(
                ErrorCode.INVALID_INPUT,
                f"Tab not found: tab_id={tab_id}, index={index}",
                "Use skyvern_tab_list to see available tabs",
            )
            return make_result(tool, ok=False, browser_context=ctx, error=not_found)

    # Capture identity before closing; the same key is used for hook bookkeeping below.
    victim_key = id(victim)
    reported_id = str(victim_key)
    was_active = victim is page.page

    try:
        await victim.close()
    except Exception as exc:
        close_failed = make_error(ErrorCode.ACTION_FAILED, str(exc), "Tab may already be closed")
        return make_result(tool, ok=False, browser_context=ctx, error=close_failed)

    # Clear active page — get_working_page() will lazily pick the last remaining page
    if was_active or state._active_page is victim:
        state._active_page = None
        # The closed tab's frame reference is no longer valid.
        state._working_frame = None

    # Drop inspection-hook bookkeeping keyed by the closed page.
    state._hooked_page_ids.discard(victim_key)
    state._hooked_handlers_map.pop(victim_key, None)

    summary = {
        "closed_tab_id": reported_id,
        "remaining_tabs": len(browser._browser_context.pages),
    }
    return make_result(tool, browser_context=ctx, data=summary)
|
|
|
|
|
|
async def skyvern_tab_wait_for_new(
    session_id: Annotated[str | None, Field(description="Browser session ID (pbs_...)")] = None,
    cdp_url: Annotated[str | None, Field(description="CDP WebSocket URL")] = None,
    timeout_ms: Annotated[
        int,
        Field(description="Max time to wait for a new tab in ms. Default 30000 (30s)", ge=1000, le=120000),
    ] = 30000,
) -> dict[str, Any]:
    """Wait for a new browser tab to open (popup, target=_blank link, window.open).

    Checks the event buffer first — if a new tab already opened, returns it immediately.
    Returns one tab per call. If multiple popups may open, call repeatedly to drain them.
    Does NOT auto-switch to the new tab. Use skyvern_tab_switch after if desired.
    """
    # NOTE(review): docstring text kept verbatim — presumably surfaced to MCP
    # clients as the tool description; confirm before rewording.
    from skyvern.cli.core.session_manager import is_stateless_http_mode

    tool = "skyvern_tab_wait_for_new"

    if is_stateless_http_mode():
        stateless_error = make_error(ErrorCode.ACTION_FAILED, _STATELESS_TAB_MSG, _STATELESS_TAB_HINT)
        return make_result(tool, ok=False, error=stateless_error)

    try:
        _, ctx = await get_page(session_id=session_id, cdp_url=cdp_url)
    except BrowserNotAvailableError:
        return make_result(tool, ok=False, error=no_browser_error())

    state = get_current_session()
    browser = state.browser
    if browser is None:
        return make_result(tool, ok=False, error=no_browser_error())

    with Timer() as timer:
        # Fast path: a popup may already be buffered. Entries for pages that
        # have since closed are discarded so they cannot hide a live one.
        while state._page_events:
            buffered = state._page_events.popleft()["page"]
            if buffered.is_closed():
                continue
            open_pages = browser._browser_context.pages
            position = open_pages.index(buffered) if buffered in open_pages else -1
            descriptor = await _tab_info_with_title(buffered, index=position, is_active=False)
            timer.mark("from_buffer")
            return make_result(
                tool,
                browser_context=ctx,
                data=descriptor.model_dump(),
                timing_ms=timer.timing_ms,
            )

        # Slow path: block until a page event arrives or the timeout elapses.
        try:
            arrived = await asyncio.wait_for(
                _wait_for_page_event(state),
                timeout=timeout_ms / 1000.0,
            )
            timer.mark("waited")
        except asyncio.TimeoutError:
            timed_out = make_error(
                ErrorCode.TIMEOUT,
                f"No new tab opened within {timeout_ms}ms",
                "Ensure the page action that opens a new tab has been triggered first",
            )
            return make_result(
                tool,
                ok=False,
                browser_context=ctx,
                timing_ms=timer.timing_ms,
                error=timed_out,
            )

    # The descriptor is built after the timer context has exited.
    open_pages = browser._browser_context.pages
    position = open_pages.index(arrived) if arrived in open_pages else -1
    descriptor = await _tab_info_with_title(arrived, index=position, is_active=False)

    return make_result(
        tool,
        browser_context=ctx,
        data=descriptor.model_dump(),
        timing_ms=timer.timing_ms,
    )
|
|
|
|
|
|
async def _wait_for_page_event(state: Any) -> Any:
    """Block until the session's page-event buffer yields a page that is still open.

    Reads ``state._page_events`` (a deque of dicts with a ``"page"`` key) and
    ``state._page_event_signal`` (an awaitable event with ``clear``/``wait``).
    Entries whose page is already closed are dropped. There is no timeout here;
    the caller is expected to bound the wait.
    """
    while True:
        # Reset the signal BEFORE draining. If a new page is reported after the
        # drain finds nothing, its set() lands after this clear() and the wait()
        # below returns immediately, so no wakeup is lost.
        state._page_event_signal.clear()

        # Look the deque up on ``state`` each time rather than caching it in a
        # local: skyvern_tab_new rebinds the attribute to a fresh deque.
        while state._page_events:
            candidate = state._page_events.popleft()["page"]
            if candidate.is_closed():
                continue
            return candidate

        await state._page_event_signal.wait()
|