diff --git a/.env.example b/.env.example index 8871a2dff..3405d14b4 100644 --- a/.env.example +++ b/.env.example @@ -110,6 +110,13 @@ PORT=8000 # ANALYTICS_ID: Distinct analytics ID (a UUID is generated if left blank). ANALYTICS_ID="anonymous" +# LAMINAR +# Skyvern's backend runs on port 8000 by default. If you self-host Laminar, consider moving it to other ports and setting the env vars below to match, to avoid conflicts. +# LMNR_HTTP_PORT=8010 +# LMNR_GRPC_PORT=8011 +# LMNR_BASE_URL=http://localhost +# LMNR_PROJECT_API_KEY= + # 1Password Integration # OP_SERVICE_ACCOUNT_TOKEN: API token for 1Password integration OP_SERVICE_ACCOUNT_TOKEN="" diff --git a/docs/debugging/observability-with-laminar.mdx b/docs/debugging/observability-with-laminar.mdx index 1d0e876e5..173682126 100644 --- a/docs/debugging/observability-with-laminar.mdx +++ b/docs/debugging/observability-with-laminar.mdx @@ -184,9 +184,18 @@ If you're running Skyvern on your own infrastructure, add these to your server's LMNR_PROJECT_API_KEY=your-laminar-api-key ``` -Skyvern's server includes a built-in `LaminarTrace` integration that initializes Laminar with the LiteLLM callback, capturing every LLM call, token count, and cost. It disables the automatic Skyvern/Patchright instrumentors (to avoid conflicts) and uses Laminar's `@observe` decorator on internal methods instead. +Skyvern's server initializes Laminar at startup, which auto-instruments LiteLLM to capture every LLM call, token count, and cost. No manual callback setup is needed. -No code changes needed — once the env var is set, traces appear in your Laminar project automatically. +If you're running a self-hosted Laminar instance, also set the base URL and ports: + +```bash .env +LMNR_PROJECT_API_KEY=your-laminar-api-key +LMNR_BASE_URL=http://localhost +LMNR_GRPC_PORT=8011 +LMNR_HTTP_PORT=8010 +``` + +No code changes needed — once the env vars are set, traces appear in your Laminar project automatically. 
--- diff --git a/fern/observability/overview.mdx b/fern/observability/overview.mdx index ebb3dc7ef..b51488404 100644 --- a/fern/observability/overview.mdx +++ b/fern/observability/overview.mdx @@ -18,24 +18,18 @@ Copy of this guide is available in the [Laminar documentation](https://docs.lmnr ## Quickstart -To trace Skyvern workflows with Laminar, **initialize Laminar and configure LiteLLM callbacks at the top of your project**. This will automatically capture all LLM calls, browser session recordings, and workflow execution details. +To trace Skyvern workflows with Laminar, **initialize Laminar at the top of your project**. This will automatically instrument LiteLLM and capture all LLM calls, browser session recordings, and workflow execution details. No manual callback setup is needed. -```python {3-4} {8-12} {14-15} +```python {3} {7-8} from skyvern import Skyvern import asyncio -import litellm -from lmnr import Laminar, LaminarLiteLLMCallback, Instruments +from lmnr import Laminar from dotenv import load_dotenv load_dotenv() -# Initialize Laminar -# This will automatically trace all Skyvern functions -# Disable OpenAI to avoid double instrumentation of LLM calls -Laminar.initialize(disabled_instruments=set([Instruments.OPENAI])) - -# Configure LiteLLM to trace all LLM calls made by Skyvern -litellm.callbacks = [LaminarLiteLLMCallback()] +# Initialize Laminar — automatically instruments LiteLLM +Laminar.initialize() skyvern = Skyvern(api_key="YOUR_API_KEY") @@ -49,6 +43,10 @@ if __name__ == "__main__": asyncio.run(main()) ``` + +`LaminarLiteLLMCallback` and manually setting `litellm.callbacks` are deprecated. `Laminar.initialize()` handles LiteLLM instrumentation automatically. + + ## Viewing Traces You can view traces in the Laminar UI by navigating to the traces tab in your project. 
When you select a trace, you can see: diff --git a/fern/openapi/skyvern_openapi.json b/fern/openapi/skyvern_openapi.json index 2627a9d2a..5a64fddc3 100644 --- a/fern/openapi/skyvern_openapi.json +++ b/fern/openapi/skyvern_openapi.json @@ -3907,6 +3907,69 @@ } } } + }, + "/v1/sdk/run_action": { + "post": { + "tags": [ + "SDK" + ], + "summary": "Run an SDK action", + "description": "Execute a single SDK action with the specified parameters", + "operationId": "run_sdk_action_v1_sdk_run_action_post", + "parameters": [ + { + "name": "x-api-key", + "in": "header", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "Skyvern API key for authentication. API key can be found at https://app.skyvern.com/settings.", + "title": "X-Api-Key" + }, + "description": "Skyvern API key for authentication. API key can be found at https://app.skyvern.com/settings." + } + ], + "requestBody": { + "required": true, + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/RunSdkActionRequest" + } + } + } + }, + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/RunSdkActionResponse" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + }, + "x-fern-sdk-method-name": "run_sdk_action" + } } }, "components": { @@ -4015,6 +4078,41 @@ ], "title": "AWSSecretParameterYAML" }, + "ActAction": { + "properties": { + "type": { + "type": "string", + "const": "ai_act", + "title": "Type", + "default": "ai_act" + }, + "intention": { + "type": "string", + "title": "Intention", + "description": "Natural language prompt for the action", + "default": "" + }, + "data": { + "anyOf": [ + { + "type": "string" + }, + { + "additionalProperties": true, + "type": "object" + }, + { + "type": 
"null" + } + ], + "title": "Data", + "description": "Additional context data" + } + }, + "type": "object", + "title": "ActAction", + "description": "AI act action parameters." + }, "Action": { "properties": { "action_type": { @@ -6353,6 +6451,60 @@ "title": "BrowserSessionResponse", "description": "Response model for browser session information." }, + "ClickAction": { + "properties": { + "type": { + "type": "string", + "const": "ai_click", + "title": "Type", + "default": "ai_click" + }, + "selector": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Selector", + "description": "CSS selector for the element", + "default": "" + }, + "intention": { + "type": "string", + "title": "Intention", + "description": "The intention or goal of the click", + "default": "" + }, + "data": { + "anyOf": [ + { + "type": "string" + }, + { + "additionalProperties": true, + "type": "object" + }, + { + "type": "null" + } + ], + "title": "Data", + "description": "Additional context data" + }, + "timeout": { + "type": "number", + "title": "Timeout", + "description": "Timeout in milliseconds", + "default": 10000 + } + }, + "type": "object", + "title": "ClickAction", + "description": "Click action parameters." 
+ }, "ClickContext": { "properties": { "thought": { @@ -7742,6 +7894,88 @@ ], "title": "Extensions" }, + "ExtractAction": { + "properties": { + "type": { + "type": "string", + "const": "extract", + "title": "Type", + "default": "extract" + }, + "prompt": { + "type": "string", + "title": "Prompt", + "description": "Extraction prompt", + "default": "" + }, + "extract_schema": { + "anyOf": [ + { + "additionalProperties": true, + "type": "object" + }, + { + "items": {}, + "type": "array" + }, + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Extract Schema", + "description": "Schema for extraction" + }, + "error_code_mapping": { + "anyOf": [ + { + "additionalProperties": { + "type": "string" + }, + "type": "object" + }, + { + "type": "null" + } + ], + "title": "Error Code Mapping", + "description": "Error code mapping for extraction" + }, + "intention": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Intention", + "description": "The intention or goal of the extraction" + }, + "data": { + "anyOf": [ + { + "type": "string" + }, + { + "additionalProperties": true, + "type": "object" + }, + { + "type": "null" + } + ], + "title": "Data", + "description": "Additional context data" + } + }, + "type": "object", + "title": "ExtractAction", + "description": "Extract data action parameters." 
+ }, "ExtractionBlock": { "properties": { "label": { @@ -10776,6 +11010,97 @@ "type": "object", "title": "InputOrSelectContext" }, + "InputTextAction": { + "properties": { + "type": { + "type": "string", + "const": "ai_input_text", + "title": "Type", + "default": "ai_input_text" + }, + "selector": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Selector", + "description": "CSS selector for the element", + "default": "" + }, + "value": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Value", + "description": "Value to input", + "default": "" + }, + "intention": { + "type": "string", + "title": "Intention", + "description": "The intention or goal of the input", + "default": "" + }, + "data": { + "anyOf": [ + { + "type": "string" + }, + { + "additionalProperties": true, + "type": "object" + }, + { + "type": "null" + } + ], + "title": "Data", + "description": "Additional context data" + }, + "totp_identifier": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Totp Identifier", + "description": "TOTP identifier for input_text actions" + }, + "totp_url": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Totp Url", + "description": "TOTP URL for input_text actions" + }, + "timeout": { + "type": "number", + "title": "Timeout", + "description": "Timeout in milliseconds", + "default": 10000 + } + }, + "type": "object", + "title": "InputTextAction", + "description": "Input text action parameters." + }, "JinjaBranchCriteria": { "properties": { "criteria_type": { @@ -10807,6 +11132,25 @@ "title": "JinjaBranchCriteria", "description": "Jinja2-templated branch criteria (only supported criteria type for now)." 
}, + "LocateElementAction": { + "properties": { + "type": { + "type": "string", + "const": "locate_element", + "title": "Type", + "default": "locate_element" + }, + "prompt": { + "type": "string", + "title": "Prompt", + "description": "Natural language prompt to locate an element", + "default": "" + } + }, + "type": "object", + "title": "LocateElementAction", + "description": "Locate element action parameters." + }, "LoginBlock": { "properties": { "label": { @@ -12859,6 +13203,53 @@ ], "title": "PrintPageBlockYAML" }, + "PromptAction": { + "properties": { + "type": { + "type": "string", + "const": "prompt", + "title": "Type", + "default": "prompt" + }, + "prompt": { + "type": "string", + "title": "Prompt", + "description": "The prompt to send to the LLM" + }, + "response_schema": { + "anyOf": [ + { + "additionalProperties": true, + "type": "object" + }, + { + "type": "null" + } + ], + "title": "Response Schema", + "description": "Optional JSON schema to structure the response" + }, + "model": { + "anyOf": [ + { + "additionalProperties": true, + "type": "object" + }, + { + "type": "null" + } + ], + "title": "Model", + "description": "Optional model configuration" + } + }, + "type": "object", + "required": [ + "prompt" + ], + "title": "PromptAction", + "description": "Prompt action parameters." 
+ }, "PromptBranchCriteria": { "properties": { "criteria_type": { @@ -12952,6 +13343,130 @@ ], "title": "RunEngine" }, + "RunSdkActionRequest": { + "properties": { + "url": { + "type": "string", + "title": "Url", + "description": "The URL where the action should be executed" + }, + "browser_session_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Browser Session Id", + "description": "The browser session ID" + }, + "browser_address": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Browser Address", + "description": "The browser address" + }, + "workflow_run_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Workflow Run Id", + "description": "Optional workflow run ID to continue an existing workflow run" + }, + "action": { + "oneOf": [ + { + "$ref": "#/components/schemas/ClickAction" + }, + { + "$ref": "#/components/schemas/InputTextAction" + }, + { + "$ref": "#/components/schemas/SelectOptionAction" + }, + { + "$ref": "#/components/schemas/UploadFileAction" + }, + { + "$ref": "#/components/schemas/ActAction" + }, + { + "$ref": "#/components/schemas/ExtractAction" + }, + { + "$ref": "#/components/schemas/LocateElementAction" + }, + { + "$ref": "#/components/schemas/ValidateAction" + }, + { + "$ref": "#/components/schemas/PromptAction" + } + ], + "title": "Action", + "description": "The action to execute with its specific parameters", + "discriminator": { + "propertyName": "type", + "mapping": { + "ai_act": "#/components/schemas/ActAction", + "ai_click": "#/components/schemas/ClickAction", + "ai_input_text": "#/components/schemas/InputTextAction", + "ai_select_option": "#/components/schemas/SelectOptionAction", + "ai_upload_file": "#/components/schemas/UploadFileAction", + "extract": "#/components/schemas/ExtractAction", + "locate_element": "#/components/schemas/LocateElementAction", + "prompt": "#/components/schemas/PromptAction", + "validate": 
"#/components/schemas/ValidateAction" + } + } + } + }, + "type": "object", + "required": [ + "url", + "action" + ], + "title": "RunSdkActionRequest", + "description": "Request to run a single SDK action." + }, + "RunSdkActionResponse": { + "properties": { + "workflow_run_id": { + "type": "string", + "title": "Workflow Run Id", + "description": "The workflow run ID used for this action" + }, + "result": { + "anyOf": [ + {}, + { + "type": "null" + } + ], + "title": "Result", + "description": "The result from the action (e.g., selector, value, extracted data)" + } + }, + "type": "object", + "required": [ + "workflow_run_id" + ], + "title": "RunSdkActionResponse", + "description": "Response from running an SDK action." + }, "RunStatus": { "type": "string", "enum": [ @@ -13178,6 +13693,73 @@ "type": "object", "title": "SelectOption" }, + "SelectOptionAction": { + "properties": { + "type": { + "type": "string", + "const": "ai_select_option", + "title": "Type", + "default": "ai_select_option" + }, + "selector": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Selector", + "description": "CSS selector for the element", + "default": "" + }, + "value": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Value", + "description": "Value to select", + "default": "" + }, + "intention": { + "type": "string", + "title": "Intention", + "description": "The intention or goal of the selection", + "default": "" + }, + "data": { + "anyOf": [ + { + "type": "string" + }, + { + "additionalProperties": true, + "type": "object" + }, + { + "type": "null" + } + ], + "title": "Data", + "description": "Additional context data" + }, + "timeout": { + "type": "number", + "title": "Timeout", + "description": "Timeout in milliseconds", + "default": 10000 + } + }, + "type": "object", + "title": "SelectOptionAction", + "description": "Select option action parameters." 
+ }, "SendEmailBlock": { "properties": { "label": { @@ -15609,6 +16191,73 @@ "title": "UpdateWorkflowFolderRequest", "description": "Request model for updating a workflow's folder assignment" }, + "UploadFileAction": { + "properties": { + "type": { + "type": "string", + "const": "ai_upload_file", + "title": "Type", + "default": "ai_upload_file" + }, + "selector": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Selector", + "description": "CSS selector for the element", + "default": "" + }, + "file_url": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "File Url", + "description": "File URL for upload", + "default": "" + }, + "intention": { + "type": "string", + "title": "Intention", + "description": "The intention or goal of the upload", + "default": "" + }, + "data": { + "anyOf": [ + { + "type": "string" + }, + { + "additionalProperties": true, + "type": "object" + }, + { + "type": "null" + } + ], + "title": "Data", + "description": "Additional context data" + }, + "timeout": { + "type": "number", + "title": "Timeout", + "description": "Timeout in milliseconds", + "default": 10000 + } + }, + "type": "object", + "title": "UploadFileAction", + "description": "Upload file action parameters." + }, "UploadFileResponse": { "properties": { "s3_uri": { @@ -16144,6 +16793,40 @@ ], "title": "UserDefinedError" }, + "ValidateAction": { + "properties": { + "type": { + "type": "string", + "const": "validate", + "title": "Type", + "default": "validate" + }, + "prompt": { + "type": "string", + "title": "Prompt", + "description": "Validation criteria or condition to check" + }, + "model": { + "anyOf": [ + { + "additionalProperties": true, + "type": "object" + }, + { + "type": "null" + } + ], + "title": "Model", + "description": "Optional model configuration" + } + }, + "type": "object", + "required": [ + "prompt" + ], + "title": "ValidateAction", + "description": "Validate action parameters." 
+ }, "ValidationBlock": { "properties": { "label": { diff --git a/pyproject.toml b/pyproject.toml index b1984da93..1c2741deb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "skyvern" -version = "1.0.28" +version = "1.0.29" description = "" authors = [{ name = "Skyvern AI", email = "info@skyvern.com" }] requires-python = ">=3.11,<3.14" diff --git a/skyvern-frontend/src/routes/credentials/CredentialsModal.tsx b/skyvern-frontend/src/routes/credentials/CredentialsModal.tsx index 0cdfe4a97..5f14321f8 100644 --- a/skyvern-frontend/src/routes/credentials/CredentialsModal.tsx +++ b/skyvern-frontend/src/routes/credentials/CredentialsModal.tsx @@ -753,10 +753,10 @@ function CredentialsModal({ ? updateCredentialMutation : createCredentialMutation; - const handleRenameOnly = (name: string, hasMetadataChanges: boolean) => { + const handleRenameOnly = (name: string) => { if (!editingCredential) return; - // Skip the API call if nothing actually changed - if (name === editingCredential.name && !hasMetadataChanges) { + // Skip the API call if the name hasn't actually changed + if (name === editingCredential.name) { reset(); setIsOpen(false); return; @@ -764,9 +764,6 @@ function CredentialsModal({ renameCredentialMutation.mutate({ id: editingCredential.credential_id, name, - tested_url: testUrl.trim() || undefined, - user_context: userContext.trim() || null, - save_browser_session_intent: testAndSave, }); }; @@ -788,26 +785,14 @@ function CredentialsModal({ // In edit mode, use editingGroups to determine what changed (type-agnostic) if (isEditMode && editingCredential) { - const hasMetadataChanges = - testUrl.trim() !== (editingCredential.tested_url ?? "") || - testAndSave !== - (editingCredential.save_browser_session_intent ?? - !!editingCredential.browser_profile_id) || - userContext.trim() !== (editingCredential.user_context ?? 
""); - if (!editingGroups.name && !editingGroups.values) { - if (!hasMetadataChanges) { - // Nothing was edited — close silently - reset(); - setIsOpen(false); - return; - } - // Only metadata changed (no auth values edited) — save via PATCH - handleRenameOnly(name, hasMetadataChanges); + // Nothing was edited — close silently + reset(); + setIsOpen(false); return; } if (editingGroups.name && !editingGroups.values) { - handleRenameOnly(name, hasMetadataChanges); + handleRenameOnly(name); return; } } diff --git a/skyvern/cli/commands/browser.py b/skyvern/cli/commands/browser.py index 8ad64f954..d1b1c5e8d 100644 --- a/skyvern/cli/commands/browser.py +++ b/skyvern/cli/commands/browser.py @@ -20,11 +20,17 @@ from skyvern.cli.core.artifacts import save_artifact from skyvern.cli.core.browser_ops import ( do_act, do_extract, + do_find, do_frame_list, do_frame_main, do_frame_switch, + do_get_html, + do_get_styles, + do_get_value, do_navigate, do_screenshot, + do_state_load, + do_state_save, ) from skyvern.cli.core.client import get_skyvern from skyvern.cli.core.guards import ( @@ -42,12 +48,17 @@ from skyvern.cli.core.ngrok import check_ngrok_auth, detect_ngrok, offer_install from skyvern.cli.core.session_ops import do_session_close, do_session_create, do_session_list from skyvern.cli.mcp_tools.browser import skyvern_login as tool_login from skyvern.cli.mcp_tools.browser import skyvern_run_task as tool_run_task +from skyvern.cli.mcp_tools.inspection import skyvern_har_start, skyvern_har_stop browser_app = typer.Typer(help="Browser automation commands.", no_args_is_help=True) session_app = typer.Typer(help="Manage browser sessions.", no_args_is_help=True) frame_app = typer.Typer(help="Manage iframe context.", no_args_is_help=True) +state_app = typer.Typer(help="Save and load browser auth state.", no_args_is_help=True) +storage_app = typer.Typer(help="Read, write, and clear web storage.", no_args_is_help=True) browser_app.add_typer(session_app, name="session") 
browser_app.add_typer(frame_app, name="frame") +browser_app.add_typer(state_app, name="state") +browser_app.add_typer(storage_app, name="storage") @dataclass(frozen=True) @@ -1519,3 +1530,356 @@ def frame_list_cmd( output(data, action="frame_list", json_mode=json_output) except Exception as e: output_error(str(e), json_mode=json_output) + + +# ── State persistence commands ────────────────────────────────────── + + +@state_app.command("save") +def state_save_cmd( + file_path: str = typer.Argument(help="Path to save state file (JSON)."), + session: str | None = typer.Option(None, help="Browser session ID."), + cdp: str | None = typer.Option(None, "--cdp", help="CDP WebSocket URL."), + json_output: bool = typer.Option(False, "--json", help="Output as JSON."), +) -> None: + """Save browser auth state (cookies + localStorage + sessionStorage) to a file.""" + from skyvern.cli.mcp_tools.state import _validate_state_path + + async def _run() -> dict: + resolved = _validate_state_path(file_path) + connection = _resolve_connection(session, cdp) + browser = await _connect_browser(connection) + page = await browser.get_working_page() + result = await do_state_save(page.page, browser, resolved) + return { + "file_path": result.file_path, + "cookie_count": result.cookie_count, + "local_storage_count": result.local_storage_count, + "session_storage_count": result.session_storage_count, + "url": result.url, + } + + try: + data = asyncio.run(_run()) + output(data, action="state_save", json_mode=json_output) + except Exception as e: + output_error(str(e), json_mode=json_output) + + +@state_app.command("load") +def state_load_cmd( + file_path: str = typer.Argument(help="Path to state file (JSON) from state save."), + session: str | None = typer.Option(None, help="Browser session ID."), + cdp: str | None = typer.Option(None, "--cdp", help="CDP WebSocket URL."), + json_output: bool = typer.Option(False, "--json", help="Output as JSON."), +) -> None: + """Load browser auth state 
(cookies + localStorage + sessionStorage) from a file.""" + from urllib.parse import urlparse + + from skyvern.cli.mcp_tools.state import _validate_state_path + + async def _run() -> dict: + resolved = _validate_state_path(file_path, must_exist=True) + connection = _resolve_connection(session, cdp) + browser = await _connect_browser(connection) + page = await browser.get_working_page() + current_domain = urlparse(page.page.url).hostname or "" + result = await do_state_load(page.page, browser, resolved, current_domain) + return { + "cookie_count": result.cookie_count, + "local_storage_count": result.local_storage_count, + "session_storage_count": result.session_storage_count, + "source_url": result.source_url, + "skipped_cookies": result.skipped_cookies, + } + + try: + data = asyncio.run(_run()) + output(data, action="state_load", json_mode=json_output) + except Exception as e: + output_error(str(e), json_mode=json_output) + + +# ── Web storage commands ──────────────────────────────────────────── + + +@storage_app.command("get-session") +def storage_get_session_cmd( + keys: list[str] | None = typer.Argument(None, help="Specific keys to retrieve. 
Omit for all."), + session: str | None = typer.Option(None, help="Browser session ID."), + cdp: str | None = typer.Option(None, "--cdp", help="CDP WebSocket URL."), + json_output: bool = typer.Option(False, "--json", help="Output as JSON."), +) -> None: + """Read sessionStorage values from the current page.""" + + async def _run() -> dict: + connection = _resolve_connection(session, cdp) + browser = await _connect_browser(connection) + page = await browser.get_working_page() + if keys: + items = {} + for key in keys: + val = await page.page.evaluate(f"() => window.sessionStorage.getItem({json.dumps(key)})") + if val is not None: + items[key] = val + else: + items = await page.page.evaluate("() => Object.fromEntries(Object.entries(window.sessionStorage))") + return {"items": items, "count": len(items)} + + try: + data = asyncio.run(_run()) + output(data, action="get_session_storage", json_mode=json_output) + except Exception as e: + output_error(str(e), json_mode=json_output) + + +@storage_app.command("set-session") +def storage_set_session_cmd( + key: str = typer.Argument(help="The key to set."), + value: str = typer.Argument(help="The value to store."), + session: str | None = typer.Option(None, help="Browser session ID."), + cdp: str | None = typer.Option(None, "--cdp", help="CDP WebSocket URL."), + json_output: bool = typer.Option(False, "--json", help="Output as JSON."), +) -> None: + """Set a sessionStorage key-value pair.""" + + async def _run() -> dict: + connection = _resolve_connection(session, cdp) + browser = await _connect_browser(connection) + page = await browser.get_working_page() + await page.page.evaluate("(args) => window.sessionStorage.setItem(args[0], args[1])", [key, value]) + return {"key": key, "value_length": len(value)} + + try: + data = asyncio.run(_run()) + output(data, action="set_session_storage", json_mode=json_output) + except Exception as e: + output_error(str(e), json_mode=json_output) + + +@storage_app.command("clear-session") +def 
storage_clear_session_cmd( + session: str | None = typer.Option(None, help="Browser session ID."), + cdp: str | None = typer.Option(None, "--cdp", help="CDP WebSocket URL."), + json_output: bool = typer.Option(False, "--json", help="Output as JSON."), +) -> None: + """Clear all sessionStorage entries.""" + + async def _run() -> dict: + connection = _resolve_connection(session, cdp) + browser = await _connect_browser(connection) + page = await browser.get_working_page() + count = await page.page.evaluate( + "() => { const n = window.sessionStorage.length; window.sessionStorage.clear(); return n; }" + ) + return {"cleared_count": count} + + try: + data = asyncio.run(_run()) + output(data, action="clear_session_storage", json_mode=json_output) + except Exception as e: + output_error(str(e), json_mode=json_output) + + +@storage_app.command("clear-local") +def storage_clear_local_cmd( + session: str | None = typer.Option(None, help="Browser session ID."), + cdp: str | None = typer.Option(None, "--cdp", help="CDP WebSocket URL."), + json_output: bool = typer.Option(False, "--json", help="Output as JSON."), +) -> None: + """Clear all localStorage entries.""" + + async def _run() -> dict: + connection = _resolve_connection(session, cdp) + browser = await _connect_browser(connection) + page = await browser.get_working_page() + count = await page.page.evaluate( + "() => { const n = window.localStorage.length; window.localStorage.clear(); return n; }" + ) + return {"cleared_count": count} + + try: + data = asyncio.run(_run()) + output(data, action="clear_local_storage", json_mode=json_output) + except Exception as e: + output_error(str(e), json_mode=json_output) + + +# ── Page JS errors command ─────────────────────────────────────────── + + +@browser_app.command("get-errors") +def get_errors_cmd( + text: str | None = typer.Option(None, "--text", help="Filter by substring match (case-insensitive)."), + clear: bool = typer.Option(False, "--clear", help="Clear the buffer after 
reading."), + session: str | None = typer.Option(None, help="Browser session ID."), + cdp: str | None = typer.Option(None, "--cdp", help="CDP WebSocket URL."), + json_output: bool = typer.Option(False, "--json", help="Output as JSON."), +) -> None: + """Read uncaught JavaScript errors from the browser page.""" + from skyvern.cli.mcp_tools.inspection import skyvern_get_errors + + async def _run() -> dict: + return await skyvern_get_errors(text=text, clear=clear, session_id=session, cdp_url=cdp) + + try: + result = asyncio.run(_run()) + if result.get("ok"): + output(result["data"], action="get_errors", json_mode=json_output) + else: + output_error(result.get("error", {}).get("message", "Unknown error"), json_mode=json_output) + except Exception as e: + output_error(str(e), json_mode=json_output) + + +# ── HAR recording commands ─────────────────────────────────────────── + + +@browser_app.command("har-start") +def har_start_cmd( + session: str | None = typer.Option(None, help="Browser session ID."), + cdp: str | None = typer.Option(None, "--cdp", help="CDP WebSocket URL."), + json_output: bool = typer.Option(False, "--json", help="Output as JSON."), +) -> None: + """Start recording network traffic in HAR format.""" + + async def _run() -> dict: + return await skyvern_har_start(session_id=session, cdp_url=cdp) + + try: + result = asyncio.run(_run()) + if result.get("ok"): + output(result["data"], action="har_start", json_mode=json_output) + else: + output_error(result.get("error", {}).get("message", "Unknown error"), json_mode=json_output) + except Exception as e: + output_error(str(e), json_mode=json_output) + + +@browser_app.command("har-stop") +def har_stop_cmd( + session: str | None = typer.Option(None, help="Browser session ID."), + cdp: str | None = typer.Option(None, "--cdp", help="CDP WebSocket URL."), + json_output: bool = typer.Option(False, "--json", help="Output as JSON."), +) -> None: + """Stop HAR recording and return captured traffic.""" + + async def 
_run() -> dict: + return await skyvern_har_stop(session_id=session, cdp_url=cdp) + + try: + result = asyncio.run(_run()) + if result.get("ok"): + output(result["data"], action="har_stop", json_mode=json_output) + else: + output_error(result.get("error", {}).get("message", "Unknown error"), json_mode=json_output) + except Exception as e: + output_error(str(e), json_mode=json_output) + + +# ── DOM Inspection commands ────────────────────────────────────────── + + +@browser_app.command("get-html") +def get_html_cmd( + selector: str = typer.Argument(help="CSS or XPath selector for the element."), + outer: bool = typer.Option(False, "--outer", help="Return outerHTML instead of innerHTML."), + session: str | None = typer.Option(None, help="Browser session ID."), + cdp: str | None = typer.Option(None, "--cdp", help="CDP WebSocket URL."), + json_output: bool = typer.Option(False, "--json", help="Output as JSON."), +) -> None: + """Get the HTML content of a DOM element.""" + + async def _run() -> dict: + connection = _resolve_connection(session, cdp) + browser = await _connect_browser(connection) + page = await browser.get_working_page() + html = await do_get_html(page.page, selector, outer=outer) + return {"html": html, "selector": selector, "outer": outer, "length": len(html)} + + try: + data = asyncio.run(_run()) + output(data, action="get_html", json_mode=json_output) + except Exception as e: + output_error(str(e), json_mode=json_output) + + +@browser_app.command("get-value") +def get_value_cmd( + selector: str = typer.Argument(help="CSS or XPath selector for the input element."), + session: str | None = typer.Option(None, help="Browser session ID."), + cdp: str | None = typer.Option(None, "--cdp", help="CDP WebSocket URL."), + json_output: bool = typer.Option(False, "--json", help="Output as JSON."), +) -> None: + """Get the current value of a form input element.""" + + async def _run() -> dict: + connection = _resolve_connection(session, cdp) + browser = await 
@browser_app.command("find")
def find_cmd(
    by: str = typer.Argument(help="Locator type: role, text, label, placeholder, alt, testid."),
    value: str = typer.Argument(help="The text/role/label to match."),
    session: str | None = typer.Option(None, help="Browser session ID."),
    cdp: str | None = typer.Option(None, "--cdp", help="CDP WebSocket URL."),
    json_output: bool = typer.Option(False, "--json", help="Output as JSON."),
) -> None:
    """Find elements using Playwright semantic locators (role, text, label, etc.)."""
    # NOTE: the docstring above doubles as the CLI help text, so it is kept verbatim.
    from skyvern.cli.core.browser_ops import LOCATOR_TYPES

    # Reject unknown locator kinds up front, before any browser connection is attempted.
    if by not in LOCATOR_TYPES:
        allowed = ", ".join(sorted(LOCATOR_TYPES))
        output_error(f"Invalid locator type: {by!r}. Must be one of: {allowed}", json_mode=json_output)
        raise typer.Exit(code=2)

    async def _locate() -> dict:
        target = _resolve_connection(session, cdp)
        skyvern_browser = await _connect_browser(target)
        working_page = await skyvern_browser.get_working_page()
        found = await do_find(working_page, by=by, value=value)
        # The result is a dataclass; flatten it to a plain dict for output().
        return asdict(found)

    try:
        output(asyncio.run(_locate()), action="find", json_mode=json_output)
    except Exception as exc:
        output_error(str(exc), json_mode=json_output)
Must be one of: {', '.join(sorted(LOCATOR_TYPES))}", + f"Use one of: {', '.join(sorted(LOCATOR_TYPES))}", + ) + locator = getattr(page, locator_map[by])(value) + count = await locator.count() + first_text = await locator.first.text_content() if count > 0 else None + first_visible = await locator.first.is_visible() if count > 0 else False + return FindResult( + selector=f"{locator_map[by]}({value!r})", + count=count, + first_text=first_text, + first_visible=first_visible, + ) + + # -- Frame operations -- @@ -131,3 +176,157 @@ def do_frame_main(page: Any) -> None: async def do_frame_list(page: Any) -> list[FrameInfo]: frames = await page.frame_list() return [FrameInfo(index=f["index"], name=f["name"], url=f["url"], is_main=f["is_main"]) for f in frames] + + +# -- Auth state persistence -- + + +@dataclass +class StateSaveResult: + file_path: str + cookie_count: int + local_storage_count: int + session_storage_count: int + url: str + + +@dataclass +class StateLoadResult: + cookie_count: int + local_storage_count: int + session_storage_count: int + source_url: str + skipped_cookies: int + + +def _cookie_domain_matches(cookie_domain: str, page_domain: str) -> bool: + """Check if a cookie's domain matches the current page domain per RFC 6265. + + Handles leading dots (wildcard subdomains). + Rejects suffix attacks: 'evil-example.com' must NOT match 'example.com'. + """ + if not cookie_domain or not page_domain: + return False + cd = cookie_domain.lstrip(".") + if not cd: + return False + return page_domain == cd or page_domain.endswith("." + cd) + + +async def do_state_save(page: Any, browser: Any, file_path: Path) -> StateSaveResult: + """Save browser auth state to a JSON file. + + ``page`` is the raw Playwright Page (not SkyvernBrowserPage). + ``browser`` is a SkyvernBrowser — cookies accessed via ``browser._browser_context``. 
async def do_state_load(
    page: Any,
    browser: Any,
    file_path: Path,
    current_domain: str,
) -> StateLoadResult:
    """Load browser auth state from a JSON file.

    Validates JSON schema version. Filters cookies to only apply those matching
    ``current_domain`` to prevent cross-domain session injection.

    Args:
        page: Page object whose ``evaluate`` is used to write localStorage and
            sessionStorage entries.
        browser: Object exposing the Playwright context as ``_browser_context``;
            cookies are added through it.
        file_path: Path of the JSON state file to read.
        current_domain: Domain the restored cookies must match.

    Returns:
        StateLoadResult with the number of cookies applied and skipped, the
        number of storage entries written, and the URL recorded in the file.

    Raises:
        ValueError: If the file's top-level JSON value is not an object or its
            ``version`` is not 1.
        json.JSONDecodeError: If the file is not valid JSON (a ValueError subclass).
        OSError: If the file cannot be read.
    """
    # Decode explicitly as UTF-8 instead of relying on the locale's default encoding.
    state = json.loads(file_path.read_text(encoding="utf-8"))
    if not isinstance(state, dict):
        # Without this check a JSON array or scalar fails below with AttributeError.
        raise ValueError("Invalid state file: top-level JSON value must be an object")
    if state.get("version") != 1:
        raise ValueError(f"Unsupported state file version: {state.get('version')}")

    pw_context = browser._browser_context

    # `or` also covers an explicit JSON null for these keys.
    all_cookies = state.get("cookies") or []
    # Only cookies scoped to the current domain are applied; malformed (non-dict)
    # entries are counted as skipped rather than crashing the load.
    safe_cookies = [
        c for c in all_cookies if isinstance(c, dict) and _cookie_domain_matches(c.get("domain", ""), current_domain)
    ]
    skipped = len(all_cookies) - len(safe_cookies)

    if safe_cookies:
        await pw_context.add_cookies(safe_cookies)

    # NOTE(review): storage entries are written to whatever origin `page` currently
    # shows; unlike cookies they are not filtered by domain — confirm this is intended.
    local_storage = state.get("local_storage") or {}
    for k, v in local_storage.items():
        await page.evaluate(
            "(args) => window.localStorage.setItem(args[0], args[1])",
            [k, v],
        )

    session_storage = state.get("session_storage") or {}
    for k, v in session_storage.items():
        await page.evaluate(
            "(args) => window.sessionStorage.setItem(args[0], args[1])",
            [k, v],
        )

    return StateLoadResult(
        cookie_count=len(safe_cookies),
        local_storage_count=len(local_storage),
        session_storage_count=len(session_storage),
        source_url=state.get("url", ""),
        skipped_cookies=skipped,
    )
async def do_get_styles(page: Any, selector: str, properties: list[str] | None = None) -> dict[str, str]:
    """Read computed CSS styles for the element matched by ``selector``.

    With ``properties`` omitted (``None``), the first 100 computed properties
    are returned. With an explicit list, only those properties are looked up;
    an empty list returns ``{}`` without touching the page.
    """
    if properties is None:
        # No explicit selection: dump the computed style, capped at 100 entries.
        return await page.locator(selector).evaluate(
            """el => {
            const styles = window.getComputedStyle(el);
            const result = {};
            for (let i = 0; i < Math.min(styles.length, 100); i++) {
                result[styles[i]] = styles.getPropertyValue(styles[i]);
            }
            return result;
        }"""
        )

    if not properties:
        # Nothing was asked for, so skip the round trip to the browser.
        return {}

    element = page.locator(selector)
    return await element.evaluate(
        """(el, props) => {
                    const styles = window.getComputedStyle(el);
                    return Object.fromEntries(props.map(p => [p, styles.getPropertyValue(p)]));
                }""",
        properties,
    )
b/skyvern/cli/mcp_tools/__init__.py index a4210e1f4..c0d94b056 100644 --- a/skyvern/cli/mcp_tools/__init__.py +++ b/skyvern/cli/mcp_tools/__init__.py @@ -19,6 +19,7 @@ from .browser import ( skyvern_evaluate, skyvern_extract, skyvern_file_upload, + skyvern_find, skyvern_frame_list, skyvern_frame_main, skyvern_frame_switch, @@ -48,7 +49,13 @@ from .folder import ( ) from .inspection import ( skyvern_console_messages, + skyvern_get_errors, + skyvern_get_html, + skyvern_get_styles, + skyvern_get_value, skyvern_handle_dialog, + skyvern_har_start, + skyvern_har_stop, skyvern_network_requests, ) from .prompts import build_workflow, debug_automation, extract_data, qa_test @@ -66,6 +73,13 @@ from .session import ( skyvern_browser_session_get, skyvern_browser_session_list, ) +from .state import skyvern_state_load, skyvern_state_save +from .storage import ( + skyvern_clear_local_storage, + skyvern_clear_session_storage, + skyvern_get_session_storage, + skyvern_set_session_storage, +) from .tabs import ( skyvern_tab_close, skyvern_tab_list, @@ -157,6 +171,9 @@ targeted test cases, open a browser against the dev server, and report pass/fail | "Switch to [tab]" / "Go to tab [N]" | skyvern_tab_switch | Change active tab | | "Close tab" / "Close this tab" | skyvern_tab_close | Close tab by ID or index | | "Wait for popup" / "A new tab should open" | skyvern_tab_wait_for_new | Waits for popup/new tab | +| "Save login state" / "Remember this session" | skyvern_state_save | Persists cookies + storage to file | +| "Restore login" / "Load saved state" | skyvern_state_load | Restores cookies + storage from file | +| "Find button" / "Locate element by role/text" | skyvern_find | Semantic locator: find by role, text, label, placeholder, alt, testid | ## Critical Rules 1. Use Skyvern for all browser tasks. curl/wget/requests are fine for APIs and file downloads. 
@@ -378,6 +395,7 @@ mcp.tool(tags={"browser_primitive"}, annotations=_MUT)(skyvern_scroll) mcp.tool(tags={"browser_primitive"}, annotations=_MUT)(skyvern_select_option) mcp.tool(tags={"browser_primitive"}, annotations=_MUT)(skyvern_press_key) mcp.tool(tags={"browser_primitive"}, annotations=_MUT)(skyvern_wait) +mcp.tool(tags={"browser_primitive"}, annotations=_RO)(skyvern_find) # -- Tab management (multi-tab) -- mcp.tool(tags={"tab_management"}, annotations=_RO)(skyvern_tab_list) @@ -391,10 +409,26 @@ mcp.tool(tags={"browser_primitive"}, annotations=_MUT)(skyvern_frame_switch) mcp.tool(tags={"browser_primitive"}, annotations=_MUT)(skyvern_frame_main) mcp.tool(tags={"browser_primitive"}, annotations=_RO)(skyvern_frame_list) -# -- Inspection tools (console, network, dialog) -- +# -- Auth state persistence -- +mcp.tool(tags={"state"}, annotations=_MUT)(skyvern_state_save) +mcp.tool(tags={"state"}, annotations=_MUT)(skyvern_state_load) + +# -- Inspection tools (console, network, dialog, page errors, DOM) -- mcp.tool(tags={"inspection"}, annotations=_RO)(skyvern_console_messages) mcp.tool(tags={"inspection"}, annotations=_RO)(skyvern_network_requests) mcp.tool(tags={"inspection"}, annotations=_RO)(skyvern_handle_dialog) +mcp.tool(tags={"inspection"}, annotations=_RO)(skyvern_get_errors) +mcp.tool(tags={"inspection"}, annotations=_MUT)(skyvern_har_start) +mcp.tool(tags={"inspection"}, annotations=_MUT)(skyvern_har_stop) +mcp.tool(tags={"inspection"}, annotations=_RO)(skyvern_get_html) +mcp.tool(tags={"inspection"}, annotations=_RO)(skyvern_get_value) +mcp.tool(tags={"inspection"}, annotations=_RO)(skyvern_get_styles) + +# -- Web storage (sessionStorage + localStorage) -- +mcp.tool(tags={"storage"}, annotations=_RO)(skyvern_get_session_storage) +mcp.tool(tags={"storage"}, annotations=_MUT)(skyvern_set_session_storage) +mcp.tool(tags={"storage"}, annotations=_DEST)(skyvern_clear_session_storage) +mcp.tool(tags={"storage"}, annotations=_DEST)(skyvern_clear_local_storage) # 
async def skyvern_find(
    by: Annotated[
        str,
        Field(description="Locator type: role, text, label, placeholder, alt, testid"),
    ],
    value: Annotated[
        str,
        Field(description="The text, role, label, placeholder, alt text, or test ID to match"),
    ],
    session_id: Annotated[str | None, Field(description="Browser session ID (pbs_...)")] = None,
    cdp_url: Annotated[str | None, Field(description="CDP WebSocket URL")] = None,
) -> dict[str, Any]:
    """Find elements using Playwright's semantic locator API.

    Locates elements by accessibility role, visible text, label, placeholder, alt text, or test ID.
    Returns the match count, first element's text content, and visibility status.
    Use this to verify elements exist before interacting with them, or to inspect element state.

    Locator types:
    - role: ARIA role (button, link, heading, textbox, etc.)
    - text: Visible text content
    - label: Associated label text (for form inputs)
    - placeholder: Placeholder attribute text
    - alt: Alt text (for images)
    - testid: data-testid attribute value
    """
    # NOTE: this function is registered as an MCP tool, so the docstring above is kept verbatim.
    tool_name = "skyvern_find"

    try:
        page, ctx = await get_page(session_id=session_id, cdp_url=cdp_url)
    except BrowserNotAvailableError:
        return make_result(tool_name, ok=False, error=no_browser_error())

    with Timer() as timer:
        try:
            found = await do_find(page, by=by, value=value)
            timer.mark("find")
        except Exception as exc:
            elapsed = timer.timing_ms
            if isinstance(exc, GuardError):
                # Invalid locator type: forward the guard's own hint to the caller.
                failure = make_error(ErrorCode.INVALID_INPUT, str(exc), exc.hint)
            else:
                failure = make_error(ErrorCode.ACTION_FAILED, str(exc), "Check the locator type and value")
            return make_result(
                tool_name,
                ok=False,
                browser_context=ctx,
                timing_ms=elapsed,
                error=failure,
            )

    payload = {
        "selector": found.selector,
        "count": found.count,
        "first_text": found.first_text,
        "first_visible": found.first_visible,
        # Same locator expressed as an SDK call, e.g. page.get_by_role('button').
        "sdk_equivalent": f"page.{found.selector}",
    }
    return make_result(
        tool_name,
        browser_context=ctx,
        data=payload,
        timing_ms=timer.timing_ms,
    )
import time +from datetime import datetime, timedelta, timezone from typing import Annotated, Any +from urllib.parse import parse_qsl, urlparse import structlog from pydantic import Field @@ -32,6 +34,9 @@ _SECRET_QUERY_PARAMS = frozenset( } ) +_REDACTED_HEADERS = frozenset({"authorization", "cookie", "set-cookie", "proxy-authorization"}) +_SECRET_QS_NAMES = frozenset(p.lower() for p in _SECRET_QUERY_PARAMS) + _STATELESS_ERROR_MSG = ( "Inspection tools are not supported in stateless HTTP mode. " "Event buffers are not persisted across requests in this transport. " @@ -74,7 +79,7 @@ def _redact_url(url: str) -> str: def _make_page_handlers(state: Any, raw_page: Any) -> dict[str, Any]: - """Create console/network/dialog handlers bound to a specific page.""" + """Create console/network/dialog/pageerror handlers bound to a specific page.""" def _on_console(msg: Any) -> None: try: @@ -105,6 +110,10 @@ def _make_page_handlers(state: Any, raw_page: Any) -> dict[str, Any]: pass content_length = response.headers.get("content-length") + try: + response_size = int(content_length) if content_length is not None else None + except (ValueError, TypeError): + response_size = None state.network_requests.append( { "url": _redact_url(response.url), @@ -112,11 +121,72 @@ def _make_page_handlers(state: Any, raw_page: Any) -> dict[str, Any]: "status": response.status, "content_type": response.headers.get("content-type", ""), "timing_ms": round(timing, 1), - "response_size": int(content_length) if content_length is not None else None, + "response_size": response_size, "page_url": raw_page.url, "tab_id": str(id(raw_page)), } ) + + # HAR recording: capture enhanced entry when enabled + if state.har_enabled: + req_headers = [] + try: + for k, v in response.request.headers.items(): + if k.lower() not in _REDACTED_HEADERS: + req_headers.append({"name": k, "value": v}) + except Exception: + pass + + resp_headers = [] + try: + for k, v in response.headers.items(): + if k.lower() not in 
_REDACTED_HEADERS: + resp_headers.append({"name": k, "value": v}) + except Exception: + pass + + # Approximate request start from response time minus elapsed + started = datetime.now(timezone.utc) - timedelta(milliseconds=timing) + qs = [ + {"name": n, "value": "REDACTED" if n.lower() in _SECRET_QS_NAMES else v} + for n, v in parse_qsl(urlparse(response.url).query) + ] + + state._har_entries.append( + { + "startedDateTime": started.isoformat(), + "time": round(timing, 1), + "request": { + "method": response.request.method, + "url": _redact_url(response.url), + "httpVersion": "HTTP/1.1", + "headers": req_headers, + "queryString": qs, + "cookies": [], + "headersSize": -1, + "bodySize": -1, + }, + "response": { + "status": response.status, + "statusText": response.status_text if hasattr(response, "status_text") else "", + "httpVersion": "HTTP/1.1", + "headers": resp_headers, + "content": { + "size": response_size if response_size is not None else -1, + "mimeType": response.headers.get("content-type", ""), + }, + "redirectURL": "", + "headersSize": -1, + "bodySize": -1, + "cookies": [], + }, + "timings": { + "send": -1, + "wait": round(timing, 1), + "receive": -1, + }, + } + ) except Exception: pass @@ -146,7 +216,24 @@ def _make_page_handlers(state: Any, raw_page: Any) -> dict[str, Any]: else: event_record["action_taken"] = "dismissed" - return {"console": _on_console, "response": _on_response, "dialog": _on_dialog} + def _on_pageerror(error: Any) -> None: + try: + try: + message = str(error) + except Exception: + message = "" + state.page_errors.append( + { + "message": message, + "timestamp": time.time(), + "page_url": raw_page.url, + "tab_id": str(id(raw_page)), + } + ) + except Exception: + pass + + return {"console": _on_console, "response": _on_response, "dialog": _on_dialog, "pageerror": _on_pageerror} def _register_hooks_on_page(state: Any, raw_page: Any) -> None: @@ -159,6 +246,7 @@ def _register_hooks_on_page(state: Any, raw_page: Any) -> None: 
async def skyvern_get_errors(
    session_id: Annotated[str | None, Field(description="Browser session ID (pbs_...)")] = None,
    cdp_url: Annotated[str | None, Field(description="CDP WebSocket URL")] = None,
    text: Annotated[
        str | None,
        Field(description="Filter by substring match in error message. Case-insensitive."),
    ] = None,
    clear: Annotated[
        bool,
        Field(description="Clear the buffer after reading. Default false."),
    ] = False,
) -> dict[str, Any]:
    """Read uncaught JavaScript errors (exceptions) from the browser page.

    Captures unhandled errors thrown by page scripts (window onerror / unhandledrejection).
    These are distinct from console.error() messages — use skyvern_console_messages(level='error') for those.
    Use text='...' to search for specific error messages.
    """
    # NOTE: this function is registered as an MCP tool, so the docstring above is kept verbatim.
    from skyvern.cli.core.session_manager import is_stateless_http_mode

    tool_name = "skyvern_get_errors"

    if is_stateless_http_mode():
        stateless = make_error(ErrorCode.ACTION_FAILED, _STATELESS_ERROR_MSG, _STATELESS_HINT)
        return make_result(tool_name, ok=False, error=stateless)

    try:
        _, ctx = await get_page(session_id=session_id, cdp_url=cdp_url)
    except BrowserNotAvailableError:
        return make_result(tool_name, ok=False, error=no_browser_error())

    session_state = get_current_session()
    filter_given = text is not None
    # Work on a snapshot so the buffer can be rebuilt or cleared afterwards.
    matches = list(session_state.page_errors)

    if text:
        needle = text.lower()
        matches = [err for err in matches if needle in err.get("message", "").lower()]

    if clear and filter_given:
        # With a filter, drop only the entries being returned; identity is used
        # because separate records may compare equal.
        drop_ids = {id(err) for err in matches}
        buffer = session_state.page_errors
        survivors = (err for err in buffer if id(err) not in drop_ids)
        session_state.page_errors = type(buffer)(survivors, maxlen=buffer.maxlen)
    elif clear:
        session_state.page_errors.clear()

    return make_result(
        tool_name,
        browser_context=ctx,
        data={
            "errors": matches,
            "count": len(matches),
            # Measured after any clearing above, i.e. what remains buffered.
            "buffer_size": len(session_state.page_errors),
        },
    )
async def skyvern_har_stop(
    session_id: Annotated[str | None, Field(description="Browser session ID (pbs_...)")] = None,
    cdp_url: Annotated[str | None, Field(description="CDP WebSocket URL")] = None,
) -> dict[str, Any]:
    """Stop HAR recording and return the captured traffic as HAR 1.2 JSON.

    Returns a complete HAR archive with all HTTP requests/responses captured since skyvern_har_start.
    The HAR data can be imported into browser DevTools, Charles Proxy, or other HTTP analysis tools.
    """
    # NOTE: this function is registered as an MCP tool, so the docstring above is kept verbatim.
    from skyvern.cli.core.session_manager import is_stateless_http_mode

    tool_name = "skyvern_har_stop"

    if is_stateless_http_mode():
        stateless = make_error(ErrorCode.ACTION_FAILED, _STATELESS_ERROR_MSG, _STATELESS_HINT)
        return make_result(tool_name, ok=False, error=stateless)

    try:
        _, ctx = await get_page(session_id=session_id, cdp_url=cdp_url)
    except BrowserNotAvailableError:
        return make_result(tool_name, ok=False, error=no_browser_error())

    session_state = get_current_session()

    if not session_state.har_enabled:
        not_recording = make_error(
            ErrorCode.ACTION_FAILED,
            "No active HAR recording",
            "Call skyvern_har_start first to begin recording",
        )
        return make_result(tool_name, ok=False, browser_context=ctx, error=not_recording)

    # Snapshot the buffer before resetting the recording state.
    captured = list(session_state._har_entries)
    session_state.har_enabled = False
    session_state._har_entries.clear()

    archive_log = {
        "version": "1.2",
        "creator": {"name": "Skyvern", "version": "1.0"},
        "pages": [],
        "entries": captured,
    }

    return make_result(
        tool_name,
        browser_context=ctx,
        data={
            "har": {"log": archive_log},
            "entry_count": len(captured),
        },
    )
+ """ + from skyvern.cli.core.browser_ops import do_get_html + + try: + page, ctx = await get_page(session_id=session_id, cdp_url=cdp_url) + except BrowserNotAvailableError: + return make_result("skyvern_get_html", ok=False, error=no_browser_error()) + + try: + html = await do_get_html(page, selector, outer=outer) + return make_result( + "skyvern_get_html", + browser_context=ctx, + data={ + "html": html, + "selector": selector, + "outer": outer, + "length": len(html), + }, + ) + except Exception as e: + return make_result( + "skyvern_get_html", + ok=False, + browser_context=ctx, + error=make_error(ErrorCode.ACTION_FAILED, str(e), "Check that the selector matches an element on the page"), + ) + + +async def skyvern_get_value( + selector: Annotated[str, Field(description="CSS or XPath selector for the input element.")], + session_id: Annotated[str | None, Field(description="Browser session ID (pbs_...)")] = None, + cdp_url: Annotated[str | None, Field(description="CDP WebSocket URL")] = None, +) -> dict[str, Any]: + """Get the current value of a form input element. + + Works with ,