diff --git a/docs/integrations/cli.mdx b/docs/integrations/cli.mdx index a7f3ce53c..e0007d558 100644 --- a/docs/integrations/cli.mdx +++ b/docs/integrations/cli.mdx @@ -3,7 +3,6 @@ title: CLI & Skills subtitle: Automate browsers and manage workflows from the command line slug: going-to-production/cli --- -{/* This file (docs/) and fern/integrations/cli.mdx must stay in sync. docs/ is for Mintlify, fern/ is for Fern. */} The `skyvern` CLI gives you direct access to browser automation, workflow management, credential storage, and more — all from your terminal. Use it in shell scripts, CI/CD pipelines, or for quick one-off tasks. diff --git a/docs/integrations/mcp.mdx b/docs/integrations/mcp.mdx index 6548f121f..58bd7d869 100644 --- a/docs/integrations/mcp.mdx +++ b/docs/integrations/mcp.mdx @@ -3,7 +3,6 @@ title: MCP Server subtitle: Connect AI assistants to browser automation via Model Context Protocol slug: going-to-production/mcp --- -{/* This file (docs/) and fern/integrations/mcp.mdx must stay in sync. docs/ is for Mintlify, fern/ is for Fern. Minor formatting differences (Tip vs inline text, CardGroup) are OK since the platforms support different components. */} The Skyvern MCP server lets AI assistants like Claude Desktop, Claude Code, Codex, Cursor, and Windsurf control a browser. Your AI can fill out forms, extract data, download files, and run multi-step workflows, all through natural language. diff --git a/fern/integrations/cli.mdx b/fern/integrations/cli.mdx index 110ccdcc7..eb06d19b8 100644 --- a/fern/integrations/cli.mdx +++ b/fern/integrations/cli.mdx @@ -1,4 +1,3 @@ -{/* This file (fern/) and docs/integrations/cli.mdx must stay in sync. fern/ is for Fern, docs/ is for Mintlify. */} --- title: CLI & Skills subtitle: Automate browsers and manage workflows from the command line @@ -13,13 +12,21 @@ The `skyvern` CLI gives you direct access to browser automation, workflow manage pip install skyvern ``` -Set your API key: +Sign up and get your API key in one step: ```bash -export SKYVERN_API_KEY="YOUR_KEY" # get one at https://app.skyvern.com +skyvern signup ``` -Optionally run the interactive setup wizard to configure your environment: +This opens a browser, walks you through account creation (or login), and saves your API key locally. No manual copy-paste needed. + +Already have an API key? Set it directly: + +```bash +export SKYVERN_API_KEY="YOUR_KEY" +``` + +Optionally run the interactive setup wizard to configure your full environment (LLM provider, database, browser mode): ```bash skyvern init @@ -27,6 +34,14 @@ skyvern init ## Command reference +### Onboarding + +```bash +skyvern signup # Sign up / login via browser — saves API key automatically +skyvern init # Interactive setup wizard (LLM, database, browser) +skyvern quickstart # One-command setup + start +``` + ### Services ```bash @@ -84,6 +99,27 @@ skyvern browser login --url https://app.example.com --credential-id cred_xxx Every browser command supports `--json` for machine-readable output, and `--session` / `--cdp` to target a specific session. If omitted, the CLI uses the last active session automatically. +### Local browser serve + +Expose your local Chrome to Skyvern Cloud so tasks can access localhost, internal tools, and your existing login sessions. + +```bash +# Launch with ngrok tunnel and your Chrome profile's cookies/logins +skyvern browser serve --tunnel --use-local-profile + +# Use a specific Chrome profile +skyvern browser serve --tunnel --use-local-profile --chrome-profile-name "Profile 2" + +# Headless mode with JSON output (for scripting) +skyvern browser serve --tunnel --headless --json +``` + +The `--use-local-profile` flag clones cookies and saved passwords from your Chrome profile into the served browser. Your original profile is never modified, and it works while Chrome is open. + + +Always pass `--api-key` when using `--tunnel`. Without it, anyone with the ngrok URL has full browser control. + + ### Workflows ```bash @@ -135,7 +171,6 @@ skyvern setup codex # Register with Codex ```bash skyvern docs # Open documentation in your browser -skyvern quickstart # One-command setup + start skyvern init browser # Initialize browser configuration only ``` diff --git a/fern/integrations/mcp.mdx b/fern/integrations/mcp.mdx index 71f972e3b..8d3438af7 100644 --- a/fern/integrations/mcp.mdx +++ b/fern/integrations/mcp.mdx @@ -1,4 +1,3 @@ -{/* This file (fern/) and docs/integrations/mcp.mdx must stay in sync. fern/ is for Fern, docs/ is for Mintlify. Minor formatting differences (Tip vs inline text, CardGroup) are OK since the platforms support different components. */} --- title: MCP Server subtitle: Connect AI assistants to browser automation via Model Context Protocol @@ -9,16 +8,16 @@ The Skyvern MCP server lets AI assistants like Claude Desktop, Claude Code, Code ## What you can do -The MCP server exposes 33 tools across 6 categories: +The MCP server exposes 35 tools across 6 categories: | Category | Key tools | What they do | |----------|-----------|--------------| -| **Browser session management** | `skyvern_browser_session_create`, `skyvern_browser_session_close`, `skyvern_browser_session_list`, `skyvern_browser_session_connect` | Open, manage, and reuse browser sessions | -| **Browser actions** | `skyvern_act`, `skyvern_navigate`, `skyvern_click`, `skyvern_type`, `skyvern_scroll`, `skyvern_select_option`, `skyvern_press_key`, `skyvern_wait` | Control the browser with natural language or CSS/XPath selectors | -| **Data extraction** | `skyvern_extract`, `skyvern_screenshot`, `skyvern_evaluate` | Pull structured data from pages, capture screenshots, run JavaScript | -| **Validation** | `skyvern_validate` | Check page conditions using AI (returns true/false) | -| **Credentials** | `skyvern_credential_list`, `skyvern_credential_get`, `skyvern_credential_delete` | Look up stored credentials for login flows | -| **Workflows** | `skyvern_workflow_create`, `skyvern_workflow_run`, `skyvern_workflow_status`, `skyvern_workflow_get`, `skyvern_workflow_update`, `skyvern_workflow_delete` | Build and execute multi-step automations | +| **Browser session management** | `skyvern_browser_session_create`, `skyvern_browser_session_close`, `skyvern_browser_session_list`, `skyvern_browser_session_get`, `skyvern_browser_session_connect` | Open, manage, and reuse browser sessions — including connecting to a local browser via CDP | +| **Browser actions** | `skyvern_act`, `skyvern_navigate`, `skyvern_click`, `skyvern_type`, `skyvern_hover`, `skyvern_scroll`, `skyvern_select_option`, `skyvern_press_key`, `skyvern_wait` | Control the browser with natural language, CSS/XPath selectors, or both (hybrid mode) | +| **Data extraction** | `skyvern_extract`, `skyvern_screenshot`, `skyvern_evaluate` | Pull structured JSON from pages, capture screenshots, run JavaScript | +| **Validation** | `skyvern_validate` | Assert page conditions in natural language (returns true/false with reasoning) | +| **Credentials & login** | `skyvern_login`, `skyvern_credential_list`, `skyvern_credential_get`, `skyvern_credential_delete` | Log in with stored credentials (supports Skyvern vault, Bitwarden, 1Password, Azure Key Vault) with automatic 2FA/TOTP | +| **Workflows** | `skyvern_workflow_create`, `skyvern_workflow_run`, `skyvern_workflow_status`, `skyvern_workflow_get`, `skyvern_workflow_update`, `skyvern_workflow_delete`, `skyvern_workflow_cancel` | Build, run, and manage multi-step automations with 23 block types | Your AI assistant decides which tools to call based on your instructions. For example, asking "go to Hacker News and get the top post title" triggers `skyvern_browser_session_create`, `skyvern_navigate`, `skyvern_extract`, and `skyvern_browser_session_close` automatically. @@ -236,6 +235,30 @@ You can also run `skyvern init` to auto-detect installed clients and write confi +## Connect to your local browser + +You can point your AI assistant at a browser running on your machine instead of a cloud browser. This lets you automate internal tools on localhost, reuse existing login sessions, and verify local dev changes — all through the same MCP tools. + +Start a local browser with a tunnel so Skyvern Cloud can reach it: + +```bash +skyvern browser serve --tunnel --use-local-profile +``` + +This launches Chrome with your existing cookies and logins, and creates an ngrok tunnel. Your AI assistant can then connect to it: + +> "Connect to my local browser and check if the dashboard at localhost:3000 renders correctly" + +The `--use-local-profile` flag clones auth-relevant data from your default Chrome profile (cookies, saved passwords, local storage) so the browser starts pre-authenticated. Your original profile is never modified. + + +This is especially powerful during development: make a code change, then ask your AI assistant to navigate to `localhost:3000` and verify it works — fix code and validate in the same prompt. + + + +Always use `--api-key` when exposing your browser via tunnel. Without it, anyone with the ngrok URL has full browser control. + + ## Troubleshooting diff --git a/skyvern-frontend/src/components/ui/status-pill-variants.ts b/skyvern-frontend/src/components/ui/status-pill-variants.ts new file mode 100644 index 000000000..ae373b6a5 --- /dev/null +++ b/skyvern-frontend/src/components/ui/status-pill-variants.ts @@ -0,0 +1,14 @@ +import { cva } from "class-variance-authority"; + +const statusPillVariants = cva("flex items-center gap-1 rounded-sm px-2 py-1", { + variants: { + variant: { + neutral: "bg-slate-elevation5", + }, + }, + defaultVariants: { + variant: "neutral", + }, +}); + +export { statusPillVariants }; diff --git a/skyvern-frontend/src/components/ui/status-pill.tsx b/skyvern-frontend/src/components/ui/status-pill.tsx new file mode 100644 index 000000000..6818ec3c4 --- /dev/null +++ b/skyvern-frontend/src/components/ui/status-pill.tsx @@ -0,0 +1,27 @@ +import * as React from "react"; +import { type VariantProps } from "class-variance-authority"; +import { cn } from "@/util/utils"; +import { statusPillVariants } from "./status-pill-variants"; + +type StatusPillProps = React.HTMLAttributes & + VariantProps & { + icon?: React.ReactNode; + }; + +const StatusPill = React.forwardRef( + ({ icon, variant, className, children, ...props }, ref) => { + return ( +
+ {icon} + {children != null && {children}} +
+ ); + }, +); +StatusPill.displayName = "StatusPill"; + +export { StatusPill }; diff --git a/skyvern-frontend/src/index.css b/skyvern-frontend/src/index.css index 30adce7e0..9dc2281f4 100644 --- a/skyvern-frontend/src/index.css +++ b/skyvern-frontend/src/index.css @@ -30,6 +30,7 @@ --destructive: 0 84.2% 60.2%; --destructive-foreground: 210 40% 98%; + --error-bg-light: rgba(220, 38, 38, 0.1); --warning: 32.1 94.6% 43.7%; /* amber-600 */ --warning-foreground: 36 100% 97.1%; /* amber-50 */ @@ -65,6 +66,7 @@ --destructive: 0 72.2% 50.6%; /* red-600 */ --destructive-foreground: 0 85.7% 97.3%; /* red-50 */ + --error-bg-light: rgba(220, 38, 38, 0.1); --warning: 40.6 96.1% 40.4%; /* yellow-600 */ --warning-foreground: 54.5 91.7% 95.3%; /* yellow-50 */ diff --git a/skyvern-frontend/src/routes/tasks/detail/ActionTypePill.tsx b/skyvern-frontend/src/routes/tasks/detail/ActionTypePill.tsx index a317eab8b..4e80a03e6 100644 --- a/skyvern-frontend/src/routes/tasks/detail/ActionTypePill.tsx +++ b/skyvern-frontend/src/routes/tasks/detail/ActionTypePill.tsx @@ -1,4 +1,5 @@ import { ActionType, ReadableActionTypes } from "@/api/types"; +import { StatusPill } from "@/components/ui/status-pill"; import { CursorArrowIcon, HandIcon, @@ -19,10 +20,9 @@ const icons: Partial> = { function ActionTypePill({ actionType }: Props) { return ( -
- {icons[actionType] ?? null} - {ReadableActionTypes[actionType]} -
+ + {ReadableActionTypes[actionType]} + ); } diff --git a/skyvern-frontend/src/routes/tasks/detail/ScrollableActionList.tsx b/skyvern-frontend/src/routes/tasks/detail/ScrollableActionList.tsx index ea80808b0..500488be2 100644 --- a/skyvern-frontend/src/routes/tasks/detail/ScrollableActionList.tsx +++ b/skyvern-frontend/src/routes/tasks/detail/ScrollableActionList.tsx @@ -1,5 +1,6 @@ import { getClient } from "@/api/AxiosClient"; import { Action, ActionTypes } from "@/api/types"; +import { StatusPill } from "@/components/ui/status-pill"; import { Tooltip, TooltipContent, @@ -91,9 +92,11 @@ function ScrollableActionList({ -
- -
+ + } + />
Code Execution @@ -102,13 +105,15 @@ function ScrollableActionList({
)} {action.success ? ( -
- -
+ } + /> ) : ( -
- -
+ + } + /> )} diff --git a/skyvern-frontend/src/routes/workflows/WorkflowRun.tsx b/skyvern-frontend/src/routes/workflows/WorkflowRun.tsx index 0624e71f6..031fb8074 100644 --- a/skyvern-frontend/src/routes/workflows/WorkflowRun.tsx +++ b/skyvern-frontend/src/routes/workflows/WorkflowRun.tsx @@ -217,12 +217,7 @@ function WorkflowRun() { finallyBlockInTimeline; const workflowFailureReason = workflowRun?.failure_reason ? ( -
+
{failureReasonTitle}
{workflowRun.failure_reason}
{matchedTips} diff --git a/skyvern-frontend/src/routes/workflows/debugger/DebuggerRun.tsx b/skyvern-frontend/src/routes/workflows/debugger/DebuggerRun.tsx index 2551d59e7..e600d52ec 100644 --- a/skyvern-frontend/src/routes/workflows/debugger/DebuggerRun.tsx +++ b/skyvern-frontend/src/routes/workflows/debugger/DebuggerRun.tsx @@ -5,13 +5,7 @@ function DebuggerRun() { const { data: workflowRun } = useWorkflowRunQuery(); const workflowFailureReason = workflowRun?.failure_reason ? ( -
+
Run Failure Reason
{workflowRun.failure_reason}
diff --git a/skyvern-frontend/src/routes/workflows/workflowRun/ActionCard.tsx b/skyvern-frontend/src/routes/workflows/workflowRun/ActionCard.tsx index d67317056..32c6837d6 100644 --- a/skyvern-frontend/src/routes/workflows/workflowRun/ActionCard.tsx +++ b/skyvern-frontend/src/routes/workflows/workflowRun/ActionCard.tsx @@ -1,4 +1,5 @@ import { ActionsApiResponse, ActionTypes, Status } from "@/api/types"; +import { StatusPill } from "@/components/ui/status-pill"; import { Tooltip, TooltipContent, @@ -65,9 +66,11 @@ function ActionCard({ action, onClick, active, index }: Props) { -
- -
+ + } + />
Code Execution @@ -76,13 +79,13 @@ function ActionCard({ action, onClick, active, index }: Props) {
)} {success ? ( -
- -
+ } + /> ) : ( -
- -
+ } + /> )}
diff --git a/skyvern-frontend/src/routes/workflows/workflowRun/ThoughtCard.tsx b/skyvern-frontend/src/routes/workflows/workflowRun/ThoughtCard.tsx index 1291494b7..2460db963 100644 --- a/skyvern-frontend/src/routes/workflows/workflowRun/ThoughtCard.tsx +++ b/skyvern-frontend/src/routes/workflows/workflowRun/ThoughtCard.tsx @@ -1,3 +1,4 @@ +import { StatusPill } from "@/components/ui/status-pill"; import { QuestionMarkIcon } from "@radix-ui/react-icons"; import { ObserverThought } from "../types/workflowRunTypes"; import { cn } from "@/util/utils"; @@ -41,10 +42,9 @@ function ThoughtCard({ thought, onClick, active }: Props) { {(thought.answer || thought.thought) && Thought} {!thought.answer && !thought.thought && Thinking}
-
- - Decision -
+ }> + Decision + {(thought.answer || thought.thought) && (
diff --git a/skyvern/cli/core/result.py b/skyvern/cli/core/result.py index 94757b5bc..cd62059fb 100644 --- a/skyvern/cli/core/result.py +++ b/skyvern/cli/core/result.py @@ -7,6 +7,35 @@ from typing import Any from skyvern import analytics +# Module-level flag: when True, make_result() strips fields that waste AI context +# tokens (echoed inputs, sdk_equivalent, browser_context, timing, empty collections). +# Set once at MCP server startup; CLI paths leave it False. +_concise_responses: bool = False + +# Fields inside data{} that are debug/scripting aids, not decision-relevant for AI. +_DATA_STRIP_KEYS = frozenset( + { + "sdk_equivalent", + "ai_mode", + "selector", + "intent", + } +) + +# Keys whose None value is meaningful (e.g. JS eval returning null). +# These survive the concise filter even when None. +_DATA_KEEP_NONE_KEYS = frozenset( + { + "result", + "extracted", + } +) + + +def set_concise_responses(enabled: bool) -> None: + global _concise_responses # noqa: PLW0603 + _concise_responses = enabled + class ErrorCode: NO_ACTIVE_BROWSER = "NO_ACTIVE_BROWSER" @@ -78,6 +107,25 @@ def make_result( "session_id": browser_context.session_id if browser_context else None, }, ) + + if _concise_responses: + result: dict[str, Any] = {"ok": ok} + if error: + result["error"] = error + if warnings: + result["warnings"] = warnings + if data: + concise_data = { + k: v + for k, v in data.items() + if k not in _DATA_STRIP_KEYS and (v is not None or k in _DATA_KEEP_NONE_KEYS) + } + if concise_data: + result["data"] = concise_data + if artifacts: + result["artifacts"] = [a.to_dict() for a in artifacts] + return result + return { "ok": ok, "action": action, diff --git a/skyvern/cli/mcp_tools/__init__.py b/skyvern/cli/mcp_tools/__init__.py index 967bc81b4..8f1fefe05 100644 --- a/skyvern/cli/mcp_tools/__init__.py +++ b/skyvern/cli/mcp_tools/__init__.py @@ -230,7 +230,7 @@ Once you've confirmed each step works, compose them into a workflow with skyvern ## Writing Scripts (ONLY when user explicitly asks) Use the Skyvern Python SDK: `from skyvern import Skyvern` NEVER import from skyvern.cli.mcp_tools — those are internal server modules. -Every tool response includes an `sdk_equivalent` field for script conversion. +In verbose mode (`--verbose`), every tool response includes an `sdk_equivalent` field for script conversion. **Hybrid xpath+prompt pattern** — the recommended approach for production scripts: await page.click("xpath=//button[@id='submit']", prompt="the Submit button") diff --git a/skyvern/cli/run_commands.py b/skyvern/cli/run_commands.py index f9b31826d..8c9e2f4ef 100644 --- a/skyvern/cli/run_commands.py +++ b/skyvern/cli/run_commands.py @@ -18,6 +18,7 @@ from starlette.middleware import Middleware from skyvern.cli.console import console from skyvern.cli.core.client import close_skyvern from skyvern.cli.core.mcp_http_auth import MCPAPIKeyMiddleware, close_auth_db +from skyvern.cli.core.result import set_concise_responses from skyvern.cli.core.session_manager import close_current_session, set_stateless_http_mode from skyvern.cli.mcp_tools import mcp # Uses standalone fastmcp (v2.x) from skyvern.cli.utils import start_services @@ -283,6 +284,13 @@ def run_mcp( help="Use stateless HTTP semantics for HTTP transports (ignored for stdio).", ), ] = True, + verbose: Annotated[ + bool, + typer.Option( + "--verbose/--no-verbose", + help="Return full tool responses including sdk_equivalent, browser_context, and timing.", + ), + ] = False, ) -> None: """Run the MCP server with configurable transport for local or remote hosting.""" path = _normalize_mcp_path(path) @@ -292,6 +300,7 @@ def run_mcp( # atexit doesn't fire on normal return and finally doesn't fire on signals. atexit.register(_cleanup_mcp_resources_sync) set_stateless_http_mode(stateless_http_enabled) + set_concise_responses(not verbose) try: if transport == "stdio": mcp.run(transport="stdio") @@ -308,6 +317,7 @@ def run_mcp( ) finally: set_stateless_http_mode(False) + set_concise_responses(False) _cleanup_mcp_resources_blocking() diff --git a/skyvern/forge/sdk/workflow/models/block.py b/skyvern/forge/sdk/workflow/models/block.py index 326a16e3a..92e2ab970 100644 --- a/skyvern/forge/sdk/workflow/models/block.py +++ b/skyvern/forge/sdk/workflow/models/block.py @@ -993,7 +993,14 @@ class BaseTaskBlock(Block): await self.record_output_parameter_value(workflow_run_context, workflow_run_id, output_parameter_value) return await self.build_block_result( success=success, - failure_reason=updated_task.failure_reason, + failure_reason=( + updated_task.failure_reason + if success + else ( + updated_task.failure_reason + or f"Task {updated_task.task_id} finished with status {updated_task.status}" + ) + ), output_parameter_value=output_parameter_value, status=block_status_mapping[updated_task.status], workflow_run_block_id=workflow_run_block_id, @@ -1010,7 +1017,7 @@ class BaseTaskBlock(Block): ) return await self.build_block_result( success=False, - failure_reason=updated_task.failure_reason, + failure_reason=updated_task.failure_reason or f"Task {updated_task.task_id} was canceled", output_parameter_value=None, status=block_status_mapping[updated_task.status], workflow_run_block_id=workflow_run_block_id, @@ -1027,7 +1034,7 @@ class BaseTaskBlock(Block): ) return await self.build_block_result( success=False, - failure_reason=updated_task.failure_reason, + failure_reason=updated_task.failure_reason or f"Task {updated_task.task_id} timed out", output_parameter_value=None, status=block_status_mapping[updated_task.status], workflow_run_block_id=workflow_run_block_id, @@ -1083,7 +1090,10 @@ class BaseTaskBlock(Block): ) return await self.build_block_result( success=False, - failure_reason=updated_task.failure_reason, + failure_reason=( + updated_task.failure_reason + or f"Task {updated_task.task_id} failed with status {updated_task.status}" + ), output_parameter_value=output_parameter_value, status=block_status_mapping[updated_task.status], workflow_run_block_id=workflow_run_block_id, @@ -1094,7 +1104,11 @@ class BaseTaskBlock(Block): return await self.build_block_result( success=False, status=BlockStatus.failed, - failure_reason=current_running_task.failure_reason if current_running_task else None, + failure_reason=( + (current_running_task.failure_reason or f"Task {current_running_task.task_id} failed") + if current_running_task + else "Task failed (no task reference available)" + ), workflow_run_block_id=workflow_run_block_id, organization_id=organization_id, ) @@ -1311,7 +1325,10 @@ class ForLoopBlock(Block): if not extraction_result.success: LOG.error("Extraction block failed", failure_reason=extraction_result.failure_reason) - raise ValueError(f"Extraction block failed: {extraction_result.failure_reason}") + raise ValueError( + f"Extraction block failed: " + f"{extraction_result.failure_reason or 'Unknown error (no failure reason provided)'}" + ) LOG.debug("Extraction block succeeded", output=extraction_result.output_parameter_value) @@ -5755,7 +5772,10 @@ class ConditionalBlock(Block): block_label=self.label, failure_reason=extraction_result.failure_reason, ) - raise ValueError(f"Branch evaluation failed: {extraction_result.failure_reason}") + raise ValueError( + f"Branch evaluation failed: " + f"{extraction_result.failure_reason or 'Unknown error (no failure reason provided)'}" + ) if workflow_run_context: try: diff --git a/tests/unit/test_mcp_concise_responses.py b/tests/unit/test_mcp_concise_responses.py new file mode 100644 index 000000000..4522cf07a --- /dev/null +++ b/tests/unit/test_mcp_concise_responses.py @@ -0,0 +1,185 @@ +"""Tests for the concise MCP response mode in make_result().""" + +from __future__ import annotations + +from collections.abc import Iterator + +import pytest + +from skyvern.cli.core.result import Artifact, BrowserContext, make_result, set_concise_responses + + +@pytest.fixture(autouse=True) +def _enable_concise() -> Iterator[None]: + """Enable concise mode for every test; restore after.""" + set_concise_responses(True) + yield + set_concise_responses(False) + + +# -- Helpers ------------------------------------------------------------------ + +_CTX = BrowserContext(mode="cdp", session_id="pbs_1", cdp_url="wss://example.com/devtools") + +_CLICK_DATA = { + "selector": "#btn", + "intent": "the Submit button", + "ai_mode": "proactive", + "resolved_selector": "xpath=/*[name()='html'][1]/*[name()='body'][1]/*[name()='button'][1]", + "sdk_equivalent": 'await page.click("xpath=...", prompt="the Submit button")', +} + + +# -- Stripped fields ---------------------------------------------------------- + + +def test_concise_strips_action_and_browser_context() -> None: + result = make_result("skyvern_click", browser_context=_CTX, data=_CLICK_DATA) + assert "action" not in result + assert "browser_context" not in result + + +def test_concise_strips_timing() -> None: + result = make_result("skyvern_click", data=_CLICK_DATA, timing_ms={"sdk": 500, "total": 500}) + assert "timing_ms" not in result + + +@pytest.mark.parametrize("key", ["sdk_equivalent", "ai_mode", "selector", "intent"]) +def test_concise_strips_debug_data_keys(key: str) -> None: + data = {key: "some_value", "url": "https://example.com"} + result = make_result("skyvern_navigate", data=data) + assert key not in result.get("data", {}) + + +def test_concise_strips_none_values_from_data() -> None: + data = {"url": "https://example.com", "title": None} + result = make_result("skyvern_navigate", data=data) + assert "title" not in result.get("data", {}) + + +def test_concise_omits_data_when_all_keys_stripped() -> None: + """When every key in data is strippable, the data key should be omitted entirely.""" + data = {"sdk_equivalent": "await page.click(...)", "ai_mode": "proactive", "selector": "#x", "intent": "foo"} + result = make_result("skyvern_click", data=data) + assert "data" not in result + + +# -- Minimal response -------------------------------------------------------- + + +def test_concise_minimal_response() -> None: + """No data, no error, no artifacts — should return just {"ok": True}.""" + result = make_result("skyvern_click") + assert result == {"ok": True} + + +# -- Omitted empty collections ----------------------------------------------- + + +def test_concise_omits_empty_artifacts() -> None: + result = make_result("skyvern_click", data=_CLICK_DATA, artifacts=[]) + assert "artifacts" not in result + + +def test_concise_omits_empty_warnings() -> None: + result = make_result("skyvern_click", data=_CLICK_DATA, warnings=[]) + assert "warnings" not in result + + +def test_concise_omits_null_error() -> None: + result = make_result("skyvern_click", data=_CLICK_DATA, error=None) + assert "error" not in result + + +# -- Preserved fields --------------------------------------------------------- + + +def test_concise_click_preserves_resolved_selector() -> None: + """resolved_selector is actionable feedback — shows what the AI resolver matched.""" + result = make_result("skyvern_click", data=_CLICK_DATA) + assert result["data"]["resolved_selector"] == _CLICK_DATA["resolved_selector"] + + +def test_concise_click_strips_other_echoed_fields() -> None: + result = make_result("skyvern_click", data=_CLICK_DATA) + data = result.get("data", {}) + assert "selector" not in data + assert "intent" not in data + assert "ai_mode" not in data + assert "sdk_equivalent" not in data + + +def test_concise_preserves_meaningful_data() -> None: + data = {"extracted": {"price": 42.0}, "sdk_equivalent": "await page.extract(...)"} + result = make_result("skyvern_extract", data=data) + assert result["data"] == {"extracted": {"price": 42.0}} + + +def test_concise_preserves_error() -> None: + err = {"code": "SELECTOR_NOT_FOUND", "message": "Not found", "hint": "Try another selector"} + result = make_result("skyvern_click", ok=False, error=err) + assert result["ok"] is False + assert result["error"] == err + + +def test_concise_preserves_nonempty_warnings() -> None: + result = make_result("skyvern_click", ok=False, warnings=["Element hidden"]) + assert result["warnings"] == ["Element hidden"] + + +def test_concise_preserves_nonempty_artifacts() -> None: + artifact = Artifact(kind="screenshot", path="/tmp/shot.png", mime="image/png", bytes=1024) + result = make_result("skyvern_screenshot", artifacts=[artifact]) + assert len(result["artifacts"]) == 1 + assert result["artifacts"][0]["path"] == "/tmp/shot.png" + + +# -- Partial failure with data ------------------------------------------------ + + +def test_concise_preserves_data_on_failure() -> None: + err = {"code": "TIMEOUT", "message": "Timed out", "hint": "Increase timeout"} + data = {"partial_result": {"items": 3}, "sdk_equivalent": "await page.extract(...)"} + result = make_result("skyvern_extract", ok=False, error=err, data=data) + assert result["ok"] is False + assert result["error"] == err + assert result["data"] == {"partial_result": {"items": 3}} + + +# -- None-preserving keys (result, extracted) --------------------------------- + + +def test_concise_preserves_none_result_for_evaluate() -> None: + """JS returning null is a meaningful answer — must not be stripped.""" + data = {"result": None, "sdk_equivalent": "await page.evaluate(...)"} + result = make_result("skyvern_evaluate", data=data) + assert "data" in result + assert result["data"]["result"] is None + + +def test_concise_preserves_none_extracted() -> None: + """Extraction returning None means 'found nothing' — must not be stripped.""" + data = {"extracted": None, "sdk_equivalent": "await page.extract(...)"} + result = make_result("skyvern_extract", data=data) + assert "data" in result + assert result["data"]["extracted"] is None + + +# -- Verbose mode (flag off) -------------------------------------------------- + + +def test_verbose_returns_all_fields() -> None: + set_concise_responses(False) + result = make_result( + "skyvern_click", + browser_context=_CTX, + data=_CLICK_DATA, + timing_ms={"sdk": 500, "total": 500}, + ) + assert result["action"] == "skyvern_click" + assert "browser_context" in result + assert result["timing_ms"] == {"sdk": 500, "total": 500} + assert result["data"]["sdk_equivalent"] is not None + assert result["data"]["resolved_selector"] is not None + assert result["artifacts"] == [] + assert result["warnings"] == [] diff --git a/tests/unit/workflow/test_conditional_branch_evaluation.py b/tests/unit/workflow/test_conditional_branch_evaluation.py index 592c30ec7..4a89582e1 100644 --- a/tests/unit/workflow/test_conditional_branch_evaluation.py +++ b/tests/unit/workflow/test_conditional_branch_evaluation.py @@ -397,3 +397,83 @@ async def test_empty_param_produces_explicit_marker_in_prompt_evaluation() -> No assert rendered_expressions == ["if (empty value) is not empty"] # The prompt should be loaded with the patched expression assert mock_prompt.call_args.kwargs["conditions"] == ["if (empty value) is not empty"] + + +# --------------------------------------------------------------------------- +# Tests for None failure_reason guard in _evaluate_prompt_branches (SKY-8026) +# --------------------------------------------------------------------------- + + +def _failed_extraction_result(output_parameter: OutputParameter, failure_reason: str | None = None) -> BlockResult: + return BlockResult( + success=False, + output_parameter=output_parameter, + output_parameter_value=None, + failure_reason=failure_reason, + ) + + +@pytest.mark.asyncio +async def test_extraction_failure_with_none_reason_produces_informative_error() -> None: + """When ExtractionBlock fails with failure_reason=None, the raised ValueError + should NOT contain the literal string 'None' (SKY-8026).""" + block = _conditional_block() + branch = BranchCondition( + criteria=PromptBranchCriteria(expression="user selected premium plan"), + next_block_label="premium", + ) + + evaluation_context = BranchEvaluationContext(workflow_run_context=None, template_renderer=lambda expr: expr) + evaluation_context.build_llm_safe_context_snapshot = MagicMock(return_value={}) # type: ignore[method-assign] + + with ( + patch("skyvern.forge.sdk.workflow.models.block.prompt_engine.load_prompt", return_value="goal"), + patch("skyvern.forge.sdk.workflow.models.block.ExtractionBlock") as mock_extraction_cls, + ): + mock_extraction = MagicMock() + mock_extraction.execute = AsyncMock( + return_value=_failed_extraction_result(block.output_parameter, failure_reason=None) + ) + mock_extraction_cls.return_value = mock_extraction + + with pytest.raises(ValueError, match="Unknown error"): + await block._evaluate_prompt_branches( + branches=[branch], + evaluation_context=evaluation_context, + workflow_run_id="wr_test", + workflow_run_block_id="wrb_test", + organization_id="org_test", + ) + + +@pytest.mark.asyncio +async def test_extraction_failure_with_reason_preserves_original_message() -> None: + """When ExtractionBlock fails with a real failure_reason, that reason should + appear verbatim in the raised ValueError.""" + block = _conditional_block() + branch = BranchCondition( + criteria=PromptBranchCriteria(expression="user selected premium plan"), + next_block_label="premium", + ) + + evaluation_context = BranchEvaluationContext(workflow_run_context=None, template_renderer=lambda expr: expr) + evaluation_context.build_llm_safe_context_snapshot = MagicMock(return_value={}) # type: ignore[method-assign] + + with ( + patch("skyvern.forge.sdk.workflow.models.block.prompt_engine.load_prompt", return_value="goal"), + patch("skyvern.forge.sdk.workflow.models.block.ExtractionBlock") as mock_extraction_cls, + ): + mock_extraction = MagicMock() + mock_extraction.execute = AsyncMock( + return_value=_failed_extraction_result(block.output_parameter, failure_reason="LLM rate limited") + ) + mock_extraction_cls.return_value = mock_extraction + + with pytest.raises(ValueError, match="LLM rate limited"): + await block._evaluate_prompt_branches( + branches=[branch], + evaluation_context=evaluation_context, + workflow_run_id="wr_test", + workflow_run_block_id="wrb_test", + organization_id="org_test", + )