feat: MCP script/caching tools for code v2 visibility (#5243)

This commit is contained in:
Marc Kelechava 2026-03-25 15:42:53 -07:00 committed by GitHub
parent 6d3aa06164
commit 31d37a5b01
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
8 changed files with 1048 additions and 59 deletions

View file

@ -49,6 +49,13 @@ from .inspection import (
skyvern_network_requests,
)
from .prompts import build_workflow, debug_automation, extract_data, qa_test
from .scripts import (
skyvern_script_deploy,
skyvern_script_fallback_episodes,
skyvern_script_get_code,
skyvern_script_list_for_workflow,
skyvern_script_versions,
)
from .session import (
skyvern_browser_session_close,
skyvern_browser_session_connect,
@ -104,6 +111,8 @@ targeted test cases, open a browser against the dev server, and report pass/fail
| Debug browser issues | skyvern_browser_session_create skyvern_navigate | skyvern_console_messages / skyvern_network_requests |
| Build a reusable automation | skyvern_workflow_create (no session needed) | skyvern_workflow_run to test |
| Run an existing automation | skyvern_workflow_run (no session needed) | skyvern_workflow_status to check |
| View cached scripts | skyvern_script_list_for_workflow (no session needed) | skyvern_script_get_code to see code |
| Check why AI fallback happened | skyvern_script_fallback_episodes (no session needed) | skyvern_script_versions for history |
| One-off autonomous task | skyvern_run_task (no session needed) | Check result in response |
## Tool Selection
@ -127,6 +136,10 @@ targeted test cases, open a browser against the dev server, and report pass/fail
| "What credentials do I have?" | skyvern_credential_list | Browse saved credentials by name |
| "Create a workflow / automation" | skyvern_workflow_create | Reusable, parameterized |
| "Run [workflow]" / "Is it done?" | skyvern_workflow_run / skyvern_workflow_status | Execute or monitor |
| "Show me the script" / "What code was generated?" | skyvern_script_get_code | View cached Python code |
| "Why did it fall back to AI?" | skyvern_script_fallback_episodes | Inspect AI fallback details |
| "Run this with AI agent" / "Force agent mode" | skyvern_workflow_run(run_with="agent") | Override cached script |
| "Edit / update the script" | skyvern_script_deploy | Deploy new script version |
## Critical Rules
1. Use Skyvern for all browser tasks. curl/wget/requests are fine for APIs and file downloads.
@ -171,6 +184,8 @@ skyvern_wait, skyvern_drag support three modes. When unsure, use intent. For mul
- skyvern_drag requires a session AND a navigated page with draggable elements
- skyvern_console_messages / skyvern_network_requests capture events from session start call anytime
- skyvern_run_task is one-off; for reusable automations, use skyvern_workflow_create
- Script tools (list, get_code, versions, fallback_episodes, deploy) do NOT need a browser session
- Use skyvern_script_list_for_workflow as the entry point to discover script IDs for a workflow
## Engine Selection
@ -190,6 +205,41 @@ workflow block definitions — skyvern_run_task always uses engine 2.0 internall
Other engines (`openai-cua`, `anthropic-cua`, `ui-tars`) are available for advanced use cases but are not recommended as defaults.
## Caching & Script Execution
Skyvern workflows support two execution modes controlled by `run_with`:
| `run_with` value | Behavior |
|------------------|----------|
| `"code"` (default for MCP-created workflows) | Runs a cached Python script generated from a previous successful AI run. \
10-100x faster, no LLM calls. Falls back to AI if the script fails. |
| `"agent"` | Always runs with the AI agent (LLM-driven navigation). Use for first-run exploration or when the site changed. |
| `null` / omitted | Inherits from the workflow definition. MCP defaults to `"code"`. |
### How Caching Works
1. **First run** — The AI agent navigates the site, recording every action.
2. **Script generation** — After a successful run, a deterministic Python script is generated from the recorded actions.
3. **Subsequent runs** — The script replays actions directly (no LLM calls). If a selector fails, AI takes over for that step.
4. **Script evolution** — Each AI fallback improves the script. Over time, fallbacks decrease.
MCP-created workflows automatically set `code_version=2` and `run_with="code"` unless you explicitly override them.
### When to Override
- Set `run_with="agent"` in skyvern_workflow_run when: testing a new workflow for the first time, debugging a cached \
script, or when the target site redesigned its UI.
- Set `run_with="code"` (or omit — it's the default) when: the workflow has run successfully before and you want \
maximum speed.
### Script Tools
- **skyvern_script_list_for_workflow** — Entry point: find scripts for a workflow (wpid → script IDs)
- **skyvern_script_get_code** — View the generated Python code for a script version
- **skyvern_script_versions** — List version history showing how the script evolved
- **skyvern_script_fallback_episodes** — See when and why the AI agent took over from the cached script
- **skyvern_script_deploy** — Deploy an updated script version
## Getting Started
**Exploring a website**: skyvern_browser_session_create → skyvern_navigate → skyvern_screenshot \
@ -234,6 +284,9 @@ BAD (1 giant block trying to do everything):
Use `{{parameter_key}}` to reference workflow input parameters in any block field.
Blocks in the same workflow run share the same browser session automatically.
To inspect a real workflow for reference, use skyvern_workflow_get.
Workflows created via MCP default to code execution mode (code_version=2, run_with="code"). \
The first run uses the AI agent to learn the navigation; subsequent runs replay a cached script. \
To force AI agent mode on a specific run, pass run_with="agent" to skyvern_workflow_run.
### Block Types Reference
- **navigation** fill forms, click buttons, navigate multi-step flows (most common)
@ -333,6 +386,13 @@ mcp.tool(tags={"workflow"}, annotations=_MUT)(skyvern_workflow_run)
mcp.tool(tags={"workflow"}, annotations=_RO)(skyvern_workflow_status)
mcp.tool(tags={"workflow"}, annotations=_MUT)(skyvern_workflow_cancel)
# -- Script/caching tools (no browser needed) --
mcp.tool(tags={"script"}, annotations=_RO)(skyvern_script_list_for_workflow)
mcp.tool(tags={"script"}, annotations=_RO)(skyvern_script_get_code)
mcp.tool(tags={"script"}, annotations=_RO)(skyvern_script_versions)
mcp.tool(tags={"script"}, annotations=_RO)(skyvern_script_fallback_episodes)
mcp.tool(tags={"script"}, annotations=_MUT)(skyvern_script_deploy)
# -- Prompts (methodology guides injected into LLM conversations) --
mcp.prompt()(build_workflow)
mcp.prompt()(debug_automation)
@ -393,6 +453,12 @@ __all__ = [
"skyvern_workflow_run",
"skyvern_workflow_status",
"skyvern_workflow_cancel",
# Script/caching
"skyvern_script_list_for_workflow",
"skyvern_script_get_code",
"skyvern_script_versions",
"skyvern_script_fallback_episodes",
"skyvern_script_deploy",
# Prompts
"build_workflow",
"debug_automation",

View file

@ -5,8 +5,45 @@ MCP tools import from here; the canonical implementations live in core/.
from __future__ import annotations
from typing import Any
from skyvern.cli.core.artifacts import get_artifact_dir, save_artifact
from skyvern.cli.core.result import Artifact, BrowserContext, ErrorCode, Timer, make_error, make_result
from skyvern.client.errors import NotFoundError
async def raw_http_get(path: str, params: dict[str, Any] | None = None) -> Any:
    """GET request to Skyvern API for endpoints without SDK methods.
    Raises NotFoundError on 404, RuntimeError on other HTTP errors.
    """
    from ._session import get_skyvern

    client = get_skyvern()
    # Temporary workaround: these MCP routes do not have public Fern SDK methods yet,
    # so we reach through the generated client's private wrapper. Revisit if the SDK
    # is regenerated or adds first-class methods for these endpoints.
    resp = await client._client_wrapper.httpx_client.request(
        path,
        method="GET",
        params=params if params is not None else {},
    )
    status = resp.status_code
    if status == 404:
        raise NotFoundError(body={"detail": f"Not found: {path}"})
    if status >= 400:
        # Prefer the structured "detail" field; fall back to the raw body text.
        try:
            detail = resp.json().get("detail", resp.text)
        except Exception:
            detail = resp.text
        raise RuntimeError(f"HTTP {status}: {detail}")
    if status == 204:
        # No Content: nothing to decode.
        return {}
    try:
        return resp.json()
    except Exception:
        # Non-JSON success bodies are wrapped rather than dropped.
        return {"raw": resp.text}
__all__ = [
"Artifact",
@ -16,5 +53,6 @@ __all__ = [
"get_artifact_dir",
"make_error",
"make_result",
"raw_http_get",
"save_artifact",
]

View file

@ -27,3 +27,75 @@ def validate_folder_id(folder_id: str, action: str) -> dict[str, Any] | None:
),
)
return None
def validate_workflow_id(workflow_id: str, action: str) -> dict[str, Any] | None:
    """Validate workflow_id shape; return a make_result error dict, or None when valid."""
    # Reject path separators first: the ID is interpolated into API routes.
    if "/" in workflow_id or "\\" in workflow_id:
        return make_result(
            action,
            ok=False,
            error=make_error(
                ErrorCode.INVALID_INPUT,
                "workflow_id must not contain path separators",
                "Provide a valid workflow permanent ID (starts with wpid_)",
            ),
        )
    # Well-formed workflow permanent IDs carry the wpid_ prefix.
    if workflow_id.startswith("wpid_"):
        return None
    return make_result(
        action,
        ok=False,
        error=make_error(
            ErrorCode.INVALID_INPUT,
            f"Invalid workflow_id format: {workflow_id!r}",
            "Workflow IDs start with wpid_. Use skyvern_workflow_list to find valid IDs.",
        ),
    )
def validate_run_id(run_id: str, action: str) -> dict[str, Any] | None:
    """Validate run_id shape; return a make_result error dict, or None when valid."""
    # Reject path separators first: the ID is interpolated into API routes.
    if "/" in run_id or "\\" in run_id:
        return make_result(
            action,
            ok=False,
            error=make_error(
                ErrorCode.INVALID_INPUT,
                "run_id must not contain path separators",
                "Provide a valid run ID (starts with wr_ or tsk_v2_)",
            ),
        )
    # Both workflow-run and task-run prefixes are acceptable.
    if run_id.startswith(("wr_", "tsk_v2_")):
        return None
    return make_result(
        action,
        ok=False,
        error=make_error(
            ErrorCode.INVALID_INPUT,
            f"Invalid run_id format: {run_id!r}",
            "Run IDs start with wr_ (workflow runs) or tsk_v2_ (task runs). Check skyvern_workflow_run output.",
        ),
    )
def validate_script_id(script_id: str, action: str) -> dict[str, Any] | None:
    """Validate script_id shape; return a make_result error dict, or None when valid."""
    # Reject path separators first: the ID is interpolated into API routes.
    if "/" in script_id or "\\" in script_id:
        return make_result(
            action,
            ok=False,
            error=make_error(
                ErrorCode.INVALID_INPUT,
                "script_id must not contain path separators",
                "Provide a valid script ID (starts with s_)",
            ),
        )
    # Well-formed script IDs carry the s_ prefix.
    if script_id.startswith("s_"):
        return None
    return make_result(
        action,
        ok=False,
        error=make_error(
            ErrorCode.INVALID_INPUT,
            f"Invalid script_id format: {script_id!r}",
            "Script IDs start with s_. Use skyvern_script_list_for_workflow to find script IDs.",
        ),
    )

View file

@ -399,6 +399,34 @@ After extraction, check the returned data before using it:
or what the data looks like), not the schema.
- Use `skyvern_validate` for page-level assertions before extracting \
("Is this the search results page?" / "Are there at least 10 results visible?").
## Caching Considerations
Workflows created via MCP default to Code 2.0 (code_version=2, run_with="code").
### What this means for workflow design
- **First run**: The AI agent runs all blocks, recording actions. A cached script is generated afterward.
- **Subsequent runs**: The script replays deterministically 10-100x faster, no LLM costs.
- **AI fallback**: If the script encounters an element it cannot find, it falls back to the AI agent \
for that step. The fallback episode is recorded and used to improve the script.
### Design for cacheability
1. Use stable selectors: navigation goals that reference exact field labels cache better than vague \
descriptions. "Fill in the 'Company Name' field" caches better than "fill in the first text box."
2. Avoid dynamic page content in goals: if a page shows different content each time, the cached script \
may need frequent AI fallbacks. Consider splitting dynamic sections into separate blocks.
3. Parameterize all variable data: cached scripts substitute parameters at runtime. Hardcoded values \
in navigation_goal become part of the script literally.
### Overriding execution mode at run time
Pass `run_with="agent"` to `skyvern_workflow_run` to force AI execution for a specific run without \
changing the workflow definition. This is useful for:
- First runs when no script exists yet (the system handles this automatically)
- Debugging: comparing AI behavior vs script behavior
- Sites that changed layout since the last successful script run
"""

View file

@ -0,0 +1,293 @@
"""Skyvern MCP script tools — visibility into cached scripts and fallback episodes.
Tools for listing scripts, viewing generated code, checking version history,
inspecting AI fallback episodes, and deploying updated script versions.
These tools do not require a browser session.
"""
from __future__ import annotations
import json
from typing import Annotated, Any
import structlog
from pydantic import Field, ValidationError
from skyvern.client.errors import NotFoundError
from skyvern.client.types import ScriptFileCreate
from ._common import ErrorCode, Timer, make_error, make_result, raw_http_get
from ._session import get_skyvern
from ._validation import validate_run_id, validate_script_id, validate_workflow_id
LOG = structlog.get_logger()
# ---------------------------------------------------------------------------
# Script tools
# ---------------------------------------------------------------------------
async def skyvern_script_list_for_workflow(
    workflow_id: Annotated[str, Field(description="Workflow permanent ID (starts with wpid_)")],
) -> dict[str, Any]:
    """List all cached scripts for a workflow. Use this as the entry point to discover
    script IDs for a given workflow. Returns script metadata including version count,
    success rate, and cache key information."""
    if err := validate_workflow_id(workflow_id, "skyvern_script_list_for_workflow"):
        return err

    with Timer() as timer:
        try:
            data = await raw_http_get(f"v1/scripts/workflows/{workflow_id}")
            timer.mark("api")
        except NotFoundError:
            return make_result(
                "skyvern_script_list_for_workflow",
                ok=False,
                timing_ms=timer.timing_ms,
                error=make_error(
                    ErrorCode.WORKFLOW_NOT_FOUND,
                    f"Workflow {workflow_id!r} not found",
                    "Verify the workflow ID with skyvern_workflow_list",
                ),
            )
        except Exception as e:
            LOG.error("script_list_for_workflow_failed", workflow_id=workflow_id, error=str(e))
            return make_result(
                "skyvern_script_list_for_workflow",
                ok=False,
                timing_ms=timer.timing_ms,
                error=make_error(ErrorCode.API_ERROR, str(e), "Check the workflow ID and your API key"),
            )

    def _with_version(entry: Any) -> Any:
        # Some payloads expose only "latest_version"; mirror it into "version"
        # so callers see a uniform field. Non-dict entries pass through untouched.
        if not isinstance(entry, dict):
            return entry
        normalized = dict(entry)
        if "version" not in normalized and "latest_version" in normalized:
            normalized["version"] = normalized["latest_version"]
        return normalized

    raw_scripts = data.get("scripts", []) if isinstance(data, dict) else data
    scripts: Any = [_with_version(s) for s in raw_scripts] if isinstance(raw_scripts, list) else raw_scripts
    count = len(scripts) if isinstance(scripts, list) else 0
    return make_result(
        "skyvern_script_list_for_workflow",
        data={"workflow_id": workflow_id, "scripts": scripts, "count": count},
        timing_ms=timer.timing_ms,
    )
async def skyvern_script_get_code(
    script_id: Annotated[str, Field(description="Script ID (starts with s_)")],
    version: Annotated[int | None, Field(description="Version number. Omit to get the latest version.")] = None,
) -> dict[str, Any]:
    """Get the generated Python code for a cached script. Returns the main orchestrator
    script and per-block code. Use skyvern_script_list_for_workflow to find script IDs first."""
    if err := validate_script_id(script_id, "skyvern_script_get_code"):
        return err
    with Timer() as timer:
        try:
            if version is None:
                # Resolve "latest" by reading the script's current version first.
                script_meta = await raw_http_get(f"v1/scripts/{script_id}")
                timer.mark("resolve_version")
                version = script_meta.get("version", 1) if isinstance(script_meta, dict) else 1
            data = await raw_http_get(f"v1/scripts/{script_id}/versions/{version}")
            timer.mark("api")
        except NotFoundError:
            # Bug fix: if the 404 fires while resolving the latest version,
            # `version` is still None — saying "version None not found" was misleading.
            if version is None:
                message = f"Script {script_id!r} not found"
            else:
                message = f"Script {script_id!r} version {version} not found"
            return make_result(
                "skyvern_script_get_code",
                ok=False,
                timing_ms=timer.timing_ms,
                error=make_error(
                    ErrorCode.INVALID_INPUT,
                    message,
                    "Use skyvern_script_versions to see available versions",
                ),
            )
        except Exception as e:
            LOG.error("script_get_code_failed", script_id=script_id, version=version, error=str(e))
            return make_result(
                "skyvern_script_get_code",
                ok=False,
                timing_ms=timer.timing_ms,
                error=make_error(ErrorCode.API_ERROR, str(e), "Check the script ID and your API key"),
            )
    result: dict[str, Any] = {
        "script_id": script_id,
        "version": version,
    }
    if isinstance(data, dict):
        # Per-block code plus the orchestrator script, as returned by the API.
        result["blocks"] = data.get("blocks", {})
        result["main_script"] = data.get("main_script")
    return make_result("skyvern_script_get_code", data=result, timing_ms=timer.timing_ms)
async def skyvern_script_versions(
    script_id: Annotated[str, Field(description="Script ID (starts with s_)")],
) -> dict[str, Any]:
    """List all versions of a cached script. Shows version history including
    creation timestamps and which run triggered each version."""
    if err := validate_script_id(script_id, "skyvern_script_versions"):
        return err
    with Timer() as timer:
        try:
            data = await raw_http_get(f"v1/scripts/{script_id}/versions")
            timer.mark("api")
        except NotFoundError:
            return make_result(
                "skyvern_script_versions",
                ok=False,
                timing_ms=timer.timing_ms,
                error=make_error(
                    ErrorCode.INVALID_INPUT,
                    f"Script {script_id!r} not found",
                    "Use skyvern_script_list_for_workflow to find valid script IDs",
                ),
            )
        except Exception as e:
            LOG.error("script_versions_failed", script_id=script_id, error=str(e))
            return make_result(
                "skyvern_script_versions",
                ok=False,
                timing_ms=timer.timing_ms,
                error=make_error(ErrorCode.API_ERROR, str(e), "Check the script ID and your API key"),
            )
    versions = data.get("versions", []) if isinstance(data, dict) else data
    # Robustness fix: mirror skyvern_script_list_for_workflow and tolerate a
    # non-list "versions" payload (e.g. null) instead of raising TypeError on len().
    count = len(versions) if isinstance(versions, list) else 0
    return make_result(
        "skyvern_script_versions",
        data={"script_id": script_id, "versions": versions, "count": count},
        timing_ms=timer.timing_ms,
    )
async def skyvern_script_fallback_episodes(
    workflow_id: Annotated[str, Field(description="Workflow permanent ID (starts with wpid_)")],
    workflow_run_id: Annotated[str | None, Field(description="Filter to a specific run (starts with wr_)")] = None,
    block_label: Annotated[str | None, Field(description="Filter to a specific block label")] = None,
    page: Annotated[int, Field(description="Page number (1-based)", ge=1)] = 1,
    page_size: Annotated[int, Field(description="Results per page", ge=1, le=100)] = 20,
) -> dict[str, Any]:
    """List AI fallback episodes for a workflow's cached scripts. Each episode records
    when a cached script's selector failed and the AI agent took over. Shows error details,
    block label, and whether the fallback succeeded. Useful for understanding why a script
    fell back to AI and how the script evolved."""
    if err := validate_workflow_id(workflow_id, "skyvern_script_fallback_episodes"):
        return err
    if workflow_run_id is not None and (err := validate_run_id(workflow_run_id, "skyvern_script_fallback_episodes")):
        return err

    # Only send the optional filters that were actually supplied.
    params: dict[str, Any] = {"page": page, "page_size": page_size}
    if workflow_run_id is not None:
        params["workflow_run_id"] = workflow_run_id
    if block_label is not None:
        params["block_label"] = block_label

    with Timer() as timer:
        try:
            data = await raw_http_get(f"v1/workflows/{workflow_id}/fallback-episodes", params=params)
            timer.mark("api")
        except NotFoundError:
            return make_result(
                "skyvern_script_fallback_episodes",
                ok=False,
                timing_ms=timer.timing_ms,
                error=make_error(
                    ErrorCode.WORKFLOW_NOT_FOUND,
                    f"Workflow {workflow_id!r} not found",
                    "Verify the workflow ID with skyvern_workflow_list",
                ),
            )
        except Exception as e:
            LOG.error("script_fallback_episodes_failed", workflow_id=workflow_id, error=str(e))
            return make_result(
                "skyvern_script_fallback_episodes",
                ok=False,
                timing_ms=timer.timing_ms,
                error=make_error(ErrorCode.API_ERROR, str(e), "Check the workflow ID and your API key"),
            )

    if isinstance(data, dict):
        payload: dict[str, Any] = {
            "workflow_id": workflow_id,
            "episodes": data.get("episodes", []),
            "total_count": data.get("total_count", 0),
            "page": data.get("page", page),
            "page_size": data.get("page_size", page_size),
        }
    else:
        # Unexpected payload shape: pass it through with a best-effort count.
        payload = {
            "workflow_id": workflow_id,
            "episodes": data,
            "total_count": len(data) if isinstance(data, list) else 0,
        }
    return make_result("skyvern_script_fallback_episodes", data=payload, timing_ms=timer.timing_ms)
async def skyvern_script_deploy(
    script_id: Annotated[str, Field(description="Script ID to deploy a new version for (starts with s_)")],
    files: Annotated[
        str,
        Field(
            description='JSON array of file objects: [{"path": "main.py", "content": "<base64-encoded>", "encoding": "base64"}]'
        ),
    ],
) -> dict[str, Any]:
    """Deploy a new version of a cached script with updated files. Creates a new version
    that will be used on the next workflow run. File content must be base64-encoded."""
    if err := validate_script_id(script_id, "skyvern_script_deploy"):
        return err

    # Parse and type-check the file list up front so the API is only hit with valid input.
    try:
        decoded = json.loads(files)
        if not isinstance(decoded, list):
            raise ValueError("files must be a JSON array")
        script_files = [ScriptFileCreate(**entry) for entry in decoded]
    except (json.JSONDecodeError, TypeError, ValueError, ValidationError) as exc:
        return make_result(
            "skyvern_script_deploy",
            ok=False,
            error=make_error(
                ErrorCode.INVALID_INPUT,
                f"Invalid files JSON: {exc}",
                'Provide a JSON array: [{"path": "main.py", "content": "<base64>", "encoding": "base64"}]',
            ),
        )

    skyvern = get_skyvern()
    with Timer() as timer:
        try:
            deployed = await skyvern.deploy_script(script_id, files=script_files)
            timer.mark("sdk")
        except NotFoundError:
            return make_result(
                "skyvern_script_deploy",
                ok=False,
                timing_ms=timer.timing_ms,
                error=make_error(
                    ErrorCode.INVALID_INPUT,
                    f"Script {script_id!r} not found",
                    "Use skyvern_script_list_for_workflow to find valid script IDs",
                ),
            )
        except Exception as e:
            LOG.error("script_deploy_failed", script_id=script_id, error=str(e))
            return make_result(
                "skyvern_script_deploy",
                ok=False,
                timing_ms=timer.timing_ms,
                error=make_error(ErrorCode.API_ERROR, str(e), "Check the script ID and your API key"),
            )

    response: dict[str, Any] = {"script_id": script_id}
    # SDK responses may be pydantic models or plain dicts; merge either shape.
    if hasattr(deployed, "model_dump"):
        response.update(deployed.model_dump(mode="json"))
    elif isinstance(deployed, dict):
        response.update(deployed)
    return make_result("skyvern_script_deploy", data=response, timing_ms=timer.timing_ms)

View file

@ -24,7 +24,7 @@ from skyvern.schemas.workflows import WorkflowCreateYAMLRequest as WorkflowCreat
from ._common import ErrorCode, Timer, make_error, make_result
from ._session import get_skyvern
from ._validation import validate_folder_id
from ._validation import validate_folder_id, validate_run_id, validate_workflow_id
LOG = structlog.get_logger()
_SUMMARY_TOP_LEVEL_KEY_LIMIT = 8
@ -45,7 +45,7 @@ def _serialize_workflow(wf: Any) -> dict[str, Any]:
Uses Any to avoid tight coupling with Fern-generated client types.
"""
return {
data: dict[str, Any] = {
"workflow_permanent_id": wf.workflow_permanent_id,
"workflow_id": wf.workflow_id,
"title": wf.title,
@ -57,6 +57,11 @@ def _serialize_workflow(wf: Any) -> dict[str, Any]:
"created_at": wf.created_at.isoformat() if wf.created_at else None,
"modified_at": wf.modified_at.isoformat() if wf.modified_at else None,
}
for caching_field in ("run_with", "code_version", "adaptive_caching"):
val = getattr(wf, caching_field, None)
if val is not None:
data[caching_field] = val
return data
def _serialize_workflow_full(wf: Any) -> dict[str, Any]:
@ -87,6 +92,7 @@ def _serialize_run(run: Any) -> dict[str, Any]:
"app_url",
"browser_session_id",
"run_with",
"ai_fallback",
):
val = getattr(run, field, None)
if val is not None:
@ -103,6 +109,10 @@ def _serialize_run(run: Any) -> dict[str, Any]:
if val is not None:
data[ts_field] = val.isoformat()
script_run = getattr(run, "script_run", None)
if script_run is not None:
data["script_run"] = script_run.model_dump(mode="json") if hasattr(script_run, "model_dump") else script_run
return data
@ -288,6 +298,12 @@ def _serialize_run_summary(run: Any) -> dict[str, Any]:
if run_with:
summary["run_with"] = run_with
script_run = _get_value(run, "script_run")
if script_run is not None:
sr = _jsonable(script_run)
if isinstance(sr, dict) and sr.get("ai_fallback_triggered") is not None:
summary["ai_fallback_triggered"] = sr["ai_fallback_triggered"]
workflow_title = _get_value(run, "workflow_title")
if workflow_title:
summary["workflow_title"] = workflow_title
@ -326,6 +342,8 @@ def _serialize_run_full(run: Any) -> dict[str, Any]:
"browser_profile_id",
"run_with",
"total_steps",
"script_run",
"ai_fallback",
):
value = _get_value(run, field)
if value is not None:
@ -368,56 +386,6 @@ async def _get_workflow_run_status(
return response.json()
def _validate_workflow_id(workflow_id: str, action: str) -> dict[str, Any] | None:
    """Validate workflow_id format. Returns a make_result error dict or None if valid."""
    # Reject path separators first: the ID is interpolated into API route paths.
    if "/" in workflow_id or "\\" in workflow_id:
        return make_result(
            action,
            ok=False,
            error=make_error(
                ErrorCode.INVALID_INPUT,
                "workflow_id must not contain path separators",
                "Provide a valid workflow permanent ID (starts with wpid_)",
            ),
        )
    # Well-formed workflow permanent IDs carry the wpid_ prefix.
    if not workflow_id.startswith("wpid_"):
        return make_result(
            action,
            ok=False,
            error=make_error(
                ErrorCode.INVALID_INPUT,
                f"Invalid workflow_id format: {workflow_id!r}",
                "Workflow IDs start with wpid_. Use skyvern_workflow_list to find valid IDs.",
            ),
        )
    return None
def _validate_run_id(run_id: str, action: str) -> dict[str, Any] | None:
    """Validate run_id format. Returns a make_result error dict or None if valid."""
    # Reject path separators first: the ID is interpolated into API route paths.
    if "/" in run_id or "\\" in run_id:
        return make_result(
            action,
            ok=False,
            error=make_error(
                ErrorCode.INVALID_INPUT,
                "run_id must not contain path separators",
                "Provide a valid run ID (starts with wr_ or tsk_v2_)",
            ),
        )
    # Workflow runs (wr_) and task runs (tsk_v2_) are both accepted.
    if not run_id.startswith("wr_") and not run_id.startswith("tsk_v2_"):
        return make_result(
            action,
            ok=False,
            error=make_error(
                ErrorCode.INVALID_INPUT,
                f"Invalid run_id format: {run_id!r}",
                "Run IDs start with wr_ (workflow runs) or tsk_v2_ (task runs). Check skyvern_workflow_run output.",
            ),
        )
    return None
async def _get_workflow_by_id(workflow_id: str, version: int | None = None) -> dict[str, Any]:
"""Fetch a single workflow by ID via the Skyvern API.
@ -780,7 +748,7 @@ async def skyvern_workflow_get(
) -> dict[str, Any]:
"""Get the full definition of a specific workflow. Use when you need to inspect a workflow's
blocks, parameters, and configuration before running or updating it."""
if err := _validate_workflow_id(workflow_id, "skyvern_workflow_get"):
if err := validate_workflow_id(workflow_id, "skyvern_workflow_get"):
return err
with Timer() as timer:
@ -937,7 +905,7 @@ async def skyvern_workflow_update(
) -> dict[str, Any]:
"""Update an existing workflow's definition. Use when you need to modify a workflow's blocks,
parameters, or configuration. Creates a new version of the workflow."""
if err := _validate_workflow_id(workflow_id, "skyvern_workflow_update"):
if err := validate_workflow_id(workflow_id, "skyvern_workflow_update"):
return err
if format not in ("json", "yaml", "auto"):
@ -1017,7 +985,7 @@ async def skyvern_workflow_delete(
) -> dict[str, Any]:
"""Delete a workflow permanently. Use when you need to remove a workflow that is no longer needed.
Requires force=true to prevent accidental deletion."""
if err := _validate_workflow_id(workflow_id, "skyvern_workflow_delete"):
if err := validate_workflow_id(workflow_id, "skyvern_workflow_delete"):
return err
if not force:
@ -1077,7 +1045,7 @@ async def skyvern_workflow_update_folder(
] = None,
) -> dict[str, Any]:
"""Assign a workflow to a folder, or remove it from its current folder."""
if err := _validate_workflow_id(workflow_id, "skyvern_workflow_update_folder"):
if err := validate_workflow_id(workflow_id, "skyvern_workflow_update_folder"):
return err
if folder_id is not None and (err := validate_folder_id(folder_id, "skyvern_workflow_update_folder")):
return err
@ -1152,7 +1120,7 @@ async def skyvern_workflow_run(
Returns immediately by default (async) set wait=true to block until completion.
Default timeout is 300s (5 minutes). For longer workflows, increase timeout_seconds
or use wait=false and poll with skyvern_workflow_status."""
if err := _validate_workflow_id(workflow_id, "skyvern_workflow_run"):
if err := validate_workflow_id(workflow_id, "skyvern_workflow_run"):
return err
parsed_params: dict[str, Any] | None = None
@ -1248,7 +1216,7 @@ async def skyvern_workflow_status(
) -> dict[str, Any]:
"""Check the status and progress of a workflow or task run. Use when you need to monitor
a running workflow, check if it completed, or retrieve its output."""
if err := _validate_run_id(run_id, "skyvern_workflow_status"):
if err := validate_run_id(run_id, "skyvern_workflow_status"):
return err
if verbosity not in {"summary", "full"}:
return make_result(
@ -1307,7 +1275,7 @@ async def skyvern_workflow_cancel(
) -> dict[str, Any]:
"""Cancel a running workflow or task. Use when you need to stop a workflow that is taking
too long, is stuck, or is no longer needed."""
if err := _validate_run_id(run_id, "skyvern_workflow_cancel"):
if err := validate_run_id(run_id, "skyvern_workflow_cancel"):
return err
skyvern = get_skyvern()

View file

@ -0,0 +1,42 @@
from __future__ import annotations
from types import SimpleNamespace
from unittest.mock import AsyncMock, Mock
import pytest
import skyvern.cli.mcp_tools._common as common_tools
@pytest.mark.asyncio
async def test_raw_http_get_returns_empty_dict_for_204(monkeypatch: pytest.MonkeyPatch) -> None:
    """A 204 response short-circuits to {} without ever touching response.json()."""
    no_content = SimpleNamespace(
        status_code=204,
        text="",
        json=Mock(side_effect=AssertionError("json() should not be called for 204 responses")),
    )
    stub_http = SimpleNamespace(request=AsyncMock(return_value=no_content))
    stub_skyvern = SimpleNamespace(_client_wrapper=SimpleNamespace(httpx_client=stub_http))
    monkeypatch.setattr("skyvern.cli.mcp_tools._session.get_skyvern", lambda: stub_skyvern)

    assert await common_tools.raw_http_get("v1/test") == {}
@pytest.mark.asyncio
async def test_raw_http_get_returns_raw_text_for_non_json_success(monkeypatch: pytest.MonkeyPatch) -> None:
    """A 2xx body that fails JSON decoding is wrapped as {"raw": <text>}."""
    html_response = SimpleNamespace(
        status_code=200,
        text="<html>ok</html>",
        json=Mock(side_effect=ValueError("not json")),
    )
    stub_http = SimpleNamespace(request=AsyncMock(return_value=html_response))
    stub_skyvern = SimpleNamespace(_client_wrapper=SimpleNamespace(httpx_client=stub_http))
    monkeypatch.setattr("skyvern.cli.mcp_tools._session.get_skyvern", lambda: stub_skyvern)

    assert await common_tools.raw_http_get("v1/test") == {"raw": "<html>ok</html>"}

View file

@ -0,0 +1,482 @@
"""Live MCP server tests for script/caching tools.
Tests call tools through the actual FastMCP Client, exactly as Claude Code would.
API responses are mocked at the HTTP layer so we test the full MCP pipeline:
Client FastMCP tool function raw_http_get/SDK (mocked) API
"""
from __future__ import annotations
import json
from types import SimpleNamespace
from unittest.mock import AsyncMock
import pytest
from fastmcp import Client
import skyvern.cli.mcp_tools.scripts as script_tools
import skyvern.cli.mcp_tools.workflow as workflow_tools
from skyvern.cli.mcp_tools import mcp
from skyvern.client.types import ScriptFileCreate
# ---------------------------------------------------------------------------
# Fake API payloads
# ---------------------------------------------------------------------------
FAKE_SCRIPTS = {
"scripts": [
{
"script_id": "s_abc",
"cache_key": "hash",
"cache_key_value": "default",
"status": "published",
"latest_version": 2,
"version_count": 2,
"total_runs": 5,
"success_rate": 0.8,
"is_pinned": False,
}
]
}
FAKE_CODE = {
"blocks": {
"fill_form": "async def fill_form(page, ctx):\n await page.fill('xpath=//input', ctx.parameters['name'])\n",
},
"main_script": "import skyvern\n\n@skyvern.workflow(title='Test')\nasync def run(params):\n pass\n",
"script_id": "s_abc",
"version": 2,
}
FAKE_VERSIONS = {
"versions": [
{"version": 1, "script_revision_id": "srev_1", "created_at": "2026-03-20T10:00:00Z", "run_id": "wr_001"},
{"version": 2, "script_revision_id": "srev_2", "created_at": "2026-03-22T14:00:00Z", "run_id": "wr_002"},
]
}
FAKE_EPISODES = {
"episodes": [
{
"episode_id": "ep_1",
"block_label": "fill_form",
"fallback_type": "selector_miss",
"error_message": "Element not found: site redesigned",
"classify_result": None,
"fallback_succeeded": True,
"workflow_run_id": "wr_002",
"page_url": "https://example.com/form",
"reviewed": True,
"created_at": "2026-03-22T14:01:00Z",
}
],
"total_count": 1,
"page": 1,
"page_size": 20,
}
def _mock_raw_http(responses: dict):
    """Build an async stand-in for ``raw_http_get``.

    Dispatch is by substring: the first key (in insertion order) that appears
    inside the requested path selects the canned payload. A path matching no
    key raises, so any unexpected API call fails the test loudly.
    """

    async def _routed(path, params=None):
        hits = (payload for needle, payload in responses.items() if needle in path)
        for payload in hits:
            return payload
        raise RuntimeError(f"Unmocked path: {path}")

    return _routed
# ---------------------------------------------------------------------------
# Scenario 1: "Show me the scripts for this workflow"
# ---------------------------------------------------------------------------
@pytest.mark.asyncio
async def test_list_scripts_via_mcp(monkeypatch):
    """Listing cached scripts for a workflow surfaces summaries through MCP."""
    fake_http = _mock_raw_http({"scripts/workflows/": FAKE_SCRIPTS})
    monkeypatch.setattr(script_tools, "raw_http_get", fake_http)

    async with Client(mcp) as client:
        result = await client.call_tool(
            "skyvern_script_list_for_workflow",
            {"workflow_id": "wpid_test"},
        )

    payload = result.data
    assert payload["ok"] is True
    listed = payload["data"]["scripts"]
    assert len(listed) == 1
    first = listed[0]
    assert first["script_id"] == "s_abc"
    assert first["success_rate"] == 0.8
    assert first["version"] == 2
@pytest.mark.asyncio
@pytest.mark.parametrize(
    ("payload", "expected_scripts"),
    [
        ({"scripts": None}, None),
        ({"scripts": {"unexpected": "shape"}}, {"unexpected": "shape"}),
    ],
)
async def test_list_scripts_handles_missing_script_list_via_mcp(monkeypatch, payload, expected_scripts):
    """A non-list 'scripts' payload is passed through unchanged with count 0."""
    monkeypatch.setattr(
        script_tools,
        "raw_http_get",
        _mock_raw_http({"scripts/workflows/": payload}),
    )

    async with Client(mcp) as client:
        result = await client.call_tool(
            "skyvern_script_list_for_workflow",
            {"workflow_id": "wpid_test"},
        )

    body = result.data
    assert body["ok"] is True
    assert body["data"]["scripts"] == expected_scripts
    assert body["data"]["count"] == 0
# ---------------------------------------------------------------------------
# Scenario 2: "Print the script that was made"
# ---------------------------------------------------------------------------
@pytest.mark.asyncio
async def test_get_script_code_via_mcp(monkeypatch):
    """Fetching an explicit version returns its block code and main script."""
    monkeypatch.setattr(
        script_tools,
        "raw_http_get",
        _mock_raw_http({"scripts/s_abc/versions/2": FAKE_CODE}),
    )

    async with Client(mcp) as client:
        result = await client.call_tool(
            "skyvern_script_get_code",
            {"script_id": "s_abc", "version": 2},
        )

    assert result.data["ok"] is True
    code = result.data["data"]
    blocks = code["blocks"]
    assert "fill_form" in blocks
    assert "page.fill" in blocks["fill_form"]
    assert "@skyvern.workflow" in code["main_script"]
@pytest.mark.asyncio
async def test_get_script_code_resolves_latest_via_mcp(monkeypatch):
    """When version is omitted, tool fetches metadata first to find latest."""
    # NOTE: the mock routes by first substring match in insertion order, so
    # the more specific versions path must come before the bare metadata path.
    routes = {
        "v1/scripts/s_abc/versions/2": FAKE_CODE,
        "v1/scripts/s_abc": {"script_id": "s_abc", "version": 2},
    }
    monkeypatch.setattr(script_tools, "raw_http_get", _mock_raw_http(routes))

    async with Client(mcp) as client:
        result = await client.call_tool(
            "skyvern_script_get_code",
            {"script_id": "s_abc"},
        )

    assert result.data["ok"] is True
    assert result.data["data"]["version"] == 2
# ---------------------------------------------------------------------------
# Scenario 3: "How did the script evolve?"
# ---------------------------------------------------------------------------
@pytest.mark.asyncio
async def test_script_versions_via_mcp(monkeypatch):
    """Version history comes back with one ordered entry per revision."""
    monkeypatch.setattr(
        script_tools,
        "raw_http_get",
        _mock_raw_http({"versions": FAKE_VERSIONS}),
    )

    async with Client(mcp) as client:
        result = await client.call_tool(
            "skyvern_script_versions",
            {"script_id": "s_abc"},
        )

    assert result.data["ok"] is True
    history = result.data["data"]["versions"]
    assert [entry["version"] for entry in history] == [1, 2]
# ---------------------------------------------------------------------------
# Scenario 4: "Why did it fall back to AI?"
# ---------------------------------------------------------------------------
@pytest.mark.asyncio
async def test_fallback_episodes_via_mcp(monkeypatch):
    """Episode listings explain why a cached script fell back to the AI."""
    monkeypatch.setattr(
        script_tools,
        "raw_http_get",
        _mock_raw_http({"fallback-episodes": FAKE_EPISODES}),
    )

    async with Client(mcp) as client:
        result = await client.call_tool(
            "skyvern_script_fallback_episodes",
            {"workflow_id": "wpid_test"},
        )

    assert result.data["ok"] is True
    body = result.data["data"]
    assert body["total_count"] == 1
    episode = body["episodes"][0]
    assert episode["fallback_type"] == "selector_miss"
    assert "site redesigned" in episode["error_message"]
    assert episode["fallback_succeeded"] is True
@pytest.mark.asyncio
async def test_fallback_episodes_rejects_invalid_workflow_run_id_via_mcp(monkeypatch):
    """A malformed workflow_run_id is rejected before any HTTP call happens."""
    http_spy = AsyncMock(return_value=FAKE_EPISODES)
    monkeypatch.setattr(script_tools, "raw_http_get", http_spy)

    async with Client(mcp) as client:
        result = await client.call_tool(
            "skyvern_script_fallback_episodes",
            {"workflow_id": "wpid_test", "workflow_run_id": "bad_run_id"},
        )

    assert result.data["ok"] is False
    assert result.data["error"]["code"] == script_tools.ErrorCode.INVALID_INPUT
    http_spy.assert_not_awaited()
# ---------------------------------------------------------------------------
# Scenario 5: "Edit the script"
# ---------------------------------------------------------------------------
@pytest.mark.asyncio
async def test_deploy_script_via_mcp(monkeypatch):
    """Deploying an edited file decodes the JSON manifest into ScriptFileCreate objects.

    Fix: the `import base64` previously sat mid-function; it is hoisted to the
    module's import block per PEP 8.
    """
    deploy_resp = SimpleNamespace(
        script_id="s_abc",
        version=3,
        script_revision_id="srev_3",
        model_dump=lambda mode="python": {"script_id": "s_abc", "version": 3, "script_revision_id": "srev_3"},
    )
    fake_client = SimpleNamespace(deploy_script=AsyncMock(return_value=deploy_resp))
    monkeypatch.setattr(script_tools, "get_skyvern", lambda: fake_client)

    # Files travel as a JSON string of {path, content(base64), encoding} dicts.
    files = json.dumps([{"path": "main.py", "content": base64.b64encode(b"# edited").decode(), "encoding": "base64"}])

    async with Client(mcp) as client:
        result = await client.call_tool(
            "skyvern_script_deploy",
            {"script_id": "s_abc", "files": files},
        )

    assert result.data["ok"] is True
    assert result.data["data"]["version"] == 3
    fake_client.deploy_script.assert_awaited_once()
    # The tool must hand the SDK typed ScriptFileCreate objects, not raw dicts.
    called_files = fake_client.deploy_script.await_args.kwargs["files"]
    assert len(called_files) == 1
    assert isinstance(called_files[0], ScriptFileCreate)
    assert called_files[0].path == "main.py"
# ---------------------------------------------------------------------------
# Scenario 6: Workflow create shows caching defaults
# ---------------------------------------------------------------------------
@pytest.mark.asyncio
async def test_workflow_create_surfaces_caching_fields_via_mcp(monkeypatch):
    """Workflow creation responses expose code_version / run_with / adaptive_caching.

    Fix: the datetime import previously sat mid-function; it is hoisted to the
    module's import block per PEP 8.
    """
    now = datetime.now(timezone.utc)
    fake_wf = SimpleNamespace(
        workflow_permanent_id="wpid_new",
        workflow_id="wf_1",
        title="Test",
        version=1,
        status="published",
        description=None,
        is_saved_task=False,
        folder_id=None,
        created_at=now,
        modified_at=now,
        code_version=2,
        adaptive_caching=True,
        run_with="code",
    )
    fake_client = SimpleNamespace(create_workflow=AsyncMock(return_value=fake_wf))
    monkeypatch.setattr(workflow_tools, "get_skyvern", lambda: fake_client)

    definition = json.dumps(
        {
            "title": "Test",
            "workflow_definition": {
                "parameters": [],
                "blocks": [
                    {
                        "block_type": "navigation",
                        "label": "s1",
                        "url": "https://example.com",
                        "navigation_goal": "Click",
                    }
                ],
            },
        }
    )

    async with Client(mcp) as client:
        result = await client.call_tool(
            "skyvern_workflow_create",
            {"definition": definition, "format": "json"},
        )

    assert result.data["ok"] is True
    data = result.data["data"]
    assert data["code_version"] == 2
    assert data["run_with"] == "code"
    assert data["adaptive_caching"] is True
# ---------------------------------------------------------------------------
# Scenario 7: Run status shows script_run + ai_fallback
# ---------------------------------------------------------------------------
@pytest.mark.asyncio
async def test_workflow_status_shows_script_run_via_mcp(monkeypatch):
    """Full-verbosity status surfaces run_with and the script_run fallback flag."""
    status_payload = {
        "workflow_run_id": "wr_test",
        "status": "completed",
        "run_with": "code",
        "workflow_title": "Test",
        "script_run": {"ai_fallback_triggered": True, "script_id": "s_abc"},
        "outputs": {"result": "ok"},
    }
    # The tool reaches through the SDK's private httpx client, so the fake
    # mirrors that nesting: client._client_wrapper.httpx_client.request(...).
    fake_resp = SimpleNamespace(status_code=200, json=lambda: status_payload, text="")
    fake_httpx = SimpleNamespace(request=AsyncMock(return_value=fake_resp))
    fake_client = SimpleNamespace(_client_wrapper=SimpleNamespace(httpx_client=fake_httpx))
    monkeypatch.setattr(workflow_tools, "get_skyvern", lambda: fake_client)

    async with Client(mcp) as client:
        result = await client.call_tool(
            "skyvern_workflow_status",
            {"run_id": "wr_test", "verbosity": "full"},
        )

    assert result.data["ok"] is True
    body = result.data["data"]
    assert body["run_with"] == "code"
    assert body["script_run"]["ai_fallback_triggered"] is True
# ---------------------------------------------------------------------------
# Validation: bad inputs get clear errors
# ---------------------------------------------------------------------------
@pytest.mark.asyncio
async def test_bad_workflow_id_returns_error_via_mcp():
    """Workflow ids lacking the wpid_ prefix yield a clear validation error."""
    async with Client(mcp) as client:
        result = await client.call_tool(
            "skyvern_script_list_for_workflow",
            {"workflow_id": "not_a_wpid"},
        )

    body = result.data
    assert body["ok"] is False
    assert "wpid_" in str(body["error"])
@pytest.mark.asyncio
async def test_bad_script_id_returns_error_via_mcp():
    """Script ids lacking the s_ prefix yield a clear validation error."""
    async with Client(mcp) as client:
        result = await client.call_tool(
            "skyvern_script_get_code",
            {"script_id": "wrong_prefix"},
        )

    body = result.data
    assert body["ok"] is False
    assert "s_" in str(body["error"])
@pytest.mark.asyncio
async def test_bad_deploy_json_returns_error_via_mcp():
    """An unparseable files manifest fails fast with a JSON-specific message."""
    async with Client(mcp) as client:
        result = await client.call_tool(
            "skyvern_script_deploy",
            {"script_id": "s_abc", "files": "not json"},
        )

    body = result.data
    assert body["ok"] is False
    assert "JSON" in body["error"]["message"]