mirror of
https://github.com/Skyvern-AI/skyvern.git
synced 2026-04-28 03:30:10 +00:00
feat: MCP script/caching tools for code v2 visibility (#5243)
This commit is contained in:
parent
6d3aa06164
commit
31d37a5b01
8 changed files with 1048 additions and 59 deletions
|
|
@ -49,6 +49,13 @@ from .inspection import (
|
|||
skyvern_network_requests,
|
||||
)
|
||||
from .prompts import build_workflow, debug_automation, extract_data, qa_test
|
||||
from .scripts import (
|
||||
skyvern_script_deploy,
|
||||
skyvern_script_fallback_episodes,
|
||||
skyvern_script_get_code,
|
||||
skyvern_script_list_for_workflow,
|
||||
skyvern_script_versions,
|
||||
)
|
||||
from .session import (
|
||||
skyvern_browser_session_close,
|
||||
skyvern_browser_session_connect,
|
||||
|
|
@ -104,6 +111,8 @@ targeted test cases, open a browser against the dev server, and report pass/fail
|
|||
| Debug browser issues | skyvern_browser_session_create → skyvern_navigate | skyvern_console_messages / skyvern_network_requests |
|
||||
| Build a reusable automation | skyvern_workflow_create (no session needed) | skyvern_workflow_run to test |
|
||||
| Run an existing automation | skyvern_workflow_run (no session needed) | skyvern_workflow_status to check |
|
||||
| View cached scripts | skyvern_script_list_for_workflow (no session needed) | skyvern_script_get_code to see code |
|
||||
| Check why AI fallback happened | skyvern_script_fallback_episodes (no session needed) | skyvern_script_versions for history |
|
||||
| One-off autonomous task | skyvern_run_task (no session needed) | Check result in response |
|
||||
|
||||
## Tool Selection
|
||||
|
|
@ -127,6 +136,10 @@ targeted test cases, open a browser against the dev server, and report pass/fail
|
|||
| "What credentials do I have?" | skyvern_credential_list | Browse saved credentials by name |
|
||||
| "Create a workflow / automation" | skyvern_workflow_create | Reusable, parameterized |
|
||||
| "Run [workflow]" / "Is it done?" | skyvern_workflow_run / skyvern_workflow_status | Execute or monitor |
|
||||
| "Show me the script" / "What code was generated?" | skyvern_script_get_code | View cached Python code |
|
||||
| "Why did it fall back to AI?" | skyvern_script_fallback_episodes | Inspect AI fallback details |
|
||||
| "Run this with AI agent" / "Force agent mode" | skyvern_workflow_run(run_with="agent") | Override cached script |
|
||||
| "Edit / update the script" | skyvern_script_deploy | Deploy new script version |
|
||||
|
||||
## Critical Rules
|
||||
1. Use Skyvern for all browser tasks. curl/wget/requests are fine for APIs and file downloads.
|
||||
|
|
@ -171,6 +184,8 @@ skyvern_wait, skyvern_drag support three modes. When unsure, use intent. For mul
|
|||
- skyvern_drag requires a session AND a navigated page with draggable elements
|
||||
- skyvern_console_messages / skyvern_network_requests capture events from session start — call anytime
|
||||
- skyvern_run_task is one-off — for reusable automations, use skyvern_workflow_create
|
||||
- Script tools (list, get_code, versions, fallback_episodes, deploy) do NOT need a browser session
|
||||
- Use skyvern_script_list_for_workflow as the entry point to discover script IDs for a workflow
|
||||
|
||||
## Engine Selection
|
||||
|
||||
|
|
@ -190,6 +205,41 @@ workflow block definitions — skyvern_run_task always uses engine 2.0 internall
|
|||
|
||||
Other engines (`openai-cua`, `anthropic-cua`, `ui-tars`) are available for advanced use cases but are not recommended as defaults.
|
||||
|
||||
## Caching & Script Execution
|
||||
|
||||
Skyvern workflows support two execution modes controlled by `run_with`:
|
||||
|
||||
| `run_with` value | Behavior |
|
||||
|------------------|----------|
|
||||
| `"code"` (default for MCP-created workflows) | Runs a cached Python script generated from a previous successful AI run. \
|
||||
10-100x faster, no LLM calls. Falls back to AI if the script fails. |
|
||||
| `"agent"` | Always runs with the AI agent (LLM-driven navigation). Use for first-run exploration or when the site changed. |
|
||||
| `null` / omitted | Inherits from the workflow definition. MCP defaults to `"code"`. |
|
||||
|
||||
### How Caching Works
|
||||
|
||||
1. **First run** — The AI agent navigates the site, recording every action.
|
||||
2. **Script generation** — After a successful run, a deterministic Python script is generated from the recorded actions.
|
||||
3. **Subsequent runs** — The script replays actions directly (no LLM calls). If a selector fails, AI takes over for that step.
|
||||
4. **Script evolution** — Each AI fallback improves the script. Over time, fallbacks decrease.
|
||||
|
||||
MCP-created workflows automatically set `code_version=2` and `run_with="code"` unless you explicitly override them.
|
||||
|
||||
### When to Override
|
||||
|
||||
- Set `run_with="agent"` in skyvern_workflow_run when: testing a new workflow for the first time, debugging a cached \
|
||||
script, or when the target site redesigned its UI.
|
||||
- Set `run_with="code"` (or omit — it's the default) when: the workflow has run successfully before and you want \
|
||||
maximum speed.
|
||||
|
||||
### Script Tools
|
||||
|
||||
- **skyvern_script_list_for_workflow** — Entry point: find scripts for a workflow (wpid → script IDs)
|
||||
- **skyvern_script_get_code** — View the generated Python code for a script version
|
||||
- **skyvern_script_versions** — List version history showing how the script evolved
|
||||
- **skyvern_script_fallback_episodes** — See when and why the AI agent took over from the cached script
|
||||
- **skyvern_script_deploy** — Deploy an updated script version
|
||||
|
||||
## Getting Started
|
||||
|
||||
**Exploring a website**: skyvern_browser_session_create → skyvern_navigate → skyvern_screenshot → \
|
||||
|
|
@ -234,6 +284,9 @@ BAD (1 giant block trying to do everything):
|
|||
Use `{{parameter_key}}` to reference workflow input parameters in any block field.
|
||||
Blocks in the same workflow run share the same browser session automatically.
|
||||
To inspect a real workflow for reference, use skyvern_workflow_get.
|
||||
Workflows created via MCP default to code execution mode (code_version=2, run_with="code"). \
|
||||
The first run uses the AI agent to learn the navigation; subsequent runs replay a cached script. \
|
||||
To force AI agent mode on a specific run, pass run_with="agent" to skyvern_workflow_run.
|
||||
|
||||
### Block Types Reference
|
||||
- **navigation** — fill forms, click buttons, navigate multi-step flows (most common)
|
||||
|
|
@ -333,6 +386,13 @@ mcp.tool(tags={"workflow"}, annotations=_MUT)(skyvern_workflow_run)
|
|||
mcp.tool(tags={"workflow"}, annotations=_RO)(skyvern_workflow_status)
|
||||
mcp.tool(tags={"workflow"}, annotations=_MUT)(skyvern_workflow_cancel)
|
||||
|
||||
# -- Script/caching tools (no browser needed) --
|
||||
mcp.tool(tags={"script"}, annotations=_RO)(skyvern_script_list_for_workflow)
|
||||
mcp.tool(tags={"script"}, annotations=_RO)(skyvern_script_get_code)
|
||||
mcp.tool(tags={"script"}, annotations=_RO)(skyvern_script_versions)
|
||||
mcp.tool(tags={"script"}, annotations=_RO)(skyvern_script_fallback_episodes)
|
||||
mcp.tool(tags={"script"}, annotations=_MUT)(skyvern_script_deploy)
|
||||
|
||||
# -- Prompts (methodology guides injected into LLM conversations) --
|
||||
mcp.prompt()(build_workflow)
|
||||
mcp.prompt()(debug_automation)
|
||||
|
|
@ -393,6 +453,12 @@ __all__ = [
|
|||
"skyvern_workflow_run",
|
||||
"skyvern_workflow_status",
|
||||
"skyvern_workflow_cancel",
|
||||
# Script/caching
|
||||
"skyvern_script_list_for_workflow",
|
||||
"skyvern_script_get_code",
|
||||
"skyvern_script_versions",
|
||||
"skyvern_script_fallback_episodes",
|
||||
"skyvern_script_deploy",
|
||||
# Prompts
|
||||
"build_workflow",
|
||||
"debug_automation",
|
||||
|
|
|
|||
|
|
@ -5,8 +5,45 @@ MCP tools import from here; the canonical implementations live in core/.
|
|||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any
|
||||
|
||||
from skyvern.cli.core.artifacts import get_artifact_dir, save_artifact
|
||||
from skyvern.cli.core.result import Artifact, BrowserContext, ErrorCode, Timer, make_error, make_result
|
||||
from skyvern.client.errors import NotFoundError
|
||||
|
||||
|
||||
async def raw_http_get(path: str, params: dict[str, Any] | None = None) -> Any:
    """Issue a GET against the Skyvern API for routes that lack an SDK method.

    Args:
        path: Request path handed to the client's HTTP wrapper.
        params: Optional query parameters; ``None`` is sent as an empty mapping.

    Returns:
        The decoded JSON body, ``{}`` for a 204 response, or
        ``{"raw": <response text>}`` when the body cannot be decoded as JSON.

    Raises:
        NotFoundError: The response status is 404.
        RuntimeError: The response status is any other value >= 400.
    """
    from ._session import get_skyvern

    # Temporary workaround: these MCP routes do not have public Fern SDK methods yet,
    # so we reach through the generated client's private wrapper. Revisit if the SDK
    # is regenerated or adds first-class methods for these endpoints.
    http_client = get_skyvern()._client_wrapper.httpx_client
    response = await http_client.request(path, method="GET", params=params or {})
    status = response.status_code

    if status == 404:
        raise NotFoundError(body={"detail": f"Not found: {path}"})

    if status >= 400:
        # Prefer the server-provided "detail" field; fall back to the raw text when
        # the body is not JSON or is not a JSON object.
        try:
            detail = response.json().get("detail", response.text)
        except Exception:
            detail = response.text
        raise RuntimeError(f"HTTP {status}: {detail}")

    if status == 204:
        return {}

    try:
        return response.json()
    except Exception:
        # Successful response with a non-JSON body: hand the text back wrapped.
        return {"raw": response.text}
|
||||
|
||||
|
||||
__all__ = [
|
||||
"Artifact",
|
||||
|
|
@ -16,5 +53,6 @@ __all__ = [
|
|||
"get_artifact_dir",
|
||||
"make_error",
|
||||
"make_result",
|
||||
"raw_http_get",
|
||||
"save_artifact",
|
||||
]
|
||||
|
|
|
|||
|
|
@ -27,3 +27,75 @@ def validate_folder_id(folder_id: str, action: str) -> dict[str, Any] | None:
|
|||
),
|
||||
)
|
||||
return None
|
||||
|
||||
|
||||
def validate_workflow_id(workflow_id: str, action: str) -> dict[str, Any] | None:
    """Check that ``workflow_id`` has the shape of a workflow permanent ID.

    Args:
        workflow_id: The ID supplied by the caller.
        action: Tool name placed in the failure payload.

    Returns:
        A failed ``make_result`` payload when the ID contains ``/`` or ``\\`` or
        does not start with ``wpid_``; ``None`` when both checks pass.
    """
    if any(separator in workflow_id for separator in ("/", "\\")):
        message = "workflow_id must not contain path separators"
        hint = "Provide a valid workflow permanent ID (starts with wpid_)"
    elif not workflow_id.startswith("wpid_"):
        message = f"Invalid workflow_id format: {workflow_id!r}"
        hint = "Workflow IDs start with wpid_. Use skyvern_workflow_list to find valid IDs."
    else:
        return None

    return make_result(
        action,
        ok=False,
        error=make_error(ErrorCode.INVALID_INPUT, message, hint),
    )
|
||||
|
||||
|
||||
def validate_run_id(run_id: str, action: str) -> dict[str, Any] | None:
    """Check that ``run_id`` has the shape of a workflow-run or task-run ID.

    Args:
        run_id: The ID supplied by the caller.
        action: Tool name placed in the failure payload.

    Returns:
        A failed ``make_result`` payload when the ID contains ``/`` or ``\\`` or
        starts with neither ``wr_`` nor ``tsk_v2_``; ``None`` when both checks pass.
    """
    if any(separator in run_id for separator in ("/", "\\")):
        message = "run_id must not contain path separators"
        hint = "Provide a valid run ID (starts with wr_ or tsk_v2_)"
    elif not run_id.startswith(("wr_", "tsk_v2_")):
        message = f"Invalid run_id format: {run_id!r}"
        hint = "Run IDs start with wr_ (workflow runs) or tsk_v2_ (task runs). Check skyvern_workflow_run output."
    else:
        return None

    return make_result(
        action,
        ok=False,
        error=make_error(ErrorCode.INVALID_INPUT, message, hint),
    )
|
||||
|
||||
|
||||
def validate_script_id(script_id: str, action: str) -> dict[str, Any] | None:
    """Check that ``script_id`` has the shape of a script ID.

    Args:
        script_id: The ID supplied by the caller.
        action: Tool name placed in the failure payload.

    Returns:
        A failed ``make_result`` payload when the ID contains ``/`` or ``\\`` or
        does not start with ``s_``; ``None`` when both checks pass.
    """
    if any(separator in script_id for separator in ("/", "\\")):
        message = "script_id must not contain path separators"
        hint = "Provide a valid script ID (starts with s_)"
    elif not script_id.startswith("s_"):
        message = f"Invalid script_id format: {script_id!r}"
        hint = "Script IDs start with s_. Use skyvern_script_list_for_workflow to find script IDs."
    else:
        return None

    return make_result(
        action,
        ok=False,
        error=make_error(ErrorCode.INVALID_INPUT, message, hint),
    )
|
||||
|
|
|
|||
|
|
@ -399,6 +399,34 @@ After extraction, check the returned data before using it:
|
|||
or what the data looks like), not the schema.
|
||||
- Use `skyvern_validate` for page-level assertions before extracting \
|
||||
("Is this the search results page?" / "Are there at least 10 results visible?").
|
||||
|
||||
## Caching Considerations
|
||||
|
||||
Workflows created via MCP default to Code 2.0 (code_version=2, run_with="code").
|
||||
|
||||
### What this means for workflow design
|
||||
|
||||
- **First run**: The AI agent runs all blocks, recording actions. A cached script is generated afterward.
|
||||
- **Subsequent runs**: The script replays deterministically — 10-100x faster, no LLM costs.
|
||||
- **AI fallback**: If the script encounters an element it cannot find, it falls back to the AI agent \
|
||||
for that step. The fallback episode is recorded and used to improve the script.
|
||||
|
||||
### Design for cacheability
|
||||
|
||||
1. Use stable selectors: navigation goals that reference exact field labels cache better than vague \
|
||||
descriptions. "Fill in the 'Company Name' field" caches better than "fill in the first text box."
|
||||
2. Avoid dynamic page content in goals: if a page shows different content each time, the cached script \
|
||||
may need frequent AI fallbacks. Consider splitting dynamic sections into separate blocks.
|
||||
3. Parameterize all variable data: cached scripts substitute parameters at runtime. Hardcoded values \
|
||||
in navigation_goal become part of the script literally.
|
||||
|
||||
### Overriding execution mode at run time
|
||||
|
||||
Pass `run_with="agent"` to `skyvern_workflow_run` to force AI execution for a specific run without \
|
||||
changing the workflow definition. This is useful for:
|
||||
- First runs when no script exists yet (the system handles this automatically)
|
||||
- Debugging: comparing AI behavior vs script behavior
|
||||
- Sites that changed layout since the last successful script run
|
||||
"""
|
||||
|
||||
|
||||
|
|
|
|||
293
skyvern/cli/mcp_tools/scripts.py
Normal file
293
skyvern/cli/mcp_tools/scripts.py
Normal file
|
|
@ -0,0 +1,293 @@
|
|||
"""Skyvern MCP script tools — visibility into cached scripts and fallback episodes.
|
||||
|
||||
Tools for listing scripts, viewing generated code, checking version history,
|
||||
inspecting AI fallback episodes, and deploying updated script versions.
|
||||
These tools do not require a browser session.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from typing import Annotated, Any
|
||||
|
||||
import structlog
|
||||
from pydantic import Field, ValidationError
|
||||
|
||||
from skyvern.client.errors import NotFoundError
|
||||
from skyvern.client.types import ScriptFileCreate
|
||||
|
||||
from ._common import ErrorCode, Timer, make_error, make_result, raw_http_get
|
||||
from ._session import get_skyvern
|
||||
from ._validation import validate_run_id, validate_script_id, validate_workflow_id
|
||||
|
||||
LOG = structlog.get_logger()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Script tools
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
async def skyvern_script_list_for_workflow(
    workflow_id: Annotated[str, Field(description="Workflow permanent ID (starts with wpid_)")],
) -> dict[str, Any]:
    """List all cached scripts for a workflow. Use this as the entry point to discover
    script IDs for a given workflow. Returns script metadata including version count,
    success rate, and cache key information."""
    tool = "skyvern_script_list_for_workflow"

    def _with_version_alias(entry: Any) -> Any:
        # Non-dict entries pass through untouched; dict entries are copied and, when
        # only "latest_version" is present, gain a "version" key mirroring it.
        if not isinstance(entry, dict):
            return entry
        if "version" in entry or "latest_version" not in entry:
            return dict(entry)
        return {**entry, "version": entry["latest_version"]}

    if err := validate_workflow_id(workflow_id, tool):
        return err

    with Timer() as timer:
        try:
            data = await raw_http_get(f"v1/scripts/workflows/{workflow_id}")
            timer.mark("api")
        except NotFoundError:
            not_found = make_error(
                ErrorCode.WORKFLOW_NOT_FOUND,
                f"Workflow {workflow_id!r} not found",
                "Verify the workflow ID with skyvern_workflow_list",
            )
            return make_result(tool, ok=False, timing_ms=timer.timing_ms, error=not_found)
        except Exception as e:
            LOG.error("script_list_for_workflow_failed", workflow_id=workflow_id, error=str(e))
            api_error = make_error(ErrorCode.API_ERROR, str(e), "Check the workflow ID and your API key")
            return make_result(tool, ok=False, timing_ms=timer.timing_ms, error=api_error)

    # The endpoint may answer with {"scripts": [...]} or with a bare payload.
    payload = data.get("scripts", []) if isinstance(data, dict) else data
    if isinstance(payload, list):
        scripts: Any = [_with_version_alias(entry) for entry in payload]
        count = len(scripts)
    else:
        # Unexpected shape: return it as-is and report a zero count.
        scripts = payload
        count = 0

    return make_result(
        tool,
        data={"workflow_id": workflow_id, "scripts": scripts, "count": count},
        timing_ms=timer.timing_ms,
    )
|
||||
|
||||
|
||||
async def skyvern_script_get_code(
    script_id: Annotated[str, Field(description="Script ID (starts with s_)")],
    version: Annotated[int | None, Field(description="Version number. Omit to get the latest version.")] = None,
) -> dict[str, Any]:
    """Get the generated Python code for a cached script. Returns the main orchestrator
    script and per-block code. Use skyvern_script_list_for_workflow to find script IDs first."""
    # Flow: validate the ID, resolve the version from the script metadata when the
    # caller omitted it, then fetch that version's code. Failures are returned as
    # make_result payloads rather than raised.
    if err := validate_script_id(script_id, "skyvern_script_get_code"):
        return err

    with Timer() as timer:
        try:
            if version is None:
                script_meta = await raw_http_get(f"v1/scripts/{script_id}")
                timer.mark("resolve_version")
                resolved = script_meta.get("version") if isinstance(script_meta, dict) else None
                # Default to version 1 when the metadata has no usable version
                # (missing key, null value, or a non-dict body) so the follow-up
                # request never targets ".../versions/None".
                version = 1 if resolved is None else resolved

            data = await raw_http_get(f"v1/scripts/{script_id}/versions/{version}")
            timer.mark("api")
        except NotFoundError:
            if version is None:
                # Still unresolved: the metadata lookup itself returned 404, so the
                # script (not a specific version) is what is missing.
                message = f"Script {script_id!r} not found"
                hint = "Use skyvern_script_list_for_workflow to find valid script IDs"
            else:
                message = f"Script {script_id!r} version {version} not found"
                hint = "Use skyvern_script_versions to see available versions"
            return make_result(
                "skyvern_script_get_code",
                ok=False,
                timing_ms=timer.timing_ms,
                error=make_error(ErrorCode.INVALID_INPUT, message, hint),
            )
        except Exception as e:
            LOG.error("script_get_code_failed", script_id=script_id, version=version, error=str(e))
            return make_result(
                "skyvern_script_get_code",
                ok=False,
                timing_ms=timer.timing_ms,
                error=make_error(ErrorCode.API_ERROR, str(e), "Check the script ID and your API key"),
            )

    result: dict[str, Any] = {
        "script_id": script_id,
        "version": version,
    }
    # Code fields are only attached when the response is a JSON object.
    if isinstance(data, dict):
        result["blocks"] = data.get("blocks", {})
        result["main_script"] = data.get("main_script")
    return make_result("skyvern_script_get_code", data=result, timing_ms=timer.timing_ms)
|
||||
|
||||
|
||||
async def skyvern_script_versions(
    script_id: Annotated[str, Field(description="Script ID (starts with s_)")],
) -> dict[str, Any]:
    """List all versions of a cached script. Shows version history including
    creation timestamps and which run triggered each version."""
    # Failures (bad ID, 404, any other API error) are returned as make_result
    # payloads rather than raised.
    if err := validate_script_id(script_id, "skyvern_script_versions"):
        return err

    with Timer() as timer:
        try:
            data = await raw_http_get(f"v1/scripts/{script_id}/versions")
            timer.mark("api")
        except NotFoundError:
            return make_result(
                "skyvern_script_versions",
                ok=False,
                timing_ms=timer.timing_ms,
                error=make_error(
                    ErrorCode.INVALID_INPUT,
                    f"Script {script_id!r} not found",
                    "Use skyvern_script_list_for_workflow to find valid script IDs",
                ),
            )
        except Exception as e:
            LOG.error("script_versions_failed", script_id=script_id, error=str(e))
            return make_result(
                "skyvern_script_versions",
                ok=False,
                timing_ms=timer.timing_ms,
                error=make_error(ErrorCode.API_ERROR, str(e), "Check the script ID and your API key"),
            )

    # The endpoint may answer with {"versions": [...]} or with a bare payload.
    versions = data.get("versions", []) if isinstance(data, dict) else data
    # Only count real lists: a null "versions" field or a scalar body would make
    # len() raise TypeError. Mirrors the guard in skyvern_script_list_for_workflow.
    count = len(versions) if isinstance(versions, list) else 0
    return make_result(
        "skyvern_script_versions",
        data={"script_id": script_id, "versions": versions, "count": count},
        timing_ms=timer.timing_ms,
    )
|
||||
|
||||
|
||||
async def skyvern_script_fallback_episodes(
    workflow_id: Annotated[str, Field(description="Workflow permanent ID (starts with wpid_)")],
    workflow_run_id: Annotated[str | None, Field(description="Filter to a specific run (starts with wr_)")] = None,
    block_label: Annotated[str | None, Field(description="Filter to a specific block label")] = None,
    page: Annotated[int, Field(description="Page number (1-based)", ge=1)] = 1,
    page_size: Annotated[int, Field(description="Results per page", ge=1, le=100)] = 20,
) -> dict[str, Any]:
    """List AI fallback episodes for a workflow's cached scripts. Each episode records
    when a cached script's selector failed and the AI agent took over. Shows error details,
    block label, and whether the fallback succeeded. Useful for understanding why a script
    fell back to AI and how the script evolved."""
    tool = "skyvern_script_fallback_episodes"

    if err := validate_workflow_id(workflow_id, tool):
        return err
    if workflow_run_id is not None and (run_err := validate_run_id(workflow_run_id, tool)):
        return run_err

    # Pagination is always sent; the optional filters only when provided.
    params: dict[str, Any] = {"page": page, "page_size": page_size}
    optional_filters = {"workflow_run_id": workflow_run_id, "block_label": block_label}
    params.update({key: value for key, value in optional_filters.items() if value is not None})

    with Timer() as timer:
        try:
            data = await raw_http_get(f"v1/workflows/{workflow_id}/fallback-episodes", params=params)
            timer.mark("api")
        except NotFoundError:
            not_found = make_error(
                ErrorCode.WORKFLOW_NOT_FOUND,
                f"Workflow {workflow_id!r} not found",
                "Verify the workflow ID with skyvern_workflow_list",
            )
            return make_result(tool, ok=False, timing_ms=timer.timing_ms, error=not_found)
        except Exception as e:
            LOG.error("script_fallback_episodes_failed", workflow_id=workflow_id, error=str(e))
            api_error = make_error(ErrorCode.API_ERROR, str(e), "Check the workflow ID and your API key")
            return make_result(tool, ok=False, timing_ms=timer.timing_ms, error=api_error)

    if isinstance(data, dict):
        # Paginated envelope: echo the server's values, falling back to the request's.
        result: dict[str, Any] = {
            "workflow_id": workflow_id,
            "episodes": data.get("episodes", []),
            "total_count": data.get("total_count", 0),
            "page": data.get("page", page),
            "page_size": data.get("page_size", page_size),
        }
    else:
        # Bare payload: pass it through and count it only when it is a list.
        result = {
            "workflow_id": workflow_id,
            "episodes": data,
            "total_count": len(data) if isinstance(data, list) else 0,
        }
    return make_result(tool, data=result, timing_ms=timer.timing_ms)
|
||||
|
||||
|
||||
async def skyvern_script_deploy(
    script_id: Annotated[str, Field(description="Script ID to deploy a new version for (starts with s_)")],
    files: Annotated[
        str,
        Field(
            description='JSON array of file objects: [{"path": "main.py", "content": "<base64-encoded>", "encoding": "base64"}]'
        ),
    ],
) -> dict[str, Any]:
    """Deploy a new version of a cached script with updated files. Creates a new version
    that will be used on the next workflow run. File content must be base64-encoded."""
    tool = "skyvern_script_deploy"

    if err := validate_script_id(script_id, tool):
        return err

    # Decode the JSON argument and convert each entry to the SDK's file type; any
    # decoding or construction failure is reported as invalid input.
    try:
        decoded = json.loads(files)
        if not isinstance(decoded, list):
            raise ValueError("files must be a JSON array")
        typed_files = [ScriptFileCreate(**entry) for entry in decoded]
    except (json.JSONDecodeError, TypeError, ValueError, ValidationError) as e:
        invalid = make_error(
            ErrorCode.INVALID_INPUT,
            f"Invalid files JSON: {e}",
            'Provide a JSON array: [{"path": "main.py", "content": "<base64>", "encoding": "base64"}]',
        )
        return make_result(tool, ok=False, error=invalid)

    client = get_skyvern()

    with Timer() as timer:
        try:
            deployed = await client.deploy_script(script_id, files=typed_files)
            timer.mark("sdk")
        except NotFoundError:
            not_found = make_error(
                ErrorCode.INVALID_INPUT,
                f"Script {script_id!r} not found",
                "Use skyvern_script_list_for_workflow to find valid script IDs",
            )
            return make_result(tool, ok=False, timing_ms=timer.timing_ms, error=not_found)
        except Exception as e:
            LOG.error("script_deploy_failed", script_id=script_id, error=str(e))
            api_error = make_error(ErrorCode.API_ERROR, str(e), "Check the script ID and your API key")
            return make_result(tool, ok=False, timing_ms=timer.timing_ms, error=api_error)

    # Merge whatever the SDK returned on top of the script ID: objects exposing
    # model_dump are dumped in JSON mode, plain dicts are merged directly, and
    # anything else contributes nothing.
    extra: dict[str, Any] = {}
    if hasattr(deployed, "model_dump"):
        extra = deployed.model_dump(mode="json")
    elif isinstance(deployed, dict):
        extra = deployed
    return make_result(tool, data={"script_id": script_id, **extra}, timing_ms=timer.timing_ms)
|
||||
|
|
@ -24,7 +24,7 @@ from skyvern.schemas.workflows import WorkflowCreateYAMLRequest as WorkflowCreat
|
|||
|
||||
from ._common import ErrorCode, Timer, make_error, make_result
|
||||
from ._session import get_skyvern
|
||||
from ._validation import validate_folder_id
|
||||
from ._validation import validate_folder_id, validate_run_id, validate_workflow_id
|
||||
|
||||
LOG = structlog.get_logger()
|
||||
_SUMMARY_TOP_LEVEL_KEY_LIMIT = 8
|
||||
|
|
@ -45,7 +45,7 @@ def _serialize_workflow(wf: Any) -> dict[str, Any]:
|
|||
|
||||
Uses Any to avoid tight coupling with Fern-generated client types.
|
||||
"""
|
||||
return {
|
||||
data: dict[str, Any] = {
|
||||
"workflow_permanent_id": wf.workflow_permanent_id,
|
||||
"workflow_id": wf.workflow_id,
|
||||
"title": wf.title,
|
||||
|
|
@ -57,6 +57,11 @@ def _serialize_workflow(wf: Any) -> dict[str, Any]:
|
|||
"created_at": wf.created_at.isoformat() if wf.created_at else None,
|
||||
"modified_at": wf.modified_at.isoformat() if wf.modified_at else None,
|
||||
}
|
||||
for caching_field in ("run_with", "code_version", "adaptive_caching"):
|
||||
val = getattr(wf, caching_field, None)
|
||||
if val is not None:
|
||||
data[caching_field] = val
|
||||
return data
|
||||
|
||||
|
||||
def _serialize_workflow_full(wf: Any) -> dict[str, Any]:
|
||||
|
|
@ -87,6 +92,7 @@ def _serialize_run(run: Any) -> dict[str, Any]:
|
|||
"app_url",
|
||||
"browser_session_id",
|
||||
"run_with",
|
||||
"ai_fallback",
|
||||
):
|
||||
val = getattr(run, field, None)
|
||||
if val is not None:
|
||||
|
|
@ -103,6 +109,10 @@ def _serialize_run(run: Any) -> dict[str, Any]:
|
|||
if val is not None:
|
||||
data[ts_field] = val.isoformat()
|
||||
|
||||
script_run = getattr(run, "script_run", None)
|
||||
if script_run is not None:
|
||||
data["script_run"] = script_run.model_dump(mode="json") if hasattr(script_run, "model_dump") else script_run
|
||||
|
||||
return data
|
||||
|
||||
|
||||
|
|
@ -288,6 +298,12 @@ def _serialize_run_summary(run: Any) -> dict[str, Any]:
|
|||
if run_with:
|
||||
summary["run_with"] = run_with
|
||||
|
||||
script_run = _get_value(run, "script_run")
|
||||
if script_run is not None:
|
||||
sr = _jsonable(script_run)
|
||||
if isinstance(sr, dict) and sr.get("ai_fallback_triggered") is not None:
|
||||
summary["ai_fallback_triggered"] = sr["ai_fallback_triggered"]
|
||||
|
||||
workflow_title = _get_value(run, "workflow_title")
|
||||
if workflow_title:
|
||||
summary["workflow_title"] = workflow_title
|
||||
|
|
@ -326,6 +342,8 @@ def _serialize_run_full(run: Any) -> dict[str, Any]:
|
|||
"browser_profile_id",
|
||||
"run_with",
|
||||
"total_steps",
|
||||
"script_run",
|
||||
"ai_fallback",
|
||||
):
|
||||
value = _get_value(run, field)
|
||||
if value is not None:
|
||||
|
|
@ -368,56 +386,6 @@ async def _get_workflow_run_status(
|
|||
return response.json()
|
||||
|
||||
|
||||
def _validate_workflow_id(workflow_id: str, action: str) -> dict[str, Any] | None:
    """Check the shape of a workflow permanent ID.

    Returns a failed ``make_result`` payload for ``action`` when ``workflow_id``
    contains ``/`` or ``\\`` or does not start with ``wpid_``; otherwise ``None``.
    """
    if any(separator in workflow_id for separator in ("/", "\\")):
        message = "workflow_id must not contain path separators"
        hint = "Provide a valid workflow permanent ID (starts with wpid_)"
    elif not workflow_id.startswith("wpid_"):
        message = f"Invalid workflow_id format: {workflow_id!r}"
        hint = "Workflow IDs start with wpid_. Use skyvern_workflow_list to find valid IDs."
    else:
        return None

    return make_result(
        action,
        ok=False,
        error=make_error(ErrorCode.INVALID_INPUT, message, hint),
    )
|
||||
|
||||
|
||||
def _validate_run_id(run_id: str, action: str) -> dict[str, Any] | None:
    """Check the shape of a workflow-run or task-run ID.

    Returns a failed ``make_result`` payload for ``action`` when ``run_id``
    contains ``/`` or ``\\`` or starts with neither ``wr_`` nor ``tsk_v2_``;
    otherwise ``None``.
    """
    if any(separator in run_id for separator in ("/", "\\")):
        message = "run_id must not contain path separators"
        hint = "Provide a valid run ID (starts with wr_ or tsk_v2_)"
    elif not run_id.startswith(("wr_", "tsk_v2_")):
        message = f"Invalid run_id format: {run_id!r}"
        hint = "Run IDs start with wr_ (workflow runs) or tsk_v2_ (task runs). Check skyvern_workflow_run output."
    else:
        return None

    return make_result(
        action,
        ok=False,
        error=make_error(ErrorCode.INVALID_INPUT, message, hint),
    )
|
||||
|
||||
|
||||
async def _get_workflow_by_id(workflow_id: str, version: int | None = None) -> dict[str, Any]:
|
||||
"""Fetch a single workflow by ID via the Skyvern API.
|
||||
|
||||
|
|
@ -780,7 +748,7 @@ async def skyvern_workflow_get(
|
|||
) -> dict[str, Any]:
|
||||
"""Get the full definition of a specific workflow. Use when you need to inspect a workflow's
|
||||
blocks, parameters, and configuration before running or updating it."""
|
||||
if err := _validate_workflow_id(workflow_id, "skyvern_workflow_get"):
|
||||
if err := validate_workflow_id(workflow_id, "skyvern_workflow_get"):
|
||||
return err
|
||||
|
||||
with Timer() as timer:
|
||||
|
|
@ -937,7 +905,7 @@ async def skyvern_workflow_update(
|
|||
) -> dict[str, Any]:
|
||||
"""Update an existing workflow's definition. Use when you need to modify a workflow's blocks,
|
||||
parameters, or configuration. Creates a new version of the workflow."""
|
||||
if err := _validate_workflow_id(workflow_id, "skyvern_workflow_update"):
|
||||
if err := validate_workflow_id(workflow_id, "skyvern_workflow_update"):
|
||||
return err
|
||||
|
||||
if format not in ("json", "yaml", "auto"):
|
||||
|
|
@ -1017,7 +985,7 @@ async def skyvern_workflow_delete(
|
|||
) -> dict[str, Any]:
|
||||
"""Delete a workflow permanently. Use when you need to remove a workflow that is no longer needed.
|
||||
Requires force=true to prevent accidental deletion."""
|
||||
if err := _validate_workflow_id(workflow_id, "skyvern_workflow_delete"):
|
||||
if err := validate_workflow_id(workflow_id, "skyvern_workflow_delete"):
|
||||
return err
|
||||
|
||||
if not force:
|
||||
|
|
@ -1077,7 +1045,7 @@ async def skyvern_workflow_update_folder(
|
|||
] = None,
|
||||
) -> dict[str, Any]:
|
||||
"""Assign a workflow to a folder, or remove it from its current folder."""
|
||||
if err := _validate_workflow_id(workflow_id, "skyvern_workflow_update_folder"):
|
||||
if err := validate_workflow_id(workflow_id, "skyvern_workflow_update_folder"):
|
||||
return err
|
||||
if folder_id is not None and (err := validate_folder_id(folder_id, "skyvern_workflow_update_folder")):
|
||||
return err
|
||||
|
|
@ -1152,7 +1120,7 @@ async def skyvern_workflow_run(
|
|||
Returns immediately by default (async) — set wait=true to block until completion.
|
||||
Default timeout is 300s (5 minutes). For longer workflows, increase timeout_seconds
|
||||
or use wait=false and poll with skyvern_workflow_status."""
|
||||
if err := _validate_workflow_id(workflow_id, "skyvern_workflow_run"):
|
||||
if err := validate_workflow_id(workflow_id, "skyvern_workflow_run"):
|
||||
return err
|
||||
|
||||
parsed_params: dict[str, Any] | None = None
|
||||
|
|
@ -1248,7 +1216,7 @@ async def skyvern_workflow_status(
|
|||
) -> dict[str, Any]:
|
||||
"""Check the status and progress of a workflow or task run. Use when you need to monitor
|
||||
a running workflow, check if it completed, or retrieve its output."""
|
||||
if err := _validate_run_id(run_id, "skyvern_workflow_status"):
|
||||
if err := validate_run_id(run_id, "skyvern_workflow_status"):
|
||||
return err
|
||||
if verbosity not in {"summary", "full"}:
|
||||
return make_result(
|
||||
|
|
@ -1307,7 +1275,7 @@ async def skyvern_workflow_cancel(
|
|||
) -> dict[str, Any]:
|
||||
"""Cancel a running workflow or task. Use when you need to stop a workflow that is taking
|
||||
too long, is stuck, or is no longer needed."""
|
||||
if err := _validate_run_id(run_id, "skyvern_workflow_cancel"):
|
||||
if err := validate_run_id(run_id, "skyvern_workflow_cancel"):
|
||||
return err
|
||||
|
||||
skyvern = get_skyvern()
|
||||
|
|
|
|||
42
tests/unit/test_mcp_common.py
Normal file
42
tests/unit/test_mcp_common.py
Normal file
|
|
@ -0,0 +1,42 @@
|
|||
from __future__ import annotations
|
||||
|
||||
from types import SimpleNamespace
|
||||
from unittest.mock import AsyncMock, Mock
|
||||
|
||||
import pytest
|
||||
|
||||
import skyvern.cli.mcp_tools._common as common_tools
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_raw_http_get_returns_empty_dict_for_204(monkeypatch: pytest.MonkeyPatch) -> None:
    """A 204 response from the HTTP client makes ``raw_http_get`` return ``{}``."""
    # json() is rigged to raise AssertionError if it is ever invoked on the empty body.
    forbidden_json = Mock(side_effect=AssertionError("json() should not be called for 204 responses"))
    no_content = SimpleNamespace(status_code=204, text="", json=forbidden_json)

    # Mirror the attribute chain the test patches in: client._client_wrapper.httpx_client.request
    httpx_stub = SimpleNamespace(request=AsyncMock(return_value=no_content))
    sdk_stub = SimpleNamespace(_client_wrapper=SimpleNamespace(httpx_client=httpx_stub))
    monkeypatch.setattr("skyvern.cli.mcp_tools._session.get_skyvern", lambda: sdk_stub)

    outcome = await common_tools.raw_http_get("v1/test")

    assert outcome == {}
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_raw_http_get_returns_raw_text_for_non_json_success(monkeypatch: pytest.MonkeyPatch) -> None:
    """A 200 response whose body is not JSON comes back wrapped as ``{"raw": <text>}``."""
    # json() raises ValueError to simulate an undecodable body.
    failing_json = Mock(side_effect=ValueError("not json"))
    html_page = SimpleNamespace(status_code=200, text="<html>ok</html>", json=failing_json)

    httpx_stub = SimpleNamespace(request=AsyncMock(return_value=html_page))
    sdk_stub = SimpleNamespace(_client_wrapper=SimpleNamespace(httpx_client=httpx_stub))
    monkeypatch.setattr("skyvern.cli.mcp_tools._session.get_skyvern", lambda: sdk_stub)

    outcome = await common_tools.raw_http_get("v1/test")

    assert outcome == {"raw": "<html>ok</html>"}
|
||||
482
tests/unit/test_mcp_script_caching_live.py
Normal file
482
tests/unit/test_mcp_script_caching_live.py
Normal file
|
|
@ -0,0 +1,482 @@
|
|||
"""Live MCP server tests for script/caching tools.
|
||||
|
||||
Tests call tools through the actual FastMCP Client, exactly as Claude Code would.
|
||||
API responses are mocked at the HTTP layer so we test the full MCP pipeline:
|
||||
Client → FastMCP → tool function → raw_http_get/SDK → (mocked) API
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from types import SimpleNamespace
|
||||
from unittest.mock import AsyncMock
|
||||
|
||||
import pytest
|
||||
from fastmcp import Client
|
||||
|
||||
import skyvern.cli.mcp_tools.scripts as script_tools
|
||||
import skyvern.cli.mcp_tools.workflow as workflow_tools
|
||||
from skyvern.cli.mcp_tools import mcp
|
||||
from skyvern.client.types import ScriptFileCreate
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Fake API payloads
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Canned script-listing payload: one published script ("s_abc") with run statistics.
# test_list_scripts_via_mcp serves it for any path containing "scripts/workflows/".
FAKE_SCRIPTS = {
    "scripts": [
        {
            "script_id": "s_abc",
            "cache_key": "hash",
            "cache_key_value": "default",
            "status": "published",
            "latest_version": 2,
            "version_count": 2,
            "total_runs": 5,
            "success_rate": 0.8,
            "is_pinned": False,
        }
    ]
}
|
||||
|
||||
# Canned script-code payload for version 2 of "s_abc": per-block source keyed by block
# label plus the main script. Served for paths containing "scripts/s_abc/versions/2".
FAKE_CODE = {
    "blocks": {
        "fill_form": "async def fill_form(page, ctx):\n await page.fill('xpath=//input', ctx.parameters['name'])\n",
    },
    "main_script": "import skyvern\n\n@skyvern.workflow(title='Test')\nasync def run(params):\n pass\n",
    "script_id": "s_abc",
    "version": 2,
}
|
||||
|
||||
# Canned version-history payload: two revisions, each tied to the run ID recorded with it.
# test_script_versions_via_mcp serves it for any path containing "versions".
FAKE_VERSIONS = {
    "versions": [
        {"version": 1, "script_revision_id": "srev_1", "created_at": "2026-03-20T10:00:00Z", "run_id": "wr_001"},
        {"version": 2, "script_revision_id": "srev_2", "created_at": "2026-03-22T14:00:00Z", "run_id": "wr_002"},
    ]
}
|
||||
|
||||
# Canned fallback-episode payload: a single paginated page holding one "selector_miss"
# episode. Served for paths containing "fallback-episodes" in the fallback-episode tests.
FAKE_EPISODES = {
    "episodes": [
        {
            "episode_id": "ep_1",
            "block_label": "fill_form",
            "fallback_type": "selector_miss",
            "error_message": "Element not found: site redesigned",
            "classify_result": None,
            "fallback_succeeded": True,
            "workflow_run_id": "wr_002",
            "page_url": "https://example.com/form",
            "reviewed": True,
            "created_at": "2026-03-22T14:01:00Z",
        }
    ],
    "total_count": 1,
    "page": 1,
    "page_size": 20,
}
|
||||
|
||||
|
||||
def _mock_raw_http(responses: dict):
|
||||
"""Return a mock raw_http_get that routes by path substring."""
|
||||
|
||||
async def mock_get(path, params=None):
|
||||
for key, val in responses.items():
|
||||
if key in path:
|
||||
return val
|
||||
raise RuntimeError(f"Unmocked path: {path}")
|
||||
|
||||
return mock_get
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Scenario 1: "Show me the scripts for this workflow"
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_list_scripts_via_mcp(monkeypatch):
    """Listing scripts for a workflow through MCP returns the mocked script summary."""
    routes = {"scripts/workflows/": FAKE_SCRIPTS}
    monkeypatch.setattr(script_tools, "raw_http_get", _mock_raw_http(routes))

    arguments = {"workflow_id": "wpid_test"}
    async with Client(mcp) as client:
        result = await client.call_tool("skyvern_script_list_for_workflow", arguments)

    assert result.data["ok"] is True
    listed = result.data["data"]["scripts"]
    assert len(listed) == 1
    first = listed[0]
    assert first["script_id"] == "s_abc"
    assert first["success_rate"] == 0.8
    assert first["version"] == 2
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    ("payload", "expected_scripts"),
    [
        ({"scripts": None}, None),
        ({"scripts": {"unexpected": "shape"}}, {"unexpected": "shape"}),
    ],
)
@pytest.mark.asyncio
async def test_list_scripts_handles_missing_script_list_via_mcp(monkeypatch, payload, expected_scripts):
    """A ``scripts`` value that is not a list is passed through and counted as zero scripts."""
    routes = {"scripts/workflows/": payload}
    monkeypatch.setattr(script_tools, "raw_http_get", _mock_raw_http(routes))

    arguments = {"workflow_id": "wpid_test"}
    async with Client(mcp) as client:
        result = await client.call_tool("skyvern_script_list_for_workflow", arguments)

    assert result.data["ok"] is True
    body = result.data["data"]
    assert body["scripts"] == expected_scripts
    assert body["count"] == 0
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Scenario 2: "Print the script that was made"
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_get_script_code_via_mcp(monkeypatch):
    """Requesting an explicit script version returns its block code and main script."""
    routes = {"scripts/s_abc/versions/2": FAKE_CODE}
    monkeypatch.setattr(script_tools, "raw_http_get", _mock_raw_http(routes))

    arguments = {"script_id": "s_abc", "version": 2}
    async with Client(mcp) as client:
        result = await client.call_tool("skyvern_script_get_code", arguments)

    assert result.data["ok"] is True
    data = result.data["data"]
    block_sources = data["blocks"]
    assert "fill_form" in block_sources
    assert "page.fill" in block_sources["fill_form"]
    assert "@skyvern.workflow" in data["main_script"]
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_get_script_code_resolves_latest_via_mcp(monkeypatch):
    """When version is omitted, tool fetches metadata first to find latest."""
    # Order matters: _mock_raw_http returns the first fragment found in the path, and the
    # metadata fragment is a prefix of the versioned one, so the versioned route goes first.
    routes = {
        "v1/scripts/s_abc/versions/2": FAKE_CODE,
        "v1/scripts/s_abc": {"script_id": "s_abc", "version": 2},
    }
    monkeypatch.setattr(script_tools, "raw_http_get", _mock_raw_http(routes))

    arguments = {"script_id": "s_abc"}
    async with Client(mcp) as client:
        result = await client.call_tool("skyvern_script_get_code", arguments)

    assert result.data["ok"] is True
    assert result.data["data"]["version"] == 2
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Scenario 3: "How did the script evolve?"
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_script_versions_via_mcp(monkeypatch):
    """The version-history tool returns both mocked versions in their original order."""
    routes = {"versions": FAKE_VERSIONS}
    monkeypatch.setattr(script_tools, "raw_http_get", _mock_raw_http(routes))

    arguments = {"script_id": "s_abc"}
    async with Client(mcp) as client:
        result = await client.call_tool("skyvern_script_versions", arguments)

    assert result.data["ok"] is True
    history = result.data["data"]["versions"]
    assert len(history) == 2
    assert history[0]["version"] == 1
    assert history[1]["version"] == 2
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Scenario 4: "Why did it fall back to AI?"
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_fallback_episodes_via_mcp(monkeypatch):
    """The fallback-episodes tool surfaces the mocked episode and its total count."""
    routes = {"fallback-episodes": FAKE_EPISODES}
    monkeypatch.setattr(script_tools, "raw_http_get", _mock_raw_http(routes))

    arguments = {"workflow_id": "wpid_test"}
    async with Client(mcp) as client:
        result = await client.call_tool("skyvern_script_fallback_episodes", arguments)

    assert result.data["ok"] is True
    data = result.data["data"]
    assert data["total_count"] == 1
    episode = data["episodes"][0]
    assert episode["fallback_type"] == "selector_miss"
    assert "site redesigned" in episode["error_message"]
    assert episode["fallback_succeeded"] is True
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_fallback_episodes_rejects_invalid_workflow_run_id_via_mcp(monkeypatch):
    """A malformed ``workflow_run_id`` is rejected as INVALID_INPUT without any HTTP call."""
    http_spy = AsyncMock(return_value=FAKE_EPISODES)
    monkeypatch.setattr(script_tools, "raw_http_get", http_spy)

    arguments = {"workflow_id": "wpid_test", "workflow_run_id": "bad_run_id"}
    async with Client(mcp) as client:
        result = await client.call_tool("skyvern_script_fallback_episodes", arguments)

    assert result.data["ok"] is False
    assert result.data["error"]["code"] == script_tools.ErrorCode.INVALID_INPUT
    # Validation must short-circuit before the API is contacted.
    http_spy.assert_not_awaited()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Scenario 5: "Edit the script"
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_deploy_script_via_mcp(monkeypatch):
    """Deploying files through MCP calls the SDK once with ``ScriptFileCreate`` objects."""
    import base64

    def dump_deploy_response(mode="python"):
        # Stands in for the response model's model_dump(); ``mode`` is accepted and ignored.
        return {"script_id": "s_abc", "version": 3, "script_revision_id": "srev_3"}

    deploy_resp = SimpleNamespace(
        script_id="s_abc",
        version=3,
        script_revision_id="srev_3",
        model_dump=dump_deploy_response,
    )
    deploy_spy = AsyncMock(return_value=deploy_resp)
    fake_client = SimpleNamespace(deploy_script=deploy_spy)
    monkeypatch.setattr(script_tools, "get_skyvern", lambda: fake_client)

    encoded_source = base64.b64encode(b"# edited").decode()
    files = json.dumps([{"path": "main.py", "content": encoded_source, "encoding": "base64"}])

    arguments = {"script_id": "s_abc", "files": files}
    async with Client(mcp) as client:
        result = await client.call_tool("skyvern_script_deploy", arguments)

    assert result.data["ok"] is True
    assert result.data["data"]["version"] == 3
    fake_client.deploy_script.assert_awaited_once()
    sent_files = fake_client.deploy_script.await_args.kwargs["files"]
    assert len(sent_files) == 1
    assert isinstance(sent_files[0], ScriptFileCreate)
    assert sent_files[0].path == "main.py"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Scenario 6: Workflow create shows caching defaults
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_workflow_create_surfaces_caching_fields_via_mcp(monkeypatch):
    """Workflow creation output includes ``code_version``, ``run_with`` and ``adaptive_caching``."""
    from datetime import datetime, timezone

    timestamp = datetime.now(timezone.utc)
    created_workflow = SimpleNamespace(
        workflow_permanent_id="wpid_new",
        workflow_id="wf_1",
        title="Test",
        version=1,
        status="published",
        description=None,
        is_saved_task=False,
        folder_id=None,
        created_at=timestamp,
        modified_at=timestamp,
        code_version=2,
        adaptive_caching=True,
        run_with="code",
    )
    fake_client = SimpleNamespace(create_workflow=AsyncMock(return_value=created_workflow))
    monkeypatch.setattr(workflow_tools, "get_skyvern", lambda: fake_client)

    navigation_block = {
        "block_type": "navigation",
        "label": "s1",
        "url": "https://example.com",
        "navigation_goal": "Click",
    }
    workflow_spec = {
        "title": "Test",
        "workflow_definition": {
            "parameters": [],
            "blocks": [navigation_block],
        },
    }
    definition = json.dumps(workflow_spec)

    arguments = {"definition": definition, "format": "json"}
    async with Client(mcp) as client:
        result = await client.call_tool("skyvern_workflow_create", arguments)

    assert result.data["ok"] is True
    data = result.data["data"]
    assert data["code_version"] == 2
    assert data["run_with"] == "code"
    assert data["adaptive_caching"] is True
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Scenario 7: Run status shows script_run + ai_fallback
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_workflow_status_shows_script_run_via_mcp(monkeypatch):
    """Full-verbosity run status exposes ``run_with`` and the ``script_run`` fallback flag."""
    payload = {
        "workflow_run_id": "wr_test",
        "status": "completed",
        "run_with": "code",
        "workflow_title": "Test",
        "script_run": {"ai_fallback_triggered": True, "script_id": "s_abc"},
        "outputs": {"result": "ok"},
    }
    status_response = SimpleNamespace(status_code=200, json=lambda: payload, text="")

    # Same attribute chain the workflow tool's client exposes: _client_wrapper.httpx_client.request
    httpx_stub = SimpleNamespace(request=AsyncMock(return_value=status_response))
    fake_client = SimpleNamespace(_client_wrapper=SimpleNamespace(httpx_client=httpx_stub))
    monkeypatch.setattr(workflow_tools, "get_skyvern", lambda: fake_client)

    arguments = {"run_id": "wr_test", "verbosity": "full"}
    async with Client(mcp) as client:
        result = await client.call_tool("skyvern_workflow_status", arguments)

    assert result.data["ok"] is True
    data = result.data["data"]
    assert data["run_with"] == "code"
    assert data["script_run"]["ai_fallback_triggered"] is True
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Validation: bad inputs get clear errors
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_bad_workflow_id_returns_error_via_mcp():
    """A workflow ID without the expected prefix fails with an error that mentions ``wpid_``."""
    arguments = {"workflow_id": "not_a_wpid"}
    async with Client(mcp) as client:
        result = await client.call_tool("skyvern_script_list_for_workflow", arguments)

    assert result.data["ok"] is False
    error_text = str(result.data["error"])
    assert "wpid_" in error_text
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_bad_script_id_returns_error_via_mcp():
    """A script ID without the expected prefix fails with an error that mentions ``s_``."""
    arguments = {"script_id": "wrong_prefix"}
    async with Client(mcp) as client:
        result = await client.call_tool("skyvern_script_get_code", arguments)

    assert result.data["ok"] is False
    error_text = str(result.data["error"])
    assert "s_" in error_text
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_bad_deploy_json_returns_error_via_mcp():
    """A ``files`` argument that is not valid JSON fails with a message that mentions JSON."""
    arguments = {"script_id": "s_abc", "files": "not json"}
    async with Client(mcp) as client:
        result = await client.call_tool("skyvern_script_deploy", arguments)

    assert result.data["ok"] is False
    error_message = result.data["error"]["message"]
    assert "JSON" in error_message
|
||||
Loading…
Add table
Add a link
Reference in a new issue