"""Live MCP server tests for script/caching tools.
|
|
|
|
Tests call tools through the actual FastMCP Client, exactly as Claude Code would.
|
|
API responses are mocked at the HTTP layer so we test the full MCP pipeline:
|
|
Client → FastMCP → tool function → raw_http_get/SDK → (mocked) API
|
|
"""

from __future__ import annotations

import base64
import json
from types import SimpleNamespace
from unittest.mock import AsyncMock

import pytest
from fastmcp import Client

import skyvern.cli.mcp_tools.scripts as script_tools
import skyvern.cli.mcp_tools.workflow as workflow_tools
from skyvern.cli.mcp_tools import mcp
from skyvern.client.types import ScriptFileCreate
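
# Two mocking seams are used below: tools that issue raw HTTP calls get a
# monkeypatched `raw_http_get`, while tools that go through the SDK get a fake
# client via `get_skyvern`. Nothing in this module touches a real Skyvern API.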


# ---------------------------------------------------------------------------
# Fake API payloads
# ---------------------------------------------------------------------------

FAKE_SCRIPTS = {
    "scripts": [
        {
            "script_id": "s_abc",
            "cache_key": "hash",
            "cache_key_value": "default",
            "status": "published",
            "latest_version": 2,
            "version_count": 2,
            "total_runs": 5,
            "success_rate": 0.8,
            "is_pinned": False,
        }
    ]
}

FAKE_CODE = {
    "blocks": {
        "fill_form": "async def fill_form(page, ctx):\n    await page.fill('xpath=//input', ctx.parameters['name'])\n",
    },
    "main_script": "import skyvern\n\n@skyvern.workflow(title='Test')\nasync def run(params):\n    pass\n",
    "script_id": "s_abc",
    "version": 2,
}

FAKE_VERSIONS = {
    "versions": [
        {"version": 1, "script_revision_id": "srev_1", "created_at": "2026-03-20T10:00:00Z", "run_id": "wr_001"},
        {"version": 2, "script_revision_id": "srev_2", "created_at": "2026-03-22T14:00:00Z", "run_id": "wr_002"},
    ]
}

FAKE_EPISODES = {
    "episodes": [
        {
            "episode_id": "ep_1",
            "block_label": "fill_form",
            "fallback_type": "selector_miss",
            "error_message": "Element not found: site redesigned",
            "classify_result": None,
            "fallback_succeeded": True,
            "workflow_run_id": "wr_002",
            "page_url": "https://example.com/form",
            "reviewed": True,
            "created_at": "2026-03-22T14:01:00Z",
        }
    ],
    "total_count": 1,
    "page": 1,
    "page_size": 20,
}


def _mock_raw_http(responses: dict):
    """Return a mock raw_http_get that routes by path substring."""

    async def mock_get(path, params=None):
        for key, val in responses.items():
            if key in path:
                return val
        raise RuntimeError(f"Unmocked path: {path}")

    return mock_get
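

# `mock_get` returns the value for the first key that appears as a substring
# of the requested path, so when one mocked key is a prefix of another the
# more specific key must be listed first. Unmocked paths raise instead of
# returning None, so stray API calls fail tests loudly.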


# ---------------------------------------------------------------------------
# Scenario 1: "Show me the scripts for this workflow"
# ---------------------------------------------------------------------------


@pytest.mark.asyncio
async def test_list_scripts_via_mcp(monkeypatch):
    monkeypatch.setattr(
        script_tools,
        "raw_http_get",
        _mock_raw_http(
            {
                "scripts/workflows/": FAKE_SCRIPTS,
            }
        ),
    )

    async with Client(mcp) as client:
        result = await client.call_tool(
            "skyvern_script_list_for_workflow",
            {
                "workflow_id": "wpid_test",
            },
        )

    assert result.data["ok"] is True
    scripts = result.data["data"]["scripts"]
    assert len(scripts) == 1
    assert scripts[0]["script_id"] == "s_abc"
    assert scripts[0]["success_rate"] == 0.8
    assert scripts[0]["version"] == 2


@pytest.mark.parametrize(
    ("payload", "expected_scripts"),
    [
        ({"scripts": None}, None),
        ({"scripts": {"unexpected": "shape"}}, {"unexpected": "shape"}),
    ],
)
@pytest.mark.asyncio
async def test_list_scripts_handles_missing_script_list_via_mcp(monkeypatch, payload, expected_scripts):
    monkeypatch.setattr(
        script_tools,
        "raw_http_get",
        _mock_raw_http(
            {
                "scripts/workflows/": payload,
            }
        ),
    )

    async with Client(mcp) as client:
        result = await client.call_tool(
            "skyvern_script_list_for_workflow",
            {
                "workflow_id": "wpid_test",
            },
        )

    assert result.data["ok"] is True
    assert result.data["data"]["scripts"] == expected_scripts
    assert result.data["data"]["count"] == 0


# ---------------------------------------------------------------------------
# Scenario 2: "Print the script that was made"
# ---------------------------------------------------------------------------


@pytest.mark.asyncio
async def test_get_script_code_via_mcp(monkeypatch):
    monkeypatch.setattr(
        script_tools,
        "raw_http_get",
        _mock_raw_http(
            {
                "scripts/s_abc/versions/2": FAKE_CODE,
            }
        ),
    )

    async with Client(mcp) as client:
        result = await client.call_tool(
            "skyvern_script_get_code",
            {
                "script_id": "s_abc",
                "version": 2,
            },
        )

    assert result.data["ok"] is True
    data = result.data["data"]
    assert "fill_form" in data["blocks"]
    # Semgrep false positive: this checks a script code path, not a user-supplied URL.
    assert "page.fill" in data["blocks"]["fill_form"]  # nosemgrep: incomplete-url-substring-sanitization
    assert "@skyvern.workflow" in data["main_script"]


@pytest.mark.asyncio
async def test_get_script_code_resolves_latest_via_mcp(monkeypatch):
    """When version is omitted, the tool fetches metadata first to find the latest version."""
    monkeypatch.setattr(
        script_tools,
        "raw_http_get",
        _mock_raw_http(
            {
                # The versioned path must come first: "v1/scripts/s_abc" is a
                # substring of it and would otherwise match both requests.
                "v1/scripts/s_abc/versions/2": FAKE_CODE,
                "v1/scripts/s_abc": {"script_id": "s_abc", "version": 2},
            }
        ),
    )

    async with Client(mcp) as client:
        result = await client.call_tool(
            "skyvern_script_get_code",
            {
                "script_id": "s_abc",
            },
        )

    assert result.data["ok"] is True
    assert result.data["data"]["version"] == 2


# ---------------------------------------------------------------------------
# Scenario 3: "How did the script evolve?"
# ---------------------------------------------------------------------------


@pytest.mark.asyncio
async def test_script_versions_via_mcp(monkeypatch):
    monkeypatch.setattr(
        script_tools,
        "raw_http_get",
        _mock_raw_http(
            {
                "versions": FAKE_VERSIONS,
            }
        ),
    )

    async with Client(mcp) as client:
        result = await client.call_tool(
            "skyvern_script_versions",
            {
                "script_id": "s_abc",
            },
        )

    assert result.data["ok"] is True
    versions = result.data["data"]["versions"]
    assert len(versions) == 2
    assert versions[0]["version"] == 1
    assert versions[1]["version"] == 2


# ---------------------------------------------------------------------------
# Scenario 4: "Why did it fall back to AI?"
# ---------------------------------------------------------------------------


@pytest.mark.asyncio
async def test_fallback_episodes_via_mcp(monkeypatch):
    monkeypatch.setattr(
        script_tools,
        "raw_http_get",
        _mock_raw_http(
            {
                "fallback-episodes": FAKE_EPISODES,
            }
        ),
    )

    async with Client(mcp) as client:
        result = await client.call_tool(
            "skyvern_script_fallback_episodes",
            {
                "workflow_id": "wpid_test",
            },
        )

    assert result.data["ok"] is True
    data = result.data["data"]
    assert data["total_count"] == 1
    ep = data["episodes"][0]
    assert ep["fallback_type"] == "selector_miss"
    assert "site redesigned" in ep["error_message"]
    assert ep["fallback_succeeded"] is True


@pytest.mark.asyncio
async def test_fallback_episodes_rejects_invalid_workflow_run_id_via_mcp(monkeypatch):
    raw_http_get = AsyncMock(return_value=FAKE_EPISODES)
    monkeypatch.setattr(script_tools, "raw_http_get", raw_http_get)

    async with Client(mcp) as client:
        result = await client.call_tool(
            "skyvern_script_fallback_episodes",
            {
                "workflow_id": "wpid_test",
                "workflow_run_id": "bad_run_id",
            },
        )

    assert result.data["ok"] is False
    assert result.data["error"]["code"] == script_tools.ErrorCode.INVALID_INPUT
    # Validation should reject the bad run ID before any HTTP call is made.
    raw_http_get.assert_not_awaited()


# ---------------------------------------------------------------------------
# Scenario 5: "Edit the script"
# ---------------------------------------------------------------------------


@pytest.mark.asyncio
async def test_deploy_script_via_mcp(monkeypatch):
    deploy_resp = SimpleNamespace(
        script_id="s_abc",
        version=3,
        script_revision_id="srev_3",
        model_dump=lambda mode="python": {"script_id": "s_abc", "version": 3, "script_revision_id": "srev_3"},
    )
    fake_client = SimpleNamespace(deploy_script=AsyncMock(return_value=deploy_resp))
    monkeypatch.setattr(script_tools, "get_skyvern", lambda: fake_client)

    files = json.dumps([{"path": "main.py", "content": base64.b64encode(b"# edited").decode(), "encoding": "base64"}])

    async with Client(mcp) as client:
        result = await client.call_tool(
            "skyvern_script_deploy",
            {
                "script_id": "s_abc",
                "files": files,
            },
        )

    assert result.data["ok"] is True
    assert result.data["data"]["version"] == 3
    fake_client.deploy_script.assert_awaited_once()
    called_files = fake_client.deploy_script.await_args.kwargs["files"]
    assert len(called_files) == 1
    assert isinstance(called_files[0], ScriptFileCreate)
    assert called_files[0].path == "main.py"
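

# The assertions above pin the tool's conversion step: the JSON `files` string
# is parsed and each entry is handed to the SDK as a typed ScriptFileCreate,
# not as a raw dict.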


# ---------------------------------------------------------------------------
# Scenario 6: Workflow create shows caching defaults
# ---------------------------------------------------------------------------


@pytest.mark.asyncio
async def test_workflow_create_surfaces_caching_fields_via_mcp(monkeypatch):
    payload = {
        "workflow_permanent_id": "wpid_new",
        "workflow_id": "wf_1",
        "title": "Test",
        "version": 1,
        "status": "published",
        "description": None,
        "is_saved_task": False,
        "folder_id": None,
        "created_at": "2026-04-23T10:00:00+00:00",
        "modified_at": "2026-04-23T10:00:00+00:00",
        "code_version": 2,
        "adaptive_caching": True,
        "run_with": "code",
    }
    response = SimpleNamespace(status_code=200, json=lambda: payload, text="")
    request_mock = AsyncMock(return_value=response)
    fake_client = SimpleNamespace(_client_wrapper=SimpleNamespace(httpx_client=SimpleNamespace(request=request_mock)))
    monkeypatch.setattr(workflow_tools, "get_skyvern", lambda: fake_client)

    definition = json.dumps(
        {
            "title": "Test",
            "workflow_definition": {
                "parameters": [],
                "blocks": [
                    {
                        "block_type": "navigation",
                        "label": "s1",
                        "url": "https://example.com",
                        "navigation_goal": "Click",
                    }
                ],
            },
        }
    )

    async with Client(mcp) as client:
        result = await client.call_tool(
            "skyvern_workflow_create",
            {
                "definition": definition,
                "format": "json",
            },
        )

    assert result.data["ok"] is True
    data = result.data["data"]
    assert data["code_version"] == 2
    assert data["run_with"] == "code"
    assert data["adaptive_caching"] is True


# ---------------------------------------------------------------------------
# Scenario 7: Run status shows script_run + ai_fallback
# ---------------------------------------------------------------------------


@pytest.mark.asyncio
async def test_workflow_status_shows_script_run_via_mcp(monkeypatch):
    payload = {
        "workflow_run_id": "wr_test",
        "status": "completed",
        "run_with": "code",
        "workflow_title": "Test",
        "script_run": {"ai_fallback_triggered": True, "script_id": "s_abc"},
        "outputs": {"result": "ok"},
    }
    fake_resp = SimpleNamespace(status_code=200, json=lambda: payload, text="")
    fake_client = SimpleNamespace(
        _client_wrapper=SimpleNamespace(
            httpx_client=SimpleNamespace(request=AsyncMock(return_value=fake_resp)),
        ),
    )
    monkeypatch.setattr(workflow_tools, "get_skyvern", lambda: fake_client)

    async with Client(mcp) as client:
        result = await client.call_tool(
            "skyvern_workflow_status",
            {
                "run_id": "wr_test",
                "verbosity": "full",
            },
        )

    assert result.data["ok"] is True
    data = result.data["data"]
    assert data["run_with"] == "code"
    assert data["script_run"]["ai_fallback_triggered"] is True


# ---------------------------------------------------------------------------
# Validation: bad inputs get clear errors
# ---------------------------------------------------------------------------


@pytest.mark.asyncio
async def test_bad_workflow_id_returns_error_via_mcp():
    async with Client(mcp) as client:
        result = await client.call_tool(
            "skyvern_script_list_for_workflow",
            {
                "workflow_id": "not_a_wpid",
            },
        )

    assert result.data["ok"] is False
    assert "wpid_" in str(result.data["error"])


@pytest.mark.asyncio
async def test_bad_script_id_returns_error_via_mcp():
    async with Client(mcp) as client:
        result = await client.call_tool(
            "skyvern_script_get_code",
            {
                "script_id": "wrong_prefix",
            },
        )

    assert result.data["ok"] is False
    assert "s_" in str(result.data["error"])


@pytest.mark.asyncio
async def test_bad_deploy_json_returns_error_via_mcp():
    async with Client(mcp) as client:
        result = await client.call_tool(
            "skyvern_script_deploy",
            {
                "script_id": "s_abc",
                "files": "not json",
            },
        )

    assert result.data["ok"] is False
    assert "JSON" in result.data["error"]["message"]