# Skyvern/tests/unit/test_mcp_script_caching_live.py

"""Live MCP server tests for script/caching tools.

Tests call tools through the actual FastMCP Client, exactly as Claude Code would.
API responses are mocked at the HTTP layer so we test the full MCP pipeline:
  Client → FastMCP → tool function → raw_http_get/SDK → (mocked) API
"""

from __future__ import annotations

import json
from types import SimpleNamespace
from unittest.mock import AsyncMock

import pytest
from fastmcp import Client

import skyvern.cli.mcp_tools.scripts as script_tools
import skyvern.cli.mcp_tools.workflow as workflow_tools
from skyvern.cli.mcp_tools import mcp
from skyvern.client.types import ScriptFileCreate

# ---------------------------------------------------------------------------
# Fake API payloads
# ---------------------------------------------------------------------------

# Canned payload served for the mocked "scripts/workflows/" route.
# It holds a single script entry ("s_abc"); test_list_scripts_via_mcp asserts
# on the script_id and success_rate values defined here.
FAKE_SCRIPTS = {
    "scripts": [
        {
            "script_id": "s_abc",
            "cache_key": "hash",
            "cache_key_value": "default",
            "status": "published",
            "latest_version": 2,
            "version_count": 2,
            "total_runs": 5,
            "success_rate": 0.8,
            "is_pinned": False,
        }
    ]
}

# Canned payload served for the mocked "scripts/s_abc/versions/2" route.
# "blocks" maps a name ("fill_form") to that block's source text and
# "main_script" holds the top-level script source; the get-code tests check
# substrings of both.
FAKE_CODE = {
    "blocks": {
        "fill_form": "async def fill_form(page, ctx):\n    await page.fill('xpath=//input', ctx.parameters['name'])\n",
    },
    "main_script": "import skyvern\n\n@skyvern.workflow(title='Test')\nasync def run(params):\n    pass\n",
    "script_id": "s_abc",
    "version": 2,
}

# Canned payload served for the mocked "versions" route: two entries listed
# as version 1 then version 2, which test_script_versions_via_mcp relies on
# when it checks versions[0] and versions[1].
FAKE_VERSIONS = {
    "versions": [
        {"version": 1, "script_revision_id": "srev_1", "created_at": "2026-03-20T10:00:00Z", "run_id": "wr_001"},
        {"version": 2, "script_revision_id": "srev_2", "created_at": "2026-03-22T14:00:00Z", "run_id": "wr_002"},
    ]
}

# Canned payload served for the mocked "fallback-episodes" route (and used as
# the AsyncMock return value in the invalid-run-id test): one "selector_miss"
# episode plus the total_count / page / page_size fields.
FAKE_EPISODES = {
    "episodes": [
        {
            "episode_id": "ep_1",
            "block_label": "fill_form",
            "fallback_type": "selector_miss",
            "error_message": "Element not found: site redesigned",
            "classify_result": None,
            "fallback_succeeded": True,
            "workflow_run_id": "wr_002",
            "page_url": "https://example.com/form",
            "reviewed": True,
            "created_at": "2026-03-22T14:01:00Z",
        }
    ],
    "total_count": 1,
    "page": 1,
    "page_size": 20,
}


def _mock_raw_http(responses: dict):
    """Return a mock raw_http_get that routes by path substring.

    Args:
        responses: Mapping of path fragment -> canned payload. A request is
            served by an entry whose fragment occurs inside the requested path.

    Returns:
        An ``async def mock_get(path, params=None)`` function. When several
        fragments match, the most specific (longest) one wins, so overlapping
        routes such as ``"scripts/s_abc"`` and ``"scripts/s_abc/versions/2"``
        resolve the same way no matter which was declared first. Fragments of
        equal length keep declaration order.

    Raises:
        RuntimeError: Raised by the returned function when no fragment
            matches, so an unexpected request fails loudly instead of being
            answered with an unrelated payload.
    """

    async def mock_get(path, params=None):
        # ``params`` is accepted so callers may pass query parameters, but
        # routing looks at the path alone.
        matches = [fragment for fragment in responses if fragment in path]
        if not matches:
            raise RuntimeError(f"Unmocked path: {path}")
        # max() yields the first of several equally long fragments, which
        # preserves first-declared-wins for ties.
        return responses[max(matches, key=len)]

    return mock_get


# ---------------------------------------------------------------------------
# Scenario 1: "Show me the scripts for this workflow"
# ---------------------------------------------------------------------------


@pytest.mark.asyncio
async def test_list_scripts_via_mcp(monkeypatch):
    """The list-for-workflow tool returns the single mocked script entry."""
    routes = {"scripts/workflows/": FAKE_SCRIPTS}
    monkeypatch.setattr(script_tools, "raw_http_get", _mock_raw_http(routes))

    tool_args = {"workflow_id": "wpid_test"}
    async with Client(mcp) as client:
        response = await client.call_tool("skyvern_script_list_for_workflow", tool_args)

    assert response.data["ok"] is True
    listed = response.data["data"]["scripts"]
    assert len(listed) == 1
    first = listed[0]
    assert first["script_id"] == "s_abc"
    assert first["success_rate"] == 0.8
    assert first["version"] == 2


@pytest.mark.parametrize(
    ("payload", "expected_scripts"),
    [
        ({"scripts": None}, None),
        ({"scripts": {"unexpected": "shape"}}, {"unexpected": "shape"}),
    ],
)
@pytest.mark.asyncio
async def test_list_scripts_handles_missing_script_list_via_mcp(monkeypatch, payload, expected_scripts):
    """A null or non-list ``scripts`` value is echoed back and counted as zero."""
    routes = {"scripts/workflows/": payload}
    monkeypatch.setattr(script_tools, "raw_http_get", _mock_raw_http(routes))

    tool_args = {"workflow_id": "wpid_test"}
    async with Client(mcp) as client:
        response = await client.call_tool("skyvern_script_list_for_workflow", tool_args)

    assert response.data["ok"] is True
    body = response.data["data"]
    assert body["scripts"] == expected_scripts
    assert body["count"] == 0


# ---------------------------------------------------------------------------
# Scenario 2: "Print the script that was made"
# ---------------------------------------------------------------------------


@pytest.mark.asyncio
async def test_get_script_code_via_mcp(monkeypatch):
    """Requesting an explicit version returns the block code and main script."""
    routes = {"scripts/s_abc/versions/2": FAKE_CODE}
    monkeypatch.setattr(script_tools, "raw_http_get", _mock_raw_http(routes))

    tool_args = {"script_id": "s_abc", "version": 2}
    async with Client(mcp) as client:
        response = await client.call_tool("skyvern_script_get_code", tool_args)

    assert response.data["ok"] is True
    code = response.data["data"]
    blocks = code["blocks"]
    assert "fill_form" in blocks
    # Semgrep false positive: this checks a script code path, not a user-supplied URL.
    assert "page.fill" in blocks["fill_form"]  # nosemgrep: incomplete-url-substring-sanitization
    assert "@skyvern.workflow" in code["main_script"]


@pytest.mark.asyncio
async def test_get_script_code_resolves_latest_via_mcp(monkeypatch):
    """With no version argument, the tool still returns the version-2 code."""
    # Keep the more specific versions route ahead of the metadata route:
    # both fragments are substrings of the versions path.
    routes = {
        "v1/scripts/s_abc/versions/2": FAKE_CODE,
        "v1/scripts/s_abc": {"script_id": "s_abc", "version": 2},
    }
    monkeypatch.setattr(script_tools, "raw_http_get", _mock_raw_http(routes))

    tool_args = {"script_id": "s_abc"}
    async with Client(mcp) as client:
        response = await client.call_tool("skyvern_script_get_code", tool_args)

    assert response.data["ok"] is True
    assert response.data["data"]["version"] == 2


# ---------------------------------------------------------------------------
# Scenario 3: "How did the script evolve?"
# ---------------------------------------------------------------------------


@pytest.mark.asyncio
async def test_script_versions_via_mcp(monkeypatch):
    """The versions tool returns both mocked versions in their original order."""
    routes = {"versions": FAKE_VERSIONS}
    monkeypatch.setattr(script_tools, "raw_http_get", _mock_raw_http(routes))

    tool_args = {"script_id": "s_abc"}
    async with Client(mcp) as client:
        response = await client.call_tool("skyvern_script_versions", tool_args)

    assert response.data["ok"] is True
    history = response.data["data"]["versions"]
    assert len(history) == 2
    assert history[0]["version"] == 1
    assert history[1]["version"] == 2


# ---------------------------------------------------------------------------
# Scenario 4: "Why did it fall back to AI?"
# ---------------------------------------------------------------------------


@pytest.mark.asyncio
async def test_fallback_episodes_via_mcp(monkeypatch):
    """The fallback-episodes tool surfaces the mocked episode's details."""
    routes = {"fallback-episodes": FAKE_EPISODES}
    monkeypatch.setattr(script_tools, "raw_http_get", _mock_raw_http(routes))

    tool_args = {"workflow_id": "wpid_test"}
    async with Client(mcp) as client:
        response = await client.call_tool("skyvern_script_fallback_episodes", tool_args)

    assert response.data["ok"] is True
    body = response.data["data"]
    assert body["total_count"] == 1
    episode = body["episodes"][0]
    assert episode["fallback_type"] == "selector_miss"
    assert "site redesigned" in episode["error_message"]
    assert episode["fallback_succeeded"] is True


@pytest.mark.asyncio
async def test_fallback_episodes_rejects_invalid_workflow_run_id_via_mcp(monkeypatch):
    """A malformed workflow_run_id yields INVALID_INPUT without any HTTP call."""
    http_spy = AsyncMock(return_value=FAKE_EPISODES)
    monkeypatch.setattr(script_tools, "raw_http_get", http_spy)

    tool_args = {"workflow_id": "wpid_test", "workflow_run_id": "bad_run_id"}
    async with Client(mcp) as client:
        response = await client.call_tool("skyvern_script_fallback_episodes", tool_args)

    assert response.data["ok"] is False
    assert response.data["error"]["code"] == script_tools.ErrorCode.INVALID_INPUT
    # The spy must never have been awaited: the input is rejected first.
    http_spy.assert_not_awaited()


# ---------------------------------------------------------------------------
# Scenario 5: "Edit the script"
# ---------------------------------------------------------------------------


@pytest.mark.asyncio
async def test_deploy_script_via_mcp(monkeypatch):
    """Deploying passes one ScriptFileCreate to the SDK and reports version 3."""
    import base64

    deployed_fields = {"script_id": "s_abc", "version": 3, "script_revision_id": "srev_3"}

    def dump(mode="python"):
        # Hand back a fresh dict on every call, as the SDK response stub.
        return dict(deployed_fields)

    deploy_mock = AsyncMock(return_value=SimpleNamespace(model_dump=dump, **deployed_fields))
    sdk_stub = SimpleNamespace(deploy_script=deploy_mock)
    monkeypatch.setattr(script_tools, "get_skyvern", lambda: sdk_stub)

    encoded_source = base64.b64encode(b"# edited").decode()
    files = json.dumps([{"path": "main.py", "content": encoded_source, "encoding": "base64"}])

    tool_args = {"script_id": "s_abc", "files": files}
    async with Client(mcp) as client:
        response = await client.call_tool("skyvern_script_deploy", tool_args)

    assert response.data["ok"] is True
    assert response.data["data"]["version"] == 3
    deploy_mock.assert_awaited_once()
    sent_files = deploy_mock.await_args.kwargs["files"]
    assert len(sent_files) == 1
    only_file = sent_files[0]
    assert isinstance(only_file, ScriptFileCreate)
    assert only_file.path == "main.py"


# ---------------------------------------------------------------------------
# Scenario 6: Workflow create shows caching defaults
# ---------------------------------------------------------------------------


@pytest.mark.asyncio
async def test_workflow_create_surfaces_caching_fields_via_mcp(monkeypatch):
    """The create tool's result carries code_version, run_with and adaptive_caching."""
    payload = {
        "workflow_permanent_id": "wpid_new",
        "workflow_id": "wf_1",
        "title": "Test",
        "version": 1,
        "status": "published",
        "description": None,
        "is_saved_task": False,
        "folder_id": None,
        "created_at": "2026-04-23T10:00:00+00:00",
        "modified_at": "2026-04-23T10:00:00+00:00",
        "code_version": 2,
        "adaptive_caching": True,
        "run_with": "code",
    }

    # Stub the chain the test replaces: client -> _client_wrapper -> httpx_client.request.
    http_response = SimpleNamespace(status_code=200, json=lambda: payload, text="")
    request_mock = AsyncMock(return_value=http_response)
    httpx_stub = SimpleNamespace(request=request_mock)
    wrapper_stub = SimpleNamespace(httpx_client=httpx_stub)
    sdk_stub = SimpleNamespace(_client_wrapper=wrapper_stub)
    monkeypatch.setattr(workflow_tools, "get_skyvern", lambda: sdk_stub)

    navigation_block = {
        "block_type": "navigation",
        "label": "s1",
        "url": "https://example.com",
        "navigation_goal": "Click",
    }
    workflow_spec = {
        "title": "Test",
        "workflow_definition": {
            "parameters": [],
            "blocks": [navigation_block],
        },
    }
    definition = json.dumps(workflow_spec)

    tool_args = {"definition": definition, "format": "json"}
    async with Client(mcp) as client:
        response = await client.call_tool("skyvern_workflow_create", tool_args)

    assert response.data["ok"] is True
    created = response.data["data"]
    assert created["code_version"] == 2
    assert created["run_with"] == "code"
    assert created["adaptive_caching"] is True


# ---------------------------------------------------------------------------
# Scenario 7: Run status shows script_run + ai_fallback
# ---------------------------------------------------------------------------


@pytest.mark.asyncio
async def test_workflow_status_shows_script_run_via_mcp(monkeypatch):
    """Full-verbosity status includes run_with and the script_run fallback flag."""
    payload = {
        "workflow_run_id": "wr_test",
        "status": "completed",
        "run_with": "code",
        "workflow_title": "Test",
        "script_run": {"ai_fallback_triggered": True, "script_id": "s_abc"},
        "outputs": {"result": "ok"},
    }

    # Stub the chain the test replaces: client -> _client_wrapper -> httpx_client.request.
    http_response = SimpleNamespace(status_code=200, json=lambda: payload, text="")
    httpx_stub = SimpleNamespace(request=AsyncMock(return_value=http_response))
    sdk_stub = SimpleNamespace(_client_wrapper=SimpleNamespace(httpx_client=httpx_stub))
    monkeypatch.setattr(workflow_tools, "get_skyvern", lambda: sdk_stub)

    tool_args = {"run_id": "wr_test", "verbosity": "full"}
    async with Client(mcp) as client:
        response = await client.call_tool("skyvern_workflow_status", tool_args)

    assert response.data["ok"] is True
    status = response.data["data"]
    assert status["run_with"] == "code"
    assert status["script_run"]["ai_fallback_triggered"] is True


# ---------------------------------------------------------------------------
# Validation: bad inputs get clear errors
# ---------------------------------------------------------------------------


@pytest.mark.asyncio
async def test_bad_workflow_id_returns_error_via_mcp():
    """A workflow_id without the expected prefix produces an error mentioning ``wpid_``."""
    tool_args = {"workflow_id": "not_a_wpid"}
    async with Client(mcp) as client:
        response = await client.call_tool("skyvern_script_list_for_workflow", tool_args)

    assert response.data["ok"] is False
    error_text = str(response.data["error"])
    assert "wpid_" in error_text


@pytest.mark.asyncio
async def test_bad_script_id_returns_error_via_mcp():
    """A script_id with the wrong prefix produces an error mentioning ``s_``."""
    tool_args = {"script_id": "wrong_prefix"}
    async with Client(mcp) as client:
        response = await client.call_tool("skyvern_script_get_code", tool_args)

    assert response.data["ok"] is False
    error_text = str(response.data["error"])
    assert "s_" in error_text


@pytest.mark.asyncio
async def test_bad_deploy_json_returns_error_via_mcp():
    """A ``files`` argument that is not JSON produces an error message naming JSON."""
    tool_args = {"script_id": "s_abc", "files": "not json"}
    async with Client(mcp) as client:
        response = await client.call_tool("skyvern_script_deploy", tool_args)

    assert response.data["ok"] is False
    error_message = response.data["error"]["message"]
    assert "JSON" in error_message