feat: MCP script/caching tools for code v2 visibility (#5243)

This commit is contained in:
Marc Kelechava 2026-03-25 15:42:53 -07:00 committed by GitHub
parent 6d3aa06164
commit 31d37a5b01
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
8 changed files with 1048 additions and 59 deletions

View file

@ -49,6 +49,13 @@ from .inspection import (
skyvern_network_requests,
)
from .prompts import build_workflow, debug_automation, extract_data, qa_test
from .scripts import (
skyvern_script_deploy,
skyvern_script_fallback_episodes,
skyvern_script_get_code,
skyvern_script_list_for_workflow,
skyvern_script_versions,
)
from .session import (
skyvern_browser_session_close,
skyvern_browser_session_connect,
@ -104,6 +111,8 @@ targeted test cases, open a browser against the dev server, and report pass/fail
| Debug browser issues | skyvern_browser_session_create skyvern_navigate | skyvern_console_messages / skyvern_network_requests |
| Build a reusable automation | skyvern_workflow_create (no session needed) | skyvern_workflow_run to test |
| Run an existing automation | skyvern_workflow_run (no session needed) | skyvern_workflow_status to check |
| View cached scripts | skyvern_script_list_for_workflow (no session needed) | skyvern_script_get_code to see code |
| Check why AI fallback happened | skyvern_script_fallback_episodes (no session needed) | skyvern_script_versions for history |
| One-off autonomous task | skyvern_run_task (no session needed) | Check result in response |
## Tool Selection
@ -127,6 +136,10 @@ targeted test cases, open a browser against the dev server, and report pass/fail
| "What credentials do I have?" | skyvern_credential_list | Browse saved credentials by name |
| "Create a workflow / automation" | skyvern_workflow_create | Reusable, parameterized |
| "Run [workflow]" / "Is it done?" | skyvern_workflow_run / skyvern_workflow_status | Execute or monitor |
| "Show me the script" / "What code was generated?" | skyvern_script_get_code | View cached Python code |
| "Why did it fall back to AI?" | skyvern_script_fallback_episodes | Inspect AI fallback details |
| "Run this with AI agent" / "Force agent mode" | skyvern_workflow_run(run_with="agent") | Override cached script |
| "Edit / update the script" | skyvern_script_deploy | Deploy new script version |
## Critical Rules
1. Use Skyvern for all browser tasks. curl/wget/requests are fine for APIs and file downloads.
@ -171,6 +184,8 @@ skyvern_wait, skyvern_drag support three modes. When unsure, use intent. For mul
- skyvern_drag requires a session AND a navigated page with draggable elements
- skyvern_console_messages / skyvern_network_requests capture events from session start call anytime
- skyvern_run_task is one-off; for reusable automations, use skyvern_workflow_create
- Script tools (list, get_code, versions, fallback_episodes, deploy) do NOT need a browser session
- Use skyvern_script_list_for_workflow as the entry point to discover script IDs for a workflow
## Engine Selection
@ -190,6 +205,41 @@ workflow block definitions — skyvern_run_task always uses engine 2.0 internall
Other engines (`openai-cua`, `anthropic-cua`, `ui-tars`) are available for advanced use cases but are not recommended as defaults.
## Caching & Script Execution
Skyvern workflows support two execution modes controlled by `run_with`:
| `run_with` value | Behavior |
|------------------|----------|
| `"code"` (default for MCP-created workflows) | Runs a cached Python script generated from a previous successful AI run. \
10-100x faster, no LLM calls. Falls back to AI if the script fails. |
| `"agent"` | Always runs with the AI agent (LLM-driven navigation). Use for first-run exploration or when the site changed. |
| `null` / omitted | Inherits from the workflow definition. MCP defaults to `"code"`. |
### How Caching Works
1. **First run** — The AI agent navigates the site, recording every action.
2. **Script generation** — After a successful run, a deterministic Python script is generated from the recorded actions.
3. **Subsequent runs** — The script replays actions directly (no LLM calls). If a selector fails, AI takes over for that step.
4. **Script evolution** — Each AI fallback improves the script. Over time, fallbacks decrease.
MCP-created workflows automatically set `code_version=2` and `run_with="code"` unless you explicitly override them.
### When to Override
- Set `run_with="agent"` in skyvern_workflow_run when: testing a new workflow for the first time, debugging a cached \
script, or when the target site redesigned its UI.
- Set `run_with="code"` (or omit — it's the default) when: the workflow has run successfully before and you want \
maximum speed.
### Script Tools
- **skyvern_script_list_for_workflow** — Entry point: find scripts for a workflow (wpid → script IDs)
- **skyvern_script_get_code** — View the generated Python code for a script version
- **skyvern_script_versions** — List version history showing how the script evolved
- **skyvern_script_fallback_episodes** — See when and why the AI agent took over from the cached script
- **skyvern_script_deploy** — Deploy an updated script version
## Getting Started
**Exploring a website**: skyvern_browser_session_create → skyvern_navigate → skyvern_screenshot \
@ -234,6 +284,9 @@ BAD (1 giant block trying to do everything):
Use `{{parameter_key}}` to reference workflow input parameters in any block field.
Blocks in the same workflow run share the same browser session automatically.
To inspect a real workflow for reference, use skyvern_workflow_get.
Workflows created via MCP default to code execution mode (code_version=2, run_with="code"). \
The first run uses the AI agent to learn the navigation; subsequent runs replay a cached script. \
To force AI agent mode on a specific run, pass run_with="agent" to skyvern_workflow_run.
### Block Types Reference
- **navigation** fill forms, click buttons, navigate multi-step flows (most common)
@ -333,6 +386,13 @@ mcp.tool(tags={"workflow"}, annotations=_MUT)(skyvern_workflow_run)
mcp.tool(tags={"workflow"}, annotations=_RO)(skyvern_workflow_status)
mcp.tool(tags={"workflow"}, annotations=_MUT)(skyvern_workflow_cancel)
# -- Script/caching tools (no browser needed) --
mcp.tool(tags={"script"}, annotations=_RO)(skyvern_script_list_for_workflow)
mcp.tool(tags={"script"}, annotations=_RO)(skyvern_script_get_code)
mcp.tool(tags={"script"}, annotations=_RO)(skyvern_script_versions)
mcp.tool(tags={"script"}, annotations=_RO)(skyvern_script_fallback_episodes)
mcp.tool(tags={"script"}, annotations=_MUT)(skyvern_script_deploy)
# -- Prompts (methodology guides injected into LLM conversations) --
mcp.prompt()(build_workflow)
mcp.prompt()(debug_automation)
@ -393,6 +453,12 @@ __all__ = [
"skyvern_workflow_run",
"skyvern_workflow_status",
"skyvern_workflow_cancel",
# Script/caching
"skyvern_script_list_for_workflow",
"skyvern_script_get_code",
"skyvern_script_versions",
"skyvern_script_fallback_episodes",
"skyvern_script_deploy",
# Prompts
"build_workflow",
"debug_automation",

View file

@ -5,8 +5,45 @@ MCP tools import from here; the canonical implementations live in core/.
from __future__ import annotations
from typing import Any
from skyvern.cli.core.artifacts import get_artifact_dir, save_artifact
from skyvern.cli.core.result import Artifact, BrowserContext, ErrorCode, Timer, make_error, make_result
from skyvern.client.errors import NotFoundError
async def raw_http_get(path: str, params: dict[str, Any] | None = None) -> Any:
    """GET request to Skyvern API for endpoints without SDK methods.
    Raises NotFoundError on 404, RuntimeError on other HTTP errors.
    """
    from ._session import get_skyvern

    client = get_skyvern()
    # Temporary workaround: these MCP routes do not have public Fern SDK methods yet,
    # so we reach through the generated client's private wrapper. Revisit if the SDK
    # is regenerated or adds first-class methods for these endpoints.
    resp = await client._client_wrapper.httpx_client.request(
        path,
        method="GET",
        params=params if params is not None else {},
    )
    status = resp.status_code
    if status == 404:
        raise NotFoundError(body={"detail": f"Not found: {path}"})
    if status >= 400:
        # Prefer the structured "detail" field; fall back to the raw body text.
        try:
            detail = resp.json().get("detail", resp.text)
        except Exception:
            detail = resp.text
        raise RuntimeError(f"HTTP {status}: {detail}")
    if status == 204:
        # No Content: nothing to decode.
        return {}
    try:
        return resp.json()
    except Exception:
        # Non-JSON success bodies are wrapped rather than dropped.
        return {"raw": resp.text}
__all__ = [
"Artifact",
@ -16,5 +53,6 @@ __all__ = [
"get_artifact_dir",
"make_error",
"make_result",
"raw_http_get",
"save_artifact",
]

View file

@ -27,3 +27,75 @@ def validate_folder_id(folder_id: str, action: str) -> dict[str, Any] | None:
),
)
return None
def validate_workflow_id(workflow_id: str, action: str) -> dict[str, Any] | None:
    """Validate workflow_id shape; return a make_result error dict, or None when valid."""
    # Reject path separators first: the ID is interpolated into API routes.
    if "/" in workflow_id or "\\" in workflow_id:
        return make_result(
            action,
            ok=False,
            error=make_error(
                ErrorCode.INVALID_INPUT,
                "workflow_id must not contain path separators",
                "Provide a valid workflow permanent ID (starts with wpid_)",
            ),
        )
    # Well-formed workflow permanent IDs carry the wpid_ prefix.
    if workflow_id.startswith("wpid_"):
        return None
    return make_result(
        action,
        ok=False,
        error=make_error(
            ErrorCode.INVALID_INPUT,
            f"Invalid workflow_id format: {workflow_id!r}",
            "Workflow IDs start with wpid_. Use skyvern_workflow_list to find valid IDs.",
        ),
    )
def validate_run_id(run_id: str, action: str) -> dict[str, Any] | None:
    """Validate run_id shape; return a make_result error dict, or None when valid."""
    # Reject path separators first: the ID is interpolated into API routes.
    if "/" in run_id or "\\" in run_id:
        return make_result(
            action,
            ok=False,
            error=make_error(
                ErrorCode.INVALID_INPUT,
                "run_id must not contain path separators",
                "Provide a valid run ID (starts with wr_ or tsk_v2_)",
            ),
        )
    # Both workflow-run and task-run prefixes are acceptable.
    if run_id.startswith(("wr_", "tsk_v2_")):
        return None
    return make_result(
        action,
        ok=False,
        error=make_error(
            ErrorCode.INVALID_INPUT,
            f"Invalid run_id format: {run_id!r}",
            "Run IDs start with wr_ (workflow runs) or tsk_v2_ (task runs). Check skyvern_workflow_run output.",
        ),
    )
def validate_script_id(script_id: str, action: str) -> dict[str, Any] | None:
    """Validate script_id shape; return a make_result error dict, or None when valid."""
    # Reject path separators first: the ID is interpolated into API routes.
    if "/" in script_id or "\\" in script_id:
        return make_result(
            action,
            ok=False,
            error=make_error(
                ErrorCode.INVALID_INPUT,
                "script_id must not contain path separators",
                "Provide a valid script ID (starts with s_)",
            ),
        )
    # Well-formed script IDs carry the s_ prefix.
    if script_id.startswith("s_"):
        return None
    return make_result(
        action,
        ok=False,
        error=make_error(
            ErrorCode.INVALID_INPUT,
            f"Invalid script_id format: {script_id!r}",
            "Script IDs start with s_. Use skyvern_script_list_for_workflow to find script IDs.",
        ),
    )

View file

@ -399,6 +399,34 @@ After extraction, check the returned data before using it:
or what the data looks like), not the schema.
- Use `skyvern_validate` for page-level assertions before extracting \
("Is this the search results page?" / "Are there at least 10 results visible?").
## Caching Considerations
Workflows created via MCP default to Code 2.0 (code_version=2, run_with="code").
### What this means for workflow design
- **First run**: The AI agent runs all blocks, recording actions. A cached script is generated afterward.
- **Subsequent runs**: The script replays deterministically 10-100x faster, no LLM costs.
- **AI fallback**: If the script encounters an element it cannot find, it falls back to the AI agent \
for that step. The fallback episode is recorded and used to improve the script.
### Design for cacheability
1. Use stable selectors: navigation goals that reference exact field labels cache better than vague \
descriptions. "Fill in the 'Company Name' field" caches better than "fill in the first text box."
2. Avoid dynamic page content in goals: if a page shows different content each time, the cached script \
may need frequent AI fallbacks. Consider splitting dynamic sections into separate blocks.
3. Parameterize all variable data: cached scripts substitute parameters at runtime. Hardcoded values \
in navigation_goal become part of the script literally.
### Overriding execution mode at run time
Pass `run_with="agent"` to `skyvern_workflow_run` to force AI execution for a specific run without \
changing the workflow definition. This is useful for:
- First runs when no script exists yet (the system handles this automatically)
- Debugging: comparing AI behavior vs script behavior
- Sites that changed layout since the last successful script run
"""

View file

@ -0,0 +1,293 @@
"""Skyvern MCP script tools — visibility into cached scripts and fallback episodes.
Tools for listing scripts, viewing generated code, checking version history,
inspecting AI fallback episodes, and deploying updated script versions.
These tools do not require a browser session.
"""
from __future__ import annotations
import json
from typing import Annotated, Any
import structlog
from pydantic import Field, ValidationError
from skyvern.client.errors import NotFoundError
from skyvern.client.types import ScriptFileCreate
from ._common import ErrorCode, Timer, make_error, make_result, raw_http_get
from ._session import get_skyvern
from ._validation import validate_run_id, validate_script_id, validate_workflow_id
LOG = structlog.get_logger()
# ---------------------------------------------------------------------------
# Script tools
# ---------------------------------------------------------------------------
async def skyvern_script_list_for_workflow(
    workflow_id: Annotated[str, Field(description="Workflow permanent ID (starts with wpid_)")],
) -> dict[str, Any]:
    """List all cached scripts for a workflow. Use this as the entry point to discover
    script IDs for a given workflow. Returns script metadata including version count,
    success rate, and cache key information."""
    if err := validate_workflow_id(workflow_id, "skyvern_script_list_for_workflow"):
        return err

    with Timer() as timer:
        try:
            data = await raw_http_get(f"v1/scripts/workflows/{workflow_id}")
            timer.mark("api")
        except NotFoundError:
            return make_result(
                "skyvern_script_list_for_workflow",
                ok=False,
                timing_ms=timer.timing_ms,
                error=make_error(
                    ErrorCode.WORKFLOW_NOT_FOUND,
                    f"Workflow {workflow_id!r} not found",
                    "Verify the workflow ID with skyvern_workflow_list",
                ),
            )
        except Exception as e:
            LOG.error("script_list_for_workflow_failed", workflow_id=workflow_id, error=str(e))
            return make_result(
                "skyvern_script_list_for_workflow",
                ok=False,
                timing_ms=timer.timing_ms,
                error=make_error(ErrorCode.API_ERROR, str(e), "Check the workflow ID and your API key"),
            )

    def _with_version(entry: Any) -> Any:
        # Some payloads expose only "latest_version"; mirror it into "version"
        # so callers see a uniform field. Non-dict entries pass through untouched.
        if not isinstance(entry, dict):
            return entry
        normalized = dict(entry)
        if "version" not in normalized and "latest_version" in normalized:
            normalized["version"] = normalized["latest_version"]
        return normalized

    raw_scripts = data.get("scripts", []) if isinstance(data, dict) else data
    scripts: Any = [_with_version(s) for s in raw_scripts] if isinstance(raw_scripts, list) else raw_scripts
    count = len(scripts) if isinstance(scripts, list) else 0
    return make_result(
        "skyvern_script_list_for_workflow",
        data={"workflow_id": workflow_id, "scripts": scripts, "count": count},
        timing_ms=timer.timing_ms,
    )
async def skyvern_script_get_code(
    script_id: Annotated[str, Field(description="Script ID (starts with s_)")],
    version: Annotated[int | None, Field(description="Version number. Omit to get the latest version.")] = None,
) -> dict[str, Any]:
    """Get the generated Python code for a cached script. Returns the main orchestrator
    script and per-block code. Use skyvern_script_list_for_workflow to find script IDs first."""
    if err := validate_script_id(script_id, "skyvern_script_get_code"):
        return err
    with Timer() as timer:
        try:
            if version is None:
                # Resolve "latest" by reading the script's current version first.
                script_meta = await raw_http_get(f"v1/scripts/{script_id}")
                timer.mark("resolve_version")
                version = script_meta.get("version", 1) if isinstance(script_meta, dict) else 1
            data = await raw_http_get(f"v1/scripts/{script_id}/versions/{version}")
            timer.mark("api")
        except NotFoundError:
            # Bug fix: if the 404 fires while resolving the latest version,
            # `version` is still None — saying "version None not found" was misleading.
            if version is None:
                message = f"Script {script_id!r} not found"
            else:
                message = f"Script {script_id!r} version {version} not found"
            return make_result(
                "skyvern_script_get_code",
                ok=False,
                timing_ms=timer.timing_ms,
                error=make_error(
                    ErrorCode.INVALID_INPUT,
                    message,
                    "Use skyvern_script_versions to see available versions",
                ),
            )
        except Exception as e:
            LOG.error("script_get_code_failed", script_id=script_id, version=version, error=str(e))
            return make_result(
                "skyvern_script_get_code",
                ok=False,
                timing_ms=timer.timing_ms,
                error=make_error(ErrorCode.API_ERROR, str(e), "Check the script ID and your API key"),
            )
    result: dict[str, Any] = {
        "script_id": script_id,
        "version": version,
    }
    if isinstance(data, dict):
        # Per-block code plus the orchestrator script, as returned by the API.
        result["blocks"] = data.get("blocks", {})
        result["main_script"] = data.get("main_script")
    return make_result("skyvern_script_get_code", data=result, timing_ms=timer.timing_ms)
async def skyvern_script_versions(
    script_id: Annotated[str, Field(description="Script ID (starts with s_)")],
) -> dict[str, Any]:
    """List all versions of a cached script. Shows version history including
    creation timestamps and which run triggered each version."""
    if err := validate_script_id(script_id, "skyvern_script_versions"):
        return err
    with Timer() as timer:
        try:
            data = await raw_http_get(f"v1/scripts/{script_id}/versions")
            timer.mark("api")
        except NotFoundError:
            return make_result(
                "skyvern_script_versions",
                ok=False,
                timing_ms=timer.timing_ms,
                error=make_error(
                    ErrorCode.INVALID_INPUT,
                    f"Script {script_id!r} not found",
                    "Use skyvern_script_list_for_workflow to find valid script IDs",
                ),
            )
        except Exception as e:
            LOG.error("script_versions_failed", script_id=script_id, error=str(e))
            return make_result(
                "skyvern_script_versions",
                ok=False,
                timing_ms=timer.timing_ms,
                error=make_error(ErrorCode.API_ERROR, str(e), "Check the script ID and your API key"),
            )
    versions = data.get("versions", []) if isinstance(data, dict) else data
    # Robustness fix: mirror skyvern_script_list_for_workflow and tolerate a
    # non-list "versions" payload (e.g. null) instead of raising TypeError on len().
    count = len(versions) if isinstance(versions, list) else 0
    return make_result(
        "skyvern_script_versions",
        data={"script_id": script_id, "versions": versions, "count": count},
        timing_ms=timer.timing_ms,
    )
async def skyvern_script_fallback_episodes(
    workflow_id: Annotated[str, Field(description="Workflow permanent ID (starts with wpid_)")],
    workflow_run_id: Annotated[str | None, Field(description="Filter to a specific run (starts with wr_)")] = None,
    block_label: Annotated[str | None, Field(description="Filter to a specific block label")] = None,
    page: Annotated[int, Field(description="Page number (1-based)", ge=1)] = 1,
    page_size: Annotated[int, Field(description="Results per page", ge=1, le=100)] = 20,
) -> dict[str, Any]:
    """List AI fallback episodes for a workflow's cached scripts. Each episode records
    when a cached script's selector failed and the AI agent took over. Shows error details,
    block label, and whether the fallback succeeded. Useful for understanding why a script
    fell back to AI and how the script evolved."""
    if err := validate_workflow_id(workflow_id, "skyvern_script_fallback_episodes"):
        return err
    if workflow_run_id is not None and (err := validate_run_id(workflow_run_id, "skyvern_script_fallback_episodes")):
        return err

    # Only send the optional filters that were actually supplied.
    params: dict[str, Any] = {"page": page, "page_size": page_size}
    if workflow_run_id is not None:
        params["workflow_run_id"] = workflow_run_id
    if block_label is not None:
        params["block_label"] = block_label

    with Timer() as timer:
        try:
            data = await raw_http_get(f"v1/workflows/{workflow_id}/fallback-episodes", params=params)
            timer.mark("api")
        except NotFoundError:
            return make_result(
                "skyvern_script_fallback_episodes",
                ok=False,
                timing_ms=timer.timing_ms,
                error=make_error(
                    ErrorCode.WORKFLOW_NOT_FOUND,
                    f"Workflow {workflow_id!r} not found",
                    "Verify the workflow ID with skyvern_workflow_list",
                ),
            )
        except Exception as e:
            LOG.error("script_fallback_episodes_failed", workflow_id=workflow_id, error=str(e))
            return make_result(
                "skyvern_script_fallback_episodes",
                ok=False,
                timing_ms=timer.timing_ms,
                error=make_error(ErrorCode.API_ERROR, str(e), "Check the workflow ID and your API key"),
            )

    if isinstance(data, dict):
        payload: dict[str, Any] = {
            "workflow_id": workflow_id,
            "episodes": data.get("episodes", []),
            "total_count": data.get("total_count", 0),
            "page": data.get("page", page),
            "page_size": data.get("page_size", page_size),
        }
    else:
        # Unexpected payload shape: pass it through with a best-effort count.
        payload = {
            "workflow_id": workflow_id,
            "episodes": data,
            "total_count": len(data) if isinstance(data, list) else 0,
        }
    return make_result("skyvern_script_fallback_episodes", data=payload, timing_ms=timer.timing_ms)
async def skyvern_script_deploy(
    script_id: Annotated[str, Field(description="Script ID to deploy a new version for (starts with s_)")],
    files: Annotated[
        str,
        Field(
            description='JSON array of file objects: [{"path": "main.py", "content": "<base64-encoded>", "encoding": "base64"}]'
        ),
    ],
) -> dict[str, Any]:
    """Deploy a new version of a cached script with updated files. Creates a new version
    that will be used on the next workflow run. File content must be base64-encoded."""
    if err := validate_script_id(script_id, "skyvern_script_deploy"):
        return err

    # Parse and type-check the file list up front so the API is only hit with valid input.
    try:
        decoded = json.loads(files)
        if not isinstance(decoded, list):
            raise ValueError("files must be a JSON array")
        script_files = [ScriptFileCreate(**entry) for entry in decoded]
    except (json.JSONDecodeError, TypeError, ValueError, ValidationError) as exc:
        return make_result(
            "skyvern_script_deploy",
            ok=False,
            error=make_error(
                ErrorCode.INVALID_INPUT,
                f"Invalid files JSON: {exc}",
                'Provide a JSON array: [{"path": "main.py", "content": "<base64>", "encoding": "base64"}]',
            ),
        )

    skyvern = get_skyvern()
    with Timer() as timer:
        try:
            deployed = await skyvern.deploy_script(script_id, files=script_files)
            timer.mark("sdk")
        except NotFoundError:
            return make_result(
                "skyvern_script_deploy",
                ok=False,
                timing_ms=timer.timing_ms,
                error=make_error(
                    ErrorCode.INVALID_INPUT,
                    f"Script {script_id!r} not found",
                    "Use skyvern_script_list_for_workflow to find valid script IDs",
                ),
            )
        except Exception as e:
            LOG.error("script_deploy_failed", script_id=script_id, error=str(e))
            return make_result(
                "skyvern_script_deploy",
                ok=False,
                timing_ms=timer.timing_ms,
                error=make_error(ErrorCode.API_ERROR, str(e), "Check the script ID and your API key"),
            )

    response: dict[str, Any] = {"script_id": script_id}
    # SDK responses may be pydantic models or plain dicts; merge either shape.
    if hasattr(deployed, "model_dump"):
        response.update(deployed.model_dump(mode="json"))
    elif isinstance(deployed, dict):
        response.update(deployed)
    return make_result("skyvern_script_deploy", data=response, timing_ms=timer.timing_ms)

View file

@ -24,7 +24,7 @@ from skyvern.schemas.workflows import WorkflowCreateYAMLRequest as WorkflowCreat
from ._common import ErrorCode, Timer, make_error, make_result
from ._session import get_skyvern
from ._validation import validate_folder_id
from ._validation import validate_folder_id, validate_run_id, validate_workflow_id
LOG = structlog.get_logger()
_SUMMARY_TOP_LEVEL_KEY_LIMIT = 8
@ -45,7 +45,7 @@ def _serialize_workflow(wf: Any) -> dict[str, Any]:
Uses Any to avoid tight coupling with Fern-generated client types.
"""
return {
data: dict[str, Any] = {
"workflow_permanent_id": wf.workflow_permanent_id,
"workflow_id": wf.workflow_id,
"title": wf.title,
@ -57,6 +57,11 @@ def _serialize_workflow(wf: Any) -> dict[str, Any]:
"created_at": wf.created_at.isoformat() if wf.created_at else None,
"modified_at": wf.modified_at.isoformat() if wf.modified_at else None,
}
for caching_field in ("run_with", "code_version", "adaptive_caching"):
val = getattr(wf, caching_field, None)
if val is not None:
data[caching_field] = val
return data
def _serialize_workflow_full(wf: Any) -> dict[str, Any]:
@ -87,6 +92,7 @@ def _serialize_run(run: Any) -> dict[str, Any]:
"app_url",
"browser_session_id",
"run_with",
"ai_fallback",
):
val = getattr(run, field, None)
if val is not None:
@ -103,6 +109,10 @@ def _serialize_run(run: Any) -> dict[str, Any]:
if val is not None:
data[ts_field] = val.isoformat()
script_run = getattr(run, "script_run", None)
if script_run is not None:
data["script_run"] = script_run.model_dump(mode="json") if hasattr(script_run, "model_dump") else script_run
return data
@ -288,6 +298,12 @@ def _serialize_run_summary(run: Any) -> dict[str, Any]:
if run_with:
summary["run_with"] = run_with
script_run = _get_value(run, "script_run")
if script_run is not None:
sr = _jsonable(script_run)
if isinstance(sr, dict) and sr.get("ai_fallback_triggered") is not None:
summary["ai_fallback_triggered"] = sr["ai_fallback_triggered"]
workflow_title = _get_value(run, "workflow_title")
if workflow_title:
summary["workflow_title"] = workflow_title
@ -326,6 +342,8 @@ def _serialize_run_full(run: Any) -> dict[str, Any]:
"browser_profile_id",
"run_with",
"total_steps",
"script_run",
"ai_fallback",
):
value = _get_value(run, field)
if value is not None:
@ -368,56 +386,6 @@ async def _get_workflow_run_status(
return response.json()
def _validate_workflow_id(workflow_id: str, action: str) -> dict[str, Any] | None:
    """Validate workflow_id format. Returns a make_result error dict or None if valid."""
    # Reject path separators first: the ID is interpolated into API route paths.
    if "/" in workflow_id or "\\" in workflow_id:
        return make_result(
            action,
            ok=False,
            error=make_error(
                ErrorCode.INVALID_INPUT,
                "workflow_id must not contain path separators",
                "Provide a valid workflow permanent ID (starts with wpid_)",
            ),
        )
    # Well-formed workflow permanent IDs carry the wpid_ prefix.
    if not workflow_id.startswith("wpid_"):
        return make_result(
            action,
            ok=False,
            error=make_error(
                ErrorCode.INVALID_INPUT,
                f"Invalid workflow_id format: {workflow_id!r}",
                "Workflow IDs start with wpid_. Use skyvern_workflow_list to find valid IDs.",
            ),
        )
    return None
def _validate_run_id(run_id: str, action: str) -> dict[str, Any] | None:
    """Validate run_id format. Returns a make_result error dict or None if valid."""
    # Reject path separators first: the ID is interpolated into API route paths.
    if "/" in run_id or "\\" in run_id:
        return make_result(
            action,
            ok=False,
            error=make_error(
                ErrorCode.INVALID_INPUT,
                "run_id must not contain path separators",
                "Provide a valid run ID (starts with wr_ or tsk_v2_)",
            ),
        )
    # Workflow runs (wr_) and task runs (tsk_v2_) are both accepted.
    if not run_id.startswith("wr_") and not run_id.startswith("tsk_v2_"):
        return make_result(
            action,
            ok=False,
            error=make_error(
                ErrorCode.INVALID_INPUT,
                f"Invalid run_id format: {run_id!r}",
                "Run IDs start with wr_ (workflow runs) or tsk_v2_ (task runs). Check skyvern_workflow_run output.",
            ),
        )
    return None
async def _get_workflow_by_id(workflow_id: str, version: int | None = None) -> dict[str, Any]:
"""Fetch a single workflow by ID via the Skyvern API.
@ -780,7 +748,7 @@ async def skyvern_workflow_get(
) -> dict[str, Any]:
"""Get the full definition of a specific workflow. Use when you need to inspect a workflow's
blocks, parameters, and configuration before running or updating it."""
if err := _validate_workflow_id(workflow_id, "skyvern_workflow_get"):
if err := validate_workflow_id(workflow_id, "skyvern_workflow_get"):
return err
with Timer() as timer:
@ -937,7 +905,7 @@ async def skyvern_workflow_update(
) -> dict[str, Any]:
"""Update an existing workflow's definition. Use when you need to modify a workflow's blocks,
parameters, or configuration. Creates a new version of the workflow."""
if err := _validate_workflow_id(workflow_id, "skyvern_workflow_update"):
if err := validate_workflow_id(workflow_id, "skyvern_workflow_update"):
return err
if format not in ("json", "yaml", "auto"):
@ -1017,7 +985,7 @@ async def skyvern_workflow_delete(
) -> dict[str, Any]:
"""Delete a workflow permanently. Use when you need to remove a workflow that is no longer needed.
Requires force=true to prevent accidental deletion."""
if err := _validate_workflow_id(workflow_id, "skyvern_workflow_delete"):
if err := validate_workflow_id(workflow_id, "skyvern_workflow_delete"):
return err
if not force:
@ -1077,7 +1045,7 @@ async def skyvern_workflow_update_folder(
] = None,
) -> dict[str, Any]:
"""Assign a workflow to a folder, or remove it from its current folder."""
if err := _validate_workflow_id(workflow_id, "skyvern_workflow_update_folder"):
if err := validate_workflow_id(workflow_id, "skyvern_workflow_update_folder"):
return err
if folder_id is not None and (err := validate_folder_id(folder_id, "skyvern_workflow_update_folder")):
return err
@ -1152,7 +1120,7 @@ async def skyvern_workflow_run(
Returns immediately by default (async) set wait=true to block until completion.
Default timeout is 300s (5 minutes). For longer workflows, increase timeout_seconds
or use wait=false and poll with skyvern_workflow_status."""
if err := _validate_workflow_id(workflow_id, "skyvern_workflow_run"):
if err := validate_workflow_id(workflow_id, "skyvern_workflow_run"):
return err
parsed_params: dict[str, Any] | None = None
@ -1248,7 +1216,7 @@ async def skyvern_workflow_status(
) -> dict[str, Any]:
"""Check the status and progress of a workflow or task run. Use when you need to monitor
a running workflow, check if it completed, or retrieve its output."""
if err := _validate_run_id(run_id, "skyvern_workflow_status"):
if err := validate_run_id(run_id, "skyvern_workflow_status"):
return err
if verbosity not in {"summary", "full"}:
return make_result(
@ -1307,7 +1275,7 @@ async def skyvern_workflow_cancel(
) -> dict[str, Any]:
"""Cancel a running workflow or task. Use when you need to stop a workflow that is taking
too long, is stuck, or is no longer needed."""
if err := _validate_run_id(run_id, "skyvern_workflow_cancel"):
if err := validate_run_id(run_id, "skyvern_workflow_cancel"):
return err
skyvern = get_skyvern()

View file

@ -0,0 +1,42 @@
from __future__ import annotations
from types import SimpleNamespace
from unittest.mock import AsyncMock, Mock
import pytest
import skyvern.cli.mcp_tools._common as common_tools
@pytest.mark.asyncio
async def test_raw_http_get_returns_empty_dict_for_204(monkeypatch: pytest.MonkeyPatch) -> None:
    """A 204 response short-circuits to {} without ever touching response.json()."""
    no_content = SimpleNamespace(
        status_code=204,
        text="",
        json=Mock(side_effect=AssertionError("json() should not be called for 204 responses")),
    )
    stub_http = SimpleNamespace(request=AsyncMock(return_value=no_content))
    stub_skyvern = SimpleNamespace(_client_wrapper=SimpleNamespace(httpx_client=stub_http))
    monkeypatch.setattr("skyvern.cli.mcp_tools._session.get_skyvern", lambda: stub_skyvern)

    assert await common_tools.raw_http_get("v1/test") == {}
@pytest.mark.asyncio
async def test_raw_http_get_returns_raw_text_for_non_json_success(monkeypatch: pytest.MonkeyPatch) -> None:
    """A 2xx body that fails JSON decoding is wrapped as {"raw": <text>}."""
    html_response = SimpleNamespace(
        status_code=200,
        text="<html>ok</html>",
        json=Mock(side_effect=ValueError("not json")),
    )
    stub_http = SimpleNamespace(request=AsyncMock(return_value=html_response))
    stub_skyvern = SimpleNamespace(_client_wrapper=SimpleNamespace(httpx_client=stub_http))
    monkeypatch.setattr("skyvern.cli.mcp_tools._session.get_skyvern", lambda: stub_skyvern)

    assert await common_tools.raw_http_get("v1/test") == {"raw": "<html>ok</html>"}

View file

@ -0,0 +1,482 @@
"""Live MCP server tests for script/caching tools.
Tests call tools through the actual FastMCP Client, exactly as Claude Code would.
API responses are mocked at the HTTP layer so we test the full MCP pipeline:
Client FastMCP tool function raw_http_get/SDK (mocked) API
"""
from __future__ import annotations
import json
from types import SimpleNamespace
from unittest.mock import AsyncMock
import pytest
from fastmcp import Client
import skyvern.cli.mcp_tools.scripts as script_tools
import skyvern.cli.mcp_tools.workflow as workflow_tools
from skyvern.cli.mcp_tools import mcp
from skyvern.client.types import ScriptFileCreate
# ---------------------------------------------------------------------------
# Fake API payloads
# ---------------------------------------------------------------------------
FAKE_SCRIPTS = {
"scripts": [
{
"script_id": "s_abc",
"cache_key": "hash",
"cache_key_value": "default",
"status": "published",
"latest_version": 2,
"version_count": 2,
"total_runs": 5,
"success_rate": 0.8,
"is_pinned": False,
}
]
}
FAKE_CODE = {
"blocks": {
"fill_form": "async def fill_form(page, ctx):\n await page.fill('xpath=//input', ctx.parameters['name'])\n",
},
"main_script": "import skyvern\n\n@skyvern.workflow(title='Test')\nasync def run(params):\n pass\n",
"script_id": "s_abc",
"version": 2,
}
FAKE_VERSIONS = {
"versions": [
{"version": 1, "script_revision_id": "srev_1", "created_at": "2026-03-20T10:00:00Z", "run_id": "wr_001"},
{"version": 2, "script_revision_id": "srev_2", "created_at": "2026-03-22T14:00:00Z", "run_id": "wr_002"},
]
}
FAKE_EPISODES = {
"episodes": [
{
"episode_id": "ep_1",
"block_label": "fill_form",
"fallback_type": "selector_miss",
"error_message": "Element not found: site redesigned",
"classify_result": None,
"fallback_succeeded": True,
"workflow_run_id": "wr_002",
"page_url": "https://example.com/form",
"reviewed": True,
"created_at": "2026-03-22T14:01:00Z",
}
],
"total_count": 1,
"page": 1,
"page_size": 20,
}
def _mock_raw_http(responses: dict):
    """Build an async stand-in for ``raw_http_get``.

    Dispatch is by substring: the first key (in insertion order) that appears
    inside the requested path selects the canned payload. A path matching no
    key raises, so any unexpected API call fails the test loudly.
    """

    async def _routed(path, params=None):
        hits = (payload for needle, payload in responses.items() if needle in path)
        for payload in hits:
            return payload
        raise RuntimeError(f"Unmocked path: {path}")

    return _routed
# ---------------------------------------------------------------------------
# Scenario 1: "Show me the scripts for this workflow"
# ---------------------------------------------------------------------------
@pytest.mark.asyncio
async def test_list_scripts_via_mcp(monkeypatch):
    """Listing cached scripts for a workflow surfaces summaries through MCP."""
    fake_http = _mock_raw_http({"scripts/workflows/": FAKE_SCRIPTS})
    monkeypatch.setattr(script_tools, "raw_http_get", fake_http)

    async with Client(mcp) as client:
        result = await client.call_tool(
            "skyvern_script_list_for_workflow",
            {"workflow_id": "wpid_test"},
        )

    payload = result.data
    assert payload["ok"] is True
    listed = payload["data"]["scripts"]
    assert len(listed) == 1
    first = listed[0]
    assert first["script_id"] == "s_abc"
    assert first["success_rate"] == 0.8
    assert first["version"] == 2
@pytest.mark.asyncio
@pytest.mark.parametrize(
    ("payload", "expected_scripts"),
    [
        ({"scripts": None}, None),
        ({"scripts": {"unexpected": "shape"}}, {"unexpected": "shape"}),
    ],
)
async def test_list_scripts_handles_missing_script_list_via_mcp(monkeypatch, payload, expected_scripts):
    """A non-list 'scripts' payload is passed through unchanged with count 0."""
    monkeypatch.setattr(
        script_tools,
        "raw_http_get",
        _mock_raw_http({"scripts/workflows/": payload}),
    )

    async with Client(mcp) as client:
        result = await client.call_tool(
            "skyvern_script_list_for_workflow",
            {"workflow_id": "wpid_test"},
        )

    body = result.data
    assert body["ok"] is True
    assert body["data"]["scripts"] == expected_scripts
    assert body["data"]["count"] == 0
# ---------------------------------------------------------------------------
# Scenario 2: "Print the script that was made"
# ---------------------------------------------------------------------------
@pytest.mark.asyncio
async def test_get_script_code_via_mcp(monkeypatch):
    """Fetching an explicit version returns its block code and main script."""
    monkeypatch.setattr(
        script_tools,
        "raw_http_get",
        _mock_raw_http({"scripts/s_abc/versions/2": FAKE_CODE}),
    )

    async with Client(mcp) as client:
        result = await client.call_tool(
            "skyvern_script_get_code",
            {"script_id": "s_abc", "version": 2},
        )

    assert result.data["ok"] is True
    code = result.data["data"]
    blocks = code["blocks"]
    assert "fill_form" in blocks
    assert "page.fill" in blocks["fill_form"]
    assert "@skyvern.workflow" in code["main_script"]
@pytest.mark.asyncio
async def test_get_script_code_resolves_latest_via_mcp(monkeypatch):
    """When version is omitted, tool fetches metadata first to find latest."""
    # NOTE: the mock routes by first substring match in insertion order, so
    # the more specific versions path must come before the bare metadata path.
    routes = {
        "v1/scripts/s_abc/versions/2": FAKE_CODE,
        "v1/scripts/s_abc": {"script_id": "s_abc", "version": 2},
    }
    monkeypatch.setattr(script_tools, "raw_http_get", _mock_raw_http(routes))

    async with Client(mcp) as client:
        result = await client.call_tool(
            "skyvern_script_get_code",
            {"script_id": "s_abc"},
        )

    assert result.data["ok"] is True
    assert result.data["data"]["version"] == 2
# ---------------------------------------------------------------------------
# Scenario 3: "How did the script evolve?"
# ---------------------------------------------------------------------------
@pytest.mark.asyncio
async def test_script_versions_via_mcp(monkeypatch):
    """Version history comes back with one ordered entry per revision."""
    monkeypatch.setattr(
        script_tools,
        "raw_http_get",
        _mock_raw_http({"versions": FAKE_VERSIONS}),
    )

    async with Client(mcp) as client:
        result = await client.call_tool(
            "skyvern_script_versions",
            {"script_id": "s_abc"},
        )

    assert result.data["ok"] is True
    history = result.data["data"]["versions"]
    assert [entry["version"] for entry in history] == [1, 2]
# ---------------------------------------------------------------------------
# Scenario 4: "Why did it fall back to AI?"
# ---------------------------------------------------------------------------
@pytest.mark.asyncio
async def test_fallback_episodes_via_mcp(monkeypatch):
    """Episode listings explain why a cached script fell back to the AI."""
    monkeypatch.setattr(
        script_tools,
        "raw_http_get",
        _mock_raw_http({"fallback-episodes": FAKE_EPISODES}),
    )

    async with Client(mcp) as client:
        result = await client.call_tool(
            "skyvern_script_fallback_episodes",
            {"workflow_id": "wpid_test"},
        )

    assert result.data["ok"] is True
    body = result.data["data"]
    assert body["total_count"] == 1
    episode = body["episodes"][0]
    assert episode["fallback_type"] == "selector_miss"
    assert "site redesigned" in episode["error_message"]
    assert episode["fallback_succeeded"] is True
@pytest.mark.asyncio
async def test_fallback_episodes_rejects_invalid_workflow_run_id_via_mcp(monkeypatch):
    """A malformed workflow_run_id is rejected before any HTTP call happens."""
    http_spy = AsyncMock(return_value=FAKE_EPISODES)
    monkeypatch.setattr(script_tools, "raw_http_get", http_spy)

    async with Client(mcp) as client:
        result = await client.call_tool(
            "skyvern_script_fallback_episodes",
            {"workflow_id": "wpid_test", "workflow_run_id": "bad_run_id"},
        )

    assert result.data["ok"] is False
    assert result.data["error"]["code"] == script_tools.ErrorCode.INVALID_INPUT
    http_spy.assert_not_awaited()
# ---------------------------------------------------------------------------
# Scenario 5: "Edit the script"
# ---------------------------------------------------------------------------
@pytest.mark.asyncio
async def test_deploy_script_via_mcp(monkeypatch):
    """Deploying an edited file decodes the JSON manifest into ScriptFileCreate objects.

    Fix: the `import base64` previously sat mid-function; it is hoisted to the
    module's import block per PEP 8.
    """
    deploy_resp = SimpleNamespace(
        script_id="s_abc",
        version=3,
        script_revision_id="srev_3",
        model_dump=lambda mode="python": {"script_id": "s_abc", "version": 3, "script_revision_id": "srev_3"},
    )
    fake_client = SimpleNamespace(deploy_script=AsyncMock(return_value=deploy_resp))
    monkeypatch.setattr(script_tools, "get_skyvern", lambda: fake_client)

    # Files travel as a JSON string of {path, content(base64), encoding} dicts.
    files = json.dumps([{"path": "main.py", "content": base64.b64encode(b"# edited").decode(), "encoding": "base64"}])

    async with Client(mcp) as client:
        result = await client.call_tool(
            "skyvern_script_deploy",
            {"script_id": "s_abc", "files": files},
        )

    assert result.data["ok"] is True
    assert result.data["data"]["version"] == 3
    fake_client.deploy_script.assert_awaited_once()
    # The tool must hand the SDK typed ScriptFileCreate objects, not raw dicts.
    called_files = fake_client.deploy_script.await_args.kwargs["files"]
    assert len(called_files) == 1
    assert isinstance(called_files[0], ScriptFileCreate)
    assert called_files[0].path == "main.py"
# ---------------------------------------------------------------------------
# Scenario 6: Workflow create shows caching defaults
# ---------------------------------------------------------------------------
@pytest.mark.asyncio
async def test_workflow_create_surfaces_caching_fields_via_mcp(monkeypatch):
    """Workflow creation responses expose code_version / run_with / adaptive_caching.

    Fix: the datetime import previously sat mid-function; it is hoisted to the
    module's import block per PEP 8.
    """
    now = datetime.now(timezone.utc)
    fake_wf = SimpleNamespace(
        workflow_permanent_id="wpid_new",
        workflow_id="wf_1",
        title="Test",
        version=1,
        status="published",
        description=None,
        is_saved_task=False,
        folder_id=None,
        created_at=now,
        modified_at=now,
        code_version=2,
        adaptive_caching=True,
        run_with="code",
    )
    fake_client = SimpleNamespace(create_workflow=AsyncMock(return_value=fake_wf))
    monkeypatch.setattr(workflow_tools, "get_skyvern", lambda: fake_client)

    definition = json.dumps(
        {
            "title": "Test",
            "workflow_definition": {
                "parameters": [],
                "blocks": [
                    {
                        "block_type": "navigation",
                        "label": "s1",
                        "url": "https://example.com",
                        "navigation_goal": "Click",
                    }
                ],
            },
        }
    )

    async with Client(mcp) as client:
        result = await client.call_tool(
            "skyvern_workflow_create",
            {"definition": definition, "format": "json"},
        )

    assert result.data["ok"] is True
    data = result.data["data"]
    assert data["code_version"] == 2
    assert data["run_with"] == "code"
    assert data["adaptive_caching"] is True
# ---------------------------------------------------------------------------
# Scenario 7: Run status shows script_run + ai_fallback
# ---------------------------------------------------------------------------
@pytest.mark.asyncio
async def test_workflow_status_shows_script_run_via_mcp(monkeypatch):
    """Full-verbosity status surfaces run_with and the script_run fallback flag."""
    status_payload = {
        "workflow_run_id": "wr_test",
        "status": "completed",
        "run_with": "code",
        "workflow_title": "Test",
        "script_run": {"ai_fallback_triggered": True, "script_id": "s_abc"},
        "outputs": {"result": "ok"},
    }
    # The tool reaches through the SDK's private httpx client, so the fake
    # mirrors that nesting: client._client_wrapper.httpx_client.request(...).
    fake_resp = SimpleNamespace(status_code=200, json=lambda: status_payload, text="")
    fake_httpx = SimpleNamespace(request=AsyncMock(return_value=fake_resp))
    fake_client = SimpleNamespace(_client_wrapper=SimpleNamespace(httpx_client=fake_httpx))
    monkeypatch.setattr(workflow_tools, "get_skyvern", lambda: fake_client)

    async with Client(mcp) as client:
        result = await client.call_tool(
            "skyvern_workflow_status",
            {"run_id": "wr_test", "verbosity": "full"},
        )

    assert result.data["ok"] is True
    body = result.data["data"]
    assert body["run_with"] == "code"
    assert body["script_run"]["ai_fallback_triggered"] is True
# ---------------------------------------------------------------------------
# Validation: bad inputs get clear errors
# ---------------------------------------------------------------------------
@pytest.mark.asyncio
async def test_bad_workflow_id_returns_error_via_mcp():
    """Workflow ids lacking the wpid_ prefix yield a clear validation error."""
    async with Client(mcp) as client:
        result = await client.call_tool(
            "skyvern_script_list_for_workflow",
            {"workflow_id": "not_a_wpid"},
        )

    body = result.data
    assert body["ok"] is False
    assert "wpid_" in str(body["error"])
@pytest.mark.asyncio
async def test_bad_script_id_returns_error_via_mcp():
    """Script ids lacking the s_ prefix yield a clear validation error."""
    async with Client(mcp) as client:
        result = await client.call_tool(
            "skyvern_script_get_code",
            {"script_id": "wrong_prefix"},
        )

    body = result.data
    assert body["ok"] is False
    assert "s_" in str(body["error"])
@pytest.mark.asyncio
async def test_bad_deploy_json_returns_error_via_mcp():
    """An unparseable files manifest fails fast with a JSON-specific message."""
    async with Client(mcp) as client:
        result = await client.call_tool(
            "skyvern_script_deploy",
            {"script_id": "s_abc", "files": "not json"},
        )

    body = result.data
    assert body["ok"] is False
    assert "JSON" in body["error"]["message"]