[SKY-8322] Compact MCP workflow status responses and fix wr lookup (#5084)

This commit is contained in:
Marc Kelechava 2026-03-12 18:47:21 -07:00 committed by GitHub
parent a0d082da40
commit 965fac68c7
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
21 changed files with 5015 additions and 340 deletions

View file

@ -223,7 +223,7 @@ skip = ["webeye/actions/__init__.py", "forge/sdk/__init__.py"]
plugins = "sqlalchemy.ext.mypy.plugin"
[project.scripts]
skyvern = "skyvern.cli.commands:cli_app"
skyvern = "skyvern.__main__:main"
[tool.pytest.ini_options]
norecursedirs = ["eval", "tests/sdk"]

View file

@ -1,4 +1,11 @@
from skyvern.cli.commands import cli_app
def main() -> None:
from skyvern._cli_bootstrap import configure_cli_bootstrap_logging # noqa: PLC0415
configure_cli_bootstrap_logging()
from skyvern.cli.commands import cli_app # noqa: PLC0415
cli_app() # type: ignore
if __name__ == "__main__":
cli_app() # type: ignore
main()

32
skyvern/_cli_bootstrap.py Normal file
View file

@ -0,0 +1,32 @@
import logging
from collections.abc import Set
_DEFAULT_CLI_LOG_LEVEL = "WARNING"
_QUIET_CLI_LOGGERS = ("skyvern", "httpx", "litellm", "playwright", "httpcore")
def _resolve_cli_log_level_name(settings: object) -> str:
"""Honor explicit settings while keeping CLI defaults quiet."""
fields_set: Set[str] = getattr(settings, "model_fields_set", set())
configured_level = str(getattr(settings, "LOG_LEVEL", _DEFAULT_CLI_LOG_LEVEL)).upper()
if "LOG_LEVEL" in fields_set:
return configured_level
return _DEFAULT_CLI_LOG_LEVEL
def configure_cli_bootstrap_logging() -> None:
"""Clamp CLI process logging before importing the command tree."""
from skyvern.config import settings # noqa: PLC0415
from skyvern.forge.sdk.forge_log import setup_logger # noqa: PLC0415
log_level_name = _resolve_cli_log_level_name(settings)
settings.LOG_LEVEL = log_level_name
setup_logger()
log_level = logging.getLevelName(log_level_name)
if not isinstance(log_level, int):
log_level = logging.WARNING
logging.getLogger().setLevel(log_level)
for logger_name in _QUIET_CLI_LOGGERS:
logging.getLogger(logger_name).setLevel(log_level)

View file

@ -1,9 +1,7 @@
import logging
import typer
from dotenv import load_dotenv
from skyvern.forge.sdk.forge_log import setup_logger as _setup_logger
from skyvern._cli_bootstrap import configure_cli_bootstrap_logging as _configure_cli_bootstrap_logging
from skyvern.utils.env_paths import resolve_backend_env_path
from ..auth_command import login as login_command
@ -34,10 +32,8 @@ def configure_cli_logging() -> None:
return
_cli_logging_configured = True
# Suppress noisy SDK/third-party logs for CLI execution only.
for logger_name in ("skyvern", "httpx", "litellm", "playwright", "httpcore"):
logging.getLogger(logger_name).setLevel(logging.WARNING)
_setup_logger()
# Keep callback-time execution aligned with the entrypoint bootstrap.
_configure_cli_bootstrap_logging()
cli_app = typer.Typer(

View file

@ -1,9 +1,16 @@
from dotenv import load_dotenv
from skyvern._cli_bootstrap import configure_cli_bootstrap_logging
from skyvern.utils.env_paths import resolve_backend_env_path
from . import cli_app
if __name__ == "__main__": # pragma: no cover - manual CLI invocation
def main() -> None:
    """Package entrypoint: clamp logging, then load env and dispatch the CLI."""
    configure_cli_bootstrap_logging()
    # Import the command tree only after logging is configured.
    from . import cli_app  # noqa: PLC0415
    load_dotenv(resolve_backend_env_path())
    cli_app()
if __name__ == "__main__":  # pragma: no cover - manual CLI invocation
    main()

View file

@ -9,8 +9,9 @@ from __future__ import annotations
import asyncio
import json
from datetime import datetime
from enum import Enum
from typing import Annotated, Any
from typing import Annotated, Any, Literal
import structlog
import yaml
@ -26,6 +27,13 @@ from ._session import get_skyvern
from ._validation import validate_folder_id
LOG = structlog.get_logger()
_SUMMARY_TOP_LEVEL_KEY_LIMIT = 8
_SUMMARY_SCALAR_PREVIEW_LIMIT = 3
_SUMMARY_ARTIFACT_PREVIEW_LIMIT = 4
_SUMMARY_STRING_PREVIEW_LIMIT = 120
_SUMMARY_RECURSION_LIMIT = 10
_SCREENSHOT_LIST_KEYS = frozenset({"task_screenshots", "workflow_screenshots", "screenshot_urls"})
_SCREENSHOT_ARTIFACT_ID_KEYS = frozenset({"task_screenshot_artifact_ids", "workflow_screenshot_artifact_ids"})
# ---------------------------------------------------------------------------
# Helpers
@ -98,6 +106,268 @@ def _serialize_run(run: Any) -> dict[str, Any]:
return data
def _get_value(obj: Any, key: str, default: Any = None) -> Any:
if isinstance(obj, dict):
return obj.get(key, default)
return getattr(obj, key, default)
def _get_run_id(run: Any) -> str | None:
    """Prefer the generic run_id; fall back to workflow_run_id when it is falsy."""
    primary = _get_value(run, "run_id")
    if primary:
        return primary
    return _get_value(run, "workflow_run_id")
def _jsonable(value: Any) -> Any:
if hasattr(value, "model_dump"):
return value.model_dump(mode="json")
if isinstance(value, datetime):
return value.isoformat()
if isinstance(value, list):
return [_jsonable(item) for item in value]
if isinstance(value, tuple):
return [_jsonable(item) for item in value]
if isinstance(value, dict):
return {k: _jsonable(v) for k, v in value.items()}
return value
def _truncate_preview(value: Any) -> Any:
    """JSON-encode *value* and clip long strings to the preview limit with '...'."""
    converted = _jsonable(value)
    if not isinstance(converted, str):
        return converted
    if len(converted) <= _SUMMARY_STRING_PREVIEW_LIMIT:
        return converted
    # Reserve three characters for the ellipsis marker.
    return converted[: _SUMMARY_STRING_PREVIEW_LIMIT - 3] + "..."
def _is_scalarish(value: Any) -> bool:
return value is None or isinstance(value, str | int | float | bool)
def _init_output_stats() -> dict[str, Any]:
return {
"has_extracted_information": False,
"nested_screenshot_count": 0,
"artifact_id_count": 0,
"artifact_ids_preview": [],
}
def _note_artifact_ids(stats: dict[str, Any], values: list[Any]) -> None:
    """Add *values* to the artifact-id count and extend the deduped preview.

    The preview list is capped at _SUMMARY_ARTIFACT_PREVIEW_LIMIT entries;
    the total count always reflects every id seen.
    """
    stats["artifact_id_count"] += len(values)
    preview: list[Any] = stats["artifact_ids_preview"]
    for raw in values:
        if len(preview) >= _SUMMARY_ARTIFACT_PREVIEW_LIMIT:
            break
        candidate = str(raw)
        if candidate not in preview:
            preview.append(candidate)
def _scan_output_value(value: Any, stats: dict[str, Any], depth: int = 0) -> None:
    """Walk nested output, tallying screenshots/artifact ids into *stats*.

    Recursion is capped at _SUMMARY_RECURSION_LIMIT levels to guard against
    pathological nesting; values are JSON-normalized before inspection.
    """
    if depth > _SUMMARY_RECURSION_LIMIT:
        return
    value = _jsonable(value)
    if isinstance(value, list):
        for entry in value:
            _scan_output_value(entry, stats, depth + 1)
        return
    if not isinstance(value, dict):
        return
    for key, nested in value.items():
        if key == "extracted_information" and nested is not None:
            stats["has_extracted_information"] = True
        if isinstance(nested, list):
            if key in _SCREENSHOT_LIST_KEYS:
                stats["nested_screenshot_count"] += len(nested)
            if key in _SCREENSHOT_ARTIFACT_ID_KEYS:
                _note_artifact_ids(stats, nested)
        _scan_output_value(nested, stats, depth + 1)
def _summarize_output_value(output_value: Any) -> tuple[dict[str, Any], dict[str, Any]]:
    """Build a compact summary of a run's output plus the raw scan statistics.

    Returns ``(summary, stats)``: ``summary`` is the payload fragment exposed
    to callers; ``stats`` is the accumulator from _scan_output_value, reused
    by _summarize_artifacts.
    """
    if output_value is None:
        # No output at all: report absence with empty stats.
        return {"present": False}, _init_output_stats()
    output_value = _jsonable(output_value)
    stats = _init_output_stats()
    _scan_output_value(output_value, stats)
    summary: dict[str, Any] = {"present": True}
    if isinstance(output_value, dict):
        top_level_keys = list(output_value.keys())
        # Cap the listed keys; record the full count only when truncated.
        summary["top_level_keys"] = top_level_keys[:_SUMMARY_TOP_LEVEL_KEY_LIMIT]
        if len(top_level_keys) > _SUMMARY_TOP_LEVEL_KEY_LIMIT:
            summary["top_level_key_count"] = len(top_level_keys)
        summary["block_output_count"] = len([key for key in top_level_keys if key != "extracted_information"])
        # Preview up to _SUMMARY_SCALAR_PREVIEW_LIMIT scalar values (or short
        # all-scalar lists); extracted_information is summarized separately.
        scalar_preview: dict[str, Any] = {}
        for key, value in output_value.items():
            if key == "extracted_information":
                continue
            if _is_scalarish(value):
                scalar_preview[key] = _truncate_preview(value)
            elif (
                isinstance(value, list)
                and value
                and len(value) <= _SUMMARY_SCALAR_PREVIEW_LIMIT
                and all(_is_scalarish(item) for item in value)
            ):
                scalar_preview[key] = [_truncate_preview(item) for item in value]
            if len(scalar_preview) >= _SUMMARY_SCALAR_PREVIEW_LIMIT:
                break
        if scalar_preview:
            summary["scalar_preview"] = scalar_preview
    elif isinstance(output_value, list):
        summary["item_count"] = len(output_value)
        if (
            output_value
            and len(output_value) <= _SUMMARY_SCALAR_PREVIEW_LIMIT
            and all(_is_scalarish(item) for item in output_value)
        ):
            summary["scalar_preview"] = [_truncate_preview(item) for item in output_value]
    else:
        # Scalar output: preview it directly (truncated if a long string).
        summary["scalar_preview"] = _truncate_preview(output_value)
    summary["has_extracted_information"] = stats["has_extracted_information"]
    summary["nested_screenshot_count"] = stats["nested_screenshot_count"]
    summary["artifact_id_count"] = stats["artifact_id_count"]
    return summary, stats
def _summarize_artifacts(run: Any, output_stats: dict[str, Any]) -> dict[str, Any]:
    """Compact artifact availability counts for a run status payload."""
    files = _jsonable(_get_value(run, "downloaded_files")) or []
    screenshots = _jsonable(_get_value(run, "screenshot_urls")) or []
    summary: dict[str, Any] = {
        "recording_available": bool(_get_value(run, "recording_url")),
        "workflow_screenshot_count": len(screenshots),
        "downloaded_file_count": len(files),
        "artifact_id_count": output_stats["artifact_id_count"],
    }
    # Collect non-empty string filenames, previewing only the first few.
    names: list[str] = []
    for file_info in files:
        candidate = _get_value(file_info, "filename")
        if isinstance(candidate, str) and candidate:
            names.append(candidate)
    if names:
        summary["downloaded_file_names"] = names[:_SUMMARY_SCALAR_PREVIEW_LIMIT]
    preview = output_stats["artifact_ids_preview"]
    if preview:
        summary["artifact_ids_preview"] = preview
    return summary
def _serialize_run_summary(run: Any) -> dict[str, Any]:
    """Serialize a run into the compact (verbosity="summary") status payload.

    Accepts either a dict (raw API response) or an SDK object. Falls back to
    workflow_run_id / "outputs" when the generic run_id / "output" fields are
    absent. None-valued entries are stripped from the result.
    """
    run_id = _get_run_id(run)
    run_type = _get_value(run, "run_type")
    if run_type is None and _get_value(run, "workflow_run_id"):
        # A workflow_run_id without an explicit run_type implies a workflow run.
        run_type = "workflow_run"
    output_value = _get_value(run, "output")
    if output_value is None and _get_value(run, "outputs") is not None:
        # Workflow-run payloads use "outputs" rather than "output".
        output_value = _get_value(run, "outputs")
    output_summary, output_stats = _summarize_output_value(output_value)
    summary: dict[str, Any] = {
        "run_id": run_id,
        "status": str(_get_value(run, "status")) if _get_value(run, "status") is not None else None,
        "run_type": str(run_type) if run_type is not None else None,
        "artifact_summary": _summarize_artifacts(run, output_stats),
        "output_summary": output_summary,
    }
    failure_reason = _get_value(run, "failure_reason")
    if failure_reason:
        summary["failure_reason"] = failure_reason
    run_with = _get_value(run, "run_with")
    if run_with:
        summary["run_with"] = run_with
    workflow_title = _get_value(run, "workflow_title")
    if workflow_title:
        summary["workflow_title"] = workflow_title
    # Prefer step_count; report total_steps only when step_count is missing.
    step_count = _get_value(run, "step_count")
    total_steps = _get_value(run, "total_steps")
    if step_count is not None:
        summary["step_count"] = step_count
    elif total_steps is not None:
        summary["total_steps"] = total_steps
    return {key: value for key, value in summary.items() if value is not None}
def _serialize_run_full(run: Any) -> dict[str, Any]:
    """Serialize a run into the verbose (verbosity="full") status payload.

    Non-dict inputs (SDK objects) are delegated to _serialize_run. Dict inputs
    are copied field-by-field from a fixed allowlist, with the workflow-run
    "outputs" key normalized to "output" and timestamps JSON-encoded.
    None values are stripped from the result.
    """
    if not isinstance(run, dict):
        return _serialize_run(run)
    data: dict[str, Any] = {
        "run_id": _get_run_id(run),
        "status": str(_get_value(run, "status")) if _get_value(run, "status") is not None else None,
        "run_type": "workflow_run" if _get_value(run, "workflow_run_id") else _get_value(run, "run_type"),
    }
    # Allowlisted detail fields copied through verbatim (JSON-encoded).
    for field in (
        "workflow_id",
        "workflow_title",
        "failure_reason",
        "recording_url",
        "screenshot_urls",
        "downloaded_files",
        "downloaded_file_urls",
        "parameters",
        "errors",
        "browser_session_id",
        "browser_profile_id",
        "run_with",
        "total_steps",
    ):
        value = _get_value(run, field)
        if value is not None:
            data[field] = _jsonable(value)
    outputs = _get_value(run, "outputs")
    if outputs is not None:
        # Normalize the workflow-run "outputs" field to the generic "output" key.
        data["output"] = _jsonable(outputs)
    for ts_field in ("created_at", "modified_at", "started_at", "finished_at", "queued_at"):
        value = _get_value(run, ts_field)
        if value is not None:
            data[ts_field] = _jsonable(value)
    return {key: value for key, value in data.items() if value is not None}
async def _get_workflow_run_status(
    workflow_run_id: str,
    *,
    include_output_details: bool,
) -> dict[str, Any]:
    """Fetch a workflow run (wr_... id) via the workflow-run detail route.

    Raises NotFoundError on 404 and RuntimeError on any other 4xx/5xx
    response; otherwise returns the parsed JSON body.
    """
    skyvern = get_skyvern()
    # The generated SDK only exposes get_run() for /v1/runs/{run_id}; wr_... IDs
    # require the workflow-run detail route until a public SDK helper exists.
    # NOTE(review): reaches into the private _client_wrapper — replace once the
    # SDK grows a public helper for this route.
    response = await skyvern._client_wrapper.httpx_client.request(
        f"api/v1/workflows/runs/{workflow_run_id}",
        method="GET",
        params={"include_output_details": include_output_details},
    )
    if response.status_code == 404:
        raise NotFoundError(body={"detail": f"Workflow run {workflow_run_id!r} not found"})
    if response.status_code >= 400:
        detail = ""
        try:
            # Prefer the structured error detail; fall back to the raw body text.
            detail = response.json().get("detail", response.text)
        except Exception:
            detail = response.text
        raise RuntimeError(f"HTTP {response.status_code}: {detail}")
    return response.json()
def _validate_workflow_id(workflow_id: str, action: str) -> dict[str, Any] | None:
"""Validate workflow_id format. Returns a make_result error dict or None if valid."""
if "/" in workflow_id or "\\" in workflow_id:
@ -911,17 +1181,36 @@ async def skyvern_workflow_run(
async def skyvern_workflow_status(
run_id: Annotated[str, "Run ID to check (wr_... for workflow runs, tsk_v2_... for task runs)"],
verbosity: Annotated[
Literal["summary", "full"],
Field(description="`summary` returns a compact status payload. `full` includes outputs, timestamps, and URLs."),
] = "summary",
) -> dict[str, Any]:
"""Check the status and progress of a workflow or task run. Use when you need to monitor
a running workflow, check if it completed, or retrieve its output."""
if err := _validate_run_id(run_id, "skyvern_workflow_status"):
return err
skyvern = get_skyvern()
if verbosity not in {"summary", "full"}:
return make_result(
"skyvern_workflow_status",
ok=False,
error=make_error(
ErrorCode.INVALID_INPUT,
f"Invalid verbosity: {verbosity!r}",
"Use verbosity='summary' for compact status or verbosity='full' for full detail.",
),
)
with Timer() as timer:
try:
run = await skyvern.get_run(run_id)
if run_id.startswith("wr_"):
run = await _get_workflow_run_status(
run_id,
include_output_details=verbosity == "full",
)
else:
skyvern = get_skyvern()
run = await skyvern.get_run(run_id)
timer.mark("sdk")
except NotFoundError:
return make_result(
@ -942,8 +1231,14 @@ async def skyvern_workflow_status(
error=make_error(ErrorCode.API_ERROR, str(e), "Check the run ID and your API key"),
)
data = _serialize_run(run)
data["sdk_equivalent"] = f"await skyvern.get_run({run_id!r})"
data = _serialize_run_full(run) if verbosity == "full" else _serialize_run_summary(run)
if run_id.startswith("wr_"):
data["sdk_equivalent"] = f"await skyvern_workflow_status(run_id={run_id!r}, verbosity={verbosity!r})"
else:
verbosity_arg = "" if verbosity == "summary" else f", verbosity={verbosity!r}"
data["sdk_equivalent"] = (
f"await skyvern.get_run({run_id!r}) # or skyvern_workflow_status(run_id={run_id!r}{verbosity_arg})"
)
return make_result("skyvern_workflow_status", data=data, timing_ms=timer.timing_ms)

View file

@ -253,7 +253,7 @@ def workflow_status(
"""Get workflow run status."""
async def _run() -> dict[str, Any]:
return await tool_workflow_status(run_id=run_id)
return await tool_workflow_status(run_id=run_id, verbosity="full")
_run_tool(_run, json_output=json_output, hint_on_exception="Check the run ID and API key.")

View file

@ -28,6 +28,7 @@ class Settings(BaseSettings):
# settings for experimentation
ENABLE_EXP_ALL_TEXTUAL_ELEMENTS_INTERACTABLE: bool = False
ENABLE_DOM_PARSER_V2: bool = False
ADDITIONAL_MODULES: list[str] = []

View file

@ -1,6 +1,7 @@
import asyncio
import copy
import hashlib
from collections import deque
from datetime import timedelta
from typing import Dict, List
@ -81,10 +82,8 @@ def _get_shape_cache_key(hash: str) -> str:
return f"skyvern:shape:{hash}"
def _remove_skyvern_attributes(element: Dict) -> Dict:
"""
To get the original HTML element without skyvern attributes
"""
def _remove_skyvern_attributes_legacy(element: Dict) -> Dict:
"""Legacy: deep-copy based attribute removal (pre-V2)."""
element_copied = copy.deepcopy(element)
for attr in ELEMENT_NODE_ATTRIBUTES:
if element_copied.get(attr):
@ -102,12 +101,36 @@ def _remove_skyvern_attributes(element: Dict) -> Dict:
trimmed_children = []
for child in children:
trimmed_children.append(_remove_skyvern_attributes(child))
trimmed_children.append(_remove_skyvern_attributes_legacy(child))
element_copied["children"] = trimmed_children
return element_copied
def _remove_skyvern_attributes(element: Dict) -> Dict:
    """
    To get the original HTML element without skyvern attributes.
    V2 uses shallow copy + filtered dicts instead of double deepcopy for performance.
    """
    if not settings.ENABLE_DOM_PARSER_V2:
        # Feature flag off: keep the legacy deepcopy-based behavior.
        return _remove_skyvern_attributes_legacy(element)
    # Shallow copy: nested values other than "attributes" and "children" stay
    # shared with the input — callers must not mutate them through the result.
    element_copied = dict(element)
    for attr in ELEMENT_NODE_ATTRIBUTES:
        element_copied.pop(attr, None)
    if "attributes" in element_copied:
        # Rebuild the attribute map without the shape-irrelevant entries.
        element_copied["attributes"] = {
            k: v for k, v in element_copied["attributes"].items() if k not in USELESS_SHAPE_ATTRIBUTE
        }
    children: List[Dict] | None = element_copied.get("children")
    if children is not None:
        # Recurse into children, producing new filtered dicts at every level.
        element_copied["children"] = [_remove_skyvern_attributes(child) for child in children]
    return element_copied
def _add_to_dropped_css_svg_element_map(hashed_key: str | None) -> None:
context = skyvern_context.ensure_context()
if hashed_key:
@ -519,7 +542,7 @@ class AgentFunction:
skyvern_frame = await SkyvernFrame.create_instance(frame=frame)
current_frame_index = context.frame_index_map.get(frame, 0)
queue = []
queue: deque = deque()
element_cnt = 0
eligible_svgs = [] # List to store eligible SVGs and their frames
@ -527,7 +550,7 @@ class AgentFunction:
queue.append(element)
while queue:
queue_ele = queue.pop(0)
queue_ele = queue.popleft()
element_cnt += 1
if element_cnt == MAX_ELEMENT_CNT:

View file

@ -157,7 +157,8 @@ class DomUtils {
// this.
let computedStyle;
if ((clientRect.width === 0 || clientRect.height === 0) && testChildren) {
for (const child of Array.from(element.children)) {
for (let ci = 0; ci < element.children.length; ci++) {
const child = element.children[ci];
computedStyle = getElementComputedStyle(child, null);
if (!computedStyle) {
continue;
@ -360,11 +361,27 @@ class QuadTreeNode {
}
}
// from playwright
// from playwright — with per-tree-build cache for non-pseudo styles
const _computedStyleCache = new WeakMap();
function getElementComputedStyle(element, pseudo) {
return element.ownerDocument && element.ownerDocument.defaultView
? element.ownerDocument.defaultView.getComputedStyle(element, pseudo)
: undefined;
if (!element.ownerDocument || !element.ownerDocument.defaultView) {
return undefined;
}
// Cache only non-pseudo styles (the hot path called 3-5× per element)
if (pseudo == null) {
const cached = _computedStyleCache.get(element);
if (cached !== undefined) {
return cached;
}
const style = element.ownerDocument.defaultView.getComputedStyle(
element,
null,
);
_computedStyleCache.set(element, style);
return style;
}
return element.ownerDocument.defaultView.getComputedStyle(element, pseudo);
}
// from playwright: https://github.com/microsoft/playwright/blob/1b65f26f0287c0352e76673bc5f85bc36c934b55/packages/playwright-core/src/server/injected/domUtils.ts#L76-L98
@ -411,8 +428,14 @@ function hasASPClientControl() {
return typeof ASPxClientControl !== "undefined";
}
// Cache for isHoverOnlyElement — called up to 3× per element
// (isElementVisible, isInteractable, buildElementObject)
const _hoverOnlyCache = new WeakMap();
// Check if element is only visible on hover (e.g., hover-only buttons)
function isHoverOnlyElement(element) {
const cached = _hoverOnlyCache.get(element);
if (cached !== undefined) return cached;
// Check for common hover-only patterns in class names
const className = element.className?.toString() ?? "";
const parentClassName = element.parentElement?.className?.toString() ?? "";
@ -424,6 +447,7 @@ function isHoverOnlyElement(element) {
parentClassName.includes("hover-") ||
parentClassName.includes("-hover")
) {
_hoverOnlyCache.set(element, true);
return true;
}
@ -440,12 +464,14 @@ function isHoverOnlyElement(element) {
parentClass.includes("item")
) {
// This element might be revealed on parent hover
_hoverOnlyCache.set(element, true);
return true;
}
parent = parent.parentElement;
depth += 1;
}
_hoverOnlyCache.set(element, false);
return false;
}
@ -454,9 +480,10 @@ function isHoverOnlyElement(element) {
function isElementVisible(element) {
// TODO: This is a hack to not check visibility for option elements
// because they are not visible by default. We check their parent instead for visibility.
const elTagName = element.tagName.toLowerCase();
if (
element.tagName.toLowerCase() === "option" ||
(element.tagName.toLowerCase() === "input" &&
elTagName === "option" ||
(elTagName === "input" &&
(element.type === "radio" || element.type === "checkbox"))
)
return element.parentElement && isElementVisible(element.parentElement);
@ -622,11 +649,10 @@ function expectHitTarget(hitPoint, targetElement) {
}
function getChildElements(element) {
if (element.childElementCount !== 0) {
return Array.from(element.children);
} else {
return [];
}
// Return the live HTMLCollection directly — callers iterate by index,
// and processElement only modifies attributes (not child structure),
// so the live collection remains stable during iteration.
return element.children;
}
function isParent(parent, child) {
@ -661,9 +687,10 @@ function isHidden(element) {
return true;
}
if (element.hidden) {
const tn = element.tagName.toLowerCase();
if (
style?.cursor === "pointer" &&
element.tagName.toLowerCase() === "input" &&
tn === "input" &&
(element.type === "submit" || element.type === "button")
) {
// there are cases where the input is a "submit" button and the cursor is a pointer but the element has the hidden attr.
@ -723,6 +750,25 @@ function hasAngularClickBinding(element) {
);
}
// Pre-computed Set for widget roles — avoids recreating array on every hasWidgetRole call
// "textbox" is deliberately absent: hasWidgetRole special-cases it with a
// readonly check before falling through to this set.
const WIDGET_ROLES = new Set([
  "button",
  "link",
  "checkbox",
  "menuitem",
  "menuitemcheckbox",
  "menuitemradio",
  "radio",
  "tab",
  "combobox",
  "searchbox",
  "slider",
  "spinbutton",
  "switch",
  "gridcell",
  "option",
]);
function hasWidgetRole(element) {
const role = element.getAttribute("role");
if (!role) {
@ -730,44 +776,15 @@ function hasWidgetRole(element) {
}
// https://developer.mozilla.org/en-US/docs/Web/Accessibility/ARIA/Roles#2._widget_roles
// Not all roles make sense for the time being so we only check for the ones that do
if (role.toLowerCase().trim() === "textbox") {
const normalizedRole = role.toLowerCase().trim();
if (normalizedRole === "textbox") {
return !isReadonlyElement(element);
}
const widgetRoles = [
"button",
"link",
"checkbox",
"menuitem",
"menuitemcheckbox",
"menuitemradio",
"radio",
"tab",
"combobox",
"searchbox",
"slider",
"spinbutton",
"switch",
"gridcell",
"option",
];
return widgetRoles.includes(role.toLowerCase().trim());
return WIDGET_ROLES.has(normalizedRole);
}
function isTableRelatedElement(element) {
const tagName = element.tagName.toLowerCase();
return [
"table",
"caption",
"thead",
"tbody",
"tfoot",
"tr",
"th",
"td",
"colgroup",
"col",
].includes(tagName);
return TABLE_RELATED_TAGS.has(element.tagName.toLowerCase());
}
function isDOMNodeRepresentDiv(element) {
@ -844,17 +861,74 @@ function isValidCSSSelector(selector) {
}
}
function isInteractable(element, hoverStylesMap) {
if (!isElementVisible(element)) {
return false;
}
// Pre-computed tag sets for O(1) lookup in isInteractable hot path
// Tags that are treated as interactable unconditionally.
const ALWAYS_INTERACTABLE_TAGS = new Set([
  "button",
  "select",
  "option",
  "textarea",
]);
// Tags that only count as interactable when the pointer-style check passes.
const POINTER_CHECK_TAGS = new Set([
  "div",
  "img",
  "span",
  "a",
  "i",
  "li",
  "p",
  "td",
  "svg",
  "strong",
  "h1",
  "h2",
  "h3",
  "h4",
]);
// Document/frame container tags that are never interactable.
const SKIP_TAGS = new Set(["html", "iframe", "frameset", "frame"]);
// Attributes that should be normalized to boolean true/false in buildElementObject
const BOOLEAN_ATTRS = new Set([
  "required",
  "aria-required",
  "checked",
  "aria-checked",
  "selected",
  "aria-selected",
  "readonly",
  "aria-readonly",
  "disabled",
  "aria-disabled",
]);
// Table-structure tags, consulted by isTableRelatedElement.
const TABLE_RELATED_TAGS = new Set([
  "table",
  "caption",
  "thead",
  "tbody",
  "tfoot",
  "tr",
  "th",
  "td",
  "colgroup",
  "col",
]);
if (isHidden(element)) {
return false;
}
function isInteractable(
element,
hoverStylesMap,
skipVisibilityChecks = false,
pageFlags = {},
) {
if (!skipVisibilityChecks) {
if (!isElementVisible(element)) {
return false;
}
if (isScriptOrStyle(element)) {
return false;
if (isHidden(element)) {
return false;
}
if (isScriptOrStyle(element)) {
return false;
}
}
if (hasWidgetRole(element)) {
@ -879,19 +953,7 @@ function isInteractable(element, hoverStylesMap) {
}
const tagName = element.tagName.toLowerCase();
if (tagName === "html") {
return false;
}
if (tagName === "iframe") {
return false;
}
if (tagName === "frameset") {
return false;
}
if (tagName === "frame") {
if (SKIP_TAGS.has(tagName)) {
return false;
}
@ -904,12 +966,7 @@ function isInteractable(element, hoverStylesMap) {
return false;
}
if (
tagName === "button" ||
tagName === "select" ||
tagName === "option" ||
tagName === "textarea"
) {
if (ALWAYS_INTERACTABLE_TAGS.has(tagName)) {
return true;
}
@ -943,18 +1000,11 @@ function isInteractable(element, hoverStylesMap) {
// support listbox and options underneath it
// div element should be checked here before the css pointer
if (
(tagName === "ul" || tagName === "div") &&
element.hasAttribute("role") &&
element.getAttribute("role").toLowerCase() === "listbox"
) {
const roleAttr = element.getAttribute("role")?.toLowerCase();
if ((tagName === "ul" || tagName === "div") && roleAttr === "listbox") {
return true;
}
if (
(tagName === "li" || tagName === "div") &&
element.hasAttribute("role") &&
element.getAttribute("role").toLowerCase() === "option"
) {
if ((tagName === "li" || tagName === "div") && roleAttr === "option") {
return true;
}
@ -974,6 +1024,7 @@ function isInteractable(element, hoverStylesMap) {
if (
tagName === "div" &&
className.includes("pac-item") &&
pageFlags.hasPacContainers !== false &&
element.closest('div[class*="pac-container"]')
) {
return true;
@ -981,33 +1032,23 @@ function isInteractable(element, hoverStylesMap) {
if (
tagName === "div" &&
element.hasAttribute("aria-disabled") &&
element.getAttribute("aria-disabled").toLowerCase() === "false"
element.getAttribute("aria-disabled")?.toLowerCase() === "false"
) {
return true;
}
if (tagName === "span" && element.closest('div[id*="dropdown-container"]')) {
if (
tagName === "span" &&
pageFlags.hasDropdownContainers !== false &&
element.closest('div[id*="dropdown-container"]')
) {
return true;
}
// FIXME: maybe we need to enable the pointer check for all elements?
if (
tagName === "div" ||
tagName === "img" ||
tagName === "span" ||
tagName === "a" ||
tagName === "i" ||
tagName === "li" ||
tagName === "p" ||
tagName === "td" ||
tagName === "svg" ||
tagName === "strong" ||
tagName === "h1" ||
tagName === "h2" ||
tagName === "h3" ||
tagName === "h4" ||
// sometime it's a customized element like <my-login-button>, we should check pointer style
POINTER_CHECK_TAGS.has(tagName) ||
// customized elements like <my-login-button>, check pointer style
tagName.includes("button") ||
tagName.includes("select") ||
tagName.includes("option") ||
@ -1046,11 +1087,7 @@ function isInteractable(element, hoverStylesMap) {
}
// consider <div tabindex="0"> as interactable
if (
tagName.toLowerCase() === "div" &&
element.hasAttribute("tabindex") &&
element.getAttribute("tabindex").toLowerCase() === "0"
) {
if (tagName === "div" && element.getAttribute("tabindex") === "0") {
return true;
}
@ -1099,15 +1136,10 @@ const isComboboxDropdown = (element) => {
if (element.tagName.toLowerCase() !== "input") {
return false;
}
const role = element.getAttribute("role")
? element.getAttribute("role").toLowerCase()
: "";
const haspopup = element.getAttribute("aria-haspopup")
? element.getAttribute("aria-haspopup").toLowerCase()
: "";
const readonly =
element.getAttribute("readonly") &&
element.getAttribute("readonly").toLowerCase() !== "false";
const role = element.getAttribute("role")?.toLowerCase() ?? "";
const haspopup = element.getAttribute("aria-haspopup")?.toLowerCase() ?? "";
const readonlyVal = element.getAttribute("readonly");
const readonly = readonlyVal && readonlyVal.toLowerCase() !== "false";
const controls = element.hasAttribute("aria-controls");
return role && haspopup && controls && readonly;
};
@ -1117,24 +1149,16 @@ const isDivComboboxDropdown = (element) => {
if (tagName !== "div") {
return false;
}
const role = element.getAttribute("role")
? element.getAttribute("role").toLowerCase()
: "";
const haspopup = element.getAttribute("aria-haspopup")
? element.getAttribute("aria-haspopup").toLowerCase()
: "";
const role = element.getAttribute("role")?.toLowerCase() ?? "";
const haspopup = element.getAttribute("aria-haspopup")?.toLowerCase() ?? "";
const controls = element.hasAttribute("aria-controls");
return role === "combobox" && controls && haspopup;
};
const isDropdownButton = (element) => {
const tagName = element.tagName.toLowerCase();
const type = element.getAttribute("type")
? element.getAttribute("type").toLowerCase()
: "";
const haspopup = element.getAttribute("aria-haspopup")
? element.getAttribute("aria-haspopup").toLowerCase()
: "";
const type = element.getAttribute("type")?.toLowerCase() ?? "";
const haspopup = element.getAttribute("aria-haspopup")?.toLowerCase() ?? "";
const hasExpanded = element.hasAttribute("aria-expanded");
return (
tagName === "button" &&
@ -1146,9 +1170,7 @@ const isDropdownButton = (element) => {
const isSelect2Dropdown = (element) => {
const tagName = element.tagName.toLowerCase();
const className = element.className.toString();
const role = element.getAttribute("role")
? element.getAttribute("role").toLowerCase()
: "";
const role = element.getAttribute("role")?.toLowerCase() ?? "";
if (tagName === "a") {
return className.includes("select2-choice");
@ -1249,9 +1271,7 @@ const isAngularDropdown = (element) => {
const tagName = element.tagName.toLowerCase();
if (tagName === "input" || tagName === "span") {
const ariaLabel = element.hasAttribute("aria-label")
? element.getAttribute("aria-label").toLowerCase()
: "";
const ariaLabel = element.getAttribute("aria-label")?.toLowerCase() ?? "";
return ariaLabel.includes("select") || ariaLabel.includes("choose");
}
@ -1272,21 +1292,34 @@ const isAngularMaterialDatePicker = (element) => {
);
};
// Cache for pseudo-element content — called in hasBeforeOrAfterPseudoContent
// and again in buildElementObject for the same element
const _pseudoContentCache = new WeakMap();
function getPseudoContent(element, pseudo) {
let elementCache = _pseudoContentCache.get(element);
if (elementCache && pseudo in elementCache) {
return elementCache[pseudo];
}
const pseudoStyle = getElementComputedStyle(element, pseudo);
if (!pseudoStyle) {
return null;
}
const content = pseudoStyle
.getPropertyValue("content")
.replace(/"/g, "")
.trim();
if (content === "none" || !content) {
return null;
let result = null;
if (pseudoStyle) {
const content = pseudoStyle
.getPropertyValue("content")
.replace(/"/g, "")
.trim();
if (content !== "none" && content) {
result = content;
}
}
return content;
if (!elementCache) {
elementCache = {};
_pseudoContentCache.set(element, elementCache);
}
elementCache[pseudo] = result;
return result;
}
function hasBeforeOrAfterPseudoContent(element) {
@ -1312,12 +1345,8 @@ function removeMultipleSpaces(str) {
return str;
}
// Optimization: check if contains multiple spaces to avoid unnecessary regex replacement
if (
str.indexOf(" ") === -1 &&
str.indexOf("\t") === -1 &&
str.indexOf("\n") === -1
) {
// Optimization: check if contains whitespace worth collapsing before running regex
if (!/\s{2}|[\t\n]/.test(str)) {
return str;
}
@ -1346,10 +1375,9 @@ function cleanupText(text) {
}
const checkStringIncludeRequire = (str) => {
const lower = str.toLowerCase();
return (
str.toLowerCase().includes("*") ||
str.toLowerCase().includes("✱") ||
str.toLowerCase().includes("require")
lower.includes("*") || lower.includes("✱") || lower.includes("require")
);
};
@ -1434,10 +1462,10 @@ function getElementText(element) {
}
function getSelectOptions(element) {
const options = Array.from(element.options);
const selectOptions = [];
for (const option of options) {
for (let i = 0; i < element.options.length; i++) {
const option = element.options[i];
selectOptions.push({
optionIndex: option.index,
text: removeMultipleSpaces(option.textContent),
@ -1518,6 +1546,7 @@ async function buildElementObject(
element,
interactable,
purgeable = false,
cachedIsInSvg = null,
) {
var element_id = element.getAttribute("unique_id") ?? (await uniqueId());
var elementTagNameLower = element.tagName.toLowerCase();
@ -1527,23 +1556,9 @@ async function buildElementObject(
if (element.attributes[Symbol.iterator]) {
for (const attr of element.attributes) {
var attrValue = attr.value;
if (
attr.name === "required" ||
attr.name === "aria-required" ||
attr.name === "checked" ||
attr.name === "aria-checked" ||
attr.name === "selected" ||
attr.name === "aria-selected" ||
attr.name === "readonly" ||
attr.name === "aria-readonly" ||
attr.name === "disabled" ||
attr.name === "aria-disabled"
) {
if (attrValue && attrValue.toLowerCase() === "false") {
attrValue = false;
} else {
attrValue = true;
}
if (BOOLEAN_ATTRS.has(attr.name)) {
attrValue =
attrValue && attrValue.toLowerCase() === "false" ? false : true;
}
attrs[attr.name] = attrValue;
}
@ -1637,7 +1652,10 @@ async function buildElementObject(
purgeable: purgeable,
// don't trim any attr of this element if keepAllAttr=True
keepAllAttr:
elementTagNameLower === "svg" || element.closest("svg") !== null,
elementTagNameLower === "svg" ||
(cachedIsInSvg !== null
? cachedIsInSvg
: element.closest("svg") !== null),
isSelectable:
elementTagNameLower === "select" ||
isDatePickerSelector(element) ||
@ -1729,6 +1747,15 @@ async function buildElementTree(
var elements = [];
var resultArray = [];
// O(1) parent lookup map — replaces the O(n) elements.find() that caused O(n²) tree building
const elementIdMap = new Map();
// Page-level check: does the page have any dropdown-container divs?
// If not, skip expensive .closest() for every span element.
const hasDropdownContainers =
document.querySelector('div[id*="dropdown-container"]') !== null;
// Page-level check: does the page have any Google Maps pac-container?
const hasPacContainers =
document.querySelector('div[class*="pac-container"]') !== null;
async function processElement(
element,
@ -1784,7 +1811,12 @@ async function buildElementTree(
}
const isVisible = isElementVisible(element);
if (isVisible && !isHidden(element) && !isScriptOrStyle(element)) {
let interactable = isInteractable(element, hoverStylesMap);
// Pass skipVisibilityChecks=true since we already verified visible+not hidden+not script/style.
// Pass pageFlags so .closest() calls are skipped when the page lacks those containers.
let interactable = isInteractable(element, hoverStylesMap, true, {
hasDropdownContainers,
hasPacContainers,
});
let elementObj = null;
let isParentSVG = null;
if (must_included_tags.includes(tagName)) {
@ -1807,13 +1839,25 @@ async function buildElementTree(
} else if (hasBeforeOrAfterPseudoContent(element)) {
elementObj = await buildElementObject(frame, element, interactable);
} else if (tagName === "svg") {
elementObj = await buildElementObject(frame, element, interactable);
elementObj = await buildElementObject(
frame,
element,
interactable,
false,
true,
);
} else if (
(isParentSVG = element.closest("svg")) &&
isParentSVG.getAttribute("unique_id")
) {
// if element is the children of the <svg> with an unique_id
elementObj = await buildElementObject(frame, element, interactable);
elementObj = await buildElementObject(
frame,
element,
interactable,
false,
true,
);
} else if (tagName === "div" && isDOMNodeRepresentDiv(element)) {
elementObj = await buildElementObject(frame, element, interactable);
} else if (
@ -1826,46 +1870,45 @@ async function buildElementTree(
interactable,
true,
);
} else if (
getElementText(element).length > 0 &&
getElementText(element).length <= 5000
) {
if (window.GlobalEnableAllTextualElements) {
// force all textual elements to be interactable
interactable = true;
}
elementObj = await buildElementObject(frame, element, interactable);
} else if (full_tree) {
// when building full tree, we only get text from element itself
// elements without text are purgeable
elementObj = await buildElementObject(
frame,
element,
interactable,
true,
);
if (elementObj.text.length > 0) {
elementObj.purgeable = false;
} else {
// Cache getElementText() to avoid calling it twice
const cachedText = getElementText(element);
if (cachedText.length > 0 && cachedText.length <= 5000) {
if (window.GlobalEnableAllTextualElements) {
// force all textual elements to be interactable
interactable = true;
}
elementObj = await buildElementObject(frame, element, interactable);
} else if (full_tree) {
// when building full tree, we only get text from element itself
// elements without text are purgeable
elementObj = await buildElementObject(
frame,
element,
interactable,
true,
);
if (elementObj.text.length > 0) {
elementObj.purgeable = false;
}
}
}
if (elementObj) {
elementObj.xpath = current_xpath;
elements.push(elementObj);
// If the element is interactable but has no interactable parent,
elementIdMap.set(elementObj.id, elementObj);
// If the element has no parent in the tree,
// then it starts a new tree, so add it to the result array
// and set its id as the interactable parent id for the next elements
// under it
if (parentId === null) {
resultArray.push(elementObj);
}
// If the element is interactable and has an interactable parent,
// then add it to the children of the parent
// Otherwise add it to the children of the parent via O(1) map lookup
else {
// TODO: use dict/object so that we access these in O(1) instead
elements
.find((element) => element.id === parentId)
.children.push(elementObj);
const parentObj = elementIdMap.get(parentId);
if (parentObj) {
parentObj.children.push(elementObj);
}
}
parentId = elementObj.id;
}
@ -1909,24 +1952,51 @@ async function buildElementTree(
return;
}
// if the element has options, text will be duplicated with the option text
if (element.options) {
element.options.forEach((option) => {
element.text = element.text.replace(option.text, "");
});
let text = element.text;
if (!text) {
// Still recurse into children
for (let i = 0; i < element.children.length; i++) {
trimDuplicatedText(element.children[i]);
}
return;
}
// BFS to delete duplicated text
element.children.forEach((child) => {
// delete duplicated text in the tree
element.text = element.text.replace(child.text, "");
trimDuplicatedText(child);
});
// Collect all substrings to remove, then remove them in one pass
const textsToRemove = [];
// trim multiple ";"
element.text = element.text.replace(/;+/g, ";");
// trimleft and trimright ";"
element.text = element.text.replace(new RegExp(`^;+|;+$`, "g"), "");
// if the element has options, text will be duplicated with the option text
if (element.options) {
for (let i = 0; i < element.options.length; i++) {
if (element.options[i].text) {
textsToRemove.push(element.options[i].text);
}
}
}
for (let i = 0; i < element.children.length; i++) {
if (element.children[i].text) {
textsToRemove.push(element.children[i].text);
}
}
// Remove all collected substrings from parent text
for (let i = 0; i < textsToRemove.length; i++) {
const idx = text.indexOf(textsToRemove[i]);
if (idx !== -1) {
text =
text.substring(0, idx) +
text.substring(idx + textsToRemove[i].length);
}
}
// Clean up semicolons in one pass using a single regex
text = text.replace(/;+/g, ";").replace(/^;+|;+$/g, "");
element.text = text;
// Recurse into children
for (let i = 0; i < element.children.length; i++) {
trimDuplicatedText(element.children[i]);
}
};
// some elements without children nodes should be removed out, such as <label>
@ -2068,14 +2138,19 @@ function groupElementsVisually(elements) {
return groups;
}
// Helper functions
// Helper functions — single-pass min/max avoids 4 intermediate arrays + spread
function calculateBounds(elements) {
const rects = elements.map((el) => el.rect);
const left = Math.min(...rects.map((r) => r.left));
const top = Math.min(...rects.map((r) => r.top));
const right = Math.max(...rects.map((r) => r.right));
const bottom = Math.max(...rects.map((r) => r.bottom));
let left = Infinity,
top = Infinity,
right = -Infinity,
bottom = -Infinity;
for (let i = 0; i < elements.length; i++) {
const r = elements[i].rect;
if (r.left < left) left = r.left;
if (r.top < top) top = r.top;
if (r.right > right) right = r.right;
if (r.bottom > bottom) bottom = r.bottom;
}
return {
x: left,
y: top,
@ -2110,11 +2185,18 @@ function findOverlappingElements(element, allElements, quadTree, processed) {
}
// Compute the bounding rectangle enclosing every element in the group.
// Single-pass min/max avoids 4 intermediate arrays + spread calls.
// The original rendering kept both the old map/spread lines and the new
// loop, redeclaring `top`/`left`/... (a SyntaxError); this is the intended
// single-pass version.
function createRectangleForGroup(group) {
  const members = group.elements;
  let top = Infinity;
  let left = Infinity;
  let bottom = -Infinity;
  let right = -Infinity;
  for (let i = 0; i < members.length; i++) {
    const rect = members[i].rect;
    if (rect.top < top) top = rect.top;
    if (rect.left < left) left = rect.left;
    if (rect.bottom > bottom) bottom = rect.bottom;
    if (rect.right > right) right = rect.right;
  }
  return Rect.create(left, top, right, bottom);
}
@ -2427,12 +2509,16 @@ function scrollToElementTop(element) {
*/
async function getHoverStylesMap() {
const hoverMap = new Map();
const sheets = [...document.styleSheets];
const sheets = Array.from(document.styleSheets);
const parseCssSheet = (sheet) => {
const rules = sheet.cssRules || sheet.rules;
for (const rule of rules) {
if (rule.type === 1 && rule.selectorText) {
// Fast-path: skip rules that don't contain :hover at all
if (!rule.selectorText.includes(":hover")) {
continue;
}
// Split multiple selectors (e.g., "a:hover, button:hover")
const selectors = rule.selectorText.split(",").map((s) => s.trim());
@ -2454,8 +2540,17 @@ async function getHoverStylesMap() {
continue;
}
// Early check: only keep rules that set cursor (or already have it from prior rules)
const existingStyles = hoverMap.get(baseSelector);
const hasCursorInRule = rule.style.cursor;
const hasCursorInExisting =
existingStyles && "cursor" in existingStyles;
if (!hasCursorInRule && !hasCursorInExisting) {
continue;
}
// Get or create styles object for this selector
let styles = hoverMap.get(baseSelector) || {};
let styles = existingStyles || {};
// Add all style properties
for (const prop of rule.style) {
@ -2467,18 +2562,16 @@ async function getHoverStylesMap() {
if (parts.length > 1) {
const fullSelector = selector;
styles["__nested__"] = styles["__nested__"] || [];
const nestedStyles = {};
for (const prop of rule.style) {
nestedStyles[prop] = rule.style[prop];
}
styles["__nested__"].push({
selector: fullSelector,
styles: Object.fromEntries(
[...rule.style].map((prop) => [prop, rule.style[prop]]),
),
styles: nestedStyles,
});
}
// only need the style which includes the cursor attribute.
if (!("cursor" in styles)) {
continue;
}
hoverMap.set(baseSelector, styles);
}
}
@ -2508,15 +2601,35 @@ async function getHoverStylesMap() {
url.searchParams.set("v", Date.now());
newLink.href = url.toString();
newLink.crossOrigin = "anonymous";
// until the new link loaded, removing the old one
document.head.append(newLink);
// wait for a while until the sheet is fully loaded
await asyncSleepFor(1500);
const newSheets = [...document.styleSheets];
const refreshedSheet = newSheets.find(
(s) => s.href === newLink.href,
);
// Wait for the stylesheet to load via event instead of fixed 1500ms sleep
await new Promise((resolve) => {
const timeout = setTimeout(() => {
_jsConsoleWarn(
"Stylesheet load timed out after 3s:",
newLink.href,
);
resolve();
}, 3000);
newLink.addEventListener("load", () => {
clearTimeout(timeout);
resolve();
});
newLink.addEventListener("error", () => {
clearTimeout(timeout);
resolve();
});
});
const allSheets = document.styleSheets;
let refreshedSheet = null;
for (let si = 0; si < allSheets.length; si++) {
if (allSheets[si].href === newLink.href) {
refreshedSheet = allSheets[si];
break;
}
}
if (!refreshedSheet) {
newLink.remove();
return;
@ -2578,19 +2691,18 @@ if (window.globalDomDepthMap === undefined) {
// Returns true when the class string suggests a hidden element.
// Some hidden elements carry classnames like `class="select-items select-hide"`
// or `class="dropdown-container dropdown-invisible"`.
// The original rendering interleaved the pre-refactor operand lines with the
// new lowercase-once version (a syntax error); this is the intended version.
function isClassNameIncludesHidden(className) {
  const normalized = className.toLowerCase();
  return (
    normalized.includes("hide") ||
    normalized.includes("invisible") ||
    normalized.includes("closed")
  );
}
// Returns true when the class string suggests an activated element —
// classnames like `class="open"` or `class="active"` mark an element as
// activated by a click. The original rendering interleaved the old and new
// return expressions (a syntax error); this is the intended version.
function isClassNameIncludesActivatedStatus(className) {
  const normalized = className.toLowerCase();
  return normalized.includes("open") || normalized.includes("active");
}
function waitForNextFrame() {
@ -2926,21 +3038,25 @@ function isAnimationFinished() {
* This includes elements in the main document and shadow DOM.
*/
function removeAllUniqueIds() {
// Function to recursively remove unique_id from an element and its children
// Recursively strips the "unique_id" attribute from an element, its
// descendants, and any shadow-root descendants. Iterating the live
// HTMLCollection directly is safe here: removeAttribute only touches
// attributes, never the child structure.
const removeUniqueIdFromElement = (element) => {
  if (element.hasAttribute("unique_id")) {
    element.removeAttribute("unique_id");
  }
  const lightChildren = element.children;
  for (let idx = 0; idx < lightChildren.length; idx++) {
    removeUniqueIdFromElement(lightChildren[idx]);
  }
  const root = element.shadowRoot;
  if (root) {
    const shadowKids = root.children;
    for (let idx = 0; idx < shadowKids.length; idx++) {
      removeUniqueIdFromElement(shadowKids[idx]);
    }
  }
};

File diff suppressed because it is too large Load diff

View file

@ -2,6 +2,7 @@ import copy
import json
import typing
from abc import ABC, abstractmethod
from collections import deque
from enum import StrEnum
from typing import Any, Awaitable, Callable, Self
@ -9,6 +10,7 @@ import structlog
from playwright.async_api import Frame, Page
from pydantic import BaseModel, PrivateAttr
from skyvern.config import settings
from skyvern.exceptions import UnknownElementTreeFormat
from skyvern.forge.sdk.api.crypto import calculate_sha256
from skyvern.forge.sdk.core import skyvern_context
@ -33,10 +35,8 @@ def build_attribute(key: str, value: Any) -> str:
return f'{key}="{str(value)}"' if value else key
def json_to_html(element: dict, need_skyvern_attrs: bool = True) -> str:
"""
if element is flagged as dropped, the html format is empty
"""
def _json_to_html_legacy(element: dict, need_skyvern_attrs: bool = True) -> str:
"""Legacy: always deep-copies attributes (pre-V2)."""
tag = element["tagName"]
attributes: dict[str, Any] = copy.deepcopy(element.get("attributes", {}))
@ -50,18 +50,13 @@ def json_to_html(element: dict, need_skyvern_attrs: bool = True) -> str:
context = skyvern_context.ensure_context()
# FIXME: Theoretically, all href links with over 69(64+1+4) length could be hashed
# but currently, just hash length>150 links to confirm the solution goes well
if "href" in attributes and len(attributes.get("href", "")) > 150:
href = attributes.get("href", "")
# jinja style can't accept the variable name starts with number
# adding "_" to make sure the variable name is valid.
hashed_href = "_" + calculate_sha256(href)
context.hashed_href_map[hashed_href] = href
attributes["href"] = "{{" + hashed_href + "}}"
if need_skyvern_attrs:
# adding the node attribute to attributes
for attr in ELEMENT_NODE_ATTRIBUTES:
value = element.get(attr)
if value is None:
@ -70,6 +65,92 @@ def json_to_html(element: dict, need_skyvern_attrs: bool = True) -> str:
attributes_html = " ".join(build_attribute(key, value) for key, value in attributes.items())
if element.get("isSelectable", False):
tag = "select"
text = element.get("text", "")
children_html = "".join(
_json_to_html_legacy(child, need_skyvern_attrs=need_skyvern_attrs) for child in element.get("children", [])
)
option_html = "".join(
f'<option index="{option.get("optionIndex")}">{option.get("text")}</option>'
if option.get("text")
else f'<option index="{option.get("optionIndex")}" value="{option.get("value")}">{option.get("text")}</option>'
for option in element.get("options", [])
)
if element.get("purgeable", False):
return children_html + option_html
before_pseudo_text = element.get("beforePseudoText") or ""
after_pseudo_text = element.get("afterPseudoText") or ""
if (
tag in ["img", "input", "br", "hr", "meta", "link"]
and not option_html
and not children_html
and not before_pseudo_text
and not after_pseudo_text
):
return f"<{tag} {attributes_html}/>" if attributes_html else f"<{tag}/>"
return (
(f"<{tag} {attributes_html}>" if attributes_html else f"<{tag}>")
+ before_pseudo_text
+ text
+ children_html
+ option_html
+ after_pseudo_text
+ f"</{tag}>"
)
def json_to_html(element: dict, need_skyvern_attrs: bool = True) -> str:
"""
if element is flagged as dropped, the html format is empty
"""
if not settings.ENABLE_DOM_PARSER_V2:
return _json_to_html_legacy(element, need_skyvern_attrs)
tag = element["tagName"]
original_attrs = element.get("attributes", {})
interactable = element.get("interactable", False)
if element.get("isDropped", False):
if not interactable:
return ""
else:
LOG.debug("Element is interactable. Trimmed all attributes instead of dropping it", element=element)
original_attrs = {}
context = skyvern_context.ensure_context()
# Only shallow-copy attributes when we actually need to mutate them.
# This avoids dict() allocation on every element — most elements don't need it.
attributes = original_attrs
href_val = original_attrs.get("href", "")
if href_val and len(href_val) > 150:
attributes = dict(original_attrs)
# jinja style can't accept the variable name starts with number
# adding "_" to make sure the variable name is valid.
hashed_href = "_" + calculate_sha256(href_val)
context.hashed_href_map[hashed_href] = href_val
attributes["href"] = "{{" + hashed_href + "}}"
if need_skyvern_attrs:
# adding the node attribute to attributes
has_skyvern_attrs = any(element.get(attr) is not None for attr in ELEMENT_NODE_ATTRIBUTES)
if has_skyvern_attrs:
if attributes is original_attrs:
attributes = dict(original_attrs)
for attr in ELEMENT_NODE_ATTRIBUTES:
value = element.get(attr)
if value is not None:
attributes[attr] = value
attributes_html = " ".join(build_attribute(key, value) for key, value in attributes.items())
if element.get("isSelectable", False):
tag = "select"
@ -275,25 +356,45 @@ class ScrapedPage(BaseModel, ElementTreeBuilder):
raise UnknownElementTreeFormat(fmt=fmt)
@staticmethod
def _process_element_for_economy_tree_legacy(element: dict) -> dict | None:
    """Legacy: recursive SVG removal (pre-V2).

    Returns ``None`` for ``<svg>`` elements (dropping the whole subtree);
    otherwise filters SVG children recursively, mutating ``element`` in place,
    and returns it.

    The original rendering interleaved the old instance-method definition
    (``self._process_element_for_economy_tree``) with the new staticmethod;
    this is the reconstructed staticmethod version.
    """
    # Skip SVG elements entirely
    if element.get("tagName", "").lower() == "svg":
        return None
    # Recurse into children, keeping only non-None (non-SVG) results
    if "children" in element:
        new_children = []
        for child in element["children"]:
            processed_child = ScrapedPage._process_element_for_economy_tree_legacy(child)
            if processed_child:
                new_children.append(processed_child)
        element["children"] = new_children
    return element
@staticmethod
def _process_element_for_economy_tree(element: dict) -> dict | None:
    """Process an element for the economy tree.

    Removes SVG elements and their children. With ``ENABLE_DOM_PARSER_V2``
    the walk is an iterative BFS (no recursion); otherwise the legacy
    recursive path is used. Mutates ``element`` in place; returns ``None``
    when the element itself is an ``<svg>``.
    """
    if not settings.ENABLE_DOM_PARSER_V2:
        return ScrapedPage._process_element_for_economy_tree_legacy(element)

    if element.get("tagName", "").lower() == "svg":
        return None

    # Breadth-first walk: drop <svg> subtrees at every depth.
    pending: deque[dict] = deque([element])
    while pending:
        current = pending.popleft()
        kids = current.get("children")
        if not kids:
            continue
        kept = [child for child in kids if child.get("tagName", "").lower() != "svg"]
        current["children"] = kept
        pending.extend(kept)
    return element
async def refresh(self, draw_boxes: bool = True, scroll: bool = True, max_retries: int = 0) -> Self:
refreshed_page = await self._browser_state.scrape_website(
url=self.url,

View file

@ -1,7 +1,8 @@
import asyncio
import copy
import json
from collections import defaultdict
import time
from collections import defaultdict, deque
import structlog
from playwright._impl._errors import TimeoutError
@ -109,6 +110,7 @@ def clean_element_before_hashing(element: dict) -> dict:
def hash_element(element: dict) -> str:
    """Compute a stable SHA-256 fingerprint of an element.

    ``sort_keys`` guarantees a deterministic serialization order.
    NOTE: Do NOT change separators — hashes are persisted to the database
    for cached action matching.
    """
    prepared = clean_element_before_hashing(element)
    serialized = json.dumps(prepared, sort_keys=True)
    return calculate_sha256(serialized)
@ -131,7 +133,10 @@ def build_element_dict(
id_to_frame_dict[element_id] = element["frame"]
element_hash = hash_element(element)
id_to_element_hash[element_id] = element_hash
hash_to_element_ids[element_hash] = hash_to_element_ids.get(element_hash, []) + [element_id]
if SettingsManager.get_settings().ENABLE_DOM_PARSER_V2:
hash_to_element_ids.setdefault(element_hash, []).append(element_id)
else:
hash_to_element_ids[element_hash] = hash_to_element_ids.get(element_hash, []) + [element_id]
return id_to_css_dict, id_to_element_dict, id_to_frame_dict, id_to_element_hash, hash_to_element_ids
@ -296,6 +301,7 @@ async def scrape_web_unsafe(
# We check if the scroll_y_px_old is the same as scroll_y_px to determine if we have reached the end of the page.
# This also solves the issue where we can't scroll due to a popup.(e.g. geico first popup on the homepage after
# clicking start my quote)
scrape_start_time = time.time()
url = page.url
if url == "about:blank" and not support_empty_page:
# Allow scraping if the page has child frames with meaningful content
@ -309,23 +315,46 @@ async def scrape_web_unsafe(
frame_count=len(meaningful_frames),
)
t0 = time.time()
skyvern_frame = await SkyvernFrame.create_instance(page)
js_inject_time = time.time() - t0
await skyvern_frame.safe_wait_for_animation_end()
if wait_seconds > 0:
LOG.info(f"Waiting for {wait_seconds} seconds before scraping the website.", wait_seconds=wait_seconds)
await asyncio.sleep(wait_seconds)
elements, element_tree = await get_interactable_element_tree(page, scrape_exclude, must_included_tags)
t0 = time.time()
elements, element_tree, main_frame_time, child_frames_time, num_child_frames = await get_interactable_element_tree(
page, scrape_exclude, must_included_tags
)
element_tree_time = time.time() - t0
if not elements and not support_empty_page:
LOG.warning("No elements found on the page, wait and retry")
await empty_page_retry_wait()
elements, element_tree = await get_interactable_element_tree(page, scrape_exclude, must_included_tags)
t0 = time.time()
(
elements,
element_tree,
main_frame_time,
child_frames_time,
num_child_frames,
) = await get_interactable_element_tree(page, scrape_exclude, must_included_tags)
element_tree_time = time.time() - t0
element_tree = await cleanup_element_tree(page, url, copy.deepcopy(element_tree))
# Deep copy once for cleanup (mutates in-place), then copy the cleaned result for trim.
# The second copy of the cleaned tree is cheaper since cleanup may remove elements.
t0 = time.time()
element_tree_copy = copy.deepcopy(element_tree)
element_tree = await cleanup_element_tree(page, url, element_tree_copy)
cleanup_time = time.time() - t0
t0 = time.time()
element_tree_trimmed = trim_element_tree(copy.deepcopy(element_tree))
trim_time = time.time() - t0
screenshots = []
screenshot_time = 0.0
if take_screenshots:
element_tree_trimmed_html_str = "".join(
json_to_html(element, need_skyvern_attrs=False) for element in element_tree_trimmed
@ -343,6 +372,7 @@ async def scrape_web_unsafe(
except Exception:
LOG.warning("Failed to get current x, y position of the page", exc_info=True)
t0 = time.time()
screenshots = await SkyvernFrame.take_split_screenshots(
page=page,
url=url,
@ -350,21 +380,26 @@ async def scrape_web_unsafe(
max_number=max_screenshot_number,
scroll=scroll,
)
screenshot_time = time.time() - t0
# scroll back to the original x, y position of the page
if x is not None and y is not None:
await skyvern_frame.safe_scroll_to_x_y(x, y)
LOG.debug("Scrolled back to the original x, y position of the page after scraping", x=x, y=y)
t0 = time.time()
id_to_css_dict, id_to_element_dict, id_to_frame_dict, id_to_element_hash, hash_to_element_ids = build_element_dict(
elements
)
build_dict_time = time.time() - t0
# if there are no elements, fail the scraping unless support_empty_page is True
if not elements and not support_empty_page:
raise NoElementFound()
t0 = time.time()
text_content = await get_frame_text(page.main_frame)
text_extraction_time = time.time() - t0
html = ""
window_dimension = None
@ -380,6 +415,25 @@ async def scrape_web_unsafe(
exc_info=True,
)
total_scrape_time = time.time() - scrape_start_time
LOG.debug(
"Scraping performance metrics",
url=url,
total_scrape_time=total_scrape_time,
js_inject_time=js_inject_time,
element_tree_time=element_tree_time,
main_frame_time=main_frame_time,
child_frames_time=child_frames_time,
num_child_frames=num_child_frames,
cleanup_time=cleanup_time,
trim_time=trim_time,
screenshot_time=screenshot_time,
build_dict_time=build_dict_time,
text_extraction_time=text_extraction_time,
num_elements=len(elements),
num_screenshots=len(screenshots),
)
return ScrapedPage(
elements=elements,
id_to_css_dict=id_to_css_dict,
@ -480,17 +534,20 @@ async def get_interactable_element_tree(
page: Page,
scrape_exclude: ScrapeExcludeFunc | None = None,
must_included_tags: list[str] | None = None,
) -> tuple[list[dict], list[dict]]:
) -> tuple[list[dict], list[dict], float, float, int]:
"""
Get the element tree of the page, including all the elements that are interactable.
:param page: Page instance to get the element tree from.
:return: Tuple containing the element tree and a map of element IDs to elements.
:return: Tuple of (elements, element_tree, main_frame_time, child_frames_time, num_child_frames).
"""
# main page index is 0
skyvern_page = await SkyvernFrame.create_instance(page)
t0 = time.time()
elements, element_tree = await skyvern_page.build_tree_from_body(
frame_name="main.frame", frame_index=0, must_included_tags=must_included_tags
)
main_frame_time = time.time() - t0
context = skyvern_context.ensure_context()
frames = await get_all_children_frames(page)
@ -502,6 +559,7 @@ async def get_interactable_element_tree(
frame_index = len(context.frame_index_map) + 1
context.frame_index_map[frame] = frame_index
t0 = time.time()
for frame in frames:
frame_index = context.frame_index_map[frame]
elements, element_tree = await add_frame_interactable_elements(
@ -511,8 +569,9 @@ async def get_interactable_element_tree(
element_tree,
must_included_tags,
)
child_frames_time = time.time() - t0
return elements, element_tree
return elements, element_tree, main_frame_time, child_frames_time, len(frames)
class IncrementalScrapePage(ElementTreeBuilder):
@ -557,7 +616,9 @@ class IncrementalScrapePage(ElementTreeBuilder):
self.elements = incremental_elements
incremental_tree = await cleanup_element_tree(frame, frame.url, copy.deepcopy(incremental_tree))
incremental_tree_copy = copy.deepcopy(incremental_tree)
incremental_tree = await cleanup_element_tree(frame, frame.url, incremental_tree_copy)
# Second copy of cleaned tree is cheaper since cleanup may have removed elements
trimmed_element_tree = trim_element_tree(copy.deepcopy(incremental_tree))
self.element_tree = incremental_tree
@ -699,7 +760,8 @@ def _should_keep_unique_id(element: dict) -> bool:
return element.get("interactable", False)
def trim_element(element: dict) -> dict:
def _trim_element_legacy(element: dict) -> dict:
"""Legacy: list-based BFS with two-pass attribute filtering (pre-V2)."""
queue = [element]
while queue:
queue_ele = queue.pop(0)
@ -725,7 +787,6 @@ def trim_element(element: dict) -> dict:
queue_ele["attributes"] = new_attributes
else:
del queue_ele["attributes"]
# remove the tag, don't need it in the HTML tree
if "keepAllAttr" in queue_ele:
del queue_ele["keepAllAttr"]
@ -754,6 +815,53 @@ def trim_element(element: dict) -> dict:
return element
def trim_element(element: dict) -> dict:
    """Strip internal bookkeeping fields from an element subtree in place.

    With ``ENABLE_DOM_PARSER_V2`` this walks the tree with an iterative BFS
    (deque); otherwise it delegates to the legacy list-based implementation.
    Removes frame markers, non-kept unique ids, filtered/empty attribute
    dicts, empty ``text`` and falsy pseudo-text fields. Returns the same
    (mutated) ``element``.
    """
    if not SettingsManager.get_settings().ENABLE_DOM_PARSER_V2:
        return _trim_element_legacy(element)

    pending: deque = deque([element])
    while pending:
        node = pending.popleft()

        node.pop("frame", None)
        node.pop("frame_index", None)

        if "id" in node and not _should_keep_unique_id(node):
            del node["id"]

        # Single-pass attribute filtering: base64 removal + whitelist together.
        keep_all = node.get("keepAllAttr", False)
        attrs = node.get("attributes")
        if attrs:
            trimmed = _filter_attributes(attrs, keep_all)
            if trimmed:
                node["attributes"] = trimmed
            else:
                node.pop("attributes", None)
        node.pop("keepAllAttr", None)

        kids = node.get("children")
        if kids:
            pending.extend(kids)
        else:
            node.pop("children", None)

        text_val = node.get("text")
        if text_val is not None and not str(text_val).strip():
            del node["text"]

        # Pop-and-reinsert keeps only truthy pseudo-text without double lookups.
        for pseudo_key in ("beforePseudoText", "afterPseudoText"):
            pseudo_val = node.pop(pseudo_key, None)
            if pseudo_val:
                node[pseudo_key] = pseudo_val
    return element
def trim_element_tree(elements: list[dict]) -> list[dict]:
for element in elements:
trim_element(element)
@ -783,6 +891,24 @@ def _trimmed_attributes(attributes: dict) -> dict:
return new_attributes
def _filter_attributes(attributes: dict, keep_all: bool) -> dict:
    """Single-pass attribute filtering.

    Drops base64 ``data:`` URIs, applies the reserved-attribute whitelist
    (unless ``keep_all``), truncates overly long ``name`` values, and keeps
    ``role`` when it is ``listbox``/``option`` even outside the whitelist.
    """
    filtered: dict = {}
    for name, val in attributes.items():
        # Skip base64 data URIs
        if name in BASE64_INCLUDE_ATTRIBUTES and isinstance(val, str) and "data:" in val:
            continue
        if keep_all or name in RESERVED_ATTRIBUTES:
            # Truncate long name attributes in the same pass
            if name == "name" and isinstance(val, str) and len(val) > 500:
                val = val[:500]
            filtered[name] = val
        elif name == "role" and val in ("listbox", "option"):
            filtered[name] = val
    return filtered
def _remove_unique_id(element: dict) -> None:
if "attributes" not in element:
return

View file

@ -20,12 +20,9 @@ from skyvern.forge.sdk.trace import traced
LOG = structlog.get_logger()
def load_js_script(filename: str = "domUtils.js") -> str:
    """Return the source of a JS helper file from the scraper directory.

    The original rendering interleaved the old zero-arg definition with the
    new parameterized one, and a metadata placeholder ``(unknown)`` replaced
    the ``{filename}`` interpolation in the path — both fixed here.

    Raises FileNotFoundError when the script file is missing.
    """
    path = f"{SKYVERN_DIR}/webeye/scraper/{filename}"
    try:
        with open(path, encoding="utf-8") as f:
            return f.read()
    except FileNotFoundError as e:
        # NOTE(review): diff context hidden here — the original likely logs
        # before re-raising; confirm against the full file.
        raise e
# Pre-load both parser scripts at import time; create_instance() picks one
# based on settings.ENABLE_DOM_PARSER_V2 when injecting into a frame.
# (The original rendering kept the superseded zero-arg assignment alongside
# the new ones — the duplicate is dropped here.)
JS_FUNCTION_DEFS = load_js_script("domUtils.js")
JS_FUNCTION_DEFS_LEGACY = load_js_script("domUtils_legacy.js")
class ScreenshotMode(StrEnum):
@ -379,8 +377,17 @@ class SkyvernFrame:
@classmethod
async def create_instance(cls, frame: Page | Frame) -> SkyvernFrame:
instance = cls(frame=frame)
await cls.evaluate(frame=instance.frame, expression=JS_FUNCTION_DEFS)
if SettingsManager.get_settings().ENABLE_EXP_ALL_TEXTUAL_ELEMENTS_INTERACTABLE:
settings = SettingsManager.get_settings()
js_to_inject = JS_FUNCTION_DEFS if settings.ENABLE_DOM_PARSER_V2 else JS_FUNCTION_DEFS_LEGACY
t0 = time.time()
await cls.evaluate(frame=instance.frame, expression=js_to_inject)
js_inject_elapsed = time.time() - t0
LOG.debug(
"domUtils.js injection time",
elapsed_time=js_inject_elapsed,
dom_parser_v2=settings.ENABLE_DOM_PARSER_V2,
)
if settings.ENABLE_EXP_ALL_TEXTUAL_ELEMENTS_INTERACTABLE:
await instance.evaluate(
frame=instance.frame, expression="() => window.GlobalEnableAllTextualElements = true"
)
@ -543,12 +550,23 @@ class SkyvernFrame:
) -> tuple[list[dict], list[dict]]:
must_included_tags = must_included_tags or []
js_script = "async ([frame_name, frame_index, must_included_tags]) => await buildTreeFromBody(frame_name, frame_index, must_included_tags)"
return await self.evaluate(
t0 = time.time()
result = await self.evaluate(
frame=self.frame,
expression=js_script,
timeout_ms=timeout_ms,
arg=[frame_name, frame_index, must_included_tags],
)
elapsed = time.time() - t0
num_elements = len(result[0]) if result and len(result) > 0 else 0
LOG.debug(
"buildTreeFromBody JS execution time",
frame_name=frame_name,
frame_index=frame_index,
elapsed_time=elapsed,
num_elements=num_elements,
)
return result
@traced()
async def get_incremental_element_tree(

View file

@ -0,0 +1,46 @@
from __future__ import annotations
import logging
from types import SimpleNamespace
import skyvern._cli_bootstrap as cli_bootstrap
def test_bootstrap_defaults_to_warning_without_explicit_log_level(monkeypatch) -> None:
    """CLI bootstrap should clamp logging to WARNING when LOG_LEVEL was never explicitly set."""
    setup_calls: list[str] = []
    # LOG_LEVEL has a value but is NOT in model_fields_set -> treated as an unset default.
    fake_settings = SimpleNamespace(LOG_LEVEL="INFO", model_fields_set=set())
    monkeypatch.setattr("skyvern.config.settings", fake_settings)
    monkeypatch.setattr("skyvern.forge.sdk.forge_log.setup_logger", lambda: setup_calls.append("called"))
    logger_names = ("", "skyvern", "httpx", "litellm", "playwright", "httpcore")
    # Snapshot logger levels so global logging state is restored after the test.
    previous_levels = {name: logging.getLogger(name).level for name in logger_names}
    try:
        cli_bootstrap.configure_cli_bootstrap_logging()
        assert setup_calls == ["called"]
        # Bootstrap rewrites the settings value to the quiet default...
        assert fake_settings.LOG_LEVEL == "WARNING"
        # ...and applies it to the root plus every noisy dependency logger.
        for name in logger_names:
            assert logging.getLogger(name).level == logging.WARNING
    finally:
        for name, level in previous_levels.items():
            logging.getLogger(name).setLevel(level)
def test_bootstrap_honors_explicit_log_level(monkeypatch) -> None:
    """An explicitly configured LOG_LEVEL must win over the quiet CLI default."""
    setup_calls: list[str] = []
    # "LOG_LEVEL" in model_fields_set marks the value as user-provided.
    fake_settings = SimpleNamespace(LOG_LEVEL="DEBUG", model_fields_set={"LOG_LEVEL"})
    monkeypatch.setattr("skyvern.config.settings", fake_settings)
    monkeypatch.setattr("skyvern.forge.sdk.forge_log.setup_logger", lambda: setup_calls.append("called"))
    logger_names = ("", "skyvern", "httpx", "litellm", "playwright", "httpcore")
    # Snapshot logger levels so global logging state is restored after the test.
    previous_levels = {name: logging.getLogger(name).level for name in logger_names}
    try:
        cli_bootstrap.configure_cli_bootstrap_logging()
        assert setup_calls == ["called"]
        # Explicit DEBUG setting must be preserved, not clamped to WARNING.
        assert fake_settings.LOG_LEVEL == "DEBUG"
        for name in logger_names:
            assert logging.getLogger(name).level == logging.DEBUG
    finally:
        for name, level in previous_levels.items():
            logging.getLogger(name).setLevel(level)

View file

@ -452,6 +452,32 @@ class TestWorkflowCommands:
assert parsed["ok"] is False
assert parsed["error"]["code"] == "RUN_NOT_FOUND"
    def test_workflow_status_cli_preserves_full_detail_behavior(
        self, monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture
    ) -> None:
        """CLI `workflow status` should request full verbosity and print the raw tool payload."""
        from skyvern.cli import workflow as workflow_cmd
        # Canned tool response in the standard result-envelope shape.
        tool = AsyncMock(
            return_value={
                "ok": True,
                "action": "skyvern_workflow_status",
                "browser_context": {"mode": "none", "session_id": None, "cdp_url": None},
                "data": {"run_id": "wr_123", "recording_url": "https://example.com/recording"},
                "artifacts": [],
                "timing_ms": {},
                "warnings": [],
                "error": None,
            }
        )
        monkeypatch.setattr(workflow_cmd, "tool_workflow_status", tool)
        workflow_cmd.workflow_status(run_id="wr_123", json_output=True)
        # The CLI path always asks the tool for the full (non-summarized) view.
        assert tool.await_args.kwargs == {"run_id": "wr_123", "verbosity": "full"}
        # With --json the CLI prints the envelope verbatim, including heavy fields.
        parsed = json.loads(capsys.readouterr().out)
        assert parsed["ok"] is True
        assert parsed["data"]["recording_url"] == "https://example.com/recording"
def test_workflow_update_missing_definition_file_raises_bad_parameter(
self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch
) -> None:

View file

@ -1,28 +1,17 @@
from __future__ import annotations
import logging
import skyvern.cli.commands as cli_commands
def test_configure_cli_logging_is_idempotent(monkeypatch) -> None:
setup_calls: list[str] = []
monkeypatch.setattr(cli_commands, "_setup_logger", lambda: setup_calls.append("called"))
monkeypatch.setattr(cli_commands, "_configure_cli_bootstrap_logging", lambda: setup_calls.append("called"))
monkeypatch.setattr(cli_commands, "_cli_logging_configured", False)
cli_commands.configure_cli_logging()
assert setup_calls == ["called"]
logger_names = ("skyvern", "httpx", "litellm", "playwright", "httpcore")
previous_levels = {name: logging.getLogger(name).level for name in logger_names}
try:
cli_commands.configure_cli_logging()
assert setup_calls == ["called"]
for name in logger_names:
assert logging.getLogger(name).level == logging.WARNING
cli_commands.configure_cli_logging()
assert setup_calls == ["called"]
finally:
for name, level in previous_levels.items():
logging.getLogger(name).setLevel(level)
cli_commands.configure_cli_logging()
assert setup_calls == ["called"]
def test_cli_callback_configures_logging(monkeypatch) -> None:

View file

@ -0,0 +1,255 @@
"""
Playwright-based benchmark for JavaScript DOM parsing (domUtils.js).
Tests run a real browser to measure actual buildTreeFromBody() performance
on a synthetic HTML page with many elements.
These tests require playwright to be installed with browser binaries.
They are automatically skipped if the browser binary is missing.
Run `playwright install chromium` to enable these tests.
"""
import pytest
from skyvern.webeye.scraper.scraper import JS_FUNCTION_DEFS
# Only run if playwright is available
playwright = pytest.importorskip("playwright")
def _launch_browser():
    """Start sync Playwright and launch headless Chromium.

    Returns a ``(browser, playwright_instance)`` pair. If the browser cannot
    be launched the Playwright driver is stopped before the error propagates,
    so no driver process is leaked.
    """
    from playwright.sync_api import sync_playwright

    playwright_instance = sync_playwright().start()
    try:
        headless_browser = playwright_instance.chromium.launch(headless=True)
    except Exception:
        playwright_instance.stop()
        raise
    return headless_browser, playwright_instance
@pytest.fixture(scope="module")
def browser_context():
    """Launch a browser for JS benchmarks. Skips if browser binary is missing.

    Module-scoped so all benchmark tests share one Chromium instance.
    """
    try:
        browser, pw = _launch_browser()
    except Exception as e:
        # Any launch failure (missing binary, sandbox issues) skips the module.
        pytest.skip(f"Playwright browser not available: {e}")
    context = browser.new_context()
    yield context
    # Teardown: close the context and browser, then stop the Playwright driver.
    context.close()
    browser.close()
    pw.stop()
def _generate_test_html(num_elements: int) -> str:
    """Generate an HTML page with the specified number of elements.

    Cycles through div/span/button/input/a tags so the page exercises both
    interactable and plain-text element handling in the DOM parser.
    """
    elements = []
    for i in range(num_elements):
        tag = ["div", "span", "button", "input", "a"][i % 5]
        if tag == "input":
            elements.append(
                f'<{tag} type="text" name="field_{i}" placeholder="Field {i}" '
                f'class="form-control" aria-label="Field {i}">'
            )
        elif tag == "a":
            elements.append(
                f'<{tag} href="https://example.com/{i}" class="link-{i}" aria-label="Link {i}">Link text {i}</{tag}>'
            )
        elif tag == "button":
            elements.append(f'<{tag} type="button" class="btn btn-{i}" aria-label="Button {i}">Click {i}</{tag}>')
        else:
            # div/span: non-form elements made hover-interactable via cursor style.
            elements.append(f'<{tag} class="element-{i}" style="cursor: pointer;">Text content {i}</{tag}>')
    body_content = "\n".join(elements)
    return f"""
    <!DOCTYPE html>
    <html>
    <head><title>Benchmark Page</title></head>
    <body>
        <div id="main-container">
        {body_content}
        </div>
    </body>
    </html>
    """
class TestDomUtilsJsBenchmark:
    """Benchmark buildTreeFromBody() in a real browser.

    Each test loads domUtils.js into a fresh page and times a JS entry point.
    The time assertions are generous upper bounds meant to catch
    order-of-magnitude regressions, not precise timings.
    """

    @pytest.mark.parametrize("num_elements", [500, 2000, 5000])
    def test_build_tree_performance(self, browser_context, num_elements: int):
        """Time buildTreeFromBody() against synthetic pages of increasing size."""
        page = browser_context.new_page()
        html = _generate_test_html(num_elements)
        page.set_content(html)
        # Load domUtils.js
        page.evaluate(JS_FUNCTION_DEFS)
        # Benchmark buildTreeFromBody
        result = page.evaluate("""
            async () => {
                const start = performance.now();
                const [elements, tree] = await buildTreeFromBody('main.frame', 0);
                const elapsed = performance.now() - start;
                return {
                    elapsed_ms: elapsed,
                    element_count: elements.length,
                    tree_root_count: tree.length,
                };
            }
        """)
        elapsed_ms = result["elapsed_ms"]
        element_count = result["element_count"]
        tree_root_count = result["tree_root_count"]
        print(f"\nbuildTreeFromBody({num_elements} DOM nodes):")
        print(f"  Time: {elapsed_ms:.1f}ms")
        print(f"  Elements found: {element_count}")
        print(f"  Tree roots: {tree_root_count}")
        # Should complete within reasonable time
        assert elapsed_ms < 10000, f"buildTreeFromBody took {elapsed_ms:.1f}ms for {num_elements} elements"
        assert element_count > 0, "Should find at least some elements"
        page.close()

    def test_hover_styles_map_performance(self, browser_context):
        """Benchmark getHoverStylesMap() with many stylesheets."""
        page = browser_context.new_page()
        # Create a page with many CSS rules (500 :hover rules plus two controls).
        css_rules = "\n".join([f".element-{i}:hover {{ cursor: pointer; background: #{i:06x}; }}" for i in range(500)])
        html = f"""
        <!DOCTYPE html>
        <html>
        <head>
        <style>{css_rules}</style>
        <style>
        .non-hover-1 {{ color: red; }}
        .non-hover-2 {{ color: blue; }}
        </style>
        </head>
        <body><div>Test</div></body>
        </html>
        """
        page.set_content(html)
        page.evaluate(JS_FUNCTION_DEFS)
        result = page.evaluate("""
            async () => {
                const start = performance.now();
                const hoverMap = await getHoverStylesMap();
                const elapsed = performance.now() - start;
                return {
                    elapsed_ms: elapsed,
                    hover_selectors: hoverMap.size,
                };
            }
        """)
        elapsed_ms = result["elapsed_ms"]
        hover_selectors = result["hover_selectors"]
        print("\ngetHoverStylesMap (500 hover rules + 2 non-hover rules):")
        print(f"  Time: {elapsed_ms:.1f}ms")
        print(f"  Hover selectors found: {hover_selectors}")
        assert elapsed_ms < 5000, f"getHoverStylesMap took {elapsed_ms:.1f}ms"
        assert hover_selectors > 0, "Should find hover selectors"
        page.close()

    def test_draw_bounding_boxes_performance(self, browser_context):
        """Benchmark drawBoundingBoxes() with visual grouping."""
        page = browser_context.new_page()
        html = _generate_test_html(1000)
        page.set_content(html)
        page.evaluate(JS_FUNCTION_DEFS)
        result = page.evaluate("""
            async () => {
                // First build the tree (populates cache)
                await buildTreeFromBody('main.frame', 0);
                const start = performance.now();
                await buildElementsAndDrawBoundingBoxes('main.frame', 0);
                const elapsed = performance.now() - start;
                // Count bounding boxes added
                const container = document.querySelector('#boundingBoxContainer');
                const boxCount = container ? container.children.length : 0;
                return {
                    elapsed_ms: elapsed,
                    box_count: boxCount,
                };
            }
        """)
        elapsed_ms = result["elapsed_ms"]
        box_count = result["box_count"]
        print("\nbuildElementsAndDrawBoundingBoxes (1000 elements):")
        print(f"  Time: {elapsed_ms:.1f}ms")
        print(f"  Bounding boxes: {box_count}")
        assert elapsed_ms < 10000, f"drawBoundingBoxes took {elapsed_ms:.1f}ms"
        page.close()

    def test_tree_correctness(self, browser_context):
        """Verify the tree structure is correct after optimizations."""
        page = browser_context.new_page()
        # Small hand-written page covering all major interactable element kinds.
        html = """
        <!DOCTYPE html>
        <html>
        <body>
        <div id="parent">
        <button id="btn1">Click me</button>
        <input type="text" name="field1" placeholder="Enter text">
        <a href="https://example.com">Link</a>
        <select name="dropdown">
        <option value="a">Option A</option>
        <option value="b">Option B</option>
        </select>
        </div>
        </body>
        </html>
        """
        page.set_content(html)
        page.evaluate(JS_FUNCTION_DEFS)
        result = page.evaluate("""
            async () => {
                const [elements, tree] = await buildTreeFromBody('main.frame', 0);
                return {
                    element_count: elements.length,
                    tree_root_count: tree.length,
                    // Check that interactable elements were found
                    interactable_count: elements.filter(e => e.interactable).length,
                    // Check a button was found
                    has_button: elements.some(e => e.tagName === 'button'),
                    // Check an input was found
                    has_input: elements.some(e => e.tagName === 'input'),
                    // Check a link was found
                    has_link: elements.some(e => e.tagName === 'a'),
                    // Check select was found with options
                    has_select: elements.some(e => e.tagName === 'select' && e.options && e.options.length === 2),
                };
            }
        """)
        assert result["element_count"] > 0
        assert result["interactable_count"] >= 4  # button, input, a, select
        assert result["has_button"]
        assert result["has_input"]
        assert result["has_link"]
        assert result["has_select"]
        page.close()

View file

@ -0,0 +1,405 @@
"""
Benchmark and regression tests for DOM parser pipeline performance.
Tests the Python-side processing: trim_element_tree, _filter_attributes,
json_to_html, and cleanup traversal patterns.
"""
import copy
import time
from collections import deque
import pytest
from skyvern.forge.sdk.core import skyvern_context
from skyvern.forge.sdk.core.skyvern_context import SkyvernContext
from skyvern.webeye.scraper.scraped_page import ScrapedPage, json_to_html
from skyvern.webeye.scraper.scraper import (
_filter_attributes,
_trimmed_attributes,
_trimmed_base64_data,
build_element_dict,
trim_element,
trim_element_tree,
)
@pytest.fixture(autouse=True)
def _setup_skyvern_context():
    """Ensure a SkyvernContext exists for tests that call json_to_html."""
    ctx = SkyvernContext()
    skyvern_context.set(ctx)
    yield
    # Always clear the context so state does not leak between tests.
    skyvern_context.reset()
def _make_element(
element_id: str,
tag: str = "div",
interactable: bool = True,
text: str = "sample text",
num_attrs: int = 10,
children: list | None = None,
) -> dict:
"""Generate a realistic element dict for testing."""
attrs = {
"class": f"cls-{element_id}",
"id": f"html-id-{element_id}",
"data-testid": f"test-{element_id}",
"style": "display: flex; align-items: center;",
"aria-label": f"Element {element_id}",
"role": "button",
"type": "button",
"name": f"name-{element_id}",
"value": f"val-{element_id}",
"placeholder": "Enter value...",
}
# Add extra non-reserved attributes to test filtering
for i in range(max(0, num_attrs - len(attrs))):
attrs[f"data-extra-{i}"] = f"extra-value-{i}"
return {
"id": element_id,
"tagName": tag,
"interactable": interactable,
"text": text,
"attributes": attrs,
"children": children or [],
"frame": "main.frame",
"frame_index": 0,
"keepAllAttr": False,
"beforePseudoText": "",
"afterPseudoText": "",
"purgeable": False,
"rect": {"top": 0, "left": 0, "bottom": 100, "right": 200, "width": 200, "height": 100},
}
def _make_element_tree(num_elements: int, depth: int = 3) -> list[dict]:
    """Generate a tree of elements for benchmarking.

    NOTE(review): the `remaining` bookkeeping is approximate — the tree holds
    at most `num_elements` nodes (capped by `element_counter`), not
    necessarily exactly that many. Sufficient for benchmarks; confirm before
    reusing where exact counts matter.
    """
    element_counter = 0
    children_per_node = max(1, num_elements // (depth + 1))
    def build_level(current_depth: int, remaining: int) -> list[dict]:
        # Recursively emit siblings at this depth, spending the shared
        # element_counter budget on nested children first.
        nonlocal element_counter
        level_elements = []
        while remaining > 0 and element_counter < num_elements:
            element_counter += 1
            children = []
            if current_depth < depth and remaining > 1:
                child_count = min(children_per_node, remaining - 1)
                children = build_level(current_depth + 1, child_count)
                remaining -= len(children)
            # Cycle tags and vary interactability/text so trimming logic is exercised.
            el = _make_element(
                element_id=f"el_{element_counter:04d}",
                tag=["div", "span", "button", "input", "a"][element_counter % 5],
                interactable=element_counter % 3 == 0,
                text=f"Text content for element {element_counter}" if element_counter % 2 == 0 else "",
                children=children,
            )
            level_elements.append(el)
            remaining -= 1
        return level_elements
    return build_level(0, num_elements)
class TestFilterAttributesMerged:
    """Test that the new merged _filter_attributes produces the same output as the two-pass approach."""

    def test_basic_whitelist(self):
        # Only reserved attributes survive when keep_all is False.
        attrs = {"class": "foo", "aria-label": "bar", "name": "baz", "data-x": "y"}
        result = _filter_attributes(attrs, keep_all=False)
        assert "aria-label" in result
        assert "name" in result
        assert "class" not in result
        assert "data-x" not in result

    def test_base64_removal(self):
        # data: URIs are stripped even from otherwise-kept attributes.
        attrs = {"href": "data:image/png;base64,abc123", "name": "test", "src": "data:text/html;base64,xyz"}
        result = _filter_attributes(attrs, keep_all=False)
        assert "href" not in result
        assert "src" not in result
        assert result["name"] == "test"

    def test_keep_all_attr(self):
        attrs = {"class": "foo", "aria-label": "bar", "data-x": "y"}
        result = _filter_attributes(attrs, keep_all=True)
        # keepAllAttr=True should keep everything except base64
        assert "class" in result
        assert "data-x" in result

    def test_role_listbox_option(self):
        # listbox/option roles are preserved by a special case in the filter.
        attrs = {"role": "listbox", "class": "foo"}
        result = _filter_attributes(attrs, keep_all=False)
        assert result["role"] == "listbox"
        assert "class" not in result

    def test_name_truncation_in_filter(self):
        """Name truncation should happen inside _filter_attributes (single pass)."""
        attrs = {"name": "x" * 1000, "aria-label": "test"}
        result = _filter_attributes(attrs, keep_all=False)
        assert len(result["name"]) == 500
        assert result["aria-label"] == "test"

    def test_equivalence_with_old_approach(self):
        """The new _filter_attributes should produce the same result as the old two-pass approach."""
        # Mix of reserved, non-reserved, base64, and special-cased attributes.
        attrs = {
            "class": "foo",
            "aria-label": "bar",
            "name": "baz",
            "href": "data:image/png;base64,abc",
            "src": "https://example.com/image.png",
            "role": "option",
            "data-x": "y",
            "type": "button",
        }
        # Old approach: two passes
        old_pass1 = _trimmed_base64_data(attrs)
        old_result = _trimmed_attributes(old_pass1)
        # New approach: single pass
        new_result = _filter_attributes(attrs, keep_all=False)
        assert old_result == new_result
class TestTrimElement:
    """Test trim_element correctness after optimization."""

    def test_removes_frame_fields(self):
        el = _make_element("test1")
        trim_element(el)
        assert "frame" not in el
        assert "frame_index" not in el

    def test_removes_keep_all_attr(self):
        el = _make_element("test2")
        trim_element(el)
        assert "keepAllAttr" not in el

    def test_removes_empty_text(self):
        el = _make_element("test3", text="")
        trim_element(el)
        assert "text" not in el

    def test_removes_empty_pseudo_text(self):
        el = _make_element("test4")
        el["beforePseudoText"] = ""
        el["afterPseudoText"] = ""
        trim_element(el)
        assert "beforePseudoText" not in el
        assert "afterPseudoText" not in el

    def test_keeps_interactable_id(self):
        el = _make_element("test5", interactable=True)
        trim_element(el)
        assert "id" in el

    def test_removes_non_interactable_id(self):
        el = _make_element("test6", interactable=False)
        # Strip every attribute that would otherwise force the id to be kept.
        el["attributes"].pop("disabled", None)
        el["attributes"].pop("aria-disabled", None)
        el["attributes"].pop("readonly", None)
        el["attributes"].pop("aria-readonly", None)
        el.pop("hoverOnly", None)
        trim_element(el)
        assert "id" not in el

    def test_filters_attributes(self):
        el = _make_element("test7")
        trim_element(el)
        attrs = el.get("attributes", {})
        # Non-reserved attributes should be removed
        assert "class" not in attrs
        assert "data-testid" not in attrs
        assert "style" not in attrs
        # Reserved attributes should remain
        assert "aria-label" in attrs
        assert "type" in attrs

    def test_truncates_long_name(self):
        # Names over 500 chars are capped during trimming.
        el = _make_element("test8")
        el["attributes"]["name"] = "x" * 1000
        trim_element(el)
        assert len(el["attributes"]["name"]) == 500

    def test_processes_children(self):
        # Trimming must recurse into child elements.
        child = _make_element("child1")
        parent = _make_element("parent1", children=[child])
        trim_element(parent)
        assert "frame" not in child
        assert "keepAllAttr" not in child
class TestTrimElementTreePerformance:
    """Benchmark tests for trim_element_tree at various scales."""

    @pytest.mark.parametrize("num_elements", [100, 1000, 5000])
    def test_trim_performance(self, num_elements: int):
        tree = _make_element_tree(num_elements)
        # Deep-copy so the shared fixture tree is not mutated in place.
        tree_copy = copy.deepcopy(tree)
        start = time.perf_counter()
        trim_element_tree(tree_copy)
        elapsed = time.perf_counter() - start
        # Log timing for visibility
        print(f"\ntrim_element_tree({num_elements} elements): {elapsed:.4f}s")
        # Should complete in reasonable time (< 1s for 5000 elements)
        assert elapsed < 2.0, f"trim_element_tree took too long: {elapsed:.4f}s for {num_elements} elements"
class TestJsonToHtmlPerformance:
    """Benchmark json_to_html at various scales."""

    @pytest.mark.parametrize("num_elements", [100, 1000, 5000])
    def test_json_to_html_performance(self, num_elements: int):
        tree = _make_element_tree(num_elements)
        # Trim first (like the real pipeline)
        tree = trim_element_tree(copy.deepcopy(tree))
        start = time.perf_counter()
        result = "".join(json_to_html(element) for element in tree)
        elapsed = time.perf_counter() - start
        print(f"\njson_to_html({num_elements} elements): {elapsed:.4f}s, output: {len(result)} chars")
        assert elapsed < 2.0, f"json_to_html took too long: {elapsed:.4f}s for {num_elements} elements"
        assert len(result) > 0

    def test_json_to_html_correctness(self):
        """Verify basic HTML output structure."""
        # Minimal already-trimmed element dict.
        el = {
            "tagName": "button",
            "id": "btn1",
            "interactable": True,
            "text": "Click me",
            "attributes": {"type": "submit", "aria-label": "Submit"},
            "children": [],
        }
        html = json_to_html(el)
        assert "<button" in html
        assert "Click me" in html
        assert 'type="submit"' in html
        assert "</button>" in html
class TestDequeVsList:
    """Verify deque.popleft() is faster than list.pop(0) for BFS."""

    def test_deque_faster_than_list(self):
        """Drain 10k items from the front of a list and a deque and compare timings."""
        n = 10000
        items = list(range(n))
        # Front-pop a list: each pop(0) shifts every remaining element, O(n) per pop.
        lst = list(items)
        t0 = time.perf_counter()
        for _ in range(n):
            lst.pop(0)
        list_time = time.perf_counter() - t0
        # Front-pop a deque: popleft() is O(1).
        dq = deque(items)
        t0 = time.perf_counter()
        for _ in range(n):
            dq.popleft()
        deque_time = time.perf_counter() - t0
        print(f"\nlist.pop(0): {list_time:.6f}s, deque.popleft(): {deque_time:.6f}s")
        # deque should be significantly faster
        assert deque_time < list_time
class TestBuildElementDict:
    """Test build_element_dict correctness and hash collision handling."""

    def test_basic_dict_building(self):
        elements = [
            {"id": "e1", "tagName": "button", "frame": "main", "attributes": {"type": "submit"}},
            {"id": "e2", "tagName": "input", "frame": "main", "attributes": {"name": "email"}},
        ]
        # build_element_dict returns five parallel lookup maps keyed by element id/hash.
        css_dict, elem_dict, frame_dict, hash_dict, hash_to_ids = build_element_dict(elements)
        assert "e1" in css_dict
        assert "e2" in css_dict
        assert elem_dict["e1"]["tagName"] == "button"
        assert frame_dict["e1"] == "main"
        assert "e1" in hash_dict
        assert "e2" in hash_dict

    def test_hash_collision_uses_append(self):
        """Verify that hash_to_element_ids uses list append (not concat) for collisions."""
        # Two identical elements (same tag, same attrs) will have the same hash
        el1 = {"id": "e1", "tagName": "div", "frame": "main", "attributes": {}}
        el2 = {"id": "e2", "tagName": "div", "frame": "main", "attributes": {}}
        _, _, _, hash_dict, hash_to_ids = build_element_dict([el1, el2])
        h1 = hash_dict["e1"]
        h2 = hash_dict["e2"]
        assert h1 == h2, "Identical elements should produce the same hash"
        # Both ids must be collected under the shared hash, in insertion order.
        assert hash_to_ids[h1] == ["e1", "e2"]
class TestEconomyTreeProcessing:
    """Test the economy tree SVG filtering logic."""

    def test_filters_svg_root(self):
        # An svg at the root is dropped entirely (returns None).
        svg_el = {"tagName": "svg", "children": [{"tagName": "path"}]}
        result = ScrapedPage._process_element_for_economy_tree(svg_el)
        assert result is None

    def test_filters_svg_children(self):
        tree = {
            "tagName": "div",
            "children": [
                {"tagName": "button", "children": []},
                {"tagName": "svg", "children": [{"tagName": "path"}]},
                {"tagName": "span", "children": []},
            ],
        }
        result = ScrapedPage._process_element_for_economy_tree(tree)
        assert result is not None
        # The svg child is removed; sibling order is preserved.
        children = result["children"]
        assert len(children) == 2
        assert children[0]["tagName"] == "button"
        assert children[1]["tagName"] == "span"

    def test_filters_nested_svg(self):
        tree = {
            "tagName": "div",
            "children": [
                {
                    "tagName": "span",
                    "children": [
                        {"tagName": "svg", "children": []},
                        {"tagName": "a", "children": []},
                    ],
                },
            ],
        }
        result = ScrapedPage._process_element_for_economy_tree(tree)
        assert result is not None
        # Filtering recurses: the svg nested under span is removed too.
        inner = result["children"][0]
        assert len(inner["children"]) == 1
        assert inner["children"][0]["tagName"] == "a"

    def test_filters_svg_case_insensitive(self):
        """SVG filtering is case-insensitive via .lower()."""
        tree = {
            "tagName": "div",
            "children": [
                {"tagName": "SVG", "children": []},
                {"tagName": "Svg", "children": []},
                {"tagName": "button", "children": []},
            ],
        }
        result = ScrapedPage._process_element_for_economy_tree(tree)
        assert result is not None
        assert len(result["children"]) == 1
        assert result["children"][0]["tagName"] == "button"

    def test_no_children(self):
        # Elements without a children key pass through unchanged.
        el = {"tagName": "input"}
        result = ScrapedPage._process_element_for_economy_tree(el)
        assert result is not None
        assert result["tagName"] == "input"

View file

@ -2,10 +2,12 @@
from __future__ import annotations
import json
from collections.abc import Iterator
import pytest
import skyvern.cli.mcp_tools.workflow as workflow_tools
from skyvern.cli.core.result import Artifact, BrowserContext, make_result, set_concise_responses
@ -183,3 +185,70 @@ def test_verbose_returns_all_fields() -> None:
assert result["data"]["resolved_selector"] is not None
assert result["artifacts"] == []
assert result["warnings"] == []
def test_workflow_status_summary_stays_bounded_for_heavy_payload() -> None:
    """A summary serialization must stay small even when the run payload is huge."""
    long_url = "https://artifacts.skyvern.example/" + ("x" * 1450)
    # Run payload padded with long URLs and dozens of artifact ids to inflate size.
    heavy_run = {
        "workflow_run_id": "wr_heavy",
        "status": "terminated",
        "failure_reason": "Execution terminated after repeated navigation failures",
        "workflow_title": "Heavy workflow",
        "recording_url": long_url,
        "screenshot_urls": [f"{long_url}-{idx}" for idx in range(6)],
        "downloaded_files": [{"url": f"{long_url}-download", "filename": "case-export.csv"}],
        "outputs": {
            "collect_customer_data": {
                "task_screenshot_artifact_ids": [f"art_task_{idx}" for idx in range(12)],
                "workflow_screenshot_artifact_ids": [f"art_workflow_{idx}" for idx in range(12)],
                "task_screenshots": [f"{long_url}-task-{idx}" for idx in range(4)],
                "workflow_screenshots": [f"{long_url}-workflow-{idx}" for idx in range(4)],
                "extracted_information": [{"account_id": "acct_123", "status": "terminated"}],
            },
            "submit_case": [
                {
                    "task_screenshot_artifact_ids": [f"art_nested_{idx}" for idx in range(6)],
                    "workflow_screenshot_artifact_ids": [f"art_followup_{idx}" for idx in range(6)],
                    "task_screenshots": [f"{long_url}-nested-task-{idx}" for idx in range(3)],
                    "workflow_screenshots": [f"{long_url}-nested-workflow-{idx}" for idx in range(4)],
                }
            ],
            "extracted_information": [{"duplicated_rollup": True}],
        },
        "run_with": "code",
    }
    full_payload = workflow_tools._serialize_run_full(heavy_run)
    summary_payload = workflow_tools._serialize_run_summary(heavy_run)
    # Full view stays huge; summary is bounded well below the MCP response budget.
    assert len(json.dumps(full_payload)) > 20_000
    assert len(json.dumps(summary_payload)) < 2_000
    result = make_result("skyvern_workflow_status", data=summary_payload)
    assert len(json.dumps(result)) < 2_200
    # Heavy fields are replaced with counts, not included verbatim.
    assert "recording_url" not in result["data"]
    assert "output" not in result["data"]
    # 12 + 12 top-level ids + 6 + 6 nested ids = 36 artifact ids counted.
    assert result["data"]["artifact_summary"]["artifact_id_count"] == 36
def test_workflow_status_summary_shrinks_simple_payload() -> None:
    """Even small runs shrink under summarization, and scalar outputs are previewed."""
    long_url = "https://artifacts.skyvern.example/" + ("y" * 1450)
    simple_run = {
        "workflow_run_id": "wr_simple",
        "status": "completed",
        "workflow_title": "Simple workflow",
        "recording_url": long_url,
        "screenshot_urls": [f"{long_url}-shot"],
        "outputs": {
            "result": "success",
            "order_id": "ord_123",
        },
        "run_with": "code",
    }
    full_payload = workflow_tools._serialize_run_full(simple_run)
    summary_payload = workflow_tools._serialize_run_summary(simple_run)
    assert len(json.dumps(full_payload)) > 1_500
    assert len(json.dumps(summary_payload)) < 800
    # Scalar outputs are small enough to survive into the summary preview.
    assert summary_payload["output_summary"]["scalar_preview"] == {"result": "success", "order_id": "ord_123"}

View file

@ -6,8 +6,10 @@ from types import SimpleNamespace
from unittest.mock import AsyncMock, patch
import pytest
from fastmcp import Client
import skyvern.cli.mcp_tools.workflow as workflow_tools
from skyvern.cli.mcp_tools import mcp
def _fake_workflow_response() -> SimpleNamespace:
@ -26,6 +28,64 @@ def _fake_workflow_response() -> SimpleNamespace:
)
def _fake_http_response(payload: dict[str, object], status_code: int = 200) -> SimpleNamespace:
return SimpleNamespace(
status_code=status_code,
json=lambda: payload,
text=json.dumps(payload),
)
def _heavy_workflow_run_payload(*, include_expanded_outputs: bool = True) -> dict[str, object]:
long_url = "https://artifacts.skyvern.example/" + ("x" * 1450)
screenshot_urls = [f"{long_url}-{idx}" for idx in range(6)]
task_artifact_ids = [f"art_task_{idx}" for idx in range(12)]
workflow_artifact_ids = [f"art_workflow_{idx}" for idx in range(12)]
outputs: dict[str, object] = {
"collect_customer_data": {
"task_screenshot_artifact_ids": task_artifact_ids,
"workflow_screenshot_artifact_ids": workflow_artifact_ids,
"extracted_information": [{"account_id": "acct_123", "status": "terminated"}],
"status": "terminated",
},
"submit_case": [
{
"workflow_screenshot_artifact_ids": [f"art_followup_{idx}" for idx in range(6)],
"task_screenshot_artifact_ids": [f"art_nested_{idx}" for idx in range(6)],
"result": "retry_required",
}
],
}
if include_expanded_outputs:
outputs["collect_customer_data"] |= {
"task_screenshots": screenshot_urls[:4],
"workflow_screenshots": screenshot_urls[2:6],
}
outputs["submit_case"][0] |= {
"task_screenshots": screenshot_urls[:3],
"workflow_screenshots": screenshot_urls[1:5],
}
outputs["extracted_information"] = [{"duplicated_rollup": True}]
return {
"workflow_id": "wpid_heavy",
"workflow_run_id": "wr_heavy",
"status": "terminated",
"failure_reason": "Execution terminated after repeated navigation failures",
"workflow_title": "Heavy workflow",
"recording_url": long_url,
"screenshot_urls": screenshot_urls,
"downloaded_files": [
{
"url": f"{long_url}-download",
"filename": "case-export.csv",
}
],
"outputs": outputs,
"run_with": "code",
}
@pytest.mark.asyncio
async def test_workflow_create_normalizes_invalid_text_prompt_llm_key(monkeypatch: pytest.MonkeyPatch) -> None:
fake_client = SimpleNamespace(create_workflow=AsyncMock(return_value=_fake_workflow_response()))
@ -439,3 +499,134 @@ async def test_mcp_text_prompt_without_llm_key_stays_null(monkeypatch: pytest.Mo
assert result["ok"] is True
assert sent_block.llm_key is None
assert sent_block.model is None
@pytest.mark.asyncio
async def test_workflow_status_uses_workflow_run_route_for_wr_ids(monkeypatch: pytest.MonkeyPatch) -> None:
    """wr_* run ids must hit the workflow-runs route directly, not the generic get_run."""
    payload = _heavy_workflow_run_payload(include_expanded_outputs=False)
    request = AsyncMock(return_value=_fake_http_response(payload))
    # Fake SDK client: get_run must stay untouched; raw httpx request is intercepted.
    fake_client = SimpleNamespace(
        get_run=AsyncMock(),
        _client_wrapper=SimpleNamespace(httpx_client=SimpleNamespace(request=request)),
    )
    monkeypatch.setattr(workflow_tools, "get_skyvern", lambda: fake_client)
    result = await workflow_tools.skyvern_workflow_status(run_id="wr_heavy")
    assert result["ok"] is True
    # Default (summary) verbosity requests the lean server response.
    request.assert_awaited_once_with(
        "api/v1/workflows/runs/wr_heavy",
        method="GET",
        params={"include_output_details": False},
    )
    fake_client.get_run.assert_not_awaited()
    data = result["data"]
    assert data["run_id"] == "wr_heavy"
    assert data["run_type"] == "workflow_run"
    # Heavy fields are summarized away, with counts in their place.
    assert "recording_url" not in data
    assert "output" not in data
    assert data["artifact_summary"]["recording_available"] is True
    assert data["artifact_summary"]["artifact_id_count"] == 36
    assert data["output_summary"]["nested_screenshot_count"] == 0
    assert data["output_summary"]["has_extracted_information"] is True
@pytest.mark.asyncio
async def test_workflow_status_full_preserves_expanded_workflow_details(monkeypatch: pytest.MonkeyPatch) -> None:
    """verbosity="full" must pass through the complete server payload untouched."""
    payload = _heavy_workflow_run_payload(include_expanded_outputs=True)
    request = AsyncMock(return_value=_fake_http_response(payload))
    fake_client = SimpleNamespace(
        get_run=AsyncMock(),
        _client_wrapper=SimpleNamespace(httpx_client=SimpleNamespace(request=request)),
    )
    monkeypatch.setattr(workflow_tools, "get_skyvern", lambda: fake_client)
    result = await workflow_tools.skyvern_workflow_status(run_id="wr_heavy", verbosity="full")
    assert result["ok"] is True
    # Full verbosity also asks the server for expanded output details.
    request.assert_awaited_once_with(
        "api/v1/workflows/runs/wr_heavy",
        method="GET",
        params={"include_output_details": True},
    )
    data = result["data"]
    assert data["run_id"] == "wr_heavy"
    # Heavy fields survive unmodified in full mode.
    assert data["recording_url"] == payload["recording_url"]
    assert data["output"] == payload["outputs"]
    assert data["workflow_title"] == "Heavy workflow"
@pytest.mark.asyncio
async def test_workflow_status_task_runs_still_use_get_run(monkeypatch: pytest.MonkeyPatch) -> None:
    """Non-wr_* run ids (task runs) must keep using the SDK's generic get_run."""
    # Minimal task-run object with all attributes the serializer reads.
    task_run = SimpleNamespace(
        run_id="tsk_v2_123",
        status="completed",
        run_type="task_v2",
        output={"answer": "42"},
        failure_reason=None,
        step_count=4,
        recording_url=None,
        app_url=None,
        browser_session_id=None,
        run_with=None,
        created_at=None,
        modified_at=None,
        started_at=None,
        finished_at=None,
        queued_at=None,
    )
    fake_client = SimpleNamespace(get_run=AsyncMock(return_value=task_run))
    monkeypatch.setattr(workflow_tools, "get_skyvern", lambda: fake_client)
    result = await workflow_tools.skyvern_workflow_status(run_id="tsk_v2_123")
    fake_client.get_run.assert_awaited_once_with("tsk_v2_123")
    data = result["data"]
    assert data["run_id"] == "tsk_v2_123"
    assert data["step_count"] == 4
    # Small scalar outputs are surfaced in the summary preview.
    assert data["output_summary"]["scalar_preview"] == {"answer": "42"}
@pytest.mark.asyncio
async def test_workflow_status_summary_via_mcp_client(monkeypatch: pytest.MonkeyPatch) -> None:
    """End-to-end through the MCP client: summary mode stays compact."""
    run_payload = _heavy_workflow_run_payload(include_expanded_outputs=False)
    http_request = AsyncMock(return_value=_fake_http_response(run_payload))
    stub_client = SimpleNamespace(
        get_run=AsyncMock(),
        _client_wrapper=SimpleNamespace(httpx_client=SimpleNamespace(request=http_request)),
    )
    monkeypatch.setattr(workflow_tools, "get_skyvern", lambda: stub_client)

    async with Client(mcp) as mcp_client:
        tool_result = await mcp_client.call_tool("skyvern_workflow_status", {"run_id": "wr_heavy"})

    assert tool_result.is_error is False
    assert isinstance(tool_result.data, dict)
    assert tool_result.data["ok"] is True

    body = tool_result.data["data"]
    assert body["run_id"] == "wr_heavy"
    assert body["artifact_summary"]["artifact_id_count"] == 36
    # Heavy fields never cross the MCP boundary in summary mode.
    assert "recording_url" not in body
    assert "output" not in body
@pytest.mark.asyncio
async def test_workflow_status_full_via_mcp_client(monkeypatch: pytest.MonkeyPatch) -> None:
    """End-to-end through the MCP client: full verbosity returns the expanded fields."""
    payload = _heavy_workflow_run_payload(include_expanded_outputs=True)
    request = AsyncMock(return_value=_fake_http_response(payload))
    # Stub the Skyvern SDK client so the underlying httpx request can be inspected.
    fake_client = SimpleNamespace(
        get_run=AsyncMock(),
        _client_wrapper=SimpleNamespace(httpx_client=SimpleNamespace(request=request)),
    )
    monkeypatch.setattr(workflow_tools, "get_skyvern", lambda: fake_client)
    async with Client(mcp) as client:
        result = await client.call_tool("skyvern_workflow_status", {"run_id": "wr_heavy", "verbosity": "full"})
    assert result.is_error is False
    assert isinstance(result.data, dict)
    assert result.data["ok"] is True
    data = result.data["data"]
    assert data["run_id"] == "wr_heavy"
    # Full verbosity preserves the heavy fields that summary mode strips.
    assert data["recording_url"] == payload["recording_url"]
    assert data["output"] == payload["outputs"]