Skyvern/skyvern/utils/css_selector.py
pedrohsdb 42c7348ef9
Some checks are pending
Run tests and pre-commit / Run tests and pre-commit hooks (push) Waiting to run
Run tests and pre-commit / Frontend Lint and Build (push) Waiting to run
Publish Fern Docs / run (push) Waiting to run
fix: add settle delay before page.complete() verification + action timestamps (#5400)
2026-04-06 17:16:51 -07:00

199 lines
7.1 KiB
Python

"""Compute stable CSS selectors from element data for the script reviewer.
When the AI agent successfully interacts with an element during fallback,
we capture the element's attributes. This module computes a robust CSS
selector from those attributes so the script reviewer can write code that
targets the same element without relying on ephemeral unique_ids.
"""
from __future__ import annotations
import re
from typing import TYPE_CHECKING
from urllib.parse import urlparse
if TYPE_CHECKING:
from skyvern.webeye.actions.actions import Action
def compute_stable_selector(element_data: dict | None) -> str | None:
    """Derive a stable CSS selector from a scraped element dictionary.

    Reads the ``tagName``, ``attributes`` and ``text`` keys of *element_data*
    and returns the first selector that can be built, in this priority order
    (highest confidence first):

    1. ``#id`` — skipped when the id looks auto-generated
    2. ``[data-testid="..."]`` — stable test attribute
    3. ``tag[aria-label="..."]`` — accessibility attribute
    4. ``tag[name="..."]`` — input/select/textarea/button only
    5. ``tag[placeholder="..."]`` — input/textarea only
    6. ``tag:has-text("...")`` — button/a with at most 50 chars of text
    7. ``tag[role="..."]`` — ARIA role
    8. ``input[type="..."]`` — inputs whose type is not ``text``/``hidden``

    Attribute values that are missing, ``None`` or not strings (for example a
    boolean flag) are treated as absent instead of raising.

    Returns None if *element_data* is empty or no rule applies.
    """
    if not element_data:
        return None

    attrs = element_data.get("attributes") or {}

    def attr(name: str) -> str:
        """Return the stripped value of attribute *name*, or "" if unusable."""
        value = attrs.get(name)
        # A key can be present with None or a non-string value; calling
        # .strip() on those would raise, so they count as "not set".
        return value.strip() if isinstance(value, str) else ""

    raw_tag = element_data.get("tagName")
    tag = raw_tag.lower() if isinstance(raw_tag, str) else ""
    raw_text = element_data.get("text")
    text = raw_text.strip() if isinstance(raw_text, str) else ""

    # 1. ID attribute (strongest), unless it looks generated per page load.
    elem_id = attr("id")
    if elem_id and not _looks_dynamic(elem_id):
        return f"#{_css_escape(elem_id)}"

    # 2. data-testid (stable testing attribute)
    testid = attr("data-testid")
    if testid:
        return f'[data-testid="{_css_escape_attr(testid)}"]'

    # 3. aria-label + tag
    aria = attr("aria-label")
    if aria and tag:
        return f'{tag}[aria-label="{_css_escape_attr(aria)}"]'

    # 4. name + tag (for form elements)
    name = attr("name")
    if name and tag in ("input", "select", "textarea", "button"):
        return f'{tag}[name="{_css_escape_attr(name)}"]'

    # 5. placeholder + tag
    placeholder = attr("placeholder")
    if placeholder and tag in ("input", "textarea"):
        return f'{tag}[placeholder="{_css_escape_attr(placeholder)}"]'

    # 6. Visible text content + tag (for buttons, links)
    if text and tag in ("button", "a") and len(text) <= 50:
        # :has-text() is not standard CSS; it is intended for a selector
        # engine that supports it as a case-insensitive substring match.
        # Line breaks are flattened so the text fits on one selector line.
        clean_text = text.replace("\n", " ").replace("\r", "").strip()
        safe_text = _css_escape_attr(clean_text)
        return f'{tag}:has-text("{safe_text}")'

    # 7. role + tag
    role = attr("role")
    if role and tag:
        return f'{tag}[role="{_css_escape_attr(role)}"]'

    # 8. type + tag (for inputs — weak but better than nothing)
    input_type = attr("type")
    if input_type and tag == "input" and input_type not in ("text", "hidden"):
        return f'input[type="{_css_escape_attr(input_type)}"]'

    return None
def _looks_dynamic(value: str) -> bool:
"""Heuristic: IDs that are likely auto-generated and will change across runs."""
# Long hex strings (ember123, react-456, el_abc123def)
if re.search(r"[0-9a-f]{8,}", value, re.IGNORECASE):
return True
# Word-digit patterns (uid-1234, el_5678)
if re.search(r"^\w+[-_]\d{4,}$", value):
return True
return False
def _css_escape(s: str) -> str:
"""Escape a string for use as a CSS ID selector."""
return re.sub(r'([!"#$%&\'()*+,./:;<=>?@[\\\]^`{|}~])', r"\\\1", s)
def _css_escape_attr(s: str) -> str:
"""Escape a string for use inside a CSS attribute value."""
return s.replace("\\", "\\\\").replace('"', '\\"')
# ---------------------------------------------------------------------------
# Action summary builder (shared between workflow service & script service)
# ---------------------------------------------------------------------------
# Allow-list of element attributes that may be forwarded to the script
# reviewer; anything not named here (noisy or dynamic attributes) is dropped.
REVIEWER_SAFE_ATTRS = frozenset(
    (
        # identity
        "name",
        "id",
        "type",
        "role",
        # test hooks
        "data-testid",
        "data-test-id",
        "data-cy",
        "data-qa",
        # visible hints and labels
        "placeholder",
        "aria-label",
        "alt",
        "title",
        # links and forms
        "href",
        "for",
        "action",
        "method",
        # input behaviour
        "autocomplete",
        "inputmode",
        "pattern",
        "maxlength",
        # ARIA relationships
        "aria-describedby",
        "aria-labelledby",
        "aria-haspopup",
        # useful for pre-selected state
        "value",
    )
)
def build_action_summary(action: Action) -> dict:
    """Assemble the per-action summary dict handed to the script reviewer.

    Alongside the action's own fields (type, intention, reasoning, status),
    the summary carries context about the element it touched and a CSS
    selector suggestion from ``compute_stable_selector``, so the reviewer
    does not have to guess a selector from sparse attributes.

    Lives in this module (rather than workflow/service.py) so both the
    workflow service and script_service can use it without circular imports.
    """
    element = action.skyvern_element_data or {}
    attributes = element.get("attributes") or {}

    # Only allow-listed attributes with a truthy value are forwarded.
    reviewer_attrs = {
        key: val for key, val in attributes.items() if key in REVIEWER_SAFE_ATTRS and val
    }

    # Keep only scheme://host/path. Query params can contain OAuth tokens,
    # email addresses and session IDs, and the reviewer only needs host+path
    # for redirect detection. A URL without a host yields None.
    page_url = None
    raw_url = element.get("page_url")
    if raw_url:
        parts = urlparse(raw_url)
        if parts.netloc:
            page_url = f"{parts.scheme}://{parts.netloc}{parts.path}"

    context = action.input_or_select_context
    field = context.field if context else None

    # Legacy: 6 core attributes (kept for backward compat with older templates).
    if attributes:
        legacy_attrs = {
            key: val
            for key, val in attributes.items()
            if key in ("name", "id", "placeholder", "aria-label", "type", "role") and val
        }
    else:
        legacy_attrs = None

    # Text is capped at 100 characters; empty text is reported as None.
    text_preview = (element.get("text") or "")[:100] or None

    return {
        "action_type": action.action_type,
        "intention": action.intention,
        "reasoning": action.reasoning,
        "status": action.status,
        "page_url": page_url,
        "field": field,
        "element_attributes": legacy_attrs,
        # Element context for better selector generation
        "element_tag": element.get("tagName"),
        "element_text": text_preview,
        "all_attributes": reviewer_attrs or None,
        "css_suggestion": compute_stable_selector(element),
    }
def build_action_summaries_with_timing(actions: list[Action]) -> list[dict]:
    """Summarise actions and annotate the gap since the previous one.

    Only the first 20 actions are summarised. Each summary comes from
    ``build_action_summary``. When an action has a ``created_at`` and an
    earlier action in the list had one too, ``seconds_since_previous`` is
    added: the difference in seconds, clamped at 0 and rounded to one
    decimal place. Actions without a timestamp get no delta and do not move
    the reference point.

    The Jinja template filters to only render deltas > 3 seconds.
    """
    results = []
    last_seen = None
    for action in actions[:20]:
        entry = build_action_summary(action)
        stamp = action.created_at
        if stamp:
            if last_seen:
                elapsed = (stamp - last_seen).total_seconds()
                # Out-of-order timestamps are reported as 0, not a negative gap.
                entry["seconds_since_previous"] = round(max(elapsed, 0), 1)
            last_seen = stamp
        results.append(entry)
    return results