# Skyvern/skyvern/utils/css_selector.py

"""Compute stable CSS selectors from element data for the script reviewer.

When the AI agent successfully interacts with an element during fallback,
we capture the element's attributes. This module computes a robust CSS
selector from those attributes so the script reviewer can write code that
targets the same element without relying on ephemeral unique_ids.
"""

from __future__ import annotations

import re
from typing import TYPE_CHECKING
from urllib.parse import urlparse

if TYPE_CHECKING:
    from skyvern.webeye.actions.actions import Action


def compute_stable_selector(element_data: dict | None) -> str | None:
    """Derive a stable CSS selector from a scraped element dictionary.

    Reads ``tagName``, ``attributes`` and ``text`` from *element_data* and
    returns the first selector that applies, in this priority order
    (highest confidence first):

      1. #id                        — skipped when the id looks auto-generated
      2. [data-testid="..."]        — stable test attribute
      3. tag[aria-label="..."]      — accessibility attribute (needs a tag)
      4. tag[name="..."]            — input/select/textarea/button only
      5. tag[placeholder="..."]     — input/textarea only
      6. tag:has-text("...")        — button/a with at most 50 chars of text
      7. tag[role="..."]            — ARIA role (needs a tag)
      8. input[type="..."]          — inputs whose type is not text/hidden

    Missing keys, ``None`` values and non-string values are all treated as
    "attribute absent" instead of raising, so partially populated element
    data degrades to a lower tier (or to None) rather than crashing.

    Note that ``:has-text()`` (tier 6) is not standard CSS; it is a
    selector-engine extension (as provided by Playwright), so the result is
    meant for that kind of consumer rather than ``querySelector``.

    Returns None if no reliable selector can be built.
    """
    if not element_data:
        return None

    attrs = element_data.get("attributes")
    if not isinstance(attrs, dict):
        attrs = {}

    def attr(name: str) -> str:
        # A key can be present with a None / non-string value; calling
        # .strip() on that would raise, so only real strings count.
        value = attrs.get(name)
        return value.strip() if isinstance(value, str) else ""

    raw_tag = element_data.get("tagName")
    tag = raw_tag.lower() if isinstance(raw_tag, str) else ""
    raw_text = element_data.get("text")
    text = raw_text.strip() if isinstance(raw_text, str) else ""

    # 1. ID attribute (strongest — unique per page by spec)
    elem_id = attr("id")
    if elem_id and not _looks_dynamic(elem_id):
        return f"#{_css_escape(elem_id)}"

    # 2. data-testid (stable testing attribute)
    testid = attr("data-testid")
    if testid:
        return f'[data-testid="{_css_escape_attr(testid)}"]'

    # 3. aria-label + tag
    aria = attr("aria-label")
    if aria and tag:
        return f'{tag}[aria-label="{_css_escape_attr(aria)}"]'

    # 4. name + tag (for form elements)
    name = attr("name")
    if name and tag in ("input", "select", "textarea", "button"):
        return f'{tag}[name="{_css_escape_attr(name)}"]'

    # 5. placeholder + tag
    placeholder = attr("placeholder")
    if placeholder and tag in ("input", "textarea"):
        return f'{tag}[placeholder="{_css_escape_attr(placeholder)}"]'

    # 6. Visible text content + tag (for buttons, links)
    if text and tag in ("button", "a") and len(text) <= 50:
        # Use :has-text() which is a case-insensitive substring match.
        # Line breaks are flattened first so the quoted string stays on one line.
        clean_text = text.replace("\n", " ").replace("\r", "").strip()
        safe_text = _css_escape_attr(clean_text)
        return f'{tag}:has-text("{safe_text}")'

    # 7. role + tag
    role = attr("role")
    if role and tag:
        return f'{tag}[role="{_css_escape_attr(role)}"]'

    # 8. type + tag (for inputs — weak but better than nothing).
    # "text" and "hidden" are excluded: they say nothing useful about which input.
    input_type = attr("type")
    if input_type and tag == "input" and input_type not in ("text", "hidden"):
        return f'input[type="{_css_escape_attr(input_type)}"]'

    return None


def _looks_dynamic(value: str) -> bool:
    """Heuristic: IDs that are likely auto-generated and will change across runs."""
    # Long hex strings (ember123, react-456, el_abc123def)
    if re.search(r"[0-9a-f]{8,}", value, re.IGNORECASE):
        return True
    # Word-digit patterns (uid-1234, el_5678)
    if re.search(r"^\w+[-_]\d{4,}$", value):
        return True
    return False


def _css_escape(s: str) -> str:
    """Escape a string for use as a CSS ID selector."""
    return re.sub(r'([!"#$%&\'()*+,./:;<=>?@[\\\]^`{|}~])', r"\\\1", s)


def _css_escape_attr(s: str) -> str:
    """Escape a string for use inside a CSS attribute value."""
    return s.replace("\\", "\\\\").replace('"', '\\"')


# ---------------------------------------------------------------------------
# Action summary builder (shared between workflow service & script service)
# ---------------------------------------------------------------------------

# Allowlist of element attributes that may be forwarded to the script
# reviewer. Anything not named here (noisy or dynamic attributes) is dropped
# when the action summary is built.
REVIEWER_SAFE_ATTRS = frozenset(
    (
        # Identity and form metadata
        "id",
        "name",
        "type",
        "role",
        "value",  # useful for pre-selected state
        "placeholder",
        "autocomplete",
        "inputmode",
        "pattern",
        "maxlength",
        # Test hooks
        "data-testid",
        "data-test-id",
        "data-cy",
        "data-qa",
        # Accessibility
        "aria-label",
        "aria-describedby",
        "aria-labelledby",
        "aria-haspopup",
        "alt",
        "title",
        # Links, labels and form targets
        "href",
        "for",
        "action",
        "method",
    )
)


def build_action_summary(action: Action) -> dict:
    """Build a rich action summary dict for the script reviewer.

    Includes a computed CSS selector suggestion so the reviewer can write
    reliable selectors without guessing from sparse attributes.

    Kept in this module (rather than workflow/service.py) so both the workflow
    service and script_service can use it without circular imports.

    The returned dict has these keys: ``action_type``, ``intention``,
    ``reasoning``, ``status``, ``page_url``, ``field``,
    ``element_attributes``, ``element_tag``, ``element_text``,
    ``all_attributes`` and ``css_suggestion``. Values that cannot be derived
    are None.
    """
    elem = action.skyvern_element_data or {}
    attrs = elem.get("attributes") or {}

    # Only allowlisted attributes with a truthy value are forwarded.
    useful_attrs = {k: v for k, v in attrs.items() if k in REVIEWER_SAFE_ATTRS and v}

    # Reduce the URL to scheme://host[:port]/path. Query strings and fragments
    # can contain OAuth tokens, email addresses, session IDs; the reviewer only
    # needs host+path for redirect detection. The netloc can additionally carry
    # "user:password@" userinfo, so everything up to the last "@" is dropped too.
    page_url = None
    raw_url = elem.get("page_url")
    if raw_url:
        parsed = urlparse(raw_url)
        host = parsed.netloc.rpartition("@")[2]
        if host:
            page_url = f"{parsed.scheme}://{host}{parsed.path}"

    return {
        "action_type": action.action_type,
        "intention": action.intention,
        "reasoning": action.reasoning,
        "status": action.status,
        "page_url": page_url,
        "field": (action.input_or_select_context.field if action.input_or_select_context else None),
        # Legacy: 6 core attributes (kept for backward compat with older templates)
        "element_attributes": (
            {k: v for k, v in attrs.items() if k in ("name", "id", "placeholder", "aria-label", "type", "role") and v}
            if attrs
            else None
        ),
        # Element context for better selector generation
        "element_tag": elem.get("tagName"),
        # Text is capped at 100 characters; empty text is reported as None.
        "element_text": (elem.get("text") or "")[:100] or None,
        "all_attributes": useful_attrs or None,
        "css_suggestion": compute_stable_selector(elem),
    }


def build_action_summaries_with_timing(actions: list[Action]) -> list[dict]:
    """Summarize actions and annotate each with the time since the previous one.

    Only the first 20 actions are processed. Each gets the dict produced by
    build_action_summary; when the action has a ``created_at`` and an earlier
    processed action also had one, ``seconds_since_previous`` is added: the
    gap in seconds to the most recent earlier timestamp, clamped at 0 and
    rounded to one decimal. Actions without a timestamp get no delta and do
    not reset the reference point.

    The consuming Jinja template is expected to render only deltas above
    3 seconds (per the original note; the template is not part of this module).
    """
    result = []
    last_seen = None
    for action in actions[:20]:
        entry = build_action_summary(action)
        stamp = action.created_at
        if stamp:
            if last_seen:
                elapsed = (stamp - last_seen).total_seconds()
                # Out-of-order timestamps would give a negative gap; report 0 instead.
                entry["seconds_since_previous"] = round(max(elapsed, 0), 1)
            last_seen = stamp
        result.append(entry)
    return result