mirror of
https://github.com/Skyvern-AI/skyvern.git
synced 2026-04-28 03:30:10 +00:00
fix: fail immediately on non-retriable navigation errors (SKY-8875) (#4976)
Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
fbc8e2245d
commit
ef1665bfe4
8 changed files with 283 additions and 76 deletions
|
|
@ -16,6 +16,18 @@ DOWNLOAD_FILE_PREFIX = "downloads"
|
|||
SAVE_DOWNLOADED_FILES_TIMEOUT = 180
|
||||
GET_DOWNLOADED_FILES_TIMEOUT = 30
|
||||
NAVIGATION_MAX_RETRY_TIME = 5
|
||||
PERMANENT_NAV_ERRORS = ("net::ERR_INVALID_URL",)
|
||||
PROXY_SENSITIVE_NAV_ERRORS = (
|
||||
"net::ERR_NAME_NOT_RESOLVED",
|
||||
"net::ERR_NAME_RESOLUTION_FAILED",
|
||||
"net::ERR_CERT_",
|
||||
"net::ERR_SSL_",
|
||||
)
|
||||
# Errors that should not be retried within the same browser context/proxy.
|
||||
# The outer context-recreation retry in get_or_create_page may still attempt
|
||||
# recovery for proxy-sensitive errors by picking a different proxy node.
|
||||
SKIP_INNER_NAV_RETRY_ERRORS = PERMANENT_NAV_ERRORS + PROXY_SENSITIVE_NAV_ERRORS
|
||||
|
||||
AUTO_COMPLETION_POTENTIAL_VALUES_COUNT = 3
|
||||
DROPDOWN_MENU_MAX_DISTANCE = 100
|
||||
BROWSER_DOWNLOADING_SUFFIX = ".crdownload"
|
||||
|
|
|
|||
|
|
@ -904,7 +904,9 @@ class BaseTaskBlock(Block):
|
|||
# self.block_type (which does not change between retries).
|
||||
warn_if_file_download_max_steps_low(self, workflow_run_id=workflow_run_id)
|
||||
_is_file_download = self.block_type == BlockType.FILE_DOWNLOAD
|
||||
_navigate_wait_until = "domcontentloaded" if _is_file_download else "load"
|
||||
_navigate_wait_until: Literal["load", "domcontentloaded", "commit"] = (
|
||||
"domcontentloaded" if _is_file_download else "load"
|
||||
)
|
||||
|
||||
# TODO (kerem) we should always retry on terminated. We should make a distinction between retriable and
|
||||
# non-retryable terminations
|
||||
|
|
|
|||
|
|
@ -1136,12 +1136,8 @@ def _parse_single_action(action_str: str) -> dict[str, Any] | None:
|
|||
# Get arguments
|
||||
action_inputs = {}
|
||||
for kw in call.keywords:
|
||||
if kw.arg and isinstance(kw.value, (ast.Constant, ast.Str)):
|
||||
if isinstance(kw.value, ast.Constant):
|
||||
value = kw.value.value
|
||||
else: # ast.Str for older Python versions
|
||||
value = kw.value.s
|
||||
action_inputs[kw.arg] = value
|
||||
if kw.arg and isinstance(kw.value, ast.Constant):
|
||||
action_inputs[kw.arg] = kw.value.value
|
||||
|
||||
return {
|
||||
"action_type": func_name,
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
from __future__ import annotations
|
||||
|
||||
from typing import Protocol
|
||||
from typing import Literal, Protocol
|
||||
|
||||
from playwright.async_api import BrowserContext, Page, Playwright
|
||||
|
||||
|
|
@ -43,7 +43,7 @@ class BrowserState(Protocol):
|
|||
page: Page,
|
||||
url: str,
|
||||
retry_times: int = NAVIGATION_MAX_RETRY_TIME,
|
||||
wait_until: str = "load",
|
||||
wait_until: Literal["load", "domcontentloaded", "commit"] = "load",
|
||||
) -> None: ...
|
||||
|
||||
async def get_or_create_page(
|
||||
|
|
|
|||
86
skyvern/webeye/navigation.py
Normal file
86
skyvern/webeye/navigation.py
Normal file
|
|
@ -0,0 +1,86 @@
|
|||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import time
|
||||
from collections.abc import Awaitable, Callable
|
||||
from typing import Literal
|
||||
|
||||
import structlog
|
||||
|
||||
from skyvern.constants import PERMANENT_NAV_ERRORS, SKIP_INNER_NAV_RETRY_ERRORS
|
||||
from skyvern.exceptions import FailedToNavigateToUrl
|
||||
|
||||
LOG = structlog.get_logger()
|
||||
|
||||
NavigateFunc = Callable[[str], Awaitable[object]]
|
||||
SettleFunc = Callable[[], Awaitable[None]]
|
||||
SleepFunc = Callable[[float], Awaitable[None]]
|
||||
|
||||
# Progressive wait_until degradation. Degrading to `domcontentloaded` and
|
||||
# then `commit` lets navigation succeed once the DOM or response is ready.
|
||||
_DEGRADATION_MAP: dict[str, list[str]] = {
|
||||
"load": ["load", "domcontentloaded", "commit"],
|
||||
"domcontentloaded": ["domcontentloaded", "commit"],
|
||||
"commit": ["commit"],
|
||||
}
|
||||
|
||||
|
||||
def is_skip_inner_retry_error(error_message: str) -> bool:
|
||||
return any(pattern in error_message for pattern in SKIP_INNER_NAV_RETRY_ERRORS)
|
||||
|
||||
|
||||
def is_permanent_navigation_error(error_message: str) -> bool:
|
||||
return any(pattern in error_message for pattern in PERMANENT_NAV_ERRORS)
|
||||
|
||||
|
||||
async def navigate_with_retry(
|
||||
navigate: NavigateFunc,
|
||||
url: str,
|
||||
retry_times: int,
|
||||
settle: SettleFunc,
|
||||
wait_until: Literal["load", "domcontentloaded", "commit"] = "load",
|
||||
sleep: SleepFunc = asyncio.sleep,
|
||||
) -> None:
|
||||
degradation = _DEGRADATION_MAP.get(wait_until, [wait_until])
|
||||
|
||||
for attempt in range(retry_times):
|
||||
strategy = degradation[min(attempt, len(degradation) - 1)]
|
||||
LOG.info("Trying to navigate to url", url=url, retry_time=attempt, wait_until=strategy)
|
||||
try:
|
||||
start_time = time.monotonic()
|
||||
await navigate(strategy)
|
||||
elapsed = time.monotonic() - start_time
|
||||
LOG.info("Page loading time", loading_time=elapsed, url=url, wait_until=strategy)
|
||||
await settle()
|
||||
LOG.info("Successfully navigated to url", url=url, retry_time=attempt, wait_until=strategy)
|
||||
return
|
||||
|
||||
except Exception as error:
|
||||
error_str = str(error)
|
||||
|
||||
if is_skip_inner_retry_error(error_str):
|
||||
LOG.warning(
|
||||
"Non-retriable navigation error, failing immediately",
|
||||
url=url,
|
||||
error=error_str,
|
||||
)
|
||||
raise FailedToNavigateToUrl(url=url, error_message=error_str) from error
|
||||
|
||||
if attempt >= retry_times - 1:
|
||||
LOG.exception(
|
||||
"Failed to navigate after retries",
|
||||
url=url,
|
||||
retry_times=retry_times,
|
||||
error=error_str,
|
||||
)
|
||||
raise FailedToNavigateToUrl(url=url, error_message=error_str) from error
|
||||
|
||||
LOG.warning(
|
||||
"Error while navigating to url, retrying",
|
||||
exc_info=True,
|
||||
url=url,
|
||||
retry_time=attempt,
|
||||
wait_until=strategy,
|
||||
error=error_str,
|
||||
)
|
||||
await sleep(1)
|
||||
|
|
@ -3,6 +3,7 @@ from __future__ import annotations
|
|||
import asyncio
|
||||
import random
|
||||
import time
|
||||
from typing import Literal
|
||||
from urllib.parse import urlparse
|
||||
|
||||
import structlog
|
||||
|
|
@ -21,6 +22,7 @@ from skyvern.schemas.runs import ProxyLocationInput
|
|||
from skyvern.webeye.browser_artifacts import BrowserArtifacts, VideoArtifact
|
||||
from skyvern.webeye.browser_factory import BrowserCleanupFunc, BrowserContextFactory
|
||||
from skyvern.webeye.browser_state import BrowserState
|
||||
from skyvern.webeye.navigation import is_permanent_navigation_error, navigate_with_retry
|
||||
from skyvern.webeye.scraper import scraper
|
||||
from skyvern.webeye.scraper.scraped_page import CleanupElementTreeFunc, ScrapedPage, ScrapeExcludeFunc
|
||||
from skyvern.webeye.utils.page import ScreenshotMode, SkyvernFrame
|
||||
|
|
@ -135,71 +137,15 @@ class RealBrowserState(BrowserState):
|
|||
page: Page,
|
||||
url: str,
|
||||
retry_times: int = NAVIGATION_MAX_RETRY_TIME,
|
||||
wait_until: str = "load",
|
||||
wait_until: Literal["load", "domcontentloaded", "commit"] = "load",
|
||||
) -> None:
|
||||
# SKY-8818: progressive wait_until degradation. Many pages never fire
|
||||
# `load` because a subresource stalls; degrading to `domcontentloaded` and
|
||||
# then `commit` lets navigation succeed once the DOM or response is ready.
|
||||
# Monotonic: a retry never upgrades to a STRONGER wait state than the caller asked for.
|
||||
_degradation_map: dict[str, list[str]] = {
|
||||
"load": ["load", "domcontentloaded", "commit"],
|
||||
"domcontentloaded": ["domcontentloaded", "commit"],
|
||||
"commit": ["commit"],
|
||||
}
|
||||
degradation = _degradation_map.get(wait_until, [wait_until])
|
||||
|
||||
try:
|
||||
for retry_time in range(retry_times):
|
||||
strategy = degradation[min(retry_time, len(degradation) - 1)]
|
||||
LOG.info(
|
||||
"Trying to navigate to url",
|
||||
url=url,
|
||||
retry_time=retry_time,
|
||||
wait_until=strategy,
|
||||
)
|
||||
try:
|
||||
start_time = time.time()
|
||||
await page.goto(
|
||||
url,
|
||||
timeout=settings.BROWSER_LOADING_TIMEOUT_MS,
|
||||
wait_until=strategy,
|
||||
)
|
||||
end_time = time.time()
|
||||
LOG.info(
|
||||
"Page loading time",
|
||||
loading_time=end_time - start_time,
|
||||
url=url,
|
||||
wait_until=strategy,
|
||||
)
|
||||
await self._wait_for_settle()
|
||||
LOG.info(
|
||||
"Successfully navigated to url",
|
||||
url=url,
|
||||
retry_time=retry_time,
|
||||
wait_until=strategy,
|
||||
)
|
||||
return
|
||||
|
||||
except Exception as e:
|
||||
if retry_time >= retry_times - 1:
|
||||
raise FailedToNavigateToUrl(url=url, error_message=str(e))
|
||||
|
||||
LOG.warning(
|
||||
f"Error while navigating to url: {str(e)}",
|
||||
exc_info=True,
|
||||
url=url,
|
||||
retry_time=retry_time,
|
||||
wait_until=strategy,
|
||||
)
|
||||
# Wait for 1 seconds before retrying
|
||||
await asyncio.sleep(1)
|
||||
|
||||
except Exception as e:
|
||||
LOG.exception(
|
||||
f"Failed to navigate to {url} after {retry_times} retries: {str(e)}",
|
||||
url=url,
|
||||
)
|
||||
raise e
|
||||
await navigate_with_retry(
|
||||
navigate=lambda strategy: page.goto(url, timeout=settings.BROWSER_LOADING_TIMEOUT_MS, wait_until=strategy),
|
||||
url=url,
|
||||
retry_times=retry_times,
|
||||
settle=self._wait_for_settle,
|
||||
wait_until=wait_until,
|
||||
)
|
||||
|
||||
async def get_working_page(self) -> Page | None:
|
||||
# HACK: currently, assuming the last page is always the working page.
|
||||
|
|
@ -348,12 +294,14 @@ class RealBrowserState(BrowserState):
|
|||
browser_profile_id=browser_profile_id,
|
||||
)
|
||||
except Exception as e:
|
||||
error_message = str(e)
|
||||
error_message = e.error_message if isinstance(e, FailedToNavigateToUrl) else str(e)
|
||||
if is_permanent_navigation_error(error_message):
|
||||
raise
|
||||
if "net::ERR" not in error_message:
|
||||
raise e
|
||||
raise
|
||||
if not await self.close_current_open_page():
|
||||
LOG.warning("Failed to close the current open page")
|
||||
raise e
|
||||
raise
|
||||
await self.check_and_fix_state(
|
||||
url=url,
|
||||
proxy_location=proxy_location,
|
||||
|
|
|
|||
0
tests/unit_tests/webeye/__init__.py
Normal file
0
tests/unit_tests/webeye/__init__.py
Normal file
163
tests/unit_tests/webeye/test_navigation_retry.py
Normal file
163
tests/unit_tests/webeye/test_navigation_retry.py
Normal file
|
|
@ -0,0 +1,163 @@
|
|||
from __future__ import annotations
|
||||
|
||||
from unittest.mock import AsyncMock
|
||||
|
||||
import pytest
|
||||
|
||||
from skyvern.exceptions import FailedToNavigateToUrl
|
||||
from skyvern.webeye.navigation import navigate_with_retry
|
||||
from skyvern.webeye.real_browser_state import RealBrowserState
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"error_message",
|
||||
[
|
||||
pytest.param("net::ERR_NAME_NOT_RESOLVED", id="dns-not-resolved"),
|
||||
pytest.param("net::ERR_NAME_RESOLUTION_FAILED", id="dns-resolution-failed"),
|
||||
pytest.param("net::ERR_INVALID_URL", id="invalid-url"),
|
||||
pytest.param("net::ERR_CERT_AUTHORITY_INVALID", id="cert-authority-invalid"),
|
||||
pytest.param("net::ERR_CERT_DATE_INVALID", id="cert-date-invalid"),
|
||||
pytest.param("net::ERR_SSL_PROTOCOL_ERROR", id="ssl-protocol-error"),
|
||||
],
|
||||
)
|
||||
@pytest.mark.asyncio
|
||||
async def test_skip_inner_retry_error_fails_immediately(error_message: str) -> None:
|
||||
page = AsyncMock()
|
||||
page.goto = AsyncMock(side_effect=Exception(error_message))
|
||||
settle = AsyncMock()
|
||||
sleep = AsyncMock()
|
||||
|
||||
with pytest.raises(FailedToNavigateToUrl):
|
||||
await navigate_with_retry(
|
||||
navigate=lambda strategy: page.goto("http://example.invalid", timeout=30000, wait_until=strategy),
|
||||
url="http://example.invalid",
|
||||
retry_times=5,
|
||||
settle=settle,
|
||||
sleep=sleep,
|
||||
)
|
||||
|
||||
assert page.goto.call_count == 1
|
||||
settle.assert_not_awaited()
|
||||
sleep.assert_not_awaited()
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"error_message, retry_times",
|
||||
[
|
||||
pytest.param("net::ERR_TIMED_OUT", 3, id="timeout"),
|
||||
pytest.param("net::ERR_CONNECTION_RESET", 2, id="connection-reset"),
|
||||
],
|
||||
)
|
||||
@pytest.mark.asyncio
|
||||
async def test_retriable_error_exhausts_all_attempts(
|
||||
error_message: str,
|
||||
retry_times: int,
|
||||
) -> None:
|
||||
page = AsyncMock()
|
||||
page.goto = AsyncMock(side_effect=Exception(error_message))
|
||||
settle = AsyncMock()
|
||||
sleep = AsyncMock()
|
||||
|
||||
with pytest.raises(FailedToNavigateToUrl):
|
||||
await navigate_with_retry(
|
||||
navigate=lambda strategy: page.goto("http://example.com", timeout=30000, wait_until=strategy),
|
||||
url="http://example.com",
|
||||
retry_times=retry_times,
|
||||
settle=settle,
|
||||
sleep=sleep,
|
||||
)
|
||||
|
||||
assert page.goto.call_count == retry_times
|
||||
assert sleep.await_count == retry_times - 1
|
||||
settle.assert_not_awaited()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_transient_error_recovers_on_retry() -> None:
|
||||
page = AsyncMock()
|
||||
page.goto = AsyncMock(side_effect=[Exception("net::ERR_CONNECTION_RESET"), None])
|
||||
settle = AsyncMock()
|
||||
sleep = AsyncMock()
|
||||
|
||||
await navigate_with_retry(
|
||||
navigate=lambda strategy: page.goto("http://example.com", timeout=30000, wait_until=strategy),
|
||||
url="http://example.com",
|
||||
retry_times=3,
|
||||
settle=settle,
|
||||
sleep=sleep,
|
||||
)
|
||||
|
||||
assert page.goto.call_count == 2
|
||||
assert sleep.await_count == 1
|
||||
settle.assert_awaited_once()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_get_or_create_page_does_not_retry_permanent_failed_navigation() -> None:
|
||||
browser_state = RealBrowserState(pw=AsyncMock())
|
||||
browser_state.get_working_page = AsyncMock(return_value=None)
|
||||
browser_state.check_and_fix_state = AsyncMock(
|
||||
side_effect=FailedToNavigateToUrl(
|
||||
url="http://example.invalid",
|
||||
error_message="net::ERR_INVALID_URL",
|
||||
)
|
||||
)
|
||||
browser_state.close_current_open_page = AsyncMock(return_value=True)
|
||||
|
||||
with pytest.raises(FailedToNavigateToUrl):
|
||||
await browser_state.get_or_create_page(url="http://example.invalid")
|
||||
|
||||
assert browser_state.check_and_fix_state.await_count == 1
|
||||
browser_state.close_current_open_page.assert_not_awaited()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_get_or_create_page_retries_dns_error_with_context_recreation() -> None:
|
||||
page = AsyncMock()
|
||||
browser_state = RealBrowserState(pw=AsyncMock())
|
||||
browser_state.get_working_page = AsyncMock(side_effect=[None, page])
|
||||
browser_state.check_and_fix_state = AsyncMock(
|
||||
side_effect=[
|
||||
FailedToNavigateToUrl(
|
||||
url="http://example.com",
|
||||
error_message="net::ERR_NAME_NOT_RESOLVED",
|
||||
),
|
||||
None,
|
||||
]
|
||||
)
|
||||
browser_state.close_current_open_page = AsyncMock(return_value=True)
|
||||
browser_state.validate_browser_context = AsyncMock(return_value=True)
|
||||
browser_state._RealBrowserState__assert_page = AsyncMock(return_value=page)
|
||||
|
||||
result = await browser_state.get_or_create_page(url="http://example.com")
|
||||
|
||||
assert result is page
|
||||
assert browser_state.check_and_fix_state.await_count == 2
|
||||
browser_state.close_current_open_page.assert_awaited_once()
|
||||
browser_state.validate_browser_context.assert_awaited_once_with(page)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_get_or_create_page_retries_retriable_failed_navigation() -> None:
|
||||
page = AsyncMock()
|
||||
browser_state = RealBrowserState(pw=AsyncMock())
|
||||
browser_state.get_working_page = AsyncMock(side_effect=[None, page])
|
||||
browser_state.check_and_fix_state = AsyncMock(
|
||||
side_effect=[
|
||||
FailedToNavigateToUrl(
|
||||
url="http://example.com",
|
||||
error_message="net::ERR_CONNECTION_RESET",
|
||||
),
|
||||
None,
|
||||
]
|
||||
)
|
||||
browser_state.close_current_open_page = AsyncMock(return_value=True)
|
||||
browser_state.validate_browser_context = AsyncMock(return_value=True)
|
||||
browser_state._RealBrowserState__assert_page = AsyncMock(return_value=page)
|
||||
|
||||
result = await browser_state.get_or_create_page(url="http://example.com")
|
||||
|
||||
assert result is page
|
||||
assert browser_state.check_and_fix_state.await_count == 2
|
||||
browser_state.close_current_open_page.assert_awaited_once()
|
||||
browser_state.validate_browser_context.assert_awaited_once_with(page)
|
||||
Loading…
Add table
Add a link
Reference in a new issue