fix: use native scrollIntoView to handle nested scroll containers (#5401)
Some checks are pending
Run tests and pre-commit / Run tests and pre-commit hooks (push) Waiting to run
Run tests and pre-commit / Frontend Lint and Build (push) Waiting to run
Publish Fern Docs / run (push) Waiting to run

This commit is contained in:
LawyZheng 2026-04-07 14:15:23 +08:00 committed by GitHub
parent 42c7348ef9
commit bced984d4e
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 62 additions and 31 deletions

View file

@ -777,19 +777,18 @@ async def handle_click_action(
)
return [ActionFailure(InteractWithDisabledElement(skyvern_element.get_id()))]
# Skip scroll_into_view when a page-level SCROLL just completed on THIS element.
# The scroll positioned the page at the bottom to enable T&C buttons;
# scroll_into_view() would use programmatic window.scroll() to center the
# element, moving the page away from the bottom and re-disabling the button.
# Skip scroll_into_view when a SCROLL action just completed on THIS element.
# The scroll may have positioned the page or a container at the bottom to enable
# T&C buttons; element.scrollIntoView() would undo that positioning.
# Uses element ID matching (not a boolean) so unrelated clicks aren't affected.
skip_scroll_into_view = await page.evaluate(
"(id) => { const v = window.__skyvernPageScrolledElementId;"
" window.__skyvernPageScrolledElementId = null; return v === id; }",
"(id) => { const v = window.__skyvernScrolledElementId;"
" window.__skyvernScrolledElementId = null; return v === id; }",
action.element_id,
)
if skip_scroll_into_view:
LOG.info(
"Skipping scroll_into_view after page-level scroll to preserve scroll position",
"Skipping scroll_into_view after deliberate scroll action to preserve scroll position",
element_id=skyvern_element.get_id(),
)
else:
@ -2328,17 +2327,28 @@ async def handle_scroll_action(
# Wait for page JS to process scroll events (e.g. enabling buttons)
await page.wait_for_timeout(500)
# Record which element was just page-level scrolled. The click handler
# Record which element was just deliberately scrolled. The click handler
# checks this to skip scroll_into_view() for the SAME element, which
# would use programmatic window.scroll() to center it — undoing the
# would use element.scrollIntoView() to center it — undoing the
# scroll position that enables buttons on T&C pages. Using the element
# ID (not a boolean) ensures unrelated clicks aren't affected.
await page.evaluate(
"(id) => { window.__skyvernPageScrolledElementId = id; }",
"(id) => { window.__skyvernScrolledElementId = id; }",
action.element_id,
)
return [ActionSuccess(data={"page_level_scroll": True})]
elif not scroll_result:
elif scroll_result:
# Sub-container was scrolled successfully. Record the element ID so
# the click handler skips scroll_into_view() for this element — same
# protection as page-level scrolls. Without this, element.scrollIntoView()
# would re-center the container and undo the deliberate scroll (e.g.,
# scrolling a T&C modal to the bottom to enable an accept button).
await page.evaluate(
"(id) => { window.__skyvernScrolledElementId = id; }",
action.element_id,
)
return [ActionSuccess(data={"container_scroll": True})]
else:
LOG.warning(
"Could not find scrollable container near element, falling back to mouse wheel",
element_id=action.element_id,

View file

@ -892,34 +892,49 @@ class SkyvernElement:
if not await self.is_visible():
return
# Step 1: Use native element.scrollIntoView() which handles both window scrolling
# AND nested scrollable containers (e.g., SPA app shells with overflow-y: auto).
# See SKY-8748 for the motivating case. Falls back to window-level scroll if the
# native call fails (e.g., detached elements).
try:
target_x: int | None = None
target_y: int | None = None
rect = await self.get_rect(timeout=timeout)
element_x: int | None = None
element_y: int | None = None
if rect is not None:
element_x = rect["x"] if rect["x"] > 0 else None
element_y = rect["y"] if rect["y"] > 0 else None
# calculating y to move the element to the middle of the viewport
if element_y is not None:
target_y = max(int(element_y - (settings.BROWSER_HEIGHT / 2)), 0)
if element_x is not None:
target_x = max(int(element_x - (settings.BROWSER_WIDTH / 2)), 0)
skyvern_frame = await SkyvernFrame.create_instance(self.get_frame())
if target_x is not None and target_y is not None:
await skyvern_frame.safe_scroll_to_x_y(target_x, target_y)
element_handler = await self.get_element_handler(timeout=timeout)
await skyvern_frame.scroll_into_view(element_handler)
except Exception:
LOG.info(
"Failed to calculate the y to move the element to the middle of the viewport, ignore it",
"Failed to scrollIntoView via native JS, falling back to window scroll",
exc_info=True,
element_id=self.get_id(),
)
try:
target_x: int | None = None
target_y: int | None = None
rect = await self.get_rect(timeout=timeout)
element_x: int | None = None
element_y: int | None = None
if rect is not None:
element_x = rect["x"] if rect["x"] > 0 else None
element_y = rect["y"] if rect["y"] > 0 else None
if element_y is not None:
target_y = max(int(element_y - (settings.BROWSER_HEIGHT / 2)), 0)
if element_x is not None:
target_x = max(int(element_x - (settings.BROWSER_WIDTH / 2)), 0)
skyvern_frame = await SkyvernFrame.create_instance(self.get_frame())
if target_x is not None and target_y is not None:
await skyvern_frame.safe_scroll_to_x_y(target_x, target_y)
except Exception:
LOG.info(
"Fallback window scroll also failed, ignoring",
exc_info=True,
element_id=self.get_id(),
)
# Step 2: Playwright actionability confirmation. After Step 1, the element should
# already be in the viewport so this check passes quickly.
try:
element_handler = await self.get_element_handler(timeout=timeout)
await element_handler.scroll_into_view_if_needed(timeout=timeout)

View file

@ -414,6 +414,12 @@ class SkyvernFrame:
except Exception:
LOG.warning("Failed to scroll to x, y, ignore it", x=x, y=y, exc_info=True)
async def scroll_into_view(self, element: ElementHandle) -> None:
"""Scroll all ancestor containers (including nested ones with overflow-y: auto)
so that the element is centered in the viewport."""
js_script = "(element) => element.scrollIntoView({block: 'center', inline: 'center', behavior: 'instant'})"
return await self.evaluate(frame=self.frame, expression=js_script, arg=element)
async def scroll_to_element_bottom(self, element: ElementHandle, page_by_page: bool = False) -> None:
js_script = "([element, page_by_page]) => scrollToElementBottom(element, page_by_page)"
return await self.evaluate(frame=self.frame, expression=js_script, arg=[element, page_by_page])