Skyvern/tests/unit/test_forloop_child_caching_tracking.py

158 lines
6 KiB
Python

"""Tests that for-loop child blocks are tracked in blocks_to_update.
When a ForLoopBlock completes and its child blocks (e.g., file_download)
are not yet in script_blocks_by_label, they should be added to
blocks_to_update so the script generator produces cached functions for them.
This was the root cause of file_download blocks inside for-loops never
getting cached: they executed via block.py's execute_loop_helper() which
bypasses the blocks_to_update tracking in service.py's _execute_single_block().
"""
from datetime import datetime, timezone
from skyvern.forge.sdk.workflow.models.block import (
BlockType,
FileDownloadBlock,
ForLoopBlock,
)
from skyvern.forge.sdk.workflow.models.parameter import OutputParameter
from skyvern.forge.sdk.workflow.service import BLOCK_TYPES_THAT_SHOULD_BE_CACHED
def _make_output_param(label: str) -> OutputParameter:
now = datetime.now(tz=timezone.utc)
return OutputParameter(
key=f"{label}_output",
parameter_type="output",
output_parameter_id=f"op_{label}",
workflow_id="wf_test",
created_at=now,
modified_at=now,
)
def test_file_download_is_cacheable():
"""FILE_DOWNLOAD must be in BLOCK_TYPES_THAT_SHOULD_BE_CACHED."""
assert BlockType.FILE_DOWNLOAD in BLOCK_TYPES_THAT_SHOULD_BE_CACHED
def test_forloop_child_labels_can_be_collected():
"""Verify we can iterate ForLoopBlock.loop_blocks to find uncached children."""
download_block = FileDownloadBlock(
label="download_file",
output_parameter=_make_output_param("download_file"),
url="http://example.com",
navigation_goal="Download the file",
)
loop_block = ForLoopBlock(
label="download_loop",
output_parameter=_make_output_param("download_loop"),
loop_blocks=[download_block],
)
# Simulate the tracking logic from service.py
script_blocks_by_label: dict[str, object] = {} # empty = nothing cached
blocks_to_update: set[str] = set()
for loop_child in loop_block.loop_blocks:
if (
loop_child.label
and loop_child.label not in script_blocks_by_label
and loop_child.block_type in BLOCK_TYPES_THAT_SHOULD_BE_CACHED
):
blocks_to_update.add(loop_child.label)
assert "download_file" in blocks_to_update
def test_forloop_child_already_cached_not_tracked():
"""If the child block is already cached, it should NOT be added to blocks_to_update."""
download_block = FileDownloadBlock(
label="download_file",
output_parameter=_make_output_param("download_file"),
url="http://example.com",
navigation_goal="Download the file",
)
loop_block = ForLoopBlock(
label="download_loop",
output_parameter=_make_output_param("download_loop"),
loop_blocks=[download_block],
)
# Simulate: download_file is already in script_blocks_by_label
script_blocks_by_label = {"download_file": object()}
blocks_to_update: set[str] = set()
for loop_child in loop_block.loop_blocks:
if (
loop_child.label
and loop_child.label not in script_blocks_by_label
and loop_child.block_type in BLOCK_TYPES_THAT_SHOULD_BE_CACHED
):
blocks_to_update.add(loop_child.label)
assert "download_file" not in blocks_to_update
class TestDownloadSelectorCascade:
"""Test the download_selector cascade strategies."""
def test_url_value_returns_href_selector(self):
"""URL values should produce a[href*=filename] selector."""
from unittest.mock import patch
from skyvern.core.script_generations.skyvern_page import RunContext
rc = RunContext.__new__(RunContext)
loop_val = {"title": "Report", "url": "https://example.com/files/report-2025.pdf"}
with patch.object(type(rc), "loop_value", new_callable=lambda: property(lambda self: loop_val)):
result = rc.download_selector()
assert result == 'a[href*="report-2025.pdf"]'
def test_text_value_returns_has_text_selector(self):
"""Non-URL text values should produce a:has-text selector with the longest text."""
from unittest.mock import patch
from skyvern.core.script_generations.skyvern_page import RunContext
rc = RunContext.__new__(RunContext)
loop_val = {"title": "Annual Report 2025", "unique_identifier": "DOC-2025-001"}
with patch.object(type(rc), "loop_value", new_callable=lambda: property(lambda self: loop_val)):
result = rc.download_selector()
assert result == 'a:has-text("Annual Report 2025")'
def test_no_value_returns_none(self):
"""Empty or None loop_value should return None."""
from unittest.mock import patch
from skyvern.core.script_generations.skyvern_page import RunContext
rc = RunContext.__new__(RunContext)
with patch.object(type(rc), "loop_value", new_callable=lambda: property(lambda self: None)):
result = rc.download_selector()
assert result is None
def test_short_text_skipped(self):
"""Very short text values (< 3 chars) should be skipped."""
from unittest.mock import patch
from skyvern.core.script_generations.skyvern_page import RunContext
rc = RunContext.__new__(RunContext)
loop_val = {"id": "AB"}
with patch.object(type(rc), "loop_value", new_callable=lambda: property(lambda self: loop_val)):
result = rc.download_selector()
assert result is None
def test_url_takes_priority_over_text(self):
"""When both URL and text values exist, URL-based selector wins."""
from unittest.mock import patch
from skyvern.core.script_generations.skyvern_page import RunContext
rc = RunContext.__new__(RunContext)
loop_val = {"title": "Annual Report", "download_url": "https://example.com/report.pdf"}
with patch.object(type(rc), "loop_value", new_callable=lambda: property(lambda self: loop_val)):
result = rc.download_selector()
assert result == 'a[href*="report.pdf"]'