Skyvern/tests/unit/test_taskv2_block_downloaded_files.py
Shuchang Zheng 76b10eb007
Fix OSS frontend build: add useFeatureFlag stub (#5042)
Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-10 23:36:42 -07:00

140 lines
5 KiB
Python

"""Tests for TaskV2Block downloaded_files output with loop-scoped filtering (SKY-7005)."""
from skyvern.forge.sdk.core import skyvern_context
from skyvern.forge.sdk.core.skyvern_context import SkyvernContext
from skyvern.forge.sdk.schemas.files import FileInfo
from skyvern.forge.sdk.workflow.loop_download_filter import filter_downloaded_files_for_current_iteration
def _file(url: str, filename: str, checksum: str) -> FileInfo:
return FileInfo(url=url, filename=filename, checksum=checksum)
def test_taskv2_output_includes_downloaded_files_filtered_by_loop() -> None:
"""When inside a loop, TaskV2Block output should include only THIS iteration's downloaded files."""
# Simulate baseline: iteration started with a.pdf already downloaded
loop_state = {
"downloaded_file_signatures_before_iteration": [
["a.pdf", "abc", "https://files/a.pdf"],
],
}
# Storage returns all files (a.pdf from before + b.pdf downloaded this iteration)
all_files = [
_file("https://files/a.pdf?sig=old", "a.pdf", "abc"),
_file("https://files/b.pdf?sig=new", "b.pdf", "def"),
]
# Apply the same filter that TaskV2Block now uses
filtered = filter_downloaded_files_for_current_iteration(all_files, loop_state)
# Build output dict the same way TaskV2Block does
task_v2_output = {
"task_id": "oc_test",
"status": "completed",
"summary": None,
"extracted_information": None,
"failure_reason": None,
"downloaded_files": [fi.model_dump() for fi in filtered],
"downloaded_file_urls": [fi.url for fi in filtered],
"task_screenshot_artifact_ids": [],
"workflow_screenshot_artifact_ids": [],
}
assert len(task_v2_output["downloaded_files"]) == 1
assert task_v2_output["downloaded_files"][0]["filename"] == "b.pdf"
assert task_v2_output["downloaded_file_urls"] == ["https://files/b.pdf?sig=new"]
def test_taskv2_output_includes_all_files_outside_loop() -> None:
"""Outside a loop (no loop_internal_state), all downloaded files should be included."""
all_files = [
_file("https://files/a.pdf", "a.pdf", "abc"),
_file("https://files/b.pdf", "b.pdf", "def"),
]
filtered = filter_downloaded_files_for_current_iteration(all_files, None)
task_v2_output = {
"downloaded_files": [fi.model_dump() for fi in filtered],
"downloaded_file_urls": [fi.url for fi in filtered],
}
assert len(task_v2_output["downloaded_files"]) == 2
assert task_v2_output["downloaded_file_urls"] == [
"https://files/a.pdf",
"https://files/b.pdf",
]
def test_taskv2_output_empty_when_no_new_downloads_in_iteration() -> None:
"""If no new files were downloaded in this iteration, both lists should be empty."""
loop_state = {
"downloaded_file_signatures_before_iteration": [
["a.pdf", "abc", "https://files/a.pdf"],
],
}
all_files = [
_file("https://files/a.pdf?sig=old", "a.pdf", "abc"),
]
filtered = filter_downloaded_files_for_current_iteration(all_files, loop_state)
task_v2_output = {
"downloaded_files": [fi.model_dump() for fi in filtered],
"downloaded_file_urls": [fi.url for fi in filtered],
}
assert task_v2_output["downloaded_files"] == []
assert task_v2_output["downloaded_file_urls"] == []
def test_taskv2_context_loop_state_available_after_task_execution() -> None:
"""Verify that loop_internal_state on the context survives TaskV2Block's context reset
and is available for the downloaded_files filtering step."""
loop_state = {
"downloaded_file_signatures_before_iteration": [
["a.pdf", "abc", "https://files/a.pdf"],
],
}
# Set context with loop state (simulating what ForLoopBlock does before executing child)
skyvern_context.set(
SkyvernContext(
organization_id="org_1",
workflow_run_id="wr_1",
run_id="wr_1",
loop_internal_state=loop_state,
)
)
# Simulate TaskV2Block's finally block preserving loop_internal_state
context = skyvern_context.current()
preserved_loop_state = context.loop_internal_state if context else None
skyvern_context.set(
SkyvernContext(
organization_id="org_1",
workflow_run_id="wr_1",
run_id=context.run_id if context else "wr_1",
loop_internal_state=preserved_loop_state,
)
)
# Now read context the way the new downloaded_files code does
current_context = skyvern_context.current()
assert current_context is not None
assert current_context.loop_internal_state == loop_state
# Filter should work correctly
all_files = [
_file("https://files/a.pdf?sig=old", "a.pdf", "abc"),
_file("https://files/b.pdf?sig=new", "b.pdf", "def"),
]
filtered = filter_downloaded_files_for_current_iteration(
all_files,
current_context.loop_internal_state,
)
assert [f.filename for f in filtered] == ["b.pdf"]
skyvern_context.reset()