🔄 synced local 'skyvern/' with remote 'skyvern/'

This commit is contained in:
Shuchang Zheng 2026-04-26 22:17:54 +00:00
parent 632462a9da
commit 6f09c387b6
4 changed files with 52 additions and 0 deletions

View file

@@ -31,6 +31,7 @@ def _file_infos_from_download_artifacts(artifacts: list[Artifact]) -> list[FileI
checksum=artifact.checksum,
filename=filename,
modified_at=artifact.created_at,
artifact_id=artifact.artifact_id,
)
)
return infos

View file

@@ -10,3 +10,8 @@ class FileInfo(BaseModel):
checksum: str | None = Field(None, description="SHA-256 checksum of the file")
filename: str | None = Field(None, description="Original filename")
modified_at: datetime | None = Field(None, description="Modified time of the file")
# Optional: when the FileInfo is built from a DOWNLOAD Artifact row, the
# row's id is carried through so persisted snapshots (e.g. block outputs)
# can rebuild fresh signed URLs at API-fetch time even if the snapshot's
# ``url`` was minted before the artifact-first read existed.
artifact_id: str | None = Field(None, description="Artifact row id for refresh-on-read")

View file

@@ -415,6 +415,11 @@ class TaskOutput(BaseModel):
failure_category: list[dict[str, Any]] | None = None
downloaded_files: list[FileInfo] | None = None
downloaded_file_urls: list[str] | None = None # For backward compatibility
# Persisted artifact ids for refresh-on-read (mirrors task_screenshot_artifact_ids).
# When set, the workflow-run-status response rebuilds ``downloaded_files`` and
# ``downloaded_file_urls`` from these IDs every API fetch so a presigned URL
# captured at execution time never makes it to the client.
downloaded_file_artifact_ids: list[str] | None = None
task_screenshots: list[str] | None = None
workflow_screenshots: list[str] | None = None
task_screenshot_artifact_ids: list[str] | None = None
@@ -429,6 +434,10 @@
) -> TaskOutput:
# For backward compatibility, extract just the URLs from FileInfo objects
downloaded_file_urls = [file_info.url for file_info in downloaded_files] if downloaded_files else None
# Carry artifact ids through so the API can rebuild fresh signed URLs.
downloaded_file_artifact_ids = (
[fi.artifact_id for fi in downloaded_files if fi.artifact_id] if downloaded_files else None
) or None
return TaskOutput(
task_id=task.task_id,
@@ -439,6 +448,7 @@
failure_category=task.failure_category,
downloaded_files=downloaded_files,
downloaded_file_urls=downloaded_file_urls,
downloaded_file_artifact_ids=downloaded_file_artifact_ids,
task_screenshot_artifact_ids=task_screenshot_artifact_ids,
workflow_screenshot_artifact_ids=workflow_screenshot_artifact_ids,
)

View file

@@ -54,6 +54,7 @@ from skyvern.forge import app
from skyvern.forge.failure_classifier import classify_from_failure_reason
from skyvern.forge.prompts import prompt_engine
from skyvern.forge.sdk.artifact.models import Artifact, ArtifactType
from skyvern.forge.sdk.artifact.storage.base import _file_infos_from_download_artifacts
from skyvern.forge.sdk.cache import extraction_cache
from skyvern.forge.sdk.cache.factory import CacheFactory
from skyvern.forge.sdk.core import skyvern_context
@@ -514,6 +515,28 @@ class WorkflowService:
urls = await app.ARTIFACT_MANAGER.get_share_links_with_bundle_support(artifacts)
return [u for u in urls if u is not None]
async def _file_infos_from_download_artifact_ids(
    self,
    artifact_ids: list[str],
    organization_id: str | None,
) -> list[FileInfo]:
    """Reconstruct ``FileInfo`` entries for the given DOWNLOAD artifact IDs.

    Persisted block-output ``downloaded_files`` snapshots may carry a URL
    captured at execution time (possibly a legacy presigned S3 URL); the
    artifact rows hold everything needed to mint a fresh signed
    ``/v1/artifacts/{id}/content`` URL on each API fetch, so we rebuild
    the ``FileInfo`` objects from the rows instead.
    """
    # Guard clauses: nothing to look up without IDs and an org scope.
    if not artifact_ids:
        return []
    if not organization_id:
        return []
    rows = await app.DATABASE.artifacts.get_artifacts_by_ids(artifact_ids, organization_id)
    if not rows:
        return []
    # Re-sequence fetched rows to match the caller-supplied ID order so
    # block outputs keep rendering files in the order they were saved.
    lookup = {row.artifact_id: row for row in rows}
    in_order = [lookup[artifact_id] for artifact_id in artifact_ids if artifact_id in lookup]
    return _file_infos_from_download_artifacts(in_order)
async def _refresh_output_screenshot_urls(
self,
value: Any,
@@ -529,6 +552,11 @@
For backwards compatibility with old data that stored URLs directly (now expired),
we also check for task_id and regenerate URLs using the task_id lookup.
Also refreshes ``downloaded_files`` / ``downloaded_file_urls`` from
``downloaded_file_artifact_ids`` so block outputs return short signed
``/v1/artifacts/{id}/content`` URLs even when the URL captured at
execution time was a legacy presigned S3 URL.
"""
if isinstance(value, dict):
# Check if this looks like a TaskOutput with screenshot artifact IDs (new format)
@@ -548,6 +576,14 @@
value["workflow_screenshot_artifact_ids"],
organization_id,
)
if value.get("downloaded_file_artifact_ids"):
refreshed = await self._file_infos_from_download_artifact_ids(
value["downloaded_file_artifact_ids"],
organization_id,
)
if refreshed:
value["downloaded_files"] = [fi.model_dump(mode="json") for fi in refreshed]
value["downloaded_file_urls"] = [fi.url for fi in refreshed]
elif has_old_format:
# Old format (backwards compat): regenerate URLs using task_id lookup
task_id = value.get("task_id")