diff --git a/skyvern/forge/sdk/artifact/storage/base.py b/skyvern/forge/sdk/artifact/storage/base.py
index 4bbded38a..16ac807c0 100644
--- a/skyvern/forge/sdk/artifact/storage/base.py
+++ b/skyvern/forge/sdk/artifact/storage/base.py
@@ -31,6 +31,7 @@ def _file_infos_from_download_artifacts(artifacts: list[Artifact]) -> list[FileI
                 checksum=artifact.checksum,
                 filename=filename,
                 modified_at=artifact.created_at,
+                artifact_id=artifact.artifact_id,
             )
         )
     return infos
diff --git a/skyvern/forge/sdk/schemas/files.py b/skyvern/forge/sdk/schemas/files.py
index eaf96946d..9877da836 100644
--- a/skyvern/forge/sdk/schemas/files.py
+++ b/skyvern/forge/sdk/schemas/files.py
@@ -10,3 +10,8 @@ class FileInfo(BaseModel):
     checksum: str | None = Field(None, description="SHA-256 checksum of the file")
     filename: str | None = Field(None, description="Original filename")
     modified_at: datetime | None = Field(None, description="Modified time of the file")
+    # Optional: when the FileInfo is built from a DOWNLOAD Artifact row, the
+    # row's id is carried through so persisted snapshots (e.g. block outputs)
+    # can rebuild fresh signed URLs at API-fetch time even if the snapshot's
+    # ``url`` was minted before the artifact-first read existed.
+    artifact_id: str | None = Field(None, description="Artifact row id for refresh-on-read")
diff --git a/skyvern/forge/sdk/schemas/tasks.py b/skyvern/forge/sdk/schemas/tasks.py
index a3009bc95..3e7d62dbe 100644
--- a/skyvern/forge/sdk/schemas/tasks.py
+++ b/skyvern/forge/sdk/schemas/tasks.py
@@ -415,6 +415,11 @@ class TaskOutput(BaseModel):
     failure_category: list[dict[str, Any]] | None = None
     downloaded_files: list[FileInfo] | None = None
     downloaded_file_urls: list[str] | None = None  # For backward compatibility
+    # Persisted artifact ids for refresh-on-read (mirrors task_screenshot_artifact_ids).
+    # When set, the workflow-run-status response rebuilds ``downloaded_files`` and
+    # ``downloaded_file_urls`` from these IDs every API fetch so a presigned URL
+    # captured at execution time never makes it to the client.
+    downloaded_file_artifact_ids: list[str] | None = None
     task_screenshots: list[str] | None = None
     workflow_screenshots: list[str] | None = None
     task_screenshot_artifact_ids: list[str] | None = None
@@ -429,6 +434,10 @@
     ) -> TaskOutput:
         # For backward compatibility, extract just the URLs from FileInfo objects
         downloaded_file_urls = [file_info.url for file_info in downloaded_files] if downloaded_files else None
+        # Carry artifact ids through so the API can rebuild fresh signed URLs.
+        downloaded_file_artifact_ids = (
+            [fi.artifact_id for fi in downloaded_files if fi.artifact_id] if downloaded_files else None
+        ) or None
 
         return TaskOutput(
             task_id=task.task_id,
@@ -439,6 +448,7 @@
             failure_category=task.failure_category,
             downloaded_files=downloaded_files,
             downloaded_file_urls=downloaded_file_urls,
+            downloaded_file_artifact_ids=downloaded_file_artifact_ids,
             task_screenshot_artifact_ids=task_screenshot_artifact_ids,
             workflow_screenshot_artifact_ids=workflow_screenshot_artifact_ids,
         )
diff --git a/skyvern/forge/sdk/workflow/service.py b/skyvern/forge/sdk/workflow/service.py
index ddd8d7395..1b7afc307 100644
--- a/skyvern/forge/sdk/workflow/service.py
+++ b/skyvern/forge/sdk/workflow/service.py
@@ -54,6 +54,7 @@ from skyvern.forge import app
 from skyvern.forge.failure_classifier import classify_from_failure_reason
 from skyvern.forge.prompts import prompt_engine
 from skyvern.forge.sdk.artifact.models import Artifact, ArtifactType
+from skyvern.forge.sdk.artifact.storage.base import _file_infos_from_download_artifacts
 from skyvern.forge.sdk.cache import extraction_cache
 from skyvern.forge.sdk.cache.factory import CacheFactory
 from skyvern.forge.sdk.core import skyvern_context
@@ -514,6 +515,28 @@ class WorkflowService:
         urls = await app.ARTIFACT_MANAGER.get_share_links_with_bundle_support(artifacts)
         return [u for u in urls if u is not None]
 
+    async def _file_infos_from_download_artifact_ids(
+        self,
+        artifact_ids: list[str],
+        organization_id: str | None,
+    ) -> list[FileInfo]:
+        """Rebuild ``FileInfo`` objects from DOWNLOAD artifact IDs.
+
+        Used to refresh persisted block-output ``downloaded_files`` snapshots:
+        the URL captured at execution time may be a legacy presigned S3 URL,
+        but the artifact row has everything we need to mint a fresh signed
+        ``/v1/artifacts/{id}/content`` URL on each API fetch.
+        """
+        if not artifact_ids or not organization_id:
+            return []
+        artifacts = await app.DATABASE.artifacts.get_artifacts_by_ids(artifact_ids, organization_id)
+        if not artifacts:
+            return []
+        # Preserve the input order so block outputs render files in save order.
+        by_id = {a.artifact_id: a for a in artifacts}
+        ordered = [by_id[aid] for aid in artifact_ids if aid in by_id]
+        return _file_infos_from_download_artifacts(ordered)
+
     async def _refresh_output_screenshot_urls(
         self,
         value: Any,
@@ -529,6 +552,11 @@ class WorkflowService:
         For backwards compatibility with old data that stored URLs directly
         (now expired), we also check for task_id and regenerate URLs using
         the task_id lookup.
+
+        Also refreshes ``downloaded_files`` / ``downloaded_file_urls`` from
+        ``downloaded_file_artifact_ids`` so block outputs return short signed
+        ``/v1/artifacts/{id}/content`` URLs even when the URL captured at
+        execution time was a legacy presigned S3 URL.
         """
         if isinstance(value, dict):
             # Check if this looks like a TaskOutput with screenshot artifact IDs (new format)
@@ -548,6 +576,14 @@ class WorkflowService:
                         value["workflow_screenshot_artifact_ids"],
                         organization_id,
                     )
+                if value.get("downloaded_file_artifact_ids"):
+                    refreshed = await self._file_infos_from_download_artifact_ids(
+                        value["downloaded_file_artifact_ids"],
+                        organization_id,
+                    )
+                    if refreshed:
+                        value["downloaded_files"] = [fi.model_dump(mode="json") for fi in refreshed]
+                        value["downloaded_file_urls"] = [fi.url for fi in refreshed]
             elif has_old_format:
                 # Old format (backwards compat): regenerate URLs using task_id lookup
                 task_id = value.get("task_id")