🔄 synced local 'skyvern/' with remote 'skyvern/'

This commit is contained in:
Shuchang Zheng 2026-04-27 15:42:13 +00:00
parent 9cca36204e
commit 6b3fdfd06d
11 changed files with 189 additions and 17 deletions

View file

@ -12,7 +12,12 @@ import structlog
from skyvern.config import settings
from skyvern.forge import app
from skyvern.forge.sdk.artifact.models import Artifact, ArtifactType, LogEntityType
from skyvern.forge.sdk.artifact.signing import parse_keyring, sign_artifact_url
from skyvern.forge.sdk.artifact.signing import (
ARTIFACT_URL_EXPIRY_SECONDS,
effective_artifact_url_expiry_seconds,
parse_keyring,
sign_artifact_url,
)
from skyvern.forge.sdk.core import skyvern_context
from skyvern.forge.sdk.db.id import generate_artifact_id
from skyvern.forge.sdk.db.models import ArtifactModel
@ -940,6 +945,7 @@ class ArtifactManager:
artifact_id: str,
artifact_name: str | None = None,
artifact_type: str | None = None,
expiry_seconds: int | None = None,
) -> str:
"""Return a signed ``/v1/artifacts/{id}/content`` URL for any artifact.
@ -947,18 +953,37 @@ class ArtifactManager:
``STORAGE.get_share_link``. This method always builds the Skyvern-origin
signed URL — regardless of the ``bundle_key`` used for DOWNLOAD artifacts —
so webhook payloads stay short and clients hit our origin.
``expiry_seconds`` overrides the URL's TTL; when None, the global
default applies. Callers with an organization in scope should resolve
the per-org override via :meth:`resolve_artifact_url_expiry_seconds`
once and pass the result here.
"""
return self._bundle_content_url(
artifact_id=artifact_id,
artifact_name=artifact_name,
artifact_type=artifact_type,
expiry_seconds=expiry_seconds,
)
async def resolve_artifact_url_expiry_seconds(self, organization_id: str | None) -> int:
"""Look up the org's artifact-URL TTL override; fall back to the global default.
One DB hit per call — typically resolved once per batch of URLs (e.g.
all downloads for a run) and passed into ``build_signed_content_url``.
"""
if organization_id is None:
return ARTIFACT_URL_EXPIRY_SECONDS
org = await app.DATABASE.organizations.get_organization(organization_id=organization_id)
per_org = org.artifact_url_expiry_seconds if org else None
return effective_artifact_url_expiry_seconds(per_org)
def _bundle_content_url(
self,
artifact_id: str,
artifact_name: str | None = None,
artifact_type: str | None = None,
expiry_seconds: int | None = None,
) -> str:
"""Return an absolute URL for a bundled artifact served via the content endpoint.
@ -978,6 +1003,7 @@ class ArtifactManager:
keyring=keyring,
artifact_name=artifact_name,
artifact_type=artifact_type,
expiry_seconds=expiry_seconds,
)
path = f"{base}/v1/artifacts/{artifact_id}/content"
extra: dict[str, str] = {}
@ -989,10 +1015,12 @@ class ArtifactManager:
async def get_share_link(self, artifact: Artifact) -> str | None:
if artifact.bundle_key:
expiry_seconds = await self.resolve_artifact_url_expiry_seconds(artifact.organization_id)
return self._bundle_content_url(
artifact.artifact_id,
artifact_name=artifact.bundle_key,
artifact_type=artifact.artifact_type,
expiry_seconds=expiry_seconds,
)
return await app.STORAGE.get_share_link(artifact)
@ -1011,12 +1039,19 @@ class ArtifactManager:
non_bundle_indices: list[int] = []
non_bundle_artifacts: list[Artifact] = []
# Resolve the per-org TTL once. All artifacts in a single batch share an
# org (callers always look up by run/workflow scope), so one DB hit
# covers every bundled URL we mint below.
organization_id = artifacts[0].organization_id if artifacts else None
bundled_expiry_seconds = await self.resolve_artifact_url_expiry_seconds(organization_id)
for i, artifact in enumerate(artifacts):
if artifact.bundle_key:
result[i] = self._bundle_content_url(
artifact.artifact_id,
artifact_name=artifact.bundle_key,
artifact_type=artifact.artifact_type,
expiry_seconds=bundled_expiry_seconds,
)
else:
non_bundle_indices.append(i)

View file

@ -31,10 +31,35 @@ from urllib.parse import urlencode
from pydantic import BaseModel, model_validator
ARTIFACT_URL_EXPIRY_SECONDS = 12 * 60 * 60 # 12 hours
ARTIFACT_URL_EXPIRY_SECONDS = 12 * 60 * 60 # 12 hours — global default when no per-org override.
# Bounds for the per-org override. 1 hour minimum keeps URLs useful for webhook
# consumers that retry across short outages; 7 days maximum follows the AWS S3
# presigned-URL cap so customers used to S3 don't get surprised. The route
# validates inbound values against these and the helper below clamps to them
# defensively in case a stray DB write puts garbage in the column.
ARTIFACT_URL_EXPIRY_SECONDS_MIN = 60 * 60 # 1 hour
ARTIFACT_URL_EXPIRY_SECONDS_MAX = 7 * 24 * 60 * 60 # 7 days
_ARTIFACT_CONTENT_PATH_TEMPLATE = "/v1/artifacts/{artifact_id}/content"
def effective_artifact_url_expiry_seconds(per_org_value: int | None) -> int:
"""Return the TTL to mint a signed artifact URL with.
Falls back to ``ARTIFACT_URL_EXPIRY_SECONDS`` when ``per_org_value`` is
None. Clamps to ``[ARTIFACT_URL_EXPIRY_SECONDS_MIN, ARTIFACT_URL_EXPIRY_SECONDS_MAX]``
so an out-of-range stored value still produces a sane URL rather than
refusing to serve.
"""
if per_org_value is None:
return ARTIFACT_URL_EXPIRY_SECONDS
return max(
ARTIFACT_URL_EXPIRY_SECONDS_MIN,
min(ARTIFACT_URL_EXPIRY_SECONDS_MAX, per_org_value),
)
class HmacKeyEntry(BaseModel):
secret: str
"""HMAC secret as a plain string."""
@ -90,23 +115,29 @@ def sign_artifact_url(
keyring: ArtifactHmacKeyring,
artifact_name: str | None = None,
artifact_type: str | None = None,
expiry_seconds: int | None = None,
) -> str:
"""Return a fully-signed URL for the artifact content endpoint.
Signs with keyring.current_kid. The URL is valid for
ARTIFACT_URL_EXPIRY_SECONDS (12 hours) from the time this function is called.
Signs with ``keyring.current_kid``. The URL is valid for ``expiry_seconds``
from the time this function is called. When ``expiry_seconds`` is None,
falls back to the global ``ARTIFACT_URL_EXPIRY_SECONDS`` (12 hours).
Callers wanting a per-org override should resolve it via
:func:`effective_artifact_url_expiry_seconds` before calling here.
The signature is URL-safe base64 (no padding), 43 characters for SHA-256.
artifact_name and artifact_type are appended as informational query params
for client use only they are not part of the canonical string or signature.
``artifact_name`` and ``artifact_type`` are appended as informational query
params for client use only — they are not part of the canonical string or
signature.
"""
kid = keyring.current_kid
secret_bytes = keyring.get_secret_bytes(kid)
if secret_bytes is None:
raise ValueError(f"No secret found for kid '{kid}'")
expiry = int(time.time()) + ARTIFACT_URL_EXPIRY_SECONDS
ttl = expiry_seconds if expiry_seconds is not None else ARTIFACT_URL_EXPIRY_SECONDS
expiry = int(time.time()) + ttl
path = _ARTIFACT_CONTENT_PATH_TEMPLATE.format(artifact_id=artifact_id)
canonical = _canonical_string("GET", path, expiry, kid)
sig = _hmac_b64(secret_bytes, canonical)

View file

@ -306,7 +306,7 @@ class AzureStorage(BaseStorage):
# show completed downloads. Mirrors s3.py.
artifacts = [a for a in artifacts if a.uri and not a.uri.endswith(BROWSER_DOWNLOADING_SUFFIX)]
if artifacts:
return _file_infos_from_download_artifacts(artifacts)
return await _file_infos_from_download_artifacts(artifacts)
return await self._get_shared_downloaded_files_in_browser_session_via_listing(
organization_id=organization_id, browser_session_id=browser_session_id
@ -534,7 +534,7 @@ class AzureStorage(BaseStorage):
if run_id is not None and settings.ARTIFACT_CONTENT_HMAC_KEYRING:
artifacts = await self._list_download_artifacts_safe(organization_id=organization_id, run_id=run_id)
if artifacts:
return _file_infos_from_download_artifacts(artifacts)
return await _file_infos_from_download_artifacts(artifacts)
return await self._get_downloaded_files_via_blob_listing(organization_id=organization_id, run_id=run_id)

View file

@ -10,13 +10,21 @@ from skyvern.forge.sdk.schemas.task_v2 import TaskV2, Thought
from skyvern.forge.sdk.schemas.workflow_runs import WorkflowRunBlock
def _file_infos_from_download_artifacts(artifacts: list[Artifact]) -> list[FileInfo]:
async def _file_infos_from_download_artifacts(artifacts: list[Artifact]) -> list[FileInfo]:
"""Build the API-shaped ``FileInfo`` list from DOWNLOAD artifact rows.
Filename is the URI basename (the save site writes ``{base_uri}/{file}``);
checksum and modified_at come straight from the row, so retrieval needs
zero S3 round-trips.
All artifacts in a single batch share the same organization (downloads are
scoped to a run, which is scoped to an org), so the per-org URL TTL is
resolved once and applied to every URL.
"""
if not artifacts:
return []
organization_id = artifacts[0].organization_id
expiry_seconds = await app.ARTIFACT_MANAGER.resolve_artifact_url_expiry_seconds(organization_id)
infos: list[FileInfo] = []
for artifact in artifacts:
filename = artifact.uri.rsplit("/", 1)[-1] if artifact.uri else ""
@ -24,6 +32,7 @@ def _file_infos_from_download_artifacts(artifacts: list[Artifact]) -> list[FileI
artifact_id=artifact.artifact_id,
artifact_name=filename,
artifact_type=ArtifactType.DOWNLOAD.value,
expiry_seconds=expiry_seconds,
)
infos.append(
FileInfo(

View file

@ -362,7 +362,7 @@ class S3Storage(BaseStorage):
# rows so the agent can detect "still downloading" via DB query.
artifacts = [a for a in artifacts if a.uri and not a.uri.endswith(BROWSER_DOWNLOADING_SUFFIX)]
if artifacts:
return _file_infos_from_download_artifacts(artifacts)
return await _file_infos_from_download_artifacts(artifacts)
return await self._get_shared_downloaded_files_in_browser_session_via_listing(
organization_id=organization_id, browser_session_id=browser_session_id
@ -601,7 +601,7 @@ class S3Storage(BaseStorage):
if run_id is not None and settings.ARTIFACT_CONTENT_HMAC_KEYRING:
artifacts = await self._list_download_artifacts_safe(organization_id=organization_id, run_id=run_id)
if artifacts:
return _file_infos_from_download_artifacts(artifacts)
return await _file_infos_from_download_artifacts(artifacts)
# Legacy fallback — runs predating SKY-8861 (no artifact rows) and
# OSS-default deployments without HMAC signing both arrive here.

View file

@ -173,6 +173,7 @@ class OrganizationModel(Base):
domain = Column(String, nullable=True, index=True)
bw_organization_id = Column(String, nullable=True, default=None)
bw_collection_ids = Column(JSON, nullable=True, default=None)
artifact_url_expiry_seconds = Column(Integer, nullable=True)
created_at = Column(DateTime, default=datetime.datetime.utcnow, nullable=False)
modified_at = Column(
DateTime,

View file

@ -154,6 +154,8 @@ class OrganizationsRepository(BaseRepository):
webhook_callback_url: str | None = None,
max_steps_per_run: int | None = None,
max_retries_per_step: int | None = None,
artifact_url_expiry_seconds: int | None = None,
clear_artifact_url_expiry_seconds: bool = False,
) -> Organization:
async with self.Session() as session:
organization = (
@ -169,6 +171,13 @@ class OrganizationsRepository(BaseRepository):
organization.max_steps_per_run = max_steps_per_run
if max_retries_per_step:
organization.max_retries_per_step = max_retries_per_step
# ``clear_*`` decouples "don't update" (None) from "explicitly clear":
# callers pass ``clear_artifact_url_expiry_seconds=True`` to reset
# the value to NULL, falling back to the global default.
if clear_artifact_url_expiry_seconds:
organization.artifact_url_expiry_seconds = None
elif artifact_url_expiry_seconds is not None:
organization.artifact_url_expiry_seconds = artifact_url_expiry_seconds
await session.commit()
await session.refresh(organization)
return Organization.model_validate(organization)

View file

@ -315,6 +315,7 @@ def convert_to_organization(org_model: OrganizationModel) -> Organization:
domain=org_model.domain,
bw_organization_id=org_model.bw_organization_id,
bw_collection_ids=org_model.bw_collection_ids,
artifact_url_expiry_seconds=org_model.artifact_url_expiry_seconds,
created_at=org_model.created_at,
modified_at=org_model.modified_at,
)

View file

@ -1,5 +1,6 @@
import asyncio
import json
import time
import unicodedata
from enum import Enum
from typing import Annotated, Any
@ -36,7 +37,13 @@ from skyvern.forge import app
from skyvern.forge.prompts import prompt_engine
from skyvern.forge.sdk.api.llm.exceptions import LLMProviderError
from skyvern.forge.sdk.artifact.models import Artifact, ArtifactType
from skyvern.forge.sdk.artifact.signing import ARTIFACT_URL_EXPIRY_SECONDS, parse_keyring, verify_artifact_signature
from skyvern.forge.sdk.artifact.signing import (
ARTIFACT_URL_EXPIRY_SECONDS,
ARTIFACT_URL_EXPIRY_SECONDS_MAX,
ARTIFACT_URL_EXPIRY_SECONDS_MIN,
parse_keyring,
verify_artifact_signature,
)
from skyvern.forge.sdk.core import skyvern_context
from skyvern.forge.sdk.core.curl_converter import curl_to_http_request_block_params
from skyvern.forge.sdk.core.permissions.permission_checker_factory import PermissionCheckerFactory
@ -1547,16 +1554,34 @@ def _artifact_response_config(artifact: Artifact) -> tuple[str, str]:
return media_type, "inline"
def _artifact_content_response_headers(*, disposition: str, is_signed: bool) -> dict[str, str]:
def _artifact_content_response_headers(
*,
disposition: str,
is_signed: bool,
signed_expiry_unix: int | None = None,
) -> dict[str, str]:
"""Response headers for the artifact content endpoint.
For signed URLs, ``Cache-Control: max-age`` is set to the URL's remaining
lifetime derived from the ``expiry`` query parameter rather than the
global default so per-org TTL overrides flow through to caches. Caches
must not retain a body past the URL's own expiry.
Includes ``X-Content-Type-Options: nosniff`` as defence-in-depth for
SKY-8862: even if something upstream strips the attachment disposition,
the browser will not sniff the octet-stream body back into HTML/PDF.
"""
if is_signed:
if signed_expiry_unix is not None:
remaining = max(0, signed_expiry_unix - int(time.time()))
else:
remaining = ARTIFACT_URL_EXPIRY_SECONDS
cache_control = f"private, max-age={remaining}"
else:
cache_control = "private, no-cache"
return {
"Content-Disposition": disposition,
"Cache-Control": f"private, max-age={ARTIFACT_URL_EXPIRY_SECONDS}" if is_signed else "private, no-cache",
"Cache-Control": cache_control,
"X-Content-Type-Options": "nosniff",
}
@ -1629,12 +1654,19 @@ async def get_artifact_content(
)
media_type, content_disposition = _artifact_response_config(artifact)
is_signed = sig is not None and expiry is not None and kid is not None
signed_expiry_unix: int | None = None
if is_signed and expiry is not None:
try:
signed_expiry_unix = int(expiry)
except ValueError:
signed_expiry_unix = None
return Response(
content=content,
media_type=media_type,
headers=_artifact_content_response_headers(
disposition=content_disposition,
is_signed=is_signed,
signed_expiry_unix=signed_expiry_unix,
),
)
@ -3212,9 +3244,36 @@ async def update_organization(
org_update: OrganizationUpdate,
current_org: Organization = Depends(org_auth_service.get_current_org),
) -> Organization:
# Validate the per-org artifact URL expiry against the same bounds the
# signing helper clamps to. Reject out-of-range values at the API edge so
# users see a clear 400 instead of a silently clamped value persisting in
# the DB. The clear flag and a non-null override are mutually exclusive —
# the repo prefers the clear flag, but reject the ambiguity here too.
if org_update.artifact_url_expiry_seconds is not None and not org_update.clear_artifact_url_expiry_seconds:
if (
org_update.artifact_url_expiry_seconds < ARTIFACT_URL_EXPIRY_SECONDS_MIN
or org_update.artifact_url_expiry_seconds > ARTIFACT_URL_EXPIRY_SECONDS_MAX
):
raise HTTPException(
status_code=http_status.HTTP_400_BAD_REQUEST,
detail=(
f"artifact_url_expiry_seconds must be between "
f"{ARTIFACT_URL_EXPIRY_SECONDS_MIN} and {ARTIFACT_URL_EXPIRY_SECONDS_MAX} seconds"
),
)
if org_update.clear_artifact_url_expiry_seconds and org_update.artifact_url_expiry_seconds is not None:
raise HTTPException(
status_code=http_status.HTTP_400_BAD_REQUEST,
detail=(
"clear_artifact_url_expiry_seconds cannot be combined with a non-null "
"artifact_url_expiry_seconds — pick one"
),
)
return await app.DATABASE.organizations.update_organization(
current_org.organization_id,
max_steps_per_run=org_update.max_steps_per_run,
artifact_url_expiry_seconds=org_update.artifact_url_expiry_seconds,
clear_artifact_url_expiry_seconds=org_update.clear_artifact_url_expiry_seconds,
)

View file

@ -16,6 +16,15 @@ class Organization(BaseModel):
domain: str | None = None
bw_organization_id: str | None = None
bw_collection_ids: list[str] | None = None
artifact_url_expiry_seconds: int | None = Field(
None,
description=(
"Per-org override for the lifetime of signed /v1/artifacts/{id}/content URLs, "
"in seconds. None means use the global default (12 hours). When set, every signed "
"URL minted for artifacts owned by this org is valid for this many seconds. "
"Bounded between 1 hour (3600) and 7 days (604800)."
),
)
created_at: datetime
modified_at: datetime
@ -168,3 +177,21 @@ class GetOrganizationAPIKeysResponse(BaseModel):
class OrganizationUpdate(BaseModel):
max_steps_per_run: int | None = None
artifact_url_expiry_seconds: int | None = Field(
None,
description=(
"Per-org override for the lifetime of signed /v1/artifacts/{id}/content URLs, "
"in seconds. Bounded between 1 hour (3600) and 7 days (604800). Pass null to "
"leave the current value unchanged. To explicitly clear the override (and fall "
"back to the global 12-hour default) set ``clear_artifact_url_expiry_seconds`` "
"to true."
),
)
clear_artifact_url_expiry_seconds: bool = Field(
False,
description=(
"When true, resets ``artifact_url_expiry_seconds`` to NULL — the org will use "
"the global 12-hour default. Mutually exclusive with a non-null value in "
"``artifact_url_expiry_seconds``; requests that set both are rejected with a 400."
),
)

View file

@ -535,7 +535,7 @@ class WorkflowService:
# Preserve the input order so block outputs render files in save order.
by_id = {a.artifact_id: a for a in artifacts}
ordered = [by_id[aid] for aid in artifact_ids if aid in by_id]
return _file_infos_from_download_artifacts(ordered)
return await _file_infos_from_download_artifacts(ordered)
async def _file_infos_for_workflow_run_filtered_by_filenames(
self,
@ -582,7 +582,7 @@ class WorkflowService:
if basename in filenames and basename not in seen:
matched.append(artifact)
seen.add(basename)
return _file_infos_from_download_artifacts(matched)
return await _file_infos_from_download_artifacts(matched)
async def _refresh_output_urls(
self,