mirror of
https://github.com/Skyvern-AI/skyvern.git
synced 2026-04-29 12:10:28 +00:00
Co-authored-by: Shuchang Zheng <wintonzheng0325@gmail.com> Co-authored-by: LawyZheng <lawyzheng1106@gmail.com> Co-authored-by: Nick Fisher <nick.fisher@avinium.com>
118 lines
6.4 KiB
Python
118 lines
6.4 KiB
Python
import os
|
|
import shutil
|
|
from datetime import datetime
|
|
|
|
from skyvern.config import settings
|
|
from skyvern.constants import DOWNLOAD_FILE_PREFIX
|
|
from skyvern.forge.sdk.api.aws import AsyncAWSClient
|
|
from skyvern.forge.sdk.api.files import (
|
|
create_named_temporary_file,
|
|
get_download_dir,
|
|
get_skyvern_temp_dir,
|
|
make_temp_directory,
|
|
unzip_files,
|
|
)
|
|
from skyvern.forge.sdk.artifact.models import Artifact, ArtifactType, LogEntityType
|
|
from skyvern.forge.sdk.artifact.storage.base import FILE_EXTENTSION_MAP, BaseStorage
|
|
from skyvern.forge.sdk.models import Step
|
|
from skyvern.forge.sdk.schemas.observers import ObserverCruise, ObserverThought
|
|
|
|
|
|
class S3Storage(BaseStorage):
    """Storage backend that keeps artifacts and related files in S3.

    All S3 traffic goes through an ``AsyncAWSClient`` instance. Artifact URIs are
    built against ``self.bucket``; streaming files, browser sessions and
    downloaded files use the dedicated buckets named in ``settings``.
    """

    def __init__(self, bucket: str | None = None) -> None:
        """Create the AWS client and pick the artifact bucket.

        Args:
            bucket: Bucket used for artifact URIs. A falsy value falls back to
                ``settings.AWS_S3_BUCKET_ARTIFACTS``.
        """
        self.async_client = AsyncAWSClient()
        self.bucket = bucket or settings.AWS_S3_BUCKET_ARTIFACTS

    def build_uri(self, artifact_id: str, step: Step, artifact_type: ArtifactType) -> str:
        """Return the S3 URI for an artifact that belongs to a task step.

        The key is grouped by environment, task and step, and the file name is
        prefixed with the current naive-UTC ISO timestamp.

        Raises:
            KeyError: If ``artifact_type`` has no entry in ``FILE_EXTENTSION_MAP``.
        """
        file_ext = FILE_EXTENTSION_MAP[artifact_type]
        return f"s3://{self.bucket}/{settings.ENV}/{step.task_id}/{step.order:02d}_{step.retry_index}_{step.step_id}/{datetime.utcnow().isoformat()}_{artifact_id}_{artifact_type}.{file_ext}"

    def build_log_uri(self, log_entity_type: LogEntityType, log_entity_id: str, artifact_type: ArtifactType) -> str:
        """Return the S3 URI for a log artifact of the given entity.

        Raises:
            KeyError: If ``artifact_type`` has no entry in ``FILE_EXTENTSION_MAP``.
        """
        file_ext = FILE_EXTENTSION_MAP[artifact_type]
        return f"s3://{self.bucket}/{settings.ENV}/logs/{log_entity_type}/{log_entity_id}/{datetime.utcnow().isoformat()}_{artifact_type}.{file_ext}"

    def build_observer_thought_uri(
        self, artifact_id: str, observer_thought: ObserverThought, artifact_type: ArtifactType
    ) -> str:
        """Return the S3 URI for an artifact attached to an observer thought.

        The key nests the thought id under its observer cruise id.

        Raises:
            KeyError: If ``artifact_type`` has no entry in ``FILE_EXTENTSION_MAP``.
        """
        file_ext = FILE_EXTENTSION_MAP[artifact_type]
        return f"s3://{self.bucket}/{settings.ENV}/observers/{observer_thought.observer_cruise_id}/{observer_thought.observer_thought_id}/{datetime.utcnow().isoformat()}_{artifact_id}_{artifact_type}.{file_ext}"

    def build_observer_cruise_uri(
        self, artifact_id: str, observer_cruise: ObserverCruise, artifact_type: ArtifactType
    ) -> str:
        """Return the S3 URI for an artifact attached to an observer cruise.

        Raises:
            KeyError: If ``artifact_type`` has no entry in ``FILE_EXTENTSION_MAP``.
        """
        file_ext = FILE_EXTENTSION_MAP[artifact_type]
        return f"s3://{self.bucket}/{settings.ENV}/observers/{observer_cruise.observer_cruise_id}/{datetime.utcnow().isoformat()}_{artifact_id}_{artifact_type}.{file_ext}"

    async def store_artifact(self, artifact: Artifact, data: bytes) -> None:
        """Upload ``data`` to the location given by ``artifact.uri``."""
        await self.async_client.upload_file(artifact.uri, data)

    async def retrieve_artifact(self, artifact: Artifact) -> bytes | None:
        """Download and return the content stored at ``artifact.uri``.

        Returns whatever the AWS client returns, which may be ``None``.
        """
        return await self.async_client.download_file(artifact.uri)

    async def get_share_link(self, artifact: Artifact) -> str | None:
        """Return a presigned URL for one artifact, or ``None`` if none was produced."""
        share_urls = await self.async_client.create_presigned_urls([artifact.uri])
        return share_urls[0] if share_urls else None

    async def get_share_links(self, artifacts: list[Artifact]) -> list[str] | None:
        """Return presigned URLs for ``artifacts`` in a single client call."""
        return await self.async_client.create_presigned_urls([artifact.uri for artifact in artifacts])

    async def store_artifact_from_path(self, artifact: Artifact, path: str) -> None:
        """Upload the local file at ``path`` to ``artifact.uri``."""
        await self.async_client.upload_file_from_path(artifact.uri, path)

    async def save_streaming_file(self, organization_id: str, file_name: str) -> None:
        """Upload an organization's streaming file from the Skyvern temp dir to the screenshots bucket."""
        from_path = f"{get_skyvern_temp_dir()}/{organization_id}/{file_name}"
        to_path = f"s3://{settings.AWS_S3_BUCKET_SCREENSHOTS}/{settings.ENV}/{organization_id}/{file_name}"
        await self.async_client.upload_file_from_path(to_path, from_path)

    async def get_streaming_file(self, organization_id: str, file_name: str, use_default: bool = True) -> bytes | None:
        """Download an organization's streaming file from the screenshots bucket.

        ``use_default`` is not used by this implementation. The download is
        requested with ``log_exception=False``.
        """
        path = f"s3://{settings.AWS_S3_BUCKET_SCREENSHOTS}/{settings.ENV}/{organization_id}/{file_name}"
        return await self.async_client.download_file(path, log_exception=False)

    async def store_browser_session(self, organization_id: str, workflow_permanent_id: str, directory: str) -> None:
        """Zip ``directory`` and upload it as the workflow's browser session.

        The local archive is removed once the upload attempt finishes, whether
        it succeeded or raised, so repeated calls do not pile up zip files.
        """
        # The temp file only reserves a unique base name. make_archive appends
        # ".zip" to that name, so the archive is a separate file that nothing
        # else will clean up.
        temp_zip_file = create_named_temporary_file()
        zip_file_path = shutil.make_archive(temp_zip_file.name, "zip", directory)
        browser_session_uri = f"s3://{settings.AWS_S3_BUCKET_BROWSER_SESSIONS}/{settings.ENV}/{organization_id}/{workflow_permanent_id}.zip"
        try:
            await self.async_client.upload_file_from_path(browser_session_uri, zip_file_path)
        finally:
            # NOTE(review): assumes the helper returns a tempfile-style object
            # (it is closed the same way in retrieve_browser_session).
            temp_zip_file.close()
            if os.path.exists(zip_file_path):
                os.remove(zip_file_path)

    async def retrieve_browser_session(self, organization_id: str, workflow_permanent_id: str) -> str | None:
        """Download a workflow's browser session and unpack it into a new temp directory.

        Returns:
            The path of the directory holding the extracted session, or ``None``
            when the download yields no data.
        """
        browser_session_uri = f"s3://{settings.AWS_S3_BUCKET_BROWSER_SESSIONS}/{settings.ENV}/{organization_id}/{workflow_permanent_id}.zip"
        downloaded_zip_bytes = await self.async_client.download_file(browser_session_uri, log_exception=True)
        if not downloaded_zip_bytes:
            return None

        temp_zip_file = create_named_temporary_file(delete=False)
        temp_zip_file_path = temp_zip_file.name
        try:
            temp_zip_file.write(downloaded_zip_bytes)
            # Close before extracting: the archive is re-opened by path, so any
            # bytes still sitting in the write buffer must reach disk first.
            temp_zip_file.close()

            temp_dir = make_temp_directory(prefix="skyvern_browser_session_")
            unzip_files(temp_zip_file_path, temp_dir)
        finally:
            # Closing again is harmless for tempfile-style objects and covers the
            # case where write() raised. The file was created with delete=False,
            # so it has to be removed by hand.
            temp_zip_file.close()
            if os.path.exists(temp_zip_file_path):
                os.remove(temp_zip_file_path)
        return temp_dir

    async def save_downloaded_files(
        self, organization_id: str, task_id: str | None, workflow_run_id: str | None
    ) -> None:
        """Upload every regular file in the run's download directory to the uploads bucket.

        Subdirectories are skipped. Keys are grouped under ``workflow_run_id``
        when it is set, otherwise under ``task_id``.
        """
        download_dir = get_download_dir(workflow_run_id=workflow_run_id, task_id=task_id)
        for file in os.listdir(download_dir):
            fpath = os.path.join(download_dir, file)
            if not os.path.isfile(fpath):
                continue
            uri = f"s3://{settings.AWS_S3_BUCKET_UPLOADS}/{DOWNLOAD_FILE_PREFIX}/{settings.ENV}/{organization_id}/{workflow_run_id or task_id}/{file}"
            # TODO: use coroutine to speed up uploading if too many files
            await self.async_client.upload_file_from_path(uri, fpath)

    async def get_downloaded_files(
        self, organization_id: str, task_id: str | None, workflow_run_id: str | None
    ) -> list[str]:
        """Return presigned URLs for the files previously saved for a run.

        Returns:
            One URL per object listed under the run's prefix, or an empty list
            when nothing is listed or no URLs could be created.
        """
        uri = f"s3://{settings.AWS_S3_BUCKET_UPLOADS}/{DOWNLOAD_FILE_PREFIX}/{settings.ENV}/{organization_id}/{workflow_run_id or task_id}"
        object_keys = await self.async_client.list_files(uri=uri)
        if not object_keys:
            return []

        object_uris = [f"s3://{settings.AWS_S3_BUCKET_UPLOADS}/{key}" for key in object_keys]
        presigned_urls = await self.async_client.create_presigned_urls(object_uris)
        if presigned_urls is None:
            return []
        return presigned_urls
|