use skyvern temp to save tempfiles (#1262)

This commit is contained in:
LawyZheng 2024-11-26 12:27:58 +08:00 committed by GitHub
parent 6b417d0e83
commit af25aeed77
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
7 changed files with 50 additions and 17 deletions

1
.gitignore vendored
View file

@ -166,6 +166,7 @@ traces/
har/ har/
postgres-data postgres-data
files/ files/
temp/
# Streamlit ignores # Streamlit ignores
**/secrets*.toml **/secrets*.toml

View file

@ -14,6 +14,7 @@ class Settings(BaseSettings):
VIDEO_PATH: str | None = None VIDEO_PATH: str | None = None
HAR_PATH: str | None = "./har" HAR_PATH: str | None = "./har"
LOG_PATH: str = "./log" LOG_PATH: str = "./log"
TEMP_PATH: str = "./temp"
BROWSER_ACTION_TIMEOUT_MS: int = 5000 BROWSER_ACTION_TIMEOUT_MS: int = 5000
BROWSER_SCREENSHOT_TIMEOUT_MS: int = 20000 BROWSER_SCREENSHOT_TIMEOUT_MS: int = 20000
BROWSER_LOADING_TIMEOUT_MS: int = 120000 BROWSER_LOADING_TIMEOUT_MS: int = 120000
@ -74,9 +75,6 @@ class Settings(BaseSettings):
WORKFLOW_DOWNLOAD_DIRECTORY_PARAMETER_KEY: str = "SKYVERN_DOWNLOAD_DIRECTORY" WORKFLOW_DOWNLOAD_DIRECTORY_PARAMETER_KEY: str = "SKYVERN_DOWNLOAD_DIRECTORY"
WORKFLOW_WAIT_BLOCK_MAX_SEC: int = 30 * 60 WORKFLOW_WAIT_BLOCK_MAX_SEC: int = 30 * 60
# streaming settings
STREAMING_FILE_BASE_PATH: str = "/tmp"
# Saved browser session settings # Saved browser session settings
BROWSER_SESSION_BASE_PATH: str = f"{constants.REPO_ROOT_DIR}/browser_sessions" BROWSER_SESSION_BASE_PATH: str = f"{constants.REPO_ROOT_DIR}/browser_sessions"

View file

@ -14,13 +14,14 @@ from multidict import CIMultiDictProxy
from skyvern.constants import REPO_ROOT_DIR from skyvern.constants import REPO_ROOT_DIR
from skyvern.exceptions import DownloadFileMaxSizeExceeded from skyvern.exceptions import DownloadFileMaxSizeExceeded
from skyvern.forge.sdk.api.aws import AsyncAWSClient from skyvern.forge.sdk.api.aws import AsyncAWSClient
from skyvern.forge.sdk.settings_manager import SettingsManager
LOG = structlog.get_logger() LOG = structlog.get_logger()
async def download_from_s3(client: AsyncAWSClient, s3_uri: str) -> str: async def download_from_s3(client: AsyncAWSClient, s3_uri: str) -> str:
downloaded_bytes = await client.download_file(uri=s3_uri) downloaded_bytes = await client.download_file(uri=s3_uri)
file_path = tempfile.NamedTemporaryFile(delete=False) file_path = create_named_temporary_file(delete=False)
file_path.write(downloaded_bytes) file_path.write(downloaded_bytes)
return file_path.name return file_path.name
@ -56,7 +57,7 @@ async def download_file(url: str, max_size_mb: int | None = None) -> str:
a = urlparse(url) a = urlparse(url)
# Get the file name # Get the file name
temp_dir = tempfile.mkdtemp(prefix="skyvern_downloads_") temp_dir = make_temp_directory(prefix="skyvern_downloads_")
file_name = os.path.basename(a.path) file_name = os.path.basename(a.path)
# if no suffix in the URL, we need to parse it from HTTP headers # if no suffix in the URL, we need to parse it from HTTP headers
@ -151,3 +152,31 @@ def calculate_sha256_for_file(file_path: str) -> str:
for byte_block in iter(lambda: f.read(4096), b""): for byte_block in iter(lambda: f.read(4096), b""):
sha256_hash.update(byte_block) sha256_hash.update(byte_block)
return sha256_hash.hexdigest() return sha256_hash.hexdigest()
def create_folder_if_not_exist(dir: str) -> None:
    """Create the directory ``dir`` (including missing parents) unless it exists.

    Args:
        dir: Path of the directory to create. The parameter name shadows the
            builtin but is kept so keyword callers keep working.

    Returns:
        None. Nothing is raised when the path already exists.
    """
    # Try to create and tolerate "already exists" rather than checking first:
    # a check-then-create sequence can raise FileExistsError when another task
    # or process creates the directory between the check and the mkdir call.
    try:
        Path(dir).mkdir(parents=True)
    except FileExistsError:
        pass
def get_skyvern_temp_dir() -> str:
    """Return the configured temp directory path, creating the folder if needed.

    The path is read from the ``TEMP_PATH`` setting and passed through
    ``create_folder_if_not_exist`` before being returned unchanged.
    """
    skyvern_temp_path = SettingsManager.get_settings().TEMP_PATH
    # Make sure the folder is present before handing the path to callers.
    create_folder_if_not_exist(skyvern_temp_path)
    return skyvern_temp_path
def make_temp_directory(
    suffix: str | None = None,
    prefix: str | None = None,
) -> str:
    """Create a new temporary directory under the configured temp folder.

    Args:
        suffix: Optional suffix forwarded to ``tempfile.mkdtemp``.
        prefix: Optional prefix forwarded to ``tempfile.mkdtemp``.

    Returns:
        The path of the newly created directory, as returned by
        ``tempfile.mkdtemp``.
    """
    # get_skyvern_temp_dir() reads TEMP_PATH and ensures the folder exists,
    # which is exactly the setup this function needs before mkdtemp.
    parent_dir = get_skyvern_temp_dir()
    return tempfile.mkdtemp(suffix=suffix, prefix=prefix, dir=parent_dir)
def create_named_temporary_file(delete: bool = True) -> tempfile._TemporaryFileWrapper:
    """Open a named temporary file inside the configured temp folder.

    Args:
        delete: Forwarded to ``tempfile.NamedTemporaryFile``.

    Returns:
        The file object returned by ``tempfile.NamedTemporaryFile``.
    """
    # get_skyvern_temp_dir() reads TEMP_PATH and ensures the folder exists
    # before the temporary file is created inside it.
    parent_dir = get_skyvern_temp_dir()
    return tempfile.NamedTemporaryFile(dir=parent_dir, delete=delete)

View file

@ -6,6 +6,7 @@ from urllib.parse import unquote, urlparse
import structlog import structlog
from skyvern.forge.sdk.api.files import get_skyvern_temp_dir
from skyvern.forge.sdk.artifact.models import Artifact, ArtifactType from skyvern.forge.sdk.artifact.models import Artifact, ArtifactType
from skyvern.forge.sdk.artifact.storage.base import FILE_EXTENTSION_MAP, BaseStorage from skyvern.forge.sdk.artifact.storage.base import FILE_EXTENTSION_MAP, BaseStorage
from skyvern.forge.sdk.models import Step from skyvern.forge.sdk.models import Step
@ -73,9 +74,9 @@ class LocalStorage(BaseStorage):
return return
async def get_streaming_file(self, organization_id: str, file_name: str, use_default: bool = True) -> bytes | None: async def get_streaming_file(self, organization_id: str, file_name: str, use_default: bool = True) -> bytes | None:
file_path = Path(f"{SettingsManager.get_settings().STREAMING_FILE_BASE_PATH}/skyvern_screenshot.png") file_path = Path(f"{get_skyvern_temp_dir()}/skyvern_screenshot.png")
if not use_default: if not use_default:
file_path = Path(f"{SettingsManager.get_settings().STREAMING_FILE_BASE_PATH}/{organization_id}/{file_name}") file_path = Path(f"{get_skyvern_temp_dir()}/{organization_id}/{file_name}")
try: try:
with open(file_path, "rb") as f: with open(file_path, "rb") as f:
return f.read() return f.read()

View file

@ -1,10 +1,14 @@
import shutil import shutil
import tempfile
from datetime import datetime from datetime import datetime
from skyvern.config import settings from skyvern.config import settings
from skyvern.forge.sdk.api.aws import AsyncAWSClient from skyvern.forge.sdk.api.aws import AsyncAWSClient
from skyvern.forge.sdk.api.files import unzip_files from skyvern.forge.sdk.api.files import (
create_named_temporary_file,
get_skyvern_temp_dir,
make_temp_directory,
unzip_files,
)
from skyvern.forge.sdk.artifact.models import Artifact, ArtifactType from skyvern.forge.sdk.artifact.models import Artifact, ArtifactType
from skyvern.forge.sdk.artifact.storage.base import FILE_EXTENTSION_MAP, BaseStorage from skyvern.forge.sdk.artifact.storage.base import FILE_EXTENTSION_MAP, BaseStorage
from skyvern.forge.sdk.models import Step from skyvern.forge.sdk.models import Step
@ -36,7 +40,7 @@ class S3Storage(BaseStorage):
await self.async_client.upload_file_from_path(artifact.uri, path) await self.async_client.upload_file_from_path(artifact.uri, path)
async def save_streaming_file(self, organization_id: str, file_name: str) -> None: async def save_streaming_file(self, organization_id: str, file_name: str) -> None:
from_path = f"{settings.STREAMING_FILE_BASE_PATH}/{organization_id}/{file_name}" from_path = f"{get_skyvern_temp_dir()}/{organization_id}/{file_name}"
to_path = f"s3://{settings.AWS_S3_BUCKET_SCREENSHOTS}/{settings.ENV}/{organization_id}/{file_name}" to_path = f"s3://{settings.AWS_S3_BUCKET_SCREENSHOTS}/{settings.ENV}/{organization_id}/{file_name}"
await self.async_client.upload_file_from_path(to_path, from_path) await self.async_client.upload_file_from_path(to_path, from_path)
@ -46,7 +50,7 @@ class S3Storage(BaseStorage):
async def store_browser_session(self, organization_id: str, workflow_permanent_id: str, directory: str) -> None: async def store_browser_session(self, organization_id: str, workflow_permanent_id: str, directory: str) -> None:
# Zip the directory to a temp file # Zip the directory to a temp file
temp_zip_file = tempfile.NamedTemporaryFile() temp_zip_file = create_named_temporary_file()
zip_file_path = shutil.make_archive(temp_zip_file.name, "zip", directory) zip_file_path = shutil.make_archive(temp_zip_file.name, "zip", directory)
browser_session_uri = f"s3://{settings.AWS_S3_BUCKET_BROWSER_SESSIONS}/{settings.ENV}/{organization_id}/{workflow_permanent_id}.zip" browser_session_uri = f"s3://{settings.AWS_S3_BUCKET_BROWSER_SESSIONS}/{settings.ENV}/{organization_id}/{workflow_permanent_id}.zip"
await self.async_client.upload_file_from_path(browser_session_uri, zip_file_path) await self.async_client.upload_file_from_path(browser_session_uri, zip_file_path)
@ -56,11 +60,11 @@ class S3Storage(BaseStorage):
downloaded_zip_bytes = await self.async_client.download_file(browser_session_uri, log_exception=True) downloaded_zip_bytes = await self.async_client.download_file(browser_session_uri, log_exception=True)
if not downloaded_zip_bytes: if not downloaded_zip_bytes:
return None return None
temp_zip_file = tempfile.NamedTemporaryFile(delete=False) temp_zip_file = create_named_temporary_file(delete=False)
temp_zip_file.write(downloaded_zip_bytes) temp_zip_file.write(downloaded_zip_bytes)
temp_zip_file_path = temp_zip_file.name temp_zip_file_path = temp_zip_file.name
temp_dir = tempfile.mkdtemp(prefix="skyvern_browser_session_") temp_dir = make_temp_directory(prefix="skyvern_browser_session_")
unzip_files(temp_zip_file_path, temp_dir) unzip_files(temp_zip_file_path, temp_dir)
temp_zip_file.close() temp_zip_file.close()
return temp_dir return temp_dir

View file

@ -11,7 +11,6 @@ from dataclasses import dataclass
from email.message import EmailMessage from email.message import EmailMessage
from enum import StrEnum from enum import StrEnum
from pathlib import Path from pathlib import Path
from tempfile import NamedTemporaryFile
from typing import Annotated, Any, Literal, Union from typing import Annotated, Any, Literal, Union
import filetype import filetype
@ -36,6 +35,7 @@ from skyvern.forge.prompts import prompt_engine
from skyvern.forge.sdk.api.aws import AsyncAWSClient from skyvern.forge.sdk.api.aws import AsyncAWSClient
from skyvern.forge.sdk.api.files import ( from skyvern.forge.sdk.api.files import (
calculate_sha256_for_file, calculate_sha256_for_file,
create_named_temporary_file,
download_file, download_file,
download_from_s3, download_from_s3,
get_path_for_workflow_download_directory, get_path_for_workflow_download_directory,
@ -1056,7 +1056,7 @@ class SendEmailBlock(Block):
async def _download_from_s3(self, s3_uri: str) -> str: async def _download_from_s3(self, s3_uri: str) -> str:
client = self.get_async_aws_client() client = self.get_async_aws_client()
downloaded_bytes = await client.download_file(uri=s3_uri) downloaded_bytes = await client.download_file(uri=s3_uri)
file_path = NamedTemporaryFile(delete=False) file_path = create_named_temporary_file(delete=False)
file_path.write(downloaded_bytes) file_path.write(downloaded_bytes)
return file_path.name return file_path.name

View file

@ -2,7 +2,6 @@ from __future__ import annotations
import asyncio import asyncio
import os import os
import tempfile
import time import time
import uuid import uuid
from datetime import datetime from datetime import datetime
@ -24,6 +23,7 @@ from skyvern.exceptions import (
UnknownBrowserType, UnknownBrowserType,
UnknownErrorWhileCreatingBrowserContext, UnknownErrorWhileCreatingBrowserContext,
) )
from skyvern.forge.sdk.api.files import make_temp_directory
from skyvern.forge.sdk.core.skyvern_context import current from skyvern.forge.sdk.core.skyvern_context import current
from skyvern.forge.sdk.schemas.tasks import ProxyLocation from skyvern.forge.sdk.schemas.tasks import ProxyLocation
from skyvern.forge.sdk.settings_manager import SettingsManager from skyvern.forge.sdk.settings_manager import SettingsManager
@ -153,7 +153,7 @@ class BrowserContextFactory:
video_dir = f"{SettingsManager.get_settings().VIDEO_PATH}/{datetime.utcnow().strftime('%Y-%m-%d')}" video_dir = f"{SettingsManager.get_settings().VIDEO_PATH}/{datetime.utcnow().strftime('%Y-%m-%d')}"
har_dir = f"{SettingsManager.get_settings().HAR_PATH}/{datetime.utcnow().strftime('%Y-%m-%d')}/{BrowserContextFactory.get_subdir()}.har" har_dir = f"{SettingsManager.get_settings().HAR_PATH}/{datetime.utcnow().strftime('%Y-%m-%d')}/{BrowserContextFactory.get_subdir()}.har"
return { return {
"user_data_dir": tempfile.mkdtemp(prefix="skyvern_browser_"), "user_data_dir": make_temp_directory(prefix="skyvern_browser_"),
"locale": SettingsManager.get_settings().BROWSER_LOCALE, "locale": SettingsManager.get_settings().BROWSER_LOCALE,
"timezone_id": SettingsManager.get_settings().BROWSER_TIMEZONE, "timezone_id": SettingsManager.get_settings().BROWSER_TIMEZONE,
"color_scheme": "no-preference", "color_scheme": "no-preference",