Workflow Fixes (#156)

2025-09-16 10:19:42 +00:00 · 2024-04-04 19:09:19 -07:00 · 2024-04-04 19:09:19 -07:00 · 0800990627
commit 0800990627
parent 8117395d73
11 changed files with 350 additions and 108 deletions
--- a/skyvern/forge/sdk/workflow/models/block.py
+++ b/skyvern/forge/sdk/workflow/models/block.py
@ -3,6 +3,7 @@ import json
 import os
 import smtplib
 import uuid
+from dataclasses import dataclass
 from email.message import EmailMessage
 from enum import StrEnum
 from pathlib import Path
@ -23,7 +24,7 @@ from skyvern.exceptions import (
 from skyvern.forge import app
 from skyvern.forge.prompts import prompt_engine
 from skyvern.forge.sdk.api.aws import AsyncAWSClient
-from skyvern.forge.sdk.api.files import download_file
+from skyvern.forge.sdk.api.files import download_file, get_path_for_workflow_download_directory
 from skyvern.forge.sdk.api.llm.api_handler_factory import LLMAPIHandlerFactory
 from skyvern.forge.sdk.schemas.tasks import TaskStatus
 from skyvern.forge.sdk.settings_manager import SettingsManager
@ -46,9 +47,17 @@ class BlockType(StrEnum):
    CODE = "code"
    TEXT_PROMPT = "text_prompt"
    DOWNLOAD_TO_S3 = "download_to_s3"
+    UPLOAD_TO_S3 = "upload_to_s3"
    SEND_EMAIL = "send_email"


+@dataclass(frozen=True)
+class BlockResult:
+    success: bool
+    output_parameter: OutputParameter | None = None
+    output_parameter_value: dict[str, Any] | list | str | None = None
+
+
 class Block(BaseModel, abc.ABC):
    # Must be unique within workflow definition
    label: str
@ -68,12 +77,26 @@ class Block(BaseModel, abc.ABC):
        return app.WORKFLOW_CONTEXT_MANAGER.aws_client

    @abc.abstractmethod
-    async def execute(self, workflow_run_id: str, **kwargs: dict) -> OutputParameter | None:
+    async def execute(self, workflow_run_id: str, **kwargs: dict) -> BlockResult:
        pass

+    async def execute_safe(self, workflow_run_id: str, **kwargs: dict) -> BlockResult:
+        try:
+            return await self.execute(workflow_run_id, **kwargs)
+        except Exception:
+            LOG.exception(
+                "Block execution failed",
+                exc_info=True,
+                workflow_run_id=workflow_run_id,
+                block_label=self.label,
+                block_type=self.block_type,
+            )
+            return BlockResult(success=False)
+
    @abc.abstractmethod
    def get_all_parameters(
        self,
+        workflow_run_id: str,
    ) -> list[PARAMETER_TYPE]:
        pass

@ -93,8 +116,15 @@ class TaskBlock(Block):

    def get_all_parameters(
        self,
+        workflow_run_id: str,
    ) -> list[PARAMETER_TYPE]:
-        return self.parameters
+        parameters = self.parameters
+        workflow_run_context = self.get_workflow_run_context(workflow_run_id)
+
+        if self.url and workflow_run_context.has_parameter(self.url):
+            parameters.append(workflow_run_context.get_parameter(self.url))
+
+        return parameters

    @staticmethod
    async def get_task_order(workflow_run_id: str, current_retry: int) -> tuple[int, int]:
@ -126,7 +156,7 @@ class TaskBlock(Block):

        return order, retry + 1

-    async def execute(self, workflow_run_id: str, **kwargs: dict) -> OutputParameter | None:
+    async def execute(self, workflow_run_id: str, **kwargs: dict) -> BlockResult:
        workflow_run_context = self.get_workflow_run_context(workflow_run_id)
        current_retry = 0
        # initial value for will_retry is True, so that the loop runs at least once
@ -198,7 +228,6 @@ class TaskBlock(Block):
            if not updated_task.status.is_final():
                raise UnexpectedTaskStatus(task_id=updated_task.task_id, status=updated_task.status)
            if updated_task.status == TaskStatus.completed:
-                will_retry = False
                LOG.info(
                    f"Task completed",
                    task_id=updated_task.task_id,
@ -224,7 +253,12 @@ class TaskBlock(Block):
                        workflow_id=workflow.workflow_id,
                        task_id=updated_task.task_id,
                    )
-                    return self.output_parameter
+                    return BlockResult(
+                        success=True,
+                        output_parameter=self.output_parameter,
+                        output_parameter_value=updated_task.extracted_information,
+                    )
+                return BlockResult(success=True)
            else:
                current_retry += 1
                will_retry = current_retry <= self.max_retries
@ -239,7 +273,8 @@ class TaskBlock(Block):
                    current_retry=current_retry,
                    max_retries=self.max_retries,
                )
-        return None
+
+        return BlockResult(success=False)


 class ForLoopBlock(Block):
@ -251,15 +286,16 @@ class ForLoopBlock(Block):

    def get_all_parameters(
        self,
+        workflow_run_id: str,
    ) -> list[PARAMETER_TYPE]:
-        return self.loop_block.get_all_parameters() + [self.loop_over]
+        return self.loop_block.get_all_parameters(workflow_run_id) + [self.loop_over]

    def get_loop_block_context_parameters(self, workflow_run_id: str, loop_data: Any) -> list[ContextParameter]:
        if not isinstance(loop_data, dict):
            # TODO (kerem): Should we add support for other types?
            raise ValueError("loop_data should be a dictionary")

-        loop_block_parameters = self.loop_block.get_all_parameters()
+        loop_block_parameters = self.loop_block.get_all_parameters(workflow_run_id)
        context_parameters = [
            parameter for parameter in loop_block_parameters if isinstance(parameter, ContextParameter)
        ]
@ -286,7 +322,7 @@ class ForLoopBlock(Block):
            # TODO (kerem): Implement this for context parameters
            raise NotImplementedError

-    async def execute(self, workflow_run_id: str, **kwargs: dict) -> OutputParameter | None:
+    async def execute(self, workflow_run_id: str, **kwargs: dict) -> BlockResult:
        workflow_run_context = self.get_workflow_run_context(workflow_run_id)
        loop_over_values = self.get_loop_over_parameter_values(workflow_run_context)
        LOG.info(
@ -296,20 +332,29 @@ class ForLoopBlock(Block):
            num_loop_over_values=len(loop_over_values),
        )
        outputs_with_loop_values = []
+        block_outputs = []
        for loop_over_value in loop_over_values:
            context_parameters_with_value = self.get_loop_block_context_parameters(workflow_run_id, loop_over_value)
            for context_parameter in context_parameters_with_value:
                workflow_run_context.set_value(context_parameter.key, context_parameter.value)
-            await self.loop_block.execute(workflow_run_id=workflow_run_id)
-            if self.loop_block.output_parameter:
+            try:
+                block_output = await self.loop_block.execute(workflow_run_id=workflow_run_id)
+                block_outputs.append(block_output)
+            except Exception as e:
+                LOG.error("ForLoopBlock: Failed to execute loop block", exc_info=True)
+                raise e
+            if block_output.output_parameter:
                outputs_with_loop_values.append(
                    {
                        "loop_value": loop_over_value,
-                        "output_parameter": self.loop_block.output_parameter,
-                        "output_value": workflow_run_context.get_value(self.loop_block.output_parameter.key),
+                        "output_parameter": block_output.output_parameter,
+                        "output_value": workflow_run_context.get_value(block_output.output_parameter.key),
                    }
                )

+        # If all block outputs are successful, the loop is successful
+        success = all([block_output.success for block_output in block_outputs])
+
        if self.output_parameter:
            await workflow_run_context.register_output_parameter_value_post_execution(
                parameter=self.output_parameter,
@ -320,9 +365,11 @@ class ForLoopBlock(Block):
                output_parameter_id=self.output_parameter.output_parameter_id,
                value=outputs_with_loop_values,
            )
-            return self.output_parameter
+            return BlockResult(
+                success=success, output_parameter=self.output_parameter, output_parameter_value=outputs_with_loop_values
+            )

-        return None
+        return BlockResult(success=success)


 class CodeBlock(Block):
@ -333,10 +380,11 @@ class CodeBlock(Block):

    def get_all_parameters(
        self,
+        workflow_run_id: str,
    ) -> list[PARAMETER_TYPE]:
        return self.parameters

-    async def execute(self, workflow_run_id: str, **kwargs: dict) -> OutputParameter | None:
+    async def execute(self, workflow_run_id: str, **kwargs: dict) -> BlockResult:
        # get workflow run context
        workflow_run_context = self.get_workflow_run_context(workflow_run_id)
        # get all parameters into a dictionary
@ -362,9 +410,9 @@ class CodeBlock(Block):
                output_parameter_id=self.output_parameter.output_parameter_id,
                value=result,
            )
-            return self.output_parameter
+            return BlockResult(success=True, output_parameter=self.output_parameter, output_parameter_value=result)

-        return None
+        return BlockResult(success=True)


 class TextPromptBlock(Block):
@ -377,6 +425,7 @@ class TextPromptBlock(Block):

    def get_all_parameters(
        self,
+        workflow_run_id: str,
    ) -> list[PARAMETER_TYPE]:
        return self.parameters

@ -406,7 +455,7 @@ class TextPromptBlock(Block):
        LOG.info("TextPromptBlock: Received response from LLM", response=response)
        return response

-    async def execute(self, workflow_run_id: str, **kwargs: dict) -> OutputParameter | None:
+    async def execute(self, workflow_run_id: str, **kwargs: dict) -> BlockResult:
        # get workflow run context
        workflow_run_context = self.get_workflow_run_context(workflow_run_id)
        # get all parameters into a dictionary
@ -430,9 +479,9 @@ class TextPromptBlock(Block):
                output_parameter_id=self.output_parameter.output_parameter_id,
                value=response,
            )
-            return self.output_parameter
+            return BlockResult(success=True, output_parameter=self.output_parameter, output_parameter_value=response)

-        return None
+        return BlockResult(success=True)


 class DownloadToS3Block(Block):
@ -442,7 +491,13 @@ class DownloadToS3Block(Block):

    def get_all_parameters(
        self,
+        workflow_run_id: str,
    ) -> list[PARAMETER_TYPE]:
+        workflow_run_context = self.get_workflow_run_context(workflow_run_id)
+
+        if self.url and workflow_run_context.has_parameter(self.url):
+            return [workflow_run_context.get_parameter(self.url)]
+
        return []

    async def _upload_file_to_s3(self, uri: str, file_path: str) -> None:
@ -453,7 +508,7 @@ class DownloadToS3Block(Block):
            # Clean up the temporary file since it's created with delete=False
            os.unlink(file_path)

-    async def execute(self, workflow_run_id: str, **kwargs: dict) -> OutputParameter | None:
+    async def execute(self, workflow_run_id: str, **kwargs: dict) -> BlockResult:
        # get workflow run context
        workflow_run_context = self.get_workflow_run_context(workflow_run_id)
        # get all parameters into a dictionary
@ -493,10 +548,82 @@ class DownloadToS3Block(Block):
                output_parameter_id=self.output_parameter.output_parameter_id,
                value=uri,
            )
-            return self.output_parameter
+            return BlockResult(success=True, output_parameter=self.output_parameter, output_parameter_value=uri)

        LOG.info("DownloadToS3Block: No output parameter defined, returning None")
-        return None
+        return BlockResult(success=True)
+
+
+class UploadToS3Block(Block):
+    block_type: Literal[BlockType.UPLOAD_TO_S3] = BlockType.UPLOAD_TO_S3
+
+    # TODO (kerem): A directory upload is supported but we should also support a list of files
+    path: str | None = None
+
+    def get_all_parameters(
+        self,
+        workflow_run_id: str,
+    ) -> list[PARAMETER_TYPE]:
+        workflow_run_context = self.get_workflow_run_context(workflow_run_id)
+
+        if self.path and workflow_run_context.has_parameter(self.path):
+            return [workflow_run_context.get_parameter(self.path)]
+
+        return []
+
+    @staticmethod
+    def _get_s3_uri(workflow_run_id: str, path: str) -> str:
+        s3_bucket = SettingsManager.get_settings().AWS_S3_BUCKET_UPLOADS
+        s3_key = f"{SettingsManager.get_settings().ENV}/{workflow_run_id}/{uuid.uuid4()}_{Path(path).name}"
+        return f"s3://{s3_bucket}/{s3_key}"
+
+    async def execute(self, workflow_run_id: str, **kwargs: dict) -> BlockResult:
+        # get workflow run context
+        workflow_run_context = self.get_workflow_run_context(workflow_run_id)
+        # get all parameters into a dictionary
+        if self.path and workflow_run_context.has_parameter(self.path) and workflow_run_context.has_value(self.path):
+            file_path_parameter_value = workflow_run_context.get_value(self.path)
+            if file_path_parameter_value:
+                LOG.info(
+                    "UploadToS3Block: File path is parameterized, using parameter value",
+                    file_path_parameter_value=file_path_parameter_value,
+                    file_path_parameter_key=self.path,
+                )
+                self.path = file_path_parameter_value
+        # if the path is WORKFLOW_DOWNLOAD_DIRECTORY_PARAMETER_KEY, use the download directory for the workflow run
+        elif self.path == SettingsManager.get_settings().WORKFLOW_DOWNLOAD_DIRECTORY_PARAMETER_KEY:
+            self.path = str(get_path_for_workflow_download_directory(workflow_run_id).absolute())
+
+        if not self.path or not os.path.exists(self.path):
+            raise FileNotFoundError(f"UploadToS3Block: File not found at path: {self.path}")
+
+        try:
+            client = self.get_async_aws_client()
+            # is the file path a file or a directory?
+            if os.path.isdir(self.path):
+                # get all files in the directory, if there are more than 10 files, we will not upload them
+                files = os.listdir(self.path)
+                if len(files) > 10:
+                    raise ValueError("Too many files in the directory, not uploading")
+                for file in files:
+                    # if the file is a directory, we will not upload it
+                    if os.path.isdir(os.path.join(self.path, file)):
+                        LOG.warning("UploadToS3Block: Skipping directory", file=file)
+                        continue
+                    file_path = os.path.join(self.path, file)
+                    await client.upload_file_from_path(
+                        uri=self._get_s3_uri(workflow_run_id, file_path), file_path=file_path
+                    )
+            else:
+                await client.upload_file_from_path(
+                    uri=self._get_s3_uri(workflow_run_id, self.path), file_path=self.path
+                )
+        except Exception as e:
+            LOG.exception("UploadToS3Block: Failed to upload file to S3", file_path=self.path, exc_info=True)
+            raise e
+
+        LOG.info("UploadToS3Block: File(s) uploaded to S3", file_path=self.path)
+        return BlockResult(success=True)


 class SendEmailBlock(Block):
@ -515,8 +642,23 @@ class SendEmailBlock(Block):

    def get_all_parameters(
        self,
+        workflow_run_id: str,
    ) -> list[PARAMETER_TYPE]:
-        return [self.smtp_host, self.smtp_port, self.smtp_username, self.smtp_password]
+        workflow_run_context = self.get_workflow_run_context(workflow_run_id)
+        parameters = [self.smtp_host, self.smtp_port, self.smtp_username, self.smtp_password]
+
+        if self.file_attachments:
+            for file_path in self.file_attachments:
+                if workflow_run_context.has_parameter(file_path):
+                    parameters.append(workflow_run_context.get_parameter(file_path))
+
+        if self.subject and workflow_run_context.has_parameter(self.subject):
+            parameters.append(workflow_run_context.get_parameter(self.subject))
+
+        if self.body and workflow_run_context.has_parameter(self.body):
+            parameters.append(workflow_run_context.get_parameter(self.body))
+
+        return parameters

    def _decrypt_smtp_parameters(self, workflow_run_context: WorkflowRunContext) -> tuple[str, int, str, str]:
        obfuscated_smtp_host_value = workflow_run_context.get_value(self.smtp_host.key)
@ -545,21 +687,44 @@ class SendEmailBlock(Block):

        return smtp_host_value, smtp_port_value, smtp_username_value, smtp_password_value

-    def _get_file_paths(self, workflow_run_context: WorkflowRunContext) -> list[str]:
+    def _get_file_paths(self, workflow_run_context: WorkflowRunContext, workflow_run_id: str) -> list[str]:
        file_paths = []
-        for file_path in self.file_attachments:
-            if not workflow_run_context.has_parameter(file_path):
-                file_paths.append(file_path)
-                continue
+        for path in self.file_attachments:
+            # if the file path is a parameter, get the value from the workflow run context first
+            if workflow_run_context.has_parameter(path):
+                file_path_parameter_value = workflow_run_context.get_value(path)
+                # if the file path is a secret, get the original secret value from the workflow run context
+                file_path_parameter_secret_value = workflow_run_context.get_original_secret_value_or_none(
+                    file_path_parameter_value
+                )
+                if file_path_parameter_secret_value:
+                    path = file_path_parameter_secret_value
+                else:
+                    path = file_path_parameter_value

-            file_path_parameter_value = workflow_run_context.get_value(file_path)
-            file_path_parameter_secret_value = workflow_run_context.get_original_secret_value_or_none(
-                file_path_parameter_value
-            )
-            if file_path_parameter_secret_value:
-                file_paths.append(file_path_parameter_secret_value)
+            if path == SettingsManager.get_settings().WORKFLOW_DOWNLOAD_DIRECTORY_PARAMETER_KEY:
+                # if the path is WORKFLOW_DOWNLOAD_DIRECTORY_PARAMETER_KEY, use download directory for the workflow run
+                path = str(get_path_for_workflow_download_directory(workflow_run_id).absolute())
+                LOG.info(
+                    "SendEmailBlock: Using download directory for the workflow run",
+                    workflow_run_id=workflow_run_id,
+                    file_path=path,
+                )
+
+            # if the file path is a directory, add all files in the directory, skip directories, limit to 10 files
+            if os.path.exists(path) and os.path.isdir(path):
+                if len(os.listdir(path)) > 10:
+                    LOG.warning("SendEmailBlock: Too many files in the directory, not attaching to email")
+                    continue
+                for file in os.listdir(path):
+                    if os.path.isdir(os.path.join(path, file)):
+                        LOG.warning("SendEmailBlock: Skipping directory", file=file)
+                        continue
+                    file_path = os.path.join(path, file)
+                    file_paths.append(file_path)
            else:
-                file_paths.append(file_path_parameter_value)
+                # covers the case where the file path is a single file, a url, or an S3 uri
+                file_paths.append(path)

        return file_paths

@ -577,9 +742,14 @@ class SendEmailBlock(Block):
        msg["Subject"] = self.subject + f" - Workflow Run ID: {workflow_run_id}"
        msg["To"] = ", ".join(self.recipients)
        msg["From"] = self.sender
-        msg.set_content(self.body)
+        if self.body and workflow_run_context.has_parameter(self.body) and workflow_run_context.has_value(self.body):
+            # We're purposely not decrypting the body parameter value here because we don't want to expose secrets
+            body_parameter_value = workflow_run_context.get_value(self.body)
+            msg.set_content(body_parameter_value)
+        else:
+            msg.set_content(self.body)

-        for filename in self._get_file_paths(workflow_run_context):
+        for filename in self._get_file_paths(workflow_run_context, workflow_run_id):
            path = None
            try:
                if filename.startswith("s3://"):
@ -587,7 +757,7 @@ class SendEmailBlock(Block):
                elif filename.startswith("http://") or filename.startswith("https://"):
                    path = await download_file(filename)
                else:
-                    LOG.error("SendEmailBlock: Looking for file locally", filename=filename)
+                    LOG.info("SendEmailBlock: Looking for file locally", filename=filename)
                    if not os.path.exists(filename):
                        raise FileNotFoundError(f"File not found: {filename}")
                    if not os.path.isfile(filename):
@ -635,7 +805,7 @@ class SendEmailBlock(Block):

        return msg

-    async def execute(self, workflow_run_id: str, **kwargs: dict) -> OutputParameter | None:
+    async def execute(self, workflow_run_id: str, **kwargs: dict) -> BlockResult:
        workflow_run_context = self.get_workflow_run_context(workflow_run_id)
        smtp_host_value, smtp_port_value, smtp_username_value, smtp_password_value = self._decrypt_smtp_parameters(
            workflow_run_context
@ -652,47 +822,43 @@ class SendEmailBlock(Block):
            smtp_host.send_message(message)
            LOG.info("SendEmailBlock: Email sent")
        except Exception as e:
-            LOG.error("SendEmailBlock: Failed to send email", error=str(e))
+            LOG.error("SendEmailBlock: Failed to send email", exc_info=True)
            if self.output_parameter:
+                result_dict = {"success": False, "error": str(e)}
                await workflow_run_context.register_output_parameter_value_post_execution(
                    parameter=self.output_parameter,
-                    value={
-                        "success": False,
-                        "error": str(e),
-                    },
+                    value=result_dict,
                )
                await app.DATABASE.create_workflow_run_output_parameter(
                    workflow_run_id=workflow_run_id,
                    output_parameter_id=self.output_parameter.output_parameter_id,
-                    value={
-                        "success": False,
-                        "error": str(e),
-                    },
+                    value=result_dict,
+                )
+                return BlockResult(
+                    success=False, output_parameter=self.output_parameter, output_parameter_value=result_dict
                )
-                return self.output_parameter
            raise e
        finally:
            if smtp_host:
                smtp_host.quit()

+        result_dict = {"success": True}
        if self.output_parameter:
            await workflow_run_context.register_output_parameter_value_post_execution(
                parameter=self.output_parameter,
-                value={
-                    "success": True,
-                },
+                value=result_dict,
            )
            await app.DATABASE.create_workflow_run_output_parameter(
                workflow_run_id=workflow_run_id,
                output_parameter_id=self.output_parameter.output_parameter_id,
-                value={
-                    "success": True,
-                },
+                value=result_dict,
            )
-            return self.output_parameter
+            return BlockResult(success=True, output_parameter=self.output_parameter, output_parameter_value=result_dict)

-        return None
+        return BlockResult(success=True)


-BlockSubclasses = Union[ForLoopBlock, TaskBlock, CodeBlock, TextPromptBlock, DownloadToS3Block, SendEmailBlock]
+BlockSubclasses = Union[
+    ForLoopBlock, TaskBlock, CodeBlock, TextPromptBlock, DownloadToS3Block, UploadToS3Block, SendEmailBlock
+]
 BlockTypeVar = Annotated[BlockSubclasses, Field(discriminator="block_type")]