Workflow Fixes (#156)

This commit is contained in:
Kerem Yilmaz 2024-04-04 19:09:19 -07:00 committed by GitHub
parent 8117395d73
commit 0800990627
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
11 changed files with 350 additions and 108 deletions

View file

@ -3,6 +3,7 @@ import json
import os
import smtplib
import uuid
from dataclasses import dataclass
from email.message import EmailMessage
from enum import StrEnum
from pathlib import Path
@ -23,7 +24,7 @@ from skyvern.exceptions import (
from skyvern.forge import app
from skyvern.forge.prompts import prompt_engine
from skyvern.forge.sdk.api.aws import AsyncAWSClient
from skyvern.forge.sdk.api.files import download_file
from skyvern.forge.sdk.api.files import download_file, get_path_for_workflow_download_directory
from skyvern.forge.sdk.api.llm.api_handler_factory import LLMAPIHandlerFactory
from skyvern.forge.sdk.schemas.tasks import TaskStatus
from skyvern.forge.sdk.settings_manager import SettingsManager
@ -46,9 +47,17 @@ class BlockType(StrEnum):
CODE = "code"
TEXT_PROMPT = "text_prompt"
DOWNLOAD_TO_S3 = "download_to_s3"
UPLOAD_TO_S3 = "upload_to_s3"
SEND_EMAIL = "send_email"
@dataclass(frozen=True)
class BlockResult:
success: bool
output_parameter: OutputParameter | None = None
output_parameter_value: dict[str, Any] | list | str | None = None
class Block(BaseModel, abc.ABC):
# Must be unique within workflow definition
label: str
@ -68,12 +77,26 @@ class Block(BaseModel, abc.ABC):
return app.WORKFLOW_CONTEXT_MANAGER.aws_client
@abc.abstractmethod
async def execute(self, workflow_run_id: str, **kwargs: dict) -> OutputParameter | None:
async def execute(self, workflow_run_id: str, **kwargs: dict) -> BlockResult:
pass
async def execute_safe(self, workflow_run_id: str, **kwargs: dict) -> BlockResult:
try:
return await self.execute(workflow_run_id, **kwargs)
except Exception:
LOG.exception(
"Block execution failed",
exc_info=True,
workflow_run_id=workflow_run_id,
block_label=self.label,
block_type=self.block_type,
)
return BlockResult(success=False)
@abc.abstractmethod
def get_all_parameters(
self,
workflow_run_id: str,
) -> list[PARAMETER_TYPE]:
pass
@ -93,8 +116,15 @@ class TaskBlock(Block):
def get_all_parameters(
self,
workflow_run_id: str,
) -> list[PARAMETER_TYPE]:
return self.parameters
parameters = self.parameters
workflow_run_context = self.get_workflow_run_context(workflow_run_id)
if self.url and workflow_run_context.has_parameter(self.url):
parameters.append(workflow_run_context.get_parameter(self.url))
return parameters
@staticmethod
async def get_task_order(workflow_run_id: str, current_retry: int) -> tuple[int, int]:
@ -126,7 +156,7 @@ class TaskBlock(Block):
return order, retry + 1
async def execute(self, workflow_run_id: str, **kwargs: dict) -> OutputParameter | None:
async def execute(self, workflow_run_id: str, **kwargs: dict) -> BlockResult:
workflow_run_context = self.get_workflow_run_context(workflow_run_id)
current_retry = 0
# initial value for will_retry is True, so that the loop runs at least once
@ -198,7 +228,6 @@ class TaskBlock(Block):
if not updated_task.status.is_final():
raise UnexpectedTaskStatus(task_id=updated_task.task_id, status=updated_task.status)
if updated_task.status == TaskStatus.completed:
will_retry = False
LOG.info(
f"Task completed",
task_id=updated_task.task_id,
@ -224,7 +253,12 @@ class TaskBlock(Block):
workflow_id=workflow.workflow_id,
task_id=updated_task.task_id,
)
return self.output_parameter
return BlockResult(
success=True,
output_parameter=self.output_parameter,
output_parameter_value=updated_task.extracted_information,
)
return BlockResult(success=True)
else:
current_retry += 1
will_retry = current_retry <= self.max_retries
@ -239,7 +273,8 @@ class TaskBlock(Block):
current_retry=current_retry,
max_retries=self.max_retries,
)
return None
return BlockResult(success=False)
class ForLoopBlock(Block):
@ -251,15 +286,16 @@ class ForLoopBlock(Block):
def get_all_parameters(
self,
workflow_run_id: str,
) -> list[PARAMETER_TYPE]:
return self.loop_block.get_all_parameters() + [self.loop_over]
return self.loop_block.get_all_parameters(workflow_run_id) + [self.loop_over]
def get_loop_block_context_parameters(self, workflow_run_id: str, loop_data: Any) -> list[ContextParameter]:
if not isinstance(loop_data, dict):
# TODO (kerem): Should we add support for other types?
raise ValueError("loop_data should be a dictionary")
loop_block_parameters = self.loop_block.get_all_parameters()
loop_block_parameters = self.loop_block.get_all_parameters(workflow_run_id)
context_parameters = [
parameter for parameter in loop_block_parameters if isinstance(parameter, ContextParameter)
]
@ -286,7 +322,7 @@ class ForLoopBlock(Block):
# TODO (kerem): Implement this for context parameters
raise NotImplementedError
async def execute(self, workflow_run_id: str, **kwargs: dict) -> OutputParameter | None:
async def execute(self, workflow_run_id: str, **kwargs: dict) -> BlockResult:
workflow_run_context = self.get_workflow_run_context(workflow_run_id)
loop_over_values = self.get_loop_over_parameter_values(workflow_run_context)
LOG.info(
@ -296,20 +332,29 @@ class ForLoopBlock(Block):
num_loop_over_values=len(loop_over_values),
)
outputs_with_loop_values = []
block_outputs = []
for loop_over_value in loop_over_values:
context_parameters_with_value = self.get_loop_block_context_parameters(workflow_run_id, loop_over_value)
for context_parameter in context_parameters_with_value:
workflow_run_context.set_value(context_parameter.key, context_parameter.value)
await self.loop_block.execute(workflow_run_id=workflow_run_id)
if self.loop_block.output_parameter:
try:
block_output = await self.loop_block.execute(workflow_run_id=workflow_run_id)
block_outputs.append(block_output)
except Exception as e:
LOG.error("ForLoopBlock: Failed to execute loop block", exc_info=True)
raise e
if block_output.output_parameter:
outputs_with_loop_values.append(
{
"loop_value": loop_over_value,
"output_parameter": self.loop_block.output_parameter,
"output_value": workflow_run_context.get_value(self.loop_block.output_parameter.key),
"output_parameter": block_output.output_parameter,
"output_value": workflow_run_context.get_value(block_output.output_parameter.key),
}
)
# If all block outputs are successful, the loop is successful
success = all([block_output.success for block_output in block_outputs])
if self.output_parameter:
await workflow_run_context.register_output_parameter_value_post_execution(
parameter=self.output_parameter,
@ -320,9 +365,11 @@ class ForLoopBlock(Block):
output_parameter_id=self.output_parameter.output_parameter_id,
value=outputs_with_loop_values,
)
return self.output_parameter
return BlockResult(
success=success, output_parameter=self.output_parameter, output_parameter_value=outputs_with_loop_values
)
return None
return BlockResult(success=success)
class CodeBlock(Block):
@ -333,10 +380,11 @@ class CodeBlock(Block):
def get_all_parameters(
self,
workflow_run_id: str,
) -> list[PARAMETER_TYPE]:
return self.parameters
async def execute(self, workflow_run_id: str, **kwargs: dict) -> OutputParameter | None:
async def execute(self, workflow_run_id: str, **kwargs: dict) -> BlockResult:
# get workflow run context
workflow_run_context = self.get_workflow_run_context(workflow_run_id)
# get all parameters into a dictionary
@ -362,9 +410,9 @@ class CodeBlock(Block):
output_parameter_id=self.output_parameter.output_parameter_id,
value=result,
)
return self.output_parameter
return BlockResult(success=True, output_parameter=self.output_parameter, output_parameter_value=result)
return None
return BlockResult(success=True)
class TextPromptBlock(Block):
@ -377,6 +425,7 @@ class TextPromptBlock(Block):
def get_all_parameters(
self,
workflow_run_id: str,
) -> list[PARAMETER_TYPE]:
return self.parameters
@ -406,7 +455,7 @@ class TextPromptBlock(Block):
LOG.info("TextPromptBlock: Received response from LLM", response=response)
return response
async def execute(self, workflow_run_id: str, **kwargs: dict) -> OutputParameter | None:
async def execute(self, workflow_run_id: str, **kwargs: dict) -> BlockResult:
# get workflow run context
workflow_run_context = self.get_workflow_run_context(workflow_run_id)
# get all parameters into a dictionary
@ -430,9 +479,9 @@ class TextPromptBlock(Block):
output_parameter_id=self.output_parameter.output_parameter_id,
value=response,
)
return self.output_parameter
return BlockResult(success=True, output_parameter=self.output_parameter, output_parameter_value=response)
return None
return BlockResult(success=True)
class DownloadToS3Block(Block):
@ -442,7 +491,13 @@ class DownloadToS3Block(Block):
def get_all_parameters(
self,
workflow_run_id: str,
) -> list[PARAMETER_TYPE]:
workflow_run_context = self.get_workflow_run_context(workflow_run_id)
if self.url and workflow_run_context.has_parameter(self.url):
return [workflow_run_context.get_parameter(self.url)]
return []
async def _upload_file_to_s3(self, uri: str, file_path: str) -> None:
@ -453,7 +508,7 @@ class DownloadToS3Block(Block):
# Clean up the temporary file since it's created with delete=False
os.unlink(file_path)
async def execute(self, workflow_run_id: str, **kwargs: dict) -> OutputParameter | None:
async def execute(self, workflow_run_id: str, **kwargs: dict) -> BlockResult:
# get workflow run context
workflow_run_context = self.get_workflow_run_context(workflow_run_id)
# get all parameters into a dictionary
@ -493,10 +548,82 @@ class DownloadToS3Block(Block):
output_parameter_id=self.output_parameter.output_parameter_id,
value=uri,
)
return self.output_parameter
return BlockResult(success=True, output_parameter=self.output_parameter, output_parameter_value=uri)
LOG.info("DownloadToS3Block: No output parameter defined, returning None")
return None
return BlockResult(success=True)
class UploadToS3Block(Block):
block_type: Literal[BlockType.UPLOAD_TO_S3] = BlockType.UPLOAD_TO_S3
# TODO (kerem): A directory upload is supported but we should also support a list of files
path: str | None = None
def get_all_parameters(
self,
workflow_run_id: str,
) -> list[PARAMETER_TYPE]:
workflow_run_context = self.get_workflow_run_context(workflow_run_id)
if self.path and workflow_run_context.has_parameter(self.path):
return [workflow_run_context.get_parameter(self.path)]
return []
@staticmethod
def _get_s3_uri(workflow_run_id: str, path: str) -> str:
s3_bucket = SettingsManager.get_settings().AWS_S3_BUCKET_UPLOADS
s3_key = f"{SettingsManager.get_settings().ENV}/{workflow_run_id}/{uuid.uuid4()}_{Path(path).name}"
return f"s3://{s3_bucket}/{s3_key}"
async def execute(self, workflow_run_id: str, **kwargs: dict) -> BlockResult:
# get workflow run context
workflow_run_context = self.get_workflow_run_context(workflow_run_id)
# get all parameters into a dictionary
if self.path and workflow_run_context.has_parameter(self.path) and workflow_run_context.has_value(self.path):
file_path_parameter_value = workflow_run_context.get_value(self.path)
if file_path_parameter_value:
LOG.info(
"UploadToS3Block: File path is parameterized, using parameter value",
file_path_parameter_value=file_path_parameter_value,
file_path_parameter_key=self.path,
)
self.path = file_path_parameter_value
# if the path is WORKFLOW_DOWNLOAD_DIRECTORY_PARAMETER_KEY, use the download directory for the workflow run
elif self.path == SettingsManager.get_settings().WORKFLOW_DOWNLOAD_DIRECTORY_PARAMETER_KEY:
self.path = str(get_path_for_workflow_download_directory(workflow_run_id).absolute())
if not self.path or not os.path.exists(self.path):
raise FileNotFoundError(f"UploadToS3Block: File not found at path: {self.path}")
try:
client = self.get_async_aws_client()
# is the file path a file or a directory?
if os.path.isdir(self.path):
# get all files in the directory, if there are more than 10 files, we will not upload them
files = os.listdir(self.path)
if len(files) > 10:
raise ValueError("Too many files in the directory, not uploading")
for file in files:
# if the file is a directory, we will not upload it
if os.path.isdir(os.path.join(self.path, file)):
LOG.warning("UploadToS3Block: Skipping directory", file=file)
continue
file_path = os.path.join(self.path, file)
await client.upload_file_from_path(
uri=self._get_s3_uri(workflow_run_id, file_path), file_path=file_path
)
else:
await client.upload_file_from_path(
uri=self._get_s3_uri(workflow_run_id, self.path), file_path=self.path
)
except Exception as e:
LOG.exception("UploadToS3Block: Failed to upload file to S3", file_path=self.path, exc_info=True)
raise e
LOG.info("UploadToS3Block: File(s) uploaded to S3", file_path=self.path)
return BlockResult(success=True)
class SendEmailBlock(Block):
@ -515,8 +642,23 @@ class SendEmailBlock(Block):
def get_all_parameters(
self,
workflow_run_id: str,
) -> list[PARAMETER_TYPE]:
return [self.smtp_host, self.smtp_port, self.smtp_username, self.smtp_password]
workflow_run_context = self.get_workflow_run_context(workflow_run_id)
parameters = [self.smtp_host, self.smtp_port, self.smtp_username, self.smtp_password]
if self.file_attachments:
for file_path in self.file_attachments:
if workflow_run_context.has_parameter(file_path):
parameters.append(workflow_run_context.get_parameter(file_path))
if self.subject and workflow_run_context.has_parameter(self.subject):
parameters.append(workflow_run_context.get_parameter(self.subject))
if self.body and workflow_run_context.has_parameter(self.body):
parameters.append(workflow_run_context.get_parameter(self.body))
return parameters
def _decrypt_smtp_parameters(self, workflow_run_context: WorkflowRunContext) -> tuple[str, int, str, str]:
obfuscated_smtp_host_value = workflow_run_context.get_value(self.smtp_host.key)
@ -545,21 +687,44 @@ class SendEmailBlock(Block):
return smtp_host_value, smtp_port_value, smtp_username_value, smtp_password_value
def _get_file_paths(self, workflow_run_context: WorkflowRunContext) -> list[str]:
def _get_file_paths(self, workflow_run_context: WorkflowRunContext, workflow_run_id: str) -> list[str]:
file_paths = []
for file_path in self.file_attachments:
if not workflow_run_context.has_parameter(file_path):
file_paths.append(file_path)
continue
for path in self.file_attachments:
# if the file path is a parameter, get the value from the workflow run context first
if workflow_run_context.has_parameter(path):
file_path_parameter_value = workflow_run_context.get_value(path)
# if the file path is a secret, get the original secret value from the workflow run context
file_path_parameter_secret_value = workflow_run_context.get_original_secret_value_or_none(
file_path_parameter_value
)
if file_path_parameter_secret_value:
path = file_path_parameter_secret_value
else:
path = file_path_parameter_value
file_path_parameter_value = workflow_run_context.get_value(file_path)
file_path_parameter_secret_value = workflow_run_context.get_original_secret_value_or_none(
file_path_parameter_value
)
if file_path_parameter_secret_value:
file_paths.append(file_path_parameter_secret_value)
if path == SettingsManager.get_settings().WORKFLOW_DOWNLOAD_DIRECTORY_PARAMETER_KEY:
# if the path is WORKFLOW_DOWNLOAD_DIRECTORY_PARAMETER_KEY, use download directory for the workflow run
path = str(get_path_for_workflow_download_directory(workflow_run_id).absolute())
LOG.info(
"SendEmailBlock: Using download directory for the workflow run",
workflow_run_id=workflow_run_id,
file_path=path,
)
# if the file path is a directory, add all files in the directory, skip directories, limit to 10 files
if os.path.exists(path) and os.path.isdir(path):
if len(os.listdir(path)) > 10:
LOG.warning("SendEmailBlock: Too many files in the directory, not attaching to email")
continue
for file in os.listdir(path):
if os.path.isdir(os.path.join(path, file)):
LOG.warning("SendEmailBlock: Skipping directory", file=file)
continue
file_path = os.path.join(path, file)
file_paths.append(file_path)
else:
file_paths.append(file_path_parameter_value)
# covers the case where the file path is a single file, a url, or an S3 uri
file_paths.append(path)
return file_paths
@ -577,9 +742,14 @@ class SendEmailBlock(Block):
msg["Subject"] = self.subject + f" - Workflow Run ID: {workflow_run_id}"
msg["To"] = ", ".join(self.recipients)
msg["From"] = self.sender
msg.set_content(self.body)
if self.body and workflow_run_context.has_parameter(self.body) and workflow_run_context.has_value(self.body):
# We're purposely not decrypting the body parameter value here because we don't want to expose secrets
body_parameter_value = workflow_run_context.get_value(self.body)
msg.set_content(body_parameter_value)
else:
msg.set_content(self.body)
for filename in self._get_file_paths(workflow_run_context):
for filename in self._get_file_paths(workflow_run_context, workflow_run_id):
path = None
try:
if filename.startswith("s3://"):
@ -587,7 +757,7 @@ class SendEmailBlock(Block):
elif filename.startswith("http://") or filename.startswith("https://"):
path = await download_file(filename)
else:
LOG.error("SendEmailBlock: Looking for file locally", filename=filename)
LOG.info("SendEmailBlock: Looking for file locally", filename=filename)
if not os.path.exists(filename):
raise FileNotFoundError(f"File not found: {filename}")
if not os.path.isfile(filename):
@ -635,7 +805,7 @@ class SendEmailBlock(Block):
return msg
async def execute(self, workflow_run_id: str, **kwargs: dict) -> OutputParameter | None:
async def execute(self, workflow_run_id: str, **kwargs: dict) -> BlockResult:
workflow_run_context = self.get_workflow_run_context(workflow_run_id)
smtp_host_value, smtp_port_value, smtp_username_value, smtp_password_value = self._decrypt_smtp_parameters(
workflow_run_context
@ -652,47 +822,43 @@ class SendEmailBlock(Block):
smtp_host.send_message(message)
LOG.info("SendEmailBlock: Email sent")
except Exception as e:
LOG.error("SendEmailBlock: Failed to send email", error=str(e))
LOG.error("SendEmailBlock: Failed to send email", exc_info=True)
if self.output_parameter:
result_dict = {"success": False, "error": str(e)}
await workflow_run_context.register_output_parameter_value_post_execution(
parameter=self.output_parameter,
value={
"success": False,
"error": str(e),
},
value=result_dict,
)
await app.DATABASE.create_workflow_run_output_parameter(
workflow_run_id=workflow_run_id,
output_parameter_id=self.output_parameter.output_parameter_id,
value={
"success": False,
"error": str(e),
},
value=result_dict,
)
return BlockResult(
success=False, output_parameter=self.output_parameter, output_parameter_value=result_dict
)
return self.output_parameter
raise e
finally:
if smtp_host:
smtp_host.quit()
result_dict = {"success": True}
if self.output_parameter:
await workflow_run_context.register_output_parameter_value_post_execution(
parameter=self.output_parameter,
value={
"success": True,
},
value=result_dict,
)
await app.DATABASE.create_workflow_run_output_parameter(
workflow_run_id=workflow_run_id,
output_parameter_id=self.output_parameter.output_parameter_id,
value={
"success": True,
},
value=result_dict,
)
return self.output_parameter
return BlockResult(success=True, output_parameter=self.output_parameter, output_parameter_value=result_dict)
return None
return BlockResult(success=True)
BlockSubclasses = Union[ForLoopBlock, TaskBlock, CodeBlock, TextPromptBlock, DownloadToS3Block, SendEmailBlock]
BlockSubclasses = Union[
ForLoopBlock, TaskBlock, CodeBlock, TextPromptBlock, DownloadToS3Block, UploadToS3Block, SendEmailBlock
]
BlockTypeVar = Annotated[BlockSubclasses, Field(discriminator="block_type")]