import asyncio
import json
from datetime import UTC, datetime
from typing import Any

import httpx
import structlog

from skyvern import analytics
from skyvern.config import settings
from skyvern.constants import GET_DOWNLOADED_FILES_TIMEOUT, SAVE_DOWNLOADED_FILES_TIMEOUT
from skyvern.exceptions import (
    FailedToSendWebhook,
    MissingValueForParameter,
    SkyvernException,
    WorkflowNotFound,
    WorkflowRunNotFound,
)
from skyvern.forge import app
from skyvern.forge.sdk.artifact.models import ArtifactType
from skyvern.forge.sdk.core import skyvern_context
from skyvern.forge.sdk.core.security import generate_skyvern_webhook_headers
from skyvern.forge.sdk.core.skyvern_context import SkyvernContext
from skyvern.forge.sdk.db.enums import TaskType
from skyvern.forge.sdk.models import Step, StepStatus
from skyvern.forge.sdk.schemas.files import FileInfo
from skyvern.forge.sdk.schemas.organizations import Organization
from skyvern.forge.sdk.schemas.tasks import Task
from skyvern.forge.sdk.schemas.workflow_runs import WorkflowRunBlock, WorkflowRunTimeline, WorkflowRunTimelineType
from skyvern.forge.sdk.workflow.exceptions import (
    ContextParameterSourceNotDefined,
    InvalidWaitBlockTime,
    InvalidWorkflowDefinition,
    WorkflowDefinitionHasDuplicateParameterKeys,
    WorkflowDefinitionHasReservedParameterKeys,
    WorkflowParameterMissingRequiredValue,
)
from skyvern.forge.sdk.workflow.models.block import (
    ActionBlock,
    BlockStatus,
    BlockType,
    BlockTypeVar,
    CodeBlock,
    DownloadToS3Block,
    ExtractionBlock,
    FileDownloadBlock,
    FileParserBlock,
    FileUploadBlock,
    ForLoopBlock,
    LoginBlock,
    NavigationBlock,
    PDFParserBlock,
    SendEmailBlock,
    TaskBlock,
    TaskV2Block,
    TextPromptBlock,
    UploadToS3Block,
    UrlBlock,
    ValidationBlock,
    WaitBlock,
)
from skyvern.forge.sdk.workflow.models.parameter import (
    PARAMETER_TYPE,
    AWSSecretParameter,
    BitwardenCreditCardDataParameter,
    BitwardenLoginCredentialParameter,
    BitwardenSensitiveInformationParameter,
    ContextParameter,
    CredentialParameter,
    OutputParameter,
    Parameter,
    ParameterType,
    WorkflowParameter,
    WorkflowParameterType,
)
from skyvern.forge.sdk.workflow.models.workflow import (
    Workflow,
    WorkflowDefinition,
    WorkflowRequestBody,
    WorkflowRun,
    WorkflowRunOutputParameter,
    WorkflowRunParameter,
    WorkflowRunResponse,
    WorkflowRunStatus,
    WorkflowStatus,
)
from skyvern.forge.sdk.workflow.models.yaml import (
    BLOCK_YAML_TYPES,
    ForLoopBlockYAML,
    WorkflowCreateYAMLRequest,
    WorkflowDefinitionYAML,
)
from skyvern.schemas.runs import ProxyLocation
from skyvern.webeye.browser_factory import BrowserState

LOG = structlog.get_logger()


class WorkflowService:
    async def setup_workflow_run(
        self,
        request_id: str | None,
        workflow_request: WorkflowRequestBody,
        workflow_permanent_id: str,
        organization_id: str,
        is_template_workflow: bool = False,
        version: int | None = None,
        max_steps_override: int | None = None,
        parent_workflow_run_id: str | None = None,
    ) -> WorkflowRun:
        """
        Create a workflow run and its parameters. Validate the workflow and the organization. If a required
        parameter is missing and has no default value, mark the workflow run as failed.
        :param request_id: The request id for the workflow run.
        :param workflow_request: The request body for the workflow run, containing the parameters and the config.
        :param workflow_permanent_id: The permanent id of the workflow to run.
        :param organization_id: The organization id for the workflow.
        :param max_steps_override: The max steps override for the workflow run, if any.
        :return: The created workflow run.
        """
        # Validate the workflow and the organization
        workflow = await self.get_workflow_by_permanent_id(
            workflow_permanent_id=workflow_permanent_id,
            organization_id=None if is_template_workflow else organization_id,
            version=version,
        )
        if workflow is None:
            LOG.error(f"Workflow {workflow_permanent_id} not found", workflow_version=version)
            raise WorkflowNotFound(workflow_permanent_id=workflow_permanent_id, version=version)
        workflow_id = workflow.workflow_id
        if workflow_request.proxy_location is None and workflow.proxy_location is not None:
            workflow_request.proxy_location = workflow.proxy_location
        if workflow_request.webhook_callback_url is None and workflow.webhook_callback_url is not None:
            workflow_request.webhook_callback_url = workflow.webhook_callback_url
        # Create the workflow run and set skyvern context
        workflow_run = await self.create_workflow_run(
            workflow_request=workflow_request,
            workflow_permanent_id=workflow_permanent_id,
            workflow_id=workflow_id,
            organization_id=organization_id,
            parent_workflow_run_id=parent_workflow_run_id,
        )
        LOG.info(
            f"Created workflow run {workflow_run.workflow_run_id} for workflow {workflow.workflow_id}",
            request_id=request_id,
            workflow_run_id=workflow_run.workflow_run_id,
            workflow_id=workflow.workflow_id,
            organization_id=workflow.organization_id,
            proxy_location=workflow_request.proxy_location,
            webhook_callback_url=workflow_request.webhook_callback_url,
        )
        skyvern_context.set(
            SkyvernContext(
                organization_id=organization_id,
                request_id=request_id,
                workflow_id=workflow_id,
                workflow_run_id=workflow_run.workflow_run_id,
                max_steps_override=max_steps_override,
            )
        )

        # Create all the workflow run parameters. AWSSecretParameter won't have workflow run parameters created.
        all_workflow_parameters = await self.get_workflow_parameters(workflow_id=workflow.workflow_id)
        try:
            for workflow_parameter in all_workflow_parameters:
                if workflow_request.data and workflow_parameter.key in workflow_request.data:
                    request_body_value = workflow_request.data[workflow_parameter.key]
                    await self.create_workflow_run_parameter(
                        workflow_run_id=workflow_run.workflow_run_id,
                        workflow_parameter=workflow_parameter,
                        value=request_body_value,
                    )
                elif workflow_parameter.default_value is not None:
                    await self.create_workflow_run_parameter(
                        workflow_run_id=workflow_run.workflow_run_id,
                        workflow_parameter=workflow_parameter,
                        value=workflow_parameter.default_value,
                    )
                else:
                    raise MissingValueForParameter(
                        parameter_key=workflow_parameter.key,
                        workflow_id=workflow.workflow_id,
                        workflow_run_id=workflow_run.workflow_run_id,
                    )
        except Exception as e:
            LOG.exception(
                f"Error while setting up workflow run {workflow_run.workflow_run_id}",
                workflow_run_id=workflow_run.workflow_run_id,
            )

            failure_reason = f"Setup workflow failed due to an unexpected exception: {str(e)}"
            if isinstance(e, SkyvernException):
                failure_reason = f"Setup workflow failed due to a SkyvernException({e.__class__.__name__}): {str(e)}"

            await self.mark_workflow_run_as_failed(
                workflow_run_id=workflow_run.workflow_run_id, failure_reason=failure_reason
            )
            raise e

        return workflow_run

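    # Usage sketch (illustrative only; identifiers are placeholders): values in
    # `workflow_request.data` take precedence over parameter defaults, and a
    # parameter with neither raises MissingValueForParameter.
    #
    #     workflow_run = await workflow_service.setup_workflow_run(
    #         request_id="req_123",
    #         workflow_request=WorkflowRequestBody(data={"order_id": "A-1001"}),
    #         workflow_permanent_id="wpid_abc",
    #         organization_id="org_xyz",
    #     )
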
    async def execute_workflow(
        self,
        workflow_run_id: str,
        api_key: str,
        organization: Organization,
        browser_session_id: str | None = None,
    ) -> WorkflowRun:
        """Execute a workflow."""
        organization_id = organization.organization_id
        LOG.info(
            "Executing workflow",
            workflow_run_id=workflow_run_id,
            organization_id=organization_id,
            browser_session_id=browser_session_id,
        )
        workflow_run = await self.get_workflow_run(workflow_run_id=workflow_run_id, organization_id=organization_id)
        workflow = await self.get_workflow_by_permanent_id(workflow_permanent_id=workflow_run.workflow_permanent_id)

        # Set workflow run status to running, create workflow run parameters
        await self.mark_workflow_run_as_running(workflow_run_id=workflow_run.workflow_run_id)

        # Get all context parameters from the workflow definition
        context_parameters = [
            parameter
            for parameter in workflow.workflow_definition.parameters
            if isinstance(parameter, ContextParameter)
        ]

        secret_parameters = [
            parameter
            for parameter in workflow.workflow_definition.parameters
            if isinstance(
                parameter,
                (
                    AWSSecretParameter,
                    BitwardenLoginCredentialParameter,
                    BitwardenCreditCardDataParameter,
                    BitwardenSensitiveInformationParameter,
                    CredentialParameter,
                ),
            )
        ]

        # Get all <workflow parameter, workflow run parameter> tuples
        wp_wps_tuples = await self.get_workflow_run_parameter_tuples(workflow_run_id=workflow_run.workflow_run_id)
        workflow_output_parameters = await self.get_workflow_output_parameters(workflow_id=workflow.workflow_id)
        try:
            await app.WORKFLOW_CONTEXT_MANAGER.initialize_workflow_run_context(
                organization,
                workflow_run_id,
                wp_wps_tuples,
                workflow_output_parameters,
                context_parameters,
                secret_parameters,
            )
        except Exception as e:
            LOG.exception(
                f"Error while initializing workflow run context for workflow run {workflow_run.workflow_run_id}",
                workflow_run_id=workflow_run.workflow_run_id,
            )

            exception_message = f"Unexpected error: {str(e)}"
            if isinstance(e, SkyvernException):
                exception_message = f"unexpected SkyvernException({e.__class__.__name__}): {str(e)}"

            failure_reason = f"Failed to initialize workflow run context. failure reason: {exception_message}"
            await self.mark_workflow_run_as_failed(
                workflow_run_id=workflow_run.workflow_run_id, failure_reason=failure_reason
            )
            await self.clean_up_workflow(
                workflow=workflow,
                workflow_run=workflow_run,
                api_key=api_key,
                browser_session_id=browser_session_id,
                close_browser_on_completion=browser_session_id is None,
            )
            return workflow_run

        # Execute workflow blocks
        blocks = workflow.workflow_definition.blocks
        blocks_cnt = len(blocks)
        block_result = None
        for block_idx, block in enumerate(blocks):
            try:
                refreshed_workflow_run = await app.DATABASE.get_workflow_run(
                    workflow_run_id=workflow_run.workflow_run_id,
                    organization_id=organization_id,
                )
                if refreshed_workflow_run and refreshed_workflow_run.status == WorkflowRunStatus.canceled:
                    LOG.info(
                        "Workflow run is canceled, stopping execution inside workflow execution loop",
                        workflow_run_id=workflow_run.workflow_run_id,
                        block_idx=block_idx,
                        block_type=block.block_type,
                        block_label=block.label,
                    )
                    await self.clean_up_workflow(
                        workflow=workflow,
                        workflow_run=workflow_run,
                        api_key=api_key,
                        need_call_webhook=True,
                        close_browser_on_completion=browser_session_id is None,
                        browser_session_id=browser_session_id,
                    )
                    return workflow_run

                if refreshed_workflow_run and refreshed_workflow_run.status == WorkflowRunStatus.timed_out:
                    LOG.info(
                        "Workflow run timed out, stopping execution inside workflow execution loop",
                        workflow_run_id=workflow_run.workflow_run_id,
                        block_idx=block_idx,
                        block_type=block.block_type,
                        block_label=block.label,
                    )
                    await self.clean_up_workflow(
                        workflow=workflow,
                        workflow_run=workflow_run,
                        api_key=api_key,
                        need_call_webhook=True,
                        close_browser_on_completion=browser_session_id is None,
                        browser_session_id=browser_session_id,
                    )
                    return workflow_run

                parameters = block.get_all_parameters(workflow_run_id)
                await app.WORKFLOW_CONTEXT_MANAGER.register_block_parameters_for_workflow_run(
                    workflow_run_id, parameters, organization
                )
                LOG.info(
                    f"Executing root block {block.block_type} at index {block_idx}/{blocks_cnt - 1} for workflow run {workflow_run_id}",
                    block_type=block.block_type,
                    workflow_run_id=workflow_run.workflow_run_id,
                    block_idx=block_idx,
                    block_type_var=block.block_type,
                    block_label=block.label,
                )
                block_result = await block.execute_safe(
                    workflow_run_id=workflow_run_id,
                    organization_id=organization_id,
                    browser_session_id=browser_session_id,
                )
                if block_result.status == BlockStatus.canceled:
                    LOG.info(
                        f"Block with type {block.block_type} at index {block_idx}/{blocks_cnt - 1} was canceled for workflow run {workflow_run_id}, canceling workflow run",
                        block_type=block.block_type,
                        workflow_run_id=workflow_run.workflow_run_id,
                        block_idx=block_idx,
                        block_result=block_result,
                        block_type_var=block.block_type,
                        block_label=block.label,
                    )
                    await self.mark_workflow_run_as_canceled(workflow_run_id=workflow_run.workflow_run_id)
                    # We're not sending a webhook here because the workflow run is manually marked as canceled.
                    await self.clean_up_workflow(
                        workflow=workflow,
                        workflow_run=workflow_run,
                        api_key=api_key,
                        need_call_webhook=False,
                        close_browser_on_completion=browser_session_id is None,
                        browser_session_id=browser_session_id,
                    )
                    return workflow_run
                elif block_result.status == BlockStatus.failed:
                    LOG.error(
                        f"Block with type {block.block_type} at index {block_idx}/{blocks_cnt - 1} failed for workflow run {workflow_run_id}",
                        block_type=block.block_type,
                        workflow_run_id=workflow_run.workflow_run_id,
                        block_idx=block_idx,
                        block_result=block_result,
                        block_type_var=block.block_type,
                        block_label=block.label,
                    )
                    if not block.continue_on_failure:
                        failure_reason = (
                            f"{block.block_type} block failed. failure reason: {block_result.failure_reason}"
                        )
                        await self.mark_workflow_run_as_failed(
                            workflow_run_id=workflow_run.workflow_run_id, failure_reason=failure_reason
                        )
                        await self.clean_up_workflow(
                            workflow=workflow,
                            workflow_run=workflow_run,
                            api_key=api_key,
                            close_browser_on_completion=browser_session_id is None,
                            browser_session_id=browser_session_id,
                        )
                        return workflow_run

                    LOG.warning(
                        f"Block with type {block.block_type} at index {block_idx}/{blocks_cnt - 1} failed but will continue executing the workflow run {workflow_run_id}",
                        block_type=block.block_type,
                        workflow_run_id=workflow_run.workflow_run_id,
                        block_idx=block_idx,
                        block_result=block_result,
                        continue_on_failure=block.continue_on_failure,
                        block_type_var=block.block_type,
                        block_label=block.label,
                    )

                elif block_result.status == BlockStatus.terminated:
                    LOG.info(
                        f"Block with type {block.block_type} at index {block_idx}/{blocks_cnt - 1} was terminated for workflow run {workflow_run_id}, marking workflow run as terminated",
                        block_type=block.block_type,
                        workflow_run_id=workflow_run.workflow_run_id,
                        block_idx=block_idx,
                        block_result=block_result,
                        block_type_var=block.block_type,
                        block_label=block.label,
                    )

                    if not block.continue_on_failure:
                        failure_reason = f"{block.block_type} block terminated. Reason: {block_result.failure_reason}"
                        await self.mark_workflow_run_as_terminated(
                            workflow_run_id=workflow_run.workflow_run_id, failure_reason=failure_reason
                        )
                        await self.clean_up_workflow(
                            workflow=workflow,
                            workflow_run=workflow_run,
                            api_key=api_key,
                            close_browser_on_completion=browser_session_id is None,
                            browser_session_id=browser_session_id,
                        )
                        return workflow_run

                    LOG.warning(
                        f"Block with type {block.block_type} at index {block_idx}/{blocks_cnt - 1} was terminated for workflow run {workflow_run_id}, but will continue executing the workflow run",
                        block_type=block.block_type,
                        workflow_run_id=workflow_run.workflow_run_id,
                        block_idx=block_idx,
                        block_result=block_result,
                        continue_on_failure=block.continue_on_failure,
                        block_type_var=block.block_type,
                        block_label=block.label,
                    )

                elif block_result.status == BlockStatus.timed_out:
                    LOG.info(
                        f"Block with type {block.block_type} at index {block_idx}/{blocks_cnt - 1} timed out for workflow run {workflow_run_id}, marking workflow run as failed",
                        block_type=block.block_type,
                        workflow_run_id=workflow_run.workflow_run_id,
                        block_idx=block_idx,
                        block_result=block_result,
                        block_type_var=block.block_type,
                        block_label=block.label,
                    )

                    if not block.continue_on_failure:
                        failure_reason = f"{block.block_type} block timed out. Reason: {block_result.failure_reason}"
                        await self.mark_workflow_run_as_failed(
                            workflow_run_id=workflow_run.workflow_run_id, failure_reason=failure_reason
                        )
                        await self.clean_up_workflow(
                            workflow=workflow,
                            workflow_run=workflow_run,
                            api_key=api_key,
                            close_browser_on_completion=browser_session_id is None,
                            browser_session_id=browser_session_id,
                        )
                        return workflow_run

                    LOG.warning(
                        f"Block with type {block.block_type} at index {block_idx}/{blocks_cnt - 1} timed out for workflow run {workflow_run_id}, but will continue executing the workflow run",
                        block_type=block.block_type,
                        workflow_run_id=workflow_run.workflow_run_id,
                        block_idx=block_idx,
                        block_result=block_result,
                        continue_on_failure=block.continue_on_failure,
                        block_type_var=block.block_type,
                        block_label=block.label,
                    )

            except Exception as e:
                LOG.exception(
                    f"Error while executing workflow run {workflow_run.workflow_run_id}",
                    workflow_run_id=workflow_run.workflow_run_id,
                    block_idx=block_idx,
                    block_type=block.block_type,
                    block_label=block.label,
                )

                exception_message = f"Unexpected error: {str(e)}"
                if isinstance(e, SkyvernException):
                    exception_message = f"unexpected SkyvernException({e.__class__.__name__}): {str(e)}"

                failure_reason = f"{block.block_type} block failed. failure reason: {exception_message}"
                await self.mark_workflow_run_as_failed(
                    workflow_run_id=workflow_run.workflow_run_id, failure_reason=failure_reason
                )
                await self.clean_up_workflow(
                    workflow=workflow,
                    workflow_run=workflow_run,
                    api_key=api_key,
                    browser_session_id=browser_session_id,
                    close_browser_on_completion=browser_session_id is None,
                )
                return workflow_run

        refreshed_workflow_run = await app.DATABASE.get_workflow_run(
            workflow_run_id=workflow_run.workflow_run_id,
            organization_id=organization_id,
        )
        if refreshed_workflow_run and refreshed_workflow_run.status not in (
            WorkflowRunStatus.canceled,
            WorkflowRunStatus.failed,
            WorkflowRunStatus.terminated,
            WorkflowRunStatus.timed_out,
        ):
            await self.mark_workflow_run_as_completed(workflow_run_id=workflow_run.workflow_run_id)
        else:
            LOG.info(
                "Workflow run is already timed_out, canceled, failed, or terminated; not marking as completed",
                workflow_run_id=workflow_run.workflow_run_id,
                workflow_run_status=refreshed_workflow_run.status if refreshed_workflow_run else None,
            )
        await self.clean_up_workflow(
            workflow=workflow,
            workflow_run=workflow_run,
            api_key=api_key,
            browser_session_id=browser_session_id,
            close_browser_on_completion=browser_session_id is None,
        )

        # Track workflow run duration when completed
        duration_seconds = (datetime.now(UTC) - workflow_run.created_at.replace(tzinfo=UTC)).total_seconds()
        LOG.info(
            "Workflow run duration metrics",
            workflow_run_id=workflow_run_id,
            workflow_id=workflow_run.workflow_id,
            duration_seconds=duration_seconds,
            workflow_run_status=WorkflowRunStatus.completed,
            organization_id=organization_id,
        )

        return workflow_run

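    # Usage sketch (illustrative only): a run is normally set up first and then
    # executed with the same ids; the api key below is a placeholder.
    #
    #     run = await workflow_service.setup_workflow_run(...)
    #     run = await workflow_service.execute_workflow(
    #         workflow_run_id=run.workflow_run_id,
    #         api_key="<api-key>",
    #         organization=organization,
    #     )
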
    async def create_workflow(
        self,
        organization_id: str,
        title: str,
        workflow_definition: WorkflowDefinition,
        description: str | None = None,
        proxy_location: ProxyLocation | None = None,
        webhook_callback_url: str | None = None,
        totp_verification_url: str | None = None,
        totp_identifier: str | None = None,
        persist_browser_session: bool = False,
        workflow_permanent_id: str | None = None,
        version: int | None = None,
        is_saved_task: bool = False,
        status: WorkflowStatus = WorkflowStatus.published,
    ) -> Workflow:
        return await app.DATABASE.create_workflow(
            title=title,
            workflow_definition=workflow_definition.model_dump(),
            organization_id=organization_id,
            description=description,
            proxy_location=proxy_location,
            webhook_callback_url=webhook_callback_url,
            totp_verification_url=totp_verification_url,
            totp_identifier=totp_identifier,
            persist_browser_session=persist_browser_session,
            workflow_permanent_id=workflow_permanent_id,
            version=version,
            is_saved_task=is_saved_task,
            status=status,
        )

    async def get_workflow(self, workflow_id: str, organization_id: str | None = None) -> Workflow:
        workflow = await app.DATABASE.get_workflow(workflow_id=workflow_id, organization_id=organization_id)
        if not workflow:
            raise WorkflowNotFound(workflow_id=workflow_id)
        return workflow

    async def get_workflow_by_permanent_id(
        self,
        workflow_permanent_id: str,
        organization_id: str | None = None,
        version: int | None = None,
        exclude_deleted: bool = True,
    ) -> Workflow:
        workflow = await app.DATABASE.get_workflow_by_permanent_id(
            workflow_permanent_id,
            organization_id=organization_id,
            version=version,
            exclude_deleted=exclude_deleted,
        )
        if not workflow:
            raise WorkflowNotFound(workflow_permanent_id=workflow_permanent_id, version=version)
        return workflow

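    # Usage sketch (illustrative only): omitting `version` returns the latest
    # version; passing an integer pins a specific one.
    #
    #     latest = await workflow_service.get_workflow_by_permanent_id("wpid_abc")
    #     pinned = await workflow_service.get_workflow_by_permanent_id("wpid_abc", version=3)
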
    async def get_workflows_by_permanent_ids(
        self,
        workflow_permanent_ids: list[str],
        organization_id: str | None = None,
        page: int = 1,
        page_size: int = 10,
        title: str = "",
        statuses: list[WorkflowStatus] | None = None,
    ) -> list[Workflow]:
        return await app.DATABASE.get_workflows_by_permanent_ids(
            workflow_permanent_ids,
            organization_id=organization_id,
            page=page,
            page_size=page_size,
            title=title,
            statuses=statuses,
        )

    async def get_workflows_by_organization_id(
        self,
        organization_id: str,
        page: int = 1,
        page_size: int = 10,
        only_saved_tasks: bool = False,
        only_workflows: bool = False,
        title: str = "",
        statuses: list[WorkflowStatus] | None = None,
    ) -> list[Workflow]:
        """
        Get all workflows with the latest version for the organization.
        """
        return await app.DATABASE.get_workflows_by_organization_id(
            organization_id=organization_id,
            page=page,
            page_size=page_size,
            only_saved_tasks=only_saved_tasks,
            only_workflows=only_workflows,
            title=title,
            statuses=statuses,
        )

    async def update_workflow(
        self,
        workflow_id: str,
        organization_id: str | None = None,
        title: str | None = None,
        description: str | None = None,
        workflow_definition: WorkflowDefinition | None = None,
    ) -> Workflow:
        if workflow_definition:
            workflow_definition.validate()

        return await app.DATABASE.update_workflow(
            workflow_id=workflow_id,
            title=title,
            organization_id=organization_id,
            description=description,
            workflow_definition=(workflow_definition.model_dump() if workflow_definition else None),
        )

    async def delete_workflow_by_permanent_id(
        self,
        workflow_permanent_id: str,
        organization_id: str | None = None,
    ) -> None:
        await app.DATABASE.soft_delete_workflow_by_permanent_id(
            workflow_permanent_id=workflow_permanent_id,
            organization_id=organization_id,
        )

    async def delete_workflow_by_id(
        self,
        workflow_id: str,
        organization_id: str,
    ) -> None:
        await app.DATABASE.soft_delete_workflow_by_id(
            workflow_id=workflow_id,
            organization_id=organization_id,
        )

    async def get_workflow_runs(
        self, organization_id: str, page: int = 1, page_size: int = 10, status: list[WorkflowRunStatus] | None = None
    ) -> list[WorkflowRun]:
        return await app.DATABASE.get_workflow_runs(
            organization_id=organization_id, page=page, page_size=page_size, status=status
        )

    async def get_workflow_runs_for_workflow_permanent_id(
        self,
        workflow_permanent_id: str,
        organization_id: str,
        page: int = 1,
        page_size: int = 10,
        status: list[WorkflowRunStatus] | None = None,
    ) -> list[WorkflowRun]:
        return await app.DATABASE.get_workflow_runs_for_workflow_permanent_id(
            workflow_permanent_id=workflow_permanent_id,
            organization_id=organization_id,
            page=page,
            page_size=page_size,
            status=status,
        )

    async def create_workflow_run(
        self,
        workflow_request: WorkflowRequestBody,
        workflow_permanent_id: str,
        workflow_id: str,
        organization_id: str,
        parent_workflow_run_id: str | None = None,
    ) -> WorkflowRun:
        return await app.DATABASE.create_workflow_run(
            workflow_permanent_id=workflow_permanent_id,
            workflow_id=workflow_id,
            organization_id=organization_id,
            proxy_location=workflow_request.proxy_location,
            webhook_callback_url=workflow_request.webhook_callback_url,
            totp_verification_url=workflow_request.totp_verification_url,
            totp_identifier=workflow_request.totp_identifier,
            parent_workflow_run_id=parent_workflow_run_id,
        )

    async def mark_workflow_run_as_completed(self, workflow_run_id: str) -> None:
        LOG.info(
            f"Marking workflow run {workflow_run_id} as completed",
            workflow_run_id=workflow_run_id,
            workflow_status="completed",
        )
        await app.DATABASE.update_workflow_run(
            workflow_run_id=workflow_run_id,
            status=WorkflowRunStatus.completed,
        )

    async def mark_workflow_run_as_failed(self, workflow_run_id: str, failure_reason: str | None) -> None:
        LOG.info(
            f"Marking workflow run {workflow_run_id} as failed",
            workflow_run_id=workflow_run_id,
            workflow_status="failed",
            failure_reason=failure_reason,
        )
        await app.DATABASE.update_workflow_run(
            workflow_run_id=workflow_run_id,
            status=WorkflowRunStatus.failed,
            failure_reason=failure_reason,
        )

    async def mark_workflow_run_as_running(self, workflow_run_id: str) -> None:
        LOG.info(
            f"Marking workflow run {workflow_run_id} as running",
            workflow_run_id=workflow_run_id,
            workflow_status="running",
        )
        await app.DATABASE.update_workflow_run(
            workflow_run_id=workflow_run_id,
            status=WorkflowRunStatus.running,
        )

    async def mark_workflow_run_as_terminated(self, workflow_run_id: str, failure_reason: str | None) -> None:
        LOG.info(
            f"Marking workflow run {workflow_run_id} as terminated",
            workflow_run_id=workflow_run_id,
            workflow_status="terminated",
            failure_reason=failure_reason,
        )
        await app.DATABASE.update_workflow_run(
            workflow_run_id=workflow_run_id,
            status=WorkflowRunStatus.terminated,
            failure_reason=failure_reason,
        )

    async def mark_workflow_run_as_canceled(self, workflow_run_id: str) -> None:
        LOG.info(
            f"Marking workflow run {workflow_run_id} as canceled",
            workflow_run_id=workflow_run_id,
            workflow_status="canceled",
        )
        await app.DATABASE.update_workflow_run(
            workflow_run_id=workflow_run_id,
            status=WorkflowRunStatus.canceled,
        )

    async def get_workflow_run(self, workflow_run_id: str, organization_id: str | None = None) -> WorkflowRun:
        workflow_run = await app.DATABASE.get_workflow_run(
            workflow_run_id=workflow_run_id,
            organization_id=organization_id,
        )
        if not workflow_run:
            raise WorkflowRunNotFound(workflow_run_id)
        return workflow_run

    async def create_workflow_parameter(
        self,
        workflow_id: str,
        workflow_parameter_type: WorkflowParameterType,
        key: str,
        default_value: bool | int | float | str | dict | list | None = None,
        description: str | None = None,
    ) -> WorkflowParameter:
        return await app.DATABASE.create_workflow_parameter(
            workflow_id=workflow_id,
            workflow_parameter_type=workflow_parameter_type,
            key=key,
            description=description,
            default_value=default_value,
        )

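    # Usage sketch (illustrative only, assuming WorkflowParameterType exposes a
    # STRING member): registering a string parameter with a default value.
    #
    #     param = await workflow_service.create_workflow_parameter(
    #         workflow_id=workflow.workflow_id,
    #         workflow_parameter_type=WorkflowParameterType.STRING,
    #         key="order_id",
    #         default_value="A-1001",
    #     )
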
    async def create_aws_secret_parameter(
        self, workflow_id: str, aws_key: str, key: str, description: str | None = None
    ) -> AWSSecretParameter:
        return await app.DATABASE.create_aws_secret_parameter(
            workflow_id=workflow_id, aws_key=aws_key, key=key, description=description
        )

    async def create_bitwarden_login_credential_parameter(
        self,
        workflow_id: str,
        bitwarden_client_id_aws_secret_key: str,
        bitwarden_client_secret_aws_secret_key: str,
        bitwarden_master_password_aws_secret_key: str,
        key: str,
        url_parameter_key: str | None = None,
        description: str | None = None,
        bitwarden_collection_id: str | None = None,
        bitwarden_item_id: str | None = None,
    ) -> Parameter:
        return await app.DATABASE.create_bitwarden_login_credential_parameter(
            workflow_id=workflow_id,
            bitwarden_client_id_aws_secret_key=bitwarden_client_id_aws_secret_key,
            bitwarden_client_secret_aws_secret_key=bitwarden_client_secret_aws_secret_key,
            bitwarden_master_password_aws_secret_key=bitwarden_master_password_aws_secret_key,
            key=key,
            url_parameter_key=url_parameter_key,
            description=description,
            bitwarden_collection_id=bitwarden_collection_id,
            bitwarden_item_id=bitwarden_item_id,
        )

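    # Note: as validated in create_workflow_from_request below, a Bitwarden login
    # credential parameter needs either a bitwarden_item_id, or a
    # bitwarden_collection_id together with a url_parameter_key that selects the
    # matching item by URL.
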
    async def create_credential_parameter(
        self,
        workflow_id: str,
        key: str,
        credential_id: str,
        description: str | None = None,
    ) -> CredentialParameter:
        return await app.DATABASE.create_credential_parameter(
            workflow_id=workflow_id,
            key=key,
            credential_id=credential_id,
            description=description,
        )

    async def create_bitwarden_sensitive_information_parameter(
        self,
        workflow_id: str,
        bitwarden_client_id_aws_secret_key: str,
        bitwarden_client_secret_aws_secret_key: str,
        bitwarden_master_password_aws_secret_key: str,
        bitwarden_collection_id: str,
        bitwarden_identity_key: str,
        bitwarden_identity_fields: list[str],
        key: str,
        description: str | None = None,
    ) -> Parameter:
        return await app.DATABASE.create_bitwarden_sensitive_information_parameter(
            workflow_id=workflow_id,
            bitwarden_client_id_aws_secret_key=bitwarden_client_id_aws_secret_key,
            bitwarden_client_secret_aws_secret_key=bitwarden_client_secret_aws_secret_key,
            bitwarden_master_password_aws_secret_key=bitwarden_master_password_aws_secret_key,
            bitwarden_collection_id=bitwarden_collection_id,
            bitwarden_identity_key=bitwarden_identity_key,
            bitwarden_identity_fields=bitwarden_identity_fields,
            key=key,
            description=description,
        )

    async def create_bitwarden_credit_card_data_parameter(
        self,
        workflow_id: str,
        bitwarden_client_id_aws_secret_key: str,
        bitwarden_client_secret_aws_secret_key: str,
        bitwarden_master_password_aws_secret_key: str,
        bitwarden_collection_id: str,
        bitwarden_item_id: str,
        key: str,
        description: str | None = None,
    ) -> Parameter:
        return await app.DATABASE.create_bitwarden_credit_card_data_parameter(
            workflow_id=workflow_id,
            bitwarden_client_id_aws_secret_key=bitwarden_client_id_aws_secret_key,
            bitwarden_client_secret_aws_secret_key=bitwarden_client_secret_aws_secret_key,
            bitwarden_master_password_aws_secret_key=bitwarden_master_password_aws_secret_key,
            bitwarden_collection_id=bitwarden_collection_id,
            bitwarden_item_id=bitwarden_item_id,
            key=key,
            description=description,
        )

    async def create_output_parameter(
        self, workflow_id: str, key: str, description: str | None = None
    ) -> OutputParameter:
        return await app.DATABASE.create_output_parameter(workflow_id=workflow_id, key=key, description=description)

    async def get_workflow_parameters(self, workflow_id: str) -> list[WorkflowParameter]:
        return await app.DATABASE.get_workflow_parameters(workflow_id=workflow_id)

    async def create_workflow_run_parameter(
        self,
        workflow_run_id: str,
        workflow_parameter: WorkflowParameter,
        value: Any,
    ) -> WorkflowRunParameter:
        value = json.dumps(value) if isinstance(value, (dict, list)) else value
        # InvalidWorkflowParameter will be raised if the validation fails
        workflow_parameter.workflow_parameter_type.convert_value(value)

        return await app.DATABASE.create_workflow_run_parameter(
            workflow_run_id=workflow_run_id,
            workflow_parameter=workflow_parameter,
            value=value,
        )

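    # Usage sketch (illustrative only): dict and list values are JSON-serialized
    # before type validation and storage.
    #
    #     await workflow_service.create_workflow_run_parameter(
    #         workflow_run_id=run.workflow_run_id,
    #         workflow_parameter=param,
    #         value={"city": "SF", "limit": 10},  # stored as '{"city": "SF", "limit": 10}'
    #     )
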
    async def get_workflow_run_parameter_tuples(
        self, workflow_run_id: str
    ) -> list[tuple[WorkflowParameter, WorkflowRunParameter]]:
        return await app.DATABASE.get_workflow_run_parameters(workflow_run_id=workflow_run_id)

    @staticmethod
    async def get_workflow_output_parameters(workflow_id: str) -> list[OutputParameter]:
        return await app.DATABASE.get_workflow_output_parameters(workflow_id=workflow_id)

    @staticmethod
    async def get_workflow_run_output_parameters(
        workflow_run_id: str,
    ) -> list[WorkflowRunOutputParameter]:
        return await app.DATABASE.get_workflow_run_output_parameters(workflow_run_id=workflow_run_id)

    @staticmethod
    async def get_output_parameter_workflow_run_output_parameter_tuples(
        workflow_id: str,
        workflow_run_id: str,
    ) -> list[tuple[OutputParameter, WorkflowRunOutputParameter]]:
        workflow_run_output_parameters = await app.DATABASE.get_workflow_run_output_parameters(
            workflow_run_id=workflow_run_id
        )
        output_parameters = await app.DATABASE.get_workflow_output_parameters_by_ids(
            output_parameter_ids=[
                workflow_run_output_parameter.output_parameter_id
                for workflow_run_output_parameter in workflow_run_output_parameters
            ]
        )

        return [
            (output_parameter, workflow_run_output_parameter)
            for workflow_run_output_parameter in workflow_run_output_parameters
            for output_parameter in output_parameters
            if output_parameter.output_parameter_id == workflow_run_output_parameter.output_parameter_id
        ]

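    # The comprehension above is an in-memory join on output_parameter_id; an
    # equivalent dict-based formulation (illustrative only) would be:
    #
    #     by_id = {op.output_parameter_id: op for op in output_parameters}
    #     tuples = [
    #         (by_id[wrop.output_parameter_id], wrop)
    #         for wrop in workflow_run_output_parameters
    #         if wrop.output_parameter_id in by_id
    #     ]
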
    async def get_last_task_for_workflow_run(self, workflow_run_id: str) -> Task | None:
        return await app.DATABASE.get_last_task_for_workflow_run(workflow_run_id=workflow_run_id)

    async def get_tasks_by_workflow_run_id(self, workflow_run_id: str) -> list[Task]:
        return await app.DATABASE.get_tasks_by_workflow_run_id(workflow_run_id=workflow_run_id)

    async def build_workflow_run_status_response_by_workflow_id(
        self,
        workflow_run_id: str,
        organization_id: str,
        include_cost: bool = False,
    ) -> WorkflowRunResponse:
        workflow_run = await self.get_workflow_run(workflow_run_id=workflow_run_id, organization_id=organization_id)
        if workflow_run is None:
            LOG.error(f"Workflow run {workflow_run_id} not found")
            raise WorkflowRunNotFound(workflow_run_id=workflow_run_id)
        workflow_permanent_id = workflow_run.workflow_permanent_id
        return await self.build_workflow_run_status_response(
            workflow_permanent_id=workflow_permanent_id,
            workflow_run_id=workflow_run_id,
            organization_id=organization_id,
            include_cost=include_cost,
        )

    async def build_workflow_run_status_response(
        self,
        workflow_permanent_id: str,
        workflow_run_id: str,
        organization_id: str,
        include_cost: bool = False,
    ) -> WorkflowRunResponse:
        workflow = await self.get_workflow_by_permanent_id(workflow_permanent_id)
        if workflow is None:
            LOG.error(f"Workflow {workflow_permanent_id} not found")
            raise WorkflowNotFound(workflow_permanent_id=workflow_permanent_id)

        workflow_run = await self.get_workflow_run(workflow_run_id=workflow_run_id, organization_id=organization_id)
        workflow_run_tasks = await app.DATABASE.get_tasks_by_workflow_run_id(workflow_run_id=workflow_run_id)
        screenshot_artifacts = []
        screenshot_urls: list[str] | None = None
        # get the last screenshot for the last 3 tasks of the workflow run
        for task in workflow_run_tasks[::-1]:
            screenshot_artifact = await app.DATABASE.get_latest_artifact(
                task_id=task.task_id,
                artifact_types=[
                    ArtifactType.SCREENSHOT_ACTION,
                    ArtifactType.SCREENSHOT_FINAL,
                ],
                organization_id=organization_id,
            )
            if screenshot_artifact:
                screenshot_artifacts.append(screenshot_artifact)
            if len(screenshot_artifacts) >= 3:
                break
        if screenshot_artifacts:
            screenshot_urls = await app.ARTIFACT_MANAGER.get_share_links(screenshot_artifacts)

        recording_url = None
        recording_artifact = await app.DATABASE.get_artifact_for_workflow_run(
            workflow_run_id=workflow_run_id,
            artifact_type=ArtifactType.RECORDING,
            organization_id=organization_id,
        )
        if recording_artifact:
            recording_url = await app.ARTIFACT_MANAGER.get_share_link(recording_artifact)

        downloaded_files: list[FileInfo] | None = None
        downloaded_file_urls: list[str] | None = None
        try:
            async with asyncio.timeout(GET_DOWNLOADED_FILES_TIMEOUT):
                downloaded_files = await app.STORAGE.get_downloaded_files(
                    organization_id=workflow_run.organization_id,
                    task_id=None,
                    workflow_run_id=workflow_run.workflow_run_id,
                )
                if downloaded_files:
                    downloaded_file_urls = [file_info.url for file_info in downloaded_files]
        except asyncio.TimeoutError:
            LOG.warning(
                "Timed out getting downloaded files",
                workflow_run_id=workflow_run.workflow_run_id,
            )
        except Exception:
            LOG.warning(
                "Failed to get downloaded files",
                exc_info=True,
                workflow_run_id=workflow_run.workflow_run_id,
            )

        workflow_parameter_tuples = await app.DATABASE.get_workflow_run_parameters(workflow_run_id=workflow_run_id)
        parameters_with_value = {wfp.key: wfrp.value for wfp, wfrp in workflow_parameter_tuples}
        output_parameter_tuples: list[
            tuple[OutputParameter, WorkflowRunOutputParameter]
        ] = await self.get_output_parameter_workflow_run_output_parameter_tuples(
            workflow_id=workflow_run.workflow_id, workflow_run_id=workflow_run_id
        )

        outputs = None
        EXTRACTED_INFORMATION_KEY = "extracted_information"
        if output_parameter_tuples:
            outputs = {output_parameter.key: output.value for output_parameter, output in output_parameter_tuples}
            extracted_information = {
                output_parameter.key: output.value[EXTRACTED_INFORMATION_KEY]
                for output_parameter, output in output_parameter_tuples
                if output.value is not None
                and isinstance(output.value, dict)
                and EXTRACTED_INFORMATION_KEY in output.value
                and output.value[EXTRACTED_INFORMATION_KEY] is not None
            }
            outputs[EXTRACTED_INFORMATION_KEY] = extracted_information

        total_steps = None
        total_cost = None
        if include_cost:
            workflow_run_steps = await app.DATABASE.get_steps_by_task_ids(
                task_ids=[task.task_id for task in workflow_run_tasks], organization_id=organization_id
            )
            workflow_run_blocks = await app.DATABASE.get_workflow_run_blocks(
                workflow_run_id=workflow_run_id, organization_id=organization_id
            )
            text_prompt_blocks = [block for block in workflow_run_blocks if block.block_type == BlockType.TEXT_PROMPT]
            total_steps = len(workflow_run_steps)
            # TODO: This is a temporary cost calculation. We need to implement a more accurate cost calculation.
            # successful steps are the ones that have a status of completed and the total count of unique step.order
            successful_steps = [step for step in workflow_run_steps if step.status == StepStatus.completed]
            total_cost = 0.1 * (len(successful_steps) + len(text_prompt_blocks))
        return WorkflowRunResponse(
            workflow_id=workflow.workflow_permanent_id,
            workflow_run_id=workflow_run_id,
            status=workflow_run.status,
            failure_reason=workflow_run.failure_reason,
            proxy_location=workflow_run.proxy_location,
            webhook_callback_url=workflow_run.webhook_callback_url,
            totp_verification_url=workflow_run.totp_verification_url,
            totp_identifier=workflow_run.totp_identifier,
            created_at=workflow_run.created_at,
            modified_at=workflow_run.modified_at,
            parameters=parameters_with_value,
            screenshot_urls=screenshot_urls,
            recording_url=recording_url,
            downloaded_files=downloaded_files,
            downloaded_file_urls=downloaded_file_urls,
            outputs=outputs,
            total_steps=total_steps,
            total_cost=total_cost,
            workflow_title=workflow.title,
        )

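    # Shape sketch of the `outputs` field built above (illustrative only): each
    # block's output is keyed by "<label>_output", and any nested
    # "extracted_information" values are additionally aggregated under a
    # top-level key of the same name:
    #
    #     {
    #         "login_output": {"status": "completed"},
    #         "scrape_output": {"extracted_information": ["..."]},
    #         "extracted_information": {"scrape_output": ["..."]},
    #     }
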
    async def clean_up_workflow(
        self,
        workflow: Workflow,
        workflow_run: WorkflowRun,
        api_key: str | None = None,
        close_browser_on_completion: bool = True,
        need_call_webhook: bool = True,
        browser_session_id: str | None = None,
    ) -> None:
        analytics.capture("skyvern-oss-agent-workflow-status", {"status": workflow_run.status})
        tasks = await self.get_tasks_by_workflow_run_id(workflow_run.workflow_run_id)
        all_workflow_task_ids = [task.task_id for task in tasks]
        browser_state = await app.BROWSER_MANAGER.cleanup_for_workflow_run(
            workflow_run.workflow_run_id,
            all_workflow_task_ids,
            close_browser_on_completion=close_browser_on_completion and browser_session_id is None,
            browser_session_id=browser_session_id,
            organization_id=workflow_run.organization_id,
        )
        if browser_state:
            await self.persist_video_data(browser_state, workflow, workflow_run)
            if tasks:
                await self.persist_debug_artifacts(browser_state, tasks[-1], workflow, workflow_run)
            if workflow.persist_browser_session and browser_state.browser_artifacts.browser_session_dir:
                await app.STORAGE.store_browser_session(
                    workflow_run.organization_id,
                    workflow.workflow_permanent_id,
                    browser_state.browser_artifacts.browser_session_dir,
                )
                LOG.info("Persisted browser session for workflow run", workflow_run_id=workflow_run.workflow_run_id)

        await app.ARTIFACT_MANAGER.wait_for_upload_aiotasks(all_workflow_task_ids)

        try:
            async with asyncio.timeout(SAVE_DOWNLOADED_FILES_TIMEOUT):
                await app.STORAGE.save_downloaded_files(
                    workflow_run.organization_id, task_id=None, workflow_run_id=workflow_run.workflow_run_id
                )
        except asyncio.TimeoutError:
            LOG.warning(
                "Timed out saving downloaded files",
                workflow_run_id=workflow_run.workflow_run_id,
            )
        except Exception:
            LOG.warning(
                "Failed to save downloaded files",
                exc_info=True,
                workflow_run_id=workflow_run.workflow_run_id,
            )

        if not need_call_webhook:
            return

        await self.execute_workflow_webhook(workflow_run, api_key)

    async def execute_workflow_webhook(
        self,
        workflow_run: WorkflowRun,
        api_key: str | None = None,
    ) -> None:
        workflow_id = workflow_run.workflow_id
        workflow_run_status_response = await self.build_workflow_run_status_response(
            workflow_permanent_id=workflow_run.workflow_permanent_id,
            workflow_run_id=workflow_run.workflow_run_id,
            organization_id=workflow_run.organization_id,
        )
        LOG.info(
            "Built workflow run status response",
            workflow_run_status_response=workflow_run_status_response,
        )

        if not workflow_run.webhook_callback_url:
            LOG.warning(
                "Workflow has no webhook callback url. Not sending workflow response",
                workflow_id=workflow_id,
                workflow_run_id=workflow_run.workflow_run_id,
            )
            return

        if not api_key:
            LOG.warning(
                "Request has no api key. Not sending workflow response",
                workflow_id=workflow_id,
                workflow_run_id=workflow_run.workflow_run_id,
            )
            return

        # send webhook to the webhook callback url
        payload = workflow_run_status_response.model_dump_json()
        headers = generate_skyvern_webhook_headers(
            payload=payload,
            api_key=api_key,
        )
        LOG.info(
            "Sending workflow run status to webhook callback url",
            workflow_id=workflow_id,
            workflow_run_id=workflow_run.workflow_run_id,
            webhook_callback_url=workflow_run.webhook_callback_url,
            payload=payload,
            headers=headers,
        )
        try:
            async with httpx.AsyncClient() as client:
                resp = await client.post(
                    url=workflow_run.webhook_callback_url, data=payload, headers=headers, timeout=httpx.Timeout(30.0)
                )
                if resp.status_code == 200:
                    LOG.info(
                        "Webhook sent successfully",
                        workflow_id=workflow_id,
                        workflow_run_id=workflow_run.workflow_run_id,
                        resp_code=resp.status_code,
                        resp_text=resp.text,
                    )
                else:
                    LOG.info(
                        "Webhook failed",
                        workflow_id=workflow_id,
                        workflow_run_id=workflow_run.workflow_run_id,
                        webhook_data=payload,
                        resp=resp,
                        resp_code=resp.status_code,
                        resp_text=resp.text,
                    )
        except Exception as e:
            raise FailedToSendWebhook(
                workflow_id=workflow_id,
                workflow_run_id=workflow_run.workflow_run_id,
            ) from e

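    # Receiver sketch (illustrative only; the FastAPI app, route path, and handler
    # name are assumptions, and the signature headers produced by
    # generate_skyvern_webhook_headers are not defined in this file):
    #
    #     @fastapi_app.post("/webhooks/skyvern")
    #     async def handle_skyvern_webhook(request: Request) -> dict:
    #         payload = await request.json()  # a WorkflowRunResponse as JSON
    #         if payload["status"] in ("failed", "terminated"):
    #             LOG.warning("Run failed", reason=payload["failure_reason"])
    #         return {"ok": True}
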
    async def persist_video_data(
        self, browser_state: BrowserState, workflow: Workflow, workflow_run: WorkflowRun
    ) -> None:
        # Create recording artifact after closing the browser, so we can get an accurate recording
        video_artifacts = await app.BROWSER_MANAGER.get_video_artifacts(
            workflow_id=workflow.workflow_id,
            workflow_run_id=workflow_run.workflow_run_id,
            browser_state=browser_state,
        )
        for video_artifact in video_artifacts:
            await app.ARTIFACT_MANAGER.update_artifact_data(
                artifact_id=video_artifact.video_artifact_id,
                organization_id=workflow_run.organization_id,
                data=video_artifact.video_data,
            )

    async def persist_har_data(
        self,
        browser_state: BrowserState,
        last_step: Step,
        workflow: Workflow,
        workflow_run: WorkflowRun,
    ) -> None:
        har_data = await app.BROWSER_MANAGER.get_har_data(
            workflow_id=workflow.workflow_id,
            workflow_run_id=workflow_run.workflow_run_id,
            browser_state=browser_state,
        )
        if har_data:
            await app.ARTIFACT_MANAGER.create_artifact(
                step=last_step,
                artifact_type=ArtifactType.HAR,
                data=har_data,
            )

    async def persist_browser_console_log(
        self,
        browser_state: BrowserState,
        last_step: Step,
        workflow: Workflow,
        workflow_run: WorkflowRun,
    ) -> None:
        browser_log = await app.BROWSER_MANAGER.get_browser_console_log(
            workflow_id=workflow.workflow_id,
            workflow_run_id=workflow_run.workflow_run_id,
            browser_state=browser_state,
        )
        if browser_log:
            await app.ARTIFACT_MANAGER.create_artifact(
                step=last_step,
                artifact_type=ArtifactType.BROWSER_CONSOLE_LOG,
                data=browser_log,
            )

    async def persist_tracing_data(
        self, browser_state: BrowserState, last_step: Step, workflow_run: WorkflowRun
    ) -> None:
        if browser_state.browser_context is None or browser_state.browser_artifacts.traces_dir is None:
            return

        trace_path = f"{browser_state.browser_artifacts.traces_dir}/{workflow_run.workflow_run_id}.zip"
        await app.ARTIFACT_MANAGER.create_artifact(step=last_step, artifact_type=ArtifactType.TRACE, path=trace_path)

    async def persist_debug_artifacts(
        self,
        browser_state: BrowserState,
        last_task: Task,
        workflow: Workflow,
        workflow_run: WorkflowRun,
    ) -> None:
        last_step = await app.DATABASE.get_latest_step(
            task_id=last_task.task_id, organization_id=last_task.organization_id
        )
        if not last_step:
            return

        await self.persist_browser_console_log(browser_state, last_step, workflow, workflow_run)
        await self.persist_har_data(browser_state, last_step, workflow, workflow_run)
        await self.persist_tracing_data(browser_state, last_step, workflow_run)

async def create_workflow_from_request(
|
|
self,
|
|
organization: Organization,
|
|
request: WorkflowCreateYAMLRequest,
|
|
workflow_permanent_id: str | None = None,
|
|
) -> Workflow:
|
|
organization_id = organization.organization_id
|
|
LOG.info(
|
|
"Creating workflow from request",
|
|
organization_id=organization_id,
|
|
title=request.title,
|
|
)
|
|
new_workflow_id: str | None = None
|
|
try:
|
|
if workflow_permanent_id:
|
|
existing_latest_workflow = await self.get_workflow_by_permanent_id(
|
|
workflow_permanent_id=workflow_permanent_id,
|
|
organization_id=organization_id,
|
|
exclude_deleted=False,
|
|
)
|
|
existing_version = existing_latest_workflow.version
|
|
workflow = await self.create_workflow(
|
|
title=request.title,
|
|
workflow_definition=WorkflowDefinition(parameters=[], blocks=[]),
|
|
description=request.description,
|
|
organization_id=organization_id,
|
|
proxy_location=request.proxy_location,
|
|
webhook_callback_url=request.webhook_callback_url,
|
|
totp_verification_url=request.totp_verification_url,
|
|
totp_identifier=request.totp_identifier,
|
|
persist_browser_session=request.persist_browser_session,
|
|
workflow_permanent_id=workflow_permanent_id,
|
|
version=existing_version + 1,
|
|
is_saved_task=request.is_saved_task,
|
|
status=request.status,
|
|
)
|
|
else:
|
|
workflow = await self.create_workflow(
|
|
title=request.title,
|
|
workflow_definition=WorkflowDefinition(parameters=[], blocks=[]),
|
|
description=request.description,
|
|
organization_id=organization_id,
|
|
proxy_location=request.proxy_location,
|
|
webhook_callback_url=request.webhook_callback_url,
|
|
totp_verification_url=request.totp_verification_url,
|
|
totp_identifier=request.totp_identifier,
|
|
persist_browser_session=request.persist_browser_session,
|
|
is_saved_task=request.is_saved_task,
|
|
status=request.status,
|
|
)
|
|
# Keeping track of the new workflow id to delete it if an error occurs during the creation process
|
|
new_workflow_id = workflow.workflow_id
|
|
# Create parameters from the request
|
|
parameters: dict[str, PARAMETER_TYPE] = {}
|
|
duplicate_parameter_keys = set()
|
|
|
|
# Check if user's trying to manually create an output parameter
|
|
if any(
|
|
parameter.parameter_type == ParameterType.OUTPUT for parameter in request.workflow_definition.parameters
|
|
):
|
|
raise InvalidWorkflowDefinition(message="Cannot manually create output parameters")
|
|
|
|
# Check if any parameter keys collide with automatically created output parameter keys
|
|
block_labels = [block.label for block in request.workflow_definition.blocks]
|
|
# TODO (kerem): Check if block labels are unique
|
|
output_parameter_keys = [f"{block_label}_output" for block_label in block_labels]
|
|
parameter_keys = [parameter.key for parameter in request.workflow_definition.parameters]
|
|
if any(key in output_parameter_keys for key in parameter_keys):
|
|
raise WorkflowDefinitionHasReservedParameterKeys(
|
|
reserved_keys=output_parameter_keys, parameter_keys=parameter_keys
|
|
)
|
|
|
|
# Create output parameters for all blocks
|
|
block_output_parameters = await WorkflowService._create_all_output_parameters_for_workflow(
|
|
workflow_id=workflow.workflow_id,
|
|
block_yamls=request.workflow_definition.blocks,
|
|
)
|
|
for block_output_parameter in block_output_parameters.values():
|
|
parameters[block_output_parameter.key] = block_output_parameter
|
|
|
|
# We're going to process context parameters after other parameters since they depend on the other parameters
|
|
context_parameter_yamls = []
|
|
|
|
for parameter in request.workflow_definition.parameters:
|
|
if parameter.key in parameters:
|
|
LOG.error(f"Duplicate parameter key {parameter.key}")
|
|
duplicate_parameter_keys.add(parameter.key)
|
|
continue
|
|
if parameter.parameter_type == ParameterType.AWS_SECRET:
|
|
parameters[parameter.key] = await self.create_aws_secret_parameter(
|
|
workflow_id=workflow.workflow_id,
|
|
aws_key=parameter.aws_key,
|
|
key=parameter.key,
|
|
description=parameter.description,
|
|
)
|
|
elif parameter.parameter_type == ParameterType.CREDENTIAL:
|
|
parameters[parameter.key] = await self.create_credential_parameter(
|
|
workflow_id=workflow.workflow_id,
|
|
key=parameter.key,
|
|
description=parameter.description,
|
|
credential_id=parameter.credential_id,
|
|
)
|
|
elif parameter.parameter_type == ParameterType.BITWARDEN_LOGIN_CREDENTIAL:
|
|
if not parameter.bitwarden_collection_id and not parameter.bitwarden_item_id:
|
|
raise WorkflowParameterMissingRequiredValue(
|
|
workflow_parameter_type=ParameterType.BITWARDEN_LOGIN_CREDENTIAL,
|
|
workflow_parameter_key=parameter.key,
|
|
required_value="bitwarden_collection_id or bitwarden_item_id",
|
|
)
|
|
if parameter.bitwarden_collection_id and not parameter.url_parameter_key:
|
|
raise WorkflowParameterMissingRequiredValue(
|
|
workflow_parameter_type=ParameterType.BITWARDEN_LOGIN_CREDENTIAL,
|
|
workflow_parameter_key=parameter.key,
|
|
required_value="url_parameter_key",
|
|
)
|
|
parameters[parameter.key] = await self.create_bitwarden_login_credential_parameter(
|
|
workflow_id=workflow.workflow_id,
|
|
bitwarden_client_id_aws_secret_key=parameter.bitwarden_client_id_aws_secret_key,
|
|
bitwarden_client_secret_aws_secret_key=parameter.bitwarden_client_secret_aws_secret_key,
|
|
bitwarden_master_password_aws_secret_key=parameter.bitwarden_master_password_aws_secret_key,
|
|
                        url_parameter_key=parameter.url_parameter_key,
                        key=parameter.key,
                        description=parameter.description,
                        bitwarden_collection_id=parameter.bitwarden_collection_id,
                        bitwarden_item_id=parameter.bitwarden_item_id,
                    )
                elif parameter.parameter_type == ParameterType.BITWARDEN_SENSITIVE_INFORMATION:
                    parameters[parameter.key] = await self.create_bitwarden_sensitive_information_parameter(
                        workflow_id=workflow.workflow_id,
                        bitwarden_client_id_aws_secret_key=parameter.bitwarden_client_id_aws_secret_key,
                        bitwarden_client_secret_aws_secret_key=parameter.bitwarden_client_secret_aws_secret_key,
                        bitwarden_master_password_aws_secret_key=parameter.bitwarden_master_password_aws_secret_key,
                        # TODO: remove "# type: ignore" after ensuring bitwarden_collection_id is always set
                        bitwarden_collection_id=parameter.bitwarden_collection_id, # type: ignore
                        bitwarden_identity_key=parameter.bitwarden_identity_key,
                        bitwarden_identity_fields=parameter.bitwarden_identity_fields,
                        key=parameter.key,
                        description=parameter.description,
                    )
                elif parameter.parameter_type == ParameterType.BITWARDEN_CREDIT_CARD_DATA:
                    parameters[parameter.key] = await self.create_bitwarden_credit_card_data_parameter(
                        workflow_id=workflow.workflow_id,
                        bitwarden_client_id_aws_secret_key=parameter.bitwarden_client_id_aws_secret_key,
                        bitwarden_client_secret_aws_secret_key=parameter.bitwarden_client_secret_aws_secret_key,
                        bitwarden_master_password_aws_secret_key=parameter.bitwarden_master_password_aws_secret_key,
                        # TODO: remove "# type: ignore" after ensuring bitwarden_collection_id is always set
                        bitwarden_collection_id=parameter.bitwarden_collection_id, # type: ignore
                        bitwarden_item_id=parameter.bitwarden_item_id, # type: ignore
                        key=parameter.key,
                        description=parameter.description,
                    )
                elif parameter.parameter_type == ParameterType.WORKFLOW:
                    parameters[parameter.key] = await self.create_workflow_parameter(
                        workflow_id=workflow.workflow_id,
                        workflow_parameter_type=parameter.workflow_parameter_type,
                        key=parameter.key,
                        default_value=parameter.default_value,
                        description=parameter.description,
                    )
                elif parameter.parameter_type == ParameterType.OUTPUT:
                    parameters[parameter.key] = await self.create_output_parameter(
                        workflow_id=workflow.workflow_id,
                        key=parameter.key,
                        description=parameter.description,
                    )
                elif parameter.parameter_type == ParameterType.CONTEXT:
                    context_parameter_yamls.append(parameter)
                else:
                    LOG.error(f"Invalid parameter type {parameter.parameter_type}")

            # Now we can process the context parameters since all other parameters have been created
            for context_parameter in context_parameter_yamls:
                if context_parameter.source_parameter_key not in parameters:
                    raise ContextParameterSourceNotDefined(
                        context_parameter_key=context_parameter.key,
                        source_key=context_parameter.source_parameter_key,
                    )

                if context_parameter.key in parameters:
                    LOG.error(f"Duplicate parameter key {context_parameter.key}")
                    duplicate_parameter_keys.add(context_parameter.key)
                    continue

                # We're only adding the context parameter to the parameters dict, we're not creating it in the database
                # It'll only be stored in the `workflow.workflow_definition`
                # todo (kerem): should we have a database table for context parameters?
                parameters[context_parameter.key] = ContextParameter(
                    key=context_parameter.key,
                    description=context_parameter.description,
                    source=parameters[context_parameter.source_parameter_key],
                    # Context parameters don't have a default value, the value always depends on the source parameter
                    value=None,
                )

            if duplicate_parameter_keys:
                raise WorkflowDefinitionHasDuplicateParameterKeys(duplicate_keys=duplicate_parameter_keys)
            # Create blocks from the request
            block_label_mapping = {}
            blocks = []
            for block_yaml in request.workflow_definition.blocks:
                block = await self.block_yaml_to_block(workflow, block_yaml, parameters)
                blocks.append(block)
                block_label_mapping[block.label] = block

            # Set the blocks for the workflow definition
            workflow_definition = WorkflowDefinition(parameters=parameters.values(), blocks=blocks)
            workflow = await self.update_workflow(
                workflow_id=workflow.workflow_id,
                organization_id=organization_id,
                workflow_definition=workflow_definition,
            )
            LOG.info(
                f"Created workflow from request, title: {request.title}",
                parameter_keys=[parameter.key for parameter in parameters.values()],
                block_labels=[block.label for block in blocks],
                organization_id=organization_id,
                title=request.title,
                workflow_id=workflow.workflow_id,
            )
            return workflow
        except Exception as e:
            if new_workflow_id:
                LOG.error(
                    f"Failed to create workflow from request, deleting workflow {new_workflow_id}",
                    organization_id=organization_id,
                )
                await self.delete_workflow_by_id(workflow_id=new_workflow_id, organization_id=organization_id)
            else:
                LOG.exception(f"Failed to create workflow from request, title: {request.title}")
            raise e

    @staticmethod
    async def create_output_parameter_for_block(workflow_id: str, block_yaml: BLOCK_YAML_TYPES) -> OutputParameter:
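        """Create the `{label}_output` OutputParameter that records a block's result."""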
        output_parameter_key = f"{block_yaml.label}_output"
        return await app.DATABASE.create_output_parameter(
            workflow_id=workflow_id,
            key=output_parameter_key,
            description=f"Output parameter for block {block_yaml.label}",
        )

    @staticmethod
    async def _create_all_output_parameters_for_workflow(
        workflow_id: str, block_yamls: list[BLOCK_YAML_TYPES]
    ) -> dict[str, OutputParameter]:
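        """Create an output parameter for every block, recursing into for-loop blocks, keyed by block label."""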
        output_parameters = {}
        for block_yaml in block_yamls:
            output_parameter = await WorkflowService.create_output_parameter_for_block(
                workflow_id=workflow_id, block_yaml=block_yaml
            )
            output_parameters[block_yaml.label] = output_parameter
            # Recursively create output parameters for for loop blocks
            if isinstance(block_yaml, ForLoopBlockYAML):
                output_parameters.update(
                    await WorkflowService._create_all_output_parameters_for_workflow(
                        workflow_id=workflow_id, block_yamls=block_yaml.loop_blocks
                    )
                )
        return output_parameters

    @staticmethod
    async def block_yaml_to_block(
        workflow: Workflow,
        block_yaml: BLOCK_YAML_TYPES,
        parameters: dict[str, Parameter],
    ) -> BlockTypeVar:
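        """Translate a YAML block definition into its runtime block instance.

        Dispatches on `block_yaml.block_type`; `parameters` must already contain every
        referenced parameter key, including this block's `{label}_output` parameter.
        """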
        output_parameter = parameters[f"{block_yaml.label}_output"]
        if block_yaml.block_type == BlockType.TASK:
            task_block_parameters = (
                [parameters[parameter_key] for parameter_key in block_yaml.parameter_keys]
                if block_yaml.parameter_keys
                else []
            )
            return TaskBlock(
                label=block_yaml.label,
                url=block_yaml.url,
                title=block_yaml.title,
                parameters=task_block_parameters,
                output_parameter=output_parameter,
                navigation_goal=block_yaml.navigation_goal,
                data_extraction_goal=block_yaml.data_extraction_goal,
                data_schema=block_yaml.data_schema,
                error_code_mapping=block_yaml.error_code_mapping,
                max_steps_per_run=block_yaml.max_steps_per_run,
                max_retries=block_yaml.max_retries,
                complete_on_download=block_yaml.complete_on_download,
                download_suffix=block_yaml.download_suffix,
                continue_on_failure=block_yaml.continue_on_failure,
                totp_verification_url=block_yaml.totp_verification_url,
                totp_identifier=block_yaml.totp_identifier,
                cache_actions=block_yaml.cache_actions,
                complete_criterion=block_yaml.complete_criterion,
                terminate_criterion=block_yaml.terminate_criterion,
                complete_verification=block_yaml.complete_verification,
            )
        elif block_yaml.block_type == BlockType.FOR_LOOP:
            loop_blocks = [
                await WorkflowService.block_yaml_to_block(workflow, loop_block, parameters)
                for loop_block in block_yaml.loop_blocks
            ]

            loop_over_parameter: Parameter | None = None
            if block_yaml.loop_over_parameter_key:
                loop_over_parameter = parameters[block_yaml.loop_over_parameter_key]

            if block_yaml.loop_variable_reference:
                # Backward compatible with jinja-style parameters and context parameters:
                # trim a reference like {{ loop_key }} down to loop_key before initializing the
                # context parameter, otherwise it might break the context parameter
                # initialization chain and blow up the workflow parameters.
                # TODO: consider removing this if we totally give up on context parameters
                trimmed_key = block_yaml.loop_variable_reference.strip(" {}")
                if trimmed_key in parameters:
                    loop_over_parameter = parameters[trimmed_key]

            if loop_over_parameter is None and not block_yaml.loop_variable_reference:
                raise Exception("Loop value parameter is required for for loop block")

            return ForLoopBlock(
                label=block_yaml.label,
                loop_over=loop_over_parameter,
                loop_variable_reference=block_yaml.loop_variable_reference,
                loop_blocks=loop_blocks,
                output_parameter=output_parameter,
                continue_on_failure=block_yaml.continue_on_failure,
                complete_if_empty=block_yaml.complete_if_empty,
            )
        elif block_yaml.block_type == BlockType.CODE:
            return CodeBlock(
                label=block_yaml.label,
                code=block_yaml.code,
                parameters=(
                    [parameters[parameter_key] for parameter_key in block_yaml.parameter_keys]
                    if block_yaml.parameter_keys
                    else []
                ),
                output_parameter=output_parameter,
                continue_on_failure=block_yaml.continue_on_failure,
            )
        elif block_yaml.block_type == BlockType.TEXT_PROMPT:
            return TextPromptBlock(
                label=block_yaml.label,
                llm_key=block_yaml.llm_key,
                prompt=block_yaml.prompt,
                parameters=(
                    [parameters[parameter_key] for parameter_key in block_yaml.parameter_keys]
                    if block_yaml.parameter_keys
                    else []
                ),
                json_schema=block_yaml.json_schema,
                output_parameter=output_parameter,
                continue_on_failure=block_yaml.continue_on_failure,
            )
        elif block_yaml.block_type == BlockType.DOWNLOAD_TO_S3:
            return DownloadToS3Block(
                label=block_yaml.label,
                output_parameter=output_parameter,
                url=block_yaml.url,
                continue_on_failure=block_yaml.continue_on_failure,
            )
        elif block_yaml.block_type == BlockType.UPLOAD_TO_S3:
            return UploadToS3Block(
                label=block_yaml.label,
                output_parameter=output_parameter,
                path=block_yaml.path,
                continue_on_failure=block_yaml.continue_on_failure,
            )
        elif block_yaml.block_type == BlockType.FILE_UPLOAD:
            return FileUploadBlock(
                label=block_yaml.label,
                output_parameter=output_parameter,
                storage_type=block_yaml.storage_type,
                s3_bucket=block_yaml.s3_bucket,
                aws_access_key_id=block_yaml.aws_access_key_id,
                aws_secret_access_key=block_yaml.aws_secret_access_key,
                region_name=block_yaml.region_name,
                path=block_yaml.path,
                continue_on_failure=block_yaml.continue_on_failure,
            )
        elif block_yaml.block_type == BlockType.SEND_EMAIL:
            return SendEmailBlock(
                label=block_yaml.label,
                output_parameter=output_parameter,
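                # SMTP connection settings are resolved from secret parameters at runtime,
                # not stored as literals on the block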
                smtp_host=parameters[block_yaml.smtp_host_secret_parameter_key],
                smtp_port=parameters[block_yaml.smtp_port_secret_parameter_key],
                smtp_username=parameters[block_yaml.smtp_username_secret_parameter_key],
                smtp_password=parameters[block_yaml.smtp_password_secret_parameter_key],
                sender=block_yaml.sender,
                recipients=block_yaml.recipients,
                subject=block_yaml.subject,
                body=block_yaml.body,
                file_attachments=block_yaml.file_attachments or [],
                continue_on_failure=block_yaml.continue_on_failure,
            )
        elif block_yaml.block_type == BlockType.FILE_URL_PARSER:
            return FileParserBlock(
                label=block_yaml.label,
                output_parameter=output_parameter,
                file_url=block_yaml.file_url,
                file_type=block_yaml.file_type,
                continue_on_failure=block_yaml.continue_on_failure,
            )
        elif block_yaml.block_type == BlockType.PDF_PARSER:
            return PDFParserBlock(
                label=block_yaml.label,
                output_parameter=output_parameter,
                file_url=block_yaml.file_url,
                json_schema=block_yaml.json_schema,
                continue_on_failure=block_yaml.continue_on_failure,
            )
        elif block_yaml.block_type == BlockType.VALIDATION:
            validation_block_parameters = (
                [parameters[parameter_key] for parameter_key in block_yaml.parameter_keys]
                if block_yaml.parameter_keys
                else []
            )

            if not block_yaml.complete_criterion and not block_yaml.terminate_criterion:
                raise Exception("Both complete criterion and terminate criterion are empty")

            return ValidationBlock(
                label=block_yaml.label,
                task_type=TaskType.validation,
                parameters=validation_block_parameters,
                output_parameter=output_parameter,
                complete_criterion=block_yaml.complete_criterion,
                terminate_criterion=block_yaml.terminate_criterion,
                error_code_mapping=block_yaml.error_code_mapping,
                continue_on_failure=block_yaml.continue_on_failure,
                # only need one step for validation block
                max_steps_per_run=1,
            )

        elif block_yaml.block_type == BlockType.ACTION:
            action_block_parameters = (
                [parameters[parameter_key] for parameter_key in block_yaml.parameter_keys]
                if block_yaml.parameter_keys
                else []
            )

            if not block_yaml.navigation_goal:
                raise Exception("empty action instruction")

            return ActionBlock(
                label=block_yaml.label,
                url=block_yaml.url,
                title=block_yaml.title,
                task_type=TaskType.action,
                parameters=action_block_parameters,
                output_parameter=output_parameter,
                navigation_goal=block_yaml.navigation_goal,
                error_code_mapping=block_yaml.error_code_mapping,
                max_retries=block_yaml.max_retries,
                complete_on_download=block_yaml.complete_on_download,
                download_suffix=block_yaml.download_suffix,
                continue_on_failure=block_yaml.continue_on_failure,
                totp_verification_url=block_yaml.totp_verification_url,
                totp_identifier=block_yaml.totp_identifier,
                cache_actions=block_yaml.cache_actions,
                # DO NOT run complete verification for action block
                complete_verification=False,
                max_steps_per_run=1,
            )

        elif block_yaml.block_type == BlockType.NAVIGATION:
            navigation_block_parameters = (
                [parameters[parameter_key] for parameter_key in block_yaml.parameter_keys]
                if block_yaml.parameter_keys
                else []
            )
            return NavigationBlock(
                label=block_yaml.label,
                url=block_yaml.url,
                title=block_yaml.title,
                parameters=navigation_block_parameters,
                output_parameter=output_parameter,
                navigation_goal=block_yaml.navigation_goal,
                error_code_mapping=block_yaml.error_code_mapping,
                max_steps_per_run=block_yaml.max_steps_per_run,
                max_retries=block_yaml.max_retries,
                complete_on_download=block_yaml.complete_on_download,
                download_suffix=block_yaml.download_suffix,
                continue_on_failure=block_yaml.continue_on_failure,
                totp_verification_url=block_yaml.totp_verification_url,
                totp_identifier=block_yaml.totp_identifier,
                cache_actions=block_yaml.cache_actions,
                complete_criterion=block_yaml.complete_criterion,
                terminate_criterion=block_yaml.terminate_criterion,
                complete_verification=block_yaml.complete_verification,
            )

        elif block_yaml.block_type == BlockType.EXTRACTION:
            extraction_block_parameters = (
                [parameters[parameter_key] for parameter_key in block_yaml.parameter_keys]
                if block_yaml.parameter_keys
                else []
            )
            return ExtractionBlock(
                label=block_yaml.label,
                url=block_yaml.url,
                title=block_yaml.title,
                parameters=extraction_block_parameters,
                output_parameter=output_parameter,
                data_extraction_goal=block_yaml.data_extraction_goal,
                data_schema=block_yaml.data_schema,
                max_steps_per_run=block_yaml.max_steps_per_run,
                max_retries=block_yaml.max_retries,
                continue_on_failure=block_yaml.continue_on_failure,
                cache_actions=block_yaml.cache_actions,
                complete_verification=False,
            )

        elif block_yaml.block_type == BlockType.LOGIN:
            login_block_parameters = (
                [parameters[parameter_key] for parameter_key in block_yaml.parameter_keys]
                if block_yaml.parameter_keys
                else []
            )
            return LoginBlock(
                label=block_yaml.label,
                url=block_yaml.url,
                title=block_yaml.title,
                parameters=login_block_parameters,
                output_parameter=output_parameter,
                navigation_goal=block_yaml.navigation_goal,
                error_code_mapping=block_yaml.error_code_mapping,
                max_steps_per_run=block_yaml.max_steps_per_run,
                max_retries=block_yaml.max_retries,
                continue_on_failure=block_yaml.continue_on_failure,
                totp_verification_url=block_yaml.totp_verification_url,
                totp_identifier=block_yaml.totp_identifier,
                cache_actions=block_yaml.cache_actions,
                complete_criterion=block_yaml.complete_criterion,
                terminate_criterion=block_yaml.terminate_criterion,
                complete_verification=block_yaml.complete_verification,
            )

        elif block_yaml.block_type == BlockType.WAIT:
            if block_yaml.wait_sec <= 0 or block_yaml.wait_sec > settings.WORKFLOW_WAIT_BLOCK_MAX_SEC:
                raise InvalidWaitBlockTime(settings.WORKFLOW_WAIT_BLOCK_MAX_SEC)

            return WaitBlock(
                label=block_yaml.label,
                wait_sec=block_yaml.wait_sec,
                continue_on_failure=block_yaml.continue_on_failure,
                output_parameter=output_parameter,
            )

        elif block_yaml.block_type == BlockType.FILE_DOWNLOAD:
            file_download_block_parameters = (
                [parameters[parameter_key] for parameter_key in block_yaml.parameter_keys]
                if block_yaml.parameter_keys
                else []
            )
            return FileDownloadBlock(
                label=block_yaml.label,
                url=block_yaml.url,
                title=block_yaml.title,
                parameters=file_download_block_parameters,
                output_parameter=output_parameter,
                navigation_goal=block_yaml.navigation_goal,
                error_code_mapping=block_yaml.error_code_mapping,
                max_steps_per_run=block_yaml.max_steps_per_run,
                max_retries=block_yaml.max_retries,
                download_suffix=block_yaml.download_suffix,
                continue_on_failure=block_yaml.continue_on_failure,
                totp_verification_url=block_yaml.totp_verification_url,
                totp_identifier=block_yaml.totp_identifier,
                cache_actions=block_yaml.cache_actions,
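                # File download blocks always complete on a download and always verify completion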
                complete_on_download=True,
                complete_verification=True,
            )
        elif block_yaml.block_type == BlockType.TaskV2:
            return TaskV2Block(
                label=block_yaml.label,
                prompt=block_yaml.prompt,
                url=block_yaml.url,
                totp_verification_url=block_yaml.totp_verification_url,
                totp_identifier=block_yaml.totp_identifier,
                max_iterations=block_yaml.max_iterations,
                max_steps=block_yaml.max_steps,
                output_parameter=output_parameter,
            )
        elif block_yaml.block_type == BlockType.GOTO_URL:
            return UrlBlock(
                label=block_yaml.label,
                url=block_yaml.url,
                output_parameter=output_parameter,
                complete_verification=False,
            )

        raise ValueError(f"Invalid block type {block_yaml.block_type}")

    async def create_empty_workflow(
        self,
        organization: Organization,
        title: str,
        proxy_location: ProxyLocation | None = None,
        status: WorkflowStatus = WorkflowStatus.published,
    ) -> Workflow:
        """
        Create a blank workflow with no blocks
        """
        # create a new workflow
        workflow_create_request = WorkflowCreateYAMLRequest(
            title=title,
            workflow_definition=WorkflowDefinitionYAML(
                parameters=[],
                blocks=[],
            ),
            proxy_location=proxy_location,
            status=status,
        )
        return await app.WORKFLOW_SERVICE.create_workflow_from_request(
            organization=organization,
            request=workflow_create_request,
        )

    async def get_workflow_run_timeline(
        self,
        workflow_run_id: str,
        organization_id: str | None = None,
    ) -> list[WorkflowRunTimeline]:
        """
        build the tree structure of the workflow run timeline
        """
        workflow_run_blocks = await app.DATABASE.get_workflow_run_blocks(
            workflow_run_id=workflow_run_id,
            organization_id=organization_id,
        )
        # get all the actions for all workflow run blocks
        task_ids = [block.task_id for block in workflow_run_blocks if block.task_id]
        task_id_to_block: dict[str, WorkflowRunBlock] = {
            block.task_id: block for block in workflow_run_blocks if block.task_id
        }
        actions = await app.DATABASE.get_tasks_actions(task_ids=task_ids, organization_id=organization_id)
        for action in actions:
            if not action.task_id:
                continue
            task_block = task_id_to_block[action.task_id]
            task_block.actions.append(action)

        result = []
        block_map: dict[str, WorkflowRunTimeline] = {}
        counter = 0
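        # Pop blocks off the queue one at a time: top-level blocks go straight into the
        # result, while a child block is re-queued until its parent has been registered
        # in block_map, so the tree is assembled regardless of the original ordering.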
        while workflow_run_blocks:
            counter += 1
            block = workflow_run_blocks.pop(0)
            workflow_run_timeline = WorkflowRunTimeline(
                type=WorkflowRunTimelineType.block,
                block=block,
                created_at=block.created_at,
                modified_at=block.modified_at,
            )
            if block.parent_workflow_run_block_id:
                if block.parent_workflow_run_block_id in block_map:
                    block_map[block.parent_workflow_run_block_id].children.append(workflow_run_timeline)
                else:
                    # put the block back to the queue
                    workflow_run_blocks.append(block)
            else:
                result.append(workflow_run_timeline)
            block_map[block.workflow_run_block_id] = workflow_run_timeline
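
            # Safety valve: a child whose parent never appears would otherwise be
            # re-queued forever, so bail out after a bounded number of iterations.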
            if counter > 1000:
                LOG.error("Too many blocks in the workflow run", workflow_run_id=workflow_run_id)
                break

        return result