mirror of
https://github.com/Skyvern-AI/skyvern.git
synced 2026-04-28 03:30:10 +00:00
6590 lines · 285 KiB · Python
from __future__ import annotations
|
||
|
||
import abc
|
||
import ast
|
||
import asyncio
|
||
import csv
|
||
import json
|
||
import os
|
||
import re
|
||
import smtplib
|
||
import textwrap
|
||
import uuid
|
||
from collections import defaultdict, deque
|
||
from datetime import datetime, timezone
|
||
from email.message import EmailMessage
|
||
from pathlib import Path
|
||
from types import SimpleNamespace
|
||
from typing import Annotated, Any, Awaitable, Callable, ClassVar, Literal, Union, cast
|
||
from urllib.parse import quote, urlparse
|
||
|
||
import aiofiles
|
||
import aiohttp
|
||
import docx
|
||
import filetype
|
||
import pandas as pd
|
||
import pyotp
|
||
import structlog
|
||
from charset_normalizer import from_bytes
|
||
from email_validator import EmailNotValidError, validate_email
|
||
from jinja2 import StrictUndefined
|
||
from jinja2.sandbox import SandboxedEnvironment
|
||
from playwright.async_api import Page
|
||
from pydantic import BaseModel, Field, model_validator
|
||
|
||
from skyvern.config import settings
|
||
from skyvern.constants import (
|
||
AZURE_BLOB_STORAGE_MAX_UPLOAD_FILE_COUNT,
|
||
GET_DOWNLOADED_FILES_TIMEOUT,
|
||
MAX_FILE_PARSE_INPUT_TOKENS,
|
||
MAX_UPLOAD_FILE_COUNT,
|
||
)
|
||
from skyvern.exceptions import (
|
||
AzureConfigurationError,
|
||
ContextParameterValueNotFound,
|
||
DownloadFileMaxSizeExceeded,
|
||
MissingBrowserState,
|
||
MissingBrowserStatePage,
|
||
PDFParsingError,
|
||
TaskNotFound,
|
||
UnexpectedTaskStatus,
|
||
get_user_facing_exception_message,
|
||
)
|
||
from skyvern.forge import app
|
||
from skyvern.forge.prompts import prompt_engine
|
||
from skyvern.forge.sdk.api import email
|
||
from skyvern.forge.sdk.api.aws import AsyncAWSClient
|
||
from skyvern.forge.sdk.api.files import (
|
||
calculate_sha256_for_file,
|
||
create_named_temporary_file,
|
||
download_file,
|
||
download_from_s3,
|
||
get_download_dir,
|
||
get_path_for_workflow_download_directory,
|
||
parse_uri_to_path,
|
||
)
|
||
from skyvern.forge.sdk.api.llm.api_handler import LLMAPIHandler
|
||
from skyvern.forge.sdk.api.llm.api_handler_factory import LLMAPIHandlerFactory
|
||
from skyvern.forge.sdk.artifact.models import ArtifactType
|
||
from skyvern.forge.sdk.core import skyvern_context
|
||
from skyvern.forge.sdk.core.aiohttp_helper import aiohttp_request
|
||
from skyvern.forge.sdk.db.enums import TaskType
|
||
from skyvern.forge.sdk.db.exceptions import NotFoundError
|
||
from skyvern.forge.sdk.experimentation.llm_prompt_config import get_llm_handler_for_prompt_type
|
||
from skyvern.forge.sdk.models import Step
|
||
from skyvern.forge.sdk.schemas.files import FileInfo
|
||
from skyvern.forge.sdk.schemas.task_v2 import TaskV2Status
|
||
from skyvern.forge.sdk.schemas.tasks import Task, TaskOutput, TaskStatus
|
||
from skyvern.forge.sdk.services.bitwarden import BitwardenConstants
|
||
from skyvern.forge.sdk.services.credentials import AzureVaultConstants, OnePasswordConstants
|
||
from skyvern.forge.sdk.settings_manager import SettingsManager
|
||
from skyvern.forge.sdk.trace import traced
|
||
from skyvern.forge.sdk.utils.pdf_parser import extract_pdf_file, validate_pdf_file
|
||
from skyvern.forge.sdk.utils.sanitization import sanitize_postgres_text
|
||
from skyvern.forge.sdk.workflow.context_manager import BlockMetadata, WorkflowRunContext
|
||
from skyvern.forge.sdk.workflow.exceptions import (
|
||
CustomizedCodeException,
|
||
FailedToFormatJinjaStyleParameter,
|
||
InsecureCodeDetected,
|
||
InvalidEmailClientConfiguration,
|
||
InvalidFileType,
|
||
InvalidWorkflowDefinition,
|
||
MissingJinjaVariables,
|
||
NoIterableValueFound,
|
||
NoValidEmailRecipient,
|
||
)
|
||
from skyvern.forge.sdk.workflow.models.parameter import (
|
||
PARAMETER_TYPE,
|
||
AWSSecretParameter,
|
||
ContextParameter,
|
||
OutputParameter,
|
||
ParameterType,
|
||
WorkflowParameter,
|
||
)
|
||
from skyvern.schemas.runs import RunEngine
|
||
from skyvern.schemas.workflows import BlockResult, BlockStatus, BlockType, FileStorageType, FileType
|
||
from skyvern.services.error_detection_service import detect_user_defined_errors_for_task
|
||
from skyvern.utils.strings import generate_random_string
|
||
from skyvern.utils.templating import get_missing_variables
|
||
from skyvern.utils.token_counter import count_tokens
|
||
from skyvern.utils.url_validators import prepend_scheme_and_validate_url
|
||
from skyvern.webeye.browser_state import BrowserState
|
||
from skyvern.webeye.utils.page import SkyvernFrame
|
||
|
||
LOG = structlog.get_logger()


# Jinja environment used to render workflow parameter templates. A
# SandboxedEnvironment is used so user-authored templates cannot reach unsafe
# attributes. In "strict" mode, undefined template variables raise instead of
# silently rendering as empty strings.
if settings.WORKFLOW_TEMPLATING_STRICTNESS == "strict":
    jinja_sandbox_env = SandboxedEnvironment(undefined=StrictUndefined)
else:
    jinja_sandbox_env = SandboxedEnvironment()


# Date format used for the built-in {{current_date}} reserved parameter.
CURRENT_DATE_FORMAT = "%Y-%m-%d"
|
||
|
||
# Sentinel marker for native JSON type injection via | json filter.
|
||
_JSON_TYPE_MARKER = "__SKYVERN_RAW_JSON__"
|
||
|
||
|
||
def _json_type_filter(value: Any) -> str:
|
||
"""Jinja filter that marks a value for native JSON type injection.
|
||
|
||
Usage in templates: {{ some_bool | json }}
|
||
|
||
The filter serializes the value to JSON and wraps it with sentinel markers.
|
||
When _render_templates_in_json() detects these markers, it unwraps and
|
||
parses the JSON to get the native typed value (bool, int, list, etc.).
|
||
|
||
Uses default=str to handle non-JSON-serializable types (datetime, Enum, etc.)
|
||
"""
|
||
return f"{_JSON_TYPE_MARKER}{json.dumps(value, default=str)}{_JSON_TYPE_MARKER}"
|
||
|
||
|
||
# Register the | json filter on the sandboxed environment so templates can
# opt into native-type injection.
jinja_sandbox_env.filters["json"] = _json_type_filter
|
||
|
||
|
||
# Mapping from TaskV2Status to the corresponding BlockStatus. Declared once at
# import time so it is not recreated on each block execution.
TASKV2_TO_BLOCK_STATUS: dict[TaskV2Status, BlockStatus] = {
    TaskV2Status.completed: BlockStatus.completed,
    TaskV2Status.terminated: BlockStatus.terminated,
    TaskV2Status.failed: BlockStatus.failed,
    TaskV2Status.canceled: BlockStatus.canceled,
    TaskV2Status.timed_out: BlockStatus.timed_out,
}


# ForLoop constants
# Hard caps guarding against runaway for-loop blocks.
DEFAULT_MAX_LOOP_ITERATIONS = 100
DEFAULT_MAX_STEPS_PER_ITERATION = 50
|
||
|
||
|
||
class Block(BaseModel, abc.ABC):
    """Base class for workflow nodes (see branching spec [[s-4bnl]] for metadata semantics)."""

    # Must be unique within workflow definition
    label: str = Field(description="Author-facing identifier for a block; unique within a workflow.")
    next_block_label: str | None = Field(
        default=None,
        description="Optional pointer to the next block label when constructing a DAG. "
        "Defaults to sequential order when omitted.",
    )
    block_type: BlockType
    # Parameter that receives this block's result after execution.
    output_parameter: OutputParameter
    # When True, the workflow keeps running even if this block fails.
    continue_on_failure: bool = False
    # Optional per-block LLM model override; see override_llm_key.
    model: dict[str, Any] | None = None
    disable_cache: bool = False

    # Only valid for blocks inside a for loop block
    # Whether to continue to the next iteration when the block fails
    next_loop_on_failure: bool = False
|
||
|
||
@property
|
||
def override_llm_key(self) -> str | None:
|
||
"""
|
||
If the `Block` has a `model` defined, then return the mapped llm_key for it.
|
||
|
||
Otherwise return `None`.
|
||
"""
|
||
if self.model:
|
||
model_name = self.model.get("model_name")
|
||
if model_name:
|
||
mapping = SettingsManager.get_settings().get_model_name_to_llm_key()
|
||
return mapping.get(model_name, {}).get("llm_key")
|
||
|
||
return None
|
||
|
||
async def record_output_parameter_value(
|
||
self,
|
||
workflow_run_context: WorkflowRunContext,
|
||
workflow_run_id: str,
|
||
value: dict[str, Any] | list | str | None = None,
|
||
) -> None:
|
||
await workflow_run_context.register_output_parameter_value_post_execution(
|
||
parameter=self.output_parameter,
|
||
value=value,
|
||
)
|
||
await app.DATABASE.create_or_update_workflow_run_output_parameter(
|
||
workflow_run_id=workflow_run_id,
|
||
output_parameter_id=self.output_parameter.output_parameter_id,
|
||
value=value,
|
||
)
|
||
LOG.info(
|
||
"Registered output parameter value",
|
||
output_parameter_id=self.output_parameter.output_parameter_id,
|
||
workflow_run_id=workflow_run_id,
|
||
output_parameter_value=value,
|
||
)
|
||
|
||
    async def build_block_result(
        self,
        success: bool,
        failure_reason: str | None,
        output_parameter_value: dict[str, Any] | list | str | None = None,
        status: BlockStatus | None = None,
        workflow_run_block_id: str | None = None,
        organization_id: str | None = None,
        executed_branch_id: str | None = None,
        executed_branch_expression: str | None = None,
        executed_branch_result: bool | None = None,
        executed_branch_next_block: str | None = None,
    ) -> BlockResult:
        """Persist the block outcome (when a run-block row exists) and return a BlockResult."""
        # TODO: update workflow run block status and failure reason
        # Normalize bare string outputs into a dict payload.
        if isinstance(output_parameter_value, str):
            output_parameter_value = {"value": output_parameter_value}

        if workflow_run_block_id:
            await app.DATABASE.update_workflow_run_block(
                workflow_run_block_id=workflow_run_block_id,
                output=output_parameter_value,
                status=status,
                failure_reason=failure_reason,
                organization_id=organization_id,
                executed_branch_id=executed_branch_id,
                executed_branch_expression=executed_branch_expression,
                executed_branch_result=executed_branch_result,
                executed_branch_next_block=executed_branch_next_block,
            )
        return BlockResult(
            success=success,
            failure_reason=failure_reason,
            output_parameter=self.output_parameter,
            output_parameter_value=output_parameter_value,
            status=status,
            workflow_run_block_id=workflow_run_block_id,
        )
|
||
|
||
    async def get_or_create_browser_state(
        self,
        workflow_run_id: str,
        organization_id: str | None = None,
        browser_session_id: str | None = None,
    ) -> BrowserState | None:
        """
        Acquire or create browser state for block execution.

        Checks persistent sessions first (debugger use case), then falls back to
        workflow run browser manager. If no state exists, creates a new one.

        Returns BrowserState if successful, None if creation failed.
        """
        browser_state: BrowserState | None = None

        # Persistent-session lookup requires both the session id and the org id.
        if browser_session_id and organization_id:
            browser_state = await app.PERSISTENT_SESSIONS_MANAGER.get_browser_state(browser_session_id, organization_id)
        else:
            browser_state = app.BROWSER_MANAGER.get_for_workflow_run(workflow_run_id)

        if not browser_state:
            # No existing state: create one for this workflow run.
            workflow_run = await app.WORKFLOW_SERVICE.get_workflow_run(
                workflow_run_id=workflow_run_id,
                organization_id=organization_id,
            )
            try:
                browser_state = await app.BROWSER_MANAGER.get_or_create_for_workflow_run(
                    workflow_run=workflow_run,
                    url=None,
                    browser_session_id=browser_session_id,
                    browser_profile_id=workflow_run.browser_profile_id,
                )
                # Repair/initialize the freshly acquired state before use.
                await browser_state.check_and_fix_state(
                    url=None,
                    proxy_location=workflow_run.proxy_location,
                    workflow_run_id=workflow_run_id,
                    workflow_permanent_id=workflow_run.workflow_permanent_id,
                    organization_id=workflow_run.organization_id,
                    extra_http_headers=workflow_run.extra_http_headers,
                    browser_address=workflow_run.browser_address,
                    browser_profile_id=workflow_run.browser_profile_id,
                )
            except Exception:
                # Creation failure is logged and surfaced to the caller as None.
                LOG.exception(
                    "Failed to create browser state",
                    workflow_run_id=workflow_run_id,
                )
                return None

        return browser_state
|
||
|
||
    def format_block_parameter_template_from_workflow_run_context(
        self,
        potential_template: str,
        workflow_run_context: WorkflowRunContext,
        *,
        force_include_secrets: bool = False,
    ) -> str:
        """
        Format a template string using the workflow run context.

        Security Note:
            Real secret values are ONLY resolved for blocks that do NOT expose data to the LLM
            (like HttpRequestBlock, CodeBlock), as determined by is_safe_block_for_secrets.
        """
        if not potential_template:
            return potential_template

        # Security: only allow real secret values for non-LLM blocks (HttpRequestBlock, CodeBlock)
        is_safe_block_for_secrets = self.block_type in [
            BlockType.CODE,
            BlockType.HTTP_REQUEST,
            BlockType.WORKFLOW_TRIGGER,
        ]

        template = jinja_sandbox_env.from_string(potential_template)

        # Per-label metadata (e.g. for-loop bookkeeping) registered for this block.
        block_reference_data: dict[str, Any] = workflow_run_context.get_block_metadata(self.label)
        # Copy so the template-only additions below never leak back into the context.
        template_data = workflow_run_context.values.copy()

        include_secrets = workflow_run_context.include_secrets_in_templates or force_include_secrets

        # FORCE DISABLE if block is not safe (sends data to LLM)
        if include_secrets and not is_safe_block_for_secrets:
            include_secrets = False

        if include_secrets:
            template_data.update(workflow_run_context.secrets)

            # Create easier-to-access entries for credentials
            # Look for credential parameters and create real_username/real_password entries
            # First collect all credential parameters to avoid modifying dict during iteration
            credential_params = []
            for key, value in list(template_data.items()):
                if isinstance(value, dict) and "context" in value:
                    # PASSWORD credential: has username and password
                    if "username" in value and "password" in value:
                        credential_params.append((key, value))
                    # SECRET credential: has secret_value
                    elif "secret_value" in value:
                        credential_params.append((key, value))

            # Now add the real_username/real_password entries
            for key, value in credential_params:
                username_secret_id = value.get("username", "")
                password_secret_id = value.get("password", "")

                # Get the actual values from the secrets
                real_username = template_data.get(username_secret_id, "")
                real_password = template_data.get(password_secret_id, "")

                # Add easier-to-access entries
                template_data[f"{key}_real_username"] = real_username
                template_data[f"{key}_real_password"] = real_password

                if is_safe_block_for_secrets:
                    # Replace placeholder ids with the original secret values,
                    # only for blocks that never send rendered output to the LLM.
                    resolved_credential = value.copy()
                    for credential_field, credential_placeholder in value.items():
                        if credential_field == "context":
                            continue
                        secret_value = workflow_run_context.get_original_secret_value_or_none(credential_placeholder)
                        if secret_value is not None:
                            resolved_credential[credential_field] = secret_value
                    resolved_credential.pop("context", None)
                    template_data[key] = resolved_credential

        if self.label in template_data:
            current_value = template_data[self.label]
            if isinstance(current_value, dict):
                block_reference_data.update(current_value)
            else:
                LOG.warning(
                    f"Parameter {self.label} has a registered reference value, going to overwrite it by block metadata"
                )

        template_data[self.label] = block_reference_data

        # TODO (suchintan): This is pretty hacky - we should have a standard way to initialize the workflow run context
        # inject the forloop metadata as global variables
        if "current_index" in block_reference_data:
            template_data["current_index"] = block_reference_data["current_index"]
        if "current_item" in block_reference_data:
            template_data["current_item"] = block_reference_data["current_item"]
        if "current_value" in block_reference_data:
            template_data["current_value"] = block_reference_data["current_value"]

        # Initialize workflow-level parameters
        if "workflow_title" not in template_data:
            template_data["workflow_title"] = workflow_run_context.workflow_title
        if "workflow_id" not in template_data:
            template_data["workflow_id"] = workflow_run_context.workflow_id
        if "workflow_permanent_id" not in template_data:
            template_data["workflow_permanent_id"] = workflow_run_context.workflow_permanent_id
        if "workflow_run_id" not in template_data:
            template_data["workflow_run_id"] = workflow_run_context.workflow_run_id
        if "current_date" not in template_data:
            template_data["current_date"] = datetime.now(timezone.utc).strftime(CURRENT_DATE_FORMAT)
        if "browser_session_id" not in template_data:
            template_data["browser_session_id"] = workflow_run_context.browser_session_id or ""

        template_data["workflow_run_outputs"] = workflow_run_context.workflow_run_outputs
        template_data["workflow_run_summary"] = workflow_run_context.build_workflow_run_summary()

        # In strict mode, fail fast with the full list of unresolved variables
        # rather than rendering empty strings.
        if settings.WORKFLOW_TEMPLATING_STRICTNESS == "strict":
            if missing_variables := get_missing_variables(potential_template, template_data):
                raise MissingJinjaVariables(
                    template=potential_template,
                    variables=missing_variables,
                )

        return template.render(template_data)
|
||
|
||
@classmethod
|
||
def get_subclasses(cls) -> tuple[type[Block], ...]:
|
||
return tuple(cls.__subclasses__())
|
||
|
||
    @staticmethod
    def get_workflow_run_context(workflow_run_id: str) -> WorkflowRunContext:
        """Look up the in-memory run context for *workflow_run_id* from the global manager."""
        return app.WORKFLOW_CONTEXT_MANAGER.get_workflow_run_context(workflow_run_id)
|
||
|
||
    @staticmethod
    def get_async_aws_client() -> AsyncAWSClient:
        """Return the shared async AWS client held by the workflow context manager."""
        return app.WORKFLOW_CONTEXT_MANAGER.aws_client
|
||
|
||
    @abc.abstractmethod
    async def execute(
        self,
        workflow_run_id: str,
        workflow_run_block_id: str,
        organization_id: str | None = None,
        browser_session_id: str | None = None,
        **kwargs: dict,
    ) -> BlockResult:
        """Run the block's actual behavior. Invoked via execute_safe(); subclasses implement."""
        pass
|
||
|
||
async def _generate_workflow_run_block_description(
|
||
self, workflow_run_block_id: str, organization_id: str | None = None
|
||
) -> None:
|
||
description = None
|
||
try:
|
||
block_data = self.model_dump(
|
||
exclude={
|
||
"workflow_run_block_id",
|
||
"organization_id",
|
||
"task_id",
|
||
"workflow_run_id",
|
||
"parent_workflow_run_block_id",
|
||
"label",
|
||
"status",
|
||
"output",
|
||
"continue_on_failure",
|
||
"failure_reason",
|
||
"actions",
|
||
"created_at",
|
||
"modified_at",
|
||
},
|
||
exclude_none=True,
|
||
)
|
||
description_generation_prompt = prompt_engine.load_prompt(
|
||
"generate_workflow_run_block_description",
|
||
block=block_data,
|
||
)
|
||
json_response = await app.SECONDARY_LLM_API_HANDLER(
|
||
prompt=description_generation_prompt, prompt_name="generate-workflow-run-block-description"
|
||
)
|
||
description = json_response.get("summary")
|
||
LOG.info(
|
||
"Generated description for the workflow run block",
|
||
description=description,
|
||
workflow_run_block_id=workflow_run_block_id,
|
||
)
|
||
except Exception as e:
|
||
LOG.exception("Failed to generate description for the workflow run block", error=e)
|
||
|
||
if description:
|
||
await app.DATABASE.update_workflow_run_block(
|
||
workflow_run_block_id=workflow_run_block_id,
|
||
description=description,
|
||
organization_id=organization_id,
|
||
)
|
||
|
||
    @traced()
    async def execute_safe(
        self,
        workflow_run_id: str,
        parent_workflow_run_block_id: str | None = None,
        organization_id: str | None = None,
        browser_session_id: str | None = None,
        **kwargs: dict,
    ) -> BlockResult:
        """Run the block with full bookkeeping and exception containment.

        Creates the workflow_run_block row, kicks off async description
        generation, captures a best-effort pre-execution screenshot, then
        delegates to execute(). Any exception is converted into a failed
        BlockResult instead of propagating to the caller.
        """
        workflow_run_block_id = None
        engine: RunEngine | None = None
        try:
            if isinstance(self, BaseTaskBlock):
                engine = self.engine

            workflow_run_block = await app.DATABASE.create_workflow_run_block(
                workflow_run_id=workflow_run_id,
                organization_id=organization_id,
                parent_workflow_run_block_id=parent_workflow_run_block_id,
                label=self.label,
                block_type=self.block_type,
                continue_on_failure=self.continue_on_failure,
                engine=engine,
            )
            workflow_run_block_id = workflow_run_block.workflow_run_block_id

            # generate the description for the workflow run block asynchronously
            # NOTE(review): the task handle is not retained; presumably the running
            # loop keeps it alive until completion — confirm no premature GC.
            asyncio.create_task(self._generate_workflow_run_block_description(workflow_run_block_id, organization_id))

            # create a screenshot
            browser_state = app.BROWSER_MANAGER.get_for_workflow_run(workflow_run_id)
            if not browser_state:
                LOG.warning(
                    "No browser state found when creating workflow_run_block",
                    workflow_run_id=workflow_run_id,
                    workflow_run_block_id=workflow_run_block_id,
                    browser_session_id=browser_session_id,
                    block_label=self.label,
                )
            else:
                try:
                    screenshot = await browser_state.take_fullpage_screenshot()
                except Exception:
                    # Screenshots are best-effort; never block execution on them.
                    LOG.warning(
                        "Failed to take screenshot before executing the block, ignoring the exception",
                        workflow_run_id=workflow_run_id,
                        workflow_run_block_id=workflow_run_block_id,
                    )
                    screenshot = None
                if screenshot:
                    await app.ARTIFACT_MANAGER.create_workflow_run_block_artifact(
                        workflow_run_block=workflow_run_block,
                        artifact_type=ArtifactType.SCREENSHOT_LLM,
                        data=screenshot,
                    )

            LOG.info(
                "Executing block", workflow_run_id=workflow_run_id, block_label=self.label, block_type=self.block_type
            )
            return await self.execute(
                workflow_run_id,
                workflow_run_block_id,
                organization_id=organization_id,
                browser_session_id=browser_session_id,
                **kwargs,
            )
        except Exception as e:
            LOG.exception(
                "Block execution failed",
                workflow_run_id=workflow_run_id,
                block_label=self.label,
                block_type=self.block_type,
            )
            # Record output parameter value if it hasn't been recorded yet
            workflow_run_context = self.get_workflow_run_context(workflow_run_id)
            if not workflow_run_context.has_value(self.output_parameter.key):
                await self.record_output_parameter_value(workflow_run_context, workflow_run_id)

            # Translate the raw exception into a user-presentable message.
            failure_reason = get_user_facing_exception_message(e)

            return await self.build_block_result(
                success=False,
                failure_reason=failure_reason,
                status=BlockStatus.failed,
                workflow_run_block_id=workflow_run_block_id,
                organization_id=organization_id,
            )
|
||
|
||
    @abc.abstractmethod
    def get_all_parameters(
        self,
        workflow_run_id: str,
    ) -> list[PARAMETER_TYPE]:
        """Return every parameter this block consumes for the given run. Subclasses implement."""
        pass
|
||
|
||
|
||
class BaseTaskBlock(Block):
    """Common configuration shared by task-style blocks (navigation, extraction, etc.)."""

    task_type: str = TaskType.general
    # Starting URL; may itself be a parameter key or a Jinja template.
    url: str | None = None
    title: str = ""
    engine: RunEngine = RunEngine.skyvern_v1
    complete_criterion: str | None = None
    terminate_criterion: str | None = None
    navigation_goal: str | None = None
    data_extraction_goal: str | None = None
    data_schema: dict[str, Any] | list | str | None = None
    # error code to error description for the LLM
    error_code_mapping: dict[str, str] | None = None
    max_retries: int = 0
    max_steps_per_run: int | None = None
    # Pydantic copies this mutable default per instance.
    parameters: list[PARAMETER_TYPE] = []
    complete_on_download: bool = False
    download_suffix: str | None = None
    # TOTP (2FA) configuration; both fields may be templated.
    totp_verification_url: str | None = None
    totp_identifier: str | None = None
    complete_verification: bool = True
    include_action_history_in_verification: bool = False
    download_timeout: float | None = None  # minutes
|
||
|
||
def get_all_parameters(
|
||
self,
|
||
workflow_run_id: str,
|
||
) -> list[PARAMETER_TYPE]:
|
||
parameters = self.parameters
|
||
workflow_run_context = self.get_workflow_run_context(workflow_run_id)
|
||
|
||
if self.url and workflow_run_context.has_parameter(self.url):
|
||
if self.url not in [parameter.key for parameter in parameters]:
|
||
parameters.append(workflow_run_context.get_parameter(self.url))
|
||
|
||
return parameters
|
||
|
||
    def format_potential_template_parameters(self, workflow_run_context: WorkflowRunContext) -> None:
        """Render every templatable field of this block in place using the run context.

        Mutates self: each string field that may contain Jinja syntax is
        replaced by its rendered value; URL-like fields are additionally
        scheme-prefixed and validated.
        """
        self.title = self.format_block_parameter_template_from_workflow_run_context(self.title, workflow_run_context)

        if self.url:
            self.url = self.format_block_parameter_template_from_workflow_run_context(self.url, workflow_run_context)
            self.url = prepend_scheme_and_validate_url(self.url)

        if self.totp_identifier:
            self.totp_identifier = self.format_block_parameter_template_from_workflow_run_context(
                self.totp_identifier, workflow_run_context
            )

        if self.totp_verification_url:
            self.totp_verification_url = self.format_block_parameter_template_from_workflow_run_context(
                self.totp_verification_url, workflow_run_context
            )
            self.totp_verification_url = prepend_scheme_and_validate_url(self.totp_verification_url)

        if self.download_suffix:
            self.download_suffix = self.format_block_parameter_template_from_workflow_run_context(
                self.download_suffix, workflow_run_context
            )
            # encode the suffix to prevent invalid path style
            self.download_suffix = quote(string=self.download_suffix, safe="")

        if self.navigation_goal:
            self.navigation_goal = self.format_block_parameter_template_from_workflow_run_context(
                self.navigation_goal, workflow_run_context
            )

        if self.data_extraction_goal:
            self.data_extraction_goal = self.format_block_parameter_template_from_workflow_run_context(
                self.data_extraction_goal, workflow_run_context
            )

        # data_schema may be a dict/list (left untouched) or a template string.
        if isinstance(self.data_schema, str):
            self.data_schema = self.format_block_parameter_template_from_workflow_run_context(
                self.data_schema, workflow_run_context
            )

        if self.complete_criterion:
            self.complete_criterion = self.format_block_parameter_template_from_workflow_run_context(
                self.complete_criterion, workflow_run_context
            )

        if self.terminate_criterion:
            self.terminate_criterion = self.format_block_parameter_template_from_workflow_run_context(
                self.terminate_criterion, workflow_run_context
            )
|
||
|
||
@staticmethod
|
||
async def get_task_order(workflow_run_id: str, current_retry: int) -> tuple[int, int]:
|
||
"""
|
||
Returns the order and retry for the next task in the workflow run as a tuple.
|
||
"""
|
||
last_task_for_workflow_run = await app.DATABASE.get_last_task_for_workflow_run(workflow_run_id=workflow_run_id)
|
||
# If there is no previous task, the order will be 0 and the retry will be 0.
|
||
if last_task_for_workflow_run is None:
|
||
return 0, 0
|
||
# If there is a previous task but the current retry is 0, the order will be the order of the last task + 1
|
||
# and the retry will be 0.
|
||
order = last_task_for_workflow_run.order or 0
|
||
if current_retry == 0:
|
||
return order + 1, 0
|
||
# If there is a previous task and the current retry is not 0, the order will be the order of the last task
|
||
# and the retry will be the retry of the last task + 1. (There is a validation that makes sure the retry
|
||
# of the last task is equal to current_retry - 1) if it is not, we use last task retry + 1.
|
||
retry = last_task_for_workflow_run.retry or 0
|
||
if retry + 1 != current_retry:
|
||
LOG.error(
|
||
f"Last task for workflow run is retry number {last_task_for_workflow_run.retry}, "
|
||
f"but current retry is {current_retry}. Could be race condition. Using last task retry + 1",
|
||
workflow_run_id=workflow_run_id,
|
||
last_task_id=last_task_for_workflow_run.task_id,
|
||
last_task_retry=last_task_for_workflow_run.retry,
|
||
current_retry=current_retry,
|
||
)
|
||
|
||
return order, retry + 1
|
||
|
||
    async def _handle_task_failure_with_error_detection(
        self,
        task: Task,
        step: Step,
        browser_state: BrowserState | None,
        failure_reason: str,
        organization_id: str,
    ) -> None:
        """
        Handle task failure by updating the task status and detecting user-defined errors.

        This helper method consolidates the error detection logic that was previously
        duplicated across multiple exception handlers in the execute method.
        """
        # Mark the task failed first so the status is durable even if detection fails.
        await app.DATABASE.update_task(
            task.task_id,
            status=TaskStatus.failed,
            organization_id=organization_id,
            failure_reason=failure_reason,
        )
        # Detect user-defined errors if error_code_mapping is provided
        if self.error_code_mapping:
            try:
                detected_errors = await detect_user_defined_errors_for_task(
                    task=task,
                    step=step,
                    browser_state=browser_state,
                    failure_reason=failure_reason,
                )
                if detected_errors:
                    # Only pass new errors — update_task() appends to existing errors
                    new_errors = [error.model_dump() for error in detected_errors]
                    await app.DATABASE.update_task(
                        task_id=task.task_id,
                        organization_id=organization_id,
                        errors=new_errors,
                    )
            except Exception:
                # Detection is best-effort; the failed status above already stands.
                LOG.exception(
                    "Failed to detect or store user-defined errors during task failure",
                    task_id=task.task_id,
                )
|
||
|
||
    async def execute(
        self,
        workflow_run_id: str,
        workflow_run_block_id: str,
        organization_id: str | None = None,
        browser_session_id: str | None = None,
        **kwargs: dict,
    ) -> BlockResult:
        """
        Run this task block as a Skyvern task, retrying up to self.max_retries times.

        Resolves parameterized url/totp_identifier/download_suffix from the workflow
        run context, creates a task + step per attempt, ensures a browser state
        (creating and navigating for the first task of the run, reusing and
        re-navigating for subsequent ones), executes the agent step loop, and maps
        the final task status to a BlockResult. Browser/agent failures mark the task
        failed via _handle_task_failure_with_error_detection and re-raise.
        """
        workflow_run_context = self.get_workflow_run_context(workflow_run_id)
        current_retry = 0
        # initial value for will_retry is True, so that the loop runs at least once
        will_retry = True
        current_running_task: Task | None = None
        workflow_run = await app.WORKFLOW_SERVICE.get_workflow_run(
            workflow_run_id=workflow_run_id,
            organization_id=organization_id,
        )
        # Get workflow from context if available, otherwise query database
        workflow = workflow_run_context.workflow
        if workflow is None:
            workflow = await app.WORKFLOW_SERVICE.get_workflow_by_permanent_id(
                workflow_permanent_id=workflow_run.workflow_permanent_id,
            )
            # Cache the workflow back to context for future block executions
            workflow_run_context.set_workflow(workflow)
        # if the task url is parameterized, we need to get the value from the workflow run context
        if self.url and workflow_run_context.has_parameter(self.url) and workflow_run_context.has_value(self.url):
            task_url_parameter_value = workflow_run_context.get_value(self.url)
            if task_url_parameter_value:
                LOG.info(
                    "Task URL is parameterized, using parameter value",
                    task_url_parameter_value=task_url_parameter_value,
                    task_url_parameter_key=self.url,
                )
                self.url = task_url_parameter_value

        # Resolve the TOTP identifier either from a direct parameter reference or,
        # when unset, from the first credential parameter that carries one.
        if self.totp_identifier:
            if workflow_run_context.has_parameter(self.totp_identifier) and workflow_run_context.has_value(
                self.totp_identifier
            ):
                totp_identifier_parameter_value = workflow_run_context.get_value(self.totp_identifier)
                if totp_identifier_parameter_value:
                    self.totp_identifier = totp_identifier_parameter_value
        else:
            for parameter in self.get_all_parameters(workflow_run_id):
                parameter_key = getattr(parameter, "key", None)
                if not parameter_key:
                    continue
                credential_totp_identifier = workflow_run_context.get_credential_totp_identifier(parameter_key)
                if credential_totp_identifier:
                    self.totp_identifier = credential_totp_identifier
                    break

        if self.download_suffix and workflow_run_context.has_parameter(self.download_suffix):
            download_suffix_parameter_value = workflow_run_context.get_value(self.download_suffix)
            if download_suffix_parameter_value:
                LOG.info(
                    "Download prefix is parameterized, using parameter value",
                    download_suffix_parameter_value=download_suffix_parameter_value,
                    download_suffix_parameter_key=self.download_suffix,
                )
                self.download_suffix = download_suffix_parameter_value

        try:
            self.format_potential_template_parameters(workflow_run_context=workflow_run_context)
        except Exception as e:
            failure_reason = f"Failed to format jinja template: {str(e)}"
            await self.record_output_parameter_value(
                workflow_run_context, workflow_run_id, {"failure_reason": failure_reason}
            )
            return await self.build_block_result(
                success=False,
                failure_reason=failure_reason,
                output_parameter_value=None,
                status=BlockStatus.failed,
                workflow_run_block_id=workflow_run_block_id,
                organization_id=organization_id,
            )

        # TODO (kerem) we should always retry on terminated. We should make a distinction between retriable and
        # non-retryable terminations
        while will_retry:
            task_order, task_retry = await self.get_task_order(workflow_run_id, current_retry)
            is_first_task = task_order == 0
            task, step = await app.agent.create_task_and_step_from_block(
                task_block=self,
                workflow=workflow,
                workflow_run=workflow_run,
                workflow_run_context=workflow_run_context,
                task_order=task_order,
                task_retry=task_retry,
            )
            workflow_run_block = await app.DATABASE.update_workflow_run_block(
                workflow_run_block_id=workflow_run_block_id,
                task_id=task.task_id,
                organization_id=organization_id,
            )
            current_running_task = task
            organization = await app.DATABASE.get_organization(organization_id=workflow_run.organization_id)
            if not organization:
                raise Exception(f"Organization is missing organization_id={workflow_run.organization_id}")

            browser_state: BrowserState | None = None
            if is_first_task:
                # the first task block will create the browser state and do the navigation
                try:
                    browser_state = await app.BROWSER_MANAGER.get_or_create_for_workflow_run(
                        workflow_run=workflow_run,
                        url=self.url,
                        browser_session_id=browser_session_id,
                        browser_profile_id=workflow_run.browser_profile_id,
                    )
                    working_page = await browser_state.get_working_page()
                    if not working_page:
                        LOG.error(
                            "BrowserState has no page",
                            workflow_run_id=workflow_run.workflow_run_id,
                        )
                        raise MissingBrowserStatePage(workflow_run_id=workflow_run.workflow_run_id)
                    if working_page.url == "about:blank" and self.url:
                        await browser_state.navigate_to_url(page=working_page, url=self.url)

                    # When a browser profile is loaded, wait for the page to fully settle
                    # so that cookie-based authentication can redirect or restore the session
                    # BEFORE the agent starts interacting with the page.
                    if workflow_run.browser_profile_id:
                        LOG.info(
                            "Browser profile loaded — waiting for page to settle before agent acts",
                            browser_profile_id=workflow_run.browser_profile_id,
                            workflow_run_id=workflow_run.workflow_run_id,
                        )
                        try:
                            await working_page.wait_for_load_state("networkidle", timeout=10000)
                        except Exception:
                            LOG.debug(
                                "networkidle timeout after browser profile load (non-fatal)",
                                workflow_run_id=workflow_run.workflow_run_id,
                            )

                except Exception as e:
                    LOG.exception(
                        "Failed to get browser state for first task",
                        task_id=task.task_id,
                        workflow_run_id=workflow_run_id,
                    )
                    await self._handle_task_failure_with_error_detection(
                        task=task,
                        step=step,
                        browser_state=browser_state,
                        failure_reason=str(e),
                        organization_id=workflow_run.organization_id,
                    )
                    raise e

                try:
                    # add screenshot artifact for the first task
                    screenshot = await browser_state.take_fullpage_screenshot()
                    if screenshot:
                        await app.ARTIFACT_MANAGER.create_workflow_run_block_artifact(
                            workflow_run_block=workflow_run_block,
                            artifact_type=ArtifactType.SCREENSHOT_LLM,
                            data=screenshot,
                        )
                except Exception:
                    # Screenshot is informational only; never fail the block over it.
                    LOG.warning(
                        "Failed to take screenshot for first task",
                        task_id=task.task_id,
                        workflow_run_id=workflow_run_id,
                        exc_info=True,
                    )
            else:
                # if not the first task block, need to navigate manually
                browser_state = app.BROWSER_MANAGER.get_for_workflow_run(workflow_run_id=workflow_run_id)
                if browser_state is None:
                    raise MissingBrowserState(task_id=task.task_id, workflow_run_id=workflow_run_id)

                working_page = await browser_state.get_working_page()
                if not working_page:
                    LOG.error(
                        "BrowserState has no page",
                        workflow_run_id=workflow_run.workflow_run_id,
                    )
                    raise MissingBrowserStatePage(workflow_run_id=workflow_run.workflow_run_id)

                if self.url:
                    LOG.info(
                        "Navigating to page",
                        url=self.url,
                        workflow_run_id=workflow_run_id,
                        task_id=task.task_id,
                        workflow_id=workflow.workflow_id,
                        organization_id=workflow_run.organization_id,
                        step_id=step.step_id,
                    )
                    try:
                        await browser_state.navigate_to_url(page=working_page, url=self.url)
                    except Exception as e:
                        await self._handle_task_failure_with_error_detection(
                            task=task,
                            step=step,
                            browser_state=browser_state,
                            failure_reason=str(e),
                            organization_id=workflow_run.organization_id,
                        )
                        raise e

            try:
                current_context = skyvern_context.ensure_context()
                current_context.task_id = task.task_id
                close_browser_on_completion = browser_session_id is None and not workflow_run.browser_address
                await app.agent.execute_step(
                    organization=organization,
                    task=task,
                    step=step,
                    task_block=self,
                    browser_session_id=browser_session_id,
                    close_browser_on_completion=close_browser_on_completion,
                    complete_verification=self.complete_verification,
                    engine=self.engine,
                )
            except Exception as e:
                # Make sure the task is marked as failed in the database before raising the exception
                await self._handle_task_failure_with_error_detection(
                    task=task,
                    step=step,
                    browser_state=browser_state,
                    failure_reason=str(e),
                    organization_id=workflow_run.organization_id,
                )
                raise e
            finally:
                # NOTE(review): if ensure_context() itself raised, `current_context`
                # is unbound here and this line would raise NameError — confirm
                # ensure_context cannot fail before relying on this cleanup.
                current_context.task_id = None

            # Check task status
            updated_task = await app.DATABASE.get_task(
                task_id=task.task_id, organization_id=workflow_run.organization_id
            )
            if not updated_task:
                raise TaskNotFound(task.task_id)
            if not updated_task.status.is_final():
                raise UnexpectedTaskStatus(task_id=updated_task.task_id, status=updated_task.status)
            current_running_task = updated_task

            # Map each final task status to the block status reported to the caller.
            block_status_mapping = {
                TaskStatus.completed: BlockStatus.completed,
                TaskStatus.terminated: BlockStatus.terminated,
                TaskStatus.failed: BlockStatus.failed,
                TaskStatus.canceled: BlockStatus.canceled,
                TaskStatus.timed_out: BlockStatus.timed_out,
            }
            if updated_task.status == TaskStatus.completed or updated_task.status == TaskStatus.terminated:
                LOG.info(
                    "Task completed",
                    task_id=updated_task.task_id,
                    task_status=updated_task.status,
                    workflow_run_id=workflow_run_id,
                    workflow_id=workflow.workflow_id,
                    organization_id=workflow_run.organization_id,
                )
                success = updated_task.status == TaskStatus.completed

                downloaded_files: list[FileInfo] = []
                try:
                    async with asyncio.timeout(GET_DOWNLOADED_FILES_TIMEOUT):
                        downloaded_files = await app.STORAGE.get_downloaded_files(
                            organization_id=workflow_run.organization_id,
                            run_id=current_context.run_id
                            if current_context and current_context.run_id
                            else workflow_run_id or updated_task.task_id,
                        )
                except asyncio.TimeoutError:
                    LOG.warning("Timeout getting downloaded files", task_id=updated_task.task_id)

                task_screenshot_artifacts = await app.WORKFLOW_SERVICE.get_recent_task_screenshot_artifacts(
                    organization_id=workflow_run.organization_id,
                    task_id=updated_task.task_id,
                )
                workflow_screenshot_artifacts = await app.WORKFLOW_SERVICE.get_recent_workflow_screenshot_artifacts(
                    workflow_run_id=workflow_run_id,
                    organization_id=workflow_run.organization_id,
                )

                task_output = TaskOutput.from_task(
                    updated_task,
                    downloaded_files,
                    task_screenshot_artifact_ids=[a.artifact_id for a in task_screenshot_artifacts],
                    workflow_screenshot_artifact_ids=[a.artifact_id for a in workflow_screenshot_artifacts],
                )
                output_parameter_value = task_output.model_dump()
                await self.record_output_parameter_value(workflow_run_context, workflow_run_id, output_parameter_value)
                return await self.build_block_result(
                    success=success,
                    failure_reason=(
                        updated_task.failure_reason
                        if success
                        else (
                            updated_task.failure_reason
                            or f"Task {updated_task.task_id} finished with status {updated_task.status}"
                        )
                    ),
                    output_parameter_value=output_parameter_value,
                    status=block_status_mapping[updated_task.status],
                    workflow_run_block_id=workflow_run_block_id,
                    organization_id=organization_id,
                )
            elif updated_task.status == TaskStatus.canceled:
                LOG.info(
                    "Task canceled, cancelling block",
                    task_id=updated_task.task_id,
                    task_status=updated_task.status,
                    workflow_run_id=workflow_run_id,
                    workflow_id=workflow.workflow_id,
                    organization_id=workflow_run.organization_id,
                )
                return await self.build_block_result(
                    success=False,
                    failure_reason=updated_task.failure_reason or f"Task {updated_task.task_id} was canceled",
                    output_parameter_value=None,
                    status=block_status_mapping[updated_task.status],
                    workflow_run_block_id=workflow_run_block_id,
                    organization_id=organization_id,
                )
            elif updated_task.status == TaskStatus.timed_out:
                LOG.info(
                    "Task timed out, making the block time out",
                    task_id=updated_task.task_id,
                    task_status=updated_task.status,
                    workflow_run_id=workflow_run_id,
                    workflow_id=workflow.workflow_id,
                    organization_id=workflow_run.organization_id,
                )
                return await self.build_block_result(
                    success=False,
                    failure_reason=updated_task.failure_reason or f"Task {updated_task.task_id} timed out",
                    output_parameter_value=None,
                    status=block_status_mapping[updated_task.status],
                    workflow_run_block_id=workflow_run_block_id,
                    organization_id=organization_id,
                )
            else:
                # Failed (non-final-success) status: either retry or give up.
                current_retry += 1
                will_retry = current_retry <= self.max_retries
                retry_message = f", retrying task {current_retry}/{self.max_retries}" if will_retry else ""
                downloaded_files = []
                try:
                    async with asyncio.timeout(GET_DOWNLOADED_FILES_TIMEOUT):
                        downloaded_files = await app.STORAGE.get_downloaded_files(
                            organization_id=workflow_run.organization_id,
                            run_id=current_context.run_id
                            if current_context and current_context.run_id
                            else workflow_run_id or updated_task.task_id,
                        )

                except asyncio.TimeoutError:
                    LOG.warning("Timeout getting downloaded files", task_id=updated_task.task_id)

                task_screenshot_artifacts = await app.WORKFLOW_SERVICE.get_recent_task_screenshot_artifacts(
                    organization_id=workflow_run.organization_id,
                    task_id=updated_task.task_id,
                )
                workflow_screenshot_artifacts = await app.WORKFLOW_SERVICE.get_recent_workflow_screenshot_artifacts(
                    workflow_run_id=workflow_run_id,
                    organization_id=workflow_run.organization_id,
                )

                task_output = TaskOutput.from_task(
                    updated_task,
                    downloaded_files,
                    task_screenshot_artifact_ids=[a.artifact_id for a in task_screenshot_artifacts],
                    workflow_screenshot_artifact_ids=[a.artifact_id for a in workflow_screenshot_artifacts],
                )
                LOG.warning(
                    f"Task failed with status {updated_task.status}{retry_message}",
                    task_id=updated_task.task_id,
                    task_status=updated_task.status,
                    workflow_run_id=workflow_run_id,
                    workflow_id=workflow.workflow_id,
                    organization_id=workflow_run.organization_id,
                    current_retry=current_retry,
                    max_retries=self.max_retries,
                    task_output=task_output.model_dump_json(),
                )
                if not will_retry:
                    output_parameter_value = task_output.model_dump()
                    await self.record_output_parameter_value(
                        workflow_run_context, workflow_run_id, output_parameter_value
                    )
                    return await self.build_block_result(
                        success=False,
                        failure_reason=(
                            updated_task.failure_reason
                            or f"Task {updated_task.task_id} failed with status {updated_task.status}"
                        ),
                        output_parameter_value=output_parameter_value,
                        status=block_status_mapping[updated_task.status],
                        workflow_run_block_id=workflow_run_block_id,
                        organization_id=organization_id,
                    )

        # Loop exhausted without returning — report a generic failure.
        await self.record_output_parameter_value(workflow_run_context, workflow_run_id)
        return await self.build_block_result(
            success=False,
            status=BlockStatus.failed,
            failure_reason=(
                (current_running_task.failure_reason or f"Task {current_running_task.task_id} failed")
                if current_running_task
                else "Task failed (no task reference available)"
            ),
            workflow_run_block_id=workflow_run_block_id,
            organization_id=organization_id,
        )
||
class TaskBlock(BaseTaskBlock):
    """Concrete task block; all execution behavior is inherited from BaseTaskBlock."""

    # There is a mypy bug with Literal. Without the type: ignore, mypy will raise an error:
    # Parameter 1 of Literal[...] cannot be of type "Any"
    block_type: Literal[BlockType.TASK] = BlockType.TASK  # type: ignore
||
class LoopBlockExecutedResult(BaseModel):
    """Aggregated outcome of executing the nested blocks of one loop run."""

    # Per-iteration outputs annotated with their loop values.
    outputs_with_loop_values: list[list[dict[str, Any]]]
    # Results of every executed block, in execution order.
    block_outputs: list[BlockResult]
    # The last block definition that ran, if any.
    last_block: BlockTypeVar | None

    def is_canceled(self) -> bool:
        """True when the most recently executed block was canceled."""
        if not self.block_outputs:
            return False
        return self.block_outputs[-1].status == BlockStatus.canceled

    def is_completed(self) -> bool:
        """True when the loop ran to a usable end state (success, or a
        failure the last block is configured to continue past)."""
        if not self.block_outputs or self.last_block is None or self.is_canceled():
            return False

        final_output = self.block_outputs[-1]
        return bool(final_output.success or self.last_block.continue_on_failure)

    def is_terminated(self) -> bool:
        """True when the most recently executed block was terminated."""
        if not self.block_outputs:
            return False
        return self.block_outputs[-1].status == BlockStatus.terminated

    def get_failure_reason(self) -> str | None:
        """Human-readable failure reason, or None when the loop completed."""
        if self.is_completed():
            return None

        if self.is_canceled():
            last = self.last_block
            return (
                f"Block({last.label if last else ''}) with type "
                f"{last.block_type if last else ''} was canceled, canceling for loop"
            )

        if not self.block_outputs:
            return "No block has been executed"
        return self.block_outputs[-1].failure_reason
||
def compute_conditional_scopes(
|
||
label_to_block: dict[str, Any],
|
||
default_next_map: dict[str, str | None],
|
||
) -> dict[str, str]:
|
||
"""Map each block label to the conditional block label whose scope it belongs to.
|
||
|
||
For each conditional block, trace each branch's chain of blocks via
|
||
``default_next_map``. Labels that appear in **all** branch chains are
|
||
considered merge-point blocks (i.e. they come *after* the conditional
|
||
reconverges) and are **not** scoped. Labels that appear in fewer chains
|
||
than the total number of branches **are** inside the conditional.
|
||
|
||
Inner conditionals are themselves scoped to an outer conditional, but
|
||
their *own* branch targets are handled by a recursive application of
|
||
the same logic (inner wins via the ``if lbl not in scopes`` guard).
|
||
"""
|
||
scopes: dict[str, str] = {}
|
||
|
||
conditional_labels = [lbl for lbl, blk in label_to_block.items() if blk.block_type == BlockType.CONDITIONAL]
|
||
|
||
for cond_label in conditional_labels:
|
||
cond_block = label_to_block[cond_label]
|
||
branch_targets: list[str | None] = [branch.next_block_label for branch in cond_block.ordered_branches]
|
||
# Deduplicate while preserving order – two branches may point to the same target
|
||
seen_targets: set[str | None] = set()
|
||
unique_targets: list[str | None] = []
|
||
for t in branch_targets:
|
||
if t not in seen_targets:
|
||
seen_targets.add(t)
|
||
unique_targets.append(t)
|
||
|
||
num_branches = len(unique_targets)
|
||
if num_branches == 0:
|
||
continue
|
||
|
||
# For each unique branch target, trace the chain via default_next_map.
|
||
# Stop at other conditional blocks (they handle their own branches).
|
||
chain_sets: list[list[str]] = []
|
||
for target in unique_targets:
|
||
chain: list[str] = []
|
||
cur = target
|
||
while cur and cur in label_to_block:
|
||
chain.append(cur)
|
||
# Stop tracing when we hit another conditional – it owns its own sub-tree
|
||
if label_to_block[cur].block_type == BlockType.CONDITIONAL:
|
||
break
|
||
cur = default_next_map.get(cur)
|
||
chain_sets.append(chain)
|
||
|
||
# Count how many branch chains each label appears in
|
||
label_count: dict[str, int] = {}
|
||
for chain in chain_sets:
|
||
for lbl in chain:
|
||
label_count[lbl] = label_count.get(lbl, 0) + 1
|
||
|
||
# Labels appearing in ALL branches are merge points (after the conditional).
|
||
# Labels appearing in fewer branches are inside the conditional.
|
||
for chain in chain_sets:
|
||
for lbl in chain:
|
||
if label_count[lbl] >= num_branches:
|
||
# This is a merge point – stop scoping further along this chain
|
||
break
|
||
if lbl not in scopes:
|
||
scopes[lbl] = cond_label
|
||
|
||
return scopes
|
||
|
||
|
||
class ForLoopBlock(Block):
    """Workflow block that executes its nested blocks once per resolved loop value."""

    # There is a mypy bug with Literal. Without the type: ignore, mypy will raise an error:
    # Parameter 1 of Literal[...] cannot be of type "Any"
    block_type: Literal[BlockType.FOR_LOOP] = BlockType.FOR_LOOP  # type: ignore

    # Blocks executed on every iteration of the loop.
    loop_blocks: list[BlockTypeVar]
    # Parameter whose resolved value supplies the iterable of loop items.
    loop_over: PARAMETER_TYPE | None = None
    # Jinja-style reference (or natural-language prompt) resolving to the loop values;
    # takes precedence over loop_over when set.
    loop_variable_reference: str | None = None
    # When True, a missing/empty iterable completes the loop instead of failing.
    complete_if_empty: bool = False
    # Note: intentionally excludes `list` (unlike BaseTaskBlock.data_schema) because a list schema
    # does not describe the shape of individual loop items -- only dict schemas are meaningful here.
    data_schema: dict[str, Any] | str | None = None
||
def get_all_parameters(
|
||
self,
|
||
workflow_run_id: str,
|
||
) -> list[PARAMETER_TYPE]:
|
||
parameters = set()
|
||
if self.loop_over is not None:
|
||
parameters.add(self.loop_over)
|
||
|
||
for loop_block in self.loop_blocks:
|
||
for parameter in loop_block.get_all_parameters(workflow_run_id):
|
||
parameters.add(parameter)
|
||
return list(parameters)
|
||
|
||
def get_loop_block_context_parameters(self, workflow_run_id: str, loop_data: Any) -> list[ContextParameter]:
|
||
context_parameters = []
|
||
|
||
for loop_block in self.loop_blocks:
|
||
# todo: handle the case where the loop_block is a ForLoopBlock
|
||
|
||
all_parameters = loop_block.get_all_parameters(workflow_run_id)
|
||
for parameter in all_parameters:
|
||
if isinstance(parameter, ContextParameter):
|
||
context_parameters.append(parameter)
|
||
|
||
if self.loop_over is None:
|
||
return context_parameters
|
||
|
||
for context_parameter in context_parameters:
|
||
if context_parameter.source.key != self.loop_over.key:
|
||
continue
|
||
# If the loop_data is a dict, we need to check if the key exists in the loop_data
|
||
if isinstance(loop_data, dict):
|
||
if context_parameter.key in loop_data:
|
||
context_parameter.value = loop_data[context_parameter.key]
|
||
else:
|
||
raise ContextParameterValueNotFound(
|
||
parameter_key=context_parameter.key,
|
||
existing_keys=list(loop_data.keys()),
|
||
workflow_run_id=workflow_run_id,
|
||
)
|
||
else:
|
||
# If the loop_data is a list, we can directly assign the loop_data to the context_parameter value
|
||
context_parameter.value = loop_data
|
||
|
||
return context_parameters
|
||
|
||
    async def get_values_from_loop_variable_reference(
        self,
        workflow_run_context: WorkflowRunContext,
        workflow_run_id: str,
        workflow_run_block_id: str,
        organization_id: str | None = None,
    ) -> list[Any]:
        """
        Resolve self.loop_variable_reference into a list of loop values.

        Resolution order: (1) parse the reference as a Jinja template against the
        workflow run context; (2) if that yields nothing and the reference does not
        look like a parameter path, treat it as a natural-language prompt, run an
        extraction block, store its `loop_values` under a temporary context key and
        re-point self.loop_variable_reference at it; (3) fall back to a direct
        Jinja `tojson` render of the reference. A non-list result is wrapped in a
        single-element list. Raises FailedToFormatJinjaStyleParameter on failure.
        """
        parameter_value = None
        if self.loop_variable_reference:
            LOG.debug("Processing loop variable reference", loop_variable_reference=self.loop_variable_reference)

            # Check if this looks like a parameter path (contains dots and/or _output)
            is_likely_parameter_path = "extracted_information." in self.loop_variable_reference

            # Try parsing as Jinja template
            parameter_value = self.try_parse_jinja_template(workflow_run_context)

            if parameter_value is None and not is_likely_parameter_path:
                try:
                    # Create and execute extraction block using the current block's workflow_id
                    extraction_block = self._create_initial_extraction_block(
                        self.loop_variable_reference, workflow_run_context=workflow_run_context
                    )

                    LOG.info(
                        "Processing natural language loop input",
                        prompt=self.loop_variable_reference,
                        extraction_goal=extraction_block.data_extraction_goal,
                    )

                    extraction_result = await extraction_block.execute(
                        workflow_run_id=workflow_run_id,
                        workflow_run_block_id=workflow_run_block_id,
                        organization_id=organization_id,
                    )

                    if not extraction_result.success:
                        LOG.error("Extraction block failed", failure_reason=extraction_result.failure_reason)
                        raise ValueError(
                            f"Extraction block failed: "
                            f"{extraction_result.failure_reason or 'Unknown error (no failure reason provided)'}"
                        )

                    LOG.debug("Extraction block succeeded", output=extraction_result.output_parameter_value)

                    # Store the extraction result in the workflow context
                    await extraction_block.record_output_parameter_value(
                        workflow_run_context=workflow_run_context,
                        workflow_run_id=workflow_run_id,
                        value=extraction_result.output_parameter_value,
                    )

                    # Get the extracted information
                    if not isinstance(extraction_result.output_parameter_value, dict):
                        LOG.error(
                            "Extraction result output_parameter_value is not a dict",
                            output_parameter_value=extraction_result.output_parameter_value,
                        )
                        raise ValueError("Extraction result output_parameter_value is not a dictionary")

                    if "extracted_information" not in extraction_result.output_parameter_value:
                        LOG.error(
                            "Extraction result missing extracted_information key",
                            output_parameter_value=extraction_result.output_parameter_value,
                        )
                        raise ValueError("Extraction result missing extracted_information key")

                    extracted_info = extraction_result.output_parameter_value["extracted_information"]

                    # Handle different possible structures of extracted_info
                    if isinstance(extracted_info, list):
                        # If it's a list, take the first element
                        if len(extracted_info) > 0:
                            extracted_info = extracted_info[0]
                        else:
                            LOG.error("Extracted information list is empty")
                            raise ValueError("Extracted information list is empty")

                    # At this point, extracted_info should be a dict
                    if not isinstance(extracted_info, dict):
                        LOG.error("Invalid extraction result structure - not a dict", extracted_info=extracted_info)
                        raise ValueError("Extraction result is not a dictionary")

                    # Extract the loop values
                    loop_values = extracted_info.get("loop_values", [])

                    if not loop_values:
                        LOG.error("No loop values found in extraction result")
                        raise ValueError("No loop values found in extraction result")

                    LOG.info("Extracted loop values", count=len(loop_values), values=loop_values)

                    # Update the loop variable reference to point to the extracted loop values
                    # We'll use a temporary key that we can reference
                    temp_key = f"extracted_loop_values_{generate_random_string()}"
                    workflow_run_context.set_value(temp_key, loop_values)
                    self.loop_variable_reference = temp_key

                    # Now try parsing again with the updated reference
                    parameter_value = self.try_parse_jinja_template(workflow_run_context)

                except Exception as e:
                    LOG.error("Failed to process natural language loop input", error=str(e))
                    raise FailedToFormatJinjaStyleParameter(self.loop_variable_reference, str(e))

        if parameter_value is None:
            # Fall back to the original Jinja template approach
            value_template = f"{{{{ {self.loop_variable_reference.strip(' {}')} | tojson }}}}"
            try:
                value_json = self.format_block_parameter_template_from_workflow_run_context(
                    value_template, workflow_run_context
                )
            except Exception as e:
                raise FailedToFormatJinjaStyleParameter(value_template, str(e))
            parameter_value = json.loads(value_json)

        if isinstance(parameter_value, list):
            return parameter_value
        else:
            return [parameter_value]
||
async def get_loop_over_parameter_values(
|
||
self,
|
||
workflow_run_context: WorkflowRunContext,
|
||
workflow_run_id: str,
|
||
workflow_run_block_id: str,
|
||
organization_id: str | None = None,
|
||
) -> list[Any]:
|
||
# parse the value from self.loop_variable_reference and then from self.loop_over
|
||
if self.loop_variable_reference:
|
||
return await self.get_values_from_loop_variable_reference(
|
||
workflow_run_context,
|
||
workflow_run_id,
|
||
workflow_run_block_id,
|
||
organization_id,
|
||
)
|
||
elif self.loop_over is not None:
|
||
if isinstance(self.loop_over, WorkflowParameter):
|
||
parameter_value = workflow_run_context.get_value(self.loop_over.key)
|
||
elif isinstance(self.loop_over, OutputParameter):
|
||
# If the output parameter is for a TaskBlock, it will be a TaskOutput object. We need to extract the
|
||
# value from the TaskOutput object's extracted_information field.
|
||
output_parameter_value = workflow_run_context.get_value(self.loop_over.key)
|
||
if isinstance(output_parameter_value, dict) and "extracted_information" in output_parameter_value:
|
||
parameter_value = output_parameter_value["extracted_information"]
|
||
else:
|
||
parameter_value = output_parameter_value
|
||
elif isinstance(self.loop_over, ContextParameter):
|
||
parameter_value = self.loop_over.value
|
||
if not parameter_value:
|
||
source_parameter_value = workflow_run_context.get_value(self.loop_over.source.key)
|
||
if isinstance(source_parameter_value, dict):
|
||
if "extracted_information" in source_parameter_value:
|
||
parameter_value = source_parameter_value["extracted_information"].get(self.loop_over.key)
|
||
else:
|
||
parameter_value = source_parameter_value.get(self.loop_over.key)
|
||
else:
|
||
raise ValueError("ContextParameter source value should be a dict")
|
||
else:
|
||
raise NotImplementedError()
|
||
|
||
else:
|
||
if self.complete_if_empty:
|
||
return []
|
||
else:
|
||
raise NoIterableValueFound()
|
||
|
||
if isinstance(parameter_value, list):
|
||
return parameter_value
|
||
else:
|
||
# TODO (kerem): Should we raise an error here?
|
||
return [parameter_value]
|
||
|
||
def try_parse_jinja_template(self, workflow_run_context: WorkflowRunContext) -> Any | None:
|
||
"""Try to parse the loop variable reference as a Jinja template."""
|
||
try:
|
||
# Try the exact reference first
|
||
try:
|
||
if self.loop_variable_reference is None:
|
||
return None
|
||
value_template = f"{{{{ {self.loop_variable_reference.strip(' {}')} | tojson }}}}"
|
||
value_json = self.format_block_parameter_template_from_workflow_run_context(
|
||
value_template, workflow_run_context
|
||
)
|
||
parameter_value = json.loads(value_json)
|
||
if parameter_value is not None:
|
||
return parameter_value
|
||
except Exception:
|
||
pass
|
||
|
||
# If that fails, try common access patterns for extraction results
|
||
if self.loop_variable_reference is None:
|
||
return None
|
||
access_patterns = [
|
||
f"{self.loop_variable_reference}.extracted_information",
|
||
f"{self.loop_variable_reference}.extracted_information.results",
|
||
f"{self.loop_variable_reference}.results",
|
||
]
|
||
|
||
for pattern in access_patterns:
|
||
try:
|
||
value_template = f"{{{{ {pattern.strip(' {}')} | tojson }}}}"
|
||
value_json = self.format_block_parameter_template_from_workflow_run_context(
|
||
value_template, workflow_run_context
|
||
)
|
||
parameter_value = json.loads(value_json)
|
||
if parameter_value is not None:
|
||
return parameter_value
|
||
except Exception:
|
||
continue
|
||
|
||
return None
|
||
except Exception:
|
||
return None
|
||
|
||
def _create_initial_extraction_block(
    self,
    natural_language_prompt: str,
    workflow_run_context: "WorkflowRunContext | None" = None,
) -> "ExtractionBlock":
    """Build an ExtractionBlock that converts a natural-language prompt into loop values.

    The generated block asks for an object with a single ``loop_values`` array.
    When the loop declares a usable ``data_schema`` (a dict, or a JSON string
    that parses to a dict after Jinja interpolation), that schema describes
    each array item; otherwise each item defaults to a plain string.
    """

    def resolve_items_schema() -> dict[str, Any] | None:
        # Returns the user-provided per-item schema, or None to fall back to
        # the default string-array schema.
        raw_schema = self.data_schema
        if raw_schema is None:
            return None
        if isinstance(raw_schema, dict):
            return raw_schema
        if not isinstance(raw_schema, str):
            # Any other type is silently ignored, matching the original branching.
            return None
        # Interpolate Jinja templates before parsing, mirroring how
        # BaseTaskBlock.setup_block_v2 treats data_schema strings.
        schema_text = raw_schema
        if workflow_run_context is not None:
            schema_text = self.format_block_parameter_template_from_workflow_run_context(
                schema_text, workflow_run_context
            )
        try:
            parsed_schema = json.loads(schema_text)
        except (json.JSONDecodeError, TypeError):
            LOG.warning(
                "Failed to parse data_schema string, falling back to default string schema",
                block_label=self.label,
                data_schema=self.data_schema,
            )
            return None
        if isinstance(parsed_schema, dict):
            return parsed_schema
        LOG.warning(
            "Parsed data_schema is not a dict, falling back to default string schema",
            block_label=self.label,
            data_schema=self.data_schema,
        )
        return None

    items_schema = resolve_items_schema()
    if items_schema is not None:
        # User provided a custom schema — each loop iteration yields a structured object.
        loop_values_property: dict[str, Any] = {
            "type": "array",
            "description": "Array of structured values to iterate over, matching the provided schema.",
            "items": items_schema,
        }
    else:
        # Default: extract a simple string array.
        loop_values_property = {
            "type": "array",
            "description": "Array of values to iterate over. Each value should be the primary data needed for the loop blocks.",
            "items": {
                "type": "string",
                "description": "The primary value to be used in the loop iteration (e.g., URL, text, identifier, etc.)",
            },
        }
    data_schema: dict[str, Any] = {
        "type": "object",
        "properties": {"loop_values": loop_values_property},
    }

    # Extraction goal embedding the caller's natural-language prompt.
    extraction_goal = prompt_engine.load_prompt(
        "extraction_prompt_for_nat_language_loops", natural_language_prompt=natural_language_prompt
    )

    # Temporary output parameter scoped to the current block's workflow.
    output_param = OutputParameter(
        output_parameter_id=str(uuid.uuid4()),
        key=f"natural_lang_extraction_{generate_random_string()}",
        workflow_id=self.output_parameter.workflow_id,
        created_at=datetime.now(),
        modified_at=datetime.now(),
        parameter_type=ParameterType.OUTPUT,
        description="Natural language extraction result",
    )

    return ExtractionBlock(
        label=f"natural_lang_extraction_{generate_random_string()}",
        data_extraction_goal=extraction_goal,
        data_schema=data_schema,
        output_parameter=output_param,
    )
|
||
|
||
def _build_loop_graph(
    self, blocks: "list[BlockTypeVar]"
) -> "tuple[str, dict[str, BlockTypeVar], dict[str, str | None]]":
    """Validate the loop body as a single-entry DAG.

    Builds the label->block and label->default-next maps, wires implicit
    sequential edges when no conditional blocks are present, then checks
    that exactly one entry block exists and that the graph is acyclic
    (Kahn's algorithm). Raises InvalidWorkflowDefinition on any violation.

    Returns (entry_label, label->block map, label->default next label map).
    """
    blocks_by_label: dict[str, BlockTypeVar] = {}
    next_by_label: dict[str, str | None] = {}
    for candidate in blocks:
        if candidate.label in blocks_by_label:
            raise InvalidWorkflowDefinition(f"Duplicate block label detected in loop: {candidate.label}")
        blocks_by_label[candidate.label] = candidate
        next_by_label[candidate.label] = candidate.next_block_label

    # Without conditionals the blocks chain sequentially by list position,
    # unless a block already declares an explicit next label.
    if all(candidate.block_type != BlockType.CONDITIONAL for candidate in blocks):
        for position, candidate in enumerate(blocks[:-1]):
            if next_by_label.get(candidate.label) is None:
                next_by_label[candidate.label] = blocks[position + 1].label

    successors: dict[str, set[str]] = {label: set() for label in blocks_by_label}
    incoming_counts: dict[str, int] = dict.fromkeys(blocks_by_label, 0)

    def register_edge(source: str, target: str | None) -> None:
        if not target:
            return
        if target not in blocks_by_label:
            raise InvalidWorkflowDefinition(
                f"Block {source} references unknown next_block_label {target} inside loop {self.label}"
            )
        # Allow multiple branches of a conditional to point to the same target
        # without double-counting the incoming edge.
        if target not in successors[source]:
            successors[source].add(target)
            incoming_counts[target] += 1

    for label, candidate in blocks_by_label.items():
        if candidate.block_type == BlockType.CONDITIONAL:
            for branch in candidate.ordered_branches:
                register_edge(label, branch.next_block_label)
        else:
            register_edge(label, next_by_label.get(label))

    entry_labels = [label for label, count in incoming_counts.items() if count == 0]
    if not entry_labels:
        raise InvalidWorkflowDefinition(f"No entry block found for loop {self.label}")
    if len(entry_labels) > 1:
        raise InvalidWorkflowDefinition(
            f"Multiple entry blocks detected in loop {self.label} ({', '.join(sorted(entry_labels))}); only one entry block is supported."
        )

    # Kahn's algorithm: if we cannot visit every node from the single entry,
    # the graph contains a cycle.
    remaining_degree = dict(incoming_counts)
    pending: deque[str] = deque([entry_labels[0]])
    visited_total = 0
    while pending:
        label = pending.popleft()
        visited_total += 1
        for successor in successors[label]:
            remaining_degree[successor] -= 1
            if remaining_degree[successor] == 0:
                pending.append(successor)

    if visited_total != len(blocks_by_label):
        raise InvalidWorkflowDefinition(f"Loop {self.label} contains a cycle; DAG traversal is required.")

    return entry_labels[0], blocks_by_label, next_by_label
|
||
|
||
async def execute_loop_helper(
    self,
    workflow_run_id: str,
    workflow_run_block_id: str,
    workflow_run_context: WorkflowRunContext,
    loop_over_values: list[Any],
    organization_id: str | None = None,
    browser_session_id: str | None = None,
) -> LoopBlockExecutedResult:
    """Run the loop body (a single-entry DAG of blocks) once per loop value.

    For each value in ``loop_over_values`` the loop graph built by
    ``_build_loop_graph`` is traversed from its entry label, following either
    the default next-label map or — for conditional blocks — the branch label
    reported in the block's output. Enforces ``DEFAULT_MAX_LOOP_ITERATIONS``
    across iterations and ``DEFAULT_MAX_STEPS_PER_ITERATION`` within one
    iteration, and honors the block/loop ``continue_on_failure`` /
    ``next_loop_on_failure`` flags.

    Returns a ``LoopBlockExecutedResult`` carrying per-iteration output
    values, every ``BlockResult`` produced, and the last block executed.
    """
    outputs_with_loop_values: list[list[dict[str, Any]]] = []
    block_outputs: list[BlockResult] = []
    current_block: BlockTypeVar | None = None

    # Validate the loop body and precompute traversal metadata.
    # conditional_scopes appears to map a block label to the label of the
    # conditional whose branch it belongs to (used for timeline parenting
    # below) — NOTE(review): confirm against compute_conditional_scopes.
    start_label, label_to_block, default_next_map = self._build_loop_graph(self.loop_blocks)
    conditional_scopes = compute_conditional_scopes(label_to_block, default_next_map)

    for loop_idx, loop_over_value in enumerate(loop_over_values):
        # Check max_iterations limit
        if loop_idx >= DEFAULT_MAX_LOOP_ITERATIONS:
            LOG.info(
                f"ForLoopBlock: Reached max_iterations limit ({DEFAULT_MAX_LOOP_ITERATIONS}), stopping loop",
                workflow_run_id=workflow_run_id,
                loop_idx=loop_idx,
                max_iterations=DEFAULT_MAX_LOOP_ITERATIONS,
            )
            failure_block_result = await self.build_block_result(
                success=False,
                status=BlockStatus.failed,
                failure_reason=f"Reached max_loop_iterations limit of {DEFAULT_MAX_LOOP_ITERATIONS}",
                workflow_run_block_id=workflow_run_block_id,
                organization_id=organization_id,
            )
            block_outputs.append(failure_block_result)
            return LoopBlockExecutedResult(
                outputs_with_loop_values=outputs_with_loop_values,
                block_outputs=block_outputs,
                last_block=current_block,
            )
        LOG.info("Starting loop iteration", loop_idx=loop_idx, loop_over_value=loop_over_value)
        # context parameter has been deprecated. However, it's still used by task v2 - we should migrate away from it.
        context_parameters_with_value = self.get_loop_block_context_parameters(workflow_run_id, loop_over_value)
        for context_parameter in context_parameters_with_value:
            workflow_run_context.set_value(context_parameter.key, context_parameter.value)

        # Per-iteration record of {loop_value, output_parameter, output_value}.
        each_loop_output_values: list[dict[str, Any]] = []

        iteration_step_count = 0
        LOG.info(
            f"ForLoopBlock: Starting iteration {loop_idx} with max_steps_per_iteration={DEFAULT_MAX_STEPS_PER_ITERATION}",
            workflow_run_id=workflow_run_id,
            loop_idx=loop_idx,
            max_steps_per_iteration=DEFAULT_MAX_STEPS_PER_ITERATION,
        )

        block_idx = 0
        current_label: str | None = start_label
        # Conditional label -> workflow_run_block_id, used to parent branch
        # targets under their conditional in the timeline.
        conditional_wrb_ids: dict[str, str] = {}
        while current_label:
            loop_block = label_to_block.get(current_label)
            if not loop_block:
                # Should not happen after _build_loop_graph validation, but
                # fail the whole loop defensively if it does.
                LOG.error(
                    "Unable to find loop block with label in loop graph",
                    workflow_run_id=workflow_run_id,
                    loop_label=self.label,
                    current_label=current_label,
                )
                failure_block_result = await self.build_block_result(
                    success=False,
                    status=BlockStatus.failed,
                    failure_reason=f"Unable to find block with label {current_label} inside loop {self.label}",
                    workflow_run_block_id=workflow_run_block_id,
                    organization_id=organization_id,
                )
                block_outputs.append(failure_block_result)
                outputs_with_loop_values.append(each_loop_output_values)
                return LoopBlockExecutedResult(
                    outputs_with_loop_values=outputs_with_loop_values,
                    block_outputs=block_outputs,
                    last_block=current_block,
                )

            # Expose the current loop position/value to templates via block
            # metadata, under both the loop's label and the inner block's label.
            metadata: BlockMetadata = {
                "current_index": loop_idx,
                "current_value": loop_over_value,
                "current_item": loop_over_value,
            }
            workflow_run_context.update_block_metadata(self.label, metadata)
            workflow_run_context.update_block_metadata(loop_block.label, metadata)

            # Execute a deep copy so per-iteration template formatting does
            # not mutate the shared block definition.
            original_loop_block = loop_block
            loop_block = loop_block.model_copy(deep=True)
            current_block = loop_block

            # Determine the parent for timeline nesting: if this block is
            # inside a conditional's scope, parent it to that conditional's
            # workflow_run_block rather than the loop's.
            parent_wrb_id = workflow_run_block_id
            if current_label in conditional_scopes:
                cond_label = conditional_scopes[current_label]
                if cond_label in conditional_wrb_ids:
                    parent_wrb_id = conditional_wrb_ids[cond_label]

            block_output = await loop_block.execute_safe(
                workflow_run_id=workflow_run_id,
                parent_workflow_run_block_id=parent_wrb_id,
                organization_id=organization_id,
                browser_session_id=browser_session_id,
            )

            # Track conditional workflow_run_block_ids so branch targets
            # can be parented to them.
            if loop_block.block_type == BlockType.CONDITIONAL and block_output.workflow_run_block_id:
                conditional_wrb_ids[current_label] = block_output.workflow_run_block_id

            output_value = (
                workflow_run_context.get_value(block_output.output_parameter.key)
                if workflow_run_context.has_value(block_output.output_parameter.key)
                else None
            )

            # Log the output value for debugging
            if block_output.output_parameter.key.endswith("_output"):
                LOG.debug("Block output", block_type=loop_block.block_type, output_value=output_value)

            # Log URL information for goto_url blocks
            if loop_block.block_type == BlockType.GOTO_URL:
                LOG.info("Goto URL block executed", url=loop_block.url, loop_idx=loop_idx)
            each_loop_output_values.append(
                {
                    "loop_value": loop_over_value,
                    "output_parameter": block_output.output_parameter,
                    "output_value": output_value,
                }
            )
            # Best-effort: persist the current loop position on the executed
            # block's DB row; a failure here must not fail the loop.
            try:
                if block_output.workflow_run_block_id:
                    await app.DATABASE.update_workflow_run_block(
                        workflow_run_block_id=block_output.workflow_run_block_id,
                        organization_id=organization_id,
                        current_value=str(loop_over_value),
                        current_index=loop_idx,
                    )
            except Exception:
                LOG.warning(
                    "Failed to update workflow run block",
                    workflow_run_block_id=block_output.workflow_run_block_id,
                    loop_over_value=loop_over_value,
                    loop_idx=loop_idx,
                )
            # Restore the shared definition for flag checks below.
            loop_block = original_loop_block
            block_outputs.append(block_output)

            # Check max_steps_per_iteration limit after each block execution
            iteration_step_count += 1  # Count each block execution as a step
            if iteration_step_count >= DEFAULT_MAX_STEPS_PER_ITERATION:
                LOG.info(
                    f"ForLoopBlock: Reached max_steps_per_iteration limit ({DEFAULT_MAX_STEPS_PER_ITERATION}) in iteration {loop_idx}, stopping iteration",
                    workflow_run_id=workflow_run_id,
                    loop_idx=loop_idx,
                    max_steps_per_iteration=DEFAULT_MAX_STEPS_PER_ITERATION,
                    iteration_step_count=iteration_step_count,
                )
                # Create a failure block result for this iteration
                failure_block_result = await self.build_block_result(
                    success=False,
                    status=BlockStatus.failed,
                    failure_reason=f"Reached max_steps_per_iteration limit of {DEFAULT_MAX_STEPS_PER_ITERATION}",
                    workflow_run_block_id=workflow_run_block_id,
                    organization_id=organization_id,
                )
                block_outputs.append(failure_block_result)
                # If next_loop_on_failure is False, stop the entire loop
                if not self.next_loop_on_failure:
                    outputs_with_loop_values.append(each_loop_output_values)
                    return LoopBlockExecutedResult(
                        outputs_with_loop_values=outputs_with_loop_values,
                        block_outputs=block_outputs,
                        last_block=current_block,
                    )
                # If next_loop_on_failure is True, break out of the block loop for this iteration
                break

            # Cancellation aborts the whole loop immediately.
            if block_output.status == BlockStatus.canceled:
                LOG.info(
                    f"ForLoopBlock: Block with type {loop_block.block_type} at index {block_idx} during loop {loop_idx} was canceled for workflow run {workflow_run_id}, canceling for loop",
                    block_type=loop_block.block_type,
                    workflow_run_id=workflow_run_id,
                    block_idx=block_idx,
                    block_result=block_outputs,
                )
                outputs_with_loop_values.append(each_loop_output_values)
                return LoopBlockExecutedResult(
                    outputs_with_loop_values=outputs_with_loop_values,
                    block_outputs=block_outputs,
                    last_block=current_block,
                )

            # Hard failure with no continue/next-loop flag set anywhere:
            # terminate the whole loop early.
            if (
                not block_output.success
                and not loop_block.continue_on_failure
                and not loop_block.next_loop_on_failure
                and not self.next_loop_on_failure
            ):
                LOG.info(
                    f"ForLoopBlock: Encountered a failure processing block {block_idx} during loop {loop_idx}, terminating early",
                    block_outputs=block_outputs,
                    loop_idx=loop_idx,
                    block_idx=block_idx,
                    loop_over_value=loop_over_value,
                    loop_block_continue_on_failure=loop_block.continue_on_failure,
                    failure_reason=block_output.failure_reason,
                    next_loop_on_failure=loop_block.next_loop_on_failure or self.next_loop_on_failure,
                )
                outputs_with_loop_values.append(each_loop_output_values)
                return LoopBlockExecutedResult(
                    outputs_with_loop_values=outputs_with_loop_values,
                    block_outputs=block_outputs,
                    last_block=current_block,
                )

            # Success (or tolerated failure): advance to the next block in
            # the graph. Conditionals report their chosen branch label in
            # their output value; other blocks use the default next map.
            if block_output.success or loop_block.continue_on_failure:
                next_label: str | None = None
                if loop_block.block_type == BlockType.CONDITIONAL:
                    branch_metadata = (
                        block_output.output_parameter_value
                        if isinstance(block_output.output_parameter_value, dict)
                        else None
                    )
                    next_label = (branch_metadata or {}).get("next_block_label")
                else:
                    next_label = default_next_map.get(loop_block.label)

                # No next label means this iteration's path is complete.
                if not next_label:
                    break

                if next_label not in label_to_block:
                    failure_block_result = await self.build_block_result(
                        success=False,
                        status=BlockStatus.failed,
                        failure_reason=f"Next block label {next_label} not found inside loop {self.label}",
                        workflow_run_block_id=workflow_run_block_id,
                        organization_id=organization_id,
                    )
                    block_outputs.append(failure_block_result)
                    outputs_with_loop_values.append(each_loop_output_values)
                    return LoopBlockExecutedResult(
                        outputs_with_loop_values=outputs_with_loop_values,
                        block_outputs=block_outputs,
                        last_block=current_block,
                    )

                current_label = next_label
                block_idx += 1
                continue

            # Failure, but a next-loop flag is set: skip to the next loop value.
            if loop_block.next_loop_on_failure or self.next_loop_on_failure:
                LOG.info(
                    f"ForLoopBlock: Block {block_idx} during loop {loop_idx} failed but will continue to next iteration",
                    block_outputs=block_outputs,
                    loop_idx=loop_idx,
                    block_idx=block_idx,
                    loop_over_value=loop_over_value,
                    loop_block_next_loop_on_failure=loop_block.next_loop_on_failure or self.next_loop_on_failure,
                )
                break

            break

        outputs_with_loop_values.append(each_loop_output_values)

    return LoopBlockExecutedResult(
        outputs_with_loop_values=outputs_with_loop_values,
        block_outputs=block_outputs,
        last_block=current_block,
    )
|
||
|
||
async def execute(
    self,
    workflow_run_id: str,
    workflow_run_block_id: str,
    organization_id: str | None = None,
    browser_session_id: str | None = None,
    **kwargs: dict,
) -> BlockResult:
    """Execute the for-loop block.

    Resolves the iterable of loop values, persists them on the workflow run
    block, runs the loop body via ``execute_loop_helper``, records the
    aggregated outputs as this block's output parameter, and maps the loop
    result onto a final BlockStatus (canceled / completed / terminated /
    failed).
    """
    workflow_run_context = self.get_workflow_run_context(workflow_run_id)
    try:
        loop_over_values = await self.get_loop_over_parameter_values(
            workflow_run_context=workflow_run_context,
            workflow_run_id=workflow_run_id,
            workflow_run_block_id=workflow_run_block_id,
            organization_id=organization_id,
        )
    except Exception as e:
        # Value resolution itself failed — fail the block without running anything.
        return await self.build_block_result(
            success=False,
            failure_reason=f"failed to get loop values: {str(e)}",
            status=BlockStatus.failed,
            workflow_run_block_id=workflow_run_block_id,
            organization_id=organization_id,
        )

    # Persist the resolved values so they are visible on the run timeline.
    await app.DATABASE.update_workflow_run_block(
        workflow_run_block_id=workflow_run_block_id,
        organization_id=organization_id,
        loop_values=loop_over_values,
    )

    LOG.info(
        f"Number of loop_over values: {len(loop_over_values)}",
        block_type=self.block_type,
        workflow_run_id=workflow_run_id,
        num_loop_over_values=len(loop_over_values),
    )
    if not loop_over_values or len(loop_over_values) == 0:
        # Empty iterable: completed if complete_if_empty, else terminated.
        LOG.info(
            "No loop_over values found, terminating block",
            block_type=self.block_type,
            workflow_run_id=workflow_run_id,
            num_loop_over_values=len(loop_over_values),
            complete_if_empty=self.complete_if_empty,
        )
        await self.record_output_parameter_value(workflow_run_context, workflow_run_id, [])
        if self.complete_if_empty:
            return await self.build_block_result(
                success=True,
                failure_reason=None,
                output_parameter_value=[],
                status=BlockStatus.completed,
                workflow_run_block_id=workflow_run_block_id,
                organization_id=organization_id,
            )
        else:
            return await self.build_block_result(
                success=False,
                failure_reason="No iterable value found for the loop block",
                status=BlockStatus.terminated,
                workflow_run_block_id=workflow_run_block_id,
                organization_id=organization_id,
            )

    if not self.loop_blocks or len(self.loop_blocks) == 0:
        # A loop with no body cannot do anything useful — terminate.
        LOG.info(
            "No defined blocks to loop, terminating block",
            block_type=self.block_type,
            workflow_run_id=workflow_run_id,
            num_loop_blocks=len(self.loop_blocks),
        )
        await self.record_output_parameter_value(workflow_run_context, workflow_run_id, [])
        return await self.build_block_result(
            success=False,
            failure_reason="No defined blocks to loop",
            status=BlockStatus.terminated,
            workflow_run_block_id=workflow_run_block_id,
            organization_id=organization_id,
        )

    try:
        loop_executed_result = await self.execute_loop_helper(
            workflow_run_id=workflow_run_id,
            workflow_run_block_id=workflow_run_block_id,
            workflow_run_context=workflow_run_context,
            loop_over_values=loop_over_values,
            organization_id=organization_id,
            browser_session_id=browser_session_id,
        )
    except InvalidWorkflowDefinition as exc:
        # The loop body failed graph validation (duplicate labels, multiple
        # entries, cycles, unknown next labels).
        LOG.error(
            "Loop graph validation failed",
            error=str(exc),
            workflow_run_id=workflow_run_id,
            loop_label=self.label,
        )
        return await self.build_block_result(
            success=False,
            failure_reason=str(exc),
            status=BlockStatus.failed,
            workflow_run_block_id=workflow_run_block_id,
            organization_id=organization_id,
        )
    await self.record_output_parameter_value(
        workflow_run_context, workflow_run_id, loop_executed_result.outputs_with_loop_values
    )
    # Map the aggregated loop result onto the block's final status.
    block_status = BlockStatus.failed
    success = False

    if loop_executed_result.is_canceled():
        block_status = BlockStatus.canceled
    elif loop_executed_result.is_completed():
        block_status = BlockStatus.completed
        success = True
    elif loop_executed_result.is_terminated():
        block_status = BlockStatus.terminated
    else:
        block_status = BlockStatus.failed

    return await self.build_block_result(
        success=success,
        failure_reason=loop_executed_result.get_failure_reason(),
        output_parameter_value=loop_executed_result.outputs_with_loop_values,
        status=block_status,
        workflow_run_block_id=workflow_run_block_id,
        organization_id=organization_id,
    )
|
||
|
||
|
||
class Credential(SimpleNamespace):
    """Attribute-style container for resolved credential fields.

    ``CodeBlock.execute`` builds one of these per credential parameter
    (``Credential(**real_secret_values)``) so user-supplied code can read
    secret fields by attribute access.
    """

    pass
|
||
|
||
|
||
class CodeBlock(Block):
    """Workflow block that runs user-supplied Python code in a restricted sandbox.

    The code is AST-screened (no imports, no dunder attribute access), then
    executed inside an async wrapper with a curated globals dict and the
    current Playwright page bound as ``page``. The wrapper's locals become
    the block's JSON-serialized output.
    """

    # There is a mypy bug with Literal. Without the type: ignore, mypy will raise an error:
    # Parameter 1 of Literal[...] cannot be of type "Any"
    block_type: Literal[BlockType.CODE] = BlockType.CODE  # type: ignore

    # User-supplied Python source to execute (may contain Jinja templates).
    code: str
    parameters: list[PARAMETER_TYPE] = []

    @staticmethod
    def is_safe_code(code: str) -> None:
        """Reject obviously unsafe code; raises InsecureCodeDetected.

        NOTE(review): this is an AST denylist (dunder attribute access and
        import statements), not a full sandbox — the empty ``__builtins__``
        in build_safe_vars is the other half of the containment.
        """
        tree = ast.parse(code)
        for node in ast.walk(tree):
            # Block attribute access like obj.__class__ (any node with an
            # `attr` field starting with a double underscore).
            if hasattr(node, "attr") and str(node.attr).startswith("__"):
                raise InsecureCodeDetected("Not allowed to access private methods or attributes")
            if isinstance(node, ast.Import) or isinstance(node, ast.ImportFrom):
                raise InsecureCodeDetected("Not allowed to import modules")

    @staticmethod
    def build_safe_vars() -> dict[str, Any]:
        """Return the globals dict the user code runs with.

        ``__builtins__`` is emptied so only the names listed here are
        reachable; ``locals`` must be provided explicitly because the
        generated wrapper ends with ``return locals()``.
        """
        return {
            "__builtins__": {},  # only allow several builtins due to security concerns
            "locals": locals,
            "print": print,
            "len": len,
            "range": range,
            "str": str,
            "int": int,
            "dict": dict,
            "list": list,
            "tuple": tuple,
            "set": set,
            "bool": bool,
            "asyncio": asyncio,
            "re": re,
            "json": json,
            "Exception": Exception,
        }

    def generate_async_user_function(
        self, code: str, page: Page, parameters: dict[str, Any] | None = None
    ) -> Callable[[], Awaitable[dict[str, Any]]]:
        """Compile the user code into an async ``wrapper()`` returning its locals.

        The user code is indented into an ``async def wrapper():`` body;
        ``parameters`` and the Playwright ``page`` are injected into the
        sandboxed globals before ``exec``.
        """
        code = textwrap.indent(code, "    ")
        full_code = f"""
async def wrapper():
{code}
    return locals()
"""
        runtime_variables: dict[str, Callable[[], Awaitable[dict[str, Any]]]] = {}
        safe_vars = self.build_safe_vars()
        if parameters:
            safe_vars.update(parameters)
        safe_vars["page"] = page
        # `exec` is guarded by is_safe_code() + the restricted globals above.
        exec(full_code, safe_vars, runtime_variables)
        return runtime_variables["wrapper"]

    def get_all_parameters(
        self,
        workflow_run_id: str,
    ) -> list[PARAMETER_TYPE]:
        """Return the block's declared parameters (``workflow_run_id`` unused)."""
        return self.parameters

    def format_potential_template_parameters(self, workflow_run_context: WorkflowRunContext) -> None:
        """Render Jinja templates embedded in the user code in place."""
        self.code = self.format_block_parameter_template_from_workflow_run_context(self.code, workflow_run_context)

    async def execute(
        self,
        workflow_run_id: str,
        workflow_run_block_id: str,
        organization_id: str | None = None,
        browser_session_id: str | None = None,
        **kwargs: dict,
    ) -> BlockResult:
        """Run the user code against the current browser page.

        Pipeline: validate permission -> acquire browser/page -> render Jinja
        in the code -> resolve parameters (revealing secrets / generating
        TOTP codes where needed) -> AST safety check -> exec + await the
        wrapper -> JSON-sanitize and record its locals as the output.
        """
        await app.AGENT_FUNCTION.validate_code_block(organization_id=organization_id)

        browser_state = await self.get_or_create_browser_state(
            workflow_run_id=workflow_run_id,
            organization_id=organization_id,
            browser_session_id=browser_session_id,
        )
        if not browser_state:
            return await self.build_block_result(
                success=False,
                failure_reason="No browser found to run the code block",
                output_parameter_value=None,
                status=BlockStatus.failed,
                workflow_run_block_id=workflow_run_block_id,
                organization_id=organization_id,
            )

        page = await browser_state.get_working_page()
        if not page:
            return await self.build_block_result(
                success=False,
                failure_reason="No page found to run the code block",
                output_parameter_value=None,
                status=BlockStatus.failed,
                workflow_run_block_id=workflow_run_block_id,
                organization_id=organization_id,
            )

        # get workflow run context
        workflow_run_context = self.get_workflow_run_context(workflow_run_id)
        try:
            self.format_potential_template_parameters(workflow_run_context)
        except Exception as e:
            return await self.build_block_result(
                success=False,
                failure_reason=f"Failed to format jinja template: {str(e)}",
                output_parameter_value=None,
                status=BlockStatus.failed,
                workflow_run_block_id=workflow_run_block_id,
                organization_id=organization_id,
            )

        # get all parameters into a dictionary
        parameter_values = {}
        for parameter in self.parameters:
            value = workflow_run_context.get_value(parameter.key)
            # Non-secret parameters pass through unchanged.
            if not parameter.parameter_type.is_secret_or_credential() and not (
                # NOTE: skyvern credential is a 'credential_id' workflow parameter type
                parameter.parameter_type == ParameterType.WORKFLOW
                and parameter.workflow_parameter_type is not None
                and parameter.workflow_parameter_type.is_credential_type()
            ):
                parameter_values[parameter.key] = value
                continue
            if isinstance(value, dict):
                # Credential dict: swap each placeholder for its real secret
                # value; TOTP markers are replaced with a freshly generated code.
                real_secret_values = {}
                for credential_field, credential_place_holder in value.items():
                    # "context" is a skyvern-defined field to reduce LLM hallucination
                    if credential_field == "context":
                        continue
                    secret_value = workflow_run_context.get_original_secret_value_or_none(credential_place_holder)
                    if (
                        secret_value == BitwardenConstants.TOTP
                        or secret_value == OnePasswordConstants.TOTP
                        or secret_value == AzureVaultConstants.TOTP
                    ):
                        totp_secret_key = workflow_run_context.totp_secret_value_key(credential_place_holder)
                        totp_secret = workflow_run_context.get_original_secret_value_or_none(totp_secret_key)
                        if totp_secret:
                            secret_value = pyotp.TOTP(totp_secret).now()
                        else:
                            LOG.warning(
                                "No TOTP secret found, returning the parameter value as is",
                                parameter=credential_place_holder,
                            )

                    real_secret_value = secret_value if secret_value is not None else credential_place_holder
                    # Fields are exposed both flat (by field name) and grouped
                    # under the parameter key as a Credential namespace.
                    parameter_values[credential_field] = real_secret_value
                    real_secret_values[credential_field] = real_secret_value
                parameter_values[parameter.key] = Credential(**real_secret_values)
            else:
                # Scalar secret: resolve the placeholder if one exists.
                secret_value = workflow_run_context.get_original_secret_value_or_none(value)
                parameter_values[parameter.key] = secret_value if secret_value is not None else value

        try:
            self.is_safe_code(self.code)
        except Exception as e:
            return await self.build_block_result(
                success=False,
                failure_reason=str(e),
                output_parameter_value=None,
                status=BlockStatus.failed,
                workflow_run_block_id=workflow_run_block_id,
                organization_id=organization_id,
            )

        user_function = self.generate_async_user_function(self.code, page, parameter_values)
        try:
            result = await user_function()
        except Exception as e:
            # Wrap user-code failures so the message shown is the customized one.
            exc = CustomizedCodeException(e)
            return await self.build_block_result(
                success=False,
                failure_reason=exc.message,
                output_parameter_value=None,
                status=BlockStatus.failed,
                workflow_run_block_id=workflow_run_block_id,
                organization_id=organization_id,
            )

        # Round-trip through JSON so non-serializable locals become a
        # placeholder string instead of failing output recording.
        result = json.loads(
            json.dumps(result, default=lambda value: f"Object '{type(value)}' is not JSON serializable")
        )

        await self.record_output_parameter_value(workflow_run_context, workflow_run_id, result)
        return await self.build_block_result(
            success=True,
            failure_reason=None,
            output_parameter_value=result,
            status=BlockStatus.completed,
            workflow_run_block_id=workflow_run_block_id,
            organization_id=organization_id,
        )
|
||
|
||
|
||
class TextPromptBlock(Block):
|
||
# There is a mypy bug with Literal. Without the type: ignore, mypy will raise an error:
|
||
# Parameter 1 of Literal[...] cannot be of type "Any"
|
||
block_type: Literal[BlockType.TEXT_PROMPT] = BlockType.TEXT_PROMPT # type: ignore
|
||
|
||
llm_key: str | None = None
|
||
prompt: str
|
||
parameters: list[PARAMETER_TYPE] = []
|
||
json_schema: dict[str, Any] | None = None
|
||
|
||
def get_all_parameters(
    self,
    workflow_run_id: str,
) -> list[PARAMETER_TYPE]:
    """Return the parameters this block consumes.

    ``workflow_run_id`` is unused here; it is part of the shared block
    interface.
    """
    return self.parameters
|
||
|
||
def format_potential_template_parameters(self, workflow_run_context: "WorkflowRunContext") -> None:
    """Render Jinja templates embedded in this block's fields in place.

    The LLM key (when set) and the prompt are both run through the
    workflow-run template formatter.
    """
    render = self.format_block_parameter_template_from_workflow_run_context
    if self.llm_key:
        self.llm_key = render(self.llm_key, workflow_run_context)
    self.prompt = render(self.prompt, workflow_run_context)
|
||
|
||
async def send_prompt(
    self,
    prompt: str,
    parameter_values: dict[str, Any],
    workflow_run_id: str,
    organization_id: str | None = None,
    workflow_run_block_id: str | None = None,
) -> dict[str, Any]:
    """Render the prompt, send it to the configured LLM, and return the parsed response.

    The prompt is rendered with ``parameter_values``, the block's JSON schema
    (defaulting to a single ``llm_response`` string) is appended as response
    instructions, and — when a workflow run block is available — the prompt
    and response are persisted as artifacts (best-effort; artifact failures
    are logged, not raised).
    """
    default_llm_handler = await self._resolve_default_llm_handler(workflow_run_id, organization_id)
    # An override key (block-level or run-level) takes precedence over the default handler.
    llm_api_handler = LLMAPIHandlerFactory.get_override_llm_api_handler(
        self.override_llm_key or self.llm_key, default=default_llm_handler
    )
    if not self.json_schema:
        # Default schema: free-form single string response.
        self.json_schema = {
            "type": "object",
            "properties": {
                "llm_response": {
                    "type": "string",
                    "description": "Your response to the prompt",
                }
            },
        }

    prompt = prompt_engine.load_prompt_from_string(prompt, **parameter_values)
    # Append the response-format instructions with the JSON schema inline.
    prompt += (
        "\n\n"
        + "Please respond to the prompt above using the following JSON definition:\n\n"
        + "```json\n"
        + json.dumps(self.json_schema, indent=2)
        + "\n```\n\n"
    )

    workflow_run_block = None
    artifacts_to_persist: list[tuple[ArtifactType, bytes]] = []
    if workflow_run_block_id:
        try:
            workflow_run_block = await app.DATABASE.get_workflow_run_block(workflow_run_block_id, organization_id)
            if workflow_run_block:
                artifacts_to_persist.append((ArtifactType.LLM_PROMPT, prompt.encode("utf-8")))
        except Exception as e:
            LOG.error("Failed to fetch workflow_run_block for TextPromptBlock artifacts", error=e)

    LOG.info(
        "TextPromptBlock: Sending prompt to LLM",
        prompt=prompt,
        llm_key=self.llm_key,
    )
    response = await llm_api_handler(prompt=prompt, prompt_name="text-prompt")

    if workflow_run_block:
        # Persist prompt + response together; failures here must not fail the block.
        artifacts_to_persist.append((ArtifactType.LLM_RESPONSE, json.dumps(response).encode("utf-8")))
        try:
            await app.ARTIFACT_MANAGER.create_workflow_run_block_artifacts(
                workflow_run_block=workflow_run_block,
                artifacts=artifacts_to_persist,
            )
        except Exception as e:
            LOG.error("Failed to save TextPromptBlock artifacts", error=e)

    LOG.info("TextPromptBlock: Received response from LLM", response=response)
    return response
|
||
|
||
async def _resolve_default_llm_handler(self, workflow_run_id: str, organization_id: str | None) -> "LLMAPIHandler":
    """Choose the fallback LLM handler for this text-prompt block.

    Preference order:
      1. A handler configured specifically for the "text-prompt" prompt type.
      2. The app-level secondary handler.
      3. The primary handler (logged as a warning, since the secondary
         handler is the expected fallback).
    """
    prompt_type_handler = await get_llm_handler_for_prompt_type("text-prompt", workflow_run_id, organization_id)
    if prompt_type_handler:
        return prompt_type_handler

    fallback_handler = app.SECONDARY_LLM_API_HANDLER
    if fallback_handler:
        return fallback_handler

    LOG.warning(
        "Secondary LLM handler not configured; falling back to primary handler for TextPromptBlock",
        workflow_run_id=workflow_run_id,
        organization_id=organization_id,
    )
    return app.LLM_API_HANDLER
|
||
|
||
async def execute(
    self,
    workflow_run_id: str,
    workflow_run_block_id: str,
    organization_id: str | None = None,
    browser_session_id: str | None = None,
    **kwargs: dict,
) -> BlockResult:
    """Run the text-prompt block: render the prompt template, send it to the
    LLM via ``send_prompt``, record the response as the block's output
    parameter, and return a completed (or failed) BlockResult.

    Template-rendering failures are reported as a failed BlockResult rather
    than raised; any exception from ``send_prompt`` propagates to the caller.
    """
    # Validate block execution
    await app.AGENT_FUNCTION.validate_block_execution(
        block=self,
        workflow_run_block_id=workflow_run_block_id,
        workflow_run_id=workflow_run_id,
        organization_id=organization_id,
    )
    # get workflow run context
    workflow_run_context = self.get_workflow_run_context(workflow_run_id)
    # Persist the (pre-render) prompt on the block row for observability.
    await app.DATABASE.update_workflow_run_block(
        workflow_run_block_id=workflow_run_block_id,
        organization_id=organization_id,
        prompt=self.prompt,
    )
    try:
        self.format_potential_template_parameters(workflow_run_context)
    except Exception as e:
        # A bad jinja template is a user error — fail the block instead of raising.
        return await self.build_block_result(
            success=False,
            failure_reason=f"Failed to format jinja template: {str(e)}",
            output_parameter_value=None,
            status=BlockStatus.failed,
            workflow_run_block_id=workflow_run_block_id,
            organization_id=organization_id,
        )
    # get all parameters into a dictionary
    parameter_values = {}
    for parameter in self.parameters:
        value = workflow_run_context.get_value(parameter.key)
        secret_value = workflow_run_context.get_original_secret_value_or_none(value)
        if secret_value:
            # Secret-backed parameters are deliberately excluded so their real
            # values are never embedded into the LLM prompt.
            continue
        else:
            parameter_values[parameter.key] = value

    response = await self.send_prompt(
        self.prompt,
        parameter_values,
        workflow_run_id,
        organization_id,
        workflow_run_block_id=workflow_run_block_id,
    )
    await self.record_output_parameter_value(workflow_run_context, workflow_run_id, response)
    return await self.build_block_result(
        success=True,
        failure_reason=None,
        output_parameter_value=response,
        status=BlockStatus.completed,
        workflow_run_block_id=workflow_run_block_id,
        organization_id=organization_id,
    )
|
||
|
||
|
||
class DownloadToS3Block(Block):
    """Download a file from a (possibly parameterized/templated) URL and store
    it in Skyvern's S3 uploads bucket; the resulting ``s3://`` URI becomes the
    block's output parameter value.
    """

    # There is a mypy bug with Literal. Without the type: ignore, mypy will raise an error:
    # Parameter 1 of Literal[...] cannot be of type "Any"
    block_type: Literal[BlockType.DOWNLOAD_TO_S3] = BlockType.DOWNLOAD_TO_S3  # type: ignore

    url: str

    def get_all_parameters(
        self,
        workflow_run_id: str,
    ) -> list[PARAMETER_TYPE]:
        """Return the workflow parameter backing ``url``, when there is one."""
        run_context = self.get_workflow_run_context(workflow_run_id)
        if self.url and run_context.has_parameter(self.url):
            return [run_context.get_parameter(self.url)]
        return []

    def format_potential_template_parameters(self, workflow_run_context: WorkflowRunContext) -> None:
        """Render any jinja template embedded in the URL, in place."""
        self.url = self.format_block_parameter_template_from_workflow_run_context(self.url, workflow_run_context)

    async def _upload_file_to_s3(self, uri: str, file_path: str) -> None:
        """Upload ``file_path`` to ``uri``, always deleting the local file afterwards."""
        try:
            aws_client = self.get_async_aws_client()
            await aws_client.upload_file_from_path(uri=uri, file_path=file_path)
        finally:
            # Clean up the temporary file since it's created with delete=False
            os.unlink(file_path)

    async def execute(
        self,
        workflow_run_id: str,
        workflow_run_block_id: str,
        organization_id: str | None = None,
        browser_session_id: str | None = None,
        **kwargs: dict,
    ) -> BlockResult:
        """Resolve the URL, download the file (10 MB cap), push it to S3, and
        record the S3 URI as the block output."""
        workflow_run_context = self.get_workflow_run_context(workflow_run_id)

        # When the URL field names a workflow parameter with a value, use that value.
        if self.url and workflow_run_context.has_parameter(self.url) and workflow_run_context.has_value(self.url):
            resolved_url = workflow_run_context.get_value(self.url)
            if resolved_url:
                LOG.info(
                    "DownloadToS3Block: Task URL is parameterized, using parameter value",
                    task_url_parameter_value=resolved_url,
                    task_url_parameter_key=self.url,
                )
                self.url = resolved_url

        try:
            self.format_potential_template_parameters(workflow_run_context)
        except Exception as e:
            # Template errors fail the block gracefully instead of raising.
            return await self.build_block_result(
                success=False,
                failure_reason=f"Failed to format jinja template: {str(e)}",
                output_parameter_value=None,
                status=BlockStatus.failed,
                workflow_run_block_id=workflow_run_block_id,
                organization_id=organization_id,
            )

        try:
            local_file_path = await download_file(self.url, max_size_mb=10)
        except Exception as e:
            LOG.error("DownloadToS3Block: Failed to download file", url=self.url, error=str(e))
            raise e

        uri = None
        try:
            uri = f"s3://{settings.AWS_S3_BUCKET_UPLOADS}/{settings.ENV}/{workflow_run_id}/{uuid.uuid4()}"
            await self._upload_file_to_s3(uri, local_file_path)
        except Exception as e:
            LOG.error("DownloadToS3Block: Failed to upload file to S3", uri=uri, error=str(e))
            raise e

        LOG.info("DownloadToS3Block: File downloaded and uploaded to S3", uri=uri)
        await self.record_output_parameter_value(workflow_run_context, workflow_run_id, uri)
        return await self.build_block_result(
            success=True,
            failure_reason=None,
            output_parameter_value=uri,
            status=BlockStatus.completed,
            workflow_run_block_id=workflow_run_block_id,
            organization_id=organization_id,
        )
|
||
|
||
|
||
class UploadToS3Block(Block):
    """Upload a local file (or every file directly inside a directory) to
    Skyvern's S3 uploads bucket; the list of resulting ``s3://`` URIs becomes
    the block's output parameter value.
    """

    # There is a mypy bug with Literal. Without the type: ignore, mypy will raise an error:
    # Parameter 1 of Literal[...] cannot be of type "Any"
    block_type: Literal[BlockType.UPLOAD_TO_S3] = BlockType.UPLOAD_TO_S3  # type: ignore

    # TODO (kerem): A directory upload is supported but we should also support a list of files
    # Local file or directory to upload; may be a parameter key, a jinja template,
    # or the special WORKFLOW_DOWNLOAD_DIRECTORY_PARAMETER_KEY sentinel.
    path: str | None = None

    def get_all_parameters(
        self,
        workflow_run_id: str,
    ) -> list[PARAMETER_TYPE]:
        """Return the workflow parameter backing ``path``, when there is one."""
        workflow_run_context = self.get_workflow_run_context(workflow_run_id)

        if self.path and workflow_run_context.has_parameter(self.path):
            return [workflow_run_context.get_parameter(self.path)]

        return []

    def format_potential_template_parameters(self, workflow_run_context: WorkflowRunContext) -> None:
        """Render any jinja template embedded in ``path``, in place."""
        if self.path:
            self.path = self.format_block_parameter_template_from_workflow_run_context(self.path, workflow_run_context)

    @staticmethod
    def _get_s3_uri(workflow_run_id: str, path: str) -> str:
        """Build a unique S3 URI for ``path`` under the uploads bucket, keyed by
        environment and workflow run; a UUID prefix avoids name collisions."""
        s3_bucket = settings.AWS_S3_BUCKET_UPLOADS
        s3_key = f"{settings.ENV}/{workflow_run_id}/{uuid.uuid4()}_{Path(path).name}"
        return f"s3://{s3_bucket}/{s3_key}"

    async def execute(
        self,
        workflow_run_id: str,
        workflow_run_block_id: str,
        organization_id: str | None = None,
        browser_session_id: str | None = None,
        **kwargs: dict,
    ) -> BlockResult:
        """Resolve ``path`` (parameter value, download-directory sentinel, or
        template), then upload the file — or each non-directory entry of the
        directory, capped at MAX_UPLOAD_FILE_COUNT — and record the S3 URIs.

        Raises FileNotFoundError if the resolved path does not exist; upload
        errors are re-raised after logging.
        """
        # get workflow run context
        workflow_run_context = self.get_workflow_run_context(workflow_run_id)
        # get all parameters into a dictionary
        if self.path and workflow_run_context.has_parameter(self.path) and workflow_run_context.has_value(self.path):
            file_path_parameter_value = workflow_run_context.get_value(self.path)
            if file_path_parameter_value:
                LOG.info(
                    "UploadToS3Block: File path is parameterized, using parameter value",
                    file_path_parameter_value=file_path_parameter_value,
                    file_path_parameter_key=self.path,
                )
                self.path = file_path_parameter_value
        # if the path is WORKFLOW_DOWNLOAD_DIRECTORY_PARAMETER_KEY, use the download directory for the workflow run
        elif self.path == settings.WORKFLOW_DOWNLOAD_DIRECTORY_PARAMETER_KEY:
            context = skyvern_context.current()
            self.path = str(
                get_path_for_workflow_download_directory(
                    context.run_id if context and context.run_id else workflow_run_id
                ).absolute()
            )

        try:
            self.format_potential_template_parameters(workflow_run_context)
        except Exception as e:
            # Template errors fail the block gracefully instead of raising.
            return await self.build_block_result(
                success=False,
                failure_reason=f"Failed to format jinja template: {str(e)}",
                output_parameter_value=None,
                status=BlockStatus.failed,
                workflow_run_block_id=workflow_run_block_id,
                organization_id=organization_id,
            )

        if not self.path or not os.path.exists(self.path):
            raise FileNotFoundError(f"UploadToS3Block: File not found at path: {self.path}")

        s3_uris = []
        try:
            client = self.get_async_aws_client()
            # is the file path a file or a directory?
            if os.path.isdir(self.path):
                # get all files in the directory, if there are more than 25 files, we will not upload them
                files = os.listdir(self.path)
                if len(files) > MAX_UPLOAD_FILE_COUNT:
                    raise ValueError("Too many files in the directory, not uploading")
                for file in files:
                    # if the file is a directory, we will not upload it
                    if os.path.isdir(os.path.join(self.path, file)):
                        LOG.warning("UploadToS3Block: Skipping directory", file=file)
                        continue
                    file_path = os.path.join(self.path, file)
                    s3_uri = self._get_s3_uri(workflow_run_id, file_path)
                    s3_uris.append(s3_uri)
                    await client.upload_file_from_path(uri=s3_uri, file_path=file_path)
            else:
                s3_uri = self._get_s3_uri(workflow_run_id, self.path)
                s3_uris.append(s3_uri)
                await client.upload_file_from_path(uri=s3_uri, file_path=self.path)
        except Exception as e:
            LOG.exception("UploadToS3Block: Failed to upload file to S3", file_path=self.path)
            raise e

        LOG.info("UploadToS3Block: File(s) uploaded to S3", file_path=self.path)
        await self.record_output_parameter_value(workflow_run_context, workflow_run_id, s3_uris)
        return await self.build_block_result(
            success=True,
            failure_reason=None,
            output_parameter_value=s3_uris,
            status=BlockStatus.completed,
            workflow_run_block_id=workflow_run_block_id,
            organization_id=organization_id,
        )
|
||
|
||
|
||
class FileUploadBlock(Block):
    """Upload the workflow run's downloaded files to customer-provided storage
    (S3 or Azure Blob Storage), using credentials supplied on the block itself
    (each may be a literal, a template, or a secret-backed parameter).

    The list of destination URIs becomes the block's output parameter value.
    """

    # There is a mypy bug with Literal. Without the type: ignore, mypy will raise an error:
    # Parameter 1 of Literal[...] cannot be of type "Any"
    block_type: Literal[BlockType.FILE_UPLOAD] = BlockType.FILE_UPLOAD  # type: ignore

    # Destination backend; defaults to S3.
    storage_type: FileStorageType = FileStorageType.S3
    # S3 settings (required when storage_type == S3).
    s3_bucket: str | None = None
    aws_access_key_id: str | None = None
    aws_secret_access_key: str | None = None
    region_name: str | None = None
    # Azure settings (required when storage_type == AZURE).
    azure_storage_account_name: str | None = None
    azure_storage_account_key: str | None = None
    azure_blob_container_name: str | None = None
    # Optional destination folder prefix; defaults to the workflow run id.
    path: str | None = None

    def get_all_parameters(
        self,
        workflow_run_id: str,
    ) -> list[PARAMETER_TYPE]:
        """Return every workflow parameter referenced by the block's fields."""
        workflow_run_context = self.get_workflow_run_context(workflow_run_id)
        parameters = []

        if self.path and workflow_run_context.has_parameter(self.path):
            parameters.append(workflow_run_context.get_parameter(self.path))

        if self.s3_bucket and workflow_run_context.has_parameter(self.s3_bucket):
            parameters.append(workflow_run_context.get_parameter(self.s3_bucket))

        if self.aws_access_key_id and workflow_run_context.has_parameter(self.aws_access_key_id):
            parameters.append(workflow_run_context.get_parameter(self.aws_access_key_id))

        if self.aws_secret_access_key and workflow_run_context.has_parameter(self.aws_secret_access_key):
            parameters.append(workflow_run_context.get_parameter(self.aws_secret_access_key))

        if self.azure_storage_account_name and workflow_run_context.has_parameter(self.azure_storage_account_name):
            parameters.append(workflow_run_context.get_parameter(self.azure_storage_account_name))

        if self.azure_storage_account_key and workflow_run_context.has_parameter(self.azure_storage_account_key):
            parameters.append(workflow_run_context.get_parameter(self.azure_storage_account_key))

        if self.azure_blob_container_name and workflow_run_context.has_parameter(self.azure_blob_container_name):
            parameters.append(workflow_run_context.get_parameter(self.azure_blob_container_name))

        return parameters

    def format_potential_template_parameters(self, workflow_run_context: WorkflowRunContext) -> None:
        """Render jinja templates in every templatable field, in place."""
        if self.path:
            self.path = self.format_block_parameter_template_from_workflow_run_context(self.path, workflow_run_context)

        if self.s3_bucket:
            self.s3_bucket = self.format_block_parameter_template_from_workflow_run_context(
                self.s3_bucket, workflow_run_context
            )
        if self.aws_access_key_id:
            self.aws_access_key_id = self.format_block_parameter_template_from_workflow_run_context(
                self.aws_access_key_id, workflow_run_context
            )
        if self.aws_secret_access_key:
            self.aws_secret_access_key = self.format_block_parameter_template_from_workflow_run_context(
                self.aws_secret_access_key, workflow_run_context
            )
        if self.azure_storage_account_name:
            self.azure_storage_account_name = self.format_block_parameter_template_from_workflow_run_context(
                self.azure_storage_account_name, workflow_run_context
            )
        if self.azure_storage_account_key:
            self.azure_storage_account_key = self.format_block_parameter_template_from_workflow_run_context(
                self.azure_storage_account_key, workflow_run_context
            )
        if self.azure_blob_container_name:
            self.azure_blob_container_name = self.format_block_parameter_template_from_workflow_run_context(
                self.azure_blob_container_name, workflow_run_context
            )

    def _get_s3_uri(self, workflow_run_id: str, path: str) -> str:
        """Build the destination S3 URI for ``path`` under the customer bucket,
        inside ``self.path`` (or the workflow run id) with a UUID-prefixed name."""
        folder_path = self.path or f"{workflow_run_id}"
        # Remove trailing slash from folder_path to avoid double slashes
        folder_path = folder_path.rstrip("/")
        # Remove any empty path segments to avoid double slashes
        folder_path = "/".join(segment for segment in folder_path.split("/") if segment)
        s3_suffix = f"{uuid.uuid4()}_{Path(path).name}"
        return f"s3://{self.s3_bucket}/{folder_path}/{s3_suffix}"

    def _get_azure_blob_name(self, workflow_run_id: str, file_path: str) -> str:
        """Build the destination blob name (folder prefix + UUID-prefixed file name)."""
        blob_name = f"{uuid.uuid4()}_{Path(file_path).name}"
        folder_path = self.path or workflow_run_id
        # Remove trailing slash from folder_path to avoid double slashes
        folder_path = folder_path.rstrip("/")
        # Remove any empty path segments to avoid double slashes
        folder_path = "/".join(segment for segment in folder_path.split("/") if segment)
        return folder_path + "/" + blob_name

    def _get_azure_blob_uri(self, workflow_run_id: str, blob_name: str) -> str:
        """Build the public ``https://`` URI of the blob (reported in the block output)."""
        return f"https://{self.azure_storage_account_name}.blob.core.windows.net/{self.azure_blob_container_name}/{blob_name}"

    async def execute(
        self,
        workflow_run_id: str,
        workflow_run_block_id: str,
        organization_id: str | None = None,
        browser_session_id: str | None = None,
        **kwargs: dict,
    ) -> BlockResult:
        """Validate backend credentials, gather the workflow run's download
        directory contents, and upload them to the configured backend.

        Missing credentials, template errors, and upload failures all produce
        a failed BlockResult rather than raising to the caller.
        """
        # get workflow run context
        workflow_run_context = self.get_workflow_run_context(workflow_run_id)
        # get all parameters into a dictionary
        # data validate before uploading
        missing_parameters = []
        if self.storage_type == FileStorageType.S3:
            if not self.s3_bucket:
                missing_parameters.append("s3_bucket")
            if not self.aws_access_key_id:
                missing_parameters.append("aws_access_key_id")
            if not self.aws_secret_access_key:
                missing_parameters.append("aws_secret_access_key")
        elif self.storage_type == FileStorageType.AZURE:
            if not self.azure_storage_account_name or self.azure_storage_account_name == "":
                missing_parameters.append("azure_storage_account_name")
            if not self.azure_storage_account_key or self.azure_storage_account_key == "":
                missing_parameters.append("azure_storage_account_key")
            if not self.azure_blob_container_name or self.azure_blob_container_name == "":
                missing_parameters.append("azure_blob_container_name")
        else:
            return await self.build_block_result(
                success=False,
                failure_reason=f"Unsupported storage type: {self.storage_type}",
                output_parameter_value=None,
                status=BlockStatus.failed,
                workflow_run_block_id=workflow_run_block_id,
                organization_id=organization_id,
            )

        if missing_parameters:
            return await self.build_block_result(
                success=False,
                failure_reason=f"Required block values are missing in the FileUploadBlock (label: {self.label}): {', '.join(missing_parameters)}",
                output_parameter_value=None,
                status=BlockStatus.failed,
                workflow_run_block_id=workflow_run_block_id,
                organization_id=organization_id,
            )

        try:
            self.format_potential_template_parameters(workflow_run_context)
        except Exception as e:
            return await self.build_block_result(
                success=False,
                failure_reason=f"Failed to format jinja template: {str(e)}",
                output_parameter_value=None,
                status=BlockStatus.failed,
                workflow_run_block_id=workflow_run_block_id,
                organization_id=organization_id,
            )

        # Source of the upload is the workflow run's download directory.
        context = skyvern_context.current()
        download_files_path = str(
            get_path_for_workflow_download_directory(
                context.run_id if context and context.run_id else workflow_run_id
            ).absolute()
        )

        uploaded_uris = []
        try:
            workflow_run_context = self.get_workflow_run_context(workflow_run_id)
            files_to_upload = []
            if os.path.isdir(download_files_path):
                files = os.listdir(download_files_path)
                # File-count cap differs per backend.
                max_file_count = (
                    MAX_UPLOAD_FILE_COUNT
                    if self.storage_type == FileStorageType.S3
                    else AZURE_BLOB_STORAGE_MAX_UPLOAD_FILE_COUNT
                )
                if len(files) > max_file_count:
                    raise ValueError(f"Too many files in the directory, not uploading. Max: {max_file_count}")
                for file in files:
                    # Nested directories are skipped, not recursed into.
                    if os.path.isdir(os.path.join(download_files_path, file)):
                        LOG.warning("FileUploadBlock: Skipping directory", file=file)
                        continue
                    files_to_upload.append(os.path.join(download_files_path, file))
            else:
                files_to_upload.append(download_files_path)

            if self.storage_type == FileStorageType.S3:
                # Credentials may be secret parameter keys — resolve to the real values.
                actual_aws_access_key_id = (
                    workflow_run_context.get_original_secret_value_or_none(self.aws_access_key_id)
                    or self.aws_access_key_id
                )
                actual_aws_secret_access_key = (
                    workflow_run_context.get_original_secret_value_or_none(self.aws_secret_access_key)
                    or self.aws_secret_access_key
                )
                aws_client = AsyncAWSClient(
                    aws_access_key_id=actual_aws_access_key_id,
                    aws_secret_access_key=actual_aws_secret_access_key,
                    region_name=self.region_name,
                )
                for file_path in files_to_upload:
                    s3_uri = self._get_s3_uri(workflow_run_id, file_path)
                    s3_uris.append(s3_uri) if False else uploaded_uris.append(s3_uri)
                    await aws_client.upload_file_from_path(uri=s3_uri, file_path=file_path, raise_exception=True)
                LOG.info("FileUploadBlock: File(s) uploaded to S3", file_path=self.path)
            elif self.storage_type == FileStorageType.AZURE:
                actual_azure_storage_account_name = (
                    workflow_run_context.get_original_secret_value_or_none(self.azure_storage_account_name)
                    or self.azure_storage_account_name
                )
                actual_azure_storage_account_key = (
                    workflow_run_context.get_original_secret_value_or_none(self.azure_storage_account_key)
                    or self.azure_storage_account_key
                )
                if actual_azure_storage_account_name is None or actual_azure_storage_account_key is None:
                    raise AzureConfigurationError("Azure Storage is not configured")

                azure_client = app.AZURE_CLIENT_FACTORY.create_storage_client(
                    storage_account_name=actual_azure_storage_account_name,
                    storage_account_key=actual_azure_storage_account_key,
                )
                for file_path in files_to_upload:
                    LOG.info("FileUploadBlock: Uploading file to Azure Blob Storage", file_path=file_path)
                    blob_name = self._get_azure_blob_name(workflow_run_id, file_path)
                    # The https:// form is recorded as output; the azure:// form drives the client.
                    azure_uri = self._get_azure_blob_uri(workflow_run_id, blob_name)
                    uploaded_uris.append(azure_uri)
                    uri = f"azure://{self.azure_blob_container_name or ''}/{blob_name}"
                    await azure_client.upload_file_from_path(uri, file_path)
                LOG.info("FileUploadBlock: File(s) uploaded to Azure Blob Storage", file_path=self.path)
            else:
                # This case should ideally be caught by the initial validation
                raise ValueError(f"Unsupported storage type: {self.storage_type}")

        except Exception as e:
            LOG.exception("FileUploadBlock: Failed to upload file", file_path=self.path, storage_type=self.storage_type)
            return await self.build_block_result(
                success=False,
                failure_reason=f"Failed to upload file to {self.storage_type}: {str(e)}",
                output_parameter_value=None,
                status=BlockStatus.failed,
                workflow_run_block_id=workflow_run_block_id,
                organization_id=organization_id,
            )

        await self.record_output_parameter_value(workflow_run_context, workflow_run_id, uploaded_uris)
        return await self.build_block_result(
            success=True,
            failure_reason=None,
            output_parameter_value=uploaded_uris,
            status=BlockStatus.completed,
            workflow_run_block_id=workflow_run_block_id,
            organization_id=organization_id,
        )
|
||
|
||
|
||
class SendEmailBlock(Block):
|
||
# There is a mypy bug with Literal. Without the type: ignore, mypy will raise an error:
|
||
# Parameter 1 of Literal[...] cannot be of type "Any"
|
||
block_type: Literal[BlockType.SEND_EMAIL] = BlockType.SEND_EMAIL # type: ignore
|
||
|
||
smtp_host: AWSSecretParameter
|
||
smtp_port: AWSSecretParameter
|
||
smtp_username: AWSSecretParameter
|
||
# if you're using a Gmail account, you need to pass in an app password instead of your regular password
|
||
smtp_password: AWSSecretParameter
|
||
sender: str
|
||
recipients: list[str]
|
||
subject: str
|
||
body: str
|
||
file_attachments: list[str] = []
|
||
|
||
def get_all_parameters(
    self,
    workflow_run_id: str,
) -> list[PARAMETER_TYPE]:
    """Collect every workflow parameter this block depends on: the four SMTP
    secrets, plus any attachment path, subject, or body that is a parameter key."""
    run_context = self.get_workflow_run_context(workflow_run_id)
    collected = [
        self.smtp_host,
        self.smtp_port,
        self.smtp_username,
        self.smtp_password,
    ]

    # Attachment entries may name workflow parameters rather than literal paths.
    for attachment in self.file_attachments or []:
        if run_context.has_parameter(attachment):
            collected.append(run_context.get_parameter(attachment))

    for field_value in (self.subject, self.body):
        if field_value and run_context.has_parameter(field_value):
            collected.append(run_context.get_parameter(field_value))

    return collected
|
||
|
||
def format_potential_template_parameters(self, workflow_run_context: WorkflowRunContext) -> None:
    """Render jinja templates in sender, subject, body, and every recipient, in place."""
    render = self.format_block_parameter_template_from_workflow_run_context
    self.sender = render(self.sender, workflow_run_context)
    self.subject = render(self.subject, workflow_run_context)
    self.body = render(self.body, workflow_run_context)

    # Format recipients
    self.recipients = [render(entry, workflow_run_context) for entry in self.recipients]
|
||
|
||
def _decrypt_smtp_parameters(self, workflow_run_context: WorkflowRunContext) -> tuple[str, int, str, str]:
    """Resolve the four SMTP secret parameters to their real values.

    Returns:
        (host, port, username, password). The port is returned as an ``int``,
        matching the declared return type (the original returned the raw
        string even though it had already verified ``isdigit()``).

    Raises:
        InvalidEmailClientConfiguration: if any value is missing or the port
            is not numeric.
    """
    obfuscated_smtp_host_value = workflow_run_context.get_value(self.smtp_host.key)
    obfuscated_smtp_port_value = workflow_run_context.get_value(self.smtp_port.key)
    obfuscated_smtp_username_value = workflow_run_context.get_value(self.smtp_username.key)
    obfuscated_smtp_password_value = workflow_run_context.get_value(self.smtp_password.key)
    smtp_host_value = workflow_run_context.get_original_secret_value_or_none(obfuscated_smtp_host_value)
    smtp_port_value = workflow_run_context.get_original_secret_value_or_none(obfuscated_smtp_port_value)
    smtp_username_value = workflow_run_context.get_original_secret_value_or_none(obfuscated_smtp_username_value)
    smtp_password_value = workflow_run_context.get_original_secret_value_or_none(obfuscated_smtp_password_value)

    # Accumulate every configuration problem so the user sees them all at once.
    email_config_problems = []
    if smtp_host_value is None:
        email_config_problems.append("Missing SMTP server")
    if smtp_port_value is None:
        email_config_problems.append("Missing SMTP port")
    elif not smtp_port_value.isdigit():
        email_config_problems.append("SMTP port should be a number")
    if smtp_username_value is None:
        email_config_problems.append("Missing SMTP username")
    if smtp_password_value is None:
        email_config_problems.append("Missing SMTP password")

    if email_config_problems:
        raise InvalidEmailClientConfiguration(email_config_problems)

    return (
        smtp_host_value,
        # Safe: isdigit() was verified above; honors the declared return type.
        int(smtp_port_value),
        smtp_username_value,
        smtp_password_value,
    )
|
||
|
||
def _get_file_paths(self, workflow_run_context: WorkflowRunContext, workflow_run_id: str) -> list[str]:
    """Resolve ``self.file_attachments`` into concrete attachment sources.

    Each entry may be a workflow parameter key (possibly secret-backed), the
    WORKFLOW_DOWNLOAD_DIRECTORY_PARAMETER_KEY sentinel, a jinja template, a
    local file or directory, or a remote URL / S3 URI. Local directories are
    expanded one level deep (sub-directories skipped); paths that exist
    nowhere and don't look like URLs are logged and dropped.
    """
    file_paths = []
    for path in self.file_attachments:
        # if the file path is a parameter, get the value from the workflow run context first
        if workflow_run_context.has_parameter(path):
            file_path_parameter_value = workflow_run_context.get_value(path)
            # if the file path is a secret, get the original secret value from the workflow run context
            file_path_parameter_secret_value = workflow_run_context.get_original_secret_value_or_none(
                file_path_parameter_value
            )
            if file_path_parameter_secret_value:
                path = file_path_parameter_secret_value
            else:
                path = file_path_parameter_value

        if path == settings.WORKFLOW_DOWNLOAD_DIRECTORY_PARAMETER_KEY:
            # if the path is WORKFLOW_DOWNLOAD_DIRECTORY_PARAMETER_KEY, use download directory for the workflow run
            context = skyvern_context.current()
            path = str(
                get_path_for_workflow_download_directory(
                    context.run_id if context and context.run_id else workflow_run_id
                ).absolute()
            )
            LOG.info(
                "SendEmailBlock: Using download directory for the workflow run",
                workflow_run_id=workflow_run_id,
                file_path=path,
            )

        # Render any jinja template in the (possibly substituted) path.
        path = self.format_block_parameter_template_from_workflow_run_context(path, workflow_run_context)
        # if the file path is a directory, add all files in the directory, skip directories, limit to 10 files
        if os.path.exists(path):
            if os.path.isdir(path):
                for file in os.listdir(path):
                    if os.path.isdir(os.path.join(path, file)):
                        LOG.warning("SendEmailBlock: Skipping directory", file=file)
                        continue
                    file_path = os.path.join(path, file)
                    file_paths.append(file_path)
            else:
                # covers the case where the file path is a single file
                file_paths.append(path)
        # check if path is a url, or an S3 uri
        elif (
            path.startswith("http://")
            or path.startswith("https://")
            or path.startswith("s3://")
            or path.startswith("www.")
        ):
            file_paths.append(path)
        else:
            # Non-existent local path that isn't a URL: skip with a warning.
            LOG.warning("SendEmailBlock: File not found", file_path=path)

    return file_paths
|
||
|
||
async def _download_from_s3(self, s3_uri: str) -> str:
    """Download ``s3_uri`` into a local temp file and return the file's path.

    The temp file is created with ``delete=False``, so the caller owns its
    cleanup. The handle is explicitly closed before the path is returned —
    the original returned the path while the handle was still open, so
    buffered bytes were not guaranteed to be on disk for the next reader.
    """
    client = self.get_async_aws_client()
    downloaded_bytes = await client.download_file(uri=s3_uri)
    temp_file = create_named_temporary_file(delete=False)
    try:
        temp_file.write(downloaded_bytes)
    finally:
        temp_file.close()
    return temp_file.name
|
||
|
||
def get_real_email_recipients(self, workflow_run_context: WorkflowRunContext) -> list[str]:
    """Resolve ``self.recipients`` into validated email addresses.

    Each entry may be a workflow parameter key or a (possibly templated)
    literal address. Invalid addresses are logged and dropped.

    Raises:
        NoValidEmailRecipient: if no entry resolves to a valid address.
    """
    recipients = []
    for recipient in self.recipients:
        # Check if the recipient is a parameter and get its value
        if workflow_run_context.has_parameter(recipient):
            maybe_recipient = workflow_run_context.get_value(recipient)
        else:
            maybe_recipient = recipient

        # Render any jinja template before validating. The original rendered
        # into an unused `recipient` local while validating the raw
        # `maybe_recipient`, so templated addresses were never applied.
        maybe_recipient = self.format_block_parameter_template_from_workflow_run_context(
            maybe_recipient, workflow_run_context
        )
        # check if maybe_recipient is a valid email address
        try:
            validate_email(maybe_recipient)
            recipients.append(maybe_recipient)
        except EmailNotValidError as e:
            LOG.warning(
                "SendEmailBlock: Invalid email address",
                recipient=maybe_recipient,
                reason=str(e),
            )

    if not recipients:
        raise NoValidEmailRecipient(recipients=recipients)

    return recipients
|
||
|
||
async def _build_email_message(
    self, workflow_run_context: WorkflowRunContext, workflow_run_id: str
) -> EmailMessage:
    """Assemble the outgoing EmailMessage: headers, body, and attachments.

    Attachments listed in ``file_attachments`` are fetched from S3 / HTTP(S)
    or read locally; MIME type is guessed from content. Fix: the
    FileNotFoundError / IsADirectoryError messages now include the offending
    filename (the originals contained the literal placeholder "(unknown)"
    instead of the interpolated value).

    Raises:
        FileNotFoundError: if a local attachment path does not exist or a
            remote fetch yields no path.
        IsADirectoryError: if a local attachment path is not a regular file.
    """
    msg = EmailMessage()
    msg["Subject"] = (
        self.subject.strip().replace("\n", "").replace("\r", "") + f" - Workflow Run ID: {workflow_run_id}"
    )
    msg["To"] = ", ".join(self.get_real_email_recipients(workflow_run_context))
    msg["BCC"] = self.sender  # BCC the sender so there is a record of the email being sent
    msg["From"] = self.sender
    if self.body and workflow_run_context.has_parameter(self.body) and workflow_run_context.has_value(self.body):
        # We're purposely not decrypting the body parameter value here because we don't want to expose secrets
        body_parameter_value = workflow_run_context.get_value(self.body)
        msg.set_content(str(body_parameter_value))
    else:
        msg.set_content(self.body)

    # Track attachment paths by content hash so duplicates can be reported below.
    file_names_by_hash: dict[str, list[str]] = defaultdict(list)

    for filename in self._get_file_paths(workflow_run_context, workflow_run_id):
        if filename.startswith("s3://"):
            path = await download_from_s3(self.get_async_aws_client(), filename)
        elif filename.startswith("http://") or filename.startswith("https://"):
            path = await download_file(filename)
        else:
            LOG.info("SendEmailBlock: Looking for file locally", filename=filename)
            if not os.path.exists(filename):
                raise FileNotFoundError(f"File not found: {filename}")
            if not os.path.isfile(filename):
                raise IsADirectoryError(f"Path is a directory: {filename}")

            path = filename
            LOG.info("SendEmailBlock: Found file locally", path=path)

        if not path:
            raise FileNotFoundError(f"File not found: {filename}")

        # Guess the content type based on the file's extension. Encoding
        # will be ignored, although we should check for simple things like
        # gzip'd or compressed files.
        kind = filetype.guess(path)
        if kind:
            ctype = kind.mime
            extension = kind.extension
        else:
            # No guess could be made, or the file is encoded (compressed), so
            # use a generic bag-of-bits type.
            ctype = "application/octet-stream"
            extension = None

        maintype, subtype = ctype.split("/", 1)
        attachment_path = Path(path)
        attachment_filename = attachment_path.name

        # Check if the filename has an extension
        if not attachment_path.suffix:
            # If no extension, guess it based on the MIME type
            if extension:
                attachment_filename += f".{extension}"

        LOG.info(
            "SendEmailBlock: Adding attachment",
            filename=attachment_filename,
            maintype=maintype,
            subtype=subtype,
        )
        with open(path, "rb") as fp:
            msg.add_attachment(
                fp.read(),
                maintype=maintype,
                subtype=subtype,
                filename=attachment_filename,
            )
        file_hash = calculate_sha256_for_file(path)
        file_names_by_hash[file_hash].append(path)

    # Calculate file stats based on content hashes
    total_files = sum(len(files) for files in file_names_by_hash.values())
    unique_files = len(file_names_by_hash)
    duplicate_files_list = [files for files in file_names_by_hash.values() if len(files) > 1]

    # Log file statistics
    LOG.info("SendEmailBlock: Total files attached", total_files=total_files)
    LOG.info("SendEmailBlock: Unique files (based on content) attached", unique_files=unique_files)
    if duplicate_files_list:
        LOG.info(
            "SendEmailBlock: Duplicate files (based on content) attached", duplicate_files_list=duplicate_files_list
        )

    return msg
|
||
|
||
async def execute(
|
||
self,
|
||
workflow_run_id: str,
|
||
workflow_run_block_id: str,
|
||
organization_id: str | None = None,
|
||
browser_session_id: str | None = None,
|
||
**kwargs: dict,
|
||
) -> BlockResult:
|
||
workflow_run_context = self.get_workflow_run_context(workflow_run_id)
|
||
await app.DATABASE.update_workflow_run_block(
|
||
workflow_run_block_id=workflow_run_block_id,
|
||
organization_id=organization_id,
|
||
recipients=self.recipients,
|
||
attachments=self.file_attachments,
|
||
subject=self.subject,
|
||
body=self.body,
|
||
)
|
||
try:
|
||
self.format_potential_template_parameters(workflow_run_context)
|
||
except Exception as e:
|
||
return await self.build_block_result(
|
||
success=False,
|
||
failure_reason=f"Failed to format jinja template: {str(e)}",
|
||
output_parameter_value=None,
|
||
status=BlockStatus.failed,
|
||
workflow_run_block_id=workflow_run_block_id,
|
||
organization_id=organization_id,
|
||
)
|
||
smtp_host_value, smtp_port_value, smtp_username_value, smtp_password_value = self._decrypt_smtp_parameters(
|
||
workflow_run_context
|
||
)
|
||
|
||
smtp_host = None
|
||
try:
|
||
smtp_host = smtplib.SMTP(smtp_host_value, smtp_port_value)
|
||
LOG.info("SendEmailBlock: Connected to SMTP server")
|
||
smtp_host.starttls()
|
||
smtp_host.login(smtp_username_value, smtp_password_value)
|
||
LOG.info("SendEmailBlock: Logged in to SMTP server")
|
||
message = await self._build_email_message(workflow_run_context, workflow_run_id)
|
||
smtp_host.send_message(message)
|
||
LOG.info("SendEmailBlock: Email sent")
|
||
except Exception as e:
|
||
LOG.error("SendEmailBlock: Failed to send email", exc_info=True)
|
||
result_dict = {"success": False, "error": str(e)}
|
||
await self.record_output_parameter_value(workflow_run_context, workflow_run_id, result_dict)
|
||
return await self.build_block_result(
|
||
success=False,
|
||
failure_reason=str(e),
|
||
output_parameter_value=result_dict,
|
||
status=BlockStatus.failed,
|
||
workflow_run_block_id=workflow_run_block_id,
|
||
organization_id=organization_id,
|
||
)
|
||
finally:
|
||
if smtp_host:
|
||
smtp_host.quit()
|
||
|
||
result_dict = {"success": True}
|
||
await self.record_output_parameter_value(workflow_run_context, workflow_run_id, result_dict)
|
||
return await self.build_block_result(
|
||
success=True,
|
||
failure_reason=None,
|
||
output_parameter_value=result_dict,
|
||
status=BlockStatus.completed,
|
||
workflow_run_block_id=workflow_run_block_id,
|
||
organization_id=organization_id,
|
||
)
|
||
|
||
|
||
class FileParserBlock(Block):
    """Download a file by URL (HTTP or s3://), parse it according to ``file_type``,
    and optionally run LLM-based structured extraction against ``json_schema``.

    Supported types: CSV/TSV, Excel, PDF, image (OCR via vision LLM), and DOCX.
    """

    # There is a mypy bug with Literal. Without the type: ignore, mypy will raise an error:
    # Parameter 1 of Literal[...] cannot be of type "Any"
    block_type: Literal[BlockType.FILE_URL_PARSER] = BlockType.FILE_URL_PARSER  # type: ignore

    # URL of the file to parse; may be a workflow parameter key or a Jinja template.
    file_url: str
    # Declared file type; CSV acts as "auto-detect" in execute() (see the auto-detection step).
    file_type: FileType
    # When set, parsed content is passed through an LLM to extract data matching this schema.
    json_schema: dict[str, Any] | None = None

    def get_all_parameters(
        self,
        workflow_run_id: str,
    ) -> list[PARAMETER_TYPE]:
        """Return the workflow parameter referenced by ``file_url``, if any."""
        workflow_run_context = self.get_workflow_run_context(workflow_run_id)
        if self.file_url and workflow_run_context.has_parameter(self.file_url):
            return [workflow_run_context.get_parameter(self.file_url)]
        return []

    def format_potential_template_parameters(self, workflow_run_context: WorkflowRunContext) -> None:
        """Render any Jinja template syntax in ``file_url`` against the run context."""
        self.file_url = self.format_block_parameter_template_from_workflow_run_context(
            self.file_url, workflow_run_context
        )

    def _detect_file_type_from_url(self, file_url: str) -> FileType:
        """Detect file type based on file extension in the URL."""
        url_parsed = urlparse(file_url)
        # TODO: use filetype.guess(file_path) to make the detection more robust
        suffix = Path(url_parsed.path).suffix.lower()
        if suffix in (".xlsx", ".xls", ".xlsm"):
            return FileType.EXCEL
        elif suffix == ".pdf":
            return FileType.PDF
        elif suffix == ".tsv":
            return FileType.CSV  # TSV files are handled by the CSV parser
        elif suffix in (".png", ".jpg", ".jpeg", ".gif", ".bmp", ".webp", ".tiff", ".tif"):
            return FileType.IMAGE
        elif suffix == ".docx":
            return FileType.DOCX
        elif suffix == ".doc":
            raise InvalidFileType(
                file_url=file_url,
                file_type=FileType.DOCX,
                error="Legacy .doc format (Word 97-2003) is not supported. Please convert the file to .docx format.",
            )
        else:
            return FileType.CSV  # Default to CSV for .csv and any other extensions

    def _detect_file_encoding(self, file_path: str) -> str:
        """Detect the encoding of a file using charset-normalizer with fallbacks.

        Reads a sample of the file (first 64KB) to detect encoding efficiently.
        Falls back through common encodings if detection fails.
        """
        sample_size = 65536  # 64KB sample for detection
        with open(file_path, "rb") as f:
            raw_data = f.read(sample_size)

        result = from_bytes(raw_data)
        best_match = result.best()
        if best_match and best_match.encoding:
            return best_match.encoding

        # charset-normalizer gave no answer; try common encodings in order of likelihood.
        for encoding in ["utf-8", "cp1252", "latin-1"]:
            try:
                raw_data.decode(encoding)
                return encoding
            except UnicodeDecodeError:
                continue

        # latin-1 always succeeds (1:1 byte mapping), so this is a safety fallback
        return "latin-1"

    def validate_file_type(self, file_url_used: str, file_path: str) -> None:
        """Validate that the downloaded file matches ``self.file_type``.

        Raises InvalidFileType when the file cannot be parsed as the declared type.
        """
        if self.file_type == FileType.CSV:
            try:
                encoding = self._detect_file_encoding(file_path)
                with open(file_path, encoding=encoding, errors="replace") as file:
                    # Sniffing the first 1KB raises csv.Error on non-delimited content.
                    csv.Sniffer().sniff(file.read(1024))
            except csv.Error as e:
                raise InvalidFileType(file_url=file_url_used, file_type=self.file_type, error=str(e))
        elif self.file_type == FileType.EXCEL:
            try:
                # Try to read the file with pandas to validate it's a valid Excel file
                pd.read_excel(file_path, nrows=1, engine="calamine")
            except Exception as e:
                raise InvalidFileType(
                    file_url=file_url_used, file_type=self.file_type, error=f"Invalid Excel file format: {str(e)}"
                )
        elif self.file_type == FileType.PDF:
            try:
                validate_pdf_file(file_path, file_identifier=file_url_used)
            except PDFParsingError as e:
                raise InvalidFileType(file_url=file_url_used, file_type=self.file_type, error=str(e))
        elif self.file_type == FileType.IMAGE:
            kind = filetype.guess(file_path)
            if kind is None or not kind.mime.startswith("image/"):
                raise InvalidFileType(
                    file_url=file_url_used, file_type=self.file_type, error="File is not a valid image"
                )
        elif self.file_type == FileType.DOCX:
            try:
                # Try to open the file with python-docx to validate it's a valid DOCX file
                docx.Document(file_path)
            except Exception as e:
                raise InvalidFileType(
                    file_url=file_url_used, file_type=self.file_type, error=f"Invalid DOCX file format: {str(e)}"
                )

    async def _parse_csv_file(self, file_path: str) -> list[dict[str, Any]]:
        """Parse CSV/TSV file and return list of dictionaries."""
        parsed_data = []
        encoding = self._detect_file_encoding(file_path)
        with open(file_path, encoding=encoding, errors="replace") as file:
            # Try to detect the delimiter (comma for CSV, tab for TSV)
            sample = file.read(1024)
            file.seek(0)  # Reset file pointer

            # Use csv.Sniffer to detect the delimiter
            try:
                dialect = csv.Sniffer().sniff(sample)
                delimiter = dialect.delimiter
            except csv.Error:
                # Default to comma if detection fails
                delimiter = ","

            reader = csv.DictReader(file, delimiter=delimiter)
            for row in reader:
                parsed_data.append(row)
        return parsed_data

    def _clean_dataframe_for_json(self, df: pd.DataFrame) -> list[dict[str, Any]]:
        """Clean DataFrame to ensure it can be serialized to JSON."""
        # Replace NaN and NaT values with "nan" string
        df_cleaned = df.replace({pd.NA: "nan", pd.NaT: "nan"})
        df_cleaned = df_cleaned.where(pd.notna(df_cleaned), "nan")

        # Convert to list of dictionaries
        records = df_cleaned.to_dict("records")

        # Additional cleaning for any remaining problematic values
        for record in records:
            for key, value in record.items():
                if pd.isna(value) or value == "NaN" or value == "NaT":
                    record[key] = "nan"
                elif isinstance(value, (pd.Timestamp, pd.DatetimeTZDtype)):
                    # Convert pandas timestamps to ISO format strings
                    record[key] = value.isoformat() if pd.notna(value) else "nan"

        return records

    async def _parse_excel_file(self, file_path: str) -> list[dict[str, Any]]:
        """Parse Excel file and return list of dictionaries."""
        try:
            # Read Excel file with pandas, specifying engine explicitly
            df = pd.read_excel(file_path, engine="calamine")
            # Clean and convert DataFrame to list of dictionaries
            return self._clean_dataframe_for_json(df)
        except ImportError as e:
            raise InvalidFileType(
                file_url=self.file_url,
                file_type=self.file_type,
                error=f"Missing required dependency for Excel parsing: {str(e)}. Please install calamine: pip install python-calamine",
            )
        except Exception as e:
            raise InvalidFileType(
                file_url=self.file_url, file_type=self.file_type, error=f"Failed to parse Excel file: {str(e)}"
            )

    async def _parse_pdf_file(self, file_path: str) -> str:
        """Parse PDF file and return extracted text.

        Uses the shared PDF parsing utility that tries pypdf first,
        then falls back to pdfplumber if pypdf fails.
        """
        try:
            return extract_pdf_file(file_path, file_identifier=self.file_url)
        except PDFParsingError as e:
            raise InvalidFileType(file_url=self.file_url, file_type=self.file_type, error=str(e))

    async def _parse_image_file(self, file_path: str) -> str:
        """Parse image file using vision LLM for OCR."""
        try:
            with open(file_path, "rb") as f:
                image_bytes = f.read()

            llm_prompt = prompt_engine.load_prompt("extract-text-from-image")
            llm_api_handler = LLMAPIHandlerFactory.get_override_llm_api_handler(
                self.override_llm_key, default=app.LLM_API_HANDLER
            )
            llm_response = await llm_api_handler(
                prompt=llm_prompt,
                prompt_name="extract-text-from-image",
                screenshots=[image_bytes],
                force_dict=True,
            )
            return llm_response.get("extracted_text", "")
        except Exception:
            LOG.exception("Failed to extract text from image via OCR", file_url=self.file_url)
            raise

    async def _parse_docx_file(self, file_path: str, max_tokens: int = MAX_FILE_PARSE_INPUT_TOKENS) -> str:
        """Parse DOCX file and return extracted text.

        Extracts text from all paragraphs and tables in the document,
        respecting the token limit.
        """
        try:
            document = docx.Document(file_path)
            text_parts = []
            current_tokens = 0
            truncated = False

            # Extract text from paragraphs
            for paragraph in document.paragraphs:
                if paragraph.text.strip():
                    para_tokens = count_tokens(paragraph.text)
                    if max_tokens and current_tokens + para_tokens > max_tokens:
                        LOG.warning(
                            "DOCX text exceeds token limit, truncating",
                            file_url=self.file_url,
                            current_tokens=current_tokens,
                            max_tokens=max_tokens,
                        )
                        truncated = True
                        break
                    text_parts.append(paragraph.text)
                    current_tokens += para_tokens

            # Extract text from tables (only if not already truncated)
            if not truncated:
                for table in document.tables:
                    if truncated:
                        break
                    for row in table.rows:
                        row_text = []
                        for cell in row.cells:
                            cell_text = cell.text.strip()
                            if cell_text:
                                row_text.append(cell_text)
                        if row_text:
                            # Cells of a row are joined with " | " as one text line.
                            row_str = " | ".join(row_text)
                            row_tokens = count_tokens(row_str)
                            if max_tokens and current_tokens + row_tokens > max_tokens:
                                LOG.warning(
                                    "DOCX text exceeds token limit, truncating at table",
                                    file_url=self.file_url,
                                    current_tokens=current_tokens,
                                    max_tokens=max_tokens,
                                )
                                truncated = True
                                break
                            text_parts.append(row_str)
                            current_tokens += row_tokens

            extracted_text = "\n".join(text_parts)
            extracted_text = sanitize_postgres_text(extracted_text)
            LOG.info(
                "Successfully parsed DOCX file",
                file_url=self.file_url,
                paragraph_count=len(document.paragraphs),
                table_count=len(document.tables),
                text_length=len(extracted_text),
                truncated=truncated,
            )
            return extracted_text
        except Exception as e:
            raise InvalidFileType(
                file_url=self.file_url, file_type=self.file_type, error=f"Failed to parse DOCX file: {str(e)}"
            )

    async def _extract_with_ai(
        self, content: str | list[dict[str, Any]], workflow_run_context: WorkflowRunContext
    ) -> dict[str, Any]:
        """Extract structured data using AI based on json_schema."""
        # Use local variable to avoid mutating the instance
        schema_to_use = self.json_schema or {
            "type": "object",
            "properties": {
                "output": {
                    "type": "object",
                    "description": "Information extracted from the file",
                }
            },
        }

        # Convert content to string for AI processing
        if isinstance(content, list):
            # For CSV/Excel data, convert to a readable format
            content_str = json.dumps(content, indent=2)
        else:
            content_str = content

        llm_prompt = prompt_engine.load_prompt(
            "extract-information-from-file-text", extracted_text_content=content_str, json_schema=schema_to_use
        )

        llm_key = self.override_llm_key
        llm_api_handler = LLMAPIHandlerFactory.get_override_llm_api_handler(llm_key, default=app.LLM_API_HANDLER)

        llm_response = await llm_api_handler(
            prompt=llm_prompt, prompt_name="extract-information-from-file-text", force_dict=False
        )
        return llm_response

    async def execute(
        self,
        workflow_run_id: str,
        workflow_run_block_id: str,
        organization_id: str | None = None,
        browser_session_id: str | None = None,
        **kwargs: dict,
    ) -> BlockResult:
        """Download, validate, and parse the file, then record the parsed (or AI-extracted) data."""
        workflow_run_context = self.get_workflow_run_context(workflow_run_id)

        # ``file_url`` may be a workflow parameter key rather than a literal URL;
        # resolve it to the parameter's value when one exists.
        if (
            self.file_url
            and workflow_run_context.has_parameter(self.file_url)
            and workflow_run_context.has_value(self.file_url)
        ):
            file_url_parameter_value = workflow_run_context.get_value(self.file_url)
            if file_url_parameter_value:
                LOG.info(
                    "FileParserBlock: File URL is parameterized, using parameter value",
                    file_url_parameter_value=file_url_parameter_value,
                    file_url_parameter_key=self.file_url,
                )
                self.file_url = file_url_parameter_value

        try:
            self.format_potential_template_parameters(workflow_run_context)
        except Exception as e:
            return await self.build_block_result(
                success=False,
                failure_reason=f"Failed to format jinja template: {str(e)}",
                output_parameter_value=None,
                status=BlockStatus.failed,
                workflow_run_block_id=workflow_run_block_id,
                organization_id=organization_id,
            )

        # Download the file
        if self.file_url.startswith("s3://"):
            file_path = await download_from_s3(self.get_async_aws_client(), self.file_url)
        else:
            file_path = await download_file(self.file_url)

        # Auto-detect file type if not explicitly set (IMAGE/EXCEL/PDF/DOCX are explicit choices)
        if self.file_type not in (FileType.IMAGE, FileType.EXCEL, FileType.PDF, FileType.DOCX):
            self.file_type = self._detect_file_type_from_url(self.file_url)

        # Validate the file type
        self.validate_file_type(self.file_url, file_path)

        LOG.debug(
            "FileParserBlock: After file type validation",
            file_type=self.file_type,
            json_schema_present=self.json_schema is not None,
            json_schema_type=type(self.json_schema),
        )

        # Parse the file based on type
        parsed_data: str | list[dict[str, Any]]
        if self.file_type == FileType.CSV:
            parsed_data = await self._parse_csv_file(file_path)
        elif self.file_type == FileType.EXCEL:
            parsed_data = await self._parse_excel_file(file_path)
        elif self.file_type == FileType.PDF:
            parsed_data = await self._parse_pdf_file(file_path)
        elif self.file_type == FileType.IMAGE:
            parsed_data = await self._parse_image_file(file_path)
        elif self.file_type == FileType.DOCX:
            parsed_data = await self._parse_docx_file(file_path)
        else:
            return await self.build_block_result(
                success=False,
                failure_reason=f"Unsupported file type: {self.file_type}",
                output_parameter_value=None,
                status=BlockStatus.failed,
                workflow_run_block_id=workflow_run_block_id,
                organization_id=organization_id,
            )

        # If json_schema is provided, use AI to extract structured data
        final_data: str | list[dict[str, Any]] | dict[str, Any]
        LOG.debug(
            "FileParserBlock: JSON schema check",
            has_json_schema=self.json_schema is not None,
            json_schema_type=type(self.json_schema),
            json_schema=self.json_schema,
        )

        if self.json_schema:
            try:
                ai_extracted_data = await self._extract_with_ai(parsed_data, workflow_run_context)
                final_data = ai_extracted_data
            except Exception as e:
                return await self.build_block_result(
                    success=False,
                    failure_reason=f"Failed to extract data with AI: {str(e)}",
                    output_parameter_value=None,
                    status=BlockStatus.failed,
                    workflow_run_block_id=workflow_run_block_id,
                    organization_id=organization_id,
                )
        else:
            # Return raw parsed data
            final_data = parsed_data

        # Record the parsed data
        await self.record_output_parameter_value(workflow_run_context, workflow_run_id, final_data)
        return await self.build_block_result(
            success=True,
            failure_reason=None,
            output_parameter_value=final_data,
            status=BlockStatus.completed,
            workflow_run_block_id=workflow_run_block_id,
            organization_id=organization_id,
        )
|
||
|
||
|
||
class PDFParserBlock(Block):
    """
    DEPRECATED: Use FileParserBlock with file_type=FileType.PDF instead.
    This block will be removed in a future version.

    Downloads a PDF (HTTP or s3://), extracts its text, and runs LLM extraction
    against ``json_schema`` (defaulting to a generic object schema when unset).
    """

    # There is a mypy bug with Literal. Without the type: ignore, mypy will raise an error:
    # Parameter 1 of Literal[...] cannot be of type "Any"
    block_type: Literal[BlockType.PDF_PARSER] = BlockType.PDF_PARSER  # type: ignore

    # URL of the PDF; may be a workflow parameter key or a Jinja template.
    file_url: str
    # Schema for LLM extraction; a generic default is assigned in execute() when None.
    json_schema: dict[str, Any] | None = None

    def get_all_parameters(
        self,
        workflow_run_id: str,
    ) -> list[PARAMETER_TYPE]:
        """Return the workflow parameter referenced by ``file_url``, if any."""
        workflow_run_context = self.get_workflow_run_context(workflow_run_id)
        if self.file_url and workflow_run_context.has_parameter(self.file_url):
            return [workflow_run_context.get_parameter(self.file_url)]
        return []

    def format_potential_template_parameters(self, workflow_run_context: WorkflowRunContext) -> None:
        """Render any Jinja template syntax in ``file_url`` against the run context."""
        self.file_url = self.format_block_parameter_template_from_workflow_run_context(
            self.file_url, workflow_run_context
        )

    async def execute(
        self,
        workflow_run_id: str,
        workflow_run_block_id: str,
        organization_id: str | None = None,
        browser_session_id: str | None = None,
        **kwargs: dict,
    ) -> BlockResult:
        """Download the PDF, extract text, run LLM extraction, and record the result."""
        workflow_run_context = self.get_workflow_run_context(workflow_run_id)
        # ``file_url`` may be a workflow parameter key; resolve it to its value when one exists.
        if (
            self.file_url
            and workflow_run_context.has_parameter(self.file_url)
            and workflow_run_context.has_value(self.file_url)
        ):
            file_url_parameter_value = workflow_run_context.get_value(self.file_url)
            if file_url_parameter_value:
                LOG.info(
                    "PDFParserBlock: File URL is parameterized, using parameter value",
                    file_url_parameter_value=file_url_parameter_value,
                    file_url_parameter_key=self.file_url,
                )
                self.file_url = file_url_parameter_value

        try:
            self.format_potential_template_parameters(workflow_run_context)
        except Exception as e:
            return await self.build_block_result(
                success=False,
                failure_reason=f"Failed to format jinja template: {str(e)}",
                output_parameter_value=None,
                status=BlockStatus.failed,
                workflow_run_block_id=workflow_run_block_id,
                organization_id=organization_id,
            )

        # Download the file
        file_path = None
        if self.file_url.startswith("s3://"):
            file_path = await download_from_s3(self.get_async_aws_client(), self.file_url)
        else:
            file_path = await download_file(self.file_url)

        try:
            extracted_text = extract_pdf_file(file_path, file_identifier=self.file_url)
        except PDFParsingError:
            return await self.build_block_result(
                success=False,
                failure_reason="Failed to parse PDF file",
                output_parameter_value=None,
                status=BlockStatus.failed,
                workflow_run_block_id=workflow_run_block_id,
                organization_id=organization_id,
            )

        # NOTE(review): unlike FileParserBlock._extract_with_ai, this assigns the
        # default schema onto the instance rather than a local variable.
        if not self.json_schema:
            self.json_schema = {
                "type": "object",
                "properties": {
                    "output": {
                        "type": "object",
                        "description": "Information extracted from the text",
                    }
                },
            }

        llm_prompt = prompt_engine.load_prompt(
            "extract-information-from-file-text", extracted_text_content=extracted_text, json_schema=self.json_schema
        )
        llm_response = await app.LLM_API_HANDLER(
            prompt=llm_prompt, prompt_name="extract-information-from-file-text", force_dict=False
        )
        # Record the parsed data
        await self.record_output_parameter_value(workflow_run_context, workflow_run_id, llm_response)
        return await self.build_block_result(
            success=True,
            failure_reason=None,
            output_parameter_value=llm_response,
            status=BlockStatus.completed,
            workflow_run_block_id=workflow_run_block_id,
            organization_id=organization_id,
        )
|
||
|
||
|
||
class WaitBlock(Block):
    """Pause workflow execution for a fixed number of seconds, then complete."""

    # There is a mypy bug with Literal. Without the type: ignore, mypy will raise an error:
    # Parameter 1 of Literal[...] cannot be of type "Any"
    block_type: Literal[BlockType.WAIT] = BlockType.WAIT  # type: ignore

    wait_sec: int
    parameters: list[PARAMETER_TYPE] = []

    def get_all_parameters(
        self,
        workflow_run_id: str,
    ) -> list[PARAMETER_TYPE]:
        """The wait block exposes exactly its configured parameter list."""
        return self.parameters

    async def execute(
        self,
        workflow_run_id: str,
        workflow_run_block_id: str,
        organization_id: str | None = None,
        browser_session_id: str | None = None,
        **kwargs: dict,
    ) -> BlockResult:
        """Record the wait duration, sleep for ``wait_sec`` seconds, and report success."""
        # TODO: we need to support to interrupt the sleep when the workflow run failed/cancelled/terminated
        await app.DATABASE.update_workflow_run_block(
            workflow_run_block_id=workflow_run_block_id,
            organization_id=organization_id,
            wait_sec=self.wait_sec,
        )
        LOG.info(
            "Going to pause the workflow for a while",
            second=self.wait_sec,
            workflow_run_id=workflow_run_id,
        )
        await asyncio.sleep(self.wait_sec)

        # The sleep finished without interruption, so the block always succeeds.
        run_context = self.get_workflow_run_context(workflow_run_id)
        wait_output = {"success": True}
        await self.record_output_parameter_value(run_context, workflow_run_id, wait_output)
        return await self.build_block_result(
            success=True,
            failure_reason=None,
            output_parameter_value=wait_output,
            status=BlockStatus.completed,
            workflow_run_block_id=workflow_run_block_id,
            organization_id=organization_id,
        )
|
||
|
||
|
||
class HumanInteractionBlock(BaseTaskBlock):
    """
    A block for human/agent interaction.

    For the first pass at this, the implicit behaviour is that the user is given a single binary
    choice (a go//no-go).

    If the human:
    - chooses positively, the workflow continues
    - chooses negatively, the workflow is terminated
    - does not respond within the timeout period, the workflow terminates
    """

    # There is a mypy bug with Literal. Without the type: ignore, mypy will raise an error:
    # Parameter 1 of Literal[...] cannot be of type "Any"
    block_type: Literal[BlockType.HUMAN_INTERACTION] = BlockType.HUMAN_INTERACTION  # type: ignore

    # Instructions shown/emailed to the human reviewer; Jinja-templatable.
    instructions: str = "Please review and approve or reject to continue the workflow."
    positive_descriptor: str = "Approve"
    negative_descriptor: str = "Reject"
    timeout_seconds: int = 60 * 60 * 2  # two hours

    # email options
    sender: str = "hello@skyvern.com"
    recipients: list[str] = []
    subject: str = "Human interaction required for workflow run"
    body: str = "Your interaction is required for a workflow run!"

    def format_potential_template_parameters(self, workflow_run_context: WorkflowRunContext) -> None:
        """Render Jinja template syntax in every user-facing field against the run context."""
        super().format_potential_template_parameters(workflow_run_context)

        self.instructions = self.format_block_parameter_template_from_workflow_run_context(
            self.instructions, workflow_run_context
        )

        self.body = self.format_block_parameter_template_from_workflow_run_context(self.body, workflow_run_context)

        self.subject = self.format_block_parameter_template_from_workflow_run_context(
            self.subject, workflow_run_context
        )

        # Each recipient address may itself be a template.
        formatted: list[str] = []
        for recipient in self.recipients:
            formatted.append(
                self.format_block_parameter_template_from_workflow_run_context(recipient, workflow_run_context)
            )

        self.recipients = formatted

        self.negative_descriptor = self.format_block_parameter_template_from_workflow_run_context(
            self.negative_descriptor, workflow_run_context
        )

        self.positive_descriptor = self.format_block_parameter_template_from_workflow_run_context(
            self.positive_descriptor, workflow_run_context
        )

    async def execute(
        self,
        workflow_run_id: str,
        workflow_run_block_id: str,
        organization_id: str | None = None,
        browser_session_id: str | None = None,
        **kwargs: dict,
    ) -> BlockResult:
        """Pause the workflow run, notify recipients by email, and poll until the run's
        status leaves ``paused`` (interaction happened) or ``timeout_seconds`` elapses.
        """
        # avoid circular import
        from skyvern.forge.sdk.workflow.models.workflow import WorkflowRunStatus  # noqa: PLC0415

        workflow_run_context = self.get_workflow_run_context(workflow_run_id)

        try:
            self.format_potential_template_parameters(workflow_run_context)
        except Exception as e:
            return await self.build_block_result(
                success=False,
                failure_reason=f"Failed to format jinja template: {str(e)}",
                output_parameter_value=None,
                status=BlockStatus.failed,
                workflow_run_block_id=workflow_run_block_id,
                organization_id=organization_id,
            )

        # Persist the rendered interaction details on the block record.
        await app.DATABASE.update_workflow_run_block(
            workflow_run_block_id=workflow_run_block_id,
            organization_id=organization_id,
            recipients=self.recipients,
            subject=self.subject,
            body=self.body,
            instructions=self.instructions,
            positive_descriptor=self.positive_descriptor,
            negative_descriptor=self.negative_descriptor,
        )

        LOG.info(
            "Pausing workflow for human interaction",
            workflow_run_id=workflow_run_id,
            recipients=self.recipients,
            timeout=self.timeout_seconds,
            browser_session_id=browser_session_id,
        )

        # Mark the run paused; a human response is expected to move it out of this state.
        await app.DATABASE.update_workflow_run(
            workflow_run_id=workflow_run_id,
            status=WorkflowRunStatus.paused,
        )

        workflow_run = await app.DATABASE.get_workflow_run(
            workflow_run_id=workflow_run_id,
            organization_id=organization_id,
        )

        if not workflow_run:
            return await self.build_block_result(
                success=False,
                failure_reason="Workflow run not found",
                output_parameter_value=None,
                status=BlockStatus.failed,
                workflow_run_block_id=workflow_run_block_id,
                organization_id=organization_id,
            )

        # Build the notification email pointing the reviewer at the run overview page.
        app_url = f"{settings.SKYVERN_APP_URL}/runs/{workflow_run_id}/overview"
        body = f"{self.body}\n\nKindly visit {app_url}\n\n{self.instructions}\n\n"
        subject = f"{self.subject} - Workflow Run ID: {workflow_run_id}"

        try:
            await email.send(
                body=body,
                sender=self.sender,
                subject=subject,
                recipients=self.recipients,
            )

            email_success = True
            email_failure_reason = None
        except Exception as ex:
            LOG.error(
                "Failed to send human interaction email",
                workflow_run_id=workflow_run_id,
                error=str(ex),
                browser_session_id=browser_session_id,
            )
            email_success = False
            email_failure_reason = str(ex)

        # Without a delivered email nobody can respond, so fail fast rather than wait.
        if not email_success:
            return await self.build_block_result(
                success=False,
                failure_reason=f"Failed to send human interaction email: {email_failure_reason or 'email failed'}",
                output_parameter_value=None,
                status=BlockStatus.failed,
                workflow_run_block_id=workflow_run_block_id,
                organization_id=organization_id,
            )

        # Wait for the timeout_seconds or until the workflow run status changes from paused
        start_time = asyncio.get_event_loop().time()
        check_interval = 5  # Check every 5 seconds
        log_that_we_are_waiting = True
        log_wait = 0

        while True:
            # Throttle the "waiting" log line to roughly once a minute.
            if not log_that_we_are_waiting:
                log_wait += check_interval
                if log_wait >= 60:  # Log every 1 minute
                    log_that_we_are_waiting = True
                    log_wait = 0

            elapsed_time_seconds = asyncio.get_event_loop().time() - start_time

            if log_that_we_are_waiting:
                LOG.info(
                    "Waiting for human interaction...",
                    workflow_run_id=workflow_run_id,
                    elapsed_time_seconds=elapsed_time_seconds,
                    timeout_seconds=self.timeout_seconds,
                    browser_session_id=browser_session_id,
                )
                log_that_we_are_waiting = False

            # Check if timeout_seconds has elapsed
            if elapsed_time_seconds >= self.timeout_seconds:
                LOG.info(
                    "Human Interaction block timeout_seconds reached",
                    workflow_run_id=workflow_run_id,
                    elapsed_time_seconds=elapsed_time_seconds,
                    browser_session_id=browser_session_id,
                )

                workflow_run_context = self.get_workflow_run_context(workflow_run_id)
                success = False
                reason = "Timeout elapsed with no human interaction"
                result_dict = {"success": success, "reason": reason}

                await self.record_output_parameter_value(workflow_run_context, workflow_run_id, result_dict)

                return await self.build_block_result(
                    success=success,
                    failure_reason=reason,
                    output_parameter_value=result_dict,
                    status=BlockStatus.timed_out,
                    workflow_run_block_id=workflow_run_block_id,
                    organization_id=organization_id,
                )

            workflow_run = await app.DATABASE.get_workflow_run(
                workflow_run_id=workflow_run_id,
                organization_id=organization_id,
            )

            # Any status other than ``paused`` means a human (or the system) acted;
            # the block reports success and records the new status as the reason.
            if workflow_run and workflow_run.status != WorkflowRunStatus.paused:
                LOG.info(
                    "Workflow run status changed from paused",
                    workflow_run_id=workflow_run_id,
                    new_status=workflow_run.status,
                    browser_session_id=browser_session_id,
                )

                workflow_run_context = self.get_workflow_run_context(workflow_run_id)
                result_dict = {"success": True, "reason": f"status_changed:{workflow_run.status}"}

                await self.record_output_parameter_value(workflow_run_context, workflow_run_id, result_dict)

                return await self.build_block_result(
                    success=True,
                    failure_reason=None,
                    output_parameter_value=result_dict,
                    status=BlockStatus.completed,
                    workflow_run_block_id=workflow_run_block_id,
                    organization_id=organization_id,
                )

            # Never sleep past the deadline: cap the final sleep at the remaining time.
            await asyncio.sleep(min(check_interval, self.timeout_seconds - elapsed_time_seconds))
|
||
|
||
class ValidationBlock(BaseTaskBlock):
    """A task block that validates workflow state produced by earlier blocks.

    Delegates actual execution to BaseTaskBlock; refuses to run as the first
    block of a workflow since there is nothing yet to validate.
    """

    # There is a mypy bug with Literal. Without the type: ignore, mypy will raise an error:
    # Parameter 1 of Literal[...] cannot be of type "Any"
    block_type: Literal[BlockType.VALIDATION] = BlockType.VALIDATION  # type: ignore

    def get_all_parameters(
        self,
        workflow_run_id: str,
    ) -> list[PARAMETER_TYPE]:
        """Validation uses only its configured parameter list."""
        return self.parameters

    async def execute(
        self,
        workflow_run_id: str,
        workflow_run_block_id: str,
        organization_id: str | None = None,
        browser_session_id: str | None = None,
        **kwargs: dict,
    ) -> BlockResult:
        """Terminate if this is the first block of the run; otherwise run as a normal task."""
        task_order, _ = await self.get_task_order(workflow_run_id, 0)
        is_first_task = task_order == 0
        if is_first_task:
            return await self.build_block_result(
                success=False,
                failure_reason="Validation block should not be the first block",
                output_parameter_value=None,
                status=BlockStatus.terminated,
                workflow_run_block_id=workflow_run_block_id,
                organization_id=organization_id,
            )

        # BUGFIX: previously this call omitted browser_session_id (losing the persistent
        # browser session) and passed ``kwargs=kwargs``, which buried the caller's keyword
        # arguments under a literal "kwargs" key in the parent's **kwargs instead of
        # forwarding them. Forward both properly.
        return await super().execute(
            workflow_run_id=workflow_run_id,
            workflow_run_block_id=workflow_run_block_id,
            organization_id=organization_id,
            browser_session_id=browser_session_id,
            **kwargs,
        )
|
||
|
||
|
||
class ActionBlock(BaseTaskBlock):
    """Task block variant distinguished only by its ``block_type`` discriminator;
    execution behavior is inherited unchanged from BaseTaskBlock."""

    # There is a mypy bug with Literal. Without the type: ignore, mypy will raise an error:
    # Parameter 1 of Literal[...] cannot be of type "Any"
    block_type: Literal[BlockType.ACTION] = BlockType.ACTION  # type: ignore
|
||
|
||
|
||
class NavigationBlock(BaseTaskBlock):
    """Task block variant that requires a navigation goal; execution behavior
    is inherited unchanged from BaseTaskBlock."""

    # There is a mypy bug with Literal. Without the type: ignore, mypy will raise an error:
    # Parameter 1 of Literal[...] cannot be of type "Any"
    block_type: Literal[BlockType.NAVIGATION] = BlockType.NAVIGATION  # type: ignore

    # Natural-language goal describing what the navigation should achieve; required.
    navigation_goal: str
|
||
|
||
|
||
class ExtractionBlock(BaseTaskBlock):
    """Task block variant that requires a data-extraction goal; execution
    behavior is inherited unchanged from BaseTaskBlock."""

    # There is a mypy bug with Literal. Without the type: ignore, mypy will raise an error:
    # Parameter 1 of Literal[...] cannot be of type "Any"
    block_type: Literal[BlockType.EXTRACTION] = BlockType.EXTRACTION  # type: ignore

    # Natural-language description of the data to extract; required.
    data_extraction_goal: str
|
||
|
||
|
||
class LoginBlock(BaseTaskBlock):
    """Task block variant distinguished only by its ``block_type`` discriminator;
    execution behavior is inherited unchanged from BaseTaskBlock."""

    # There is a mypy bug with Literal. Without the type: ignore, mypy will raise an error:
    # Parameter 1 of Literal[...] cannot be of type "Any"
    block_type: Literal[BlockType.LOGIN] = BlockType.LOGIN  # type: ignore
|
||
|
||
|
||
class FileDownloadBlock(BaseTaskBlock):
    """Task block variant distinguished only by its ``block_type`` discriminator;
    execution behavior is inherited unchanged from BaseTaskBlock."""

    # There is a mypy bug with Literal. Without the type: ignore, mypy will raise an error:
    # Parameter 1 of Literal[...] cannot be of type "Any"
    block_type: Literal[BlockType.FILE_DOWNLOAD] = BlockType.FILE_DOWNLOAD  # type: ignore
|
||
|
||
|
||
class UrlBlock(BaseTaskBlock):
    """Task block variant (GOTO_URL) that requires a destination URL; execution
    behavior is inherited unchanged from BaseTaskBlock."""

    # There is a mypy bug with Literal. Without the type: ignore, mypy will raise an error:
    # Parameter 1 of Literal[...] cannot be of type "Any"
    block_type: Literal[BlockType.GOTO_URL] = BlockType.GOTO_URL  # type: ignore
    # Destination URL; required.
    url: str
|
||
|
||
|
||
class TaskV2Block(Block):
    """Run a full Task v2 ("observer cruise") as a child workflow run.

    The prompt/url/TOTP fields are rendered as Jinja templates, a task v2 is
    initialized and executed through ``task_v2_service``, and the child run's
    status, output, and screenshots are reported as this block's result.
    """

    # There is a mypy bug with Literal. Without the type: ignore, mypy will raise an error:
    # Parameter 1 of Literal[...] cannot be of type "Any"
    block_type: Literal[BlockType.TaskV2] = BlockType.TaskV2  # type: ignore
    # Natural-language instruction for the task; may contain Jinja templates.
    prompt: str
    # Starting URL; when omitted, the browser's current page URL is used if available.
    url: str | None = None
    # Endpoint used for TOTP verification during 2FA, if any.
    totp_verification_url: str | None = None
    # Identifier used to look up TOTP codes, if any.
    totp_identifier: str | None = None
    max_iterations: int = settings.MAX_ITERATIONS_PER_TASK_V2
    max_steps: int = settings.MAX_STEPS_PER_TASK_V2

    def _resolve_totp_identifier(self, workflow_run_context: WorkflowRunContext) -> str | None:
        """Prefer the explicitly configured TOTP identifier; otherwise fall back
        to the first credential-derived identifier in the run context, if any."""
        if self.totp_identifier:
            return self.totp_identifier
        if workflow_run_context.credential_totp_identifiers:
            return next(iter(workflow_run_context.credential_totp_identifiers.values()), None)
        return None

    def get_all_parameters(
        self,
        workflow_run_id: str,
    ) -> list[PARAMETER_TYPE]:
        """Task v2 blocks declare no workflow parameters of their own."""
        return []

    def format_potential_template_parameters(self, workflow_run_context: WorkflowRunContext) -> None:
        """Render Jinja templates in the prompt/url/TOTP fields in place."""
        self.prompt = self.format_block_parameter_template_from_workflow_run_context(self.prompt, workflow_run_context)
        if self.url:
            self.url = self.format_block_parameter_template_from_workflow_run_context(self.url, workflow_run_context)

        if self.totp_identifier:
            self.totp_identifier = self.format_block_parameter_template_from_workflow_run_context(
                self.totp_identifier, workflow_run_context
            )

        if self.totp_verification_url:
            self.totp_verification_url = self.format_block_parameter_template_from_workflow_run_context(
                self.totp_verification_url, workflow_run_context
            )
            # Normalize to a schemed, validated URL after template rendering.
            self.totp_verification_url = prepend_scheme_and_validate_url(self.totp_verification_url)

    async def execute(
        self,
        workflow_run_id: str,
        workflow_run_block_id: str,
        organization_id: str | None = None,
        browser_session_id: str | None = None,
        **kwargs: dict,
    ) -> BlockResult:
        """Initialize, queue, and run the task v2, then translate its final
        status into a BlockResult.

        Raises:
            ValueError: when organization_id is missing, or the organization /
                workflow run cannot be found.
        """
        # Imported lazily to avoid circular imports at module load time.
        from skyvern.forge.sdk.workflow.models.workflow import WorkflowRunStatus  # noqa: PLC0415
        from skyvern.services import task_v2_service  # noqa: PLC0415

        workflow_run_context = self.get_workflow_run_context(workflow_run_id)

        # Simple template resolution - no complex dynamic resolution to prevent recursion
        try:
            self.format_potential_template_parameters(workflow_run_context)

            # Use the resolved values directly
            resolved_prompt = self.prompt
            resolved_url = self.url
            resolved_totp_identifier = self._resolve_totp_identifier(workflow_run_context)
            resolved_totp_verification_url = self.totp_verification_url

        except Exception as e:
            output_reason = f"Failed to format jinja template: {str(e)}"
            await self.record_output_parameter_value(
                workflow_run_context, workflow_run_id, {"failure_reason": output_reason}
            )
            return await self.build_block_result(
                success=False,
                failure_reason=f"Failed to format jinja template: {str(e)}",
                output_parameter_value=None,
                status=BlockStatus.failed,
                workflow_run_block_id=workflow_run_block_id,
                organization_id=organization_id,
            )

        # No explicit URL: reuse the page the browser is already on, if any.
        if not resolved_url:
            browser_state = app.BROWSER_MANAGER.get_for_workflow_run(workflow_run_id)
            if browser_state:
                page = await browser_state.get_working_page()
                if page:
                    current_url = await SkyvernFrame.get_url(frame=page)
                    if current_url != "about:blank":
                        resolved_url = current_url

        if not organization_id:
            raise ValueError("Running TaskV2Block requires organization_id")

        organization = await app.DATABASE.get_organization(organization_id)
        if not organization:
            raise ValueError(f"Organization not found {organization_id}")
        workflow_run = await app.DATABASE.get_workflow_run(workflow_run_id, organization_id)
        if not workflow_run:
            raise ValueError(f"WorkflowRun not found {workflow_run_id} when running TaskV2Block")
        try:
            task_v2 = await task_v2_service.initialize_task_v2(
                organization=organization,
                user_prompt=resolved_prompt,
                user_url=resolved_url,
                parent_workflow_run_id=workflow_run_id,
                proxy_location=workflow_run.proxy_location,
                totp_identifier=resolved_totp_identifier,
                totp_verification_url=resolved_totp_verification_url,
                max_screenshot_scrolling_times=workflow_run.max_screenshot_scrolls,
            )
            await app.DATABASE.update_task_v2(
                task_v2.observer_cruise_id, status=TaskV2Status.queued, organization_id=organization_id
            )
            # The task v2 runs as its own (child) workflow run: mark it queued
            # and link the child run to this block.
            if task_v2.workflow_run_id:
                await app.DATABASE.update_workflow_run(
                    workflow_run_id=task_v2.workflow_run_id,
                    status=WorkflowRunStatus.queued,
                )
                await app.DATABASE.update_workflow_run_block(
                    workflow_run_block_id=workflow_run_block_id,
                    organization_id=organization_id,
                    block_workflow_run_id=task_v2.workflow_run_id,
                )

            task_v2 = await task_v2_service.run_task_v2(
                organization=organization,
                task_v2_id=task_v2.observer_cruise_id,
                request_id=None,
                max_steps_override=self.max_steps,
                browser_session_id=browser_session_id,
            )
        finally:
            # Restore this (parent) workflow run's skyvern context — running
            # the child task may have replaced it. Preserve run ids already in
            # the current context when present.
            context: skyvern_context.SkyvernContext | None = skyvern_context.current()
            current_run_id = context.run_id if context and context.run_id else workflow_run_id
            root_workflow_run_id = (
                context.root_workflow_run_id if context and context.root_workflow_run_id else workflow_run_id
            )
            skyvern_context.set(
                skyvern_context.SkyvernContext(
                    organization_id=organization_id,
                    organization_name=organization.organization_name,
                    workflow_id=workflow_run.workflow_id,
                    workflow_permanent_id=workflow_run.workflow_permanent_id,
                    workflow_run_id=workflow_run_id,
                    root_workflow_run_id=root_workflow_run_id,
                    run_id=current_run_id,
                    browser_session_id=browser_session_id,
                    max_screenshot_scrolls=workflow_run.max_screenshot_scrolls,
                )
            )
        result_dict = None
        if task_v2:
            result_dict = task_v2.output

        # Determine block status from task status using module-level mapping
        block_status = TASKV2_TO_BLOCK_STATUS.get(task_v2.status, BlockStatus.failed)
        success = task_v2.status == TaskV2Status.completed
        failure_reason: str | None = None
        # Surface the child workflow run's failure reason as this block's.
        task_v2_workflow_run_id = task_v2.workflow_run_id
        if task_v2_workflow_run_id:
            task_v2_workflow_run = await app.DATABASE.get_workflow_run(task_v2_workflow_run_id, organization_id)
            if task_v2_workflow_run:
                failure_reason = task_v2_workflow_run.failure_reason

        # If continue_on_failure is True, we treat the block as successful even if the task failed
        # This allows the workflow to continue execution despite this block's failure
        task_screenshot_artifacts = await app.WORKFLOW_SERVICE.get_recent_task_screenshot_artifacts(
            organization_id=organization_id,
            task_v2_id=task_v2.observer_cruise_id,
        )
        workflow_screenshot_artifacts = await app.WORKFLOW_SERVICE.get_recent_workflow_screenshot_artifacts(
            workflow_run_id=workflow_run_id,
            organization_id=organization_id,
        )

        task_v2_output = {
            "task_id": task_v2.observer_cruise_id,
            "status": task_v2.status,
            "summary": task_v2.summary,
            "extracted_information": result_dict,
            "failure_reason": failure_reason,
            "task_screenshot_artifact_ids": [a.artifact_id for a in task_screenshot_artifacts],
            "workflow_screenshot_artifact_ids": [a.artifact_id for a in workflow_screenshot_artifacts],
        }
        await self.record_output_parameter_value(workflow_run_context, workflow_run_id, task_v2_output)
        return await self.build_block_result(
            success=success or self.continue_on_failure,
            failure_reason=failure_reason,
            output_parameter_value=result_dict,
            status=block_status,
            workflow_run_block_id=workflow_run_block_id,
            organization_id=organization_id,
        )
|
||
|
||
|
||
class HttpRequestBlock(Block):
    """Block that performs an arbitrary HTTP request and exposes the response
    (status code, headers, body) as the block's output.

    Supports multipart file uploads via ``files`` and, when
    ``save_response_as_file`` is set, saving the response body to disk instead
    of returning it inline.
    """

    # There is a mypy bug with Literal. Without the type: ignore, mypy will raise an error:
    # Parameter 1 of Literal[...] cannot be of type "Any"
    block_type: Literal[BlockType.HTTP_REQUEST] = BlockType.HTTP_REQUEST  # type: ignore

    # Individual HTTP parameters
    method: str = "GET"
    url: str | None = None
    headers: dict[str, str] | None = None
    body: dict[str, Any] | None = None  # Changed to consistently be dict only
    files: dict[str, str] | None = None  # Dictionary mapping field names to file paths for multipart file uploads
    timeout: int = 30  # seconds
    follow_redirects: bool = True
    download_filename: str | None = None  # optional filename when saving the response as a file
    save_response_as_file: bool = False

    # Parameters for templating
    parameters: list[PARAMETER_TYPE] = []

    # Allowed directories for local file access (class variable, not a Pydantic field)
    _allowed_dirs: ClassVar[list[str] | None] = None

    @classmethod
    def get_allowed_dirs(cls) -> list[str]:
        """Get the list of allowed directories for local file access.
        Computed once and cached for performance.
        """
        if cls._allowed_dirs is None:
            allowed_dirs: list[str] = []
            if settings.ARTIFACT_STORAGE_PATH:
                allowed_dirs.append(os.path.abspath(settings.ARTIFACT_STORAGE_PATH))
            if settings.VIDEO_PATH:
                allowed_dirs.append(os.path.abspath(settings.VIDEO_PATH))
            if settings.HAR_PATH:
                allowed_dirs.append(os.path.abspath(settings.HAR_PATH))
            if settings.LOG_PATH:
                allowed_dirs.append(os.path.abspath(settings.LOG_PATH))
            if settings.DOWNLOAD_PATH:
                allowed_dirs.append(os.path.abspath(settings.DOWNLOAD_PATH))
            cls._allowed_dirs = allowed_dirs
        return cls._allowed_dirs or []

    def get_all_parameters(
        self,
        workflow_run_id: str,
    ) -> list[PARAMETER_TYPE]:
        """Return the block's parameters, adding the URL's workflow parameter
        when the URL field names a registered parameter key."""
        parameters = self.parameters
        workflow_run_context = self.get_workflow_run_context(workflow_run_id)

        # Check if url is a parameter
        if self.url and workflow_run_context.has_parameter(self.url):
            if self.url not in [parameter.key for parameter in parameters]:
                # NOTE(review): `parameters` aliases self.parameters, so this
                # appends to the block's own list in place — confirm the
                # accumulation across calls is intended.
                parameters.append(workflow_run_context.get_parameter(self.url))

        return parameters

    def format_potential_template_parameters(self, workflow_run_context: WorkflowRunContext) -> None:
        """Format template parameters in the block fields"""
        # Secrets are force-included here because the outgoing request itself
        # may need them (e.g. auth headers); the recorded response is masked
        # later via mask_secrets_in_data.
        template_kwargs = {"force_include_secrets": True}

        def _render_templates_in_json(value: object) -> object:
            """
            Recursively render Jinja templates in nested JSON-like structures.

            This is required because HTTP request bodies are often deeply nested
            dict/list structures, and templates may appear at any depth.

            Supports {{ expr | json }} filter for type-preserving JSON injection.
            """
            if isinstance(value, str):
                rendered = self.format_block_parameter_template_from_workflow_run_context(
                    value, workflow_run_context, **template_kwargs
                )

                # A value wrapped entirely in the marker is decoded back into
                # its original JSON type (dict/list/number/...).
                if rendered.startswith(_JSON_TYPE_MARKER) and rendered.endswith(_JSON_TYPE_MARKER):
                    json_str = rendered[len(_JSON_TYPE_MARKER) : -len(_JSON_TYPE_MARKER)]
                    try:
                        return json.loads(json_str)
                    except json.JSONDecodeError:
                        raise FailedToFormatJinjaStyleParameter(
                            value, f"Raw JSON filter produced invalid JSON: {json_str}"
                        )
                elif _JSON_TYPE_MARKER in rendered:
                    # A marker mixed with other text means `| json` was
                    # combined with surrounding content, which is unsupported.
                    raise FailedToFormatJinjaStyleParameter(
                        value,
                        "The '| json' filter can only be used for complete value replacement. "
                        "It cannot be combined with other text (e.g., 'prefix-{{ val | json }}'). "
                        "Remove the surrounding text or remove the '| json' filter.",
                    )
                return rendered
            if isinstance(value, list):
                return [_render_templates_in_json(item) for item in value]
            if isinstance(value, dict):
                # Keys render to strings; values may change type via `| json`.
                return {
                    cast(str, _render_templates_in_json(key)): _render_templates_in_json(val)
                    for key, val in value.items()
                }
            return value

        if self.url:
            self.url = self.format_block_parameter_template_from_workflow_run_context(
                self.url, workflow_run_context, **template_kwargs
            )

        if self.body:
            self.body = cast(dict[str, Any], _render_templates_in_json(self.body))

        if self.files:
            self.files = cast(dict[str, str], _render_templates_in_json(self.files))

        if self.headers:
            self.headers = cast(dict[str, str], _render_templates_in_json(self.headers))

        if self.download_filename:
            self.download_filename = self.format_block_parameter_template_from_workflow_run_context(
                self.download_filename, workflow_run_context, **template_kwargs
            )

    def validate_url(self, url: str) -> bool:
        """Validate if the URL is properly formatted (has both a scheme and a host)."""
        try:
            result = urlparse(url)
            return all([result.scheme, result.netloc])
        except Exception:
            return False

    async def _execute_file_download(
        self,
        workflow_run_context: WorkflowRunContext,
        workflow_run_id: str,
        workflow_run_block_id: str,
        organization_id: str | None,
    ) -> BlockResult:
        """Download the response body to disk (save_response_as_file mode) and
        report the saved file's path/name/size as the block output."""
        if not self.url:
            return await self.build_block_result(
                success=False,
                failure_reason="URL is required for file download",
                output_parameter_value=None,
                status=BlockStatus.failed,
                workflow_run_block_id=workflow_run_block_id,
                organization_id=organization_id,
            )

        try:
            max_size_mb = settings.MAX_HTTP_DOWNLOAD_FILE_SIZE // (1024 * 1024)
            output_dir = get_download_dir(workflow_run_id)
            file_path = await download_file(
                self.url,
                max_size_mb=max_size_mb,
                headers=self.headers,
                output_dir=output_dir,
                filename=self.download_filename,
            )

            response_data = {
                "file_path": file_path,
                "file_name": os.path.basename(file_path),
                "file_size": os.path.getsize(file_path),
            }

            await self.record_output_parameter_value(workflow_run_context, workflow_run_id, response_data)

            return await self.build_block_result(
                success=True,
                failure_reason=None,
                output_parameter_value=response_data,
                status=BlockStatus.completed,
                workflow_run_block_id=workflow_run_block_id,
                organization_id=organization_id,
            )

        except aiohttp.ClientResponseError as e:
            # Server responded with an error status.
            error_data = {"error": f"HTTP {e.status}", "error_type": "http_error"}
            await self.record_output_parameter_value(workflow_run_context, workflow_run_id, error_data)
            return await self.build_block_result(
                success=False,
                failure_reason=f"HTTP {e.status}",
                output_parameter_value=error_data,
                status=BlockStatus.failed,
                workflow_run_block_id=workflow_run_block_id,
                organization_id=organization_id,
            )
        except DownloadFileMaxSizeExceeded as e:
            max_size_str = f"{e.max_size:.1f}"
            error_data = {"error": f"File exceeds maximum size of {max_size_str}MB", "error_type": "file_too_large"}
            await self.record_output_parameter_value(workflow_run_context, workflow_run_id, error_data)
            return await self.build_block_result(
                success=False,
                failure_reason=f"File exceeds maximum size of {max_size_str}MB",
                output_parameter_value=error_data,
                status=BlockStatus.failed,
                workflow_run_block_id=workflow_run_block_id,
                organization_id=organization_id,
            )
        except Exception as e:
            error_data = {"error": str(e), "error_type": "unknown"}
            LOG.warning(
                "File download failed",
                error=str(e),
                url=self.url,
                workflow_run_id=workflow_run_id,
            )
            await self.record_output_parameter_value(workflow_run_context, workflow_run_id, error_data)
            return await self.build_block_result(
                success=False,
                failure_reason=f"File download failed: {str(e)}",
                output_parameter_value=error_data,
                status=BlockStatus.failed,
                workflow_run_block_id=workflow_run_block_id,
                organization_id=organization_id,
            )

    async def execute(
        self,
        workflow_run_id: str,
        workflow_run_block_id: str,
        organization_id: str | None = None,
        browser_session_id: str | None = None,
        **kwargs: dict,
    ) -> BlockResult:
        """Execute the HTTP request and return the response.

        Renders templates, validates the URL, resolves any file-upload sources
        (remote URLs, S3 URIs, or whitelisted local paths), then either saves
        the response to disk or performs the request and records the masked
        response as the block output.
        """

        workflow_run_context = self.get_workflow_run_context(workflow_run_id)

        try:
            self.format_potential_template_parameters(workflow_run_context)
        except Exception as e:
            return await self.build_block_result(
                success=False,
                failure_reason=f"Failed to format jinja template: {str(e)}",
                output_parameter_value=None,
                status=BlockStatus.failed,
                workflow_run_block_id=workflow_run_block_id,
                organization_id=organization_id,
            )

        # Validate URL
        if not self.url:
            return await self.build_block_result(
                success=False,
                failure_reason="URL is required for HTTP request",
                output_parameter_value=None,
                status=BlockStatus.failed,
                workflow_run_block_id=workflow_run_block_id,
                organization_id=organization_id,
            )

        if not self.validate_url(self.url):
            return await self.build_block_result(
                success=False,
                failure_reason=f"Invalid URL format: {self.url}",
                output_parameter_value=None,
                status=BlockStatus.failed,
                workflow_run_block_id=workflow_run_block_id,
                organization_id=organization_id,
            )

        # Add default content-type as application/json if not provided (unless files are being uploaded)
        if not self.headers:
            self.headers = {}

        # If files are provided, don't set default Content-Type (aiohttp will set multipart/form-data)
        if not self.files:
            if not self.headers.get("Content-Type") and not self.headers.get("content-type"):
                LOG.info("Adding default content-type as application/json", headers=self.headers)
                self.headers["Content-Type"] = "application/json"

        # Download files from HTTP URLs or S3 URIs if needed
        # Also allow local files from allowed directories (ARTIFACT_STORAGE_PATH, VIDEO_PATH, HAR_PATH, LOG_PATH)
        if self.files:
            downloaded_files: dict[str, str] = {}
            for field_name, file_path in self.files.items():
                # Parse file path (handle file:// URI format)
                actual_file_path: str | None = None
                is_file_uri = file_path.startswith("file://")

                if is_file_uri:
                    try:
                        actual_file_path = parse_uri_to_path(file_path)
                    except ValueError as e:
                        return await self.build_block_result(
                            success=False,
                            failure_reason=f"Invalid file URI format: {file_path}. Error: {str(e)}",
                            output_parameter_value=None,
                            status=BlockStatus.failed,
                            workflow_run_block_id=workflow_run_block_id,
                            organization_id=organization_id,
                        )
                else:
                    actual_file_path = file_path

                # Check if file_path is a URL or S3 URI
                is_url = (
                    file_path.startswith("http://") or file_path.startswith("https://") or file_path.startswith("www.")
                )
                is_s3_uri = file_path.startswith("s3://")

                # Check if file is in allowed directories
                is_allowed_local_file = False
                if actual_file_path:
                    # Convert to absolute path for comparison (handles both absolute and relative paths)
                    abs_file_path = os.path.abspath(actual_file_path)

                    # Get allowed directory paths (using class method for cached result)
                    allowed_dirs = self.get_allowed_dirs()
                    LOG.debug("HttpRequestBlock: Allowed directories", allowed_dirs=allowed_dirs)

                    # Check if file is within any allowed directory
                    for allowed_dir in allowed_dirs:
                        # Use os.path.commonpath to check if file is within allowed directory
                        try:
                            common_path = os.path.commonpath([abs_file_path, allowed_dir])
                            if common_path == allowed_dir:
                                is_allowed_local_file = True
                                break
                        except ValueError:
                            # Paths are on different drives (Windows) or incompatible
                            continue

                # If not URL, S3 URI, or allowed local file, reject
                if not (is_url or is_s3_uri or is_allowed_local_file):
                    return await self.build_block_result(
                        success=False,
                        failure_reason=f"No permission to access local file: {file_path}. Only HTTP/HTTPS URLs, S3 URIs, or files in allowed directories are allowed.",
                        output_parameter_value=None,
                        status=BlockStatus.failed,
                        workflow_run_block_id=workflow_run_block_id,
                        organization_id=organization_id,
                    )

                # Handle different file sources
                if is_allowed_local_file:
                    # Use local file directly
                    local_file_path_str: str = cast(str, actual_file_path)
                    if not os.path.exists(local_file_path_str):
                        return await self.build_block_result(
                            success=False,
                            failure_reason=f"File not found: {local_file_path_str}",
                            output_parameter_value=None,
                            status=BlockStatus.failed,
                            workflow_run_block_id=workflow_run_block_id,
                            organization_id=organization_id,
                        )
                    downloaded_files[field_name] = local_file_path_str
                    LOG.info(
                        "HttpRequestBlock: Using allowed local file",
                        field_name=field_name,
                        file_path=local_file_path_str,
                    )
                else:
                    # Download from remote source
                    try:
                        LOG.info(
                            "HttpRequestBlock: Downloading file from remote source",
                            field_name=field_name,
                            file_path=file_path,
                            is_url=is_url,
                            is_s3_uri=is_s3_uri,
                        )
                        if is_s3_uri:
                            local_file_path = await download_from_s3(self.get_async_aws_client(), file_path)
                        else:
                            local_file_path = await download_file(file_path)
                        downloaded_files[field_name] = local_file_path
                        LOG.info(
                            "HttpRequestBlock: File downloaded successfully",
                            field_name=field_name,
                            original_path=file_path,
                            local_path=local_file_path,
                        )
                    except Exception as e:
                        return await self.build_block_result(
                            success=False,
                            failure_reason=f"Failed to download file {file_path}: {str(e)}",
                            output_parameter_value=None,
                            status=BlockStatus.failed,
                            workflow_run_block_id=workflow_run_block_id,
                            organization_id=organization_id,
                        )

            # Update self.files with local file paths
            self.files = downloaded_files

        if self.save_response_as_file:
            # Save the response body to disk instead of returning it inline.
            return await self._execute_file_download(
                workflow_run_context=workflow_run_context,
                workflow_run_id=workflow_run_id,
                workflow_run_block_id=workflow_run_block_id,
                organization_id=organization_id,
            )

        try:
            LOG.info(
                "Executing HTTP request",
                method=self.method,
                url=self.url,
                headers=self.headers,
                workflow_run_id=workflow_run_id,
                body=self.body,
                files=self.files,
            )

            status_code, response_headers, response_body = await aiohttp_request(
                method=self.method,
                url=self.url,
                headers=self.headers,
                data=self.body,
                files=self.files,
                timeout=self.timeout,
                follow_redirects=self.follow_redirects,
            )

            # "headers"/"body"/"url" intentionally duplicate the response_*/
            # request_* keys to keep older template references working.
            response_data = {
                "status_code": status_code,
                "response_headers": response_headers,
                "response_body": response_body,
                "request_method": self.method,
                "request_url": self.url,
                "request_headers": self.headers,
                "request_body": self.body,
                "headers": response_headers,
                "body": response_body,
                "url": self.url,
            }

            # Never record raw secrets in the block output.
            response_data = workflow_run_context.mask_secrets_in_data(response_data)

            LOG.info(
                "HTTP request completed",
                status_code=status_code,
                url=self.url,
                method=self.method,
                workflow_run_id=workflow_run_id,
                response_data=response_data,
            )

            # Any 2xx status counts as success.
            success = 200 <= status_code < 300
            failure_reason = None if success else f"HTTP {status_code}: {response_data.get('response_body', '')}"

            await self.record_output_parameter_value(workflow_run_context, workflow_run_id, response_data)

            return await self.build_block_result(
                success=success,
                failure_reason=failure_reason,
                output_parameter_value=response_data,
                status=BlockStatus.completed if success else BlockStatus.failed,
                workflow_run_block_id=workflow_run_block_id,
                organization_id=organization_id,
            )

        except asyncio.TimeoutError:
            error_data = {"error": "Request timed out", "error_type": "timeout"}
            await self.record_output_parameter_value(workflow_run_context, workflow_run_id, error_data)
            return await self.build_block_result(
                success=False,
                failure_reason=f"Request timed out after {self.timeout} seconds",
                output_parameter_value=error_data,
                status=BlockStatus.timed_out,
                workflow_run_block_id=workflow_run_block_id,
                organization_id=organization_id,
            )
        except Exception as e:
            error_data = {"error": str(e), "error_type": "unknown"}
            LOG.warning(
                "HTTP request failed with unexpected error",
                error=str(e),
                url=self.url,
                method=self.method,
                workflow_run_id=workflow_run_id,
            )
            await self.record_output_parameter_value(workflow_run_context, workflow_run_id, error_data)
            return await self.build_block_result(
                success=False,
                failure_reason=f"HTTP request failed: {str(e)}",
                output_parameter_value=error_data,
                status=BlockStatus.failed,
                workflow_run_block_id=workflow_run_block_id,
                organization_id=organization_id,
            )
|
||
|
||
|
||
class PrintPageBlock(Block):
    """Block that renders the current browser page to a PDF."""

    block_type: Literal[BlockType.PRINT_PAGE] = BlockType.PRINT_PAGE  # type: ignore

    # When True, the PDF gets a date/title header and URL/page-number footer.
    include_timestamp: bool = True
    # Optional filename override; may contain Jinja templates.
    custom_filename: str | None = None
    # Paper format; values outside VALID_FORMATS fall back to "A4".
    format: str = "A4"
    landscape: bool = False
    print_background: bool = True
    parameters: list[PARAMETER_TYPE] = []

    # Paper formats accepted for page.pdf() rendering.
    VALID_FORMATS: ClassVar[set[str]] = {"A4", "Letter", "Legal", "Tabloid"}
|
||
|
||
    def get_all_parameters(self, workflow_run_id: str) -> list[PARAMETER_TYPE]:
        """Return the parameters declared on this block; nothing is resolved dynamically."""
        return self.parameters
|
||
|
||
@staticmethod
|
||
def _sanitize_filename(filename: str) -> str:
|
||
sanitized = re.sub(r'[<>:"/\\|?*]', "_", filename)
|
||
sanitized = sanitized.strip(". ")
|
||
return sanitized[:200] if sanitized else "document"
|
||
|
||
def _build_pdf_options(self) -> dict[str, Any]:
|
||
pdf_format = self.format if self.format in self.VALID_FORMATS else "A4"
|
||
pdf_options: dict[str, Any] = {
|
||
"format": pdf_format,
|
||
"landscape": self.landscape,
|
||
"print_background": self.print_background,
|
||
}
|
||
|
||
if self.include_timestamp:
|
||
pdf_options["display_header_footer"] = True
|
||
pdf_options["header_template"] = (
|
||
'<div style="font-size:10px;width:100%;display:flex;justify-content:space-between;padding:0 10px;">'
|
||
'<span class="date"></span><span class="title"></span><span></span></div>'
|
||
)
|
||
pdf_options["footer_template"] = (
|
||
'<div style="font-size:10px;width:100%;display:flex;justify-content:space-between;padding:0 10px;">'
|
||
'<span class="url"></span><span></span><span><span class="pageNumber"></span>/<span class="totalPages"></span></span></div>'
|
||
)
|
||
pdf_options["margin"] = {"top": "40px", "bottom": "40px"}
|
||
|
||
return pdf_options
|
||
|
||
    async def _upload_pdf_artifact(
        self,
        *,
        pdf_bytes: bytes,
        workflow_run_id: str,
        workflow_run_block_id: str,
        workflow_run_context: WorkflowRunContext,
        organization_id: str | None,
    ) -> str | None:
        """Store the rendered PDF bytes as a workflow-run-block artifact.

        Returns the artifact URI on success, or None when the upload is
        skipped (missing organization id, unknown block) or fails. Failures
        are logged rather than raised so PDF generation itself still counts
        as successful.
        """
        artifact_org_id = organization_id or workflow_run_context.organization_id
        if not artifact_org_id:
            LOG.warning(
                "PrintPageBlock: Missing organization_id, skipping artifact upload",
                workflow_run_id=workflow_run_id,
                workflow_run_block_id=workflow_run_block_id,
            )
            return None

        try:
            workflow_run_block = await app.DATABASE.get_workflow_run_block(
                workflow_run_block_id,
                organization_id=artifact_org_id,
            )
        except NotFoundError:
            LOG.warning(
                "PrintPageBlock: Workflow run block not found, skipping artifact upload",
                workflow_run_id=workflow_run_id,
                workflow_run_block_id=workflow_run_block_id,
                organization_id=artifact_org_id,
            )
            return None

        _, artifact_uri = await app.ARTIFACT_MANAGER.create_workflow_run_block_artifact_with_uri(
            workflow_run_block=workflow_run_block,
            artifact_type=ArtifactType.PDF,
            data=pdf_bytes,
        )
        try:
            # Block until the async upload tasks for this block have flushed.
            await app.ARTIFACT_MANAGER.wait_for_upload_aiotasks([workflow_run_block.workflow_run_block_id])
        except Exception:
            LOG.warning(
                "PrintPageBlock: Failed to upload PDF artifact",
                workflow_run_id=workflow_run_id,
                workflow_run_block_id=workflow_run_block.workflow_run_block_id,
                exc_info=True,
            )
            return None

        return artifact_uri
|
||
|
||
async def execute(
    self,
    workflow_run_id: str,
    workflow_run_block_id: str,
    organization_id: str | None = None,
    browser_session_id: str | None = None,
    **kwargs: dict,
) -> BlockResult:
    """Render the current working page to a PDF.

    The PDF is written to the run's download directory (so it shows up in
    the runs UI) and uploaded as a block artifact for downstream blocks
    (e.g. a File Extraction block). Fails the block when no browser state
    or page is available, or when the browser cannot generate a PDF.
    """
    workflow_run_context = self.get_workflow_run_context(workflow_run_id)

    browser_state = await self.get_or_create_browser_state(
        workflow_run_id=workflow_run_id,
        organization_id=organization_id,
        browser_session_id=browser_session_id,
    )
    if not browser_state:
        return await self.build_block_result(
            success=False,
            failure_reason="No browser state available",
            status=BlockStatus.failed,
            workflow_run_block_id=workflow_run_block_id,
            organization_id=organization_id,
        )

    page = await browser_state.get_working_page()
    if not page:
        return await self.build_block_result(
            success=False,
            failure_reason="No page available",
            status=BlockStatus.failed,
            workflow_run_block_id=workflow_run_block_id,
            organization_id=organization_id,
        )

    pdf_options = self._build_pdf_options()

    try:
        pdf_bytes = await page.pdf(**pdf_options)
    except Exception as e:
        error_msg = str(e)
        # page.pdf() is only implemented in Chromium; rewrite the raw
        # Playwright error into a clearer, user-facing message.
        if "pdf" in error_msg.lower() and ("not supported" in error_msg.lower() or "chromium" in error_msg.lower()):
            error_msg = "PDF generation requires Chromium browser. Current browser does not support page.pdf()."
        LOG.warning("PrintPageBlock: Failed to generate PDF", error=error_msg, workflow_run_id=workflow_run_id)
        return await self.build_block_result(
            success=False,
            failure_reason=f"Failed to generate PDF: {error_msg}",
            status=BlockStatus.failed,
            workflow_run_block_id=workflow_run_block_id,
            organization_id=organization_id,
        )

    timestamp_str = datetime.now(timezone.utc).strftime("%Y%m%d_%H%M%S")
    if self.custom_filename:
        # Custom filenames may contain template placeholders; render them
        # against the run context, then sanitize for filesystem safety.
        filename = self.format_block_parameter_template_from_workflow_run_context(
            self.custom_filename, workflow_run_context
        )
        filename = self._sanitize_filename(filename)
        if not filename.endswith(".pdf"):
            filename += ".pdf"
    else:
        filename = f"page_{timestamp_str}.pdf"

    # Save PDF to download directory so it appears in runs UI
    download_dir = get_download_dir(workflow_run_id)
    file_path = os.path.join(download_dir, filename)
    async with aiofiles.open(file_path, "wb") as f:
        await f.write(pdf_bytes)

    # Upload to artifact storage for downstream block access (e.g., File Extraction Block)
    artifact_uri = await self._upload_pdf_artifact(
        pdf_bytes=pdf_bytes,
        workflow_run_id=workflow_run_id,
        workflow_run_block_id=workflow_run_block_id,
        workflow_run_context=workflow_run_context,
        organization_id=organization_id,
    )

    # artifact_uri may be None if the upload was skipped or failed;
    # the block still succeeds because the file exists on disk.
    output = {
        "filename": filename,
        "file_path": file_path,
        "size_bytes": len(pdf_bytes),
        "artifact_uri": artifact_uri,
    }
    await self.record_output_parameter_value(workflow_run_context, workflow_run_id, output)

    return await self.build_block_result(
        success=True,
        failure_reason=None,
        output_parameter_value=output,
        status=BlockStatus.completed,
        workflow_run_block_id=workflow_run_block_id,
        organization_id=organization_id,
    )
|
||
|
||
class BranchEvaluationContext:
    """Collection of runtime data that BranchCriteria evaluators can consume."""

    def __init__(
        self,
        *,
        workflow_run_context: WorkflowRunContext | None = None,
        block_label: str | None = None,
        template_renderer: Callable[[str], str] | None = None,
    ) -> None:
        # workflow_run_context: source of parameter values / block outputs; may be None.
        self.workflow_run_context = workflow_run_context
        # block_label: label of the conditional block, used to pull loop metadata.
        self.block_label = block_label
        # template_renderer: optional caller-supplied Jinja renderer that matches
        # the block parameter rendering pipeline.
        self.template_renderer = template_renderer

    def build_llm_safe_context_snapshot(self) -> dict[str, Any]:
        """
        Build a minimal context blob for LLM-facing branch evaluation.

        Only includes essential data the LLM needs to evaluate conditions:
        - Parameter values (base_date, date_1, etc.)
        - Extracted information from previous blocks
        - Loop variables (current_value, current_index, current_item)

        Secrets are masked before the snapshot is returned.
        """
        if self.workflow_run_context is None:
            return {}

        ctx = self.workflow_run_context
        # Shallow copy so filtering below does not mutate the live context values.
        raw_values: dict[str, Any] = ctx.values.copy()

        # Keys to skip - these are not useful for evaluating conditions
        keys_to_skip = {
            "blocks_metadata",
            "params",
            "outputs",
            "environment",
            "env",
            "llm",
            "workflow_title",
            "workflow_id",
            "workflow_permanent_id",
            "workflow_run_id",
        }

        snapshot: dict[str, Any] = {}
        for key, value in raw_values.items():
            # Skip noisy keys
            if key in keys_to_skip:
                continue

            # For block outputs (dicts with extracted_information), only include extracted_information
            if isinstance(value, dict) and "extracted_information" in value:
                extracted = value.get("extracted_information")
                if extracted is not None:
                    snapshot[key] = extracted
            else:
                # Include parameter values directly
                snapshot[key] = value

        # Copy loop variables (current_value, current_index, current_item) to top level
        # Required for pure NatLang expressions like "current_value['date']" to work
        if self.block_label:
            block_metadata = ctx.get_block_metadata(self.block_label)
            if "current_value" in block_metadata:
                snapshot["current_value"] = block_metadata["current_value"]
            if "current_index" in block_metadata:
                snapshot["current_index"] = block_metadata["current_index"]
            if "current_item" in block_metadata:
                snapshot["current_item"] = block_metadata["current_item"]

        # Mask any real secret values that may have leaked into values
        snapshot = ctx.mask_secrets_in_data(snapshot)

        return snapshot

    def build_template_data(self) -> dict[str, Any]:
        """Build Jinja template data mirroring block parameter rendering context."""
        if self.workflow_run_context is None:
            # With no run context, return empty namespaces so templates that
            # reference params/outputs/etc. fail gracefully rather than on KeyError.
            return {
                "params": {},
                "outputs": {},
                "environment": {},
                "env": {},
                "llm": {},
            }

        ctx = self.workflow_run_context
        template_data = ctx.values.copy()
        if ctx.include_secrets_in_templates:
            template_data.update(ctx.secrets)

        # Credential parameters appear as dicts with context/username/password keys,
        # where username/password are secret ids pointing at the real values.
        credential_params: list[tuple[str, dict[str, Any]]] = []
        for key, value in template_data.items():
            if isinstance(value, dict) and "context" in value and "username" in value and "password" in value:
                credential_params.append((key, value))

        # Expose the resolved credential values under <key>_real_username /
        # <key>_real_password so templates can reference them directly.
        for key, value in credential_params:
            username_secret_id = value.get("username", "")
            password_secret_id = value.get("password", "")
            real_username = template_data.get(username_secret_id, "")
            real_password = template_data.get(password_secret_id, "")
            template_data[f"{key}_real_username"] = real_username
            template_data[f"{key}_real_password"] = real_password

        if self.block_label:
            # Merge loop metadata into the entry for this block's label and lift
            # the loop variables to the top level for convenience.
            block_reference_data: dict[str, Any] = ctx.get_block_metadata(self.block_label)
            if self.block_label in template_data:
                current_value = template_data[self.block_label]
                if isinstance(current_value, dict):
                    block_reference_data.update(current_value)
            template_data[self.block_label] = block_reference_data

            if "current_index" in block_reference_data:
                template_data["current_index"] = block_reference_data["current_index"]
            if "current_item" in block_reference_data:
                template_data["current_item"] = block_reference_data["current_item"]
            if "current_value" in block_reference_data:
                template_data["current_value"] = block_reference_data["current_value"]

        # setdefault: never clobber values that already exist in the context.
        template_data.setdefault("workflow_title", ctx.workflow_title)
        template_data.setdefault("workflow_id", ctx.workflow_id)
        template_data.setdefault("workflow_permanent_id", ctx.workflow_permanent_id)
        template_data.setdefault("workflow_run_id", ctx.workflow_run_id)
        template_data.setdefault("current_date", datetime.now(timezone.utc).strftime(CURRENT_DATE_FORMAT))

        template_data.setdefault("params", template_data.get("params", {}))
        template_data.setdefault("outputs", template_data.get("outputs", {}))
        template_data.setdefault("environment", template_data.get("environment", {}))
        # NOTE(review): "env" mirrors "environment" but without a {} fallback —
        # it can be set to None if "environment" is absent; presumably intentional.
        template_data.setdefault("env", template_data.get("environment"))
        template_data.setdefault("llm", template_data.get("llm", {}))

        return template_data
|
||
|
||
class BranchCriteria(BaseModel, abc.ABC):
    """Abstract interface describing how a branch condition should be evaluated."""

    # Discriminator string set by concrete subclasses (e.g. "jinja2_template", "prompt").
    criteria_type: str
    # The condition expression to evaluate (Jinja template or natural language).
    expression: str
    # Optional human-readable description of the branch condition.
    description: str | None = None

    @abc.abstractmethod
    async def evaluate(self, context: BranchEvaluationContext) -> bool:
        """Return True when the branch should execute."""
        raise NotImplementedError

    def requires_llm(self) -> bool:
        """Whether the criteria relies on an LLM classification step."""
        return False
|
||
|
||
def _evaluate_truthy_string(value: str) -> bool:
|
||
"""
|
||
Evaluate a string as a boolean, handling common truthy/falsy representations.
|
||
|
||
Truthy: "true", "True", "TRUE", "1", "yes", "y", "on", non-zero numbers
|
||
Falsy: "", "false", "False", "FALSE", "0", "no", "n", "off", "null", "None", whitespace-only
|
||
|
||
For other strings, use Python's default bool() behavior (non-empty = truthy).
|
||
"""
|
||
if not value or not value.strip():
|
||
return False
|
||
|
||
normalized = value.strip().lower()
|
||
|
||
# Explicit falsy values
|
||
if normalized in ("false", "0", "no", "n", "off", "null", "none"):
|
||
return False
|
||
|
||
# Explicit truthy values
|
||
if normalized in ("true", "1", "yes", "y", "on"):
|
||
return True
|
||
|
||
# Try to parse as a number
|
||
try:
|
||
num = float(normalized)
|
||
return num != 0.0
|
||
except ValueError:
|
||
pass
|
||
|
||
# For any other non-empty string, consider it truthy
|
||
# This allows expressions like "{{ 'some text' }}" to be truthy
|
||
return True
|
||
|
||
|
||
class JinjaBranchCriteria(BranchCriteria):
    """Jinja2-templated branch criteria (only supported criteria type for now)."""

    criteria_type: Literal["jinja2_template"] = "jinja2_template"

    async def evaluate(self, context: BranchEvaluationContext) -> bool:
        """Render the Jinja expression and interpret the result as a boolean.

        Raises MissingJinjaVariables when referenced variables are absent, and
        FailedToFormatJinjaStyleParameter for any other render/syntax failure.
        """
        # Prefer the renderer provided by the caller (matches block parameter rendering),
        # otherwise build a minimal sandboxed renderer using the evaluation context.
        if context.template_renderer:
            try:
                rendered = context.template_renderer(self.expression)
            except MissingJinjaVariables:
                # Let upstream MissingJinjaVariables bubble as-is.
                raise
            except Exception as exc:  # pragma: no cover - caught for robustness
                # Wrap all other renderer failures in the project's formatting error.
                raise FailedToFormatJinjaStyleParameter(self.expression, str(exc)) from exc
        else:
            template_data = context.build_template_data()
            # StrictUndefined makes unknown variables raise instead of rendering empty,
            # but only under the "strict" templating setting.
            sandbox_env = (
                SandboxedEnvironment(undefined=StrictUndefined)
                if settings.WORKFLOW_TEMPLATING_STRICTNESS == "strict"
                else SandboxedEnvironment()
            )

            try:
                # Surface missing variables explicitly before rendering so the
                # error names exactly which variables are absent.
                missing_vars = get_missing_variables(self.expression, template_data)
                if missing_vars:
                    raise MissingJinjaVariables(self.expression, missing_vars)

                template = sandbox_env.from_string(self.expression)
                rendered = template.render(template_data)
            except MissingJinjaVariables:
                raise
            except Exception as exc:
                # Covers syntax errors and rendering issues
                raise FailedToFormatJinjaStyleParameter(self.expression, str(exc)) from exc

        return _evaluate_truthy_string(rendered)
|
||
|
||
class PromptBranchCriteria(BranchCriteria):
    """Natural language branch criteria.

    These criteria are never evaluated individually; ConditionalBlock batches
    all prompt-based conditions into a single LLM call for efficiency.
    """

    criteria_type: Literal["prompt"] = "prompt"

    async def evaluate(self, context: BranchEvaluationContext) -> bool:
        # Natural language criteria are evaluated in batch by ConditionalBlock.execute.
        raise NotImplementedError("PromptBranchCriteria is evaluated in batch, not per-branch.")

    def requires_llm(self) -> bool:
        """Prompt criteria always require an LLM classification step."""
        return True
|
||
|
||
def _is_pure_jinja_expression(expression: str) -> bool:
|
||
"""
|
||
Determine if an expression is a pure Jinja template (single block) vs Jinja+NatLang (mixed).
|
||
|
||
Pure Jinja: "{{ A == B }}" - single Jinja block, should be evaluated server-side
|
||
Jinja+NatLang: "{{ A }} is same as {{ B }}" - multiple Jinja blocks mixed with natural language
|
||
|
||
Returns True only for pure Jinja expressions that can be evaluated to boolean server-side.
|
||
"""
|
||
if not expression:
|
||
return False
|
||
|
||
stripped = expression.strip()
|
||
|
||
# Must start with {{ and end with }}
|
||
if not (stripped.startswith("{{") and stripped.endswith("}}")):
|
||
return False
|
||
|
||
# Count the number of {{ occurrences
|
||
# If there's more than one, it's Jinja+NatLang (e.g., "{{ A }} is same as {{ B }}")
|
||
jinja_open_count = stripped.count("{{")
|
||
if jinja_open_count > 1:
|
||
return False
|
||
|
||
# Single {{ and ends with }} - this is pure Jinja
|
||
return True
|
||
|
||
|
||
def _resolve_nested_path(value: Any, path: str) -> Any:
|
||
"""
|
||
Resolve a dotted/bracket access path on a nested value.
|
||
|
||
Examples:
|
||
_resolve_nested_path({"a": {"b": 1}}, ".a.b") -> 1
|
||
_resolve_nested_path([{"x": 2}], "[0].x") -> 2
|
||
|
||
Args:
|
||
value: The root value to traverse
|
||
path: The access path (e.g., ".field1.field2[0].field3")
|
||
|
||
Returns:
|
||
The resolved leaf value
|
||
|
||
Raises:
|
||
LookupError: If the path cannot be resolved
|
||
"""
|
||
segments = re.findall(r"\.([a-zA-Z_]\w*)|\[(\d+)\]", path)
|
||
current = value
|
||
for dot_key, bracket_idx in segments:
|
||
if dot_key:
|
||
if isinstance(current, dict):
|
||
if dot_key not in current:
|
||
raise LookupError(f"Key {dot_key!r} not found")
|
||
current = current[dot_key]
|
||
else:
|
||
raise LookupError(f"Cannot access .{dot_key} on {type(current).__name__}")
|
||
elif bracket_idx:
|
||
idx = int(bracket_idx)
|
||
if isinstance(current, (list, tuple)):
|
||
if idx >= len(current):
|
||
raise LookupError(f"Index [{idx}] out of range")
|
||
current = current[idx]
|
||
else:
|
||
raise LookupError(f"Cannot index [{idx}] on {type(current).__name__}")
|
||
return current
|
||
|
||
|
||
# Subset of Jinja filters that _render_jinja_expression_for_display can apply
# to a resolved value when building a human-readable expression. Filters not
# listed here are left verbatim in the display string.
_JINJA_DISPLAY_FILTERS: dict[str, Callable[[Any], Any]] = {
    "lower": lambda v: str(v).lower(),
    "upper": lambda v: str(v).upper(),
    "trim": lambda v: str(v).strip(),
    "title": lambda v: str(v).title(),
    "capitalize": lambda v: str(v).capitalize(),
    "int": lambda v: int(v),
    "float": lambda v: float(v),
    "string": lambda v: str(v),
    "length": lambda v: len(v),
    "abs": lambda v: abs(v),
}
|
||
|
||
def _render_jinja_expression_for_display(
    expression: str,
    context_values: dict[str, Any],
    block_label: str | None = None,
) -> str:
    """
    Render a pure Jinja expression for UI display by substituting variable names with values.

    This is for display purposes only - it shows users what values were compared
    without actually evaluating the expression. For example:
    - Input: "{{ base_date == date_1 }}" with context {"base_date": "01-25-2026", "date_1": "01-25-2026"}
    - Output: '"01-25-2026" == "01-25-2026"'
    - Input: "{{ output.extracted_information.field != None }}" with nested dict context
    - Output: '"some_value" != None'
    - Input: "{{ output.status|lower == 'active' }}" with context {"output": {"status": "Active"}}
    - Output: '"active" == \'active\''

    Known Jinja filters (lower, upper, trim, etc.) are applied to the resolved value.
    Unknown filters are left as-is in the output.

    Returns the original expression if it's not a pure Jinja expression or if rendering fails.
    """
    if not _is_pure_jinja_expression(expression):
        return expression

    try:
        # Extract inner expression (strip {{ and }})
        inner_expr = expression.strip()[2:-2].strip()
        display_expr = inner_expr

        # Substitute variable references (including dotted/bracket access paths and filters)
        # with their values.
        # Match var_name optionally followed by .field or [index] segments,
        # then optionally followed by a |filter_name.
        # Sort by key length (longest first) to avoid partial matches.
        for var_name in sorted(context_values.keys(), key=len, reverse=True):
            pattern = r"\b" + re.escape(var_name) + r"((?:\.[a-zA-Z_]\w*|\[\d+\])*)(\|[a-zA-Z_]\w*)?"

            # _var_name is bound as a default arg to avoid Python's
            # late-binding closure pitfall inside the loop.
            def _replacer(match: re.Match, _var_name: str = var_name) -> str:
                access_path = match.group(1)  # the dotted/bracket part after var_name
                filter_expr = match.group(2)  # e.g., "|lower" or None
                var_value = context_values[_var_name]

                if access_path:
                    try:
                        var_value = _resolve_nested_path(var_value, access_path)
                    except LookupError:
                        # Path couldn't be resolved — return original text unchanged
                        return match.group(0)

                if filter_expr:
                    filter_name = filter_expr[1:]  # strip the leading |
                    filter_fn = _JINJA_DISPLAY_FILTERS.get(filter_name)
                    if filter_fn is not None:
                        try:
                            var_value = filter_fn(var_value)
                        except Exception:
                            # Filter application failed — show value with filter text
                            if isinstance(var_value, str):
                                return f'"{var_value}"{filter_expr}'
                            return f"{var_value}{filter_expr}"
                    else:
                        # Unknown filter — show value with filter text preserved
                        if isinstance(var_value, str):
                            return f'"{var_value}"{filter_expr}'
                        return f"{var_value}{filter_expr}"

                # Strings are quoted so the display reads like a literal comparison.
                if isinstance(var_value, str):
                    return f'"{var_value}"'
                return str(var_value)

            display_expr = re.sub(pattern, _replacer, display_expr)

        return display_expr
    except Exception as exc:
        # Display rendering must never break branch evaluation; fall back to
        # the raw expression and log at debug level.
        LOG.debug(
            "Failed to render Jinja expression for display",
            block_label=block_label,
            expression=expression,
            error=str(exc),
        )
        return expression
|
||
|
||
def _find_evaluations_array(output_value: dict[str, Any]) -> list[Any]:
|
||
"""
|
||
Extract the evaluations array from LLM output.
|
||
|
||
ExtractionBlock wraps output in 'extracted_information', so we check there first.
|
||
Falls back to direct access if not found in the nested structure.
|
||
|
||
Args:
|
||
output_value: The raw output from ExtractionBlock
|
||
|
||
Returns:
|
||
List of evaluation objects from the LLM
|
||
|
||
Raises:
|
||
ValueError: If evaluations array is not found or has wrong type
|
||
"""
|
||
# Try standard ExtractionBlock format: output_value.extracted_information.evaluations
|
||
extracted_info = output_value.get("extracted_information")
|
||
if isinstance(extracted_info, dict):
|
||
raw_evaluations = extracted_info.get("evaluations")
|
||
else:
|
||
# Fallback: try direct access at output_value.evaluations
|
||
raw_evaluations = output_value.get("evaluations")
|
||
|
||
if not isinstance(raw_evaluations, list):
|
||
raise ValueError(f"Expected array of evaluations, got: {type(raw_evaluations)}")
|
||
|
||
return raw_evaluations
|
||
|
||
|
||
def _parse_single_evaluation(
|
||
evaluation: Any,
|
||
idx: int,
|
||
fallback_rendered_expressions: list[str],
|
||
) -> tuple[bool, str]:
|
||
"""
|
||
Parse a single evaluation from the LLM response.
|
||
|
||
Handles two formats:
|
||
- Dict format: {result: bool, reasoning: str}
|
||
- Legacy format: just a boolean value
|
||
|
||
The rendered expression always comes from the Jinja pre-rendering step (fallback),
|
||
not from the LLM response, to avoid the LLM re-interpreting already-resolved values.
|
||
|
||
Args:
|
||
evaluation: Single evaluation object from LLM (dict or bool)
|
||
idx: Index of this evaluation (for fallback lookup)
|
||
fallback_rendered_expressions: Pre-rendered expressions from Jinja rendering
|
||
|
||
Returns:
|
||
Tuple of (boolean_result, rendered_expression_string)
|
||
"""
|
||
rendered_expression = fallback_rendered_expressions[idx] if idx < len(fallback_rendered_expressions) else ""
|
||
|
||
if isinstance(evaluation, dict):
|
||
result = evaluation.get("result")
|
||
if isinstance(result, bool):
|
||
bool_result = result
|
||
else:
|
||
bool_result = _evaluate_truthy_string(str(result))
|
||
LOG.warning(
|
||
"Prompt branch evaluation returned non-boolean result",
|
||
branch_index=idx,
|
||
result=result,
|
||
evaluated_result=bool_result,
|
||
)
|
||
|
||
return (bool_result, rendered_expression)
|
||
else:
|
||
# Legacy format: just a boolean
|
||
if isinstance(evaluation, bool):
|
||
bool_result = evaluation
|
||
else:
|
||
bool_result = _evaluate_truthy_string(str(evaluation))
|
||
|
||
return (bool_result, rendered_expression)
|
||
|
||
|
||
# Pattern to find Jinja template blocks like {{ variable_name }}.
# Non-greedy so adjacent blocks in one string are matched separately; the
# capture group keeps the inner expression when used with re.split.
_JINJA_BLOCK_RE = re.compile(r"\{\{(.*?)\}\}")
# Marker inserted into rendered expressions when a Jinja variable resolved to
# an empty/whitespace-only value. The LLM uses this to reason about emptiness.
_EMPTY_VALUE_MARKER = "(empty value)"
|
||
|
||
def _make_empty_params_explicit(
    original_expression: str,
    rendered_expression: str,
) -> tuple[str, bool]:
    """
    Detect Jinja template variables that resolved to empty values and replace
    the empty gaps with explicit ``(empty value)`` markers.

    When ``{{test_parameter}}`` resolves to ``""``, the rendered expression becomes
    malformed (e.g., ``"if is not empty"``). This function detects such cases by
    comparing the *original* expression (with ``{{ }}`` blocks) against the
    *rendered* expression and rebuilds it with clear markers so the LLM can
    evaluate the condition correctly.

    Returns:
        ``(patched_expression, was_patched)``
    """
    # Nothing to do if the original had no template blocks at all.
    if not original_expression or "{{" not in original_expression:
        return rendered_expression, False

    # Split the original expression into alternating [static, var, static, var, ...] parts.
    parts = _JINJA_BLOCK_RE.split(original_expression)
    if len(parts) <= 1:
        return rendered_expression, False

    # Extract static parts (even indices) and build a regex that captures what
    # each Jinja block rendered to by using the static text as anchors.
    static_parts = [parts[i] for i in range(0, len(parts), 2)]
    num_vars = len(parts) // 2

    # When two Jinja variables are adjacent (e.g. "{{a}}{{b}}") the interior
    # static separator is an empty string and the non-greedy regex cannot
    # reliably attribute rendered text to the correct variable. Bail out.
    if num_vars > 1 and any(static == "" for static in static_parts[1:-1]):
        return rendered_expression, False

    # NOTE: if a rendered value happens to contain the same text as a static
    # anchor the regex may split on the wrong occurrence. This is extremely
    # unlikely in user-authored conditional expressions and the worst-case
    # outcome is an unnecessary "(empty value)" marker, which still beats the
    # invisible empty-string that caused SKY-8073.

    # Interleave escaped static anchors with lazy capture groups, one per variable.
    regex_fragments: list[str] = []
    for i, static in enumerate(static_parts):
        regex_fragments.append(re.escape(static))
        if i < num_vars:
            regex_fragments.append("(.*?)")

    # DOTALL so rendered values spanning newlines are still captured.
    match = re.match("^" + "".join(regex_fragments) + "$", rendered_expression, re.DOTALL)
    if not match:
        return rendered_expression, False

    rendered_values = match.groups()
    has_empty = any(not v.strip() for v in rendered_values)
    if not has_empty:
        return rendered_expression, False

    # Rebuild the expression, replacing empty rendered values with an explicit marker.
    result_parts: list[str] = []
    for i, static in enumerate(static_parts):
        result_parts.append(static)
        if i < len(rendered_values):
            if not rendered_values[i].strip():
                result_parts.append(_EMPTY_VALUE_MARKER)
            else:
                result_parts.append(rendered_values[i])

    return "".join(result_parts), True
|
||
|
||
class BranchCondition(BaseModel):
    """Represents a single conditional branch edge within a ConditionalBlock."""

    # Stable identifier for the branch, generated if not supplied.
    id: str = Field(default_factory=lambda: str(uuid.uuid4()))
    # Condition to evaluate; None only for the default branch.
    criteria: BranchCriteriaTypeVar | None = None
    # Label of the block to jump to when this branch is taken.
    next_block_label: str | None = None
    # Optional human-readable description of the branch.
    description: str | None = None
    # Marks the fallback branch taken when no criteria match.
    is_default: bool = False

    @model_validator(mode="after")
    def validate_condition(cls, condition_obj: BranchCondition) -> BranchCondition:
        """Normalize raw criteria dicts into concrete criteria models and
        enforce the default-branch invariants.

        Raises:
            ValueError: when a non-default branch has no criteria, or a
                default branch defines criteria.
        """
        # Criteria may arrive as a plain dict (e.g. from deserialized workflow
        # definitions); coerce it into the right BranchCriteria subclass.
        if isinstance(condition_obj.criteria, dict):
            criteria_type = condition_obj.criteria.get("criteria_type")
            if criteria_type is None:
                # Infer criteria type from expression format
                expression = condition_obj.criteria.get("expression", "")
                if _is_pure_jinja_expression(expression):
                    criteria_type = "jinja2_template"
                else:
                    criteria_type = "prompt"
            if criteria_type == "prompt":
                condition_obj.criteria = PromptBranchCriteria(**condition_obj.criteria)
            else:
                condition_obj.criteria = JinjaBranchCriteria(**condition_obj.criteria)
        if condition_obj.criteria is None and not condition_obj.is_default:
            raise ValueError("Branches without criteria must be marked as default.")
        if condition_obj.criteria is not None and condition_obj.is_default:
            raise ValueError("Default branches may not define criteria.")
        # Re-classify already-parsed criteria so the concrete type always
        # matches the current expression shape (pure Jinja vs prompt).
        if condition_obj.criteria and isinstance(condition_obj.criteria, BranchCriteria):
            expression = condition_obj.criteria.expression
            criteria_dict = condition_obj.criteria.model_dump()
            if _is_pure_jinja_expression(expression):
                criteria_dict["criteria_type"] = "jinja2_template"
                condition_obj.criteria = JinjaBranchCriteria(**criteria_dict)
            else:
                criteria_dict["criteria_type"] = "prompt"
                condition_obj.criteria = PromptBranchCriteria(**criteria_dict)
        return condition_obj
|
||
|
||
class ConditionalBlock(Block):
|
||
"""Branching block that selects the next block label based on list-ordered conditions."""
|
||
|
||
# There is a mypy bug with Literal. Without the type: ignore, mypy will raise an error:
|
||
# Parameter 1 of Literal[...] cannot be of type "Any"
|
||
block_type: Literal[BlockType.CONDITIONAL] = BlockType.CONDITIONAL # type: ignore
|
||
|
||
branch_conditions: list[BranchCondition] = Field(default_factory=list)
|
||
|
||
@model_validator(mode="after")
def validate_branches(cls, block: ConditionalBlock) -> ConditionalBlock:
    """Validate the branch list: at least one branch, at most one default.

    Raises:
        ValueError: when no branches are defined or more than one branch
            is marked as the default.
    """
    if not block.branch_conditions:
        raise ValueError("Conditional blocks require at least one branch.")

    # Count default branches instead of materializing a filtered list.
    default_count = sum(1 for branch in block.branch_conditions if branch.is_default)
    if default_count > 1:
        raise ValueError("Only one default branch is permitted per conditional block.")

    return block
|
||
def get_all_parameters(
    self,
    workflow_run_id: str,  # noqa: ARG002 - preserved for interface compatibility
) -> list[PARAMETER_TYPE]:
    """Return the workflow parameters this block depends on.

    Currently always empty for conditional blocks.
    """
    # BranchCriteria subclasses will surface their parameter dependencies once implemented.
    return []
|
||
async def _evaluate_prompt_branches(
|
||
self,
|
||
*,
|
||
branches: list[BranchCondition],
|
||
evaluation_context: BranchEvaluationContext,
|
||
workflow_run_id: str,
|
||
workflow_run_block_id: str,
|
||
organization_id: str | None = None,
|
||
browser_session_id: str | None = None,
|
||
) -> tuple[list[bool], list[str], str | None, dict | None]:
|
||
"""
|
||
Evaluate natural language branch conditions in batch.
|
||
|
||
All prompt-based conditions are batched into ONE LLM call for performance.
|
||
Jinja parts ({{ }}) are pre-rendered before sending to LLM.
|
||
|
||
Evaluation strategy:
|
||
- If any condition is pure natural language, use ExtractionBlock for browser/page context.
|
||
- If all conditions contain Jinja and are pre-rendered, use direct LLM call (no browser context).
|
||
|
||
Returns:
|
||
A tuple of (results, rendered_expressions, extraction_goal, llm_response):
|
||
- results: List of boolean results for each branch
|
||
- rendered_expressions: List of expressions after Jinja pre-rendering
|
||
- extraction_goal: The prompt sent to the LLM (for UI display)
|
||
- llm_response: The raw LLM response for debugging
|
||
"""
|
||
if organization_id is None:
|
||
raise ValueError("organization_id is required to evaluate natural language branches")
|
||
|
||
if not branches:
|
||
return ([], [], None, None)
|
||
|
||
workflow_run_context = evaluation_context.workflow_run_context
|
||
|
||
# Step 1: Pre-render all expressions (resolve any Jinja {{ }} parts)
|
||
rendered_expressions: list[str] = []
|
||
has_any_pure_natlang = False
|
||
|
||
for idx, branch in enumerate(branches):
|
||
expression = branch.criteria.expression if branch.criteria else ""
|
||
has_jinja = "{{" in expression
|
||
|
||
if has_jinja:
|
||
try:
|
||
rendered_expression = (
|
||
evaluation_context.template_renderer(expression)
|
||
if evaluation_context.template_renderer
|
||
else expression
|
||
)
|
||
except Exception as render_exc:
|
||
LOG.error(
|
||
"Conditional branch expression rendering FAILED",
|
||
block_label=self.label,
|
||
branch_index=idx,
|
||
original_expression=expression,
|
||
error=str(render_exc),
|
||
exc_info=True,
|
||
)
|
||
rendered_expression = expression
|
||
# Rendering failed, so this expression is effectively unresolved and must
|
||
# take the ExtractionBlock path (with context) instead of direct LLM mode.
|
||
has_any_pure_natlang = True
|
||
else:
|
||
# When a Jinja variable resolves to an empty string the rendered
|
||
# expression becomes malformed (e.g. "if is not empty") and the
|
||
# LLM cannot reason about emptiness correctly. Replace empty gaps
|
||
# with an explicit "(empty value)" marker so the intent is clear.
|
||
rendered_expression, was_patched = _make_empty_params_explicit(expression, rendered_expression)
|
||
if was_patched:
|
||
LOG.info(
|
||
"Conditional branch expression patched for empty parameter(s)",
|
||
workflow_run_id=workflow_run_id,
|
||
block_label=self.label,
|
||
branch_index=idx,
|
||
original_expression=expression,
|
||
patched_expression=rendered_expression,
|
||
)
|
||
else:
|
||
rendered_expression = expression
|
||
has_any_pure_natlang = True
|
||
|
||
LOG.info(
|
||
"Conditional branch expression rendering",
|
||
block_label=self.label,
|
||
branch_index=idx,
|
||
original_expression=expression,
|
||
rendered_expression=rendered_expression,
|
||
has_jinja=has_jinja,
|
||
expression_changed=expression != rendered_expression,
|
||
)
|
||
|
||
rendered_expressions.append(rendered_expression)
|
||
|
||
# Step 2: Build extraction goal with all conditions
|
||
# Include context only if there are pure NatLang expressions that need variable resolution
|
||
if has_any_pure_natlang:
|
||
context_snapshot = evaluation_context.build_llm_safe_context_snapshot()
|
||
context_json = json.dumps(context_snapshot, default=str)
|
||
else:
|
||
context_json = None
|
||
|
||
extraction_goal = prompt_engine.load_prompt(
|
||
"conditional-prompt-branch-evaluation",
|
||
conditions=rendered_expressions,
|
||
context_json=context_json,
|
||
)
|
||
|
||
# Step 3: Build schema for array of evaluation results
|
||
# Order matters: reasoning -> result (chain-of-thought)
|
||
data_schema = {
|
||
"type": "object",
|
||
"properties": {
|
||
"evaluations": {
|
||
"type": "array",
|
||
"items": {
|
||
"type": "object",
|
||
"properties": {
|
||
"reasoning": {
|
||
"type": "string",
|
||
"description": "Explanation of the reasoning behind evaluating the condition.",
|
||
},
|
||
"result": {
|
||
"type": "boolean",
|
||
"description": "TRUE if the condition is satisfied, FALSE otherwise.",
|
||
},
|
||
},
|
||
"required": ["reasoning", "result"],
|
||
},
|
||
"description": "Array of evaluation results for each condition in the same order.",
|
||
"minItems": len(branches),
|
||
"maxItems": len(branches),
|
||
}
|
||
},
|
||
"required": ["evaluations"],
|
||
}
|
||
|
||
# Step 4: Create and execute single ExtractionBlock.
|
||
# When all expressions have been Jinja-rendered successfully, omit
|
||
# browser_session_id so the LLM won't reinterpret resolved literal
|
||
# values as on-screen references (SKY-7985).
|
||
effective_browser_session_id = browser_session_id if has_any_pure_natlang else None
|
||
|
||
output_param = OutputParameter(
|
||
output_parameter_id=str(uuid.uuid4()),
|
||
key=f"conditional_branch_eval_{generate_random_string()}",
|
||
workflow_id=self.output_parameter.workflow_id,
|
||
created_at=datetime.now(),
|
||
modified_at=datetime.now(),
|
||
parameter_type=ParameterType.OUTPUT,
|
||
description=f"Conditional branch evaluation results ({len(branches)} conditions)",
|
||
)
|
||
extraction_block = ExtractionBlock(
|
||
label=f"conditional_branch_eval_{generate_random_string()}",
|
||
data_extraction_goal=extraction_goal,
|
||
data_schema=data_schema,
|
||
output_parameter=output_param,
|
||
)
|
||
|
||
LOG.info(
|
||
"Conditional branch ExtractionBlock created (batched)",
|
||
block_label=self.label,
|
||
num_conditions=len(branches),
|
||
extraction_goal_preview=extraction_goal[:500] if extraction_goal else None,
|
||
has_browser_session=effective_browser_session_id is not None,
|
||
has_any_pure_natlang=has_any_pure_natlang,
|
||
has_context=context_json is not None,
|
||
)
|
||
|
||
try:
|
||
extraction_result = await extraction_block.execute(
|
||
workflow_run_id=workflow_run_id,
|
||
workflow_run_block_id=workflow_run_block_id,
|
||
organization_id=organization_id,
|
||
browser_session_id=effective_browser_session_id,
|
||
)
|
||
|
||
if not extraction_result.success:
|
||
LOG.error(
|
||
"Conditional branch ExtractionBlock failed",
|
||
block_label=self.label,
|
||
failure_reason=extraction_result.failure_reason,
|
||
)
|
||
raise ValueError(
|
||
f"Branch evaluation failed: "
|
||
f"{extraction_result.failure_reason or 'Unknown error (no failure reason provided)'}"
|
||
)
|
||
|
||
if workflow_run_context:
|
||
try:
|
||
await extraction_block.record_output_parameter_value(
|
||
workflow_run_context=workflow_run_context,
|
||
workflow_run_id=workflow_run_id,
|
||
value=extraction_result.output_parameter_value,
|
||
)
|
||
except Exception:
|
||
LOG.warning(
|
||
"Failed to record conditional branch evaluation output",
|
||
workflow_run_id=workflow_run_id,
|
||
block_label=self.label,
|
||
exc_info=True,
|
||
)
|
||
|
||
output_value = extraction_result.output_parameter_value
|
||
|
||
# Step 5: Extract the evaluation results (reasoning + result)
|
||
results_array: list[bool] = []
|
||
llm_rendered_expressions: list[str] = []
|
||
|
||
if isinstance(output_value, list):
|
||
output_value = {"evaluations": output_value}
|
||
|
||
if not isinstance(output_value, dict):
|
||
raise ValueError(f"Unexpected output format: {type(output_value)}")
|
||
|
||
# Find evaluations array from LLM output (handles ExtractionBlock nesting)
|
||
raw_evaluations = _find_evaluations_array(output_value)
|
||
|
||
# Parse each evaluation to extract result (rendered expression comes from Jinja pre-rendering)
|
||
for idx, evaluation in enumerate(raw_evaluations):
|
||
bool_result, rendered_expr = _parse_single_evaluation(
|
||
evaluation=evaluation,
|
||
idx=idx,
|
||
fallback_rendered_expressions=rendered_expressions,
|
||
)
|
||
results_array.append(bool_result)
|
||
llm_rendered_expressions.append(rendered_expr)
|
||
|
||
LOG.info(
|
||
"Conditional branch evaluation results",
|
||
block_label=self.label,
|
||
results=results_array,
|
||
llm_rendered_expressions=llm_rendered_expressions,
|
||
raw_output=output_value,
|
||
)
|
||
|
||
if len(results_array) != len(branches):
|
||
raise ValueError(
|
||
f"Prompt branch evaluation returned {len(results_array)} results for {len(branches)} branches"
|
||
)
|
||
|
||
return (results_array, llm_rendered_expressions, extraction_goal, output_value)
|
||
|
||
except Exception as exc:
|
||
LOG.error(
|
||
"Conditional branch prompt evaluation failed",
|
||
block_label=self.label,
|
||
error=str(exc),
|
||
exc_info=True,
|
||
)
|
||
raise ValueError(f"Prompt branch evaluation failed: {str(exc)}") from exc
|
||
|
||
async def execute(  # noqa: D401
    self,
    workflow_run_id: str,
    workflow_run_block_id: str,
    organization_id: str | None = None,
    browser_session_id: str | None = None,
    **kwargs: dict,
) -> BlockResult:
    """
    Evaluate conditional branches and determine next block to execute.

    Branch selection is first-match-wins over ``self.ordered_branches``:
    natural-language ("prompt") branches are pre-evaluated in a single batched
    LLM call, Jinja branches are evaluated inline, and the default (else)
    branch is taken only when no other branch matched and no evaluation error
    occurred.

    Returns a BlockResult with branch metadata in the output_parameter_value.
    """
    workflow_run_context = app.WORKFLOW_CONTEXT_MANAGER.get_workflow_run_context(workflow_run_id)
    evaluation_context = BranchEvaluationContext(
        workflow_run_context=workflow_run_context,
        block_label=self.label,
        # Renderer is only available when we have a workflow run context to
        # resolve template parameters against.
        template_renderer=(
            lambda potential_template: self.format_block_parameter_template_from_workflow_run_context(
                potential_template,
                workflow_run_context,
            )
        )
        if workflow_run_context
        else None,
    )

    matched_branch = None
    failure_reason: str | None = None

    # Track all branch evaluations for UI display
    branch_evaluations_list: list[dict] = []
    prompt_rendered_by_id: dict[str, str] = {}

    # Collect all prompt-criteria branches so they can be evaluated together
    # in one batched LLM call (instead of one call per branch).
    natural_language_branches = [
        branch for branch in self.ordered_branches if isinstance(branch.criteria, PromptBranchCriteria)
    ]
    prompt_results_by_id: dict[str, bool] = {}
    prompt_llm_response: dict | None = None
    prompt_extraction_goal: str | None = None
    if natural_language_branches:
        try:
            (
                prompt_results,
                prompt_rendered_expressions,
                prompt_extraction_goal,
                prompt_llm_response,
            ) = await self._evaluate_prompt_branches(
                branches=natural_language_branches,
                evaluation_context=evaluation_context,
                workflow_run_id=workflow_run_id,
                workflow_run_block_id=workflow_run_block_id,
                organization_id=organization_id,
                browser_session_id=browser_session_id,
            )
            # Key results/rendered expressions by branch id for lookup in the
            # ordered walk below. strict=False tolerates a short result list;
            # missing entries surface later as "Missing result ..." failures.
            prompt_results_by_id = {
                branch.id: result for branch, result in zip(natural_language_branches, prompt_results, strict=False)
            }
            prompt_rendered_by_id = {
                branch.id: rendered
                for branch, rendered in zip(natural_language_branches, prompt_rendered_expressions, strict=False)
            }
        except Exception as exc:
            # Don't raise here: the failure is recorded and the ordered walk
            # below attaches it to the first prompt branch it reaches.
            failure_reason = f"Failed to evaluate natural language branches: {str(exc)}"
            LOG.error(
                "Failed to evaluate natural language branches",
                block_label=self.label,
                error=str(exc),
                exc_info=True,
            )

    # Walk branches in author order; stop at the first match or first failure.
    for idx, branch in enumerate(self.ordered_branches):
        branch_eval: dict = {
            "branch_id": branch.id,
            "branch_index": idx,
            "criteria_type": branch.criteria.criteria_type if branch.criteria else None,
            "original_expression": branch.criteria.expression if branch.criteria else None,
            "rendered_expression": None,
            "result": None,
            "is_matched": False,
            "is_default": branch.is_default,
            "next_block_label": branch.next_block_label,
            "error": None,
        }

        # Handle default branch (no criteria to evaluate)
        if branch.criteria is None:
            # Default branch - only matched if no other branch matches
            branch_evaluations_list.append(branch_eval)
            continue

        if branch.criteria.criteria_type == "prompt":
            # Prompt branch: look up the result pre-computed by the batched
            # LLM evaluation above.
            if failure_reason:
                branch_eval["error"] = failure_reason
                branch_evaluations_list.append(branch_eval)
                break
            prompt_result = prompt_results_by_id.get(branch.id)
            rendered_expr = prompt_rendered_by_id.get(branch.id)
            branch_eval["rendered_expression"] = rendered_expr
            if prompt_result is None:
                failure_reason = "Missing result for natural language branch evaluation"
                branch_eval["error"] = failure_reason
                LOG.error(
                    "Missing prompt evaluation result",
                    block_label=self.label,
                    branch_index=idx,
                    branch_id=branch.id,
                )
                branch_evaluations_list.append(branch_eval)
                break
            branch_eval["result"] = prompt_result
            branch_evaluations_list.append(branch_eval)
            if prompt_result:
                matched_branch = branch
                # Mutating branch_eval after append is intentional: the list
                # holds the same dict object, so the UI sees the update.
                branch_eval["is_matched"] = True
                LOG.info(
                    "Conditional natural language branch matched",
                    block_label=self.label,
                    branch_index=idx,
                    next_block_label=branch.next_block_label,
                )
                break
            continue

        # Jinja template branch
        try:
            # Render the expression for UI display - substitute variables without evaluating
            rendered_expression = _render_jinja_expression_for_display(
                expression=branch.criteria.expression,
                context_values=evaluation_context.workflow_run_context.values
                if evaluation_context.workflow_run_context
                else {},
                block_label=self.label,
            )
            branch_eval["rendered_expression"] = rendered_expression

            result = await branch.criteria.evaluate(evaluation_context)
            branch_eval["result"] = result
            branch_evaluations_list.append(branch_eval)

            if result:
                matched_branch = branch
                branch_eval["is_matched"] = True
                LOG.info(
                    "Conditional branch matched",
                    block_label=self.label,
                    branch_index=idx,
                    next_block_label=branch.next_block_label,
                )
                break
        except Exception as exc:
            # Any evaluation error halts branch selection; the block fails
            # rather than silently falling through to the default branch.
            failure_reason = f"Failed to evaluate branch {idx} for {self.label}: {str(exc)}"
            branch_eval["error"] = str(exc)
            branch_eval["result"] = None
            branch_evaluations_list.append(branch_eval)
            LOG.error(
                "Failed to evaluate conditional branch",
                block_label=self.label,
                branch_index=idx,
                error=str(exc),
                exc_info=True,
            )
            break

    # Fall back to the default/else branch only when evaluation succeeded
    # but no condition matched.
    if matched_branch is None and failure_reason is None:
        matched_branch = self.get_default_branch()
        # Update is_matched for default branch in evaluations
        if matched_branch:
            for eval_entry in branch_evaluations_list:
                if eval_entry["branch_id"] == matched_branch.id:
                    eval_entry["is_matched"] = True
                    break

    matched_index = self.ordered_branches.index(matched_branch) if matched_branch in self.ordered_branches else None
    next_block_label = matched_branch.next_block_label if matched_branch else None
    executed_branch_id = matched_branch.id if matched_branch else None

    # Extract execution details for frontend display
    executed_branch_expression: str | None = None
    executed_branch_result: bool | None = None
    executed_branch_next_block: str | None = None

    if matched_branch:
        executed_branch_next_block = matched_branch.next_block_label
        if matched_branch.is_default:
            # Default/else branch - no expression to evaluate
            executed_branch_expression = None
            executed_branch_result = None
        elif matched_branch.criteria:
            # Regular condition branch - it matched
            executed_branch_expression = matched_branch.criteria.expression
            executed_branch_result = True

    branch_metadata: BlockMetadata = {
        "branch_taken": next_block_label,
        "branch_index": matched_index,
        "branch_id": executed_branch_id,
        "branch_description": matched_branch.description if matched_branch else None,
        "criteria_type": matched_branch.criteria.criteria_type
        if matched_branch and matched_branch.criteria
        else None,
        "criteria_expression": matched_branch.criteria.expression
        if matched_branch and matched_branch.criteria
        else None,
        "next_block_label": next_block_label,
        # Detailed evaluation info for all branches
        "evaluations": branch_evaluations_list if branch_evaluations_list else None,
        # Raw LLM response for debugging prompt-based evaluations (masked for secrets)
        "llm_response": (
            workflow_run_context.mask_secrets_in_data(prompt_llm_response)
            if workflow_run_context and prompt_llm_response
            else prompt_llm_response
        ),
        # The exact prompt sent to LLM for debugging (masked for secrets)
        "llm_prompt": (
            workflow_run_context.mask_secrets_in_data(prompt_extraction_goal)
            if workflow_run_context and prompt_extraction_goal
            else prompt_extraction_goal
        ),
    }

    status = BlockStatus.completed
    success = True

    if failure_reason:
        status = BlockStatus.failed
        success = False
    elif matched_branch is None:
        failure_reason = "No conditional branch matched and no default branch configured"
        status = BlockStatus.failed
        success = False

    if workflow_run_context:
        workflow_run_context.update_block_metadata(self.label, branch_metadata)
        try:
            await self.record_output_parameter_value(
                workflow_run_context=workflow_run_context,
                workflow_run_id=workflow_run_id,
                value=branch_metadata,
            )
        except Exception as exc:
            # Recording is best-effort; failure to persist metadata does not
            # fail the block itself.
            LOG.warning(
                "Failed to record branch metadata as output parameter",
                workflow_run_id=workflow_run_id,
                block_label=self.label,
                error=str(exc),
            )

    block_result = await self.build_block_result(
        success=success,
        failure_reason=failure_reason,
        output_parameter_value=branch_metadata,
        status=status,
        workflow_run_block_id=workflow_run_block_id,
        organization_id=organization_id,
        executed_branch_id=executed_branch_id,
        executed_branch_expression=executed_branch_expression,
        executed_branch_result=executed_branch_result,
        executed_branch_next_block=executed_branch_next_block,
    )
    return block_result
|
||
|
||
@property
def ordered_branches(self) -> list[BranchCondition]:
    """Branches in the exact order the workflow author listed them.

    Returns a fresh list so callers cannot mutate the block's own
    ``branch_conditions`` in place.
    """
    return [*self.branch_conditions]
|
||
|
||
def get_default_branch(self) -> BranchCondition | None:
    """Locate the branch flagged as the default/else branch, if any.

    The first branch with ``is_default`` set wins; returns None when no
    default branch is configured.
    """
    for candidate in self.branch_conditions:
        if candidate.is_default:
            return candidate
    return None
|
||
|
||
|
||
class WorkflowTriggerBlock(Block):
    """Block that triggers another workflow.

    Runs the child workflow either synchronously (``wait_for_completion=True``,
    executing inline and surfacing the child's outputs) or as an async
    fire-and-forget dispatch. Guards against runaway recursion via
    ``MAX_TRIGGER_DEPTH``.
    """

    # There is a mypy bug with Literal. Without the type: ignore, mypy will raise an error:
    # Parameter 1 of Literal[...] cannot be of type "Any"
    block_type: Literal[BlockType.WORKFLOW_TRIGGER] = BlockType.WORKFLOW_TRIGGER  # type: ignore

    # The permanent ID of the target workflow to trigger
    workflow_permanent_id: str
    # Parameters/payload to pass to the triggered workflow
    payload: dict[str, Any] | None = None
    # Whether to wait for the triggered workflow to complete
    wait_for_completion: bool = True
    # Optional browser session ID for the triggered workflow
    browser_session_id: str | None = None
    # When True, the child workflow inherits the parent's browser session
    use_parent_browser_session: bool = False
    # Parameters for Jinja2 template interpolation
    parameters: list[PARAMETER_TYPE] = []

    # Maximum allowed nesting of workflow triggers (see _check_trigger_depth).
    MAX_TRIGGER_DEPTH: ClassVar[int] = 10

    def get_all_parameters(
        self,
        workflow_run_id: str,
    ) -> list[PARAMETER_TYPE]:
        """Return the parameters declared on this block (used for template interpolation)."""
        return self.parameters

    async def _check_trigger_depth(self, workflow_run_id: str) -> int:
        """Check the nesting depth of workflow triggers to prevent infinite recursion.

        Walks the parent_workflow_run_id chain upward, raising
        InvalidWorkflowDefinition once MAX_TRIGGER_DEPTH is reached; otherwise
        returns the observed depth.

        Note: This depth guard walks the parent_workflow_run_id chain, which is only
        populated for synchronous triggers. For async (fire-and-forget) dispatch, the
        parent may have already completed before the child runs, so circular async
        chains (A->B->A) are only blocked while A is still running. A full
        visited-workflow guard would require persistent state and is left as a future
        enhancement.
        """
        depth = 0
        current_run_id: str | None = workflow_run_id
        while current_run_id:
            if depth >= self.MAX_TRIGGER_DEPTH:
                raise InvalidWorkflowDefinition(
                    f"Workflow trigger depth exceeds maximum of {self.MAX_TRIGGER_DEPTH}. "
                    "This may indicate a circular workflow trigger chain."
                )
            run = await app.DATABASE.get_workflow_run(current_run_id)
            if not run or not run.parent_workflow_run_id:
                break
            current_run_id = run.parent_workflow_run_id
            depth += 1
        return depth

    def _render_template_value(
        self,
        value: str,
        workflow_run_context: WorkflowRunContext,
    ) -> Any:
        """Render a single Jinja2 template string, handling the | json filter marker.

        A value fully wrapped in _JSON_TYPE_MARKER (produced by the '| json'
        filter) is parsed back into its native type; a marker embedded in the
        middle of other text is rejected, since partial JSON substitution is
        not meaningful.
        """
        rendered = self.format_block_parameter_template_from_workflow_run_context(
            value, workflow_run_context, force_include_secrets=True
        )
        if rendered.startswith(_JSON_TYPE_MARKER) and rendered.endswith(_JSON_TYPE_MARKER):
            json_str = rendered[len(_JSON_TYPE_MARKER) : -len(_JSON_TYPE_MARKER)]
            try:
                return json.loads(json_str)
            except json.JSONDecodeError:
                raise FailedToFormatJinjaStyleParameter(value, f"Raw JSON filter produced invalid JSON: {json_str}")
        elif _JSON_TYPE_MARKER in rendered:
            raise FailedToFormatJinjaStyleParameter(
                value,
                "The '| json' filter can only be used for complete value replacement. "
                "It cannot be combined with other text (e.g., 'prefix-{{ val | json }}'). "
                "Remove the surrounding text or remove the '| json' filter.",
            )
        return rendered

    def _render_templates_in_payload(
        self,
        payload: dict[str, Any],
        workflow_run_context: WorkflowRunContext,
    ) -> dict[str, Any]:
        """Recursively render Jinja2 templates in payload values."""
        resolved: dict[str, Any] = {}
        for key, value in payload.items():
            if isinstance(value, str):
                resolved[key] = self._render_template_value(value, workflow_run_context)
            elif isinstance(value, dict):
                resolved[key] = self._render_templates_in_payload(value, workflow_run_context)
            elif isinstance(value, list):
                resolved[key] = self._render_templates_in_list(value, workflow_run_context)
            else:
                # Non-string scalars (numbers, bools, None) pass through untouched.
                resolved[key] = value
        return resolved

    def _render_templates_in_list(
        self,
        items: list[Any],
        workflow_run_context: WorkflowRunContext,
    ) -> list[Any]:
        """Recursively render Jinja2 templates in list items (strings, nested dicts, and nested lists)."""
        result: list[Any] = []
        for item in items:
            if isinstance(item, str):
                result.append(self._render_template_value(item, workflow_run_context))
            elif isinstance(item, dict):
                result.append(self._render_templates_in_payload(item, workflow_run_context))
            elif isinstance(item, list):
                result.append(self._render_templates_in_list(item, workflow_run_context))
            else:
                result.append(item)
        return result

    def format_potential_template_parameters(self, workflow_run_context: WorkflowRunContext) -> None:
        """Resolve Jinja2 templates in workflow_permanent_id, payload, and browser_session_id in place."""
        self.workflow_permanent_id = self.format_block_parameter_template_from_workflow_run_context(
            self.workflow_permanent_id, workflow_run_context, force_include_secrets=True
        )
        if self.payload:
            self.payload = self._render_templates_in_payload(self.payload, workflow_run_context)
        if self.browser_session_id:
            self.browser_session_id = self.format_block_parameter_template_from_workflow_run_context(
                self.browser_session_id, workflow_run_context, force_include_secrets=True
            )

    async def execute(
        self,
        workflow_run_id: str,
        workflow_run_block_id: str,
        organization_id: str | None = None,
        browser_session_id: str | None = None,
        **kwargs: dict,
    ) -> BlockResult:
        """Trigger the target workflow.

        Steps: (1) resolve Jinja2 templates, (2) guard recursion depth,
        (3) load the organization, (4) resolve the browser session, then
        (5) either execute the child inline (wait_for_completion) or dispatch
        it fire-and-forget. Errors are surfaced via a failed BlockResult whose
        output value carries the failure reason.
        """
        from skyvern.forge.sdk.workflow.models.workflow import WorkflowRequestBody, WorkflowRunStatus  # noqa: PLC0415

        workflow_run_context = self.get_workflow_run_context(workflow_run_id)

        # Helper to record output and build a failed block result in one step.
        # This ensures downstream blocks referencing block_X_output see the
        # failure reason instead of "parameter not found".
        async def _fail(failure_reason: str) -> BlockResult:
            error_output = {"failure_reason": failure_reason}
            await self.record_output_parameter_value(workflow_run_context, workflow_run_id, error_output)
            return await self.build_block_result(
                success=False,
                failure_reason=failure_reason,
                output_parameter_value=error_output,
                status=BlockStatus.failed,
                workflow_run_block_id=workflow_run_block_id,
                organization_id=organization_id,
            )

        # 1. Resolve Jinja2 templates
        try:
            self.format_potential_template_parameters(workflow_run_context)
        except Exception as e:
            return await _fail(f"Failed to resolve templates: {str(e)}")

        resolved_workflow_permanent_id = self.workflow_permanent_id
        resolved_payload = self.payload

        # 2. Check recursion depth
        try:
            await self._check_trigger_depth(workflow_run_id)
        except InvalidWorkflowDefinition as e:
            return await _fail(str(e))

        # 3. Get the organization
        if not organization_id:
            return await _fail("organization_id is required for WorkflowTriggerBlock")
        organization = await app.DATABASE.get_organization(organization_id)
        if not organization:
            return await _fail(f"Organization {organization_id} not found")

        # 4. Resolve browser session
        # Browser session priority:
        # 1. Explicit browser_session_id configured on the block
        # 2. use_parent_browser_session → inherit parent's session (persistent
        #    or in-memory via self.pages[parent_workflow_run_id] lookup)
        # 3. Neither → for sync (wait_for_completion), create a fresh persistent
        #    session; for async (fire-and-forget), let the child's Temporal worker
        #    handle its own browser.
        created_fresh_session = False
        if self.browser_session_id:
            resolved_browser_session_id = self.browser_session_id
        elif self.use_parent_browser_session and browser_session_id:
            resolved_browser_session_id = browser_session_id
        elif self.use_parent_browser_session:
            # Parent uses an in-memory browser (no persistent session).
            # Pass None so the child inherits via the parent_workflow_run_id
            # lookup in get_or_create_for_workflow_run.
            resolved_browser_session_id = None
        elif self.wait_for_completion:
            # Sync mode: child runs inline in the same process, so it needs
            # its own persistent session to avoid sharing the parent's browser.
            parent_workflow_run = await app.DATABASE.get_workflow_run(workflow_run_id)
            proxy_location = parent_workflow_run.proxy_location if parent_workflow_run else None
            try:
                child_browser_session = await app.PERSISTENT_SESSIONS_MANAGER.create_session(
                    organization_id=organization_id,
                    proxy_location=proxy_location,
                    timeout_minutes=30,
                )
                resolved_browser_session_id = child_browser_session.persistent_browser_session_id
                created_fresh_session = True
                LOG.info(
                    "Created fresh browser session for triggered workflow",
                    parent_workflow_run_id=workflow_run_id,
                    child_browser_session_id=resolved_browser_session_id,
                )
            except Exception as e:
                return await _fail(f"Failed to create browser session for triggered workflow: {str(e)}")
        else:
            # Async (fire-and-forget): the child runs in its own Temporal worker
            # and will create its own browser. No pre-creation needed.
            resolved_browser_session_id = None

        # 5. Execute based on wait mode
        output_data: dict[str, Any] = {}
        success = False
        if self.wait_for_completion:
            # Synchronous: setup + execute inline in the same process.
            workflow_request = WorkflowRequestBody(
                data=resolved_payload,
                browser_session_id=resolved_browser_session_id,
            )

            # Save the parent's skyvern_context because setup_workflow_run and
            # execute_workflow overwrite it with the child's values. We restore
            # it after the child finishes so subsequent parent blocks get correct
            # context (logs, observability, workflow_run_id, etc.).
            from skyvern.forge.sdk.core import skyvern_context  # noqa: PLC0415

            parent_context = skyvern_context.current()
            try:
                triggered_workflow_run = await app.WORKFLOW_SERVICE.setup_workflow_run(
                    request_id=None,
                    workflow_request=workflow_request,
                    workflow_permanent_id=resolved_workflow_permanent_id,
                    organization=organization,
                    parent_workflow_run_id=workflow_run_id,
                )
            except Exception as e:
                error_msg = get_user_facing_exception_message(e)
                if parent_context:
                    skyvern_context.set(parent_context)
                # Setup failed: release the fresh session we created above so
                # it doesn't leak.
                if created_fresh_session and resolved_browser_session_id:
                    try:
                        await app.PERSISTENT_SESSIONS_MANAGER.close_session(
                            organization_id, resolved_browser_session_id
                        )
                    except Exception:
                        LOG.warning(
                            "Failed to close child browser session after setup failure",
                            child_browser_session_id=resolved_browser_session_id,
                            exc_info=True,
                        )
                return await _fail(f"Failed to setup triggered workflow run: {error_msg}")

            triggered_run_id = triggered_workflow_run.workflow_run_id

            LOG.info(
                "Triggered workflow run (sync)",
                parent_workflow_run_id=workflow_run_id,
                triggered_workflow_run_id=triggered_run_id,
                triggered_workflow_permanent_id=resolved_workflow_permanent_id,
            )

            try:
                final_run = await app.WORKFLOW_SERVICE.execute_workflow(
                    workflow_run_id=triggered_run_id,
                    api_key=None,
                    organization=organization,
                    browser_session_id=resolved_browser_session_id,
                )
                success = final_run.status == WorkflowRunStatus.completed
                output_data = {
                    "workflow_run_id": triggered_run_id,
                    "workflow_permanent_id": resolved_workflow_permanent_id,
                    "status": str(final_run.status),
                    "failure_reason": final_run.failure_reason,
                }
                # Include the child workflow's output parameters so downstream
                # blocks can reference them (e.g. block_3_output.outputs.block_2_output)
                try:
                    child_output_params = (
                        await app.WORKFLOW_SERVICE.get_output_parameter_workflow_run_output_parameter_tuples(
                            workflow_id=final_run.workflow_id,
                            workflow_run_id=triggered_run_id,
                        )
                    )
                    child_outputs: dict[str, Any] = {}
                    for output_param, run_output_param in child_output_params:
                        child_outputs[output_param.key] = run_output_param.value
                    output_data["outputs"] = child_outputs
                except Exception:
                    # Best-effort: missing child outputs do not fail the block.
                    LOG.warning(
                        "Failed to fetch child workflow outputs",
                        triggered_workflow_run_id=triggered_run_id,
                        exc_info=True,
                    )
            except Exception as e:
                error_msg = get_user_facing_exception_message(e)
                output_data = {
                    "workflow_run_id": triggered_run_id,
                    "workflow_permanent_id": resolved_workflow_permanent_id,
                    "status": "failed",
                    "failure_reason": f"Triggered workflow execution failed: {error_msg}",
                }
                success = False
            finally:
                # Always restore the parent's context and release the fresh
                # browser session, regardless of how the child finished.
                if parent_context:
                    skyvern_context.set(parent_context)
                if created_fresh_session and resolved_browser_session_id:
                    try:
                        await app.PERSISTENT_SESSIONS_MANAGER.close_session(
                            organization_id, resolved_browser_session_id
                        )
                    except Exception:
                        LOG.warning(
                            "Failed to close child browser session",
                            child_browser_session_id=resolved_browser_session_id,
                            triggered_workflow_run_id=triggered_run_id,
                            exc_info=True,
                        )
        else:
            # Fire and forget: dispatch the child workflow via Temporal so it
            # gets its own independent worker process. This ensures the child
            # survives even if the parent workflow finishes first.
            # NOTE: This path requires Temporal (cloud). On self-hosted
            # (BackgroundTaskExecutor), the workflow run record is created but
            # execution is silently skipped because background_tasks=None.
            from skyvern.services.workflow_service import run_workflow  # noqa: PLC0415

            workflow_request = WorkflowRequestBody(
                data=resolved_payload,
                browser_session_id=resolved_browser_session_id,
            )
            try:
                triggered_workflow_run = await run_workflow(
                    workflow_id=resolved_workflow_permanent_id,
                    organization=organization,
                    workflow_request=workflow_request,
                    request=None,
                    background_tasks=None,
                    parent_workflow_run_id=workflow_run_id,
                )
            except Exception as e:
                error_msg = get_user_facing_exception_message(e)
                return await _fail(f"Failed to dispatch triggered workflow: {error_msg}")

            triggered_run_id = triggered_workflow_run.workflow_run_id

            LOG.info(
                "Async workflow dispatch succeeded (via Temporal)",
                parent_workflow_run_id=workflow_run_id,
                triggered_workflow_run_id=triggered_run_id,
                triggered_workflow_permanent_id=resolved_workflow_permanent_id,
            )
            output_data = {
                "workflow_run_id": triggered_run_id,
                "workflow_permanent_id": resolved_workflow_permanent_id,
                "status": "queued",
            }
            success = True

        await self.record_output_parameter_value(workflow_run_context, workflow_run_id, output_data)

        return await self.build_block_result(
            success=success,
            failure_reason=output_data.get("failure_reason") if not success else None,
            output_parameter_value=output_data,
            status=BlockStatus.completed if success else BlockStatus.failed,
            workflow_run_block_id=workflow_run_block_id,
            organization_id=organization_id,
        )
|
||
|
||
|
||
def get_all_blocks(blocks: list[BlockTypeVar]) -> list[BlockTypeVar]:
    """
    Flatten a workflow's block list, recursing into nested blocks.

    At time of writing, only ForLoop blocks contain nested blocks; each loop
    block is followed immediately by its (recursively flattened) children,
    giving a depth-first pre-order traversal.
    """
    flattened: list[BlockTypeVar] = []

    for current in blocks:
        flattened.append(current)
        if current.block_type != BlockType.FOR_LOOP:
            continue
        flattened.extend(get_all_blocks(current.loop_blocks))

    return flattened
|
||
|
||
|
||
# Union of every concrete Block implementation. Pydantic uses this union,
# together with the "block_type" discriminator below, to deserialize workflow
# definitions into the correct Block subclass.
BlockSubclasses = Union[
    ConditionalBlock,
    ForLoopBlock,
    TaskBlock,
    CodeBlock,
    TextPromptBlock,
    DownloadToS3Block,
    UploadToS3Block,
    SendEmailBlock,
    FileParserBlock,
    PDFParserBlock,
    ValidationBlock,
    ActionBlock,
    NavigationBlock,
    ExtractionBlock,
    LoginBlock,
    WaitBlock,
    HumanInteractionBlock,
    FileDownloadBlock,
    UrlBlock,
    TaskV2Block,
    FileUploadBlock,
    HttpRequestBlock,
    PrintPageBlock,
    WorkflowTriggerBlock,
]
# Discriminated union: each subclass's literal "block_type" field selects the
# concrete type during validation.
BlockTypeVar = Annotated[BlockSubclasses, Field(discriminator="block_type")]
|
||
|
||
|
||
# Branch criteria variants used by ConditionalBlock branches; the
# "criteria_type" field discriminates between Jinja and prompt criteria.
BranchCriteriaSubclasses = Union[JinjaBranchCriteria, PromptBranchCriteria]
BranchCriteriaTypeVar = Annotated[BranchCriteriaSubclasses, Field(discriminator="criteria_type")]
|