Skyvern/skyvern/forge/sdk/workflow/models/block.py

6590 lines
285 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

from __future__ import annotations
import abc
import ast
import asyncio
import csv
import json
import os
import re
import smtplib
import textwrap
import uuid
from collections import defaultdict, deque
from datetime import datetime, timezone
from email.message import EmailMessage
from pathlib import Path
from types import SimpleNamespace
from typing import Annotated, Any, Awaitable, Callable, ClassVar, Literal, Union, cast
from urllib.parse import quote, urlparse
import aiofiles
import aiohttp
import docx
import filetype
import pandas as pd
import pyotp
import structlog
from charset_normalizer import from_bytes
from email_validator import EmailNotValidError, validate_email
from jinja2 import StrictUndefined
from jinja2.sandbox import SandboxedEnvironment
from playwright.async_api import Page
from pydantic import BaseModel, Field, model_validator
from skyvern.config import settings
from skyvern.constants import (
AZURE_BLOB_STORAGE_MAX_UPLOAD_FILE_COUNT,
GET_DOWNLOADED_FILES_TIMEOUT,
MAX_FILE_PARSE_INPUT_TOKENS,
MAX_UPLOAD_FILE_COUNT,
)
from skyvern.exceptions import (
AzureConfigurationError,
ContextParameterValueNotFound,
DownloadFileMaxSizeExceeded,
MissingBrowserState,
MissingBrowserStatePage,
PDFParsingError,
TaskNotFound,
UnexpectedTaskStatus,
get_user_facing_exception_message,
)
from skyvern.forge import app
from skyvern.forge.prompts import prompt_engine
from skyvern.forge.sdk.api import email
from skyvern.forge.sdk.api.aws import AsyncAWSClient
from skyvern.forge.sdk.api.files import (
calculate_sha256_for_file,
create_named_temporary_file,
download_file,
download_from_s3,
get_download_dir,
get_path_for_workflow_download_directory,
parse_uri_to_path,
)
from skyvern.forge.sdk.api.llm.api_handler import LLMAPIHandler
from skyvern.forge.sdk.api.llm.api_handler_factory import LLMAPIHandlerFactory
from skyvern.forge.sdk.artifact.models import ArtifactType
from skyvern.forge.sdk.core import skyvern_context
from skyvern.forge.sdk.core.aiohttp_helper import aiohttp_request
from skyvern.forge.sdk.db.enums import TaskType
from skyvern.forge.sdk.db.exceptions import NotFoundError
from skyvern.forge.sdk.experimentation.llm_prompt_config import get_llm_handler_for_prompt_type
from skyvern.forge.sdk.models import Step
from skyvern.forge.sdk.schemas.files import FileInfo
from skyvern.forge.sdk.schemas.task_v2 import TaskV2Status
from skyvern.forge.sdk.schemas.tasks import Task, TaskOutput, TaskStatus
from skyvern.forge.sdk.services.bitwarden import BitwardenConstants
from skyvern.forge.sdk.services.credentials import AzureVaultConstants, OnePasswordConstants
from skyvern.forge.sdk.settings_manager import SettingsManager
from skyvern.forge.sdk.trace import traced
from skyvern.forge.sdk.utils.pdf_parser import extract_pdf_file, validate_pdf_file
from skyvern.forge.sdk.utils.sanitization import sanitize_postgres_text
from skyvern.forge.sdk.workflow.context_manager import BlockMetadata, WorkflowRunContext
from skyvern.forge.sdk.workflow.exceptions import (
CustomizedCodeException,
FailedToFormatJinjaStyleParameter,
InsecureCodeDetected,
InvalidEmailClientConfiguration,
InvalidFileType,
InvalidWorkflowDefinition,
MissingJinjaVariables,
NoIterableValueFound,
NoValidEmailRecipient,
)
from skyvern.forge.sdk.workflow.models.parameter import (
PARAMETER_TYPE,
AWSSecretParameter,
ContextParameter,
OutputParameter,
ParameterType,
WorkflowParameter,
)
from skyvern.schemas.runs import RunEngine
from skyvern.schemas.workflows import BlockResult, BlockStatus, BlockType, FileStorageType, FileType
from skyvern.services.error_detection_service import detect_user_defined_errors_for_task
from skyvern.utils.strings import generate_random_string
from skyvern.utils.templating import get_missing_variables
from skyvern.utils.token_counter import count_tokens
from skyvern.utils.url_validators import prepend_scheme_and_validate_url
from skyvern.webeye.browser_state import BrowserState
from skyvern.webeye.utils.page import SkyvernFrame
# Module-level structured logger shared by all block implementations in this file.
LOG = structlog.get_logger()

# Sandboxed Jinja environment used to render user-authored templates in block
# parameters. In "strict" mode, undefined template variables raise instead of
# silently rendering as empty strings.
if settings.WORKFLOW_TEMPLATING_STRICTNESS == "strict":
    jinja_sandbox_env = SandboxedEnvironment(undefined=StrictUndefined)
else:
    jinja_sandbox_env = SandboxedEnvironment()
# Date format used for the built-in {{current_date}} reserved parameter.
CURRENT_DATE_FORMAT = "%Y-%m-%d"
# Sentinel marker for native JSON type injection via | json filter.
_JSON_TYPE_MARKER = "__SKYVERN_RAW_JSON__"
def _json_type_filter(value: Any) -> str:
"""Jinja filter that marks a value for native JSON type injection.
Usage in templates: {{ some_bool | json }}
The filter serializes the value to JSON and wraps it with sentinel markers.
When _render_templates_in_json() detects these markers, it unwraps and
parses the JSON to get the native typed value (bool, int, list, etc.).
Uses default=str to handle non-JSON-serializable types (datetime, Enum, etc.)
"""
return f"{_JSON_TYPE_MARKER}{json.dumps(value, default=str)}{_JSON_TYPE_MARKER}"
# Register the sentinel-wrapping filter as `| json` on the sandboxed environment.
jinja_sandbox_env.filters["json"] = _json_type_filter
# Mapping from TaskV2Status to the corresponding BlockStatus. Declared once at
# import time so it is not recreated on each block execution.
TASKV2_TO_BLOCK_STATUS: dict[TaskV2Status, BlockStatus] = {
    TaskV2Status.completed: BlockStatus.completed,
    TaskV2Status.terminated: BlockStatus.terminated,
    TaskV2Status.failed: BlockStatus.failed,
    TaskV2Status.canceled: BlockStatus.canceled,
    TaskV2Status.timed_out: BlockStatus.timed_out,
}
# ForLoop constants
# Default caps applied to for-loop blocks — presumably guard against runaway
# loops; exact consumers are outside this chunk (TODO confirm).
DEFAULT_MAX_LOOP_ITERATIONS = 100
DEFAULT_MAX_STEPS_PER_ITERATION = 50
class Block(BaseModel, abc.ABC):
    """Base class for workflow nodes (see branching spec [[s-4bnl]] for metadata semantics)."""

    # Must be unique within workflow definition
    label: str = Field(description="Author-facing identifier for a block; unique within a workflow.")
    next_block_label: str | None = Field(
        default=None,
        description="Optional pointer to the next block label when constructing a DAG. "
        "Defaults to sequential order when omitted.",
    )
    # Discriminator identifying the concrete block implementation.
    block_type: BlockType
    # Where the block's result is recorded (see record_output_parameter_value).
    output_parameter: OutputParameter
    # Recorded on the workflow_run_block row; presumably tells the runner to
    # keep going past a failure of this block — TODO confirm against the runner.
    continue_on_failure: bool = False
    # Optional per-block model override, e.g. {"model_name": ...}; see override_llm_key.
    model: dict[str, Any] | None = None
    disable_cache: bool = False
    # Only valid for blocks inside a for loop block
    # Whether to continue to the next iteration when the block fails
    next_loop_on_failure: bool = False
@property
def override_llm_key(self) -> str | None:
"""
If the `Block` has a `model` defined, then return the mapped llm_key for it.
Otherwise return `None`.
"""
if self.model:
model_name = self.model.get("model_name")
if model_name:
mapping = SettingsManager.get_settings().get_model_name_to_llm_key()
return mapping.get(model_name, {}).get("llm_key")
return None
    async def record_output_parameter_value(
        self,
        workflow_run_context: WorkflowRunContext,
        workflow_run_id: str,
        value: dict[str, Any] | list | str | None = None,
    ) -> None:
        """Persist ``value`` as this block's output parameter.

        The value is registered on the in-memory workflow run context first,
        then upserted into the database, and finally logged. A ``None`` value
        is recorded as-is (execute_safe uses this when a block fails before
        producing output).
        """
        await workflow_run_context.register_output_parameter_value_post_execution(
            parameter=self.output_parameter,
            value=value,
        )
        await app.DATABASE.create_or_update_workflow_run_output_parameter(
            workflow_run_id=workflow_run_id,
            output_parameter_id=self.output_parameter.output_parameter_id,
            value=value,
        )
        LOG.info(
            "Registered output parameter value",
            output_parameter_id=self.output_parameter.output_parameter_id,
            workflow_run_id=workflow_run_id,
            output_parameter_value=value,
        )
async def build_block_result(
self,
success: bool,
failure_reason: str | None,
output_parameter_value: dict[str, Any] | list | str | None = None,
status: BlockStatus | None = None,
workflow_run_block_id: str | None = None,
organization_id: str | None = None,
executed_branch_id: str | None = None,
executed_branch_expression: str | None = None,
executed_branch_result: bool | None = None,
executed_branch_next_block: str | None = None,
) -> BlockResult:
# TODO: update workflow run block status and failure reason
if isinstance(output_parameter_value, str):
output_parameter_value = {"value": output_parameter_value}
if workflow_run_block_id:
await app.DATABASE.update_workflow_run_block(
workflow_run_block_id=workflow_run_block_id,
output=output_parameter_value,
status=status,
failure_reason=failure_reason,
organization_id=organization_id,
executed_branch_id=executed_branch_id,
executed_branch_expression=executed_branch_expression,
executed_branch_result=executed_branch_result,
executed_branch_next_block=executed_branch_next_block,
)
return BlockResult(
success=success,
failure_reason=failure_reason,
output_parameter=self.output_parameter,
output_parameter_value=output_parameter_value,
status=status,
workflow_run_block_id=workflow_run_block_id,
)
    async def get_or_create_browser_state(
        self,
        workflow_run_id: str,
        organization_id: str | None = None,
        browser_session_id: str | None = None,
    ) -> BrowserState | None:
        """
        Acquire or create browser state for block execution.

        Checks persistent sessions first (debugger use case), then falls back to
        workflow run browser manager. If no state exists, creates a new one.

        Returns BrowserState if successful, None if creation failed.
        """
        browser_state: BrowserState | None = None
        # Persistent browser sessions take precedence when both a session id
        # and an organization id are supplied.
        if browser_session_id and organization_id:
            browser_state = await app.PERSISTENT_SESSIONS_MANAGER.get_browser_state(browser_session_id, organization_id)
        else:
            browser_state = app.BROWSER_MANAGER.get_for_workflow_run(workflow_run_id)
        if not browser_state:
            # Neither path yielded state: create one for the workflow run and
            # normalize it (proxy, headers, browser profile) before returning.
            workflow_run = await app.WORKFLOW_SERVICE.get_workflow_run(
                workflow_run_id=workflow_run_id,
                organization_id=organization_id,
            )
            try:
                browser_state = await app.BROWSER_MANAGER.get_or_create_for_workflow_run(
                    workflow_run=workflow_run,
                    url=None,
                    browser_session_id=browser_session_id,
                    browser_profile_id=workflow_run.browser_profile_id,
                )
                await browser_state.check_and_fix_state(
                    url=None,
                    proxy_location=workflow_run.proxy_location,
                    workflow_run_id=workflow_run_id,
                    workflow_permanent_id=workflow_run.workflow_permanent_id,
                    organization_id=workflow_run.organization_id,
                    extra_http_headers=workflow_run.extra_http_headers,
                    browser_address=workflow_run.browser_address,
                    browser_profile_id=workflow_run.browser_profile_id,
                )
            except Exception:
                # Creation failure is logged and surfaced to the caller as None.
                LOG.exception(
                    "Failed to create browser state",
                    workflow_run_id=workflow_run_id,
                )
                return None
        return browser_state
    def format_block_parameter_template_from_workflow_run_context(
        self,
        potential_template: str,
        workflow_run_context: WorkflowRunContext,
        *,
        force_include_secrets: bool = False,
    ) -> str:
        """
        Format a template string using the workflow run context.

        Security Note:
            Real secret values are ONLY resolved for blocks that do NOT expose data to the LLM
            (like HttpRequestBlock, CodeBlock), as determined by is_safe_block_for_secrets.

        Raises:
            MissingJinjaVariables: in strict templating mode, when the template
                references variables absent from the assembled template data.
        """
        if not potential_template:
            return potential_template
        # Security: only allow real secret values for non-LLM blocks (HttpRequestBlock, CodeBlock)
        is_safe_block_for_secrets = self.block_type in [
            BlockType.CODE,
            BlockType.HTTP_REQUEST,
            BlockType.WORKFLOW_TRIGGER,
        ]
        template = jinja_sandbox_env.from_string(potential_template)
        block_reference_data: dict[str, Any] = workflow_run_context.get_block_metadata(self.label)
        # Copy so the mutations below never leak back into the shared context values.
        template_data = workflow_run_context.values.copy()
        include_secrets = workflow_run_context.include_secrets_in_templates or force_include_secrets
        # FORCE DISABLE if block is not safe (sends data to LLM)
        if include_secrets and not is_safe_block_for_secrets:
            include_secrets = False
        if include_secrets:
            template_data.update(workflow_run_context.secrets)
        # Create easier-to-access entries for credentials
        # Look for credential parameters and create real_username/real_password entries
        # First collect all credential parameters to avoid modifying dict during iteration
        credential_params = []
        for key, value in list(template_data.items()):
            if isinstance(value, dict) and "context" in value:
                # PASSWORD credential: has username and password
                if "username" in value and "password" in value:
                    credential_params.append((key, value))
                # SECRET credential: has secret_value
                elif "secret_value" in value:
                    credential_params.append((key, value))
        # Now add the real_username/real_password entries
        for key, value in credential_params:
            username_secret_id = value.get("username", "")
            password_secret_id = value.get("password", "")
            # Get the actual values from the secrets
            real_username = template_data.get(username_secret_id, "")
            real_password = template_data.get(password_secret_id, "")
            # Add easier-to-access entries
            template_data[f"{key}_real_username"] = real_username
            template_data[f"{key}_real_password"] = real_password
            if is_safe_block_for_secrets:
                # For non-LLM blocks, swap each credential placeholder for its
                # original secret value so the fields are directly usable.
                resolved_credential = value.copy()
                for credential_field, credential_placeholder in value.items():
                    if credential_field == "context":
                        continue
                    secret_value = workflow_run_context.get_original_secret_value_or_none(credential_placeholder)
                    if secret_value is not None:
                        resolved_credential[credential_field] = secret_value
                resolved_credential.pop("context", None)
                template_data[key] = resolved_credential
        # The block's own label always resolves to its metadata; an existing
        # dict value is merged into the metadata, anything else is overwritten.
        if self.label in template_data:
            current_value = template_data[self.label]
            if isinstance(current_value, dict):
                block_reference_data.update(current_value)
            else:
                LOG.warning(
                    f"Parameter {self.label} has a registered reference value, going to overwrite it by block metadata"
                )
        template_data[self.label] = block_reference_data
        # TODO (suchintan): This is pretty hacky - we should have a standard way to initialize the workflow run context
        # inject the forloop metadata as global variables
        if "current_index" in block_reference_data:
            template_data["current_index"] = block_reference_data["current_index"]
        if "current_item" in block_reference_data:
            template_data["current_item"] = block_reference_data["current_item"]
        if "current_value" in block_reference_data:
            template_data["current_value"] = block_reference_data["current_value"]
        # Initialize workflow-level parameters (only when not already provided)
        if "workflow_title" not in template_data:
            template_data["workflow_title"] = workflow_run_context.workflow_title
        if "workflow_id" not in template_data:
            template_data["workflow_id"] = workflow_run_context.workflow_id
        if "workflow_permanent_id" not in template_data:
            template_data["workflow_permanent_id"] = workflow_run_context.workflow_permanent_id
        if "workflow_run_id" not in template_data:
            template_data["workflow_run_id"] = workflow_run_context.workflow_run_id
        if "current_date" not in template_data:
            template_data["current_date"] = datetime.now(timezone.utc).strftime(CURRENT_DATE_FORMAT)
        if "browser_session_id" not in template_data:
            template_data["browser_session_id"] = workflow_run_context.browser_session_id or ""
        template_data["workflow_run_outputs"] = workflow_run_context.workflow_run_outputs
        template_data["workflow_run_summary"] = workflow_run_context.build_workflow_run_summary()
        if settings.WORKFLOW_TEMPLATING_STRICTNESS == "strict":
            if missing_variables := get_missing_variables(potential_template, template_data):
                raise MissingJinjaVariables(
                    template=potential_template,
                    variables=missing_variables,
                )
        return template.render(template_data)
    @classmethod
    def get_subclasses(cls) -> tuple[type[Block], ...]:
        """Return the direct subclasses of this block class."""
        return tuple(cls.__subclasses__())

    @staticmethod
    def get_workflow_run_context(workflow_run_id: str) -> WorkflowRunContext:
        """Look up the WorkflowRunContext for a run via the global context manager."""
        return app.WORKFLOW_CONTEXT_MANAGER.get_workflow_run_context(workflow_run_id)

    @staticmethod
    def get_async_aws_client() -> AsyncAWSClient:
        """Return the shared async AWS client held by the workflow context manager."""
        return app.WORKFLOW_CONTEXT_MANAGER.aws_client
    @abc.abstractmethod
    async def execute(
        self,
        workflow_run_id: str,
        workflow_run_block_id: str,
        organization_id: str | None = None,
        browser_session_id: str | None = None,
        **kwargs: dict,
    ) -> BlockResult:
        """Run this block's logic and return its BlockResult.

        Implemented by each concrete block type. Invoked via execute_safe,
        which handles block-row creation, screenshots, and error containment.
        """
        pass
    async def _generate_workflow_run_block_description(
        self, workflow_run_block_id: str, organization_id: str | None = None
    ) -> None:
        """Generate and store a short LLM-written summary of this block.

        Best-effort: failures are logged and swallowed so description
        generation never affects block execution (execute_safe runs this as a
        fire-and-forget background task).
        """
        description = None
        try:
            # Exclude run-specific and noisy fields so the prompt only sees the
            # block's configuration.
            block_data = self.model_dump(
                exclude={
                    "workflow_run_block_id",
                    "organization_id",
                    "task_id",
                    "workflow_run_id",
                    "parent_workflow_run_block_id",
                    "label",
                    "status",
                    "output",
                    "continue_on_failure",
                    "failure_reason",
                    "actions",
                    "created_at",
                    "modified_at",
                },
                exclude_none=True,
            )
            description_generation_prompt = prompt_engine.load_prompt(
                "generate_workflow_run_block_description",
                block=block_data,
            )
            json_response = await app.SECONDARY_LLM_API_HANDLER(
                prompt=description_generation_prompt, prompt_name="generate-workflow-run-block-description"
            )
            description = json_response.get("summary")
            LOG.info(
                "Generated description for the workflow run block",
                description=description,
                workflow_run_block_id=workflow_run_block_id,
            )
        except Exception as e:
            LOG.exception("Failed to generate description for the workflow run block", error=e)
        # Persist only when the LLM actually produced a summary.
        if description:
            await app.DATABASE.update_workflow_run_block(
                workflow_run_block_id=workflow_run_block_id,
                description=description,
                organization_id=organization_id,
            )
    @traced()
    async def execute_safe(
        self,
        workflow_run_id: str,
        parent_workflow_run_block_id: str | None = None,
        organization_id: str | None = None,
        browser_session_id: str | None = None,
        **kwargs: dict,
    ) -> BlockResult:
        """Execute the block with full bookkeeping and error containment.

        Creates the workflow_run_block DB row, kicks off background description
        generation, captures a pre-execution screenshot when a browser is
        available, then delegates to execute(). Any exception is caught and
        converted into a failed BlockResult; the output parameter is recorded
        as None if nothing was recorded before the failure.
        """
        workflow_run_block_id = None
        engine: RunEngine | None = None
        try:
            # Task-style blocks carry an engine; record it on the block row.
            if isinstance(self, BaseTaskBlock):
                engine = self.engine
            workflow_run_block = await app.DATABASE.create_workflow_run_block(
                workflow_run_id=workflow_run_id,
                organization_id=organization_id,
                parent_workflow_run_block_id=parent_workflow_run_block_id,
                label=self.label,
                block_type=self.block_type,
                continue_on_failure=self.continue_on_failure,
                engine=engine,
            )
            workflow_run_block_id = workflow_run_block.workflow_run_block_id
            # generate the description for the workflow run block asynchronously
            # NOTE(review): the task returned by create_task is not referenced
            # anywhere; per the asyncio docs it may be garbage-collected before
            # it completes — consider keeping a reference (e.g. in a
            # module-level set). TODO confirm.
            asyncio.create_task(self._generate_workflow_run_block_description(workflow_run_block_id, organization_id))
            # create a screenshot
            browser_state = app.BROWSER_MANAGER.get_for_workflow_run(workflow_run_id)
            if not browser_state:
                LOG.warning(
                    "No browser state found when creating workflow_run_block",
                    workflow_run_id=workflow_run_id,
                    workflow_run_block_id=workflow_run_block_id,
                    browser_session_id=browser_session_id,
                    block_label=self.label,
                )
            else:
                try:
                    screenshot = await browser_state.take_fullpage_screenshot()
                except Exception:
                    # Screenshot is best-effort; never block execution on it.
                    LOG.warning(
                        "Failed to take screenshot before executing the block, ignoring the exception",
                        workflow_run_id=workflow_run_id,
                        workflow_run_block_id=workflow_run_block_id,
                    )
                    screenshot = None
                if screenshot:
                    await app.ARTIFACT_MANAGER.create_workflow_run_block_artifact(
                        workflow_run_block=workflow_run_block,
                        artifact_type=ArtifactType.SCREENSHOT_LLM,
                        data=screenshot,
                    )
            LOG.info(
                "Executing block", workflow_run_id=workflow_run_id, block_label=self.label, block_type=self.block_type
            )
            return await self.execute(
                workflow_run_id,
                workflow_run_block_id,
                organization_id=organization_id,
                browser_session_id=browser_session_id,
                **kwargs,
            )
        except Exception as e:
            LOG.exception(
                "Block execution failed",
                workflow_run_id=workflow_run_id,
                block_label=self.label,
                block_type=self.block_type,
            )
            # Record output parameter value if it hasn't been recorded yet
            workflow_run_context = self.get_workflow_run_context(workflow_run_id)
            if not workflow_run_context.has_value(self.output_parameter.key):
                await self.record_output_parameter_value(workflow_run_context, workflow_run_id)
            failure_reason = get_user_facing_exception_message(e)
            return await self.build_block_result(
                success=False,
                failure_reason=failure_reason,
                status=BlockStatus.failed,
                workflow_run_block_id=workflow_run_block_id,
                organization_id=organization_id,
            )
    @abc.abstractmethod
    def get_all_parameters(
        self,
        workflow_run_id: str,
    ) -> list[PARAMETER_TYPE]:
        """Return every parameter this block consumes for the given workflow run."""
        pass
class BaseTaskBlock(Block):
    """Shared fields and execution flow for blocks that run a browser task."""

    task_type: str = TaskType.general
    # Starting URL for the task; may be a literal URL, a parameter key, or a template.
    url: str | None = None
    title: str = ""
    # Which agent engine runs the task's steps.
    engine: RunEngine = RunEngine.skyvern_v1
    complete_criterion: str | None = None
    terminate_criterion: str | None = None
    navigation_goal: str | None = None
    data_extraction_goal: str | None = None
    data_schema: dict[str, Any] | list | str | None = None
    # error code to error description for the LLM
    error_code_mapping: dict[str, str] | None = None
    max_retries: int = 0
    max_steps_per_run: int | None = None
    parameters: list[PARAMETER_TYPE] = []
    complete_on_download: bool = False
    # Download path suffix; templated, then URL-quoted to keep the path valid.
    download_suffix: str | None = None
    totp_verification_url: str | None = None
    # TOTP identifier; may be a parameter key resolved at execution time.
    totp_identifier: str | None = None
    complete_verification: bool = True
    include_action_history_in_verification: bool = False
    download_timeout: float | None = None  # minutes
def get_all_parameters(
self,
workflow_run_id: str,
) -> list[PARAMETER_TYPE]:
parameters = self.parameters
workflow_run_context = self.get_workflow_run_context(workflow_run_id)
if self.url and workflow_run_context.has_parameter(self.url):
if self.url not in [parameter.key for parameter in parameters]:
parameters.append(workflow_run_context.get_parameter(self.url))
return parameters
def format_potential_template_parameters(self, workflow_run_context: WorkflowRunContext) -> None:
self.title = self.format_block_parameter_template_from_workflow_run_context(self.title, workflow_run_context)
if self.url:
self.url = self.format_block_parameter_template_from_workflow_run_context(self.url, workflow_run_context)
self.url = prepend_scheme_and_validate_url(self.url)
if self.totp_identifier:
self.totp_identifier = self.format_block_parameter_template_from_workflow_run_context(
self.totp_identifier, workflow_run_context
)
if self.totp_verification_url:
self.totp_verification_url = self.format_block_parameter_template_from_workflow_run_context(
self.totp_verification_url, workflow_run_context
)
self.totp_verification_url = prepend_scheme_and_validate_url(self.totp_verification_url)
if self.download_suffix:
self.download_suffix = self.format_block_parameter_template_from_workflow_run_context(
self.download_suffix, workflow_run_context
)
# encode the suffix to prevent invalid path style
self.download_suffix = quote(string=self.download_suffix, safe="")
if self.navigation_goal:
self.navigation_goal = self.format_block_parameter_template_from_workflow_run_context(
self.navigation_goal, workflow_run_context
)
if self.data_extraction_goal:
self.data_extraction_goal = self.format_block_parameter_template_from_workflow_run_context(
self.data_extraction_goal, workflow_run_context
)
if isinstance(self.data_schema, str):
self.data_schema = self.format_block_parameter_template_from_workflow_run_context(
self.data_schema, workflow_run_context
)
if self.complete_criterion:
self.complete_criterion = self.format_block_parameter_template_from_workflow_run_context(
self.complete_criterion, workflow_run_context
)
if self.terminate_criterion:
self.terminate_criterion = self.format_block_parameter_template_from_workflow_run_context(
self.terminate_criterion, workflow_run_context
)
@staticmethod
async def get_task_order(workflow_run_id: str, current_retry: int) -> tuple[int, int]:
"""
Returns the order and retry for the next task in the workflow run as a tuple.
"""
last_task_for_workflow_run = await app.DATABASE.get_last_task_for_workflow_run(workflow_run_id=workflow_run_id)
# If there is no previous task, the order will be 0 and the retry will be 0.
if last_task_for_workflow_run is None:
return 0, 0
# If there is a previous task but the current retry is 0, the order will be the order of the last task + 1
# and the retry will be 0.
order = last_task_for_workflow_run.order or 0
if current_retry == 0:
return order + 1, 0
# If there is a previous task and the current retry is not 0, the order will be the order of the last task
# and the retry will be the retry of the last task + 1. (There is a validation that makes sure the retry
# of the last task is equal to current_retry - 1) if it is not, we use last task retry + 1.
retry = last_task_for_workflow_run.retry or 0
if retry + 1 != current_retry:
LOG.error(
f"Last task for workflow run is retry number {last_task_for_workflow_run.retry}, "
f"but current retry is {current_retry}. Could be race condition. Using last task retry + 1",
workflow_run_id=workflow_run_id,
last_task_id=last_task_for_workflow_run.task_id,
last_task_retry=last_task_for_workflow_run.retry,
current_retry=current_retry,
)
return order, retry + 1
    async def _handle_task_failure_with_error_detection(
        self,
        task: Task,
        step: Step,
        browser_state: BrowserState | None,
        failure_reason: str,
        organization_id: str,
    ) -> None:
        """
        Handle task failure by updating the task status and detecting user-defined errors.

        This helper method consolidates the error detection logic that was previously
        duplicated across multiple exception handlers in the execute method.
        """
        # Mark the task failed first so its state is consistent even if the
        # error-detection step below raises.
        await app.DATABASE.update_task(
            task.task_id,
            status=TaskStatus.failed,
            organization_id=organization_id,
            failure_reason=failure_reason,
        )
        # Detect user-defined errors if error_code_mapping is provided
        if self.error_code_mapping:
            try:
                detected_errors = await detect_user_defined_errors_for_task(
                    task=task,
                    step=step,
                    browser_state=browser_state,
                    failure_reason=failure_reason,
                )
                if detected_errors:
                    # Only pass new errors — update_task() appends to existing errors
                    new_errors = [error.model_dump() for error in detected_errors]
                    await app.DATABASE.update_task(
                        task_id=task.task_id,
                        organization_id=organization_id,
                        errors=new_errors,
                    )
            except Exception:
                # Best-effort: error detection must never mask the original failure.
                LOG.exception(
                    "Failed to detect or store user-defined errors during task failure",
                    task_id=task.task_id,
                )
async def execute(
self,
workflow_run_id: str,
workflow_run_block_id: str,
organization_id: str | None = None,
browser_session_id: str | None = None,
**kwargs: dict,
) -> BlockResult:
workflow_run_context = self.get_workflow_run_context(workflow_run_id)
current_retry = 0
# initial value for will_retry is True, so that the loop runs at least once
will_retry = True
current_running_task: Task | None = None
workflow_run = await app.WORKFLOW_SERVICE.get_workflow_run(
workflow_run_id=workflow_run_id,
organization_id=organization_id,
)
# Get workflow from context if available, otherwise query database
workflow = workflow_run_context.workflow
if workflow is None:
workflow = await app.WORKFLOW_SERVICE.get_workflow_by_permanent_id(
workflow_permanent_id=workflow_run.workflow_permanent_id,
)
# Cache the workflow back to context for future block executions
workflow_run_context.set_workflow(workflow)
# if the task url is parameterized, we need to get the value from the workflow run context
if self.url and workflow_run_context.has_parameter(self.url) and workflow_run_context.has_value(self.url):
task_url_parameter_value = workflow_run_context.get_value(self.url)
if task_url_parameter_value:
LOG.info(
"Task URL is parameterized, using parameter value",
task_url_parameter_value=task_url_parameter_value,
task_url_parameter_key=self.url,
)
self.url = task_url_parameter_value
if self.totp_identifier:
if workflow_run_context.has_parameter(self.totp_identifier) and workflow_run_context.has_value(
self.totp_identifier
):
totp_identifier_parameter_value = workflow_run_context.get_value(self.totp_identifier)
if totp_identifier_parameter_value:
self.totp_identifier = totp_identifier_parameter_value
else:
for parameter in self.get_all_parameters(workflow_run_id):
parameter_key = getattr(parameter, "key", None)
if not parameter_key:
continue
credential_totp_identifier = workflow_run_context.get_credential_totp_identifier(parameter_key)
if credential_totp_identifier:
self.totp_identifier = credential_totp_identifier
break
if self.download_suffix and workflow_run_context.has_parameter(self.download_suffix):
download_suffix_parameter_value = workflow_run_context.get_value(self.download_suffix)
if download_suffix_parameter_value:
LOG.info(
"Download prefix is parameterized, using parameter value",
download_suffix_parameter_value=download_suffix_parameter_value,
download_suffix_parameter_key=self.download_suffix,
)
self.download_suffix = download_suffix_parameter_value
try:
self.format_potential_template_parameters(workflow_run_context=workflow_run_context)
except Exception as e:
failure_reason = f"Failed to format jinja template: {str(e)}"
await self.record_output_parameter_value(
workflow_run_context, workflow_run_id, {"failure_reason": failure_reason}
)
return await self.build_block_result(
success=False,
failure_reason=failure_reason,
output_parameter_value=None,
status=BlockStatus.failed,
workflow_run_block_id=workflow_run_block_id,
organization_id=organization_id,
)
# TODO (kerem) we should always retry on terminated. We should make a distinction between retriable and
# non-retryable terminations
while will_retry:
task_order, task_retry = await self.get_task_order(workflow_run_id, current_retry)
is_first_task = task_order == 0
task, step = await app.agent.create_task_and_step_from_block(
task_block=self,
workflow=workflow,
workflow_run=workflow_run,
workflow_run_context=workflow_run_context,
task_order=task_order,
task_retry=task_retry,
)
workflow_run_block = await app.DATABASE.update_workflow_run_block(
workflow_run_block_id=workflow_run_block_id,
task_id=task.task_id,
organization_id=organization_id,
)
current_running_task = task
organization = await app.DATABASE.get_organization(organization_id=workflow_run.organization_id)
if not organization:
raise Exception(f"Organization is missing organization_id={workflow_run.organization_id}")
browser_state: BrowserState | None = None
if is_first_task:
# the first task block will create the browser state and do the navigation
try:
browser_state = await app.BROWSER_MANAGER.get_or_create_for_workflow_run(
workflow_run=workflow_run,
url=self.url,
browser_session_id=browser_session_id,
browser_profile_id=workflow_run.browser_profile_id,
)
working_page = await browser_state.get_working_page()
if not working_page:
LOG.error(
"BrowserState has no page",
workflow_run_id=workflow_run.workflow_run_id,
)
raise MissingBrowserStatePage(workflow_run_id=workflow_run.workflow_run_id)
if working_page.url == "about:blank" and self.url:
await browser_state.navigate_to_url(page=working_page, url=self.url)
# When a browser profile is loaded, wait for the page to fully settle
# so that cookie-based authentication can redirect or restore the session
# BEFORE the agent starts interacting with the page.
if workflow_run.browser_profile_id:
LOG.info(
"Browser profile loaded — waiting for page to settle before agent acts",
browser_profile_id=workflow_run.browser_profile_id,
workflow_run_id=workflow_run.workflow_run_id,
)
try:
await working_page.wait_for_load_state("networkidle", timeout=10000)
except Exception:
LOG.debug(
"networkidle timeout after browser profile load (non-fatal)",
workflow_run_id=workflow_run.workflow_run_id,
)
except Exception as e:
LOG.exception(
"Failed to get browser state for first task",
task_id=task.task_id,
workflow_run_id=workflow_run_id,
)
await self._handle_task_failure_with_error_detection(
task=task,
step=step,
browser_state=browser_state,
failure_reason=str(e),
organization_id=workflow_run.organization_id,
)
raise e
try:
# add screenshot artifact for the first task
screenshot = await browser_state.take_fullpage_screenshot()
if screenshot:
await app.ARTIFACT_MANAGER.create_workflow_run_block_artifact(
workflow_run_block=workflow_run_block,
artifact_type=ArtifactType.SCREENSHOT_LLM,
data=screenshot,
)
except Exception:
LOG.warning(
"Failed to take screenshot for first task",
task_id=task.task_id,
workflow_run_id=workflow_run_id,
exc_info=True,
)
else:
# if not the first task block, need to navigate manually
browser_state = app.BROWSER_MANAGER.get_for_workflow_run(workflow_run_id=workflow_run_id)
if browser_state is None:
raise MissingBrowserState(task_id=task.task_id, workflow_run_id=workflow_run_id)
working_page = await browser_state.get_working_page()
if not working_page:
LOG.error(
"BrowserState has no page",
workflow_run_id=workflow_run.workflow_run_id,
)
raise MissingBrowserStatePage(workflow_run_id=workflow_run.workflow_run_id)
if self.url:
LOG.info(
"Navigating to page",
url=self.url,
workflow_run_id=workflow_run_id,
task_id=task.task_id,
workflow_id=workflow.workflow_id,
organization_id=workflow_run.organization_id,
step_id=step.step_id,
)
try:
await browser_state.navigate_to_url(page=working_page, url=self.url)
except Exception as e:
await self._handle_task_failure_with_error_detection(
task=task,
step=step,
browser_state=browser_state,
failure_reason=str(e),
organization_id=workflow_run.organization_id,
)
raise e
try:
current_context = skyvern_context.ensure_context()
current_context.task_id = task.task_id
close_browser_on_completion = browser_session_id is None and not workflow_run.browser_address
await app.agent.execute_step(
organization=organization,
task=task,
step=step,
task_block=self,
browser_session_id=browser_session_id,
close_browser_on_completion=close_browser_on_completion,
complete_verification=self.complete_verification,
engine=self.engine,
)
except Exception as e:
# Make sure the task is marked as failed in the database before raising the exception
await self._handle_task_failure_with_error_detection(
task=task,
step=step,
browser_state=browser_state,
failure_reason=str(e),
organization_id=workflow_run.organization_id,
)
raise e
finally:
current_context.task_id = None
# Check task status
updated_task = await app.DATABASE.get_task(
task_id=task.task_id, organization_id=workflow_run.organization_id
)
if not updated_task:
raise TaskNotFound(task.task_id)
if not updated_task.status.is_final():
raise UnexpectedTaskStatus(task_id=updated_task.task_id, status=updated_task.status)
current_running_task = updated_task
block_status_mapping = {
TaskStatus.completed: BlockStatus.completed,
TaskStatus.terminated: BlockStatus.terminated,
TaskStatus.failed: BlockStatus.failed,
TaskStatus.canceled: BlockStatus.canceled,
TaskStatus.timed_out: BlockStatus.timed_out,
}
if updated_task.status == TaskStatus.completed or updated_task.status == TaskStatus.terminated:
LOG.info(
"Task completed",
task_id=updated_task.task_id,
task_status=updated_task.status,
workflow_run_id=workflow_run_id,
workflow_id=workflow.workflow_id,
organization_id=workflow_run.organization_id,
)
success = updated_task.status == TaskStatus.completed
downloaded_files: list[FileInfo] = []
try:
async with asyncio.timeout(GET_DOWNLOADED_FILES_TIMEOUT):
downloaded_files = await app.STORAGE.get_downloaded_files(
organization_id=workflow_run.organization_id,
run_id=current_context.run_id
if current_context and current_context.run_id
else workflow_run_id or updated_task.task_id,
)
except asyncio.TimeoutError:
LOG.warning("Timeout getting downloaded files", task_id=updated_task.task_id)
task_screenshot_artifacts = await app.WORKFLOW_SERVICE.get_recent_task_screenshot_artifacts(
organization_id=workflow_run.organization_id,
task_id=updated_task.task_id,
)
workflow_screenshot_artifacts = await app.WORKFLOW_SERVICE.get_recent_workflow_screenshot_artifacts(
workflow_run_id=workflow_run_id,
organization_id=workflow_run.organization_id,
)
task_output = TaskOutput.from_task(
updated_task,
downloaded_files,
task_screenshot_artifact_ids=[a.artifact_id for a in task_screenshot_artifacts],
workflow_screenshot_artifact_ids=[a.artifact_id for a in workflow_screenshot_artifacts],
)
output_parameter_value = task_output.model_dump()
await self.record_output_parameter_value(workflow_run_context, workflow_run_id, output_parameter_value)
return await self.build_block_result(
success=success,
failure_reason=(
updated_task.failure_reason
if success
else (
updated_task.failure_reason
or f"Task {updated_task.task_id} finished with status {updated_task.status}"
)
),
output_parameter_value=output_parameter_value,
status=block_status_mapping[updated_task.status],
workflow_run_block_id=workflow_run_block_id,
organization_id=organization_id,
)
elif updated_task.status == TaskStatus.canceled:
LOG.info(
"Task canceled, cancelling block",
task_id=updated_task.task_id,
task_status=updated_task.status,
workflow_run_id=workflow_run_id,
workflow_id=workflow.workflow_id,
organization_id=workflow_run.organization_id,
)
return await self.build_block_result(
success=False,
failure_reason=updated_task.failure_reason or f"Task {updated_task.task_id} was canceled",
output_parameter_value=None,
status=block_status_mapping[updated_task.status],
workflow_run_block_id=workflow_run_block_id,
organization_id=organization_id,
)
elif updated_task.status == TaskStatus.timed_out:
LOG.info(
"Task timed out, making the block time out",
task_id=updated_task.task_id,
task_status=updated_task.status,
workflow_run_id=workflow_run_id,
workflow_id=workflow.workflow_id,
organization_id=workflow_run.organization_id,
)
return await self.build_block_result(
success=False,
failure_reason=updated_task.failure_reason or f"Task {updated_task.task_id} timed out",
output_parameter_value=None,
status=block_status_mapping[updated_task.status],
workflow_run_block_id=workflow_run_block_id,
organization_id=organization_id,
)
else:
current_retry += 1
will_retry = current_retry <= self.max_retries
retry_message = f", retrying task {current_retry}/{self.max_retries}" if will_retry else ""
downloaded_files = []
try:
async with asyncio.timeout(GET_DOWNLOADED_FILES_TIMEOUT):
downloaded_files = await app.STORAGE.get_downloaded_files(
organization_id=workflow_run.organization_id,
run_id=current_context.run_id
if current_context and current_context.run_id
else workflow_run_id or updated_task.task_id,
)
except asyncio.TimeoutError:
LOG.warning("Timeout getting downloaded files", task_id=updated_task.task_id)
task_screenshot_artifacts = await app.WORKFLOW_SERVICE.get_recent_task_screenshot_artifacts(
organization_id=workflow_run.organization_id,
task_id=updated_task.task_id,
)
workflow_screenshot_artifacts = await app.WORKFLOW_SERVICE.get_recent_workflow_screenshot_artifacts(
workflow_run_id=workflow_run_id,
organization_id=workflow_run.organization_id,
)
task_output = TaskOutput.from_task(
updated_task,
downloaded_files,
task_screenshot_artifact_ids=[a.artifact_id for a in task_screenshot_artifacts],
workflow_screenshot_artifact_ids=[a.artifact_id for a in workflow_screenshot_artifacts],
)
LOG.warning(
f"Task failed with status {updated_task.status}{retry_message}",
task_id=updated_task.task_id,
task_status=updated_task.status,
workflow_run_id=workflow_run_id,
workflow_id=workflow.workflow_id,
organization_id=workflow_run.organization_id,
current_retry=current_retry,
max_retries=self.max_retries,
task_output=task_output.model_dump_json(),
)
if not will_retry:
output_parameter_value = task_output.model_dump()
await self.record_output_parameter_value(
workflow_run_context, workflow_run_id, output_parameter_value
)
return await self.build_block_result(
success=False,
failure_reason=(
updated_task.failure_reason
or f"Task {updated_task.task_id} failed with status {updated_task.status}"
),
output_parameter_value=output_parameter_value,
status=block_status_mapping[updated_task.status],
workflow_run_block_id=workflow_run_block_id,
organization_id=organization_id,
)
await self.record_output_parameter_value(workflow_run_context, workflow_run_id)
return await self.build_block_result(
success=False,
status=BlockStatus.failed,
failure_reason=(
(current_running_task.failure_reason or f"Task {current_running_task.task_id} failed")
if current_running_task
else "Task failed (no task reference available)"
),
workflow_run_block_id=workflow_run_block_id,
organization_id=organization_id,
)
class TaskBlock(BaseTaskBlock):
    """Concrete task block; all execution behavior is inherited from BaseTaskBlock."""

    # There is a mypy bug with Literal. Without the type: ignore, mypy will raise an error:
    # Parameter 1 of Literal[...] cannot be of type "Any"
    block_type: Literal[BlockType.TASK] = BlockType.TASK  # type: ignore
class LoopBlockExecutedResult(BaseModel):
    """Aggregated outcome of running a for-loop block's inner blocks over all iterations."""

    outputs_with_loop_values: list[list[dict[str, Any]]]
    block_outputs: list[BlockResult]
    last_block: BlockTypeVar | None

    def is_canceled(self) -> bool:
        """True when the most recently executed block was canceled."""
        if not self.block_outputs:
            return False
        return self.block_outputs[-1].status == BlockStatus.canceled

    def is_completed(self) -> bool:
        """True when the final executed block succeeded or tolerates failure."""
        if not self.block_outputs or self.last_block is None or self.is_canceled():
            return False
        final_output = self.block_outputs[-1]
        if final_output.success:
            return True
        return bool(self.last_block.continue_on_failure)

    def is_terminated(self) -> bool:
        """True when the most recently executed block was terminated."""
        if not self.block_outputs:
            return False
        return self.block_outputs[-1].status == BlockStatus.terminated

    def get_failure_reason(self) -> str | None:
        """Return a human-readable failure reason, or None when the loop completed."""
        if self.is_completed():
            return None
        if self.is_canceled():
            return f"Block({self.last_block.label if self.last_block else ''}) with type {self.last_block.block_type if self.last_block else ''} was canceled, canceling for loop"
        if not self.block_outputs:
            return "No block has been executed"
        return self.block_outputs[-1].failure_reason
def compute_conditional_scopes(
    label_to_block: dict[str, Any],
    default_next_map: dict[str, str | None],
) -> dict[str, str]:
    """Map each block label to the conditional block label whose scope it belongs to.

    For each conditional block, trace each branch's chain of blocks via
    ``default_next_map``. Labels that appear in **all** branch chains are
    considered merge-point blocks (i.e. they come *after* the conditional
    reconverges) and are **not** scoped. Labels that appear in fewer chains
    than the total number of branches **are** inside the conditional.

    Inner conditionals are themselves scoped to an outer conditional, but
    their *own* branch targets are handled by a recursive application of
    the same logic (inner wins via the first-writer-wins ``setdefault``).
    """
    scopes: dict[str, str] = {}
    for cond_label, cond_block in label_to_block.items():
        if cond_block.block_type != BlockType.CONDITIONAL:
            continue
        # Deduplicate branch targets while preserving order; two branches may
        # legitimately point at the same next block.
        unique_targets: list[str | None] = []
        for branch in cond_block.ordered_branches:
            if branch.next_block_label not in unique_targets:
                unique_targets.append(branch.next_block_label)
        num_branches = len(unique_targets)
        if num_branches == 0:
            continue
        # Walk each branch's chain through default_next_map, stopping whenever
        # another conditional is reached (it owns its own sub-tree).
        chains: list[list[str]] = []
        for target in unique_targets:
            chain: list[str] = []
            cursor = target
            while cursor and cursor in label_to_block:
                chain.append(cursor)
                if label_to_block[cursor].block_type == BlockType.CONDITIONAL:
                    break
                cursor = default_next_map.get(cursor)
            chains.append(chain)
        # Count in how many branch chains each label occurs.
        occurrences: dict[str, int] = defaultdict(int)
        for chain in chains:
            for lbl in chain:
                occurrences[lbl] += 1
        # A label present in every branch chain is the merge point; everything
        # encountered before it on a chain lies inside this conditional's scope.
        for chain in chains:
            for lbl in chain:
                if occurrences[lbl] >= num_branches:
                    break
                scopes.setdefault(lbl, cond_label)
    return scopes
class ForLoopBlock(Block):
    """Block that executes its ``loop_blocks`` once for every value of a runtime-resolved iterable."""

    # There is a mypy bug with Literal. Without the type: ignore, mypy will raise an error:
    # Parameter 1 of Literal[...] cannot be of type "Any"
    block_type: Literal[BlockType.FOR_LOOP] = BlockType.FOR_LOOP  # type: ignore
    # Inner blocks executed for each loop value (traversed as a single-entry DAG).
    loop_blocks: list[BlockTypeVar]
    # Parameter-based iterable source; only consulted when loop_variable_reference is unset.
    loop_over: PARAMETER_TYPE | None = None
    # Jinja-style reference (or natural-language prompt) that resolves to the iterable.
    loop_variable_reference: str | None = None
    # When True, an empty iterable completes the block instead of terminating it.
    complete_if_empty: bool = False
    # Note: intentionally excludes `list` (unlike BaseTaskBlock.data_schema) because a list schema
    # does not describe the shape of individual loop items -- only dict schemas are meaningful here.
    data_schema: dict[str, Any] | str | None = None
def get_all_parameters(
    self,
    workflow_run_id: str,
) -> list[PARAMETER_TYPE]:
    """Collect the distinct parameters used by this loop and all of its inner blocks."""
    collected: set = set()
    if self.loop_over is not None:
        collected.add(self.loop_over)
    for inner_block in self.loop_blocks:
        collected.update(inner_block.get_all_parameters(workflow_run_id))
    return list(collected)
def get_loop_block_context_parameters(self, workflow_run_id: str, loop_data: Any) -> list[ContextParameter]:
    """Collect the loop's ContextParameters and bind values from the current loop item."""
    # todo: handle the case where the loop_block is a ForLoopBlock
    context_parameters = [
        parameter
        for inner_block in self.loop_blocks
        for parameter in inner_block.get_all_parameters(workflow_run_id)
        if isinstance(parameter, ContextParameter)
    ]
    if self.loop_over is None:
        return context_parameters
    for context_parameter in context_parameters:
        # Only bind parameters whose source is the iterable being looped over.
        if context_parameter.source.key != self.loop_over.key:
            continue
        if isinstance(loop_data, dict):
            # If the loop_data is a dict, we need to check if the key exists in the loop_data
            if context_parameter.key not in loop_data:
                raise ContextParameterValueNotFound(
                    parameter_key=context_parameter.key,
                    existing_keys=list(loop_data.keys()),
                    workflow_run_id=workflow_run_id,
                )
            context_parameter.value = loop_data[context_parameter.key]
        else:
            # If the loop_data is a list, we can directly assign the loop_data to the context_parameter value
            context_parameter.value = loop_data
    return context_parameters
async def get_values_from_loop_variable_reference(
    self,
    workflow_run_context: WorkflowRunContext,
    workflow_run_id: str,
    workflow_run_block_id: str,
    organization_id: str | None = None,
) -> list[Any]:
    """Resolve ``self.loop_variable_reference`` into the list of loop values.

    Resolution order:
      1. Try the reference as a Jinja template against the workflow run context.
      2. If that fails and the reference does not look like a parameter path,
         treat it as a natural-language prompt: run an extraction block, store
         its ``loop_values`` under a temp key, and re-resolve via Jinja.
      3. As a last resort, render the raw reference as a ``| tojson`` template.

    Returns a list (non-list results are wrapped in a single-element list).
    Raises FailedToFormatJinjaStyleParameter when resolution fails.
    """
    parameter_value = None
    if self.loop_variable_reference:
        LOG.debug("Processing loop variable reference", loop_variable_reference=self.loop_variable_reference)
        # Check if this looks like a parameter path (contains dots and/or _output);
        # such references should never fall back to natural-language extraction.
        is_likely_parameter_path = "extracted_information." in self.loop_variable_reference
        # Try parsing as Jinja template
        parameter_value = self.try_parse_jinja_template(workflow_run_context)
        if parameter_value is None and not is_likely_parameter_path:
            try:
                # Create and execute extraction block using the current block's workflow_id
                extraction_block = self._create_initial_extraction_block(
                    self.loop_variable_reference, workflow_run_context=workflow_run_context
                )
                LOG.info(
                    "Processing natural language loop input",
                    prompt=self.loop_variable_reference,
                    extraction_goal=extraction_block.data_extraction_goal,
                )
                extraction_result = await extraction_block.execute(
                    workflow_run_id=workflow_run_id,
                    workflow_run_block_id=workflow_run_block_id,
                    organization_id=organization_id,
                )
                if not extraction_result.success:
                    LOG.error("Extraction block failed", failure_reason=extraction_result.failure_reason)
                    raise ValueError(
                        f"Extraction block failed: "
                        f"{extraction_result.failure_reason or 'Unknown error (no failure reason provided)'}"
                    )
                LOG.debug("Extraction block succeeded", output=extraction_result.output_parameter_value)
                # Store the extraction result in the workflow context
                await extraction_block.record_output_parameter_value(
                    workflow_run_context=workflow_run_context,
                    workflow_run_id=workflow_run_id,
                    value=extraction_result.output_parameter_value,
                )
                # Get the extracted information; validate the result shape step by step.
                if not isinstance(extraction_result.output_parameter_value, dict):
                    LOG.error(
                        "Extraction result output_parameter_value is not a dict",
                        output_parameter_value=extraction_result.output_parameter_value,
                    )
                    raise ValueError("Extraction result output_parameter_value is not a dictionary")
                if "extracted_information" not in extraction_result.output_parameter_value:
                    LOG.error(
                        "Extraction result missing extracted_information key",
                        output_parameter_value=extraction_result.output_parameter_value,
                    )
                    raise ValueError("Extraction result missing extracted_information key")
                extracted_info = extraction_result.output_parameter_value["extracted_information"]
                # Handle different possible structures of extracted_info
                if isinstance(extracted_info, list):
                    # If it's a list, take the first element
                    if len(extracted_info) > 0:
                        extracted_info = extracted_info[0]
                    else:
                        LOG.error("Extracted information list is empty")
                        raise ValueError("Extracted information list is empty")
                # At this point, extracted_info should be a dict
                if not isinstance(extracted_info, dict):
                    LOG.error("Invalid extraction result structure - not a dict", extracted_info=extracted_info)
                    raise ValueError("Extraction result is not a dictionary")
                # Extract the loop values
                loop_values = extracted_info.get("loop_values", [])
                if not loop_values:
                    LOG.error("No loop values found in extraction result")
                    raise ValueError("No loop values found in extraction result")
                LOG.info("Extracted loop values", count=len(loop_values), values=loop_values)
                # Update the loop variable reference to point to the extracted loop values
                # We'll use a temporary key that we can reference
                temp_key = f"extracted_loop_values_{generate_random_string()}"
                workflow_run_context.set_value(temp_key, loop_values)
                self.loop_variable_reference = temp_key
                # Now try parsing again with the updated reference
                parameter_value = self.try_parse_jinja_template(workflow_run_context)
            except Exception as e:
                LOG.error("Failed to process natural language loop input", error=str(e))
                raise FailedToFormatJinjaStyleParameter(self.loop_variable_reference, str(e))
    if parameter_value is None:
        # Fall back to the original Jinja template approach
        value_template = f"{{{{ {self.loop_variable_reference.strip(' {}')} | tojson }}}}"
        try:
            value_json = self.format_block_parameter_template_from_workflow_run_context(
                value_template, workflow_run_context
            )
        except Exception as e:
            raise FailedToFormatJinjaStyleParameter(value_template, str(e))
        parameter_value = json.loads(value_json)
    # Callers always receive a list, even for scalar resolutions.
    if isinstance(parameter_value, list):
        return parameter_value
    else:
        return [parameter_value]
async def get_loop_over_parameter_values(
    self,
    workflow_run_context: WorkflowRunContext,
    workflow_run_id: str,
    workflow_run_block_id: str,
    organization_id: str | None = None,
) -> list[Any]:
    """Resolve the loop's iterable, preferring loop_variable_reference over loop_over."""
    # A Jinja-style / natural-language reference always wins when present.
    if self.loop_variable_reference:
        return await self.get_values_from_loop_variable_reference(
            workflow_run_context,
            workflow_run_id,
            workflow_run_block_id,
            organization_id,
        )
    if self.loop_over is None:
        # Nothing to iterate over was configured.
        if self.complete_if_empty:
            return []
        raise NoIterableValueFound()
    if isinstance(self.loop_over, WorkflowParameter):
        parameter_value = workflow_run_context.get_value(self.loop_over.key)
    elif isinstance(self.loop_over, OutputParameter):
        # If the output parameter is for a TaskBlock, it will be a TaskOutput object. We need to extract the
        # value from the TaskOutput object's extracted_information field.
        raw_output = workflow_run_context.get_value(self.loop_over.key)
        if isinstance(raw_output, dict) and "extracted_information" in raw_output:
            parameter_value = raw_output["extracted_information"]
        else:
            parameter_value = raw_output
    elif isinstance(self.loop_over, ContextParameter):
        parameter_value = self.loop_over.value
        if not parameter_value:
            # No pre-bound value: look it up on the source parameter instead.
            source_value = workflow_run_context.get_value(self.loop_over.source.key)
            if not isinstance(source_value, dict):
                raise ValueError("ContextParameter source value should be a dict")
            if "extracted_information" in source_value:
                parameter_value = source_value["extracted_information"].get(self.loop_over.key)
            else:
                parameter_value = source_value.get(self.loop_over.key)
    else:
        raise NotImplementedError()
    # TODO (kerem): Should we raise an error here?
    return parameter_value if isinstance(parameter_value, list) else [parameter_value]
def try_parse_jinja_template(self, workflow_run_context: WorkflowRunContext) -> Any | None:
    """Attempt to resolve the loop variable reference as a Jinja template.

    Tries the reference exactly as given, then a few common access patterns
    for extraction results. Returns the first non-None parsed value, or None
    when nothing resolves (all errors are swallowed).
    """
    try:
        if self.loop_variable_reference is None:
            return None
        # Candidate references, in priority order: the exact reference first,
        # then common suffixes used by extraction-result outputs.
        candidates = [
            self.loop_variable_reference,
            f"{self.loop_variable_reference}.extracted_information",
            f"{self.loop_variable_reference}.extracted_information.results",
            f"{self.loop_variable_reference}.results",
        ]
        for candidate in candidates:
            try:
                template = f"{{{{ {candidate.strip(' {}')} | tojson }}}}"
                rendered = self.format_block_parameter_template_from_workflow_run_context(
                    template, workflow_run_context
                )
                value = json.loads(rendered)
                if value is not None:
                    return value
            except Exception:
                continue
        return None
    except Exception:
        return None
def _create_initial_extraction_block(
    self,
    natural_language_prompt: str,
    workflow_run_context: WorkflowRunContext | None = None,
) -> ExtractionBlock:
    """Create an extraction block to process natural language input.

    The resulting block extracts a ``loop_values`` array whose item schema is
    either the loop's own ``data_schema`` (when it parses to a dict) or a
    default array-of-strings schema.
    """
    # Determine the items schema for loop_values
    items_schema: dict[str, Any] | None = None
    if self.data_schema is not None:
        if isinstance(self.data_schema, dict):
            items_schema = self.data_schema
        elif isinstance(self.data_schema, str):
            # Interpolate Jinja templates before parsing, matching how BaseTaskBlock.setup_block_v2
            # handles data_schema strings (see line 652-654)
            schema_str = self.data_schema
            if workflow_run_context is not None:
                schema_str = self.format_block_parameter_template_from_workflow_run_context(
                    schema_str, workflow_run_context
                )
            try:
                parsed = json.loads(schema_str)
                if isinstance(parsed, dict):
                    items_schema = parsed
                else:
                    # Non-dict JSON (e.g. a list) cannot describe individual loop items.
                    LOG.warning(
                        "Parsed data_schema is not a dict, falling back to default string schema",
                        block_label=self.label,
                        data_schema=self.data_schema,
                    )
            except (json.JSONDecodeError, TypeError):
                LOG.warning(
                    "Failed to parse data_schema string, falling back to default string schema",
                    block_label=self.label,
                    data_schema=self.data_schema,
                )
    if items_schema is not None:
        # User provided a custom schema — each loop iteration will produce a structured object
        data_schema: dict[str, Any] = {
            "type": "object",
            "properties": {
                "loop_values": {
                    "type": "array",
                    "description": "Array of structured values to iterate over, matching the provided schema.",
                    "items": items_schema,
                }
            },
        }
    else:
        # Default: extract simple string array
        data_schema = {
            "type": "object",
            "properties": {
                "loop_values": {
                    "type": "array",
                    "description": "Array of values to iterate over. Each value should be the primary data needed for the loop blocks.",
                    "items": {
                        "type": "string",
                        "description": "The primary value to be used in the loop iteration (e.g., URL, text, identifier, etc.)",
                    },
                }
            },
        }
    # Create extraction goal that includes the natural language prompt
    extraction_goal = prompt_engine.load_prompt(
        "extraction_prompt_for_nat_language_loops", natural_language_prompt=natural_language_prompt
    )
    # Create a temporary output parameter using the current block's workflow_id
    # NOTE(review): created_at/modified_at use naive datetime.now() — confirm whether
    # downstream storage expects timezone-aware timestamps.
    output_param = OutputParameter(
        output_parameter_id=str(uuid.uuid4()),
        key=f"natural_lang_extraction_{generate_random_string()}",
        workflow_id=self.output_parameter.workflow_id,
        created_at=datetime.now(),
        modified_at=datetime.now(),
        parameter_type=ParameterType.OUTPUT,
        description="Natural language extraction result",
    )
    return ExtractionBlock(
        label=f"natural_lang_extraction_{generate_random_string()}",
        data_extraction_goal=extraction_goal,
        data_schema=data_schema,
        output_parameter=output_param,
    )
def _build_loop_graph(
    self, blocks: list[BlockTypeVar]
) -> tuple[str, dict[str, BlockTypeVar], dict[str, str | None]]:
    """Validate the loop body as a single-entry DAG.

    Returns a tuple of (entry label, label -> block map, label -> default
    next label map). Raises InvalidWorkflowDefinition on duplicate labels,
    unknown next-block targets, zero or multiple entry points, or cycles.
    """
    label_to_block: dict[str, BlockTypeVar] = {}
    default_next_map: dict[str, str | None] = {}
    for block in blocks:
        if block.label in label_to_block:
            raise InvalidWorkflowDefinition(f"Duplicate block label detected in loop: {block.label}")
        label_to_block[block.label] = block
        default_next_map[block.label] = block.next_block_label
    # Without conditionals, blocks that declare no explicit successor are
    # chained sequentially in list order.
    if not any(block.block_type == BlockType.CONDITIONAL for block in blocks):
        for predecessor, successor in zip(blocks, blocks[1:]):
            if default_next_map.get(predecessor.label) is None:
                default_next_map[predecessor.label] = successor.label
    adjacency: dict[str, set[str]] = {label: set() for label in label_to_block}
    incoming: dict[str, int] = {label: 0 for label in label_to_block}

    def _add_edge(source: str, target: str | None) -> None:
        # Register a directed edge, validating the target exists.
        if not target:
            return
        if target not in label_to_block:
            raise InvalidWorkflowDefinition(
                f"Block {source} references unknown next_block_label {target} inside loop {self.label}"
            )
        # Allow multiple branches of a conditional to point to the same target
        # without double-counting the incoming edge.
        if target not in adjacency[source]:
            adjacency[source].add(target)
            incoming[target] += 1

    for label, block in label_to_block.items():
        if block.block_type == BlockType.CONDITIONAL:
            for branch in block.ordered_branches:
                _add_edge(label, branch.next_block_label)
        else:
            _add_edge(label, default_next_map.get(label))
    roots = [label for label, count in incoming.items() if count == 0]
    if not roots:
        raise InvalidWorkflowDefinition(f"No entry block found for loop {self.label}")
    if len(roots) > 1:
        raise InvalidWorkflowDefinition(
            f"Multiple entry blocks detected in loop {self.label} ({', '.join(sorted(roots))}); only one entry block is supported."
        )
    # Kahn's algorithm: if a topological traversal from the single root cannot
    # reach every node, the graph must contain a cycle.
    remaining_degree = dict(incoming)
    pending: deque[str] = deque([roots[0]])
    reached = 0
    while pending:
        label = pending.popleft()
        reached += 1
        for successor in adjacency[label]:
            remaining_degree[successor] -= 1
            if remaining_degree[successor] == 0:
                pending.append(successor)
    if reached != len(label_to_block):
        raise InvalidWorkflowDefinition(f"Loop {self.label} contains a cycle; DAG traversal is required.")
    return roots[0], label_to_block, default_next_map
async def execute_loop_helper(
    self,
    workflow_run_id: str,
    workflow_run_block_id: str,
    workflow_run_context: WorkflowRunContext,
    loop_over_values: list[Any],
    organization_id: str | None = None,
    browser_session_id: str | None = None,
) -> LoopBlockExecutedResult:
    """Run the loop body (a single-entry DAG of blocks) once per loop value.

    For each value, blocks are traversed starting from the graph's entry
    label; conditional blocks select the next label from their own output,
    all other blocks follow ``default_next_map``. Execution is bounded by
    DEFAULT_MAX_LOOP_ITERATIONS (iterations) and
    DEFAULT_MAX_STEPS_PER_ITERATION (block executions per iteration).
    Failure handling honors, in order: block continue_on_failure, block/loop
    next_loop_on_failure, otherwise the whole loop terminates early.

    Returns a LoopBlockExecutedResult with per-iteration output values, all
    block results, and the last block executed.
    """
    outputs_with_loop_values: list[list[dict[str, Any]]] = []
    block_outputs: list[BlockResult] = []
    current_block: BlockTypeVar | None = None
    start_label, label_to_block, default_next_map = self._build_loop_graph(self.loop_blocks)
    conditional_scopes = compute_conditional_scopes(label_to_block, default_next_map)
    for loop_idx, loop_over_value in enumerate(loop_over_values):
        # Check max_iterations limit
        if loop_idx >= DEFAULT_MAX_LOOP_ITERATIONS:
            LOG.info(
                f"ForLoopBlock: Reached max_iterations limit ({DEFAULT_MAX_LOOP_ITERATIONS}), stopping loop",
                workflow_run_id=workflow_run_id,
                loop_idx=loop_idx,
                max_iterations=DEFAULT_MAX_LOOP_ITERATIONS,
            )
            failure_block_result = await self.build_block_result(
                success=False,
                status=BlockStatus.failed,
                failure_reason=f"Reached max_loop_iterations limit of {DEFAULT_MAX_LOOP_ITERATIONS}",
                workflow_run_block_id=workflow_run_block_id,
                organization_id=organization_id,
            )
            block_outputs.append(failure_block_result)
            return LoopBlockExecutedResult(
                outputs_with_loop_values=outputs_with_loop_values,
                block_outputs=block_outputs,
                last_block=current_block,
            )
        LOG.info("Starting loop iteration", loop_idx=loop_idx, loop_over_value=loop_over_value)
        # context parameter has been deprecated. However, it's still used by task v2 - we should migrate away from it.
        context_parameters_with_value = self.get_loop_block_context_parameters(workflow_run_id, loop_over_value)
        for context_parameter in context_parameters_with_value:
            workflow_run_context.set_value(context_parameter.key, context_parameter.value)
        each_loop_output_values: list[dict[str, Any]] = []
        iteration_step_count = 0
        LOG.info(
            f"ForLoopBlock: Starting iteration {loop_idx} with max_steps_per_iteration={DEFAULT_MAX_STEPS_PER_ITERATION}",
            workflow_run_id=workflow_run_id,
            loop_idx=loop_idx,
            max_steps_per_iteration=DEFAULT_MAX_STEPS_PER_ITERATION,
        )
        block_idx = 0
        current_label: str | None = start_label
        # Maps a conditional block's label to its workflow_run_block_id so
        # blocks inside its scope can be parented to it in the timeline.
        conditional_wrb_ids: dict[str, str] = {}
        while current_label:
            loop_block = label_to_block.get(current_label)
            if not loop_block:
                LOG.error(
                    "Unable to find loop block with label in loop graph",
                    workflow_run_id=workflow_run_id,
                    loop_label=self.label,
                    current_label=current_label,
                )
                failure_block_result = await self.build_block_result(
                    success=False,
                    status=BlockStatus.failed,
                    failure_reason=f"Unable to find block with label {current_label} inside loop {self.label}",
                    workflow_run_block_id=workflow_run_block_id,
                    organization_id=organization_id,
                )
                block_outputs.append(failure_block_result)
                outputs_with_loop_values.append(each_loop_output_values)
                return LoopBlockExecutedResult(
                    outputs_with_loop_values=outputs_with_loop_values,
                    block_outputs=block_outputs,
                    last_block=current_block,
                )
            # Expose the current loop position to templates under both the
            # loop's label and the inner block's label.
            metadata: BlockMetadata = {
                "current_index": loop_idx,
                "current_value": loop_over_value,
                "current_item": loop_over_value,
            }
            workflow_run_context.update_block_metadata(self.label, metadata)
            workflow_run_context.update_block_metadata(loop_block.label, metadata)
            # Execute a deep copy so per-iteration mutations don't leak into
            # later iterations; the original is restored after execution.
            original_loop_block = loop_block
            loop_block = loop_block.model_copy(deep=True)
            current_block = loop_block
            # Determine the parent for timeline nesting: if this block is
            # inside a conditional's scope, parent it to that conditional's
            # workflow_run_block rather than the loop's.
            parent_wrb_id = workflow_run_block_id
            if current_label in conditional_scopes:
                cond_label = conditional_scopes[current_label]
                if cond_label in conditional_wrb_ids:
                    parent_wrb_id = conditional_wrb_ids[cond_label]
            block_output = await loop_block.execute_safe(
                workflow_run_id=workflow_run_id,
                parent_workflow_run_block_id=parent_wrb_id,
                organization_id=organization_id,
                browser_session_id=browser_session_id,
            )
            # Track conditional workflow_run_block_ids so branch targets
            # can be parented to them.
            if loop_block.block_type == BlockType.CONDITIONAL and block_output.workflow_run_block_id:
                conditional_wrb_ids[current_label] = block_output.workflow_run_block_id
            output_value = (
                workflow_run_context.get_value(block_output.output_parameter.key)
                if workflow_run_context.has_value(block_output.output_parameter.key)
                else None
            )
            # Log the output value for debugging
            if block_output.output_parameter.key.endswith("_output"):
                LOG.debug("Block output", block_type=loop_block.block_type, output_value=output_value)
            # Log URL information for goto_url blocks
            if loop_block.block_type == BlockType.GOTO_URL:
                LOG.info("Goto URL block executed", url=loop_block.url, loop_idx=loop_idx)
            each_loop_output_values.append(
                {
                    "loop_value": loop_over_value,
                    "output_parameter": block_output.output_parameter,
                    "output_value": output_value,
                }
            )
            # Best-effort: record the loop position on the executed block's DB row.
            try:
                if block_output.workflow_run_block_id:
                    await app.DATABASE.update_workflow_run_block(
                        workflow_run_block_id=block_output.workflow_run_block_id,
                        organization_id=organization_id,
                        current_value=str(loop_over_value),
                        current_index=loop_idx,
                    )
            except Exception:
                LOG.warning(
                    "Failed to update workflow run block",
                    workflow_run_block_id=block_output.workflow_run_block_id,
                    loop_over_value=loop_over_value,
                    loop_idx=loop_idx,
                )
            loop_block = original_loop_block
            block_outputs.append(block_output)
            # Check max_steps_per_iteration limit after each block execution
            iteration_step_count += 1  # Count each block execution as a step
            if iteration_step_count >= DEFAULT_MAX_STEPS_PER_ITERATION:
                LOG.info(
                    f"ForLoopBlock: Reached max_steps_per_iteration limit ({DEFAULT_MAX_STEPS_PER_ITERATION}) in iteration {loop_idx}, stopping iteration",
                    workflow_run_id=workflow_run_id,
                    loop_idx=loop_idx,
                    max_steps_per_iteration=DEFAULT_MAX_STEPS_PER_ITERATION,
                    iteration_step_count=iteration_step_count,
                )
                # Create a failure block result for this iteration
                failure_block_result = await self.build_block_result(
                    success=False,
                    status=BlockStatus.failed,
                    failure_reason=f"Reached max_steps_per_iteration limit of {DEFAULT_MAX_STEPS_PER_ITERATION}",
                    workflow_run_block_id=workflow_run_block_id,
                    organization_id=organization_id,
                )
                block_outputs.append(failure_block_result)
                # If next_loop_on_failure is False, stop the entire loop
                if not self.next_loop_on_failure:
                    outputs_with_loop_values.append(each_loop_output_values)
                    return LoopBlockExecutedResult(
                        outputs_with_loop_values=outputs_with_loop_values,
                        block_outputs=block_outputs,
                        last_block=current_block,
                    )
                # If next_loop_on_failure is True, break out of the block loop for this iteration
                break
            if block_output.status == BlockStatus.canceled:
                LOG.info(
                    f"ForLoopBlock: Block with type {loop_block.block_type} at index {block_idx} during loop {loop_idx} was canceled for workflow run {workflow_run_id}, canceling for loop",
                    block_type=loop_block.block_type,
                    workflow_run_id=workflow_run_id,
                    block_idx=block_idx,
                    block_result=block_outputs,
                )
                outputs_with_loop_values.append(each_loop_output_values)
                return LoopBlockExecutedResult(
                    outputs_with_loop_values=outputs_with_loop_values,
                    block_outputs=block_outputs,
                    last_block=current_block,
                )
            # A failure with no tolerance configured anywhere ends the whole loop.
            if (
                not block_output.success
                and not loop_block.continue_on_failure
                and not loop_block.next_loop_on_failure
                and not self.next_loop_on_failure
            ):
                LOG.info(
                    f"ForLoopBlock: Encountered a failure processing block {block_idx} during loop {loop_idx}, terminating early",
                    block_outputs=block_outputs,
                    loop_idx=loop_idx,
                    block_idx=block_idx,
                    loop_over_value=loop_over_value,
                    loop_block_continue_on_failure=loop_block.continue_on_failure,
                    failure_reason=block_output.failure_reason,
                    next_loop_on_failure=loop_block.next_loop_on_failure or self.next_loop_on_failure,
                )
                outputs_with_loop_values.append(each_loop_output_values)
                return LoopBlockExecutedResult(
                    outputs_with_loop_values=outputs_with_loop_values,
                    block_outputs=block_outputs,
                    last_block=current_block,
                )
            # Success (or tolerated failure): advance to the next block in the DAG.
            if block_output.success or loop_block.continue_on_failure:
                next_label: str | None = None
                if loop_block.block_type == BlockType.CONDITIONAL:
                    # Conditionals publish the chosen branch's target in their output.
                    branch_metadata = (
                        block_output.output_parameter_value
                        if isinstance(block_output.output_parameter_value, dict)
                        else None
                    )
                    next_label = (branch_metadata or {}).get("next_block_label")
                else:
                    next_label = default_next_map.get(loop_block.label)
                if not next_label:
                    break
                if next_label not in label_to_block:
                    failure_block_result = await self.build_block_result(
                        success=False,
                        status=BlockStatus.failed,
                        failure_reason=f"Next block label {next_label} not found inside loop {self.label}",
                        workflow_run_block_id=workflow_run_block_id,
                        organization_id=organization_id,
                    )
                    block_outputs.append(failure_block_result)
                    outputs_with_loop_values.append(each_loop_output_values)
                    return LoopBlockExecutedResult(
                        outputs_with_loop_values=outputs_with_loop_values,
                        block_outputs=block_outputs,
                        last_block=current_block,
                    )
                current_label = next_label
                block_idx += 1
                continue
            # Failed but the loop should move on to the next iteration.
            if loop_block.next_loop_on_failure or self.next_loop_on_failure:
                LOG.info(
                    f"ForLoopBlock: Block {block_idx} during loop {loop_idx} failed but will continue to next iteration",
                    block_outputs=block_outputs,
                    loop_idx=loop_idx,
                    block_idx=block_idx,
                    loop_over_value=loop_over_value,
                    loop_block_next_loop_on_failure=loop_block.next_loop_on_failure or self.next_loop_on_failure,
                )
                break
            break
        outputs_with_loop_values.append(each_loop_output_values)
    return LoopBlockExecutedResult(
        outputs_with_loop_values=outputs_with_loop_values,
        block_outputs=block_outputs,
        last_block=current_block,
    )
async def execute(
    self,
    workflow_run_id: str,
    workflow_run_block_id: str,
    organization_id: str | None = None,
    browser_session_id: str | None = None,
    **kwargs: dict,
) -> BlockResult:
    """Run the for-loop block end to end.

    Resolves the iterable of loop values, validates that there are child
    blocks to run, delegates per-iteration execution to
    execute_loop_helper, and maps the aggregated iteration results onto a
    final block status.
    """
    workflow_run_context = self.get_workflow_run_context(workflow_run_id)
    # Any failure to resolve the loop values fails the block instead of
    # propagating an exception out of the workflow engine.
    try:
        loop_over_values = await self.get_loop_over_parameter_values(
            workflow_run_context=workflow_run_context,
            workflow_run_id=workflow_run_id,
            workflow_run_block_id=workflow_run_block_id,
            organization_id=organization_id,
        )
    except Exception as e:
        return await self.build_block_result(
            success=False,
            failure_reason=f"failed to get loop values: {str(e)}",
            status=BlockStatus.failed,
            workflow_run_block_id=workflow_run_block_id,
            organization_id=organization_id,
        )
    # Persist the resolved loop values on the block record for observability.
    await app.DATABASE.update_workflow_run_block(
        workflow_run_block_id=workflow_run_block_id,
        organization_id=organization_id,
        loop_values=loop_over_values,
    )
    LOG.info(
        f"Number of loop_over values: {len(loop_over_values)}",
        block_type=self.block_type,
        workflow_run_id=workflow_run_id,
        num_loop_over_values=len(loop_over_values),
    )
    if not loop_over_values or len(loop_over_values) == 0:
        # Empty iterable: either a successful no-op (complete_if_empty) or a
        # terminated block; both record an empty output first.
        LOG.info(
            "No loop_over values found, terminating block",
            block_type=self.block_type,
            workflow_run_id=workflow_run_id,
            num_loop_over_values=len(loop_over_values),
            complete_if_empty=self.complete_if_empty,
        )
        await self.record_output_parameter_value(workflow_run_context, workflow_run_id, [])
        if self.complete_if_empty:
            return await self.build_block_result(
                success=True,
                failure_reason=None,
                output_parameter_value=[],
                status=BlockStatus.completed,
                workflow_run_block_id=workflow_run_block_id,
                organization_id=organization_id,
            )
        else:
            return await self.build_block_result(
                success=False,
                failure_reason="No iterable value found for the loop block",
                status=BlockStatus.terminated,
                workflow_run_block_id=workflow_run_block_id,
                organization_id=organization_id,
            )
    if not self.loop_blocks or len(self.loop_blocks) == 0:
        # A loop with no child blocks cannot do any work: terminate.
        LOG.info(
            "No defined blocks to loop, terminating block",
            block_type=self.block_type,
            workflow_run_id=workflow_run_id,
            num_loop_blocks=len(self.loop_blocks),
        )
        await self.record_output_parameter_value(workflow_run_context, workflow_run_id, [])
        return await self.build_block_result(
            success=False,
            failure_reason="No defined blocks to loop",
            status=BlockStatus.terminated,
            workflow_run_block_id=workflow_run_block_id,
            organization_id=organization_id,
        )
    try:
        loop_executed_result = await self.execute_loop_helper(
            workflow_run_id=workflow_run_id,
            workflow_run_block_id=workflow_run_block_id,
            workflow_run_context=workflow_run_context,
            loop_over_values=loop_over_values,
            organization_id=organization_id,
            browser_session_id=browser_session_id,
        )
    except InvalidWorkflowDefinition as exc:
        # The loop's inner block graph failed validation (e.g. a bad label
        # reference); surface the validation message as the failure reason.
        LOG.error(
            "Loop graph validation failed",
            error=str(exc),
            workflow_run_id=workflow_run_id,
            loop_label=self.label,
        )
        return await self.build_block_result(
            success=False,
            failure_reason=str(exc),
            status=BlockStatus.failed,
            workflow_run_block_id=workflow_run_block_id,
            organization_id=organization_id,
        )
    await self.record_output_parameter_value(
        workflow_run_context, workflow_run_id, loop_executed_result.outputs_with_loop_values
    )
    # Collapse the per-iteration outcomes into a single terminal status;
    # failed is the default when no other status applies.
    block_status = BlockStatus.failed
    success = False
    if loop_executed_result.is_canceled():
        block_status = BlockStatus.canceled
    elif loop_executed_result.is_completed():
        block_status = BlockStatus.completed
        success = True
    elif loop_executed_result.is_terminated():
        block_status = BlockStatus.terminated
    else:
        block_status = BlockStatus.failed
    return await self.build_block_result(
        success=success,
        failure_reason=loop_executed_result.get_failure_reason(),
        output_parameter_value=loop_executed_result.outputs_with_loop_values,
        status=block_status,
        workflow_run_block_id=workflow_run_block_id,
        organization_id=organization_id,
    )
class Credential(SimpleNamespace):
    """Attribute-access container for a decrypted credential's fields
    (e.g. ``cred.username`` / ``cred.password``), handed to user code by CodeBlock."""
class CodeBlock(Block):
    """Executes user-supplied Python inside a restricted namespace with access to
    the current Playwright page and the resolved workflow parameters.

    NOTE(review): the sandboxing is AST- and builtins-based (see is_safe_code /
    build_safe_vars) — a best-effort guard, not a hardened security boundary.
    """

    # There is a mypy bug with Literal. Without the type: ignore, mypy will raise an error:
    # Parameter 1 of Literal[...] cannot be of type "Any"
    block_type: Literal[BlockType.CODE] = BlockType.CODE  # type: ignore
    code: str
    parameters: list[PARAMETER_TYPE] = []

    @staticmethod
    def is_safe_code(code: str) -> None:
        """Raise InsecureCodeDetected if the code imports modules or accesses any
        attribute whose name starts with a double underscore."""
        tree = ast.parse(code)
        for node in ast.walk(tree):
            # Block dunder attribute access (e.g. obj.__class__), which could be
            # used to escape the restricted builtins below.
            if hasattr(node, "attr") and str(node.attr).startswith("__"):
                raise InsecureCodeDetected("Not allowed to access private methods or attributes")
            if isinstance(node, ast.Import) or isinstance(node, ast.ImportFrom):
                raise InsecureCodeDetected("Not allowed to import modules")

    @staticmethod
    def build_safe_vars() -> dict[str, Any]:
        """Build the globals dict for user code: empty __builtins__ plus an
        explicit allowlist of builtins and stdlib modules."""
        return {
            "__builtins__": {},  # only allow several builtins due to security concerns
            "locals": locals,
            "print": print,
            "len": len,
            "range": range,
            "str": str,
            "int": int,
            "dict": dict,
            "list": list,
            "tuple": tuple,
            "set": set,
            "bool": bool,
            "asyncio": asyncio,
            "re": re,
            "json": json,
            "Exception": Exception,
        }

    def generate_async_user_function(
        self, code: str, page: Page, parameters: dict[str, Any] | None = None
    ) -> Callable[[], Awaitable[dict[str, Any]]]:
        """Wrap the user code in an async ``wrapper`` function and exec it with the
        sandboxed globals. The wrapper returns its locals(), so every variable the
        user code defines becomes part of the block output."""
        code = textwrap.indent(code, "    ")
        full_code = f"""
async def wrapper():
{code}
    return locals()
"""
        runtime_variables: dict[str, Callable[[], Awaitable[dict[str, Any]]]] = {}
        safe_vars = self.build_safe_vars()
        if parameters:
            safe_vars.update(parameters)
        # The live Playwright page is always exposed to user code as `page`.
        safe_vars["page"] = page
        exec(full_code, safe_vars, runtime_variables)
        return runtime_variables["wrapper"]

    def get_all_parameters(
        self,
        workflow_run_id: str,
    ) -> list[PARAMETER_TYPE]:
        """Return the parameters this block consumes."""
        return self.parameters

    def format_potential_template_parameters(self, workflow_run_context: WorkflowRunContext) -> None:
        """Render any Jinja template placeholders embedded in the code string."""
        self.code = self.format_block_parameter_template_from_workflow_run_context(self.code, workflow_run_context)

    async def execute(
        self,
        workflow_run_id: str,
        workflow_run_block_id: str,
        organization_id: str | None = None,
        browser_session_id: str | None = None,
        **kwargs: dict,
    ) -> BlockResult:
        """Validate, sandbox-check, and run the user code against the current
        browser page, returning the (JSON-sanitized) locals of the user function
        as the block output."""
        await app.AGENT_FUNCTION.validate_code_block(organization_id=organization_id)
        browser_state = await self.get_or_create_browser_state(
            workflow_run_id=workflow_run_id,
            organization_id=organization_id,
            browser_session_id=browser_session_id,
        )
        if not browser_state:
            return await self.build_block_result(
                success=False,
                failure_reason="No browser found to run the code block",
                output_parameter_value=None,
                status=BlockStatus.failed,
                workflow_run_block_id=workflow_run_block_id,
                organization_id=organization_id,
            )
        page = await browser_state.get_working_page()
        if not page:
            return await self.build_block_result(
                success=False,
                failure_reason="No page found to run the code block",
                output_parameter_value=None,
                status=BlockStatus.failed,
                workflow_run_block_id=workflow_run_block_id,
                organization_id=organization_id,
            )
        # get workflow run context
        workflow_run_context = self.get_workflow_run_context(workflow_run_id)
        try:
            self.format_potential_template_parameters(workflow_run_context)
        except Exception as e:
            return await self.build_block_result(
                success=False,
                failure_reason=f"Failed to format jinja template: {str(e)}",
                output_parameter_value=None,
                status=BlockStatus.failed,
                workflow_run_block_id=workflow_run_block_id,
                organization_id=organization_id,
            )
        # get all parameters into a dictionary
        parameter_values: dict[str, Any] = {}
        for parameter in self.parameters:
            value = workflow_run_context.get_value(parameter.key)
            # Non-secret parameters are passed through as-is; secret/credential
            # parameters are resolved to their real values below.
            if not parameter.parameter_type.is_secret_or_credential() and not (
                # NOTE: skyvern credential is a 'credential_id' workflow parameter type
                parameter.parameter_type == ParameterType.WORKFLOW
                and parameter.workflow_parameter_type is not None
                and parameter.workflow_parameter_type.is_credential_type()
            ):
                parameter_values[parameter.key] = value
                continue
            if isinstance(value, dict):
                # Credential dicts map field names to secret placeholders; build a
                # Credential namespace with the real values substituted in.
                real_secret_values = {}
                for credential_field, credential_place_holder in value.items():
                    # "context" is a skyvern-defined field to reduce LLM hallucination
                    if credential_field == "context":
                        continue
                    secret_value = workflow_run_context.get_original_secret_value_or_none(credential_place_holder)
                    # TOTP sentinel values are expanded into a fresh one-time code.
                    if (
                        secret_value == BitwardenConstants.TOTP
                        or secret_value == OnePasswordConstants.TOTP
                        or secret_value == AzureVaultConstants.TOTP
                    ):
                        totp_secret_key = workflow_run_context.totp_secret_value_key(credential_place_holder)
                        totp_secret = workflow_run_context.get_original_secret_value_or_none(totp_secret_key)
                        if totp_secret:
                            secret_value = pyotp.TOTP(totp_secret).now()
                        else:
                            LOG.warning(
                                "No TOTP secret found, returning the parameter value as is",
                                parameter=credential_place_holder,
                            )
                    real_secret_value = secret_value if secret_value is not None else credential_place_holder
                    # Each credential field is exposed both at the top level and
                    # under the parameter key as a Credential namespace.
                    parameter_values[credential_field] = real_secret_value
                    real_secret_values[credential_field] = real_secret_value
                parameter_values[parameter.key] = Credential(**real_secret_values)
            else:
                secret_value = workflow_run_context.get_original_secret_value_or_none(value)
                parameter_values[parameter.key] = secret_value if secret_value is not None else value
        try:
            self.is_safe_code(self.code)
        except Exception as e:
            return await self.build_block_result(
                success=False,
                failure_reason=str(e),
                output_parameter_value=None,
                status=BlockStatus.failed,
                workflow_run_block_id=workflow_run_block_id,
                organization_id=organization_id,
            )
        user_function = self.generate_async_user_function(self.code, page, parameter_values)
        try:
            result = await user_function()
        except Exception as e:
            exc = CustomizedCodeException(e)
            return await self.build_block_result(
                success=False,
                failure_reason=exc.message,
                output_parameter_value=None,
                status=BlockStatus.failed,
                workflow_run_block_id=workflow_run_block_id,
                organization_id=organization_id,
            )
        # Round-trip through JSON so the recorded output contains only
        # serializable values; non-serializable objects become a marker string.
        result = json.loads(
            json.dumps(result, default=lambda value: f"Object '{type(value)}' is not JSON serializable")
        )
        await self.record_output_parameter_value(workflow_run_context, workflow_run_id, result)
        return await self.build_block_result(
            success=True,
            failure_reason=None,
            output_parameter_value=result,
            status=BlockStatus.completed,
            workflow_run_block_id=workflow_run_block_id,
            organization_id=organization_id,
        )
class TextPromptBlock(Block):
    """Sends a rendered prompt to an LLM and records the JSON response as the
    block output. A JSON schema (default: a single ``llm_response`` string) is
    appended to the prompt to constrain the response format."""

    # There is a mypy bug with Literal. Without the type: ignore, mypy will raise an error:
    # Parameter 1 of Literal[...] cannot be of type "Any"
    block_type: Literal[BlockType.TEXT_PROMPT] = BlockType.TEXT_PROMPT  # type: ignore
    llm_key: str | None = None
    prompt: str
    parameters: list[PARAMETER_TYPE] = []
    json_schema: dict[str, Any] | None = None

    def get_all_parameters(
        self,
        workflow_run_id: str,
    ) -> list[PARAMETER_TYPE]:
        """Return the parameters this block consumes."""
        return self.parameters

    def format_potential_template_parameters(self, workflow_run_context: WorkflowRunContext) -> None:
        """Render any Jinja template placeholders in the llm_key and prompt."""
        if self.llm_key:
            self.llm_key = self.format_block_parameter_template_from_workflow_run_context(
                self.llm_key, workflow_run_context
            )
        self.prompt = self.format_block_parameter_template_from_workflow_run_context(self.prompt, workflow_run_context)

    async def send_prompt(
        self,
        prompt: str,
        parameter_values: dict[str, Any],
        workflow_run_id: str,
        organization_id: str | None = None,
        workflow_run_block_id: str | None = None,
    ) -> dict[str, Any]:
        """Render the prompt with the parameter values, append the JSON-schema
        instructions, call the LLM, and persist prompt/response artifacts when a
        workflow run block is available (artifact failures are logged, not raised)."""
        default_llm_handler = await self._resolve_default_llm_handler(workflow_run_id, organization_id)
        llm_api_handler = LLMAPIHandlerFactory.get_override_llm_api_handler(
            self.override_llm_key or self.llm_key, default=default_llm_handler
        )
        if not self.json_schema:
            # Fall back to a minimal schema so the LLM always answers in JSON.
            self.json_schema = {
                "type": "object",
                "properties": {
                    "llm_response": {
                        "type": "string",
                        "description": "Your response to the prompt",
                    }
                },
            }
        prompt = prompt_engine.load_prompt_from_string(prompt, **parameter_values)
        prompt += (
            "\n\n"
            + "Please respond to the prompt above using the following JSON definition:\n\n"
            + "```json\n"
            + json.dumps(self.json_schema, indent=2)
            + "\n```\n\n"
        )
        workflow_run_block = None
        artifacts_to_persist: list[tuple[ArtifactType, bytes]] = []
        if workflow_run_block_id:
            try:
                workflow_run_block = await app.DATABASE.get_workflow_run_block(workflow_run_block_id, organization_id)
                if workflow_run_block:
                    artifacts_to_persist.append((ArtifactType.LLM_PROMPT, prompt.encode("utf-8")))
            except Exception as e:
                LOG.error("Failed to fetch workflow_run_block for TextPromptBlock artifacts", error=e)
        LOG.info(
            "TextPromptBlock: Sending prompt to LLM",
            prompt=prompt,
            llm_key=self.llm_key,
        )
        response = await llm_api_handler(prompt=prompt, prompt_name="text-prompt")
        if workflow_run_block:
            artifacts_to_persist.append((ArtifactType.LLM_RESPONSE, json.dumps(response).encode("utf-8")))
            try:
                await app.ARTIFACT_MANAGER.create_workflow_run_block_artifacts(
                    workflow_run_block=workflow_run_block,
                    artifacts=artifacts_to_persist,
                )
            except Exception as e:
                LOG.error("Failed to save TextPromptBlock artifacts", error=e)
        LOG.info("TextPromptBlock: Received response from LLM", response=response)
        return response

    async def _resolve_default_llm_handler(self, workflow_run_id: str, organization_id: str | None) -> LLMAPIHandler:
        """Pick the default LLM handler: prompt-type-specific configuration first,
        then the secondary handler, then the primary handler as a last resort."""
        prompt_config_handler = await get_llm_handler_for_prompt_type("text-prompt", workflow_run_id, organization_id)
        if prompt_config_handler:
            return prompt_config_handler
        secondary_handler = app.SECONDARY_LLM_API_HANDLER
        if secondary_handler:
            return secondary_handler
        LOG.warning(
            "Secondary LLM handler not configured; falling back to primary handler for TextPromptBlock",
            workflow_run_id=workflow_run_id,
            organization_id=organization_id,
        )
        return app.LLM_API_HANDLER

    async def execute(
        self,
        workflow_run_id: str,
        workflow_run_block_id: str,
        organization_id: str | None = None,
        browser_session_id: str | None = None,
        **kwargs: dict,
    ) -> BlockResult:
        """Render templates, collect non-secret parameter values, send the prompt,
        and record the LLM response as the block output."""
        # Validate block execution
        await app.AGENT_FUNCTION.validate_block_execution(
            block=self,
            workflow_run_block_id=workflow_run_block_id,
            workflow_run_id=workflow_run_id,
            organization_id=organization_id,
        )
        # get workflow run context
        workflow_run_context = self.get_workflow_run_context(workflow_run_id)
        # Store the raw (un-rendered) prompt on the block record.
        await app.DATABASE.update_workflow_run_block(
            workflow_run_block_id=workflow_run_block_id,
            organization_id=organization_id,
            prompt=self.prompt,
        )
        try:
            self.format_potential_template_parameters(workflow_run_context)
        except Exception as e:
            return await self.build_block_result(
                success=False,
                failure_reason=f"Failed to format jinja template: {str(e)}",
                output_parameter_value=None,
                status=BlockStatus.failed,
                workflow_run_block_id=workflow_run_block_id,
                organization_id=organization_id,
            )
        # get all parameters into a dictionary
        parameter_values: dict[str, Any] = {}
        for parameter in self.parameters:
            value = workflow_run_context.get_value(parameter.key)
            secret_value = workflow_run_context.get_original_secret_value_or_none(value)
            # Secret values are deliberately excluded from the prompt context.
            if secret_value:
                continue
            else:
                parameter_values[parameter.key] = value
        response = await self.send_prompt(
            self.prompt,
            parameter_values,
            workflow_run_id,
            organization_id,
            workflow_run_block_id=workflow_run_block_id,
        )
        await self.record_output_parameter_value(workflow_run_context, workflow_run_id, response)
        return await self.build_block_result(
            success=True,
            failure_reason=None,
            output_parameter_value=response,
            status=BlockStatus.completed,
            workflow_run_block_id=workflow_run_block_id,
            organization_id=organization_id,
        )
class DownloadToS3Block(Block):
    """Downloads a file from a URL (optionally resolved from a workflow parameter
    or Jinja template) and stores it in the Skyvern uploads S3 bucket."""

    # There is a mypy bug with Literal. Without the type: ignore, mypy will raise an error:
    # Parameter 1 of Literal[...] cannot be of type "Any"
    block_type: Literal[BlockType.DOWNLOAD_TO_S3] = BlockType.DOWNLOAD_TO_S3  # type: ignore
    url: str

    def get_all_parameters(
        self,
        workflow_run_id: str,
    ) -> list[PARAMETER_TYPE]:
        """Expose the URL as a parameter when it names a registered workflow parameter."""
        context = self.get_workflow_run_context(workflow_run_id)
        if self.url and context.has_parameter(self.url):
            return [context.get_parameter(self.url)]
        return []

    def format_potential_template_parameters(self, workflow_run_context: WorkflowRunContext) -> None:
        """Render any Jinja template placeholders embedded in the URL."""
        self.url = self.format_block_parameter_template_from_workflow_run_context(self.url, workflow_run_context)

    async def _upload_file_to_s3(self, uri: str, file_path: str) -> None:
        """Upload the local file to S3, always removing the temp file afterwards."""
        try:
            await self.get_async_aws_client().upload_file_from_path(uri=uri, file_path=file_path)
        finally:
            # Clean up the temporary file since it's created with delete=False
            os.unlink(file_path)

    async def execute(
        self,
        workflow_run_id: str,
        workflow_run_block_id: str,
        organization_id: str | None = None,
        browser_session_id: str | None = None,
        **kwargs: dict,
    ) -> BlockResult:
        """Resolve the URL, download the file (10 MB cap), push it to S3, and
        return the destination URI as the block output."""
        workflow_run_context = self.get_workflow_run_context(workflow_run_id)
        # When the configured URL is actually a workflow parameter key with a
        # runtime value, swap that value in before rendering templates.
        url_is_parameterized = (
            bool(self.url)
            and workflow_run_context.has_parameter(self.url)
            and workflow_run_context.has_value(self.url)
        )
        if url_is_parameterized:
            resolved_url = workflow_run_context.get_value(self.url)
            if resolved_url:
                LOG.info(
                    "DownloadToS3Block: Task URL is parameterized, using parameter value",
                    task_url_parameter_value=resolved_url,
                    task_url_parameter_key=self.url,
                )
                self.url = resolved_url
        try:
            self.format_potential_template_parameters(workflow_run_context)
        except Exception as e:
            return await self.build_block_result(
                success=False,
                failure_reason=f"Failed to format jinja template: {str(e)}",
                output_parameter_value=None,
                status=BlockStatus.failed,
                workflow_run_block_id=workflow_run_block_id,
                organization_id=organization_id,
            )
        try:
            downloaded_file_path = await download_file(self.url, max_size_mb=10)
        except Exception as e:
            LOG.error("DownloadToS3Block: Failed to download file", url=self.url, error=str(e))
            raise e
        uri = None
        try:
            uri = f"s3://{settings.AWS_S3_BUCKET_UPLOADS}/{settings.ENV}/{workflow_run_id}/{uuid.uuid4()}"
            await self._upload_file_to_s3(uri, downloaded_file_path)
        except Exception as e:
            LOG.error("DownloadToS3Block: Failed to upload file to S3", uri=uri, error=str(e))
            raise e
        LOG.info("DownloadToS3Block: File downloaded and uploaded to S3", uri=uri)
        await self.record_output_parameter_value(workflow_run_context, workflow_run_id, uri)
        return await self.build_block_result(
            success=True,
            failure_reason=None,
            output_parameter_value=uri,
            status=BlockStatus.completed,
            workflow_run_block_id=workflow_run_block_id,
            organization_id=organization_id,
        )
class UploadToS3Block(Block):
    """Uploads a single file or every file in a directory to the Skyvern uploads
    S3 bucket, returning the list of destination URIs."""

    # There is a mypy bug with Literal. Without the type: ignore, mypy will raise an error:
    # Parameter 1 of Literal[...] cannot be of type "Any"
    block_type: Literal[BlockType.UPLOAD_TO_S3] = BlockType.UPLOAD_TO_S3  # type: ignore
    # TODO (kerem): A directory upload is supported but we should also support a list of files
    path: str | None = None

    def get_all_parameters(
        self,
        workflow_run_id: str,
    ) -> list[PARAMETER_TYPE]:
        """Expose the path as a parameter when it names a registered workflow parameter."""
        context = self.get_workflow_run_context(workflow_run_id)
        if self.path and context.has_parameter(self.path):
            return [context.get_parameter(self.path)]
        return []

    def format_potential_template_parameters(self, workflow_run_context: WorkflowRunContext) -> None:
        """Render any Jinja template placeholders embedded in the path."""
        if self.path:
            self.path = self.format_block_parameter_template_from_workflow_run_context(self.path, workflow_run_context)

    @staticmethod
    def _get_s3_uri(workflow_run_id: str, path: str) -> str:
        """Build a unique destination URI in the uploads bucket for one file."""
        object_key = f"{settings.ENV}/{workflow_run_id}/{uuid.uuid4()}_{Path(path).name}"
        return f"s3://{settings.AWS_S3_BUCKET_UPLOADS}/{object_key}"

    async def execute(
        self,
        workflow_run_id: str,
        workflow_run_block_id: str,
        organization_id: str | None = None,
        browser_session_id: str | None = None,
        **kwargs: dict,
    ) -> BlockResult:
        """Resolve the source path, upload the file(s) to S3, and record the
        resulting URIs as the block output.

        Raises FileNotFoundError if the resolved path does not exist, and
        ValueError if a directory holds more than MAX_UPLOAD_FILE_COUNT files.
        """
        workflow_run_context = self.get_workflow_run_context(workflow_run_id)
        # The configured path may name a workflow parameter, or the sentinel key
        # that maps to this run's download directory.
        if self.path and workflow_run_context.has_parameter(self.path) and workflow_run_context.has_value(self.path):
            resolved_path = workflow_run_context.get_value(self.path)
            if resolved_path:
                LOG.info(
                    "UploadToS3Block: File path is parameterized, using parameter value",
                    file_path_parameter_value=resolved_path,
                    file_path_parameter_key=self.path,
                )
                self.path = resolved_path
        # if the path is WORKFLOW_DOWNLOAD_DIRECTORY_PARAMETER_KEY, use the download directory for the workflow run
        elif self.path == settings.WORKFLOW_DOWNLOAD_DIRECTORY_PARAMETER_KEY:
            context = skyvern_context.current()
            run_id = context.run_id if context and context.run_id else workflow_run_id
            self.path = str(get_path_for_workflow_download_directory(run_id).absolute())
        try:
            self.format_potential_template_parameters(workflow_run_context)
        except Exception as e:
            return await self.build_block_result(
                success=False,
                failure_reason=f"Failed to format jinja template: {str(e)}",
                output_parameter_value=None,
                status=BlockStatus.failed,
                workflow_run_block_id=workflow_run_block_id,
                organization_id=organization_id,
            )
        if not self.path or not os.path.exists(self.path):
            raise FileNotFoundError(f"UploadToS3Block: File not found at path: {self.path}")
        s3_uris = []
        try:
            client = self.get_async_aws_client()
            if os.path.isdir(self.path):
                # Refuse to upload oversized directories; nested directories are skipped.
                entries = os.listdir(self.path)
                if len(entries) > MAX_UPLOAD_FILE_COUNT:
                    raise ValueError("Too many files in the directory, not uploading")
                for entry in entries:
                    entry_path = os.path.join(self.path, entry)
                    if os.path.isdir(entry_path):
                        LOG.warning("UploadToS3Block: Skipping directory", file=entry)
                        continue
                    s3_uri = self._get_s3_uri(workflow_run_id, entry_path)
                    s3_uris.append(s3_uri)
                    await client.upload_file_from_path(uri=s3_uri, file_path=entry_path)
            else:
                s3_uri = self._get_s3_uri(workflow_run_id, self.path)
                s3_uris.append(s3_uri)
                await client.upload_file_from_path(uri=s3_uri, file_path=self.path)
        except Exception as e:
            LOG.exception("UploadToS3Block: Failed to upload file to S3", file_path=self.path)
            raise e
        LOG.info("UploadToS3Block: File(s) uploaded to S3", file_path=self.path)
        await self.record_output_parameter_value(workflow_run_context, workflow_run_id, s3_uris)
        return await self.build_block_result(
            success=True,
            failure_reason=None,
            output_parameter_value=s3_uris,
            status=BlockStatus.completed,
            workflow_run_block_id=workflow_run_block_id,
            organization_id=organization_id,
        )
class FileUploadBlock(Block):
    """Uploads the files in this workflow run's download directory to customer
    storage: either S3 or Azure Blob Storage, selected by storage_type. The
    credential fields may hold literal values, workflow-parameter references,
    or secret placeholders resolved at execution time."""

    # There is a mypy bug with Literal. Without the type: ignore, mypy will raise an error:
    # Parameter 1 of Literal[...] cannot be of type "Any"
    block_type: Literal[BlockType.FILE_UPLOAD] = BlockType.FILE_UPLOAD  # type: ignore
    storage_type: FileStorageType = FileStorageType.S3
    # S3 settings (required when storage_type == S3)
    s3_bucket: str | None = None
    aws_access_key_id: str | None = None
    aws_secret_access_key: str | None = None
    region_name: str | None = None
    # Azure settings (required when storage_type == AZURE)
    azure_storage_account_name: str | None = None
    azure_storage_account_key: str | None = None
    azure_blob_container_name: str | None = None
    # Optional destination folder prefix; falls back to the workflow run id.
    path: str | None = None

    def get_all_parameters(
        self,
        workflow_run_id: str,
    ) -> list[PARAMETER_TYPE]:
        """Collect every configured field that names a registered workflow parameter."""
        workflow_run_context = self.get_workflow_run_context(workflow_run_id)
        parameters = []
        if self.path and workflow_run_context.has_parameter(self.path):
            parameters.append(workflow_run_context.get_parameter(self.path))
        if self.s3_bucket and workflow_run_context.has_parameter(self.s3_bucket):
            parameters.append(workflow_run_context.get_parameter(self.s3_bucket))
        if self.aws_access_key_id and workflow_run_context.has_parameter(self.aws_access_key_id):
            parameters.append(workflow_run_context.get_parameter(self.aws_access_key_id))
        if self.aws_secret_access_key and workflow_run_context.has_parameter(self.aws_secret_access_key):
            parameters.append(workflow_run_context.get_parameter(self.aws_secret_access_key))
        if self.azure_storage_account_name and workflow_run_context.has_parameter(self.azure_storage_account_name):
            parameters.append(workflow_run_context.get_parameter(self.azure_storage_account_name))
        if self.azure_storage_account_key and workflow_run_context.has_parameter(self.azure_storage_account_key):
            parameters.append(workflow_run_context.get_parameter(self.azure_storage_account_key))
        if self.azure_blob_container_name and workflow_run_context.has_parameter(self.azure_blob_container_name):
            parameters.append(workflow_run_context.get_parameter(self.azure_blob_container_name))
        return parameters

    def format_potential_template_parameters(self, workflow_run_context: WorkflowRunContext) -> None:
        """Render any Jinja template placeholders in the configured fields."""
        if self.path:
            self.path = self.format_block_parameter_template_from_workflow_run_context(self.path, workflow_run_context)
        if self.s3_bucket:
            self.s3_bucket = self.format_block_parameter_template_from_workflow_run_context(
                self.s3_bucket, workflow_run_context
            )
        if self.aws_access_key_id:
            self.aws_access_key_id = self.format_block_parameter_template_from_workflow_run_context(
                self.aws_access_key_id, workflow_run_context
            )
        if self.aws_secret_access_key:
            self.aws_secret_access_key = self.format_block_parameter_template_from_workflow_run_context(
                self.aws_secret_access_key, workflow_run_context
            )
        if self.azure_storage_account_name:
            self.azure_storage_account_name = self.format_block_parameter_template_from_workflow_run_context(
                self.azure_storage_account_name, workflow_run_context
            )
        if self.azure_storage_account_key:
            self.azure_storage_account_key = self.format_block_parameter_template_from_workflow_run_context(
                self.azure_storage_account_key, workflow_run_context
            )
        if self.azure_blob_container_name:
            self.azure_blob_container_name = self.format_block_parameter_template_from_workflow_run_context(
                self.azure_blob_container_name, workflow_run_context
            )

    def _get_s3_uri(self, workflow_run_id: str, path: str) -> str:
        """Build the destination S3 URI for one file, namespaced by the configured
        folder (or workflow run id) and a random UUID prefix."""
        folder_path = self.path or f"{workflow_run_id}"
        # Remove trailing slash from folder_path to avoid double slashes
        folder_path = folder_path.rstrip("/")
        # Remove any empty path segments to avoid double slashes
        folder_path = "/".join(segment for segment in folder_path.split("/") if segment)
        s3_suffix = f"{uuid.uuid4()}_{Path(path).name}"
        return f"s3://{self.s3_bucket}/{folder_path}/{s3_suffix}"

    def _get_azure_blob_name(self, workflow_run_id: str, file_path: str) -> str:
        """Build the destination blob name for one file (same folder rules as S3)."""
        blob_name = f"{uuid.uuid4()}_{Path(file_path).name}"
        folder_path = self.path or workflow_run_id
        # Remove trailing slash from folder_path to avoid double slashes
        folder_path = folder_path.rstrip("/")
        # Remove any empty path segments to avoid double slashes
        folder_path = "/".join(segment for segment in folder_path.split("/") if segment)
        return folder_path + "/" + blob_name

    def _get_azure_blob_uri(self, workflow_run_id: str, blob_name: str) -> str:
        """Build the public HTTPS URI for an uploaded blob.

        NOTE(review): workflow_run_id is unused here; kept for signature symmetry
        with _get_azure_blob_name.
        """
        return f"https://{self.azure_storage_account_name}.blob.core.windows.net/{self.azure_blob_container_name}/{blob_name}"

    async def execute(
        self,
        workflow_run_id: str,
        workflow_run_block_id: str,
        organization_id: str | None = None,
        browser_session_id: str | None = None,
        **kwargs: dict,
    ) -> BlockResult:
        """Validate credentials for the selected storage type, then upload every
        file from the run's download directory and record the resulting URIs."""
        # get workflow run context
        workflow_run_context = self.get_workflow_run_context(workflow_run_id)
        # get all parameters into a dictionary
        # data validate before uploading
        missing_parameters = []
        if self.storage_type == FileStorageType.S3:
            if not self.s3_bucket:
                missing_parameters.append("s3_bucket")
            if not self.aws_access_key_id:
                missing_parameters.append("aws_access_key_id")
            if not self.aws_secret_access_key:
                missing_parameters.append("aws_secret_access_key")
        elif self.storage_type == FileStorageType.AZURE:
            if not self.azure_storage_account_name or self.azure_storage_account_name == "":
                missing_parameters.append("azure_storage_account_name")
            if not self.azure_storage_account_key or self.azure_storage_account_key == "":
                missing_parameters.append("azure_storage_account_key")
            if not self.azure_blob_container_name or self.azure_blob_container_name == "":
                missing_parameters.append("azure_blob_container_name")
        else:
            return await self.build_block_result(
                success=False,
                failure_reason=f"Unsupported storage type: {self.storage_type}",
                output_parameter_value=None,
                status=BlockStatus.failed,
                workflow_run_block_id=workflow_run_block_id,
                organization_id=organization_id,
            )
        if missing_parameters:
            return await self.build_block_result(
                success=False,
                failure_reason=f"Required block values are missing in the FileUploadBlock (label: {self.label}): {', '.join(missing_parameters)}",
                output_parameter_value=None,
                status=BlockStatus.failed,
                workflow_run_block_id=workflow_run_block_id,
                organization_id=organization_id,
            )
        try:
            self.format_potential_template_parameters(workflow_run_context)
        except Exception as e:
            return await self.build_block_result(
                success=False,
                failure_reason=f"Failed to format jinja template: {str(e)}",
                output_parameter_value=None,
                status=BlockStatus.failed,
                workflow_run_block_id=workflow_run_block_id,
                organization_id=organization_id,
            )
        # Source files come from this run's download directory.
        context = skyvern_context.current()
        download_files_path = str(
            get_path_for_workflow_download_directory(
                context.run_id if context and context.run_id else workflow_run_id
            ).absolute()
        )
        uploaded_uris = []
        try:
            workflow_run_context = self.get_workflow_run_context(workflow_run_id)
            files_to_upload = []
            if os.path.isdir(download_files_path):
                files = os.listdir(download_files_path)
                # Each backend has its own file-count cap.
                max_file_count = (
                    MAX_UPLOAD_FILE_COUNT
                    if self.storage_type == FileStorageType.S3
                    else AZURE_BLOB_STORAGE_MAX_UPLOAD_FILE_COUNT
                )
                if len(files) > max_file_count:
                    raise ValueError(f"Too many files in the directory, not uploading. Max: {max_file_count}")
                for file in files:
                    # Nested directories are skipped, not recursed into.
                    if os.path.isdir(os.path.join(download_files_path, file)):
                        LOG.warning("FileUploadBlock: Skipping directory", file=file)
                        continue
                    files_to_upload.append(os.path.join(download_files_path, file))
            else:
                files_to_upload.append(download_files_path)
            if self.storage_type == FileStorageType.S3:
                # Credential fields may hold secret placeholders; resolve them to
                # the real values (falling back to the literal field value).
                actual_aws_access_key_id = (
                    workflow_run_context.get_original_secret_value_or_none(self.aws_access_key_id)
                    or self.aws_access_key_id
                )
                actual_aws_secret_access_key = (
                    workflow_run_context.get_original_secret_value_or_none(self.aws_secret_access_key)
                    or self.aws_secret_access_key
                )
                aws_client = AsyncAWSClient(
                    aws_access_key_id=actual_aws_access_key_id,
                    aws_secret_access_key=actual_aws_secret_access_key,
                    region_name=self.region_name,
                )
                for file_path in files_to_upload:
                    s3_uri = self._get_s3_uri(workflow_run_id, file_path)
                    uploaded_uris.append(s3_uri)
                    await aws_client.upload_file_from_path(uri=s3_uri, file_path=file_path, raise_exception=True)
                LOG.info("FileUploadBlock: File(s) uploaded to S3", file_path=self.path)
            elif self.storage_type == FileStorageType.AZURE:
                actual_azure_storage_account_name = (
                    workflow_run_context.get_original_secret_value_or_none(self.azure_storage_account_name)
                    or self.azure_storage_account_name
                )
                actual_azure_storage_account_key = (
                    workflow_run_context.get_original_secret_value_or_none(self.azure_storage_account_key)
                    or self.azure_storage_account_key
                )
                if actual_azure_storage_account_name is None or actual_azure_storage_account_key is None:
                    raise AzureConfigurationError("Azure Storage is not configured")
                azure_client = app.AZURE_CLIENT_FACTORY.create_storage_client(
                    storage_account_name=actual_azure_storage_account_name,
                    storage_account_key=actual_azure_storage_account_key,
                )
                for file_path in files_to_upload:
                    LOG.info("FileUploadBlock: Uploading file to Azure Blob Storage", file_path=file_path)
                    blob_name = self._get_azure_blob_name(workflow_run_id, file_path)
                    # The recorded output is the public HTTPS URI; the azure://
                    # URI below is the client's upload target.
                    azure_uri = self._get_azure_blob_uri(workflow_run_id, blob_name)
                    uploaded_uris.append(azure_uri)
                    uri = f"azure://{self.azure_blob_container_name or ''}/{blob_name}"
                    await azure_client.upload_file_from_path(uri, file_path)
                LOG.info("FileUploadBlock: File(s) uploaded to Azure Blob Storage", file_path=self.path)
            else:
                # This case should ideally be caught by the initial validation
                raise ValueError(f"Unsupported storage type: {self.storage_type}")
        except Exception as e:
            LOG.exception("FileUploadBlock: Failed to upload file", file_path=self.path, storage_type=self.storage_type)
            return await self.build_block_result(
                success=False,
                failure_reason=f"Failed to upload file to {self.storage_type}: {str(e)}",
                output_parameter_value=None,
                status=BlockStatus.failed,
                workflow_run_block_id=workflow_run_block_id,
                organization_id=organization_id,
            )
        await self.record_output_parameter_value(workflow_run_context, workflow_run_id, uploaded_uris)
        return await self.build_block_result(
            success=True,
            failure_reason=None,
            output_parameter_value=uploaded_uris,
            status=BlockStatus.completed,
            workflow_run_block_id=workflow_run_block_id,
            organization_id=organization_id,
        )
class SendEmailBlock(Block):
    """Workflow block that sends an email over SMTP, optionally with file attachments.

    SMTP connection settings (host, port, username, password) are AWS secret
    parameters, decrypted from the workflow run context at execution time.
    Attachments may be local file paths, directories, workflow parameter
    references, HTTP(S) URLs, or S3 URIs.
    """

    # There is a mypy bug with Literal. Without the type: ignore, mypy will raise an error:
    # Parameter 1 of Literal[...] cannot be of type "Any"
    block_type: Literal[BlockType.SEND_EMAIL] = BlockType.SEND_EMAIL  # type: ignore
    smtp_host: AWSSecretParameter
    smtp_port: AWSSecretParameter
    smtp_username: AWSSecretParameter
    # if you're using a Gmail account, you need to pass in an app password instead of your regular password
    smtp_password: AWSSecretParameter
    sender: str
    recipients: list[str]
    subject: str
    body: str
    file_attachments: list[str] = []

    def get_all_parameters(
        self,
        workflow_run_id: str,
    ) -> list[PARAMETER_TYPE]:
        """Return the four SMTP secret parameters plus any workflow parameters
        referenced by the attachments, subject, or body."""
        workflow_run_context = self.get_workflow_run_context(workflow_run_id)
        parameters = [
            self.smtp_host,
            self.smtp_port,
            self.smtp_username,
            self.smtp_password,
        ]
        if self.file_attachments:
            for file_path in self.file_attachments:
                if workflow_run_context.has_parameter(file_path):
                    parameters.append(workflow_run_context.get_parameter(file_path))
        if self.subject and workflow_run_context.has_parameter(self.subject):
            parameters.append(workflow_run_context.get_parameter(self.subject))
        if self.body and workflow_run_context.has_parameter(self.body):
            parameters.append(workflow_run_context.get_parameter(self.body))
        return parameters

    def format_potential_template_parameters(self, workflow_run_context: WorkflowRunContext) -> None:
        """Render Jinja templates in sender, subject, body, and each recipient in place."""
        self.sender = self.format_block_parameter_template_from_workflow_run_context(self.sender, workflow_run_context)
        self.subject = self.format_block_parameter_template_from_workflow_run_context(
            self.subject, workflow_run_context
        )
        self.body = self.format_block_parameter_template_from_workflow_run_context(self.body, workflow_run_context)
        # Format recipients
        formatted_recipients = []
        for recipient in self.recipients:
            formatted_recipient = self.format_block_parameter_template_from_workflow_run_context(
                recipient, workflow_run_context
            )
            formatted_recipients.append(formatted_recipient)
        self.recipients = formatted_recipients

    def _decrypt_smtp_parameters(self, workflow_run_context: WorkflowRunContext) -> tuple[str, int, str, str]:
        """Decrypt the SMTP connection settings from the workflow run context.

        Returns:
            A ``(host, port, username, password)`` tuple. The port is converted to
            ``int`` after the ``isdigit`` validation below, so the value matches
            the annotated return type.

        Raises:
            InvalidEmailClientConfiguration: if any setting is missing or the
                port is not numeric.
        """
        obfuscated_smtp_host_value = workflow_run_context.get_value(self.smtp_host.key)
        obfuscated_smtp_port_value = workflow_run_context.get_value(self.smtp_port.key)
        obfuscated_smtp_username_value = workflow_run_context.get_value(self.smtp_username.key)
        obfuscated_smtp_password_value = workflow_run_context.get_value(self.smtp_password.key)
        smtp_host_value = workflow_run_context.get_original_secret_value_or_none(obfuscated_smtp_host_value)
        smtp_port_value = workflow_run_context.get_original_secret_value_or_none(obfuscated_smtp_port_value)
        smtp_username_value = workflow_run_context.get_original_secret_value_or_none(obfuscated_smtp_username_value)
        smtp_password_value = workflow_run_context.get_original_secret_value_or_none(obfuscated_smtp_password_value)
        email_config_problems = []
        if smtp_host_value is None:
            email_config_problems.append("Missing SMTP server")
        if smtp_port_value is None:
            email_config_problems.append("Missing SMTP port")
        elif not smtp_port_value.isdigit():
            email_config_problems.append("SMTP port should be a number")
        if smtp_username_value is None:
            email_config_problems.append("Missing SMTP username")
        if smtp_password_value is None:
            email_config_problems.append("Missing SMTP password")
        if email_config_problems:
            raise InvalidEmailClientConfiguration(email_config_problems)
        # Fix: the annotated return type promises an int port, but the raw secret
        # value is a string; convert after the isdigit() validation above.
        return (
            smtp_host_value,
            int(smtp_port_value),
            smtp_username_value,
            smtp_password_value,
        )

    def _get_file_paths(self, workflow_run_context: WorkflowRunContext, workflow_run_id: str) -> list[str]:
        """Resolve the configured attachments to concrete paths/URLs.

        Handles parameter references (including secret values), the special
        workflow-download-directory key, directories (expanded non-recursively,
        skipping subdirectories), single files, and remote http(s)/s3/www paths.
        Missing local paths are logged and skipped.
        """
        file_paths = []
        for path in self.file_attachments:
            # if the file path is a parameter, get the value from the workflow run context first
            if workflow_run_context.has_parameter(path):
                file_path_parameter_value = workflow_run_context.get_value(path)
                # if the file path is a secret, get the original secret value from the workflow run context
                file_path_parameter_secret_value = workflow_run_context.get_original_secret_value_or_none(
                    file_path_parameter_value
                )
                if file_path_parameter_secret_value:
                    path = file_path_parameter_secret_value
                else:
                    path = file_path_parameter_value
            if path == settings.WORKFLOW_DOWNLOAD_DIRECTORY_PARAMETER_KEY:
                # if the path is WORKFLOW_DOWNLOAD_DIRECTORY_PARAMETER_KEY, use download directory for the workflow run
                context = skyvern_context.current()
                path = str(
                    get_path_for_workflow_download_directory(
                        context.run_id if context and context.run_id else workflow_run_id
                    ).absolute()
                )
                LOG.info(
                    "SendEmailBlock: Using download directory for the workflow run",
                    workflow_run_id=workflow_run_id,
                    file_path=path,
                )
            path = self.format_block_parameter_template_from_workflow_run_context(path, workflow_run_context)
            # if the file path is a directory, add all files in the directory, skip directories, limit to 10 files
            if os.path.exists(path):
                if os.path.isdir(path):
                    for file in os.listdir(path):
                        if os.path.isdir(os.path.join(path, file)):
                            LOG.warning("SendEmailBlock: Skipping directory", file=file)
                            continue
                        file_path = os.path.join(path, file)
                        file_paths.append(file_path)
                else:
                    # covers the case where the file path is a single file
                    file_paths.append(path)
            # check if path is a url, or an S3 uri
            elif (
                path.startswith("http://")
                or path.startswith("https://")
                or path.startswith("s3://")
                or path.startswith("www.")
            ):
                file_paths.append(path)
            else:
                LOG.warning("SendEmailBlock: File not found", file_path=path)
        return file_paths

    async def _download_from_s3(self, s3_uri: str) -> str:
        """Download an S3 object to a persistent temporary file and return its path."""
        client = self.get_async_aws_client()
        downloaded_bytes = await client.download_file(uri=s3_uri)
        # delete=False so the file survives this scope and can be attached later
        file_path = create_named_temporary_file(delete=False)
        file_path.write(downloaded_bytes)
        return file_path.name

    def get_real_email_recipients(self, workflow_run_context: WorkflowRunContext) -> list[str]:
        """Resolve recipient parameters and keep only syntactically valid addresses.

        Invalid addresses are logged and dropped.

        Raises:
            NoValidEmailRecipient: if no valid recipient remains.
        """
        recipients = []
        for recipient in self.recipients:
            # Check if the recipient is a parameter and get its value
            if workflow_run_context.has_parameter(recipient):
                maybe_recipient = workflow_run_context.get_value(recipient)
            else:
                maybe_recipient = recipient
            # NOTE(review): the formatted value below is never used (validation runs on
            # maybe_recipient); recipients are already formatted by
            # format_potential_template_parameters() in execute(). Kept for safety —
            # confirm before removing.
            recipient = self.format_block_parameter_template_from_workflow_run_context(recipient, workflow_run_context)
            # check if maybe_recipient is a valid email address
            try:
                validate_email(maybe_recipient)
                recipients.append(maybe_recipient)
            except EmailNotValidError as e:
                LOG.warning(
                    "SendEmailBlock: Invalid email address",
                    recipient=maybe_recipient,
                    reason=str(e),
                )
        if not recipients:
            raise NoValidEmailRecipient(recipients=recipients)
        return recipients

    async def _build_email_message(
        self, workflow_run_context: WorkflowRunContext, workflow_run_id: str
    ) -> EmailMessage:
        """Assemble the EmailMessage: headers, body, and all resolved attachments.

        Remote attachments (s3:// and http(s)://) are downloaded first; attachment
        content types are guessed from file magic, and duplicate attachments are
        tracked by SHA-256 content hash for logging.
        """
        msg = EmailMessage()
        msg["Subject"] = (
            self.subject.strip().replace("\n", "").replace("\r", "") + f" - Workflow Run ID: {workflow_run_id}"
        )
        msg["To"] = ", ".join(self.get_real_email_recipients(workflow_run_context))
        msg["BCC"] = self.sender  # BCC the sender so there is a record of the email being sent
        msg["From"] = self.sender
        if self.body and workflow_run_context.has_parameter(self.body) and workflow_run_context.has_value(self.body):
            # We're purposely not decrypting the body parameter value here because we don't want to expose secrets
            body_parameter_value = workflow_run_context.get_value(self.body)
            msg.set_content(str(body_parameter_value))
        else:
            msg.set_content(self.body)
        file_names_by_hash: dict[str, list[str]] = defaultdict(list)
        for filename in self._get_file_paths(workflow_run_context, workflow_run_id):
            if filename.startswith("s3://"):
                path = await download_from_s3(self.get_async_aws_client(), filename)
            elif filename.startswith("http://") or filename.startswith("https://"):
                path = await download_file(filename)
            else:
                LOG.info("SendEmailBlock: Looking for file locally", filename=filename)
                # Fix: include the offending path in the error messages (the
                # previous f-strings had no placeholders).
                if not os.path.exists(filename):
                    raise FileNotFoundError(f"File not found: {filename}")
                if not os.path.isfile(filename):
                    raise IsADirectoryError(f"Path is a directory: {filename}")
                path = filename
                LOG.info("SendEmailBlock: Found file locally", path=path)
            if not path:
                raise FileNotFoundError(f"File not found: {filename}")
            # Guess the content type based on the file's extension. Encoding
            # will be ignored, although we should check for simple things like
            # gzip'd or compressed files.
            kind = filetype.guess(path)
            if kind:
                ctype = kind.mime
                extension = kind.extension
            else:
                # No guess could be made, or the file is encoded (compressed), so
                # use a generic bag-of-bits type.
                ctype = "application/octet-stream"
                extension = None
            maintype, subtype = ctype.split("/", 1)
            attachment_path = Path(path)
            attachment_filename = attachment_path.name
            # Check if the filename has an extension
            if not attachment_path.suffix:
                # If no extension, guess it based on the MIME type
                if extension:
                    attachment_filename += f".{extension}"
            LOG.info(
                "SendEmailBlock: Adding attachment",
                filename=attachment_filename,
                maintype=maintype,
                subtype=subtype,
            )
            with open(path, "rb") as fp:
                msg.add_attachment(
                    fp.read(),
                    maintype=maintype,
                    subtype=subtype,
                    filename=attachment_filename,
                )
            file_hash = calculate_sha256_for_file(path)
            file_names_by_hash[file_hash].append(path)
        # Calculate file stats based on content hashes
        total_files = sum(len(files) for files in file_names_by_hash.values())
        unique_files = len(file_names_by_hash)
        duplicate_files_list = [files for files in file_names_by_hash.values() if len(files) > 1]
        # Log file statistics
        LOG.info("SendEmailBlock: Total files attached", total_files=total_files)
        LOG.info("SendEmailBlock: Unique files (based on content) attached", unique_files=unique_files)
        if duplicate_files_list:
            LOG.info(
                "SendEmailBlock: Duplicate files (based on content) attached", duplicate_files_list=duplicate_files_list
            )
        return msg

    async def execute(
        self,
        workflow_run_id: str,
        workflow_run_block_id: str,
        organization_id: str | None = None,
        browser_session_id: str | None = None,
        **kwargs: dict,
    ) -> BlockResult:
        """Render templates, connect to the SMTP server with STARTTLS, send the
        message, and record the outcome as this block's output parameter value."""
        workflow_run_context = self.get_workflow_run_context(workflow_run_id)
        await app.DATABASE.update_workflow_run_block(
            workflow_run_block_id=workflow_run_block_id,
            organization_id=organization_id,
            recipients=self.recipients,
            attachments=self.file_attachments,
            subject=self.subject,
            body=self.body,
        )
        try:
            self.format_potential_template_parameters(workflow_run_context)
        except Exception as e:
            return await self.build_block_result(
                success=False,
                failure_reason=f"Failed to format jinja template: {str(e)}",
                output_parameter_value=None,
                status=BlockStatus.failed,
                workflow_run_block_id=workflow_run_block_id,
                organization_id=organization_id,
            )
        smtp_host_value, smtp_port_value, smtp_username_value, smtp_password_value = self._decrypt_smtp_parameters(
            workflow_run_context
        )
        smtp_host = None
        try:
            # NOTE(review): smtplib is blocking; this runs on the event loop thread.
            # Consider asyncio.to_thread if email sending becomes a bottleneck.
            smtp_host = smtplib.SMTP(smtp_host_value, smtp_port_value)
            LOG.info("SendEmailBlock: Connected to SMTP server")
            smtp_host.starttls()
            smtp_host.login(smtp_username_value, smtp_password_value)
            LOG.info("SendEmailBlock: Logged in to SMTP server")
            message = await self._build_email_message(workflow_run_context, workflow_run_id)
            smtp_host.send_message(message)
            LOG.info("SendEmailBlock: Email sent")
        except Exception as e:
            LOG.error("SendEmailBlock: Failed to send email", exc_info=True)
            result_dict = {"success": False, "error": str(e)}
            await self.record_output_parameter_value(workflow_run_context, workflow_run_id, result_dict)
            return await self.build_block_result(
                success=False,
                failure_reason=str(e),
                output_parameter_value=result_dict,
                status=BlockStatus.failed,
                workflow_run_block_id=workflow_run_block_id,
                organization_id=organization_id,
            )
        finally:
            # Close the connection whether sending succeeded or failed
            if smtp_host:
                smtp_host.quit()
        result_dict = {"success": True}
        await self.record_output_parameter_value(workflow_run_context, workflow_run_id, result_dict)
        return await self.build_block_result(
            success=True,
            failure_reason=None,
            output_parameter_value=result_dict,
            status=BlockStatus.completed,
            workflow_run_block_id=workflow_run_block_id,
            organization_id=organization_id,
        )
class FileParserBlock(Block):
    """Workflow block that downloads a file (HTTP(S) or S3), detects and validates
    its type, parses it (CSV/TSV, Excel, PDF, image OCR, or DOCX), and optionally
    runs an LLM extraction over the parsed content using ``json_schema``.
    """

    # There is a mypy bug with Literal. Without the type: ignore, mypy will raise an error:
    # Parameter 1 of Literal[...] cannot be of type "Any"
    block_type: Literal[BlockType.FILE_URL_PARSER] = BlockType.FILE_URL_PARSER  # type: ignore
    file_url: str
    file_type: FileType
    # When set, the parsed content is fed to an LLM to extract structured data
    # matching this schema (see _extract_with_ai).
    json_schema: dict[str, Any] | None = None
    def get_all_parameters(
        self,
        workflow_run_id: str,
    ) -> list[PARAMETER_TYPE]:
        """Return the workflow parameter backing ``file_url``, if it is one."""
        workflow_run_context = self.get_workflow_run_context(workflow_run_id)
        if self.file_url and workflow_run_context.has_parameter(self.file_url):
            return [workflow_run_context.get_parameter(self.file_url)]
        return []
    def format_potential_template_parameters(self, workflow_run_context: WorkflowRunContext) -> None:
        """Render any Jinja template in ``file_url`` in place."""
        self.file_url = self.format_block_parameter_template_from_workflow_run_context(
            self.file_url, workflow_run_context
        )
    def _detect_file_type_from_url(self, file_url: str) -> FileType:
        """Detect file type based on file extension in the URL."""
        url_parsed = urlparse(file_url)
        # TODO: use filetype.guess(file_path) to make the detection more robust
        suffix = Path(url_parsed.path).suffix.lower()
        if suffix in (".xlsx", ".xls", ".xlsm"):
            return FileType.EXCEL
        elif suffix == ".pdf":
            return FileType.PDF
        elif suffix == ".tsv":
            return FileType.CSV  # TSV files are handled by the CSV parser
        elif suffix in (".png", ".jpg", ".jpeg", ".gif", ".bmp", ".webp", ".tiff", ".tif"):
            return FileType.IMAGE
        elif suffix == ".docx":
            return FileType.DOCX
        elif suffix == ".doc":
            # Legacy binary Word format cannot be parsed by python-docx
            raise InvalidFileType(
                file_url=file_url,
                file_type=FileType.DOCX,
                error="Legacy .doc format (Word 97-2003) is not supported. Please convert the file to .docx format.",
            )
        else:
            return FileType.CSV  # Default to CSV for .csv and any other extensions
    def _detect_file_encoding(self, file_path: str) -> str:
        """Detect the encoding of a file using charset-normalizer with fallbacks.
        Reads a sample of the file (first 64KB) to detect encoding efficiently.
        Falls back through common encodings if detection fails.
        """
        sample_size = 65536  # 64KB sample for detection
        with open(file_path, "rb") as f:
            raw_data = f.read(sample_size)
        result = from_bytes(raw_data)
        best_match = result.best()
        if best_match and best_match.encoding:
            return best_match.encoding
        # charset-normalizer gave no usable answer; probe common encodings in order
        for encoding in ["utf-8", "cp1252", "latin-1"]:
            try:
                raw_data.decode(encoding)
                return encoding
            except UnicodeDecodeError:
                continue
        # latin-1 always succeeds (1:1 byte mapping), so this is a safety fallback
        return "latin-1"
    def validate_file_type(self, file_url_used: str, file_path: str) -> None:
        """Validate that the downloaded file matches ``self.file_type``.

        Raises:
            InvalidFileType: if the file cannot be parsed as the expected type.
        """
        if self.file_type == FileType.CSV:
            try:
                encoding = self._detect_file_encoding(file_path)
                with open(file_path, encoding=encoding, errors="replace") as file:
                    # Sniffing the first 1KB is enough to confirm a CSV dialect
                    csv.Sniffer().sniff(file.read(1024))
            except csv.Error as e:
                raise InvalidFileType(file_url=file_url_used, file_type=self.file_type, error=str(e))
        elif self.file_type == FileType.EXCEL:
            try:
                # Try to read the file with pandas to validate it's a valid Excel file
                pd.read_excel(file_path, nrows=1, engine="calamine")
            except Exception as e:
                raise InvalidFileType(
                    file_url=file_url_used, file_type=self.file_type, error=f"Invalid Excel file format: {str(e)}"
                )
        elif self.file_type == FileType.PDF:
            try:
                validate_pdf_file(file_path, file_identifier=file_url_used)
            except PDFParsingError as e:
                raise InvalidFileType(file_url=file_url_used, file_type=self.file_type, error=str(e))
        elif self.file_type == FileType.IMAGE:
            # Check file magic rather than the extension
            kind = filetype.guess(file_path)
            if kind is None or not kind.mime.startswith("image/"):
                raise InvalidFileType(
                    file_url=file_url_used, file_type=self.file_type, error="File is not a valid image"
                )
        elif self.file_type == FileType.DOCX:
            try:
                # Try to open the file with python-docx to validate it's a valid DOCX file
                docx.Document(file_path)
            except Exception as e:
                raise InvalidFileType(
                    file_url=file_url_used, file_type=self.file_type, error=f"Invalid DOCX file format: {str(e)}"
                )
    async def _parse_csv_file(self, file_path: str) -> list[dict[str, Any]]:
        """Parse CSV/TSV file and return list of dictionaries."""
        parsed_data = []
        encoding = self._detect_file_encoding(file_path)
        with open(file_path, encoding=encoding, errors="replace") as file:
            # Try to detect the delimiter (comma for CSV, tab for TSV)
            sample = file.read(1024)
            file.seek(0)  # Reset file pointer
            # Use csv.Sniffer to detect the delimiter
            try:
                dialect = csv.Sniffer().sniff(sample)
                delimiter = dialect.delimiter
            except csv.Error:
                # Default to comma if detection fails
                delimiter = ","
            reader = csv.DictReader(file, delimiter=delimiter)
            for row in reader:
                parsed_data.append(row)
        return parsed_data
    def _clean_dataframe_for_json(self, df: pd.DataFrame) -> list[dict[str, Any]]:
        """Clean DataFrame to ensure it can be serialized to JSON."""
        # Replace NaN and NaT values with "nan" string
        df_cleaned = df.replace({pd.NA: "nan", pd.NaT: "nan"})
        df_cleaned = df_cleaned.where(pd.notna(df_cleaned), "nan")
        # Convert to list of dictionaries
        records = df_cleaned.to_dict("records")
        # Additional cleaning for any remaining problematic values
        for record in records:
            for key, value in record.items():
                if pd.isna(value) or value == "NaN" or value == "NaT":
                    record[key] = "nan"
                elif isinstance(value, (pd.Timestamp, pd.DatetimeTZDtype)):
                    # Convert pandas timestamps to ISO format strings
                    # NOTE(review): pd.DatetimeTZDtype is a dtype class, not a scalar
                    # type, so this isinstance likely only matches pd.Timestamp — verify.
                    record[key] = value.isoformat() if pd.notna(value) else "nan"
        return records
    async def _parse_excel_file(self, file_path: str) -> list[dict[str, Any]]:
        """Parse Excel file and return list of dictionaries."""
        try:
            # Read Excel file with pandas, specifying engine explicitly
            df = pd.read_excel(file_path, engine="calamine")
            # Clean and convert DataFrame to list of dictionaries
            return self._clean_dataframe_for_json(df)
        except ImportError as e:
            raise InvalidFileType(
                file_url=self.file_url,
                file_type=self.file_type,
                error=f"Missing required dependency for Excel parsing: {str(e)}. Please install calamine: pip install python-calamine",
            )
        except Exception as e:
            raise InvalidFileType(
                file_url=self.file_url, file_type=self.file_type, error=f"Failed to parse Excel file: {str(e)}"
            )
    async def _parse_pdf_file(self, file_path: str) -> str:
        """Parse PDF file and return extracted text.
        Uses the shared PDF parsing utility that tries pypdf first,
        then falls back to pdfplumber if pypdf fails.
        """
        try:
            return extract_pdf_file(file_path, file_identifier=self.file_url)
        except PDFParsingError as e:
            raise InvalidFileType(file_url=self.file_url, file_type=self.file_type, error=str(e))
    async def _parse_image_file(self, file_path: str) -> str:
        """Parse image file using vision LLM for OCR."""
        try:
            with open(file_path, "rb") as f:
                image_bytes = f.read()
            llm_prompt = prompt_engine.load_prompt("extract-text-from-image")
            llm_api_handler = LLMAPIHandlerFactory.get_override_llm_api_handler(
                self.override_llm_key, default=app.LLM_API_HANDLER
            )
            llm_response = await llm_api_handler(
                prompt=llm_prompt,
                prompt_name="extract-text-from-image",
                screenshots=[image_bytes],
                force_dict=True,
            )
            # Missing key yields an empty string rather than an error
            return llm_response.get("extracted_text", "")
        except Exception:
            LOG.exception("Failed to extract text from image via OCR", file_url=self.file_url)
            raise
    async def _parse_docx_file(self, file_path: str, max_tokens: int = MAX_FILE_PARSE_INPUT_TOKENS) -> str:
        """Parse DOCX file and return extracted text.
        Extracts text from all paragraphs and tables in the document,
        respecting the token limit.
        """
        try:
            document = docx.Document(file_path)
            text_parts = []
            current_tokens = 0
            truncated = False
            # Extract text from paragraphs
            for paragraph in document.paragraphs:
                if paragraph.text.strip():
                    para_tokens = count_tokens(paragraph.text)
                    if max_tokens and current_tokens + para_tokens > max_tokens:
                        LOG.warning(
                            "DOCX text exceeds token limit, truncating",
                            file_url=self.file_url,
                            current_tokens=current_tokens,
                            max_tokens=max_tokens,
                        )
                        truncated = True
                        break
                    text_parts.append(paragraph.text)
                    current_tokens += para_tokens
            # Extract text from tables (only if not already truncated)
            if not truncated:
                for table in document.tables:
                    if truncated:
                        break
                    for row in table.rows:
                        row_text = []
                        for cell in row.cells:
                            cell_text = cell.text.strip()
                            if cell_text:
                                row_text.append(cell_text)
                        if row_text:
                            # Join cells with " | " so table rows stay readable as text
                            row_str = " | ".join(row_text)
                            row_tokens = count_tokens(row_str)
                            if max_tokens and current_tokens + row_tokens > max_tokens:
                                LOG.warning(
                                    "DOCX text exceeds token limit, truncating at table",
                                    file_url=self.file_url,
                                    current_tokens=current_tokens,
                                    max_tokens=max_tokens,
                                )
                                truncated = True
                                break
                            text_parts.append(row_str)
                            current_tokens += row_tokens
            extracted_text = "\n".join(text_parts)
            extracted_text = sanitize_postgres_text(extracted_text)
            LOG.info(
                "Successfully parsed DOCX file",
                file_url=self.file_url,
                paragraph_count=len(document.paragraphs),
                table_count=len(document.tables),
                text_length=len(extracted_text),
                truncated=truncated,
            )
            return extracted_text
        except Exception as e:
            raise InvalidFileType(
                file_url=self.file_url, file_type=self.file_type, error=f"Failed to parse DOCX file: {str(e)}"
            )
    async def _extract_with_ai(
        self, content: str | list[dict[str, Any]], workflow_run_context: WorkflowRunContext
    ) -> dict[str, Any]:
        """Extract structured data using AI based on json_schema."""
        # Use local variable to avoid mutating the instance
        schema_to_use = self.json_schema or {
            "type": "object",
            "properties": {
                "output": {
                    "type": "object",
                    "description": "Information extracted from the file",
                }
            },
        }
        # Convert content to string for AI processing
        if isinstance(content, list):
            # For CSV/Excel data, convert to a readable format
            content_str = json.dumps(content, indent=2)
        else:
            content_str = content
        llm_prompt = prompt_engine.load_prompt(
            "extract-information-from-file-text", extracted_text_content=content_str, json_schema=schema_to_use
        )
        llm_key = self.override_llm_key
        llm_api_handler = LLMAPIHandlerFactory.get_override_llm_api_handler(llm_key, default=app.LLM_API_HANDLER)
        llm_response = await llm_api_handler(
            prompt=llm_prompt, prompt_name="extract-information-from-file-text", force_dict=False
        )
        return llm_response
    async def execute(
        self,
        workflow_run_id: str,
        workflow_run_block_id: str,
        organization_id: str | None = None,
        browser_session_id: str | None = None,
        **kwargs: dict,
    ) -> BlockResult:
        """Download, validate, and parse the file, then record the result.

        Note: mutates ``self.file_url`` (parameter resolution / template rendering)
        and ``self.file_type`` (auto-detection) before parsing.
        """
        workflow_run_context = self.get_workflow_run_context(workflow_run_id)
        if (
            self.file_url
            and workflow_run_context.has_parameter(self.file_url)
            and workflow_run_context.has_value(self.file_url)
        ):
            file_url_parameter_value = workflow_run_context.get_value(self.file_url)
            if file_url_parameter_value:
                LOG.info(
                    "FileParserBlock: File URL is parameterized, using parameter value",
                    file_url_parameter_value=file_url_parameter_value,
                    file_url_parameter_key=self.file_url,
                )
                self.file_url = file_url_parameter_value
        try:
            self.format_potential_template_parameters(workflow_run_context)
        except Exception as e:
            return await self.build_block_result(
                success=False,
                failure_reason=f"Failed to format jinja template: {str(e)}",
                output_parameter_value=None,
                status=BlockStatus.failed,
                workflow_run_block_id=workflow_run_block_id,
                organization_id=organization_id,
            )
        # Download the file
        if self.file_url.startswith("s3://"):
            file_path = await download_from_s3(self.get_async_aws_client(), self.file_url)
        else:
            file_path = await download_file(self.file_url)
        # Auto-detect file type if not explicitly set (IMAGE/EXCEL/PDF/DOCX are explicit choices)
        if self.file_type not in (FileType.IMAGE, FileType.EXCEL, FileType.PDF, FileType.DOCX):
            self.file_type = self._detect_file_type_from_url(self.file_url)
        # Validate the file type
        self.validate_file_type(self.file_url, file_path)
        LOG.debug(
            "FileParserBlock: After file type validation",
            file_type=self.file_type,
            json_schema_present=self.json_schema is not None,
            json_schema_type=type(self.json_schema),
        )
        # Parse the file based on type
        parsed_data: str | list[dict[str, Any]]
        if self.file_type == FileType.CSV:
            parsed_data = await self._parse_csv_file(file_path)
        elif self.file_type == FileType.EXCEL:
            parsed_data = await self._parse_excel_file(file_path)
        elif self.file_type == FileType.PDF:
            parsed_data = await self._parse_pdf_file(file_path)
        elif self.file_type == FileType.IMAGE:
            parsed_data = await self._parse_image_file(file_path)
        elif self.file_type == FileType.DOCX:
            parsed_data = await self._parse_docx_file(file_path)
        else:
            return await self.build_block_result(
                success=False,
                failure_reason=f"Unsupported file type: {self.file_type}",
                output_parameter_value=None,
                status=BlockStatus.failed,
                workflow_run_block_id=workflow_run_block_id,
                organization_id=organization_id,
            )
        # If json_schema is provided, use AI to extract structured data
        final_data: str | list[dict[str, Any]] | dict[str, Any]
        LOG.debug(
            "FileParserBlock: JSON schema check",
            has_json_schema=self.json_schema is not None,
            json_schema_type=type(self.json_schema),
            json_schema=self.json_schema,
        )
        if self.json_schema:
            try:
                ai_extracted_data = await self._extract_with_ai(parsed_data, workflow_run_context)
                final_data = ai_extracted_data
            except Exception as e:
                return await self.build_block_result(
                    success=False,
                    failure_reason=f"Failed to extract data with AI: {str(e)}",
                    output_parameter_value=None,
                    status=BlockStatus.failed,
                    workflow_run_block_id=workflow_run_block_id,
                    organization_id=organization_id,
                )
        else:
            # Return raw parsed data
            final_data = parsed_data
        # Record the parsed data
        await self.record_output_parameter_value(workflow_run_context, workflow_run_id, final_data)
        return await self.build_block_result(
            success=True,
            failure_reason=None,
            output_parameter_value=final_data,
            status=BlockStatus.completed,
            workflow_run_block_id=workflow_run_block_id,
            organization_id=organization_id,
        )
class PDFParserBlock(Block):
    """
    DEPRECATED: Use FileParserBlock with file_type=FileType.PDF instead.
    This block will be removed in a future version.

    Downloads a PDF (HTTP(S) or S3), extracts its text, and runs an LLM
    extraction over that text using ``json_schema`` (or a generic default
    schema when none is provided).
    """

    # There is a mypy bug with Literal. Without the type: ignore, mypy will raise an error:
    # Parameter 1 of Literal[...] cannot be of type "Any"
    block_type: Literal[BlockType.PDF_PARSER] = BlockType.PDF_PARSER  # type: ignore
    file_url: str
    json_schema: dict[str, Any] | None = None

    def get_all_parameters(
        self,
        workflow_run_id: str,
    ) -> list[PARAMETER_TYPE]:
        """Return the workflow parameter backing ``file_url``, if it is one."""
        run_context = self.get_workflow_run_context(workflow_run_id)
        if not self.file_url:
            return []
        if not run_context.has_parameter(self.file_url):
            return []
        return [run_context.get_parameter(self.file_url)]

    def format_potential_template_parameters(self, workflow_run_context: WorkflowRunContext) -> None:
        """Render any Jinja template in ``file_url`` in place."""
        self.file_url = self.format_block_parameter_template_from_workflow_run_context(
            self.file_url, workflow_run_context
        )

    async def execute(
        self,
        workflow_run_id: str,
        workflow_run_block_id: str,
        organization_id: str | None = None,
        browser_session_id: str | None = None,
        **kwargs: dict,
    ) -> BlockResult:
        """Download the PDF, extract its text, and record the LLM extraction result."""
        run_context = self.get_workflow_run_context(workflow_run_id)
        # Resolve a parameterized file_url to its concrete value first.
        is_parameterized = (
            self.file_url
            and run_context.has_parameter(self.file_url)
            and run_context.has_value(self.file_url)
        )
        if is_parameterized:
            resolved_url = run_context.get_value(self.file_url)
            if resolved_url:
                LOG.info(
                    "PDFParserBlock: File URL is parameterized, using parameter value",
                    file_url_parameter_value=resolved_url,
                    file_url_parameter_key=self.file_url,
                )
                self.file_url = resolved_url
        try:
            self.format_potential_template_parameters(run_context)
        except Exception as e:
            return await self.build_block_result(
                success=False,
                failure_reason=f"Failed to format jinja template: {str(e)}",
                output_parameter_value=None,
                status=BlockStatus.failed,
                workflow_run_block_id=workflow_run_block_id,
                organization_id=organization_id,
            )
        # Fetch the file from S3 or over HTTP, depending on the URI scheme.
        if self.file_url.startswith("s3://"):
            local_path = await download_from_s3(self.get_async_aws_client(), self.file_url)
        else:
            local_path = await download_file(self.file_url)
        try:
            extracted_text = extract_pdf_file(local_path, file_identifier=self.file_url)
        except PDFParsingError:
            return await self.build_block_result(
                success=False,
                failure_reason="Failed to parse PDF file",
                output_parameter_value=None,
                status=BlockStatus.failed,
                workflow_run_block_id=workflow_run_block_id,
                organization_id=organization_id,
            )
        # Fall back to a generic schema when none (or an empty one) was given.
        if not self.json_schema:
            self.json_schema = {
                "type": "object",
                "properties": {
                    "output": {
                        "type": "object",
                        "description": "Information extracted from the text",
                    }
                },
            }
        llm_prompt = prompt_engine.load_prompt(
            "extract-information-from-file-text", extracted_text_content=extracted_text, json_schema=self.json_schema
        )
        llm_response = await app.LLM_API_HANDLER(
            prompt=llm_prompt, prompt_name="extract-information-from-file-text", force_dict=False
        )
        # Record the parsed data
        await self.record_output_parameter_value(run_context, workflow_run_id, llm_response)
        return await self.build_block_result(
            success=True,
            failure_reason=None,
            output_parameter_value=llm_response,
            status=BlockStatus.completed,
            workflow_run_block_id=workflow_run_block_id,
            organization_id=organization_id,
        )
class WaitBlock(Block):
    """Workflow block that pauses execution for ``wait_sec`` seconds, then succeeds."""

    # There is a mypy bug with Literal. Without the type: ignore, mypy will raise an error:
    # Parameter 1 of Literal[...] cannot be of type "Any"
    block_type: Literal[BlockType.WAIT] = BlockType.WAIT  # type: ignore
    wait_sec: int
    parameters: list[PARAMETER_TYPE] = []

    def get_all_parameters(
        self,
        workflow_run_id: str,
    ) -> list[PARAMETER_TYPE]:
        """Wait blocks hold their parameters directly; nothing is resolved lazily."""
        return self.parameters

    async def execute(
        self,
        workflow_run_id: str,
        workflow_run_block_id: str,
        organization_id: str | None = None,
        browser_session_id: str | None = None,
        **kwargs: dict,
    ) -> BlockResult:
        """Record the configured wait, sleep for it, and report success."""
        # TODO: we need to support to interrupt the sleep when the workflow run failed/cancelled/terminated
        await app.DATABASE.update_workflow_run_block(
            workflow_run_block_id=workflow_run_block_id,
            organization_id=organization_id,
            wait_sec=self.wait_sec,
        )
        LOG.info(
            "Going to pause the workflow for a while",
            second=self.wait_sec,
            workflow_run_id=workflow_run_id,
        )
        await asyncio.sleep(self.wait_sec)
        run_context = self.get_workflow_run_context(workflow_run_id)
        outcome = {"success": True}
        await self.record_output_parameter_value(run_context, workflow_run_id, outcome)
        return await self.build_block_result(
            success=True,
            failure_reason=None,
            output_parameter_value=outcome,
            status=BlockStatus.completed,
            workflow_run_block_id=workflow_run_block_id,
            organization_id=organization_id,
        )
class HumanInteractionBlock(BaseTaskBlock):
    """
    A block for human/agent interaction.
    For the first pass at this, the implicit behaviour is that the user is given a single binary
    choice (a go/no-go).
    If the human:
    - chooses positively, the workflow continues
    - chooses negatively, the workflow is terminated
    - does not respond within the timeout period, the workflow terminates
    """

    # There is a mypy bug with Literal. Without the type: ignore, mypy will raise an error:
    # Parameter 1 of Literal[...] cannot be of type "Any"
    block_type: Literal[BlockType.HUMAN_INTERACTION] = BlockType.HUMAN_INTERACTION  # type: ignore
    # Prompt shown to the human reviewer (Jinja-templatable).
    instructions: str = "Please review and approve or reject to continue the workflow."
    # Labels for the binary choice presented to the reviewer (Jinja-templatable).
    positive_descriptor: str = "Approve"
    negative_descriptor: str = "Reject"
    # How long execute() polls for a response before timing the block out.
    timeout_seconds: int = 60 * 60 * 2  # two hours
    # email options
    sender: str = "hello@skyvern.com"
    recipients: list[str] = []
    subject: str = "Human interaction required for workflow run"
    body: str = "Your interaction is required for a workflow run!"

    def format_potential_template_parameters(self, workflow_run_context: WorkflowRunContext) -> None:
        """Render Jinja templates in every user-facing field using the workflow run context."""
        super().format_potential_template_parameters(workflow_run_context)
        self.instructions = self.format_block_parameter_template_from_workflow_run_context(
            self.instructions, workflow_run_context
        )
        self.body = self.format_block_parameter_template_from_workflow_run_context(self.body, workflow_run_context)
        self.subject = self.format_block_parameter_template_from_workflow_run_context(
            self.subject, workflow_run_context
        )
        # Each recipient entry may itself be a template (e.g. a parameter reference).
        formatted: list[str] = []
        for recipient in self.recipients:
            formatted.append(
                self.format_block_parameter_template_from_workflow_run_context(recipient, workflow_run_context)
            )
        self.recipients = formatted
        self.negative_descriptor = self.format_block_parameter_template_from_workflow_run_context(
            self.negative_descriptor, workflow_run_context
        )
        self.positive_descriptor = self.format_block_parameter_template_from_workflow_run_context(
            self.positive_descriptor, workflow_run_context
        )

    async def execute(
        self,
        workflow_run_id: str,
        workflow_run_block_id: str,
        organization_id: str | None = None,
        browser_session_id: str | None = None,
        **kwargs: dict,
    ) -> BlockResult:
        """Pause the workflow run, email the reviewers, then poll until the run status
        leaves ``paused`` (treated as success) or ``timeout_seconds`` elapses (timed out).

        Raises nothing directly; all failure modes are converted into a failed BlockResult.
        """
        # avoid circular import
        from skyvern.forge.sdk.workflow.models.workflow import WorkflowRunStatus  # noqa: PLC0415

        workflow_run_context = self.get_workflow_run_context(workflow_run_id)
        try:
            self.format_potential_template_parameters(workflow_run_context)
        except Exception as e:
            return await self.build_block_result(
                success=False,
                failure_reason=f"Failed to format jinja template: {str(e)}",
                output_parameter_value=None,
                status=BlockStatus.failed,
                workflow_run_block_id=workflow_run_block_id,
                organization_id=organization_id,
            )
        # Persist the resolved interaction details on the block record so the UI can show them.
        await app.DATABASE.update_workflow_run_block(
            workflow_run_block_id=workflow_run_block_id,
            organization_id=organization_id,
            recipients=self.recipients,
            subject=self.subject,
            body=self.body,
            instructions=self.instructions,
            positive_descriptor=self.positive_descriptor,
            negative_descriptor=self.negative_descriptor,
        )
        LOG.info(
            "Pausing workflow for human interaction",
            workflow_run_id=workflow_run_id,
            recipients=self.recipients,
            timeout=self.timeout_seconds,
            browser_session_id=browser_session_id,
        )
        # Mark the run paused BEFORE notifying; the polling loop below waits for this
        # status to change away from paused.
        await app.DATABASE.update_workflow_run(
            workflow_run_id=workflow_run_id,
            status=WorkflowRunStatus.paused,
        )
        workflow_run = await app.DATABASE.get_workflow_run(
            workflow_run_id=workflow_run_id,
            organization_id=organization_id,
        )
        if not workflow_run:
            return await self.build_block_result(
                success=False,
                failure_reason="Workflow run not found",
                output_parameter_value=None,
                status=BlockStatus.failed,
                workflow_run_block_id=workflow_run_block_id,
                organization_id=organization_id,
            )
        app_url = f"{settings.SKYVERN_APP_URL}/runs/{workflow_run_id}/overview"
        body = f"{self.body}\n\nKindly visit {app_url}\n\n{self.instructions}\n\n"
        subject = f"{self.subject} - Workflow Run ID: {workflow_run_id}"
        try:
            await email.send(
                body=body,
                sender=self.sender,
                subject=subject,
                recipients=self.recipients,
            )
            email_success = True
            email_failure_reason = None
        except Exception as ex:
            LOG.error(
                "Failed to send human interaction email",
                workflow_run_id=workflow_run_id,
                error=str(ex),
                browser_session_id=browser_session_id,
            )
            email_success = False
            email_failure_reason = str(ex)
        # NOTE(review): on email failure the run is left in the paused status set above —
        # confirm whether an upstream handler unpauses it when the block fails.
        if not email_success:
            return await self.build_block_result(
                success=False,
                failure_reason=f"Failed to send human interaction email: {email_failure_reason or 'email failed'}",
                output_parameter_value=None,
                status=BlockStatus.failed,
                workflow_run_block_id=workflow_run_block_id,
                organization_id=organization_id,
            )
        # Wait for the timeout_seconds or until the workflow run status changes from paused
        start_time = asyncio.get_event_loop().time()
        check_interval = 5  # Check every 5 seconds
        log_that_we_are_waiting = True
        log_wait = 0
        while True:
            # Throttle the "waiting" log line to roughly once per minute.
            if not log_that_we_are_waiting:
                log_wait += check_interval
                if log_wait >= 60:  # Log every 1 minute
                    log_that_we_are_waiting = True
                    log_wait = 0
            elapsed_time_seconds = asyncio.get_event_loop().time() - start_time
            if log_that_we_are_waiting:
                LOG.info(
                    "Waiting for human interaction...",
                    workflow_run_id=workflow_run_id,
                    elapsed_time_seconds=elapsed_time_seconds,
                    timeout_seconds=self.timeout_seconds,
                    browser_session_id=browser_session_id,
                )
                log_that_we_are_waiting = False
            # Check if timeout_seconds has elapsed
            if elapsed_time_seconds >= self.timeout_seconds:
                LOG.info(
                    "Human Interaction block timeout_seconds reached",
                    workflow_run_id=workflow_run_id,
                    elapsed_time_seconds=elapsed_time_seconds,
                    browser_session_id=browser_session_id,
                )
                workflow_run_context = self.get_workflow_run_context(workflow_run_id)
                success = False
                reason = "Timeout elapsed with no human interaction"
                result_dict = {"success": success, "reason": reason}
                await self.record_output_parameter_value(workflow_run_context, workflow_run_id, result_dict)
                return await self.build_block_result(
                    success=success,
                    failure_reason=reason,
                    output_parameter_value=result_dict,
                    status=BlockStatus.timed_out,
                    workflow_run_block_id=workflow_run_block_id,
                    organization_id=organization_id,
                )
            workflow_run = await app.DATABASE.get_workflow_run(
                workflow_run_id=workflow_run_id,
                organization_id=organization_id,
            )
            # Any status transition away from paused is treated as the human having responded.
            if workflow_run and workflow_run.status != WorkflowRunStatus.paused:
                LOG.info(
                    "Workflow run status changed from paused",
                    workflow_run_id=workflow_run_id,
                    new_status=workflow_run.status,
                    browser_session_id=browser_session_id,
                )
                workflow_run_context = self.get_workflow_run_context(workflow_run_id)
                result_dict = {"success": True, "reason": f"status_changed:{workflow_run.status}"}
                await self.record_output_parameter_value(workflow_run_context, workflow_run_id, result_dict)
                return await self.build_block_result(
                    success=True,
                    failure_reason=None,
                    output_parameter_value=result_dict,
                    status=BlockStatus.completed,
                    workflow_run_block_id=workflow_run_block_id,
                    organization_id=organization_id,
                )
            # Never sleep past the deadline; remaining time is > 0 here because the
            # timeout branch above already returned.
            await asyncio.sleep(min(check_interval, self.timeout_seconds - elapsed_time_seconds))
class ValidationBlock(BaseTaskBlock):
    # A task block that validates the outcome of a preceding block; it must not be
    # the first block of a workflow run, since there is nothing to validate yet.
    # There is a mypy bug with Literal. Without the type: ignore, mypy will raise an error:
    # Parameter 1 of Literal[...] cannot be of type "Any"
    block_type: Literal[BlockType.VALIDATION] = BlockType.VALIDATION  # type: ignore

    def get_all_parameters(
        self,
        workflow_run_id: str,
    ) -> list[PARAMETER_TYPE]:
        """Return only the parameters explicitly declared on this block."""
        return self.parameters

    async def execute(
        self,
        workflow_run_id: str,
        workflow_run_block_id: str,
        organization_id: str | None = None,
        browser_session_id: str | None = None,
        **kwargs: dict,
    ) -> BlockResult:
        """Terminate when this is the first task of the run; otherwise run the
        standard task execution from the base class.

        Returns a terminated BlockResult if the block is first in the run.
        """
        task_order, _ = await self.get_task_order(workflow_run_id, 0)
        is_first_task = task_order == 0
        if is_first_task:
            return await self.build_block_result(
                success=False,
                failure_reason="Validation block should not be the first block",
                output_parameter_value=None,
                status=BlockStatus.terminated,
                workflow_run_block_id=workflow_run_block_id,
                organization_id=organization_id,
            )
        # Fix: forward browser_session_id (previously dropped) and unpack **kwargs
        # (previously the dict was passed as a single keyword argument named "kwargs").
        return await super().execute(
            workflow_run_id=workflow_run_id,
            workflow_run_block_id=workflow_run_block_id,
            organization_id=organization_id,
            browser_session_id=browser_session_id,
            **kwargs,
        )
class ActionBlock(BaseTaskBlock):
    # Executes a single browser action; inherits all behavior from BaseTaskBlock.
    # There is a mypy bug with Literal. Without the type: ignore, mypy will raise an error:
    # Parameter 1 of Literal[...] cannot be of type "Any"
    block_type: Literal[BlockType.ACTION] = BlockType.ACTION  # type: ignore
class NavigationBlock(BaseTaskBlock):
    # A task block with a mandatory navigation goal; execution comes from BaseTaskBlock.
    # There is a mypy bug with Literal. Without the type: ignore, mypy will raise an error:
    # Parameter 1 of Literal[...] cannot be of type "Any"
    block_type: Literal[BlockType.NAVIGATION] = BlockType.NAVIGATION  # type: ignore
    # Required (non-defaulted) natural-language goal for the navigation task.
    navigation_goal: str
class ExtractionBlock(BaseTaskBlock):
    # A task block with a mandatory data-extraction goal; execution comes from BaseTaskBlock.
    # There is a mypy bug with Literal. Without the type: ignore, mypy will raise an error:
    # Parameter 1 of Literal[...] cannot be of type "Any"
    block_type: Literal[BlockType.EXTRACTION] = BlockType.EXTRACTION  # type: ignore
    # Required (non-defaulted) description of the data to extract.
    data_extraction_goal: str
class LoginBlock(BaseTaskBlock):
    # Login-specialized task block; inherits all behavior from BaseTaskBlock.
    # There is a mypy bug with Literal. Without the type: ignore, mypy will raise an error:
    # Parameter 1 of Literal[...] cannot be of type "Any"
    block_type: Literal[BlockType.LOGIN] = BlockType.LOGIN  # type: ignore
class FileDownloadBlock(BaseTaskBlock):
    # File-download-specialized task block; inherits all behavior from BaseTaskBlock.
    # There is a mypy bug with Literal. Without the type: ignore, mypy will raise an error:
    # Parameter 1 of Literal[...] cannot be of type "Any"
    block_type: Literal[BlockType.FILE_DOWNLOAD] = BlockType.FILE_DOWNLOAD  # type: ignore
class UrlBlock(BaseTaskBlock):
    # Navigates the browser to a fixed URL; execution comes from BaseTaskBlock.
    # There is a mypy bug with Literal. Without the type: ignore, mypy will raise an error:
    # Parameter 1 of Literal[...] cannot be of type "Any"
    block_type: Literal[BlockType.GOTO_URL] = BlockType.GOTO_URL  # type: ignore
    # Required (non-defaulted) destination URL.
    url: str
class TaskV2Block(Block):
    # Runs a Task v2 ("observer cruise") as a child run of the current workflow run.
    # There is a mypy bug with Literal. Without the type: ignore, mypy will raise an error:
    # Parameter 1 of Literal[...] cannot be of type "Any"
    block_type: Literal[BlockType.TaskV2] = BlockType.TaskV2  # type: ignore
    # Natural-language prompt driving the task (Jinja-templatable).
    prompt: str
    # Starting URL; when omitted, execute() falls back to the browser's current page URL.
    url: str | None = None
    # Optional TOTP (2FA) settings; the identifier may also be resolved from the
    # workflow run context's credential-derived identifiers.
    totp_verification_url: str | None = None
    totp_identifier: str | None = None
    max_iterations: int = settings.MAX_ITERATIONS_PER_TASK_V2
    max_steps: int = settings.MAX_STEPS_PER_TASK_V2

    def _resolve_totp_identifier(self, workflow_run_context: WorkflowRunContext) -> str | None:
        """Prefer the explicitly configured TOTP identifier; otherwise fall back to the
        first credential TOTP identifier in the workflow run context, if any."""
        if self.totp_identifier:
            return self.totp_identifier
        if workflow_run_context.credential_totp_identifiers:
            return next(iter(workflow_run_context.credential_totp_identifiers.values()), None)
        return None

    def get_all_parameters(
        self,
        workflow_run_id: str,
    ) -> list[PARAMETER_TYPE]:
        # Task v2 blocks declare no workflow parameters of their own.
        return []

    def format_potential_template_parameters(self, workflow_run_context: WorkflowRunContext) -> None:
        """Render Jinja templates in prompt/url/TOTP fields and normalize the TOTP URL."""
        self.prompt = self.format_block_parameter_template_from_workflow_run_context(self.prompt, workflow_run_context)
        if self.url:
            self.url = self.format_block_parameter_template_from_workflow_run_context(self.url, workflow_run_context)
        if self.totp_identifier:
            self.totp_identifier = self.format_block_parameter_template_from_workflow_run_context(
                self.totp_identifier, workflow_run_context
            )
        if self.totp_verification_url:
            self.totp_verification_url = self.format_block_parameter_template_from_workflow_run_context(
                self.totp_verification_url, workflow_run_context
            )
            self.totp_verification_url = prepend_scheme_and_validate_url(self.totp_verification_url)

    async def execute(
        self,
        workflow_run_id: str,
        workflow_run_block_id: str,
        organization_id: str | None = None,
        browser_session_id: str | None = None,
        **kwargs: dict,
    ) -> BlockResult:
        """Initialize, queue and run a Task v2, then record its output and translate
        its terminal status into a block status via TASKV2_TO_BLOCK_STATUS.

        Raises ValueError when organization or workflow run lookups fail.
        """
        from skyvern.forge.sdk.workflow.models.workflow import WorkflowRunStatus  # noqa: PLC0415
        from skyvern.services import task_v2_service  # noqa: PLC0415

        workflow_run_context = self.get_workflow_run_context(workflow_run_id)
        # Simple template resolution - no complex dynamic resolution to prevent recursion
        try:
            self.format_potential_template_parameters(workflow_run_context)
            # Use the resolved values directly
            resolved_prompt = self.prompt
            resolved_url = self.url
            resolved_totp_identifier = self._resolve_totp_identifier(workflow_run_context)
            resolved_totp_verification_url = self.totp_verification_url
        except Exception as e:
            output_reason = f"Failed to format jinja template: {str(e)}"
            await self.record_output_parameter_value(
                workflow_run_context, workflow_run_id, {"failure_reason": output_reason}
            )
            return await self.build_block_result(
                success=False,
                failure_reason=f"Failed to format jinja template: {str(e)}",
                output_parameter_value=None,
                status=BlockStatus.failed,
                workflow_run_block_id=workflow_run_block_id,
                organization_id=organization_id,
            )
        # No explicit URL: reuse the current page URL from the live browser, if any.
        if not resolved_url:
            browser_state = app.BROWSER_MANAGER.get_for_workflow_run(workflow_run_id)
            if browser_state:
                page = await browser_state.get_working_page()
                if page:
                    current_url = await SkyvernFrame.get_url(frame=page)
                    if current_url != "about:blank":
                        resolved_url = current_url
        if not organization_id:
            raise ValueError("Running TaskV2Block requires organization_id")
        organization = await app.DATABASE.get_organization(organization_id)
        if not organization:
            raise ValueError(f"Organization not found {organization_id}")
        workflow_run = await app.DATABASE.get_workflow_run(workflow_run_id, organization_id)
        if not workflow_run:
            raise ValueError(f"WorkflowRun not found {workflow_run_id} when running TaskV2Block")
        try:
            task_v2 = await task_v2_service.initialize_task_v2(
                organization=organization,
                user_prompt=resolved_prompt,
                user_url=resolved_url,
                parent_workflow_run_id=workflow_run_id,
                proxy_location=workflow_run.proxy_location,
                totp_identifier=resolved_totp_identifier,
                totp_verification_url=resolved_totp_verification_url,
                max_screenshot_scrolling_times=workflow_run.max_screenshot_scrolls,
            )
            await app.DATABASE.update_task_v2(
                task_v2.observer_cruise_id, status=TaskV2Status.queued, organization_id=organization_id
            )
            # Link the child workflow run to this block so the UI can navigate into it.
            if task_v2.workflow_run_id:
                await app.DATABASE.update_workflow_run(
                    workflow_run_id=task_v2.workflow_run_id,
                    status=WorkflowRunStatus.queued,
                )
                await app.DATABASE.update_workflow_run_block(
                    workflow_run_block_id=workflow_run_block_id,
                    organization_id=organization_id,
                    block_workflow_run_id=task_v2.workflow_run_id,
                )
            task_v2 = await task_v2_service.run_task_v2(
                organization=organization,
                task_v2_id=task_v2.observer_cruise_id,
                request_id=None,
                max_steps_override=self.max_steps,
                browser_session_id=browser_session_id,
            )
        finally:
            # Restore the parent workflow's skyvern context: running the child task v2
            # mutates the global context, so rebuild it for this run regardless of outcome.
            context: skyvern_context.SkyvernContext | None = skyvern_context.current()
            current_run_id = context.run_id if context and context.run_id else workflow_run_id
            root_workflow_run_id = (
                context.root_workflow_run_id if context and context.root_workflow_run_id else workflow_run_id
            )
            skyvern_context.set(
                skyvern_context.SkyvernContext(
                    organization_id=organization_id,
                    organization_name=organization.organization_name,
                    workflow_id=workflow_run.workflow_id,
                    workflow_permanent_id=workflow_run.workflow_permanent_id,
                    workflow_run_id=workflow_run_id,
                    root_workflow_run_id=root_workflow_run_id,
                    run_id=current_run_id,
                    browser_session_id=browser_session_id,
                    max_screenshot_scrolls=workflow_run.max_screenshot_scrolls,
                )
            )
        result_dict = None
        if task_v2:
            result_dict = task_v2.output
        # Determine block status from task status using module-level mapping
        block_status = TASKV2_TO_BLOCK_STATUS.get(task_v2.status, BlockStatus.failed)
        success = task_v2.status == TaskV2Status.completed
        failure_reason: str | None = None
        task_v2_workflow_run_id = task_v2.workflow_run_id
        if task_v2_workflow_run_id:
            task_v2_workflow_run = await app.DATABASE.get_workflow_run(task_v2_workflow_run_id, organization_id)
            if task_v2_workflow_run:
                failure_reason = task_v2_workflow_run.failure_reason
        # If continue_on_failure is True, we treat the block as successful even if the task failed
        # This allows the workflow to continue execution despite this block's failure
        task_screenshot_artifacts = await app.WORKFLOW_SERVICE.get_recent_task_screenshot_artifacts(
            organization_id=organization_id,
            task_v2_id=task_v2.observer_cruise_id,
        )
        workflow_screenshot_artifacts = await app.WORKFLOW_SERVICE.get_recent_workflow_screenshot_artifacts(
            workflow_run_id=workflow_run_id,
            organization_id=organization_id,
        )
        task_v2_output = {
            "task_id": task_v2.observer_cruise_id,
            "status": task_v2.status,
            "summary": task_v2.summary,
            "extracted_information": result_dict,
            "failure_reason": failure_reason,
            "task_screenshot_artifact_ids": [a.artifact_id for a in task_screenshot_artifacts],
            "workflow_screenshot_artifact_ids": [a.artifact_id for a in workflow_screenshot_artifacts],
        }
        await self.record_output_parameter_value(workflow_run_context, workflow_run_id, task_v2_output)
        return await self.build_block_result(
            success=success or self.continue_on_failure,
            failure_reason=failure_reason,
            output_parameter_value=result_dict,
            status=block_status,
            workflow_run_block_id=workflow_run_block_id,
            organization_id=organization_id,
        )
class HttpRequestBlock(Block):
    # Performs an arbitrary HTTP request (optionally multipart with file uploads), or
    # downloads the response body to a file when save_response_as_file is set.
    # There is a mypy bug with Literal. Without the type: ignore, mypy will raise an error:
    # Parameter 1 of Literal[...] cannot be of type "Any"
    block_type: Literal[BlockType.HTTP_REQUEST] = BlockType.HTTP_REQUEST  # type: ignore
    # Individual HTTP parameters
    method: str = "GET"
    url: str | None = None
    headers: dict[str, str] | None = None
    body: dict[str, Any] | None = None  # Changed to consistently be dict only
    files: dict[str, str] | None = None  # Dictionary mapping field names to file paths for multipart file uploads
    timeout: int = 30
    follow_redirects: bool = True
    # When downloading, an optional explicit filename for the saved response.
    download_filename: str | None = None
    save_response_as_file: bool = False
    # Parameters for templating
    parameters: list[PARAMETER_TYPE] = []
    # Allowed directories for local file access (class variable, not a Pydantic field)
    _allowed_dirs: ClassVar[list[str] | None] = None

    @classmethod
    def get_allowed_dirs(cls) -> list[str]:
        """Get the list of allowed directories for local file access.
        Computed once and cached for performance.
        """
        if cls._allowed_dirs is None:
            allowed_dirs: list[str] = []
            if settings.ARTIFACT_STORAGE_PATH:
                allowed_dirs.append(os.path.abspath(settings.ARTIFACT_STORAGE_PATH))
            if settings.VIDEO_PATH:
                allowed_dirs.append(os.path.abspath(settings.VIDEO_PATH))
            if settings.HAR_PATH:
                allowed_dirs.append(os.path.abspath(settings.HAR_PATH))
            if settings.LOG_PATH:
                allowed_dirs.append(os.path.abspath(settings.LOG_PATH))
            if settings.DOWNLOAD_PATH:
                allowed_dirs.append(os.path.abspath(settings.DOWNLOAD_PATH))
            cls._allowed_dirs = allowed_dirs
        return cls._allowed_dirs or []

    def get_all_parameters(
        self,
        workflow_run_id: str,
    ) -> list[PARAMETER_TYPE]:
        """Return declared parameters, adding the URL's parameter when the url field
        is itself a parameter key (deduplicated by key)."""
        parameters = self.parameters
        workflow_run_context = self.get_workflow_run_context(workflow_run_id)
        # Check if url is a parameter
        if self.url and workflow_run_context.has_parameter(self.url):
            if self.url not in [parameter.key for parameter in parameters]:
                parameters.append(workflow_run_context.get_parameter(self.url))
        return parameters

    def format_potential_template_parameters(self, workflow_run_context: WorkflowRunContext) -> None:
        """Format template parameters in the block fields"""
        # Secrets are force-included because the rendered values go straight to the HTTP client.
        template_kwargs = {"force_include_secrets": True}

        def _render_templates_in_json(value: object) -> object:
            """
            Recursively render Jinja templates in nested JSON-like structures.
            This is required because HTTP request bodies are often deeply nested
            dict/list structures, and templates may appear at any depth.
            Supports {{ expr | json }} filter for type-preserving JSON injection.
            """
            if isinstance(value, str):
                rendered = self.format_block_parameter_template_from_workflow_run_context(
                    value, workflow_run_context, **template_kwargs
                )
                # A value wrapped entirely in the JSON marker is parsed back into its
                # native type; a marker mixed with other text is a user error.
                if rendered.startswith(_JSON_TYPE_MARKER) and rendered.endswith(_JSON_TYPE_MARKER):
                    json_str = rendered[len(_JSON_TYPE_MARKER) : -len(_JSON_TYPE_MARKER)]
                    try:
                        return json.loads(json_str)
                    except json.JSONDecodeError:
                        raise FailedToFormatJinjaStyleParameter(
                            value, f"Raw JSON filter produced invalid JSON: {json_str}"
                        )
                elif _JSON_TYPE_MARKER in rendered:
                    raise FailedToFormatJinjaStyleParameter(
                        value,
                        "The '| json' filter can only be used for complete value replacement. "
                        "It cannot be combined with other text (e.g., 'prefix-{{ val | json }}'). "
                        "Remove the surrounding text or remove the '| json' filter.",
                    )
                return rendered
            if isinstance(value, list):
                return [_render_templates_in_json(item) for item in value]
            if isinstance(value, dict):
                return {
                    cast(str, _render_templates_in_json(key)): _render_templates_in_json(val)
                    for key, val in value.items()
                }
            return value

        if self.url:
            self.url = self.format_block_parameter_template_from_workflow_run_context(
                self.url, workflow_run_context, **template_kwargs
            )
        if self.body:
            self.body = cast(dict[str, Any], _render_templates_in_json(self.body))
        if self.files:
            self.files = cast(dict[str, str], _render_templates_in_json(self.files))
        if self.headers:
            self.headers = cast(dict[str, str], _render_templates_in_json(self.headers))
        if self.download_filename:
            self.download_filename = self.format_block_parameter_template_from_workflow_run_context(
                self.download_filename, workflow_run_context, **template_kwargs
            )

    def validate_url(self, url: str) -> bool:
        """Validate if the URL is properly formatted"""
        try:
            result = urlparse(url)
            # Both a scheme (http/https) and a network location must be present.
            return all([result.scheme, result.netloc])
        except Exception:
            return False

    async def _execute_file_download(
        self,
        workflow_run_context: WorkflowRunContext,
        workflow_run_id: str,
        workflow_run_block_id: str,
        organization_id: str | None,
    ) -> BlockResult:
        """Download the response body of self.url into the run's download directory
        and record file path/name/size as the block output. All errors are converted
        into failed BlockResults."""
        if not self.url:
            return await self.build_block_result(
                success=False,
                failure_reason="URL is required for file download",
                output_parameter_value=None,
                status=BlockStatus.failed,
                workflow_run_block_id=workflow_run_block_id,
                organization_id=organization_id,
            )
        try:
            max_size_mb = settings.MAX_HTTP_DOWNLOAD_FILE_SIZE // (1024 * 1024)
            output_dir = get_download_dir(workflow_run_id)
            file_path = await download_file(
                self.url,
                max_size_mb=max_size_mb,
                headers=self.headers,
                output_dir=output_dir,
                filename=self.download_filename,
            )
            response_data = {
                "file_path": file_path,
                "file_name": os.path.basename(file_path),
                "file_size": os.path.getsize(file_path),
            }
            await self.record_output_parameter_value(workflow_run_context, workflow_run_id, response_data)
            return await self.build_block_result(
                success=True,
                failure_reason=None,
                output_parameter_value=response_data,
                status=BlockStatus.completed,
                workflow_run_block_id=workflow_run_block_id,
                organization_id=organization_id,
            )
        except aiohttp.ClientResponseError as e:
            error_data = {"error": f"HTTP {e.status}", "error_type": "http_error"}
            await self.record_output_parameter_value(workflow_run_context, workflow_run_id, error_data)
            return await self.build_block_result(
                success=False,
                failure_reason=f"HTTP {e.status}",
                output_parameter_value=error_data,
                status=BlockStatus.failed,
                workflow_run_block_id=workflow_run_block_id,
                organization_id=organization_id,
            )
        except DownloadFileMaxSizeExceeded as e:
            max_size_str = f"{e.max_size:.1f}"
            error_data = {"error": f"File exceeds maximum size of {max_size_str}MB", "error_type": "file_too_large"}
            await self.record_output_parameter_value(workflow_run_context, workflow_run_id, error_data)
            return await self.build_block_result(
                success=False,
                failure_reason=f"File exceeds maximum size of {max_size_str}MB",
                output_parameter_value=error_data,
                status=BlockStatus.failed,
                workflow_run_block_id=workflow_run_block_id,
                organization_id=organization_id,
            )
        except Exception as e:
            error_data = {"error": str(e), "error_type": "unknown"}
            LOG.warning(
                "File download failed",
                error=str(e),
                url=self.url,
                workflow_run_id=workflow_run_id,
            )
            await self.record_output_parameter_value(workflow_run_context, workflow_run_id, error_data)
            return await self.build_block_result(
                success=False,
                failure_reason=f"File download failed: {str(e)}",
                output_parameter_value=error_data,
                status=BlockStatus.failed,
                workflow_run_block_id=workflow_run_block_id,
                organization_id=organization_id,
            )

    async def execute(
        self,
        workflow_run_id: str,
        workflow_run_block_id: str,
        organization_id: str | None = None,
        browser_session_id: str | None = None,
        **kwargs: dict,
    ) -> BlockResult:
        """Execute the HTTP request and return the response"""
        workflow_run_context = self.get_workflow_run_context(workflow_run_id)
        try:
            self.format_potential_template_parameters(workflow_run_context)
        except Exception as e:
            return await self.build_block_result(
                success=False,
                failure_reason=f"Failed to format jinja template: {str(e)}",
                output_parameter_value=None,
                status=BlockStatus.failed,
                workflow_run_block_id=workflow_run_block_id,
                organization_id=organization_id,
            )
        # Validate URL
        if not self.url:
            return await self.build_block_result(
                success=False,
                failure_reason="URL is required for HTTP request",
                output_parameter_value=None,
                status=BlockStatus.failed,
                workflow_run_block_id=workflow_run_block_id,
                organization_id=organization_id,
            )
        if not self.validate_url(self.url):
            return await self.build_block_result(
                success=False,
                failure_reason=f"Invalid URL format: {self.url}",
                output_parameter_value=None,
                status=BlockStatus.failed,
                workflow_run_block_id=workflow_run_block_id,
                organization_id=organization_id,
            )
        # Add default content-type as application/json if not provided (unless files are being uploaded)
        if not self.headers:
            self.headers = {}
        # If files are provided, don't set default Content-Type (aiohttp will set multipart/form-data)
        if not self.files:
            if not self.headers.get("Content-Type") and not self.headers.get("content-type"):
                LOG.info("Adding default content-type as application/json", headers=self.headers)
                self.headers["Content-Type"] = "application/json"
        # Download files from HTTP URLs or S3 URIs if needed
        # Also allow local files from allowed directories (ARTIFACT_STORAGE_PATH, VIDEO_PATH, HAR_PATH, LOG_PATH)
        if self.files:
            downloaded_files: dict[str, str] = {}
            for field_name, file_path in self.files.items():
                # Parse file path (handle file:// URI format)
                actual_file_path: str | None = None
                is_file_uri = file_path.startswith("file://")
                if is_file_uri:
                    try:
                        actual_file_path = parse_uri_to_path(file_path)
                    except ValueError as e:
                        return await self.build_block_result(
                            success=False,
                            failure_reason=f"Invalid file URI format: {file_path}. Error: {str(e)}",
                            output_parameter_value=None,
                            status=BlockStatus.failed,
                            workflow_run_block_id=workflow_run_block_id,
                            organization_id=organization_id,
                        )
                else:
                    actual_file_path = file_path
                # Check if file_path is a URL or S3 URI
                is_url = (
                    file_path.startswith("http://") or file_path.startswith("https://") or file_path.startswith("www.")
                )
                is_s3_uri = file_path.startswith("s3://")
                # Check if file is in allowed directories
                is_allowed_local_file = False
                if actual_file_path:
                    # Convert to absolute path for comparison (handles both absolute and relative paths)
                    abs_file_path = os.path.abspath(actual_file_path)
                    # Get allowed directory paths (using class method for cached result)
                    allowed_dirs = self.get_allowed_dirs()
                    LOG.debug("HttpRequestBlock: Allowed directories", allowed_dirs=allowed_dirs)
                    # Check if file is within any allowed directory
                    for allowed_dir in allowed_dirs:
                        # Use os.path.commonpath to check if file is within allowed directory
                        try:
                            common_path = os.path.commonpath([abs_file_path, allowed_dir])
                            if common_path == allowed_dir:
                                is_allowed_local_file = True
                                break
                        except ValueError:
                            # Paths are on different drives (Windows) or incompatible
                            continue
                # If not URL, S3 URI, or allowed local file, reject
                if not (is_url or is_s3_uri or is_allowed_local_file):
                    return await self.build_block_result(
                        success=False,
                        failure_reason=f"No permission to access local file: {file_path}. Only HTTP/HTTPS URLs, S3 URIs, or files in allowed directories are allowed.",
                        output_parameter_value=None,
                        status=BlockStatus.failed,
                        workflow_run_block_id=workflow_run_block_id,
                        organization_id=organization_id,
                    )
                # Handle different file sources
                if is_allowed_local_file:
                    # Use local file directly
                    local_file_path_str: str = cast(str, actual_file_path)
                    if not os.path.exists(local_file_path_str):
                        return await self.build_block_result(
                            success=False,
                            failure_reason=f"File not found: {local_file_path_str}",
                            output_parameter_value=None,
                            status=BlockStatus.failed,
                            workflow_run_block_id=workflow_run_block_id,
                            organization_id=organization_id,
                        )
                    downloaded_files[field_name] = local_file_path_str
                    LOG.info(
                        "HttpRequestBlock: Using allowed local file",
                        field_name=field_name,
                        file_path=local_file_path_str,
                    )
                else:
                    # Download from remote source
                    try:
                        LOG.info(
                            "HttpRequestBlock: Downloading file from remote source",
                            field_name=field_name,
                            file_path=file_path,
                            is_url=is_url,
                            is_s3_uri=is_s3_uri,
                        )
                        if is_s3_uri:
                            local_file_path = await download_from_s3(self.get_async_aws_client(), file_path)
                        else:
                            local_file_path = await download_file(file_path)
                        downloaded_files[field_name] = local_file_path
                        LOG.info(
                            "HttpRequestBlock: File downloaded successfully",
                            field_name=field_name,
                            original_path=file_path,
                            local_path=local_file_path,
                        )
                    except Exception as e:
                        return await self.build_block_result(
                            success=False,
                            failure_reason=f"Failed to download file {file_path}: {str(e)}",
                            output_parameter_value=None,
                            status=BlockStatus.failed,
                            workflow_run_block_id=workflow_run_block_id,
                            organization_id=organization_id,
                        )
            # Update self.files with local file paths
            self.files = downloaded_files
        # Download mode: stream the response to disk instead of returning its body.
        if self.save_response_as_file:
            return await self._execute_file_download(
                workflow_run_context=workflow_run_context,
                workflow_run_id=workflow_run_id,
                workflow_run_block_id=workflow_run_block_id,
                organization_id=organization_id,
            )
        try:
            LOG.info(
                "Executing HTTP request",
                method=self.method,
                url=self.url,
                headers=self.headers,
                workflow_run_id=workflow_run_id,
                body=self.body,
                files=self.files,
            )
            status_code, response_headers, response_body = await aiohttp_request(
                method=self.method,
                url=self.url,
                headers=self.headers,
                data=self.body,
                files=self.files,
                timeout=self.timeout,
                follow_redirects=self.follow_redirects,
            )
            # Both the request and response are echoed into the output; some keys are
            # duplicated (headers/body/url) for convenient template access.
            response_data = {
                "status_code": status_code,
                "response_headers": response_headers,
                "response_body": response_body,
                "request_method": self.method,
                "request_url": self.url,
                "request_headers": self.headers,
                "request_body": self.body,
                "headers": response_headers,
                "body": response_body,
                "url": self.url,
            }
            # Scrub any secret values before logging/recording.
            response_data = workflow_run_context.mask_secrets_in_data(response_data)
            LOG.info(
                "HTTP request completed",
                status_code=status_code,
                url=self.url,
                method=self.method,
                workflow_run_id=workflow_run_id,
                response_data=response_data,
            )
            success = 200 <= status_code < 300
            failure_reason = None if success else f"HTTP {status_code}: {response_data.get('response_body', '')}"
            await self.record_output_parameter_value(workflow_run_context, workflow_run_id, response_data)
            return await self.build_block_result(
                success=success,
                failure_reason=failure_reason,
                output_parameter_value=response_data,
                status=BlockStatus.completed if success else BlockStatus.failed,
                workflow_run_block_id=workflow_run_block_id,
                organization_id=organization_id,
            )
        except asyncio.TimeoutError:
            error_data = {"error": "Request timed out", "error_type": "timeout"}
            await self.record_output_parameter_value(workflow_run_context, workflow_run_id, error_data)
            return await self.build_block_result(
                success=False,
                failure_reason=f"Request timed out after {self.timeout} seconds",
                output_parameter_value=error_data,
                status=BlockStatus.timed_out,
                workflow_run_block_id=workflow_run_block_id,
                organization_id=organization_id,
            )
        except Exception as e:
            error_data = {"error": str(e), "error_type": "unknown"}
            LOG.warning(
                "HTTP request failed with unexpected error",
                error=str(e),
                url=self.url,
                method=self.method,
                workflow_run_id=workflow_run_id,
            )
            await self.record_output_parameter_value(workflow_run_context, workflow_run_id, error_data)
            return await self.build_block_result(
                success=False,
                failure_reason=f"HTTP request failed: {str(e)}",
                output_parameter_value=error_data,
                status=BlockStatus.failed,
                workflow_run_block_id=workflow_run_block_id,
                organization_id=organization_id,
            )
class PrintPageBlock(Block):
    # Renders the current browser page to a PDF via Playwright's page.pdf().
    block_type: Literal[BlockType.PRINT_PAGE] = BlockType.PRINT_PAGE  # type: ignore
    # When True, the PDF gets date/title header and url/page-number footer templates.
    include_timestamp: bool = True
    # Optional filename (Jinja-templatable); sanitized and suffixed with ".pdf".
    custom_filename: str | None = None
    # Paper format; values outside VALID_FORMATS fall back to "A4".
    format: str = "A4"
    landscape: bool = False
    print_background: bool = True
    parameters: list[PARAMETER_TYPE] = []
    # Paper formats accepted when building PDF options.
    VALID_FORMATS: ClassVar[set[str]] = {"A4", "Letter", "Legal", "Tabloid"}
    def get_all_parameters(self, workflow_run_id: str) -> list[PARAMETER_TYPE]:
        """Return only the parameters explicitly declared on this block."""
        return self.parameters
@staticmethod
def _sanitize_filename(filename: str) -> str:
sanitized = re.sub(r'[<>:"/\\|?*]', "_", filename)
sanitized = sanitized.strip(". ")
return sanitized[:200] if sanitized else "document"
def _build_pdf_options(self) -> dict[str, Any]:
pdf_format = self.format if self.format in self.VALID_FORMATS else "A4"
pdf_options: dict[str, Any] = {
"format": pdf_format,
"landscape": self.landscape,
"print_background": self.print_background,
}
if self.include_timestamp:
pdf_options["display_header_footer"] = True
pdf_options["header_template"] = (
'<div style="font-size:10px;width:100%;display:flex;justify-content:space-between;padding:0 10px;">'
'<span class="date"></span><span class="title"></span><span></span></div>'
)
pdf_options["footer_template"] = (
'<div style="font-size:10px;width:100%;display:flex;justify-content:space-between;padding:0 10px;">'
'<span class="url"></span><span></span><span><span class="pageNumber"></span>/<span class="totalPages"></span></span></div>'
)
pdf_options["margin"] = {"top": "40px", "bottom": "40px"}
return pdf_options
    async def _upload_pdf_artifact(
        self,
        *,
        pdf_bytes: bytes,
        workflow_run_id: str,
        workflow_run_block_id: str,
        workflow_run_context: WorkflowRunContext,
        organization_id: str | None,
    ) -> str | None:
        """Store the rendered PDF as a workflow-run-block artifact.

        Returns the artifact URI on success, or None when the organization or the
        block record cannot be resolved, or when the upload fails (best-effort:
        failures are logged, never raised).
        """
        # Fall back to the org id from the run context when the caller did not pass one.
        artifact_org_id = organization_id or workflow_run_context.organization_id
        if not artifact_org_id:
            LOG.warning(
                "PrintPageBlock: Missing organization_id, skipping artifact upload",
                workflow_run_id=workflow_run_id,
                workflow_run_block_id=workflow_run_block_id,
            )
            return None
        try:
            workflow_run_block = await app.DATABASE.get_workflow_run_block(
                workflow_run_block_id,
                organization_id=artifact_org_id,
            )
        except NotFoundError:
            LOG.warning(
                "PrintPageBlock: Workflow run block not found, skipping artifact upload",
                workflow_run_id=workflow_run_id,
                workflow_run_block_id=workflow_run_block_id,
                organization_id=artifact_org_id,
            )
            return None
        _, artifact_uri = await app.ARTIFACT_MANAGER.create_workflow_run_block_artifact_with_uri(
            workflow_run_block=workflow_run_block,
            artifact_type=ArtifactType.PDF,
            data=pdf_bytes,
        )
        # Wait for the async upload tasks to flush; a failed upload invalidates the URI.
        try:
            await app.ARTIFACT_MANAGER.wait_for_upload_aiotasks([workflow_run_block.workflow_run_block_id])
        except Exception:
            LOG.warning(
                "PrintPageBlock: Failed to upload PDF artifact",
                workflow_run_id=workflow_run_id,
                workflow_run_block_id=workflow_run_block.workflow_run_block_id,
                exc_info=True,
            )
            return None
        return artifact_uri
    async def execute(
        self,
        workflow_run_id: str,
        workflow_run_block_id: str,
        organization_id: str | None = None,
        browser_session_id: str | None = None,
        **kwargs: dict,
    ) -> BlockResult:
        """
        Print the current working page to a PDF, save it into the workflow run's
        download directory (so it shows in the runs UI), and upload it as an
        artifact for downstream blocks.

        Returns a failed BlockResult when no browser state/page is available or
        when PDF generation raises; otherwise a completed BlockResult whose
        output contains the filename, local path, byte size, and artifact URI.
        """
        workflow_run_context = self.get_workflow_run_context(workflow_run_id)
        browser_state = await self.get_or_create_browser_state(
            workflow_run_id=workflow_run_id,
            organization_id=organization_id,
            browser_session_id=browser_session_id,
        )
        if not browser_state:
            return await self.build_block_result(
                success=False,
                failure_reason="No browser state available",
                status=BlockStatus.failed,
                workflow_run_block_id=workflow_run_block_id,
                organization_id=organization_id,
            )
        page = await browser_state.get_working_page()
        if not page:
            return await self.build_block_result(
                success=False,
                failure_reason="No page available",
                status=BlockStatus.failed,
                workflow_run_block_id=workflow_run_block_id,
                organization_id=organization_id,
            )
        pdf_options = self._build_pdf_options()
        try:
            pdf_bytes = await page.pdf(**pdf_options)
        except Exception as e:
            error_msg = str(e)
            # Playwright's page.pdf() is Chromium-only; surface a clearer message for that case.
            if "pdf" in error_msg.lower() and ("not supported" in error_msg.lower() or "chromium" in error_msg.lower()):
                error_msg = "PDF generation requires Chromium browser. Current browser does not support page.pdf()."
            LOG.warning("PrintPageBlock: Failed to generate PDF", error=error_msg, workflow_run_id=workflow_run_id)
            return await self.build_block_result(
                success=False,
                failure_reason=f"Failed to generate PDF: {error_msg}",
                status=BlockStatus.failed,
                workflow_run_block_id=workflow_run_block_id,
                organization_id=organization_id,
            )
        timestamp_str = datetime.now(timezone.utc).strftime("%Y%m%d_%H%M%S")
        if self.custom_filename:
            # Custom filenames may contain Jinja templates; render, then sanitize,
            # then force a .pdf extension.
            filename = self.format_block_parameter_template_from_workflow_run_context(
                self.custom_filename, workflow_run_context
            )
            filename = self._sanitize_filename(filename)
            if not filename.endswith(".pdf"):
                filename += ".pdf"
        else:
            filename = f"page_{timestamp_str}.pdf"
        # Save PDF to download directory so it appears in runs UI
        download_dir = get_download_dir(workflow_run_id)
        file_path = os.path.join(download_dir, filename)
        async with aiofiles.open(file_path, "wb") as f:
            await f.write(pdf_bytes)
        # Upload to artifact storage for downstream block access (e.g., File Extraction Block)
        artifact_uri = await self._upload_pdf_artifact(
            pdf_bytes=pdf_bytes,
            workflow_run_id=workflow_run_id,
            workflow_run_block_id=workflow_run_block_id,
            workflow_run_context=workflow_run_context,
            organization_id=organization_id,
        )
        output = {
            "filename": filename,
            "file_path": file_path,
            "size_bytes": len(pdf_bytes),
            # None when the artifact upload was skipped or failed (best-effort).
            "artifact_uri": artifact_uri,
        }
        await self.record_output_parameter_value(workflow_run_context, workflow_run_id, output)
        return await self.build_block_result(
            success=True,
            failure_reason=None,
            output_parameter_value=output,
            status=BlockStatus.completed,
            workflow_run_block_id=workflow_run_block_id,
            organization_id=organization_id,
        )
class BranchEvaluationContext:
    """Collection of runtime data that BranchCriteria evaluators can consume."""
    def __init__(
        self,
        *,
        workflow_run_context: WorkflowRunContext | None = None,
        block_label: str | None = None,
        template_renderer: Callable[[str], str] | None = None,
    ) -> None:
        # Workflow run state (parameter values, block metadata, secrets); may be None.
        self.workflow_run_context = workflow_run_context
        # Label of the conditional block being evaluated; enables loop-variable lookups.
        self.block_label = block_label
        # Renderer matching block-parameter Jinja semantics, provided by the block.
        self.template_renderer = template_renderer
    def build_llm_safe_context_snapshot(self) -> dict[str, Any]:
        """
        Build a minimal context blob for LLM-facing branch evaluation.
        Only includes essential data the LLM needs to evaluate conditions:
        - Parameter values (base_date, date_1, etc.)
        - Extracted information from previous blocks
        - Loop variables (current_value, current_index, current_item)
        """
        if self.workflow_run_context is None:
            return {}
        ctx = self.workflow_run_context
        raw_values: dict[str, Any] = ctx.values.copy()
        # Keys to skip - these are not useful for evaluating conditions
        keys_to_skip = {
            "blocks_metadata",
            "params",
            "outputs",
            "environment",
            "env",
            "llm",
            "workflow_title",
            "workflow_id",
            "workflow_permanent_id",
            "workflow_run_id",
        }
        snapshot: dict[str, Any] = {}
        for key, value in raw_values.items():
            # Skip noisy keys
            if key in keys_to_skip:
                continue
            # For block outputs (dicts with extracted_information), only include extracted_information
            # (entries whose extracted_information is None are dropped entirely)
            if isinstance(value, dict) and "extracted_information" in value:
                extracted = value.get("extracted_information")
                if extracted is not None:
                    snapshot[key] = extracted
            else:
                # Include parameter values directly
                snapshot[key] = value
        # Copy loop variables (current_value, current_index, current_item) to top level
        # Required for pure NatLang expressions like "current_value['date']" to work
        if self.block_label:
            block_metadata = ctx.get_block_metadata(self.block_label)
            if "current_value" in block_metadata:
                snapshot["current_value"] = block_metadata["current_value"]
            if "current_index" in block_metadata:
                snapshot["current_index"] = block_metadata["current_index"]
            if "current_item" in block_metadata:
                snapshot["current_item"] = block_metadata["current_item"]
        # Mask any real secret values that may have leaked into values
        snapshot = ctx.mask_secrets_in_data(snapshot)
        return snapshot
    def build_template_data(self) -> dict[str, Any]:
        """Build Jinja template data mirroring block parameter rendering context."""
        if self.workflow_run_context is None:
            # No run context: expose empty namespaces so templates can still render.
            return {
                "params": {},
                "outputs": {},
                "environment": {},
                "env": {},
                "llm": {},
            }
        ctx = self.workflow_run_context
        template_data = ctx.values.copy()
        if ctx.include_secrets_in_templates:
            template_data.update(ctx.secrets)
        # Credential parameters look like {"context": ..., "username": <secret_id>, "password": <secret_id>}.
        # Expose the resolved secret values under <key>_real_username / <key>_real_password.
        credential_params: list[tuple[str, dict[str, Any]]] = []
        for key, value in template_data.items():
            if isinstance(value, dict) and "context" in value and "username" in value and "password" in value:
                credential_params.append((key, value))
        for key, value in credential_params:
            username_secret_id = value.get("username", "")
            password_secret_id = value.get("password", "")
            real_username = template_data.get(username_secret_id, "")
            real_password = template_data.get(password_secret_id, "")
            template_data[f"{key}_real_username"] = real_username
            template_data[f"{key}_real_password"] = real_password
        if self.block_label:
            # Merge loop metadata (current_index/current_item/current_value) into the
            # block's own template entry and surface the loop variables at top level.
            block_reference_data: dict[str, Any] = ctx.get_block_metadata(self.block_label)
            if self.block_label in template_data:
                current_value = template_data[self.block_label]
                if isinstance(current_value, dict):
                    block_reference_data.update(current_value)
            template_data[self.block_label] = block_reference_data
            if "current_index" in block_reference_data:
                template_data["current_index"] = block_reference_data["current_index"]
            if "current_item" in block_reference_data:
                template_data["current_item"] = block_reference_data["current_item"]
            if "current_value" in block_reference_data:
                template_data["current_value"] = block_reference_data["current_value"]
        # setdefault: never clobber user-supplied values of the same names.
        template_data.setdefault("workflow_title", ctx.workflow_title)
        template_data.setdefault("workflow_id", ctx.workflow_id)
        template_data.setdefault("workflow_permanent_id", ctx.workflow_permanent_id)
        template_data.setdefault("workflow_run_id", ctx.workflow_run_id)
        template_data.setdefault("current_date", datetime.now(timezone.utc).strftime(CURRENT_DATE_FORMAT))
        template_data.setdefault("params", template_data.get("params", {}))
        template_data.setdefault("outputs", template_data.get("outputs", {}))
        template_data.setdefault("environment", template_data.get("environment", {}))
        # "env" aliases "environment" (no {} fallback here, mirroring existing behavior).
        template_data.setdefault("env", template_data.get("environment"))
        template_data.setdefault("llm", template_data.get("llm", {}))
        return template_data
class BranchCriteria(BaseModel, abc.ABC):
    """Abstract interface describing how a branch condition should be evaluated."""
    # Discriminator naming the concrete criteria subtype (e.g. "jinja2_template", "prompt").
    criteria_type: str
    # The user-authored condition text (Jinja template and/or natural language).
    expression: str
    # Optional human-readable description of the branch condition.
    description: str | None = None
    @abc.abstractmethod
    async def evaluate(self, context: BranchEvaluationContext) -> bool:
        """Return True when the branch should execute."""
        raise NotImplementedError
    def requires_llm(self) -> bool:
        """Whether the criteria relies on an LLM classification step."""
        return False
def _evaluate_truthy_string(value: str) -> bool:
"""
Evaluate a string as a boolean, handling common truthy/falsy representations.
Truthy: "true", "True", "TRUE", "1", "yes", "y", "on", non-zero numbers
Falsy: "", "false", "False", "FALSE", "0", "no", "n", "off", "null", "None", whitespace-only
For other strings, use Python's default bool() behavior (non-empty = truthy).
"""
if not value or not value.strip():
return False
normalized = value.strip().lower()
# Explicit falsy values
if normalized in ("false", "0", "no", "n", "off", "null", "none"):
return False
# Explicit truthy values
if normalized in ("true", "1", "yes", "y", "on"):
return True
# Try to parse as a number
try:
num = float(normalized)
return num != 0.0
except ValueError:
pass
# For any other non-empty string, consider it truthy
# This allows expressions like "{{ 'some text' }}" to be truthy
return True
class JinjaBranchCriteria(BranchCriteria):
    """Jinja2-templated branch criteria (only supported criteria type for now)."""
    criteria_type: Literal["jinja2_template"] = "jinja2_template"
    async def evaluate(self, context: BranchEvaluationContext) -> bool:
        """Render the Jinja expression and interpret the rendered text as a boolean."""
        renderer = context.template_renderer
        if renderer is not None:
            # Preferred path: the caller-supplied renderer keeps evaluation
            # consistent with block parameter rendering.
            try:
                rendered = renderer(self.expression)
            except MissingJinjaVariables:
                # Let upstream MissingJinjaVariables bubble as-is.
                raise
            except Exception as exc:  # pragma: no cover - caught for robustness
                raise FailedToFormatJinjaStyleParameter(self.expression, str(exc)) from exc
            return _evaluate_truthy_string(rendered)
        # Fallback path: build a minimal sandboxed renderer from the evaluation context.
        template_data = context.build_template_data()
        strict_mode = settings.WORKFLOW_TEMPLATING_STRICTNESS == "strict"
        sandbox_env = SandboxedEnvironment(undefined=StrictUndefined) if strict_mode else SandboxedEnvironment()
        try:
            missing_vars = get_missing_variables(self.expression, template_data)
            if missing_vars:
                raise MissingJinjaVariables(self.expression, missing_vars)
            rendered = sandbox_env.from_string(self.expression).render(template_data)
        except MissingJinjaVariables:
            raise
        except Exception as exc:
            # Covers both template syntax errors and rendering failures.
            raise FailedToFormatJinjaStyleParameter(self.expression, str(exc)) from exc
        return _evaluate_truthy_string(rendered)
class PromptBranchCriteria(BranchCriteria):
    """Natural language branch criteria."""
    criteria_type: Literal["prompt"] = "prompt"
    async def evaluate(self, context: BranchEvaluationContext) -> bool:
        # Natural language criteria are evaluated in batch by ConditionalBlock.execute.
        # Calling this directly is a programming error: per-branch evaluation would
        # defeat the single batched LLM call.
        raise NotImplementedError("PromptBranchCriteria is evaluated in batch, not per-branch.")
    def requires_llm(self) -> bool:
        """Prompt criteria always require an LLM classification step."""
        return True
def _is_pure_jinja_expression(expression: str) -> bool:
"""
Determine if an expression is a pure Jinja template (single block) vs Jinja+NatLang (mixed).
Pure Jinja: "{{ A == B }}" - single Jinja block, should be evaluated server-side
Jinja+NatLang: "{{ A }} is same as {{ B }}" - multiple Jinja blocks mixed with natural language
Returns True only for pure Jinja expressions that can be evaluated to boolean server-side.
"""
if not expression:
return False
stripped = expression.strip()
# Must start with {{ and end with }}
if not (stripped.startswith("{{") and stripped.endswith("}}")):
return False
# Count the number of {{ occurrences
# If there's more than one, it's Jinja+NatLang (e.g., "{{ A }} is same as {{ B }}")
jinja_open_count = stripped.count("{{")
if jinja_open_count > 1:
return False
# Single {{ and ends with }} - this is pure Jinja
return True
def _resolve_nested_path(value: Any, path: str) -> Any:
"""
Resolve a dotted/bracket access path on a nested value.
Examples:
_resolve_nested_path({"a": {"b": 1}}, ".a.b") -> 1
_resolve_nested_path([{"x": 2}], "[0].x") -> 2
Args:
value: The root value to traverse
path: The access path (e.g., ".field1.field2[0].field3")
Returns:
The resolved leaf value
Raises:
LookupError: If the path cannot be resolved
"""
segments = re.findall(r"\.([a-zA-Z_]\w*)|\[(\d+)\]", path)
current = value
for dot_key, bracket_idx in segments:
if dot_key:
if isinstance(current, dict):
if dot_key not in current:
raise LookupError(f"Key {dot_key!r} not found")
current = current[dot_key]
else:
raise LookupError(f"Cannot access .{dot_key} on {type(current).__name__}")
elif bracket_idx:
idx = int(bracket_idx)
if isinstance(current, (list, tuple)):
if idx >= len(current):
raise LookupError(f"Index [{idx}] out of range")
current = current[idx]
else:
raise LookupError(f"Cannot index [{idx}] on {type(current).__name__}")
return current
# Whitelist of Jinja filters that _render_jinja_expression_for_display can apply
# when substituting variable values for UI display. Filters not listed here are
# left as literal text in the rendered output.
_JINJA_DISPLAY_FILTERS: dict[str, Callable[[Any], Any]] = {
    "lower": lambda v: str(v).lower(),
    "upper": lambda v: str(v).upper(),
    "trim": lambda v: str(v).strip(),
    "title": lambda v: str(v).title(),
    "capitalize": lambda v: str(v).capitalize(),
    "int": lambda v: int(v),
    "float": lambda v: float(v),
    "string": lambda v: str(v),
    "length": lambda v: len(v),
    "abs": lambda v: abs(v),
}
def _render_jinja_expression_for_display(
    expression: str,
    context_values: dict[str, Any],
    block_label: str | None = None,
) -> str:
    """
    Render a pure Jinja expression for UI display by substituting variable names with values.
    This is for display purposes only - it shows users what values were compared
    without actually evaluating the expression. For example:
    - Input: "{{ base_date == date_1 }}" with context {"base_date": "01-25-2026", "date_1": "01-25-2026"}
    - Output: '"01-25-2026" == "01-25-2026"'
    - Input: "{{ output.extracted_information.field != None }}" with nested dict context
    - Output: '"some_value" != None'
    - Input: "{{ output.status|lower == 'active' }}" with context {"output": {"status": "Active"}}
    - Output: '"active" == \'active\''
    Known Jinja filters (lower, upper, trim, etc.) are applied to the resolved value.
    Unknown filters are left as-is in the output.
    Returns the original expression if it's not a pure Jinja expression or if rendering fails.
    """
    if not _is_pure_jinja_expression(expression):
        return expression
    try:
        # Extract inner expression (strip {{ and }})
        inner_expr = expression.strip()[2:-2].strip()
        display_expr = inner_expr
        # Substitute variable references (including dotted/bracket access paths and filters)
        # with their values.
        # Match var_name optionally followed by .field or [index] segments,
        # then optionally followed by a |filter_name.
        # Sort by key length (longest first) to avoid partial matches.
        for var_name in sorted(context_values.keys(), key=len, reverse=True):
            pattern = r"\b" + re.escape(var_name) + r"((?:\.[a-zA-Z_]\w*|\[\d+\])*)(\|[a-zA-Z_]\w*)?"
            # var_name is bound as a default argument to sidestep the
            # late-binding-closure pitfall inside this loop.
            def _replacer(match: re.Match, _var_name: str = var_name) -> str:
                access_path = match.group(1)  # the dotted/bracket part after var_name
                filter_expr = match.group(2)  # e.g., "|lower" or None
                var_value = context_values[_var_name]
                if access_path:
                    try:
                        var_value = _resolve_nested_path(var_value, access_path)
                    except LookupError:
                        # Path couldn't be resolved — return original text unchanged
                        return match.group(0)
                if filter_expr:
                    filter_name = filter_expr[1:]  # strip the leading |
                    filter_fn = _JINJA_DISPLAY_FILTERS.get(filter_name)
                    if filter_fn is not None:
                        try:
                            var_value = filter_fn(var_value)
                        except Exception:
                            # Filter application failed — show value with filter text
                            if isinstance(var_value, str):
                                return f'"{var_value}"{filter_expr}'
                            return f"{var_value}{filter_expr}"
                    else:
                        # Unknown filter — show value with filter text preserved
                        if isinstance(var_value, str):
                            return f'"{var_value}"{filter_expr}'
                        return f"{var_value}{filter_expr}"
                # Strings are quoted so users can tell resolved values from identifiers.
                if isinstance(var_value, str):
                    return f'"{var_value}"'
                return str(var_value)
            display_expr = re.sub(pattern, _replacer, display_expr)
        return display_expr
    except Exception as exc:
        # Display rendering must never break branch evaluation; fall back to raw text.
        LOG.debug(
            "Failed to render Jinja expression for display",
            block_label=block_label,
            expression=expression,
            error=str(exc),
        )
        return expression
def _find_evaluations_array(output_value: dict[str, Any]) -> list[Any]:
"""
Extract the evaluations array from LLM output.
ExtractionBlock wraps output in 'extracted_information', so we check there first.
Falls back to direct access if not found in the nested structure.
Args:
output_value: The raw output from ExtractionBlock
Returns:
List of evaluation objects from the LLM
Raises:
ValueError: If evaluations array is not found or has wrong type
"""
# Try standard ExtractionBlock format: output_value.extracted_information.evaluations
extracted_info = output_value.get("extracted_information")
if isinstance(extracted_info, dict):
raw_evaluations = extracted_info.get("evaluations")
else:
# Fallback: try direct access at output_value.evaluations
raw_evaluations = output_value.get("evaluations")
if not isinstance(raw_evaluations, list):
raise ValueError(f"Expected array of evaluations, got: {type(raw_evaluations)}")
return raw_evaluations
def _parse_single_evaluation(
evaluation: Any,
idx: int,
fallback_rendered_expressions: list[str],
) -> tuple[bool, str]:
"""
Parse a single evaluation from the LLM response.
Handles two formats:
- Dict format: {result: bool, reasoning: str}
- Legacy format: just a boolean value
The rendered expression always comes from the Jinja pre-rendering step (fallback),
not from the LLM response, to avoid the LLM re-interpreting already-resolved values.
Args:
evaluation: Single evaluation object from LLM (dict or bool)
idx: Index of this evaluation (for fallback lookup)
fallback_rendered_expressions: Pre-rendered expressions from Jinja rendering
Returns:
Tuple of (boolean_result, rendered_expression_string)
"""
rendered_expression = fallback_rendered_expressions[idx] if idx < len(fallback_rendered_expressions) else ""
if isinstance(evaluation, dict):
result = evaluation.get("result")
if isinstance(result, bool):
bool_result = result
else:
bool_result = _evaluate_truthy_string(str(result))
LOG.warning(
"Prompt branch evaluation returned non-boolean result",
branch_index=idx,
result=result,
evaluated_result=bool_result,
)
return (bool_result, rendered_expression)
else:
# Legacy format: just a boolean
if isinstance(evaluation, bool):
bool_result = evaluation
else:
bool_result = _evaluate_truthy_string(str(evaluation))
return (bool_result, rendered_expression)
# Pattern to find Jinja template blocks like {{ variable_name }}.
# The inner group is non-greedy so adjacent blocks split into separate matches.
_JINJA_BLOCK_RE = re.compile(r"\{\{(.*?)\}\}")
# Marker inserted into rendered expressions when a Jinja variable resolved to
# an empty/whitespace-only value. The LLM uses this to reason about emptiness.
_EMPTY_VALUE_MARKER = "(empty value)"
def _make_empty_params_explicit(
    original_expression: str,
    rendered_expression: str,
) -> tuple[str, bool]:
    """
    Detect Jinja template variables that resolved to empty values and replace
    the empty gaps with explicit ``(empty value)`` markers.
    When ``{{test_parameter}}`` resolves to ``""``, the rendered expression becomes
    malformed (e.g., ``"if is not empty"``). This function detects such cases by
    comparing the *original* expression (with ``{{ }}`` blocks) against the
    *rendered* expression and rebuilds it with clear markers so the LLM can
    evaluate the condition correctly.
    Returns:
        ``(patched_expression, was_patched)``
    """
    # Fast path: nothing to patch when the original had no Jinja blocks at all.
    if not original_expression or "{{" not in original_expression:
        return rendered_expression, False
    # Split the original expression into alternating [static, var, static, var, ...] parts.
    parts = _JINJA_BLOCK_RE.split(original_expression)
    if len(parts) <= 1:
        return rendered_expression, False
    # Extract static parts (even indices) and build a regex that captures what
    # each Jinja block rendered to by using the static text as anchors.
    static_parts = [parts[i] for i in range(0, len(parts), 2)]
    num_vars = len(parts) // 2
    # When two Jinja variables are adjacent (e.g. "{{a}}{{b}}") the interior
    # static separator is an empty string and the non-greedy regex cannot
    # reliably attribute rendered text to the correct variable. Bail out.
    if num_vars > 1 and any(static == "" for static in static_parts[1:-1]):
        return rendered_expression, False
    # NOTE: if a rendered value happens to contain the same text as a static
    # anchor the regex may split on the wrong occurrence. This is extremely
    # unlikely in user-authored conditional expressions and the worst-case
    # outcome is an unnecessary "(empty value)" marker, which still beats the
    # invisible empty-string that caused SKY-8073.
    regex_fragments: list[str] = []
    for i, static in enumerate(static_parts):
        regex_fragments.append(re.escape(static))
        if i < num_vars:
            # One lazy capture group per Jinja block, anchored by the static text.
            regex_fragments.append("(.*?)")
    match = re.match("^" + "".join(regex_fragments) + "$", rendered_expression, re.DOTALL)
    if not match:
        return rendered_expression, False
    rendered_values = match.groups()
    has_empty = any(not v.strip() for v in rendered_values)
    if not has_empty:
        # Every variable rendered to visible text; leave the expression untouched.
        return rendered_expression, False
    # Rebuild the expression, replacing empty rendered values with an explicit marker.
    result_parts: list[str] = []
    for i, static in enumerate(static_parts):
        result_parts.append(static)
        if i < len(rendered_values):
            if not rendered_values[i].strip():
                result_parts.append(_EMPTY_VALUE_MARKER)
            else:
                result_parts.append(rendered_values[i])
    return "".join(result_parts), True
class BranchCondition(BaseModel):
    """Represents a single conditional branch edge within a ConditionalBlock."""
    # Stable identifier used to correlate evaluation results with branches.
    id: str = Field(default_factory=lambda: str(uuid.uuid4()))
    # The condition to evaluate; None is only valid for the default branch.
    criteria: BranchCriteriaTypeVar | None = None
    # Label of the block to jump to when this branch matches.
    next_block_label: str | None = None
    description: str | None = None
    # Marks the fallback branch taken when no other branch matches.
    is_default: bool = False
    @model_validator(mode="after")
    def validate_condition(cls, condition_obj: BranchCondition) -> BranchCondition:
        # Criteria may arrive as a raw dict (e.g. deserialized workflow definitions);
        # coerce it into the appropriate concrete BranchCriteria subclass.
        if isinstance(condition_obj.criteria, dict):
            criteria_type = condition_obj.criteria.get("criteria_type")
            if criteria_type is None:
                # Infer criteria type from expression format
                expression = condition_obj.criteria.get("expression", "")
                if _is_pure_jinja_expression(expression):
                    criteria_type = "jinja2_template"
                else:
                    criteria_type = "prompt"
            if criteria_type == "prompt":
                condition_obj.criteria = PromptBranchCriteria(**condition_obj.criteria)
            else:
                condition_obj.criteria = JinjaBranchCriteria(**condition_obj.criteria)
        if condition_obj.criteria is None and not condition_obj.is_default:
            raise ValueError("Branches without criteria must be marked as default.")
        if condition_obj.criteria is not None and condition_obj.is_default:
            raise ValueError("Default branches may not define criteria.")
        # Re-classify already-parsed criteria so criteria_type always matches the
        # expression's actual format (pure Jinja vs natural language), even when
        # the stored type disagrees.
        if condition_obj.criteria and isinstance(condition_obj.criteria, BranchCriteria):
            expression = condition_obj.criteria.expression
            criteria_dict = condition_obj.criteria.model_dump()
            if _is_pure_jinja_expression(expression):
                criteria_dict["criteria_type"] = "jinja2_template"
                condition_obj.criteria = JinjaBranchCriteria(**criteria_dict)
            else:
                criteria_dict["criteria_type"] = "prompt"
                condition_obj.criteria = PromptBranchCriteria(**criteria_dict)
        return condition_obj
class ConditionalBlock(Block):
    """Branching block that selects the next block label based on list-ordered conditions."""
    # There is a mypy bug with Literal. Without the type: ignore, mypy will raise an error:
    # Parameter 1 of Literal[...] cannot be of type "Any"
    block_type: Literal[BlockType.CONDITIONAL] = BlockType.CONDITIONAL  # type: ignore
    # Branches are evaluated in list order; the first matching branch wins.
    branch_conditions: list[BranchCondition] = Field(default_factory=list)
    @model_validator(mode="after")
    def validate_branches(cls, block: ConditionalBlock) -> ConditionalBlock:
        # A conditional block with no branches could never route anywhere.
        if not block.branch_conditions:
            raise ValueError("Conditional blocks require at least one branch.")
        default_branches = [branch for branch in block.branch_conditions if branch.is_default]
        # At most one fallback branch is allowed per conditional block.
        if len(default_branches) > 1:
            raise ValueError("Only one default branch is permitted per conditional block.")
        return block
    def get_all_parameters(
        self,
        workflow_run_id: str,  # noqa: ARG002 - preserved for interface compatibility
    ) -> list[PARAMETER_TYPE]:
        """Return the workflow parameters this block depends on (currently none)."""
        # BranchCriteria subclasses will surface their parameter dependencies once implemented.
        return []
    async def _evaluate_prompt_branches(
        self,
        *,
        branches: list[BranchCondition],
        evaluation_context: BranchEvaluationContext,
        workflow_run_id: str,
        workflow_run_block_id: str,
        organization_id: str | None = None,
        browser_session_id: str | None = None,
    ) -> tuple[list[bool], list[str], str | None, dict | None]:
        """
        Evaluate natural language branch conditions in batch.
        All prompt-based conditions are batched into ONE LLM call for performance.
        Jinja parts ({{ }}) are pre-rendered before sending to LLM.
        Evaluation strategy:
        - If any condition is pure natural language, use ExtractionBlock for browser/page context.
        - If all conditions contain Jinja and are pre-rendered, use direct LLM call (no browser context).
        Returns:
            A tuple of (results, rendered_expressions, extraction_goal, llm_response):
            - results: List of boolean results for each branch
            - rendered_expressions: List of expressions after Jinja pre-rendering
            - extraction_goal: The prompt sent to the LLM (for UI display)
            - llm_response: The raw LLM response for debugging
        Raises:
            ValueError: When organization_id is missing, the LLM evaluation fails,
                or the response shape does not match the number of branches.
        """
        if organization_id is None:
            raise ValueError("organization_id is required to evaluate natural language branches")
        if not branches:
            return ([], [], None, None)
        workflow_run_context = evaluation_context.workflow_run_context
        # Step 1: Pre-render all expressions (resolve any Jinja {{ }} parts)
        rendered_expressions: list[str] = []
        has_any_pure_natlang = False
        for idx, branch in enumerate(branches):
            expression = branch.criteria.expression if branch.criteria else ""
            has_jinja = "{{" in expression
            if has_jinja:
                try:
                    rendered_expression = (
                        evaluation_context.template_renderer(expression)
                        if evaluation_context.template_renderer
                        else expression
                    )
                except Exception as render_exc:
                    LOG.error(
                        "Conditional branch expression rendering FAILED",
                        block_label=self.label,
                        branch_index=idx,
                        original_expression=expression,
                        error=str(render_exc),
                        exc_info=True,
                    )
                    rendered_expression = expression
                    # Rendering failed, so this expression is effectively unresolved and must
                    # take the ExtractionBlock path (with context) instead of direct LLM mode.
                    has_any_pure_natlang = True
                else:
                    # When a Jinja variable resolves to an empty string the rendered
                    # expression becomes malformed (e.g. "if is not empty") and the
                    # LLM cannot reason about emptiness correctly. Replace empty gaps
                    # with an explicit "(empty value)" marker so the intent is clear.
                    rendered_expression, was_patched = _make_empty_params_explicit(expression, rendered_expression)
                    if was_patched:
                        LOG.info(
                            "Conditional branch expression patched for empty parameter(s)",
                            workflow_run_id=workflow_run_id,
                            block_label=self.label,
                            branch_index=idx,
                            original_expression=expression,
                            patched_expression=rendered_expression,
                        )
            else:
                rendered_expression = expression
                has_any_pure_natlang = True
            LOG.info(
                "Conditional branch expression rendering",
                block_label=self.label,
                branch_index=idx,
                original_expression=expression,
                rendered_expression=rendered_expression,
                has_jinja=has_jinja,
                expression_changed=expression != rendered_expression,
            )
            rendered_expressions.append(rendered_expression)
        # Step 2: Build extraction goal with all conditions
        # Include context only if there are pure NatLang expressions that need variable resolution
        if has_any_pure_natlang:
            context_snapshot = evaluation_context.build_llm_safe_context_snapshot()
            context_json = json.dumps(context_snapshot, default=str)
        else:
            context_json = None
        extraction_goal = prompt_engine.load_prompt(
            "conditional-prompt-branch-evaluation",
            conditions=rendered_expressions,
            context_json=context_json,
        )
        # Step 3: Build schema for array of evaluation results
        # Order matters: reasoning -> result (chain-of-thought)
        data_schema = {
            "type": "object",
            "properties": {
                "evaluations": {
                    "type": "array",
                    "items": {
                        "type": "object",
                        "properties": {
                            "reasoning": {
                                "type": "string",
                                "description": "Explanation of the reasoning behind evaluating the condition.",
                            },
                            "result": {
                                "type": "boolean",
                                "description": "TRUE if the condition is satisfied, FALSE otherwise.",
                            },
                        },
                        "required": ["reasoning", "result"],
                    },
                    "description": "Array of evaluation results for each condition in the same order.",
                    # Pin the array length so the LLM must answer every condition.
                    "minItems": len(branches),
                    "maxItems": len(branches),
                }
            },
            "required": ["evaluations"],
        }
        # Step 4: Create and execute single ExtractionBlock.
        # When all expressions have been Jinja-rendered successfully, omit
        # browser_session_id so the LLM won't reinterpret resolved literal
        # values as on-screen references (SKY-7985).
        effective_browser_session_id = browser_session_id if has_any_pure_natlang else None
        output_param = OutputParameter(
            output_parameter_id=str(uuid.uuid4()),
            key=f"conditional_branch_eval_{generate_random_string()}",
            workflow_id=self.output_parameter.workflow_id,
            created_at=datetime.now(),
            modified_at=datetime.now(),
            parameter_type=ParameterType.OUTPUT,
            description=f"Conditional branch evaluation results ({len(branches)} conditions)",
        )
        extraction_block = ExtractionBlock(
            label=f"conditional_branch_eval_{generate_random_string()}",
            data_extraction_goal=extraction_goal,
            data_schema=data_schema,
            output_parameter=output_param,
        )
        LOG.info(
            "Conditional branch ExtractionBlock created (batched)",
            block_label=self.label,
            num_conditions=len(branches),
            extraction_goal_preview=extraction_goal[:500] if extraction_goal else None,
            has_browser_session=effective_browser_session_id is not None,
            has_any_pure_natlang=has_any_pure_natlang,
            has_context=context_json is not None,
        )
        try:
            extraction_result = await extraction_block.execute(
                workflow_run_id=workflow_run_id,
                workflow_run_block_id=workflow_run_block_id,
                organization_id=organization_id,
                browser_session_id=effective_browser_session_id,
            )
            if not extraction_result.success:
                LOG.error(
                    "Conditional branch ExtractionBlock failed",
                    block_label=self.label,
                    failure_reason=extraction_result.failure_reason,
                )
                raise ValueError(
                    f"Branch evaluation failed: "
                    f"{extraction_result.failure_reason or 'Unknown error (no failure reason provided)'}"
                )
            if workflow_run_context:
                try:
                    # Best-effort: persist the raw evaluation output for observability.
                    await extraction_block.record_output_parameter_value(
                        workflow_run_context=workflow_run_context,
                        workflow_run_id=workflow_run_id,
                        value=extraction_result.output_parameter_value,
                    )
                except Exception:
                    LOG.warning(
                        "Failed to record conditional branch evaluation output",
                        workflow_run_id=workflow_run_id,
                        block_label=self.label,
                        exc_info=True,
                    )
            output_value = extraction_result.output_parameter_value
            # Step 5: Extract the evaluation results (reasoning + result)
            results_array: list[bool] = []
            llm_rendered_expressions: list[str] = []
            if isinstance(output_value, list):
                # Some responses return the array directly; normalize to a dict.
                output_value = {"evaluations": output_value}
            if not isinstance(output_value, dict):
                raise ValueError(f"Unexpected output format: {type(output_value)}")
            # Find evaluations array from LLM output (handles ExtractionBlock nesting)
            raw_evaluations = _find_evaluations_array(output_value)
            # Parse each evaluation to extract result (rendered expression comes from Jinja pre-rendering)
            for idx, evaluation in enumerate(raw_evaluations):
                bool_result, rendered_expr = _parse_single_evaluation(
                    evaluation=evaluation,
                    idx=idx,
                    fallback_rendered_expressions=rendered_expressions,
                )
                results_array.append(bool_result)
                llm_rendered_expressions.append(rendered_expr)
            LOG.info(
                "Conditional branch evaluation results",
                block_label=self.label,
                results=results_array,
                llm_rendered_expressions=llm_rendered_expressions,
                raw_output=output_value,
            )
            if len(results_array) != len(branches):
                raise ValueError(
                    f"Prompt branch evaluation returned {len(results_array)} results for {len(branches)} branches"
                )
            return (results_array, llm_rendered_expressions, extraction_goal, output_value)
        except Exception as exc:
            LOG.error(
                "Conditional branch prompt evaluation failed",
                block_label=self.label,
                error=str(exc),
                exc_info=True,
            )
            raise ValueError(f"Prompt branch evaluation failed: {str(exc)}") from exc
async def execute(  # noqa: D401
    self,
    workflow_run_id: str,
    workflow_run_block_id: str,
    organization_id: str | None = None,
    browser_session_id: str | None = None,
    **kwargs: dict,
) -> BlockResult:
    """
    Evaluate conditional branches and determine next block to execute.
    Returns a BlockResult with branch metadata in the output_parameter_value.

    How the evaluation works (mirrors the code below):
      * All prompt-based ("natural language") branches are evaluated together
        in one up-front call to ``_evaluate_prompt_branches``; their results
        are then looked up by branch id during the ordered walk.
      * Branches are visited in author order; the first truthy branch wins and
        the walk stops (``break``). Any evaluation error also stops the walk
        and fails the block.
      * A branch without criteria is the default/else branch; it is selected
        only after the walk, when nothing matched and no error occurred.
    """
    workflow_run_context = app.WORKFLOW_CONTEXT_MANAGER.get_workflow_run_context(workflow_run_id)
    # Context handed to criteria evaluators. The template_renderer lets Jinja
    # criteria resolve workflow parameters; it is None when there is no
    # workflow run context to render against.
    evaluation_context = BranchEvaluationContext(
        workflow_run_context=workflow_run_context,
        block_label=self.label,
        template_renderer=(
            lambda potential_template: self.format_block_parameter_template_from_workflow_run_context(
                potential_template,
                workflow_run_context,
            )
        )
        if workflow_run_context
        else None,
    )
    matched_branch = None
    failure_reason: str | None = None
    # Track all branch evaluations for UI display
    branch_evaluations_list: list[dict] = []
    prompt_rendered_by_id: dict[str, str] = {}
    natural_language_branches = [
        branch for branch in self.ordered_branches if isinstance(branch.criteria, PromptBranchCriteria)
    ]
    prompt_results_by_id: dict[str, bool] = {}
    prompt_llm_response: dict | None = None
    prompt_extraction_goal: str | None = None
    if natural_language_branches:
        # Single LLM round-trip evaluates every prompt branch at once; results
        # and rendered expressions are keyed by branch id for the walk below.
        try:
            (
                prompt_results,
                prompt_rendered_expressions,
                prompt_extraction_goal,
                prompt_llm_response,
            ) = await self._evaluate_prompt_branches(
                branches=natural_language_branches,
                evaluation_context=evaluation_context,
                workflow_run_id=workflow_run_id,
                workflow_run_block_id=workflow_run_block_id,
                organization_id=organization_id,
                browser_session_id=browser_session_id,
            )
            prompt_results_by_id = {
                branch.id: result for branch, result in zip(natural_language_branches, prompt_results, strict=False)
            }
            prompt_rendered_by_id = {
                branch.id: rendered
                for branch, rendered in zip(natural_language_branches, prompt_rendered_expressions, strict=False)
            }
        except Exception as exc:
            # Recorded here; the walk below surfaces this on the first prompt
            # branch it reaches and then stops.
            failure_reason = f"Failed to evaluate natural language branches: {str(exc)}"
            LOG.error(
                "Failed to evaluate natural language branches",
                block_label=self.label,
                error=str(exc),
                exc_info=True,
            )
    for idx, branch in enumerate(self.ordered_branches):
        # Per-branch record for UI display. Note: this dict is sometimes
        # mutated after being appended — same object, so the list entry
        # reflects the update.
        branch_eval: dict = {
            "branch_id": branch.id,
            "branch_index": idx,
            "criteria_type": branch.criteria.criteria_type if branch.criteria else None,
            "original_expression": branch.criteria.expression if branch.criteria else None,
            "rendered_expression": None,
            "result": None,
            "is_matched": False,
            "is_default": branch.is_default,
            "next_block_label": branch.next_block_label,
            "error": None,
        }
        # Handle default branch (no criteria to evaluate)
        if branch.criteria is None:
            # Default branch - only matched if no other branch matches
            branch_evaluations_list.append(branch_eval)
            continue
        if branch.criteria.criteria_type == "prompt":
            if failure_reason:
                # The up-front LLM evaluation failed; attach the failure to
                # this branch and stop the walk.
                branch_eval["error"] = failure_reason
                branch_evaluations_list.append(branch_eval)
                break
            prompt_result = prompt_results_by_id.get(branch.id)
            rendered_expr = prompt_rendered_by_id.get(branch.id)
            branch_eval["rendered_expression"] = rendered_expr
            if prompt_result is None:
                failure_reason = "Missing result for natural language branch evaluation"
                branch_eval["error"] = failure_reason
                LOG.error(
                    "Missing prompt evaluation result",
                    block_label=self.label,
                    branch_index=idx,
                    branch_id=branch.id,
                )
                branch_evaluations_list.append(branch_eval)
                break
            branch_eval["result"] = prompt_result
            branch_evaluations_list.append(branch_eval)
            if prompt_result:
                matched_branch = branch
                branch_eval["is_matched"] = True
                LOG.info(
                    "Conditional natural language branch matched",
                    block_label=self.label,
                    branch_index=idx,
                    next_block_label=branch.next_block_label,
                )
                break
            continue
        # Jinja template branch
        try:
            # Render the expression for UI display - substitute variables without evaluating
            rendered_expression = _render_jinja_expression_for_display(
                expression=branch.criteria.expression,
                context_values=evaluation_context.workflow_run_context.values
                if evaluation_context.workflow_run_context
                else {},
                block_label=self.label,
            )
            branch_eval["rendered_expression"] = rendered_expression
            result = await branch.criteria.evaluate(evaluation_context)
            branch_eval["result"] = result
            branch_evaluations_list.append(branch_eval)
            if result:
                matched_branch = branch
                branch_eval["is_matched"] = True
                LOG.info(
                    "Conditional branch matched",
                    block_label=self.label,
                    branch_index=idx,
                    next_block_label=branch.next_block_label,
                )
                break
        except Exception as exc:
            failure_reason = f"Failed to evaluate branch {idx} for {self.label}: {str(exc)}"
            branch_eval["error"] = str(exc)
            branch_eval["result"] = None
            branch_evaluations_list.append(branch_eval)
            LOG.error(
                "Failed to evaluate conditional branch",
                block_label=self.label,
                branch_index=idx,
                error=str(exc),
                exc_info=True,
            )
            break
    if matched_branch is None and failure_reason is None:
        # Nothing matched and nothing errored: fall back to the default/else branch.
        matched_branch = self.get_default_branch()
        # Update is_matched for default branch in evaluations
        if matched_branch:
            for eval_entry in branch_evaluations_list:
                if eval_entry["branch_id"] == matched_branch.id:
                    eval_entry["is_matched"] = True
                    break
    matched_index = self.ordered_branches.index(matched_branch) if matched_branch in self.ordered_branches else None
    next_block_label = matched_branch.next_block_label if matched_branch else None
    executed_branch_id = matched_branch.id if matched_branch else None
    # Extract execution details for frontend display
    executed_branch_expression: str | None = None
    executed_branch_result: bool | None = None
    executed_branch_next_block: str | None = None
    if matched_branch:
        executed_branch_next_block = matched_branch.next_block_label
        if matched_branch.is_default:
            # Default/else branch - no expression to evaluate
            executed_branch_expression = None
            executed_branch_result = None
        elif matched_branch.criteria:
            # Regular condition branch - it matched
            executed_branch_expression = matched_branch.criteria.expression
            executed_branch_result = True
    branch_metadata: BlockMetadata = {
        "branch_taken": next_block_label,
        "branch_index": matched_index,
        "branch_id": executed_branch_id,
        "branch_description": matched_branch.description if matched_branch else None,
        "criteria_type": matched_branch.criteria.criteria_type
        if matched_branch and matched_branch.criteria
        else None,
        "criteria_expression": matched_branch.criteria.expression
        if matched_branch and matched_branch.criteria
        else None,
        "next_block_label": next_block_label,
        # Detailed evaluation info for all branches
        "evaluations": branch_evaluations_list if branch_evaluations_list else None,
        # Raw LLM response for debugging prompt-based evaluations (masked for secrets)
        "llm_response": (
            workflow_run_context.mask_secrets_in_data(prompt_llm_response)
            if workflow_run_context and prompt_llm_response
            else prompt_llm_response
        ),
        # The exact prompt sent to LLM for debugging (masked for secrets)
        "llm_prompt": (
            workflow_run_context.mask_secrets_in_data(prompt_extraction_goal)
            if workflow_run_context and prompt_extraction_goal
            else prompt_extraction_goal
        ),
    }
    status = BlockStatus.completed
    success = True
    if failure_reason:
        status = BlockStatus.failed
        success = False
    elif matched_branch is None:
        failure_reason = "No conditional branch matched and no default branch configured"
        status = BlockStatus.failed
        success = False
    if workflow_run_context:
        workflow_run_context.update_block_metadata(self.label, branch_metadata)
        # Best-effort: downstream blocks can still run even if the metadata
        # could not be persisted as an output parameter.
        try:
            await self.record_output_parameter_value(
                workflow_run_context=workflow_run_context,
                workflow_run_id=workflow_run_id,
                value=branch_metadata,
            )
        except Exception as exc:
            LOG.warning(
                "Failed to record branch metadata as output parameter",
                workflow_run_id=workflow_run_id,
                block_label=self.label,
                error=str(exc),
            )
    block_result = await self.build_block_result(
        success=success,
        failure_reason=failure_reason,
        output_parameter_value=branch_metadata,
        status=status,
        workflow_run_block_id=workflow_run_block_id,
        organization_id=organization_id,
        executed_branch_id=executed_branch_id,
        executed_branch_expression=executed_branch_expression,
        executed_branch_result=executed_branch_result,
        executed_branch_next_block=executed_branch_next_block,
    )
    return block_result
@property
def ordered_branches(self) -> list[BranchCondition]:
    """Branches exactly as the author listed them, returned as a fresh list copy."""
    return [*self.branch_conditions]
def get_default_branch(self) -> BranchCondition | None:
    """Return the first branch flagged as default ("else"), or None when none is configured."""
    for candidate in self.branch_conditions:
        if candidate.is_default:
            return candidate
    return None
class WorkflowTriggerBlock(Block):
    """Trigger another Skyvern workflow from within a workflow run.

    The target is addressed by ``workflow_permanent_id``; ``payload`` (after
    Jinja2 template resolution against the parent run's context) becomes the
    child run's request data. With ``wait_for_completion=True`` the child is
    set up and executed inline, and its final status and output parameters are
    included in this block's output; otherwise the child is dispatched
    fire-and-forget via Temporal.
    """

    # There is a mypy bug with Literal. Without the type: ignore, mypy will raise an error:
    # Parameter 1 of Literal[...] cannot be of type "Any"
    block_type: Literal[BlockType.WORKFLOW_TRIGGER] = BlockType.WORKFLOW_TRIGGER  # type: ignore
    # The permanent ID of the target workflow to trigger
    workflow_permanent_id: str
    # Parameters/payload to pass to the triggered workflow
    payload: dict[str, Any] | None = None
    # Whether to wait for the triggered workflow to complete
    wait_for_completion: bool = True
    # Optional browser session ID for the triggered workflow
    browser_session_id: str | None = None
    # When True, the child workflow inherits the parent's browser session
    use_parent_browser_session: bool = False
    # Parameters for Jinja2 template interpolation
    parameters: list[PARAMETER_TYPE] = []

    # Maximum parent->child trigger nesting allowed before a run is rejected.
    MAX_TRIGGER_DEPTH: ClassVar[int] = 10

    def get_all_parameters(
        self,
        workflow_run_id: str,
    ) -> list[PARAMETER_TYPE]:
        """Return the parameters available for template interpolation in this block."""
        return self.parameters

    async def _check_trigger_depth(self, workflow_run_id: str) -> int:
        """Check the nesting depth of workflow triggers to prevent infinite recursion.

        Note: This depth guard walks the parent_workflow_run_id chain, which is only
        populated for synchronous triggers. For async (fire-and-forget) dispatch, the
        parent may have already completed before the child runs, so circular async
        chains (A->B->A) are only blocked while A is still running. A full
        visited-workflow guard would require persistent state and is left as a future
        enhancement.

        Returns the computed depth; raises InvalidWorkflowDefinition when the
        chain reaches MAX_TRIGGER_DEPTH.
        """
        depth = 0
        current_run_id: str | None = workflow_run_id
        while current_run_id:
            if depth >= self.MAX_TRIGGER_DEPTH:
                raise InvalidWorkflowDefinition(
                    f"Workflow trigger depth exceeds maximum of {self.MAX_TRIGGER_DEPTH}. "
                    "This may indicate a circular workflow trigger chain."
                )
            run = await app.DATABASE.get_workflow_run(current_run_id)
            if not run or not run.parent_workflow_run_id:
                break
            current_run_id = run.parent_workflow_run_id
            depth += 1
        return depth

    def _render_template_value(
        self,
        value: str,
        workflow_run_context: WorkflowRunContext,
    ) -> Any:
        """Render a single Jinja2 template string, handling the | json filter marker.

        When the rendered value is fully wrapped in the JSON marker, the inner
        text is parsed and returned as a Python object; a marker mixed with
        other text is rejected, since '| json' must replace the whole value.
        """
        rendered = self.format_block_parameter_template_from_workflow_run_context(
            value, workflow_run_context, force_include_secrets=True
        )
        if rendered.startswith(_JSON_TYPE_MARKER) and rendered.endswith(_JSON_TYPE_MARKER):
            json_str = rendered[len(_JSON_TYPE_MARKER) : -len(_JSON_TYPE_MARKER)]
            try:
                return json.loads(json_str)
            except json.JSONDecodeError:
                raise FailedToFormatJinjaStyleParameter(value, f"Raw JSON filter produced invalid JSON: {json_str}")
        elif _JSON_TYPE_MARKER in rendered:
            raise FailedToFormatJinjaStyleParameter(
                value,
                "The '| json' filter can only be used for complete value replacement. "
                "It cannot be combined with other text (e.g., 'prefix-{{ val | json }}'). "
                "Remove the surrounding text or remove the '| json' filter.",
            )
        return rendered

    def _render_templates_in_payload(
        self,
        payload: dict[str, Any],
        workflow_run_context: WorkflowRunContext,
    ) -> dict[str, Any]:
        """Recursively render Jinja2 templates in payload values."""
        resolved: dict[str, Any] = {}
        for key, value in payload.items():
            if isinstance(value, str):
                resolved[key] = self._render_template_value(value, workflow_run_context)
            elif isinstance(value, dict):
                resolved[key] = self._render_templates_in_payload(value, workflow_run_context)
            elif isinstance(value, list):
                resolved[key] = self._render_templates_in_list(value, workflow_run_context)
            else:
                # Non-container, non-string values (numbers, bools, None) pass through.
                resolved[key] = value
        return resolved

    def _render_templates_in_list(
        self,
        items: list[Any],
        workflow_run_context: WorkflowRunContext,
    ) -> list[Any]:
        """Recursively render Jinja2 templates in list items (strings, nested dicts, and nested lists)."""
        result: list[Any] = []
        for item in items:
            if isinstance(item, str):
                result.append(self._render_template_value(item, workflow_run_context))
            elif isinstance(item, dict):
                result.append(self._render_templates_in_payload(item, workflow_run_context))
            elif isinstance(item, list):
                result.append(self._render_templates_in_list(item, workflow_run_context))
            else:
                result.append(item)
        return result

    def format_potential_template_parameters(self, workflow_run_context: WorkflowRunContext) -> None:
        """Resolve Jinja2 templates in this block's configurable fields, in place.

        Mutates workflow_permanent_id, payload, and browser_session_id with
        their rendered values (secrets are force-included during rendering).
        """
        self.workflow_permanent_id = self.format_block_parameter_template_from_workflow_run_context(
            self.workflow_permanent_id, workflow_run_context, force_include_secrets=True
        )
        if self.payload:
            self.payload = self._render_templates_in_payload(self.payload, workflow_run_context)
        if self.browser_session_id:
            self.browser_session_id = self.format_block_parameter_template_from_workflow_run_context(
                self.browser_session_id, workflow_run_context, force_include_secrets=True
            )

    async def execute(
        self,
        workflow_run_id: str,
        workflow_run_block_id: str,
        organization_id: str | None = None,
        browser_session_id: str | None = None,
        **kwargs: dict,
    ) -> BlockResult:
        """Trigger the target workflow and return this block's result.

        Steps: (1) resolve templates, (2) guard against trigger recursion,
        (3) load the organization, (4) resolve which browser session the child
        should use, (5) run the child synchronously inline or dispatch it
        asynchronously. All failure paths go through the local ``_fail`` helper
        so the failure reason is always recorded as this block's output.
        """
        from skyvern.forge.sdk.workflow.models.workflow import WorkflowRequestBody, WorkflowRunStatus  # noqa: PLC0415

        workflow_run_context = self.get_workflow_run_context(workflow_run_id)

        # Helper to record output and build a failed block result in one step.
        # This ensures downstream blocks referencing block_X_output see the
        # failure reason instead of "parameter not found".
        async def _fail(failure_reason: str) -> BlockResult:
            error_output = {"failure_reason": failure_reason}
            await self.record_output_parameter_value(workflow_run_context, workflow_run_id, error_output)
            return await self.build_block_result(
                success=False,
                failure_reason=failure_reason,
                output_parameter_value=error_output,
                status=BlockStatus.failed,
                workflow_run_block_id=workflow_run_block_id,
                organization_id=organization_id,
            )

        # 1. Resolve Jinja2 templates
        try:
            self.format_potential_template_parameters(workflow_run_context)
        except Exception as e:
            return await _fail(f"Failed to resolve templates: {str(e)}")
        resolved_workflow_permanent_id = self.workflow_permanent_id
        resolved_payload = self.payload
        # 2. Check recursion depth
        try:
            await self._check_trigger_depth(workflow_run_id)
        except InvalidWorkflowDefinition as e:
            return await _fail(str(e))
        # 3. Get the organization
        if not organization_id:
            return await _fail("organization_id is required for WorkflowTriggerBlock")
        organization = await app.DATABASE.get_organization(organization_id)
        if not organization:
            return await _fail(f"Organization {organization_id} not found")
        # 4. Resolve browser session
        # Browser session priority:
        #   1. Explicit browser_session_id configured on the block
        #   2. use_parent_browser_session → inherit parent's session (persistent
        #      or in-memory via self.pages[parent_workflow_run_id] lookup)
        #   3. Neither → for sync (wait_for_completion), create a fresh persistent
        #      session; for async (fire-and-forget), let the child's Temporal worker
        #      handle its own browser.
        created_fresh_session = False
        if self.browser_session_id:
            resolved_browser_session_id = self.browser_session_id
        elif self.use_parent_browser_session and browser_session_id:
            resolved_browser_session_id = browser_session_id
        elif self.use_parent_browser_session:
            # Parent uses an in-memory browser (no persistent session).
            # Pass None so the child inherits via the parent_workflow_run_id
            # lookup in get_or_create_for_workflow_run.
            resolved_browser_session_id = None
        elif self.wait_for_completion:
            # Sync mode: child runs inline in the same process, so it needs
            # its own persistent session to avoid sharing the parent's browser.
            parent_workflow_run = await app.DATABASE.get_workflow_run(workflow_run_id)
            proxy_location = parent_workflow_run.proxy_location if parent_workflow_run else None
            try:
                child_browser_session = await app.PERSISTENT_SESSIONS_MANAGER.create_session(
                    organization_id=organization_id,
                    proxy_location=proxy_location,
                    timeout_minutes=30,
                )
                resolved_browser_session_id = child_browser_session.persistent_browser_session_id
                created_fresh_session = True
                LOG.info(
                    "Created fresh browser session for triggered workflow",
                    parent_workflow_run_id=workflow_run_id,
                    child_browser_session_id=resolved_browser_session_id,
                )
            except Exception as e:
                return await _fail(f"Failed to create browser session for triggered workflow: {str(e)}")
        else:
            # Async (fire-and-forget): the child runs in its own Temporal worker
            # and will create its own browser. No pre-creation needed.
            resolved_browser_session_id = None
        # 5. Execute based on wait mode
        output_data: dict[str, Any] = {}
        success = False
        if self.wait_for_completion:
            # Synchronous: setup + execute inline in the same process.
            workflow_request = WorkflowRequestBody(
                data=resolved_payload,
                browser_session_id=resolved_browser_session_id,
            )
            # Save the parent's skyvern_context because setup_workflow_run and
            # execute_workflow overwrite it with the child's values. We restore
            # it after the child finishes so subsequent parent blocks get correct
            # context (logs, observability, workflow_run_id, etc.).
            from skyvern.forge.sdk.core import skyvern_context  # noqa: PLC0415

            parent_context = skyvern_context.current()
            try:
                triggered_workflow_run = await app.WORKFLOW_SERVICE.setup_workflow_run(
                    request_id=None,
                    workflow_request=workflow_request,
                    workflow_permanent_id=resolved_workflow_permanent_id,
                    organization=organization,
                    parent_workflow_run_id=workflow_run_id,
                )
            except Exception as e:
                # Setup failed before execution started: restore the parent's
                # context and release the session we created, then fail.
                error_msg = get_user_facing_exception_message(e)
                if parent_context:
                    skyvern_context.set(parent_context)
                if created_fresh_session and resolved_browser_session_id:
                    try:
                        await app.PERSISTENT_SESSIONS_MANAGER.close_session(
                            organization_id, resolved_browser_session_id
                        )
                    except Exception:
                        LOG.warning(
                            "Failed to close child browser session after setup failure",
                            child_browser_session_id=resolved_browser_session_id,
                            exc_info=True,
                        )
                return await _fail(f"Failed to setup triggered workflow run: {error_msg}")
            triggered_run_id = triggered_workflow_run.workflow_run_id
            LOG.info(
                "Triggered workflow run (sync)",
                parent_workflow_run_id=workflow_run_id,
                triggered_workflow_run_id=triggered_run_id,
                triggered_workflow_permanent_id=resolved_workflow_permanent_id,
            )
            try:
                final_run = await app.WORKFLOW_SERVICE.execute_workflow(
                    workflow_run_id=triggered_run_id,
                    api_key=None,
                    organization=organization,
                    browser_session_id=resolved_browser_session_id,
                )
                success = final_run.status == WorkflowRunStatus.completed
                output_data = {
                    "workflow_run_id": triggered_run_id,
                    "workflow_permanent_id": resolved_workflow_permanent_id,
                    "status": str(final_run.status),
                    "failure_reason": final_run.failure_reason,
                }
                # Include the child workflow's output parameters so downstream
                # blocks can reference them (e.g. block_3_output.outputs.block_2_output)
                try:
                    child_output_params = (
                        await app.WORKFLOW_SERVICE.get_output_parameter_workflow_run_output_parameter_tuples(
                            workflow_id=final_run.workflow_id,
                            workflow_run_id=triggered_run_id,
                        )
                    )
                    child_outputs: dict[str, Any] = {}
                    for output_param, run_output_param in child_output_params:
                        child_outputs[output_param.key] = run_output_param.value
                    output_data["outputs"] = child_outputs
                except Exception:
                    # Best-effort: a missing "outputs" key is acceptable.
                    LOG.warning(
                        "Failed to fetch child workflow outputs",
                        triggered_workflow_run_id=triggered_run_id,
                        exc_info=True,
                    )
            except Exception as e:
                error_msg = get_user_facing_exception_message(e)
                output_data = {
                    "workflow_run_id": triggered_run_id,
                    "workflow_permanent_id": resolved_workflow_permanent_id,
                    "status": "failed",
                    "failure_reason": f"Triggered workflow execution failed: {error_msg}",
                }
                success = False
            finally:
                # Always restore the parent's context and release a session we
                # created, regardless of how the child execution ended.
                if parent_context:
                    skyvern_context.set(parent_context)
                if created_fresh_session and resolved_browser_session_id:
                    try:
                        await app.PERSISTENT_SESSIONS_MANAGER.close_session(
                            organization_id, resolved_browser_session_id
                        )
                    except Exception:
                        LOG.warning(
                            "Failed to close child browser session",
                            child_browser_session_id=resolved_browser_session_id,
                            triggered_workflow_run_id=triggered_run_id,
                            exc_info=True,
                        )
        else:
            # Fire and forget: dispatch the child workflow via Temporal so it
            # gets its own independent worker process. This ensures the child
            # survives even if the parent workflow finishes first.
            # NOTE: This path requires Temporal (cloud). On self-hosted
            # (BackgroundTaskExecutor), the workflow run record is created but
            # execution is silently skipped because background_tasks=None.
            from skyvern.services.workflow_service import run_workflow  # noqa: PLC0415

            workflow_request = WorkflowRequestBody(
                data=resolved_payload,
                browser_session_id=resolved_browser_session_id,
            )
            try:
                triggered_workflow_run = await run_workflow(
                    workflow_id=resolved_workflow_permanent_id,
                    organization=organization,
                    workflow_request=workflow_request,
                    request=None,
                    background_tasks=None,
                    parent_workflow_run_id=workflow_run_id,
                )
            except Exception as e:
                error_msg = get_user_facing_exception_message(e)
                return await _fail(f"Failed to dispatch triggered workflow: {error_msg}")
            triggered_run_id = triggered_workflow_run.workflow_run_id
            LOG.info(
                "Async workflow dispatch succeeded (via Temporal)",
                parent_workflow_run_id=workflow_run_id,
                triggered_workflow_run_id=triggered_run_id,
                triggered_workflow_permanent_id=resolved_workflow_permanent_id,
            )
            output_data = {
                "workflow_run_id": triggered_run_id,
                "workflow_permanent_id": resolved_workflow_permanent_id,
                "status": "queued",
            }
            success = True
        await self.record_output_parameter_value(workflow_run_context, workflow_run_id, output_data)
        return await self.build_block_result(
            success=success,
            failure_reason=output_data.get("failure_reason") if not success else None,
            output_parameter_value=output_data,
            status=BlockStatus.completed if success else BlockStatus.failed,
            workflow_run_block_id=workflow_run_block_id,
            organization_id=organization_id,
        )
def get_all_blocks(blocks: list[BlockTypeVar]) -> list[BlockTypeVar]:
    """
    Flatten a workflow definition into the full list of blocks it contains.

    At time of writing, only the ForLoop block can nest other blocks. The
    traversal is pre-order: each loop block is immediately followed by its
    (recursively flattened) children, matching a depth-first walk.
    """
    flattened: list[BlockTypeVar] = []
    pending: deque = deque(blocks)
    while pending:
        current = pending.popleft()
        flattened.append(current)
        if current.block_type == BlockType.FOR_LOOP:
            # Push children to the front (in order) so they are visited
            # before any sibling that follows the loop block.
            pending.extendleft(reversed(current.loop_blocks))
    return flattened
# Closed union of every concrete Block implementation in this module.
# Together with the `block_type` discriminator below, pydantic uses it to
# deserialize a raw block dict into the right subclass.
BlockSubclasses = Union[
    ConditionalBlock,
    ForLoopBlock,
    TaskBlock,
    CodeBlock,
    TextPromptBlock,
    DownloadToS3Block,
    UploadToS3Block,
    SendEmailBlock,
    FileParserBlock,
    PDFParserBlock,
    ValidationBlock,
    ActionBlock,
    NavigationBlock,
    ExtractionBlock,
    LoginBlock,
    WaitBlock,
    HumanInteractionBlock,
    FileDownloadBlock,
    UrlBlock,
    TaskV2Block,
    FileUploadBlock,
    HttpRequestBlock,
    PrintPageBlock,
    WorkflowTriggerBlock,
]
# Discriminated-union alias used wherever "any block" is accepted; the
# `block_type` field on each subclass selects the concrete type at validation.
BlockTypeVar = Annotated[BlockSubclasses, Field(discriminator="block_type")]
# Same pattern for branch criteria on conditional-block branches:
# `criteria_type` discriminates between Jinja-template and prompt criteria.
BranchCriteriaSubclasses = Union[JinjaBranchCriteria, PromptBranchCriteria]
BranchCriteriaTypeVar = Annotated[BranchCriteriaSubclasses, Field(discriminator="criteria_type")]