# skyvern_codegen_cst.py """ Generate a runnable Skyvern workflow script. Example ------- generated_code = generate_workflow_script( file_name="workflow.py", workflow_run_request=workflow_run_request, workflow=workflow, tasks=tasks, actions_by_task=actions_by_task, ) Path("workflow.py").write_text(src) """ from __future__ import annotations import hashlib import keyword from typing import Any import libcst as cst import structlog from libcst import Attribute, Call, Dict, DictElement, FunctionDef, Name, Param from skyvern.config import settings from skyvern.core.script_generations.constants import SCRIPT_TASK_BLOCKS from skyvern.core.script_generations.generate_workflow_parameters import ( generate_workflow_parameters_schema, hydrate_input_text_actions_with_field_names, ) from skyvern.forge import app from skyvern.webeye.actions.action_types import ActionType LOG = structlog.get_logger(__name__) # --------------------------------------------------------------------- # # 1. helpers # # --------------------------------------------------------------------- # ACTION_MAP = { "click": "click", "input_text": "fill", "upload_file": "upload_file", "select_option": "select_option", "goto": "goto", "scroll": "scroll", "keypress": "keypress", "type": "type", "move": "move", "drag": "drag", "solve_captcha": "solve_captcha", "verification_code": "verification_code", "wait": "wait", "extract": "extract", } ACTIONS_WITH_XPATH = [ "click", "input_text", "type", "fill", "upload_file", "select_option", ] INDENT = " " * 4 DOUBLE_INDENT = " " * 8 def _safe_name(label: str) -> str: s = "".join(c if c.isalnum() else "_" for c in label).lower() if not s or s[0].isdigit() or keyword.iskeyword(s): s = f"_{s}" while "__" in s: s = s.replace("__", "_") return s def _value(value: Any) -> cst.BaseExpression: """Convert simple Python objects to CST expressions.""" if isinstance(value, str): if "\n" in value: return cst.SimpleString('"""' + value.replace('"""', '\\"\\"\\"') + '"""') return 
cst.SimpleString(repr(value)) if isinstance(value, (int, float, bool)) or value is None: return cst.parse_expression(repr(value)) if isinstance(value, dict): return Dict( [ DictElement( key=_value(k), value=_value(v), ) for k, v in value.items() ] ) if isinstance(value, (list, tuple)): elts = [cst.Element(_value(v)) for v in value] return cst.List(elts) if isinstance(value, list) else cst.Tuple(elts) # fallback return cst.SimpleString(repr(str(value))) def _render_value(prompt_text: str) -> cst.BaseExpression: """Create a prompt value with template rendering logic if needed.""" if "{{" in prompt_text and "}}" in prompt_text: # Generate code for: render_template(prompt_text) return cst.Call( func=cst.Attribute(value=cst.Name("skyvern"), attr=cst.Name("render_template")), args=[cst.Arg(value=_value(prompt_text))], ) else: # Return the prompt as a simple string value return _value(prompt_text) def _generate_text_call(text_value: str, intention: str, parameter_key: str) -> cst.BaseExpression: """Create a generate_text function call CST expression.""" return cst.Await( expression=cst.Call( func=cst.Attribute(value=cst.Name("skyvern"), attr=cst.Name("generate_text")), whitespace_before_args=cst.ParenthesizedWhitespace( indent=True, last_line=cst.SimpleWhitespace(DOUBLE_INDENT), ), args=[ # First positional argument: context.generated_parameters['parameter_key'] cst.Arg( value=cst.Subscript( value=cst.Attribute( value=cst.Name("context"), attr=cst.Name("generated_parameters"), ), slice=[cst.SubscriptElement(slice=cst.Index(value=_value(parameter_key)))], ), whitespace_after_arg=cst.ParenthesizedWhitespace( indent=True, last_line=cst.SimpleWhitespace(DOUBLE_INDENT), ), ), # intention keyword argument cst.Arg( keyword=cst.Name("intention"), value=_value(intention), whitespace_after_arg=cst.ParenthesizedWhitespace( indent=True, last_line=cst.SimpleWhitespace(DOUBLE_INDENT), ), ), # data keyword argument cst.Arg( keyword=cst.Name("data"), value=cst.Attribute( 
value=cst.Name("context"), attr=cst.Name("parameters"), ), whitespace_after_arg=cst.ParenthesizedWhitespace( indent=True, last_line=cst.SimpleWhitespace(INDENT), ), comma=cst.Comma(), ), ], ) ) # --------------------------------------------------------------------- # # 2. utility builders # # --------------------------------------------------------------------- # def _workflow_decorator(wf_req: dict[str, Any]) -> cst.Decorator: """ Build @skyvern.workflow( title="...", totp_url=..., totp_identifier=..., webhook_callback_url=..., max_steps=... ) """ # helper that skips “None” so the output is concise def kw(key: str, value: Any) -> cst.Arg | None: if value is None: return None return cst.Arg(keyword=cst.Name(key), value=_value(value)) args: list = list( filter( None, [ kw("title", wf_req.get("title", "")), kw("totp_url", wf_req.get("totp_url")), kw("totp_identifier", wf_req.get("totp_identifier")), kw("webhook_url", wf_req.get("webhook_url")), kw("max_steps", wf_req.get("max_steps")), ], ) ) return cst.Decorator( decorator=cst.Call( func=cst.Attribute(value=cst.Name("skyvern"), attr=cst.Name("workflow")), args=args, ) ) def _make_decorator(block_label: str, block: dict[str, Any]) -> cst.Decorator: kwargs = [ cst.Arg( keyword=cst.Name("cache_key"), value=_value(block_label), ) ] return cst.Decorator( decorator=Call( func=Attribute(value=cst.Name("skyvern"), attr=cst.Name("cached")), args=kwargs, ) ) def _action_to_stmt(act: dict[str, Any], assign_to_output: bool = False) -> cst.BaseStatement: """ Turn one Action dict into: await page.(xpath=..., intention=..., data=context.parameters) Or if assign_to_output is True for extract actions: output = await page.extract(...) 
""" method = ACTION_MAP[act["action_type"]] args: list[cst.Arg] = [] if method in ACTIONS_WITH_XPATH: args.append( cst.Arg( keyword=cst.Name("xpath"), value=_value(act["xpath"]), whitespace_after_arg=cst.ParenthesizedWhitespace( indent=True, last_line=cst.SimpleWhitespace(INDENT), ), ) ) if method in ["type", "fill"]: # Get intention from action intention = act.get("intention") or act.get("reasoning") or "" # Use generate_text call if field_name is available, otherwise fallback to direct value if act.get("field_name"): text_value = _generate_text_call( text_value=act["text"], intention=intention, parameter_key=act["field_name"] ) else: text_value = _value(act["text"]) args.append( cst.Arg( keyword=cst.Name("text"), value=text_value, whitespace_after_arg=cst.ParenthesizedWhitespace( indent=True, last_line=cst.SimpleWhitespace(INDENT), ), ) ) elif method == "select_option": args.append( cst.Arg( keyword=cst.Name("option"), value=_value(act["option"]["value"]), whitespace_after_arg=cst.ParenthesizedWhitespace( indent=True, last_line=cst.SimpleWhitespace(INDENT), ), ), ) elif method == "wait": args.append( cst.Arg( keyword=cst.Name("seconds"), value=_value(act["seconds"]), whitespace_after_arg=cst.ParenthesizedWhitespace( indent=True, last_line=cst.SimpleWhitespace(INDENT), ), ) ) elif method == "extract": args.append( cst.Arg( keyword=cst.Name("prompt"), value=_render_value(act["data_extraction_goal"]), whitespace_after_arg=cst.ParenthesizedWhitespace( indent=True, last_line=cst.SimpleWhitespace(INDENT), ), ) ) if act.get("data_extraction_schema"): args.append( cst.Arg( keyword=cst.Name("schema"), value=_value(act["data_extraction_schema"]), whitespace_after_arg=cst.ParenthesizedWhitespace( indent=True, last_line=cst.SimpleWhitespace(INDENT), ), comma=cst.Comma(), ) ) args.extend( [ cst.Arg( keyword=cst.Name("intention"), value=_value(act.get("intention") or act.get("reasoning") or ""), whitespace_after_arg=cst.ParenthesizedWhitespace( indent=True, 
last_line=cst.SimpleWhitespace(INDENT), ), ), cst.Arg( keyword=cst.Name("data"), value=cst.Attribute(value=cst.Name("context"), attr=cst.Name("parameters")), whitespace_after_arg=cst.ParenthesizedWhitespace(indent=True), comma=cst.Comma(), ), ] ) call = cst.Call( func=cst.Attribute(value=cst.Name("page"), attr=cst.Name(method)), args=args, whitespace_before_args=cst.ParenthesizedWhitespace( indent=True, last_line=cst.SimpleWhitespace(INDENT), ), ) # await page.method(...) await_expr = cst.Await(call) # If this is an extract action and we want to assign to output if assign_to_output and method == "extract": # output = await page.extract(...) assign = cst.Assign( targets=[cst.AssignTarget(cst.Name("output"))], value=await_expr, ) return cst.SimpleStatementLine([assign]) else: # Wrap in a statement line: await ... return cst.SimpleStatementLine([cst.Expr(await_expr)]) def _build_block_fn(block: dict[str, Any], actions: list[dict[str, Any]]) -> FunctionDef: name = block.get("label") or _safe_name(block.get("title") or f"block_{block.get('workflow_run_block_id')}") body_stmts: list[cst.BaseStatement] = [] is_extraction_block = block.get("block_type") == "extraction" if block.get("url"): body_stmts.append(cst.parse_statement(f"await page.goto({repr(block['url'])})")) for act in actions: if act["action_type"] in [ActionType.COMPLETE, ActionType.TERMINATE, ActionType.NULL_ACTION]: continue # For extraction blocks, assign extract action results to output variable assign_to_output = is_extraction_block and act["action_type"] == "extract" body_stmts.append(_action_to_stmt(act, assign_to_output=assign_to_output)) # For extraction blocks, add return output statement if we have actions if is_extraction_block and any( act["action_type"] == "extract" for act in actions if act["action_type"] not in [ActionType.COMPLETE, ActionType.TERMINATE, ActionType.NULL_ACTION] ): body_stmts.append(cst.parse_statement("return output")) elif not body_stmts: 
body_stmts.append(cst.parse_statement("return None")) return FunctionDef( name=Name(name), params=cst.Parameters( params=[ Param(name=Name("page"), annotation=cst.Annotation(cst.Name("SkyvernPage"))), Param(name=Name("context"), annotation=cst.Annotation(cst.Name("RunContext"))), ] ), decorators=[_make_decorator(name, block)], body=cst.IndentedBlock(body_stmts), returns=None, asynchronous=cst.Asynchronous(), ) def _build_model(workflow: dict[str, Any]) -> cst.ClassDef: """ class WorkflowParameters(BaseModel): param1: str param2: str ... """ ann_lines: list[cst.BaseStatement] = [] for p in workflow["workflow_definition"]["parameters"]: if p["parameter_type"] != "workflow": continue ann = cst.AnnAssign( target=cst.Name(p["key"]), annotation=cst.Annotation(cst.Name("str")), value=None, ) ann_lines.append(cst.SimpleStatementLine([ann])) if not ann_lines: # no parameters ann_lines.append(cst.SimpleStatementLine([cst.Pass()])) return cst.ClassDef( name=cst.Name("WorkflowParameters"), bases=[cst.Arg(cst.Name("BaseModel"))], body=cst.IndentedBlock(ann_lines), # ← wrap in block ) def _build_generated_model_from_schema(schema_code: str) -> cst.ClassDef | None: """ Parse the generated schema code and return a ClassDef, or None if parsing fails. """ try: # Parse the schema code and extract just the class definition parsed_module = cst.parse_module(schema_code) # Find the GeneratedWorkflowParameters class in the parsed module for node in parsed_module.body: if isinstance(node, cst.ClassDef) and node.name.value == "GeneratedWorkflowParameters": return node # If no class found, return None return None except Exception as e: LOG.warning("Failed to parse generated schema code", error=str(e)) return None # --------------------------------------------------------------------- # # 3. 
# statement builders                                                    #
# --------------------------------------------------------------------- #


def _stmt_arg(key: str, value: cst.BaseExpression, *, last: bool = False) -> cst.Arg:
    """Build one ``key = value`` keyword argument laid out on its own line.

    A non-last argument is followed by a line break indented one level; the
    last argument carries an explicit trailing comma and a line break at the
    statement's own indentation, so the closing paren lines up with the call.
    """
    if last:
        return cst.Arg(
            keyword=cst.Name(key),
            value=value,
            whitespace_after_arg=cst.ParenthesizedWhitespace(
                indent=True,
            ),
            comma=cst.Comma(),
        )
    return cst.Arg(
        keyword=cst.Name(key),
        value=value,
        whitespace_after_arg=cst.ParenthesizedWhitespace(
            indent=True,
            last_line=cst.SimpleWhitespace(INDENT),
        ),
    )


def _await_skyvern_call(method: str, args: list[cst.Arg]) -> cst.SimpleStatementLine:
    """Wrap ``args`` into the statement ``await skyvern.<method>(...)``."""
    call = cst.Call(
        func=cst.Attribute(value=cst.Name("skyvern"), attr=cst.Name(method)),
        args=args,
        whitespace_before_args=cst.ParenthesizedWhitespace(
            indent=True,
            last_line=cst.SimpleWhitespace(INDENT),
        ),
    )
    return cst.SimpleStatementLine([cst.Expr(cst.Await(call))])


def _build_run_task_statement(block_title: str, block: dict[str, Any]) -> cst.SimpleStatementLine:
    """Build a skyvern.run_task statement."""
    return _await_skyvern_call("run_task", __build_base_task_statement(block_title, block))


def _build_download_statement(block_title: str, block: dict[str, Any]) -> cst.SimpleStatementLine:
    """Build a skyvern.download statement."""
    # ``or ""``: the key may be present with a None value, which .get(key, "") would pass through.
    args = [
        _stmt_arg("prompt", _render_value(block.get("navigation_goal") or "")),
        _stmt_arg("complete_on_download", _value(block.get("complete_on_download", False))),
        _stmt_arg("download_suffix", _render_value(block.get("download_suffix") or "")),
        _stmt_arg("cache_key", _value(block_title), last=True),
    ]
    return _await_skyvern_call("download", args)


def _build_action_statement(block_title: str, block: dict[str, Any]) -> cst.SimpleStatementLine:
    """Build a skyvern.action statement."""
    args = [
        _stmt_arg("prompt", _render_value(block.get("navigation_goal") or "")),
        _stmt_arg("cache_key", _value(block_title), last=True),
    ]
    return _await_skyvern_call("action", args)


def _build_login_statement(block_title: str, block: dict[str, Any]) -> cst.SimpleStatementLine:
    """Build a skyvern.login statement."""
    return _await_skyvern_call("login", __build_base_task_statement(block_title, block))


def _build_extract_statement(block_title: str, block: dict[str, Any]) -> cst.SimpleStatementLine:
    """Build a skyvern.extract statement."""
    args = [
        _stmt_arg("prompt", _render_value(block.get("data_extraction_goal") or "")),
        _stmt_arg("schema", _value(block.get("data_schema", ""))),
        _stmt_arg("cache_key", _value(block_title), last=True),
    ]
    return _await_skyvern_call("extract", args)


def _build_navigate_statement(block_title: str, block: dict[str, Any]) -> cst.SimpleStatementLine:
    """Build the statement for a navigation block.

    Navigation blocks are emitted as a ``skyvern.run_task`` call with
    ``prompt``, ``url``, ``max_steps`` and ``cache_key``.
    """
    args = [
        _stmt_arg("prompt", _render_value(block.get("navigation_goal") or "")),
        _stmt_arg("url", _value(block.get("url", ""))),
        _stmt_arg("max_steps", _value(block.get("max_steps_per_run", settings.MAX_STEPS_PER_RUN))),
        _stmt_arg("cache_key", _value(block_title), last=True),
    ]
    return _await_skyvern_call("run_task", args)


def _build_send_email_statement(block: dict[str, Any]) -> cst.SimpleStatementLine:
    """Build a skyvern.send_email statement."""
    args = [
        _stmt_arg("sender", _value(block.get("sender", ""))),
        _stmt_arg("subject", _value(block.get("subject", ""))),
        _stmt_arg("body", _value(block.get("body", ""))),
        _stmt_arg("recipients", _value(block.get("recipients", []))),
        _stmt_arg("attach_downloaded_files", _value(block.get("attach_downloaded_files", False)), last=True),
    ]
    return _await_skyvern_call("send_email", args)


def _build_validate_statement(block: dict[str, Any]) -> cst.SimpleStatementLine:
    """Build a skyvern.validate statement."""
    args = [
        _stmt_arg("prompt", _render_value(block.get("navigation_goal") or ""), last=True),
    ]
    return _await_skyvern_call("validate", args)


def _build_wait_statement(block: dict[str, Any]) -> cst.SimpleStatementLine:
    """Build a skyvern.wait statement (``seconds`` defaults to 1)."""
    args = [
        _stmt_arg("seconds", _value(block.get("wait_sec", 1)), last=True),
    ]
    return _await_skyvern_call("wait", args)


def _build_for_loop_statement(block_title: str, block: dict[str, Any]) -> cst.SimpleStatementLine:
    """Build a skyvern.for_loop statement.

    ``block_title`` is accepted for signature parity with the other builders
    but is not emitted.
    """
    args = [
        _stmt_arg("prompt", _render_value(block.get("navigation_goal") or "")),
        _stmt_arg("max_steps", _value(block.get("max_steps_per_run", settings.MAX_STEPS_PER_RUN)), last=True),
    ]
    return _await_skyvern_call("for_loop", args)


def _build_goto_statement(block: dict[str, Any]) -> cst.SimpleStatementLine:
    """Build a skyvern.goto statement."""
    args = [
        _stmt_arg("url", _value(block.get("url", "")), last=True),
    ]
    return _await_skyvern_call("goto", args)


def __build_base_task_statement(block_title: str, block: dict[str, Any]) -> list[cst.Arg]:
    """Build the keyword arguments shared by the run_task and login statements.

    Always emits ``prompt`` first and ``cache_key`` last; ``url``,
    ``max_steps``, ``totp_identifier`` and ``totp_url`` are emitted only when
    the corresponding block field is truthy.
    """
    args = [_stmt_arg("prompt", _render_value(block.get("navigation_goal") or ""))]

    if block.get("url"):
        args.append(_stmt_arg("url", _render_value(block.get("url", ""))))

    max_steps = block.get("max_steps_per_run")
    if max_steps:
        # Only strings can hold a template; a numeric step count must be
        # emitted as a plain literal (the template check does a substring test).
        max_steps_expr = _render_value(max_steps) if isinstance(max_steps, str) else _value(max_steps)
        args.append(_stmt_arg("max_steps", max_steps_expr))

    if block.get("totp_identifier"):
        args.append(_stmt_arg("totp_identifier", _render_value(block.get("totp_identifier", ""))))

    if block.get("totp_verification_url"):
        args.append(_stmt_arg("totp_url", _render_value(block.get("totp_verification_url", ""))))

    args.append(_stmt_arg("cache_key", _value(block_title), last=True))
    return args


# --------------------------------------------------------------------- #
# 4. function builders                                                  #
# --------------------------------------------------------------------- #


def _build_run_fn(blocks: list[dict[str, Any]], wf_req: dict[str, Any]) -> FunctionDef:
    """Build the decorated ``async def run_workflow(...)`` entry point.

    The body normalises ``parameters``, calls ``skyvern.setup`` and then emits
    one statement per block, in order. A generic ``skyvern.validate`` call is
    appended when the workflow has neither a ``text_prompt`` block nor any
    block listed in ``SCRIPT_TASK_BLOCKS``.
    """
    # Builders that receive the block title (used as cache_key by most of them).
    titled_builders = {
        "task": _build_run_task_statement,
        "file_download": _build_download_statement,
        "action": _build_action_statement,
        "login": _build_login_statement,
        "extraction": _build_extract_statement,
        "navigation": _build_navigate_statement,
        "for_loop": _build_for_loop_statement,
    }
    # Builders that only need the block itself.
    plain_builders = {
        "send_email": _build_send_email_statement,
        "text_prompt": _build_validate_statement,
        "wait": _build_wait_statement,
        "goto_url": _build_goto_statement,
    }

    body = [
        cst.parse_statement(
            "parameters = parameters.model_dump() if isinstance(parameters, WorkflowParameters) else parameters"
        ),
        cst.parse_statement("page, context = await skyvern.setup(parameters, GeneratedWorkflowParameters)"),
    ]

    for block in blocks:
        block_type = block.get("block_type")
        block_title = block.get("label") or block.get("title") or f"block_{block.get('workflow_run_block_id')}"

        if block_type in titled_builders:
            stmt = titled_builders[block_type](block_title, block)
        elif block_type in plain_builders:
            stmt = plain_builders[block_type](block)
        else:
            # Unknown block types leave a comment in the generated script. A
            # comment is not a statement, so it is attached to a no-op ``pass``.
            stmt = cst.SimpleStatementLine(
                [cst.Pass()],
                leading_lines=[cst.EmptyLine(comment=cst.Comment(f"# Unknown block type: {block_type}"))],
            )

        body.append(stmt)

    # Add a final validation step if not already present
    has_validation = any(block.get("block_type") == "text_prompt" for block in blocks)
    has_task_blocks = any(block.get("block_type") in SCRIPT_TASK_BLOCKS for block in blocks)
    if not has_validation and not has_task_blocks:
        validate_call = cst.Call(
            func=cst.Attribute(value=cst.Name("skyvern"), attr=cst.Name("validate")),
            args=[
                cst.Arg(
                    keyword=cst.Name("prompt"),
                    value=cst.SimpleString(
                        '"Your goal is to validate that the workflow completed successfully. COMPLETE if successful, TERMINATE if there are issues."'
                    ),
                ),
            ],
        )
        body.append(cst.SimpleStatementLine([cst.Expr(cst.Await(validate_call))]))

    def next_param_line() -> cst.ParenthesizedWhitespace:
        # Line break that puts the following parameter on its own indented line.
        return cst.ParenthesizedWhitespace(indent=True, last_line=cst.SimpleWhitespace(INDENT))

    def optional_str_param(name: str, *, last: bool = False) -> Param:
        # ``<name>: str | None = <value from the run request>``
        layout: dict[str, Any] = (
            {"whitespace_after_param": cst.ParenthesizedWhitespace(), "comma": cst.Comma()}
            if last
            else {"whitespace_after_param": next_param_line()}
        )
        return Param(
            name=cst.Name(name),
            annotation=cst.Annotation(cst.parse_expression("str | None")),
            default=_value(wf_req.get(name)),
            **layout,
        )

    # WorkflowParameters | dict[str, Any]
    parameters_annotation = cst.BinaryOperation(
        left=cst.Name("WorkflowParameters"),
        operator=cst.BitOr(
            whitespace_before=cst.SimpleWhitespace(" "),
            whitespace_after=cst.SimpleWhitespace(" "),
        ),
        right=cst.Subscript(
            value=cst.Name("dict"),
            slice=[
                cst.SubscriptElement(
                    slice=cst.Index(value=cst.Name("str")),
                    comma=cst.Comma(whitespace_after=cst.SimpleWhitespace(" ")),
                ),
                cst.SubscriptElement(
                    slice=cst.Index(value=cst.Name("Any")),
                ),
            ],
        ),
    )

    params = cst.Parameters(
        params=[
            Param(
                name=cst.Name("parameters"),
                annotation=cst.Annotation(parameters_annotation),
                whitespace_after_param=next_param_line(),
            ),
            Param(
                name=cst.Name("title"),
                annotation=cst.Annotation(cst.Name("str")),
                default=_value(wf_req.get("title", "")),
                whitespace_after_param=next_param_line(),
            ),
            optional_str_param("webhook_url"),
            optional_str_param("totp_url"),
            optional_str_param("totp_identifier", last=True),
        ]
    )

    return FunctionDef(
        name=cst.Name("run_workflow"),
        asynchronous=cst.Asynchronous(),
        decorators=[_workflow_decorator(wf_req)],
        params=params,
        body=cst.IndentedBlock(body),
        whitespace_before_params=cst.ParenthesizedWhitespace(
            indent=True,
            last_line=cst.SimpleWhitespace(INDENT),
        ),
    )


# --------------------------------------------------------------------- #
# 5. entrypoint                                                         #
# --------------------------------------------------------------------- #


async def generate_workflow_script(
    *,
    file_name: str,
    workflow_run_request: dict[str, Any],
    workflow: dict[str, Any],
    blocks: list[dict[str, Any]],
    actions_by_task: dict[str, list[dict[str, Any]]],
    organization_id: str | None = None,
    run_id: str | None = None,
    script_id: str | None = None,
    script_revision_id: str | None = None,
) -> str:
    """
    Build a LibCST Module, write its source to ``file_name`` and return it.

    Args:
        file_name: path the generated script is written to (UTF-8).
        workflow_run_request: run request; supplies the decorator arguments and
            the defaults of ``run_workflow``.
        workflow: workflow dict; its ``workflow_definition.parameters`` drive
            the ``WorkflowParameters`` model.
        blocks: executed blocks, in order.
        actions_by_task: recorded actions keyed by task id.
        organization_id, script_id, script_revision_id: when all three are set,
            every generated block (and the start block) is also persisted via
            ``create_script_block``; persistence failures are logged and do not
            abort generation.
        run_id: accepted but not used by this function.

    Returns:
        The generated source code.
    """

    def import_line(node: cst.Import | cst.ImportFrom) -> cst.SimpleStatementLine:
        return cst.SimpleStatementLine([node])

    def alias(name: str) -> cst.ImportAlias:
        return cst.ImportAlias(cst.Name(name))

    # --- imports --------------------------------------------------------
    imports: list[cst.BaseStatement] = [
        import_line(cst.Import(names=[alias("asyncio")])),
        import_line(cst.Import(names=[alias("pydantic")])),
        import_line(cst.ImportFrom(module=cst.Name("typing"), names=[alias("Any")])),
        import_line(cst.ImportFrom(module=cst.Name("pydantic"), names=[alias("BaseModel"), alias("Field")])),
        import_line(cst.Import(names=[alias("skyvern")])),
        import_line(cst.ImportFrom(module=cst.Name("skyvern"), names=[alias("RunContext"), alias("SkyvernPage")])),
    ]

    # --- generate schema and hydrate actions ---------------------------
    generated_schema, field_mappings = await generate_workflow_parameters_schema(actions_by_task)
    actions_by_task = hydrate_input_text_actions_with_field_names(actions_by_task, field_mappings)

    # --- class + cached params -----------------------------------------
    model_cls = _build_model(workflow)
    generated_model_cls = _build_generated_model_from_schema(generated_schema)

    # Script blocks are persisted only when the full script context is known.
    has_script_context = bool(script_id and script_revision_id and organization_id)

    # --- blocks ---------------------------------------------------------
    block_fns: list[Any] = []
    task_blocks = [block for block in blocks if block.get("block_type") in SCRIPT_TASK_BLOCKS]
    last_task_idx = len(task_blocks) - 1

    for idx, task in enumerate(task_blocks):
        block_fn_def = _build_block_fn(task, actions_by_task.get(task.get("task_id", ""), []))

        if has_script_context:
            try:
                block_name = task.get("label") or task.get("title") or task.get("task_id") or f"task_{idx}"
                await create_script_block(
                    block_code=cst.Module(body=[block_fn_def]).code,
                    script_revision_id=script_revision_id,
                    script_id=script_id,
                    organization_id=organization_id,
                    block_name=block_name,
                    block_description=f"Generated block for task: {block_name}",
                )
            except Exception as e:
                # Continue without script block creation if it fails
                LOG.error("Failed to create script block", error=str(e), exc_info=True)

        block_fns.append(block_fn_def)
        if idx < last_task_idx:
            # Two blank lines between consecutive block functions.
            block_fns.extend([cst.EmptyLine(), cst.EmptyLine()])

    # --- runner ---------------------------------------------------------
    run_fn = _build_run_fn(blocks, workflow_run_request)

    # --- create __start_block__ -----------------------------------------
    # The start block combines imports, model classes, and the run function.
    start_block_body = [
        *imports,
        cst.EmptyLine(),
        cst.EmptyLine(),
        model_cls,
        cst.EmptyLine(),
        cst.EmptyLine(),
    ]

    # Add generated model class if available
    if generated_model_cls:
        start_block_body.extend([generated_model_cls, cst.EmptyLine(), cst.EmptyLine()])

    # Add run function to start block
    start_block_body.extend([run_fn, cst.EmptyLine(), cst.EmptyLine()])

    if has_script_context:
        try:
            await create_script_block(
                block_code=cst.Module(body=start_block_body).code,
                script_revision_id=script_revision_id,
                script_id=script_id,
                organization_id=organization_id,
                block_name=settings.WORKFLOW_START_BLOCK_LABEL,
                block_description="Start block containing imports, model classes, and run function",
            )
        except Exception as e:
            # Continue without script block creation if it fails
            LOG.error("Failed to create __start_block__", error=str(e), exc_info=True)

    # Build module body with the start block content and other blocks
    # NOTE(review): the emitted guard calls run_workflow() without arguments
    # while ``parameters`` has no default — presumably the skyvern.workflow
    # decorator supplies it; confirm.
    module_body = [
        *start_block_body,
        *block_fns,
        cst.EmptyLine(),
        cst.EmptyLine(),
        cst.parse_statement("if __name__ == '__main__':\n    asyncio.run(run_workflow())"),
    ]

    generated_code = cst.Module(body=module_body).code

    # Prompts and labels may contain non-ASCII text; do not depend on the
    # platform's default encoding when writing Python source.
    with open(file_name, "w", encoding="utf-8") as f:
        f.write(generated_code)
    return generated_code


async def create_script_block(
    block_code: str | bytes,
    script_revision_id: str,
    script_id: str,
    organization_id: str,
    block_name: str,
    block_description: str | None = None,
) -> None:
    """
    Create a script block in the database and save the block code to a script file.

    The code is uploaded as a script-file artifact under
    ``blocks/<block_name>.skyvern``, a script file record with its SHA-256
    hash and size is created, and the script block is updated to point at that
    file. Any exception is logged and swallowed, so a persistence failure does
    not fail the calling generation process.

    Args:
        block_code: The code to save (``str`` is encoded as UTF-8)
        script_revision_id: The script revision ID
        script_id: The script ID
        organization_id: The organization ID
        block_name: Label of the script block; also used as the file name stem
        block_description: Optional description (currently not persisted)
    """
    block_code_bytes = block_code if isinstance(block_code, bytes) else block_code.encode("utf-8")
    try:
        # Create script block in database
        script_block = await app.DATABASE.create_script_block(
            script_revision_id=script_revision_id,
            script_id=script_id,
            organization_id=organization_id,
            script_block_label=block_name,
        )

        # Create script file for the block; the file name is derived from the block name
        file_name = f"{block_name}.skyvern"
        file_path = f"blocks/{file_name}"

        # Create artifact and upload it
        artifact_id = await app.ARTIFACT_MANAGER.create_script_file_artifact(
            organization_id=organization_id,
            script_id=script_id,
            script_version=1,  # Assuming version 1 for now
            file_path=file_path,
            data=block_code_bytes,
        )

        # Create script file record
        script_file = await app.DATABASE.create_script_file(
            script_revision_id=script_revision_id,
            script_id=script_id,
            organization_id=organization_id,
            file_path=file_path,
            file_name=file_name,
            file_type="file",
            content_hash=f"sha256:{hashlib.sha256(block_code_bytes).hexdigest()}",
            file_size=len(block_code_bytes),
            mime_type="text/x-python",
            artifact_id=artifact_id,
        )

        # update script block with script file id
        await app.DATABASE.update_script_block(
            script_block_id=script_block.script_block_id,
            organization_id=organization_id,
            script_file_id=script_file.file_id,
        )

    except Exception as e:
        # Log error but don't fail the entire generation process
        LOG.error("Failed to create script block", error=str(e), exc_info=True)
        # For now, just log the error and continue
        # In production, you might want to handle this differently