Script generation (#3157)

2025-09-10 15:35:51 +00:00 · 2025-08-10 13:16:46 -07:00 · 2025-08-10 13:16:46 -07:00 · 58bd43171e
commit 58bd43171e
parent 19d7b951bb
16 changed files with 708 additions and 244 deletions
--- a/skyvern/init.py
+++ b/skyvern/init.py
@ -23,14 +23,16 @@ setup_logger()
 from skyvern.forge import app  # noqa: E402, F401
 from skyvern.library import Skyvern  # noqa: E402
-from skyvern.core.code_generations.skyvern_page import RunContext, SkyvernPage  # noqa: E402
+from skyvern.core.script_generations.skyvern_page import RunContext, SkyvernPage  # noqa: E402
-from skyvern.core.code_generations.run_initializer import setup  # noqa: E402
+from skyvern.core.script_generations.run_initializer import setup  # noqa: E402
-from skyvern.core.code_generations.workflow_wrappers import (  # noqa: E402
+from skyvern.core.script_generations.workflow_wrappers import (  # noqa: E402
    workflow,  # noqa: E402
    task_block,  # noqa: E402
    file_download_block,  # noqa: E402
    email_block,  # noqa: E402
    file_download_block,  # noqa: E402
    navigation_block,  # noqa: E402
    task_block,  # noqa: E402
    url_block,  # noqa: E402
    wait_block,  # noqa: E402
    workflow,  # noqa: E402
 )  # noqa: E402
@ -38,10 +40,12 @@ __all__ = [
    "Skyvern",
    "SkyvernPage",
    "RunContext",
    "setup",
    "workflow",
    "task_block",
    "file_download_block",
    "email_block",
    "file_download_block",
    "navigation_block",
    "setup",
    "task_block",
    "url_block",
    "wait_block",
    "workflow",
 ]
--- a/skyvern/core/code_generations/workflow_wrappers.py
+++ b/skyvern/core/code_generations/workflow_wrappers.py
@ -1,59 +0,0 @@
 from typing import Any, Callable
 # Build a dummy workflow decorator
 def workflow(
    title: str | None = None,
    totp_url: str | None = None,
    totp_identifier: str | None = None,
    webhook_url: str | None = None,
    max_steps: int | None = None,
 ) -> Callable:
    def wrapper(func: Callable) -> Callable:
        return func
    return wrapper
 def task_block(
    prompt: str | None = None,
    title: str | None = None,
    url: str | None = None,
    engine: str | None = None,
    model: dict[str, Any] | None = None,
    totp_url: str | None = None,
    totp_identifier: str | None = None,
    max_steps: int | None = None,
    navigation_payload: str | None = None,
    webhook_url: str | None = None,
 ) -> Callable:
    def decorator(func: Callable) -> Callable:
        return func
    return decorator
 def file_download_block(
    prompt: str | None = None,
    title: str | None = None,
    url: str | None = None,
    max_steps: int | None = None,
 ) -> Callable:
    def decorator(func: Callable) -> Callable:
        return func
    return decorator
 def email_block(prompt: str | None = None, title: str | None = None, url: str | None = None) -> Callable:
    def decorator(func: Callable) -> Callable:
        return func
    return decorator
 def wait_block(seconds: int) -> Callable:
    def decorator(func: Callable) -> Callable:
        return func
    return decorator
--- a/skyvern/core/script_generations/init.py
+++ b/skyvern/core/script_generations/init.py
--- a/skyvern/core/script_generations/generate_script.py
+++ b/skyvern/core/script_generations/generate_script.py
@ -16,15 +16,21 @@ Path("workflow.py").write_text(src)
 from __future__ import annotations
 import hashlib
 import keyword
 from enum import StrEnum
 from typing import Any
 import libcst as cst
 import structlog
 from libcst import Attribute, Call, Dict, DictElement, FunctionDef, Name, Param
 from skyvern.forge import app
 from skyvern.webeye.actions.action_types import ActionType
 LOG = structlog.get_logger(__name__)
 # --------------------------------------------------------------------- #
 # 1. helpers                                                            #
 # --------------------------------------------------------------------- #
@ -45,6 +51,12 @@ ACTION_MAP = {
    "wait": "wait",
    "extract": "extract",
 }
 ACTIONS_WITH_XPATH = [
    "click",
    "input_text",
    "upload_file",
    "select_option",
 ]
 INDENT = " " * 4
@ -130,6 +142,12 @@ def _make_decorator(block: dict[str, Any]) -> cst.Decorator:
        "send_email": "email_block",
        "wait": "wait_block",
        "navigation": "navigation_block",
        "for_loop": "for_loop_block",
        "action": "action_block",
        "extraction": "extraction_block",
        "login": "login_block",
        "text_prompt": "text_prompt_block",
        "goto_url": "url_block",
    }[bt]
    kwargs = []
@ -177,17 +195,28 @@ def _action_to_stmt(act: dict[str, Any]) -> cst.BaseStatement:
    """
    method = ACTION_MAP[act["action_type"]]
-    args = [
+    args: list[cst.Arg] = []
-        cst.Arg(keyword=cst.Name("xpath"), value=_value(act["xpath"])),
+    if method == "input_text":
-        cst.Arg(
+        args.append(cst.Arg(keyword=cst.Name("text"), value=_value(act["text"])))
-            keyword=cst.Name("intention"),
+    elif method == "select_option":
-            value=_value(act.get("intention") or act.get("reasoning") or ""),
+        args.append(cst.Arg(keyword=cst.Name("option"), value=_value(act["option"]["value"])))
-        ),
+    elif method == "wait":
-        cst.Arg(
+        args.append(cst.Arg(keyword=cst.Name("seconds"), value=_value(act["seconds"])))
-            keyword=cst.Name("data"),
+
-            value=cst.Attribute(value=cst.Name("context"), attr=cst.Name("parameters")),
+    args.extend(
-        ),
+        [
-    ]
+            cst.Arg(
                keyword=cst.Name("intention"),
                value=_value(act.get("intention") or act.get("reasoning") or ""),
            ),
            cst.Arg(
                keyword=cst.Name("data"),
                value=cst.Attribute(value=cst.Name("context"), attr=cst.Name("parameters")),
            ),
        ]
    )
    if method in ACTIONS_WITH_XPATH:
        args.append(cst.Arg(keyword=cst.Name("xpath"), value=_value(act["xpath"])))
    call = cst.Call(
        func=cst.Attribute(value=cst.Name("page"), attr=cst.Name(method)),
@ -209,7 +238,7 @@ def _build_block_fn(block: dict[str, Any], actions: list[dict[str, Any]]) -> Fun
        body_stmts.append(cst.parse_statement(f"await page.goto({repr(block['url'])})"))
    for act in actions:
-        if act["action_type"] in [ActionType.COMPLETE]:
+        if act["action_type"] in [ActionType.COMPLETE, ActionType.TERMINATE, ActionType.NULL_ACTION]:
            continue
        body_stmts.append(_action_to_stmt(act))
@ -329,19 +358,22 @@ def _build_run_fn(task_titles: list[str], wf_req: dict[str, Any]) -> FunctionDef
 # --------------------------------------------------------------------- #
-def generate_workflow_script(
+async def generate_workflow_script(
    *,
    file_name: str,
    workflow_run_request: dict[str, Any],
    workflow: dict[str, Any],
    tasks: list[dict[str, Any]],
    actions_by_task: dict[str, list[dict[str, Any]]],
    organization_id: str | None = None,
    run_id: str | None = None,
 ) -> str:
    """
    Build a LibCST Module and emit .code (PEP-8-formatted source).
    """
    # --- imports --------------------------------------------------------
    imports: list[cst.BaseStatement] = [
        cst.SimpleStatementLine([cst.Import(names=[cst.ImportAlias(cst.Name("asyncio"))])]),
        cst.SimpleStatementLine([cst.Import(names=[cst.ImportAlias(cst.Name("pydantic"))])]),
        cst.SimpleStatementLine(
            [
@ -372,8 +404,43 @@ def generate_workflow_script(
    # --- blocks ---------------------------------------------------------
    block_fns = []
    length_of_tasks = len(tasks)
    # Create script first if organization_id is provided
    script_id = None
    script_revision_id = None
    if organization_id:
        try:
            script = await app.DATABASE.create_script(
                organization_id=organization_id,
                run_id=run_id,
            )
            script_id = script.script_id
            script_revision_id = script.script_revision_id
        except Exception as e:
            LOG.error("Failed to create script", error=str(e), exc_info=True)
            # Continue without script creation if it fails
    for idx, task in enumerate(tasks):
-        block_fns.append(_build_block_fn(task, actions_by_task.get(task.get("task_id", ""), [])))
+        block_fn_def = _build_block_fn(task, actions_by_task.get(task.get("task_id", ""), []))
        # Create script block if we have script context
        if script_id and script_revision_id and organization_id:
            try:
                block_name = task.get("title") or task.get("label") or task.get("task_id") or f"task_{idx}"
                block_description = f"Generated block for task: {block_name}"
                await create_script_block(
                    block_fn_def=block_fn_def,
                    script_revision_id=script_revision_id,
                    script_id=script_id,
                    organization_id=organization_id,
                    block_name=block_name,
                    block_description=block_description,
                )
            except Exception as e:
                LOG.error("Failed to create script block", error=str(e), exc_info=True)
                # Continue without script block creation if it fails
        block_fns.append(block_fn_def)
        if idx < length_of_tasks - 1:
            block_fns.append(cst.EmptyLine())
            block_fns.append(cst.EmptyLine())
@ -400,8 +467,122 @@ def generate_workflow_script(
            cst.EmptyLine(),
            cst.EmptyLine(),
            run_fn,
            cst.EmptyLine(),
            cst.EmptyLine(),
            cst.parse_statement("if __name__ == '__main__':\n    asyncio.run(run_workflow())"),
        ]
    )
    # Create main script file if we have script context
    if script_id and script_revision_id and organization_id:
        try:
            main_script_code = module.code
            main_file_name = "main.py"
            main_file_path = main_file_name
            # Create artifact and upload to S3
            artifact_id = await app.ARTIFACT_MANAGER.create_script_file_artifact(
                organization_id=organization_id,
                script_id=script_id,
                script_version=1,  # Assuming version 1 for now
                file_path=main_file_path,
                data=main_script_code.encode("utf-8"),
            )
            # Create script file record for main file
            await app.DATABASE.create_script_file(
                script_revision_id=script_revision_id,
                script_id=script_id,
                organization_id=organization_id,
                file_path=main_file_path,
                file_name=main_file_name,
                file_type="file",
                content_hash=f"sha256:{hashlib.sha256(main_script_code.encode('utf-8')).hexdigest()}",
                file_size=len(main_script_code.encode("utf-8")),
                mime_type="text/x-python",
                artifact_id=artifact_id,
            )
        except Exception as e:
            LOG.error("Failed to create main script file", error=str(e), exc_info=True)
            # Continue without main script file creation if it fails
    with open(file_name, "w") as f:
        f.write(module.code)
    return module.code
 async def create_script_block(
    block_fn_def: FunctionDef,
    script_revision_id: str,
    script_id: str,
    organization_id: str,
    block_name: str,
    block_description: str | None = None,
 ) -> None:
    """
    Create a script block in the database and save the block code to a script file.
    Args:
        block_fn_def: The LibCST function definition to save
        script_revision_id: The script revision ID
        script_id: The script ID
        organization_id: The organization ID
        block_name: Optional custom name for the block (defaults to function name)
        block_description: Optional description for the block
    """
    try:
        # Step 1: Transform the block function definition to a string
        block_code = block_fn_def.code
        # Step 2: Use the function name as block name if not provided
        if not block_name:
            block_name = block_fn_def.name.value
        # Step 3: Create script block in database
        script_block = await app.DATABASE.create_script_block(
            script_revision_id=script_revision_id,
            script_id=script_id,
            organization_id=organization_id,
            script_block_label=block_name,
        )
        # Step 4: Create script file for the block
        # Generate a unique filename for the block
        file_name = f"{block_name}.skyvern"
        file_path = f"blocks/{script_block.script_block_id}/{file_name}"
        # Create artifact and upload to S3
        artifact_id = await app.ARTIFACT_MANAGER.create_script_file_artifact(
            organization_id=organization_id,
            script_id=script_id,
            script_version=1,  # Assuming version 1 for now
            file_path=file_path,
            data=block_code.encode("utf-8"),
        )
        # Create script file record
        script_file = await app.DATABASE.create_script_file(
            script_revision_id=script_revision_id,
            script_id=script_id,
            organization_id=organization_id,
            file_path=file_path,
            file_name=file_name,
            file_type="file",
            content_hash=f"sha256:{hashlib.sha256(block_code.encode('utf-8')).hexdigest()}",
            file_size=len(block_code.encode("utf-8")),
            mime_type="text/x-python",
            artifact_id=artifact_id,
        )
        # update script block with script file id
        await app.DATABASE.update_script_block(
            script_block_id=script_block.script_block_id,
            organization_id=organization_id,
            script_file_id=script_file.script_file_id,
        )
    except Exception as e:
        # Log error but don't fail the entire generation process
        LOG.error("Failed to create script block", error=str(e), exc_info=True)
        # For now, just log the error and continue
        # In production, you might want to handle this differently
--- a/skyvern/core/script_generations/run_initializer.py
+++ b/skyvern/core/script_generations/run_initializer.py
@ -2,13 +2,13 @@ from typing import Any
 from playwright.async_api import async_playwright
-from skyvern.core.code_generations.skyvern_page import RunContext, SkyvernPage
+from skyvern.core.script_generations.skyvern_page import RunContext, SkyvernPage
 from skyvern.forge.sdk.core import skyvern_context
 from skyvern.webeye.browser_factory import BrowserContextFactory
 # TODO: find a better name for this function
-async def setup(parameters: dict[str, Any]) -> tuple[SkyvernPage, RunContext]:
+async def setup(parameters: dict[str, Any], generate_response: bool = False) -> tuple[SkyvernPage, RunContext]:
    # set up skyvern context
    skyvern_context.set(skyvern_context.SkyvernContext())
    # start playwright
@ -19,5 +19,6 @@ async def setup(parameters: dict[str, Any]) -> tuple[SkyvernPage, RunContext]:
        _,
    ) = await BrowserContextFactory.create_browser_context(playwright=pw)
    new_page = await browser_context.new_page()
    # skyvern_page = SkyvernPage(page=new_page, generate_response=generate_response)
    skyvern_page = SkyvernPage(page=new_page)
    return skyvern_page, RunContext(parameters=parameters, page=skyvern_page)
--- a/skyvern/core/script_generations/script_run_context_manager.py
+++ b/skyvern/core/script_generations/script_run_context_manager.py
@ -1,7 +1,7 @@
-from skyvern.core.code_generations.skyvern_page import RunContext
+from skyvern.core.script_generations.skyvern_page import RunContext
-class CodeRunContextManager:
+class ScriptRunContextManager:
    """
    Manages the run context for code runs.
    """
--- a/skyvern/core/script_generations/skyvern_page.py
+++ b/skyvern/core/script_generations/skyvern_page.py
@ -1,8 +1,9 @@
 from __future__ import annotations
 import asyncio
 from dataclasses import dataclass
 from enum import StrEnum
-from typing import Any, Callable
+from typing import Any, Callable, Literal
 from playwright.async_api import Page
@ -48,6 +49,7 @@ class SkyvernPage:
        driver: Driver = Driver.PLAYWRIGHT,
        *,
        recorder: Callable[[ActionCall], None] | None = None,
        # generate_response: bool = False,
    ):
        self.driver = driver
        self.page = page  # e.g. Playwright's Page
@ -95,7 +97,32 @@ class SkyvernPage:
    ######### Public Interfaces #########
    @action_wrap(ActionType.CLICK)
    async def click(self, xpath: str, intention: str | None = None, data: str | dict[str, Any] | None = None) -> None:
-        locator = self.page.locator(xpath)
+        # if self.generate_response:
        #     # TODO: get element tree
        #     # generate click action based on the current html
        #     single_click_prompt = prompt_engine.load_prompt(
        #         template="single-click-action",
        #         navigation_goal=intention,
        #         navigation_payload_str=data,
        #         current_url=self.page.url,
        #         elements=element_tree,
        #         local_datetime=datetime.now(context.tz_info).isoformat(),
        #         user_context=context.prompt,
        #     )
        #     json_response = await app.SINGLE_CLICK_AGENT_LLM_API_HANDLER(
        #         prompt=single_click_prompt,
        #         prompt_name="single-click-action",
        #         step=step,
        #     )
        #     click_actions = parse_actions(new_task, step.step_id, step.order, scraped_page, json_response["actions"])
        #     if not click_actions:
        #         raise CachedActionPlanError("No click actions to execute")
        #     for click_action in click_actions:
        #         await _handle_action(
        #             click_action, step, new_task, scraped_page, current_page, detailed_output, browser_state, engine
        #         )
        locator = self.page.locator(f"xpath={xpath}")
        await locator.click(timeout=5000)
    @action_wrap(ActionType.INPUT_TEXT)
@ -107,53 +134,83 @@ class SkyvernPage:
        data: str | dict[str, Any] | None = None,
        timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS,
    ) -> None:
-        locator = self.page.locator(xpath)
+        # if self.generate_response:
        #     # TODO: regenerate text
        #     pass
        locator = self.page.locator(f"xpath={xpath}")
        await handler_utils.input_sequentially(locator, text, timeout=timeout)
    @action_wrap(ActionType.UPLOAD_FILE)
    async def upload_file(
        self, xpath: str, file_path: str, intention: str | None = None, data: str | dict[str, Any] | None = None
    ) -> None:
        # if self.generate_response:
        #     # TODO: regenerate file_path and xpath
        #     pass
        file = await download_file(file_path)
        await self.page.set_input_files(xpath, file)
    @action_wrap(ActionType.SELECT_OPTION)
    async def select_option(
-        self, xpath: str, option: str, intention: str | None = None, data: str | dict[str, Any] | None = None
+        self,
        xpath: str,
        option: str,
        intention: str | None = None,
        data: str | dict[str, Any] | None = None,
        timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS,
    ) -> None:
-        locator = self.page.locator(xpath)
+        # if self.generate_response:
-        await locator.select_option(option, timeout=5000)
+        #     # TODO: regenerate option
        #     pass
        locator = self.page.locator(f"xpath={xpath}")
        try:
            await locator.click(timeout=timeout)
        except Exception:
            print("Failed to click before select action")
            return
        await locator.select_option(option, timeout=timeout)
    @action_wrap(ActionType.WAIT)
    async def wait(
        self, seconds: float, intention: str | None = None, data: str | dict[str, Any] | None = None
-    ) -> None: ...
+    ) -> None:
        await asyncio.sleep(seconds)
    @action_wrap(ActionType.NULL_ACTION)
-    async def null_action(self, intention: str | None = None, data: str | dict[str, Any] | None = None) -> None: ...
+    async def null_action(self, intention: str | None = None, data: str | dict[str, Any] | None = None) -> None:
        return
    @action_wrap(ActionType.SOLVE_CAPTCHA)
    async def solve_captcha(
        self, xpath: str, intention: str | None = None, data: str | dict[str, Any] | None = None
-    ) -> None: ...
+    ) -> None:
        await asyncio.sleep(30)
    @action_wrap(ActionType.TERMINATE)
    async def terminate(
        self, errors: list[str], intention: str | None = None, data: str | dict[str, Any] | None = None
-    ) -> None: ...
+    ) -> None:
        # TODO: update the workflow run status to terminated
        return
    @action_wrap(ActionType.COMPLETE)
    async def complete(
        self, data_extraction_goal: str, intention: str | None = None, data: str | dict[str, Any] | None = None
-    ) -> None: ...
+    ) -> None:
        # TODO: update the workflow run status to completed
        return
    @action_wrap(ActionType.RELOAD_PAGE)
-    async def reload_page(self, intention: str | None = None, data: str | dict[str, Any] | None = None) -> None: ...
+    async def reload_page(self, intention: str | None = None, data: str | dict[str, Any] | None = None) -> None:
        await self.page.reload()
        return
    @action_wrap(ActionType.EXTRACT)
    async def extract(
        self, data_extraction_goal: str, intention: str | None = None, data: str | dict[str, Any] | None = None
-    ) -> None: ...
+    ) -> None:
        # TODO: extract the data
        return
    @action_wrap(ActionType.VERIFICATION_CODE)
    async def verification_code(
@ -162,37 +219,48 @@ class SkyvernPage:
    @action_wrap(ActionType.SCROLL)
    async def scroll(
-        self, amount: int, intention: str | None = None, data: str | dict[str, Any] | None = None
+        self, scroll_x: int, scroll_y: int, intention: str | None = None, data: str | dict[str, Any] | None = None
-    ) -> None: ...
+    ) -> None:
        await self.page.evaluate(f"window.scrollBy({scroll_x}, {scroll_y})")
    @action_wrap(ActionType.KEYPRESS)
    async def keypress(
-        self, key: str, intention: str | None = None, data: str | dict[str, Any] | None = None
+        self,
-    ) -> None: ...
+        keys: list[str],
-
+        hold: bool = False,
-    @action_wrap(ActionType.TYPE)
+        duration: float = 0,
-    async def type(self, text: str, intention: str | None = None, data: str | dict[str, Any] | None = None) -> None: ...
+        intention: str | None = None,
        data: str | dict[str, Any] | None = None,
    ) -> None:
        await handler_utils.keypress(self.page, keys, hold=hold, duration=duration)
    @action_wrap(ActionType.MOVE)
    async def move(
        self, x: int, y: int, intention: str | None = None, data: str | dict[str, Any] | None = None
-    ) -> None: ...
+    ) -> None:
        await self.page.mouse.move(x, y)
    @action_wrap(ActionType.DRAG)
    async def drag(
        self,
        start_x: int,
        start_y: int,
-        end_x: int,
+        path: list[tuple[int, int]],
        end_y: int,
        intention: str | None = None,
        data: str | dict[str, Any] | None = None,
-    ) -> None: ...
+    ) -> None:
        await handler_utils.drag(self.page, start_x, start_y, path)
    @action_wrap(ActionType.LEFT_MOUSE)
    async def left_mouse(
-        self, x: int, y: int, intention: str | None = None, data: str | dict[str, Any] | None = None
+        self,
-    ) -> None: ...
+        x: int,
        y: int,
        direction: Literal["down", "up"],
        intention: str | None = None,
        data: str | dict[str, Any] | None = None,
    ) -> None:
        await handler_utils.left_mouse(self.page, x, y, direction)
 class RunContext:
@ -204,3 +272,4 @@ class RunContext:
        self.parameters = parameters
        self.page = page
        self.trace: list[ActionCall] = []
        self.prompt: str | None = None
--- a/skyvern/core/script_generations/transform_workflow_run.py
+++ b/skyvern/core/script_generations/transform_workflow_run.py
@ -63,7 +63,9 @@ async def transform_workflow_run_to_code_gen_input(workflow_run_id: str, organiz
                LOG.warning(f"Task {block.task_id} not found")
                continue
            block_dump.update(task.model_dump())
-            actions = await app.DATABASE.get_task_actions(task_id=block.task_id, organization_id=organization_id)
+            actions = await app.DATABASE.get_task_actions_hydrated(
                task_id=block.task_id, organization_id=organization_id
            )
            action_dumps = []
            for action in actions:
                action_dump = action.model_dump()
--- a/skyvern/core/script_generations/workflow_wrappers.py
+++ b/skyvern/core/script_generations/workflow_wrappers.py
@ -0,0 +1,195 @@
 from typing import Any, Callable
 from skyvern import RunContext, SkyvernPage
 # Build a dummy workflow decorator
 def workflow(
    title: str | None = None,
    totp_url: str | None = None,
    totp_identifier: str | None = None,
    webhook_url: str | None = None,
    max_steps: int | None = None,
 ) -> Callable:
    def wrapper(func: Callable) -> Callable:
        return func
    return wrapper
 def task_block(
    prompt: str | None = None,
    title: str | None = None,
    url: str | None = None,
    engine: str | None = None,
    model: dict[str, Any] | None = None,
    totp_url: str | None = None,
    totp_identifier: str | None = None,
    max_steps: int | None = None,
    navigation_payload: str | None = None,
    webhook_url: str | None = None,
 ) -> Callable:
    def decorator(func: Callable) -> Callable:
        async def wrapper(page: SkyvernPage, context: RunContext, *args: Any, **kwargs: Any) -> Any:
            # Store the prompt in the context
            context.prompt = prompt
            return await func(page, context, *args, **kwargs)
        return wrapper
    return decorator
 def login_block(
    prompt: str | None = None,
    title: str | None = None,
    url: str | None = None,
    engine: str | None = None,
    model: dict[str, Any] | None = None,
    totp_url: str | None = None,
    totp_identifier: str | None = None,
    max_steps: int | None = None,
    navigation_payload: str | None = None,
    webhook_url: str | None = None,
 ) -> Callable:
    def decorator(func: Callable) -> Callable:
        async def wrapper(page: SkyvernPage, context: RunContext, *args: Any, **kwargs: Any) -> Any:
            # Store the prompt in the context
            context.prompt = prompt
            return await func(page, context, *args, **kwargs)
        return wrapper
    return decorator
 def navigation_block(
    prompt: str | None = None,
    title: str | None = None,
    url: str | None = None,
    engine: str | None = None,
    model: dict[str, Any] | None = None,
    totp_url: str | None = None,
    totp_identifier: str | None = None,
    max_steps: int | None = None,
 ) -> Callable:
    def decorator(func: Callable) -> Callable:
        async def wrapper(page: SkyvernPage, context: RunContext, *args: Any, **kwargs: Any) -> Any:
            # Store the prompt in the context
            context.prompt = prompt
            return await func(page, context, *args, **kwargs)
        return wrapper
    return decorator
 def action_block(
    prompt: str | None = None,
    title: str | None = None,
    url: str | None = None,
    engine: str | None = None,
    model: dict[str, Any] | None = None,
    totp_url: str | None = None,
    totp_identifier: str | None = None,
    max_steps: int | None = None,
 ) -> Callable:
    def decorator(func: Callable) -> Callable:
        async def wrapper(page: SkyvernPage, context: RunContext, *args: Any, **kwargs: Any) -> Any:
            # Store the prompt in the context
            context.prompt = prompt
            return await func(page, context, *args, **kwargs)
        return wrapper
    return decorator
 def extraction_block(
    title: str | None = None,
    data_extraction_goal: str | None = None,
    data_extraction_schema: dict[str, Any] | list | str | None = None,
    model: dict[str, Any] | None = None,
 ) -> Callable:
    def decorator(func: Callable) -> Callable:
        async def wrapper(page: SkyvernPage, context: RunContext, *args: Any, **kwargs: Any) -> Any:
            # Store the data_extraction_goal as prompt in the context
            context.prompt = data_extraction_goal
            return await func(page, context, *args, **kwargs)
        return wrapper
    return decorator
 def url_block(
    title: str | None = None,
    url: str | None = None,
 ) -> Callable:
    def decorator(func: Callable) -> Callable:
        async def wrapper(page: SkyvernPage, context: RunContext, *args: Any, **kwargs: Any) -> Any:
            # No prompt to store for url_block
            context.prompt = None
            return await func(page, context, *args, **kwargs)
        return wrapper
    return decorator
 def file_download_block(
    prompt: str | None = None,
    title: str | None = None,
    url: str | None = None,
    max_steps: int | None = None,
    engine: str | None = None,
 ) -> Callable:
    def decorator(func: Callable) -> Callable:
        async def wrapper(page: SkyvernPage, context: RunContext, *args: Any, **kwargs: Any) -> Any:
            # Store the prompt in the context
            context.prompt = prompt
            return await func(page, context, *args, **kwargs)
        return wrapper
    return decorator
 def email_block(prompt: str | None = None, title: str | None = None, url: str | None = None) -> Callable:
    def decorator(func: Callable) -> Callable:
        async def wrapper(page: SkyvernPage, context: RunContext, *args: Any, **kwargs: Any) -> Any:
            # Store the prompt in the context
            context.prompt = prompt
            return await func(page, context, *args, **kwargs)
        return wrapper
    return decorator
 def wait_block(seconds: int, title: str | None = None) -> Callable:
    def decorator(func: Callable) -> Callable:
        async def wrapper(page: SkyvernPage, context: RunContext, *args: Any, **kwargs: Any) -> Any:
            # No prompt to store for wait_block
            context.prompt = None
            return await func(page, context, *args, **kwargs)
        return wrapper
    return decorator
 def text_prompt_block(
    prompt: str | None = None,
    title: str | None = None,
    json_schema: dict[str, Any] | list | str | None = None,
 ) -> Callable:
    def decorator(func: Callable) -> Callable:
        async def wrapper(page: SkyvernPage, context: RunContext, *args: Any, **kwargs: Any) -> Any:
            # Store the prompt in the context
            context.prompt = prompt
            return await func(page, context, *args, **kwargs)
        return wrapper
    return decorator
--- a/skyvern/forge/sdk/db/client.py
+++ b/skyvern/forge/sdk/db/client.py
@ -3742,31 +3742,71 @@ class AgentDB:
        mime_type: str | None = None,
        encoding: str = "utf-8",
        artifact_id: str | None = None,
-    ) -> None:
+    ) -> ScriptFile:
-        """Create a script file record."""
+        """Create a script file."""
-        try:
+        async with self.Session() as session:
-            async with self.Session() as session:
+            script_file = ScriptFileModel(
-                script_file = ScriptFileModel(
+                script_revision_id=script_revision_id,
-                    script_revision_id=script_revision_id,
+                script_id=script_id,
-                    script_id=script_id,
+                organization_id=organization_id,
-                    organization_id=organization_id,
+                file_path=file_path,
-                    file_path=file_path,
+                file_name=file_name,
-                    file_name=file_name,
+                file_type=file_type,
-                    file_type=file_type,
+                content_hash=content_hash,
-                    content_hash=content_hash,
+                file_size=file_size,
-                    file_size=file_size,
+                mime_type=mime_type,
-                    mime_type=mime_type,
+                encoding=encoding,
-                    encoding=encoding,
+                artifact_id=artifact_id,
-                    artifact_id=artifact_id,
+            )
            session.add(script_file)
            await session.commit()
            await session.refresh(script_file)
            return convert_to_script_file(script_file)
    async def create_script_block(
        self,
        script_revision_id: str,
        script_id: str,
        organization_id: str,
        script_block_label: str,
        script_file_id: str | None = None,
    ) -> ScriptBlock:
        """Create a script block."""
        async with self.Session() as session:
            script_block = ScriptBlockModel(
                script_revision_id=script_revision_id,
                script_id=script_id,
                organization_id=organization_id,
                script_block_label=script_block_label,
                script_file_id=script_file_id,
            )
            session.add(script_block)
            await session.commit()
            await session.refresh(script_block)
            return convert_to_script_block(script_block)
    async def update_script_block(
        self,
        script_block_id: str,
        organization_id: str,
        script_file_id: str | None = None,
    ) -> ScriptBlock:
        async with self.Session() as session:
            script_block = (
                await session.scalars(
                    select(ScriptBlockModel)
                    .filter_by(script_block_id=script_block_id)
                    .filter_by(organization_id=organization_id)
                )
-                session.add(script_file)
+            ).first()
            if script_block:
                if script_file_id:
                    script_block.script_file_id = script_file_id
                await session.commit()
-        except SQLAlchemyError:
+                await session.refresh(script_block)
-            LOG.error("SQLAlchemyError", exc_info=True)
+                return convert_to_script_block(script_block)
-            raise
+            else:
-        except Exception:
+                raise NotFoundError("Script block not found")
            LOG.error("UnexpectedError", exc_info=True)
            raise
    async def get_script_files(self, script_revision_id: str, organization_id: str) -> list[ScriptFile]:
        async with self.Session() as session:
--- a/skyvern/forge/sdk/routes/scripts.py
+++ b/skyvern/forge/sdk/routes/scripts.py
@ -35,44 +35,12 @@ async def create_script(
    current_org: Organization = Depends(org_auth_service.get_current_org),
 ) -> CreateScriptResponse:
    """Create a new script with optional files and metadata."""
-    organization_id = current_org.organization_id
+    return await script_service.create_script(
-    LOG.info(
+        organization_id=current_org.organization_id,
-        "Creating script",
+        workflow_id=data.workflow_id,
-        organization_id=organization_id,
+        run_id=data.run_id,
-        file_count=len(data.files) if data.files else 0,
+        files=data.files,
    )
    if data.run_id:
        if not await app.DATABASE.get_run(run_id=data.run_id, organization_id=organization_id):
            raise HTTPException(status_code=404, detail=f"Run_id {data.run_id} not found")
    try:
        # Create the script in the database
        script = await app.DATABASE.create_script(
            organization_id=organization_id,
            run_id=data.run_id,
        )
        # Process files if provided
        file_tree = {}
        file_count = 0
        if data.files:
            file_tree = await script_service.build_file_tree(
                data.files,
                organization_id=organization_id,
                script_id=script.script_id,
                script_version=script.version,
                script_revision_id=script.script_revision_id,
            )
            file_count = len(data.files)
        return CreateScriptResponse(
            script_id=script.script_id,
            version=script.version,
            run_id=script.run_id,
            file_count=file_count,
            created_at=script.created_at,
            file_tree=file_tree,
        )
    except Exception as e:
        LOG.error("Failed to create script", error=str(e), exc_info=True)
        raise HTTPException(status_code=500, detail="Failed to create script")
@base_router.get(
--- a/skyvern/forge/sdk/workflow/service.py
+++ b/skyvern/forge/sdk/workflow/service.py
@ -11,8 +11,8 @@ from jinja2.sandbox import SandboxedEnvironment
 from skyvern import analytics
 from skyvern.config import settings
 from skyvern.constants import GET_DOWNLOADED_FILES_TIMEOUT, SAVE_DOWNLOADED_FILES_TIMEOUT
-from skyvern.core.code_generations.generate_code import generate_workflow_script as generate_python_workflow_script
+from skyvern.core.script_generations.generate_script import generate_workflow_script as generate_python_workflow_script
-from skyvern.core.code_generations.transform_workflow_run import transform_workflow_run_to_code_gen_input
+from skyvern.core.script_generations.transform_workflow_run import transform_workflow_run_to_code_gen_input
 from skyvern.exceptions import (
    BlockNotFound,
    BrowserSessionNotFound,
@ -2287,7 +2287,7 @@ class WorkflowService:
                workflow_run_id=workflow_run.workflow_run_id,
                organization_id=workflow.organization_id,
            )
-            python_src = generate_python_workflow_script(
+            python_src = await generate_python_workflow_script(
                file_name=codegen_input.file_name,
                workflow_run_request=codegen_input.workflow_run,
                workflow=codegen_input.workflow,
--- a/skyvern/services/script_service.py
+++ b/skyvern/services/script_service.py
@ -5,11 +5,11 @@ import subprocess
 from datetime import datetime
 import structlog
-from fastapi import BackgroundTasks
+from fastapi import BackgroundTasks, HTTPException
 from skyvern.exceptions import ScriptNotFound
 from skyvern.forge import app
-from skyvern.schemas.scripts import FileNode, ScriptFileCreate
+from skyvern.schemas.scripts import CreateScriptResponse, FileNode, ScriptFileCreate
 LOG = structlog.get_logger(__name__)
@ -96,6 +96,52 @@ async def build_file_tree(
    return file_tree
 async def create_script(
    organization_id: str,
    workflow_id: str | None = None,
    run_id: str | None = None,
    files: list[ScriptFileCreate] | None = None,
 ) -> CreateScriptResponse:
    LOG.info(
        "Creating script",
        organization_id=organization_id,
        file_count=len(files) if files else 0,
    )
    try:
        if run_id and not await app.DATABASE.get_run(run_id=run_id, organization_id=organization_id):
            raise HTTPException(status_code=404, detail=f"Run_id {run_id} not found")
        script = await app.DATABASE.create_script(
            organization_id=organization_id,
            run_id=run_id,
        )
        file_tree: dict[str, FileNode] = {}
        file_count = 0
        if files:
            file_tree = await build_file_tree(
                files,
                organization_id=organization_id,
                script_id=script.script_id,
                script_version=script.version,
                script_revision_id=script.script_revision_id,
            )
            file_count = len(files)
        return CreateScriptResponse(
            script_id=script.script_id,
            version=script.version,
            run_id=script.run_id,
            file_count=file_count,
            created_at=script.created_at,
            file_tree=file_tree,
        )
    except Exception as e:
        LOG.error("Failed to create script", error=str(e), exc_info=True)
        raise HTTPException(status_code=500, detail="Failed to create script")
 async def execute_script(
    script_id: str,
    organization_id: str,
--- a/skyvern/webeye/actions/action_types.py
+++ b/skyvern/webeye/actions/action_types.py
@ -23,7 +23,6 @@ class ActionType(StrEnum):
    SCROLL = "scroll"
    KEYPRESS = "keypress"
    TYPE = "type"
    MOVE = "move"
    DRAG = "drag"
    LEFT_MOUSE = "left_mouse"
--- a/skyvern/webeye/actions/handler.py
+++ b/skyvern/webeye/actions/handler.py
@ -1804,55 +1804,7 @@ async def handle_keypress_action(
    task: Task,
    step: Step,
 ) -> list[ActionResult]:
-    updated_keys = []
+    await handler_utils.keypress(page, action.keys, hold=action.hold, duration=action.duration)
    for key in action.keys:
        key_lower_case = key.lower()
        if key_lower_case in ("enter", "return"):
            updated_keys.append("Enter")
        elif key_lower_case == "space":
            updated_keys.append(" ")
        elif key_lower_case == "ctrl":
            updated_keys.append("Control")
        elif key_lower_case == "backspace":
            updated_keys.append("Backspace")
        elif key_lower_case == "pagedown":
            updated_keys.append("PageDown")
        elif key_lower_case == "pageup":
            updated_keys.append("PageUp")
        elif key_lower_case == "tab":
            updated_keys.append("Tab")
        elif key_lower_case == "shift":
            updated_keys.append("Shift")
        elif key_lower_case in ("arrowleft", "left"):
            updated_keys.append("ArrowLeft")
        elif key_lower_case in ("arrowright", "right"):
            updated_keys.append("ArrowRight")
        elif key_lower_case in ("arrowup", "up"):
            updated_keys.append("ArrowUp")
        elif key_lower_case in ("arrowdown", "down"):
            updated_keys.append("ArrowDown")
        elif key_lower_case == "home":
            updated_keys.append("Home")
        elif key_lower_case == "end":
            updated_keys.append("End")
        elif key_lower_case == "delete":
            updated_keys.append("Delete")
        elif key_lower_case == "ecs":
            updated_keys.append("Escape")
        elif key_lower_case == "alt":
            updated_keys.append("Alt")
        elif key_lower_case.startswith("f") and key_lower_case[1:].isdigit():
            # Handle function keys: f1 -> F1, f5 -> F5, etc.
            updated_keys.append(key_lower_case.upper())
        else:
            updated_keys.append(key)
    keypress_str = "+".join(updated_keys)
    if action.hold:
        await page.keyboard.down(keypress_str)
        await asyncio.sleep(action.duration)
        await page.keyboard.up(keypress_str)
    else:
        await page.keyboard.press(keypress_str)
    return [ActionSuccess()]
@ -1876,13 +1828,7 @@ async def handle_drag_action(
    task: Task,
    step: Step,
 ) -> list[ActionResult]:
-    if action.start_x and action.start_y:
+    await handler_utils.drag(page, action.start_x, action.start_y, action.path)
        await page.mouse.move(action.start_x, action.start_y)
    await page.mouse.down()
    for point in action.path:
        x, y = point[0], point[1]
        await page.mouse.move(x, y)
    await page.mouse.up()
    return [ActionSuccess()]
@ -1913,12 +1859,7 @@ async def handle_left_mouse_action(
    task: Task,
    step: Step,
 ) -> list[ActionResult]:
-    if action.x and action.y:
+    await handler_utils.left_mouse(page, action.x, action.y, action.direction)
        await page.mouse.move(action.x, action.y)
    if action.direction == "down":
        await page.mouse.down()
    elif action.direction == "up":
        await page.mouse.up()
    return [ActionSuccess()]
--- a/skyvern/webeye/actions/handler_utils.py
+++ b/skyvern/webeye/actions/handler_utils.py
@ -1,7 +1,8 @@
-from typing import Any
+import asyncio
 from typing import Any, Literal
 import structlog
-from playwright.async_api import Locator
+from playwright.async_api import Locator, Page
 from skyvern.config import settings
 from skyvern.constants import TEXT_INPUT_DELAY, TEXT_PRESS_MAX_LENGTH
@ -31,3 +32,79 @@ async def input_sequentially(locator: Locator, text: str, timeout: float = setti
        text = text[length - TEXT_PRESS_MAX_LENGTH :]
    await locator.press_sequentially(text, delay=TEXT_INPUT_DELAY, timeout=timeout)
 async def keypress(page: Page, keys: list[str], hold: bool = False, duration: float = 0) -> None:
    updated_keys = []
    for key in keys:
        key_lower_case = key.lower()
        if key_lower_case in ("enter", "return"):
            updated_keys.append("Enter")
        elif key_lower_case == "space":
            updated_keys.append(" ")
        elif key_lower_case == "ctrl":
            updated_keys.append("Control")
        elif key_lower_case == "backspace":
            updated_keys.append("Backspace")
        elif key_lower_case == "pagedown":
            updated_keys.append("PageDown")
        elif key_lower_case == "pageup":
            updated_keys.append("PageUp")
        elif key_lower_case == "tab":
            updated_keys.append("Tab")
        elif key_lower_case == "shift":
            updated_keys.append("Shift")
        elif key_lower_case in ("arrowleft", "left"):
            updated_keys.append("ArrowLeft")
        elif key_lower_case in ("arrowright", "right"):
            updated_keys.append("ArrowRight")
        elif key_lower_case in ("arrowup", "up"):
            updated_keys.append("ArrowUp")
        elif key_lower_case in ("arrowdown", "down"):
            updated_keys.append("ArrowDown")
        elif key_lower_case == "home":
            updated_keys.append("Home")
        elif key_lower_case == "end":
            updated_keys.append("End")
        elif key_lower_case == "delete":
            updated_keys.append("Delete")
        elif key_lower_case == "esc":
            updated_keys.append("Escape")
        elif key_lower_case == "alt":
            updated_keys.append("Alt")
        elif key_lower_case.startswith("f") and key_lower_case[1:].isdigit():
            # Handle function keys: f1 -> F1, f5 -> F5, etc.
            updated_keys.append(key_lower_case.upper())
        else:
            updated_keys.append(key)
    keypress_str = "+".join(updated_keys)
    if hold:
        await page.keyboard.down(keypress_str)
        await asyncio.sleep(duration)
        await page.keyboard.up(keypress_str)
    else:
        await page.keyboard.press(keypress_str)
 async def drag(
    page: Page, start_x: int | None = None, start_y: int | None = None, path: list[tuple[int, int]] | None = None
 ) -> None:
    if start_x and start_y:
        await page.mouse.move(start_x, start_y)
    await page.mouse.down()
    path = path or []
    for point in path:
        x, y = point[0], point[1]
        await page.mouse.move(x, y)
    await page.mouse.up()
 async def left_mouse(page: Page, x: int | None, y: int | None, direction: Literal["down", "up"]) -> None:
    if x and y:
        await page.mouse.move(x, y)
    if direction == "down":
        await page.mouse.down()
    elif direction == "up":
        await page.mouse.up()
    else:
        LOG.info("Invalid direction for left mouse action", direction=direction)