Script generation (#3157)
Some checks are pending
Run tests and pre-commit / Run tests and pre-commit hooks (push) Waiting to run
Run tests and pre-commit / Frontend Lint and Build (push) Waiting to run
Publish Fern Docs / run (push) Waiting to run

This commit is contained in:
Shuchang Zheng 2025-08-10 13:16:46 -07:00 committed by GitHub
parent 19d7b951bb
commit 58bd43171e
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
16 changed files with 708 additions and 244 deletions

View file

@ -23,14 +23,16 @@ setup_logger()
from skyvern.forge import app # noqa: E402, F401 from skyvern.forge import app # noqa: E402, F401
from skyvern.library import Skyvern # noqa: E402 from skyvern.library import Skyvern # noqa: E402
from skyvern.core.code_generations.skyvern_page import RunContext, SkyvernPage # noqa: E402 from skyvern.core.script_generations.skyvern_page import RunContext, SkyvernPage # noqa: E402
from skyvern.core.code_generations.run_initializer import setup # noqa: E402 from skyvern.core.script_generations.run_initializer import setup # noqa: E402
from skyvern.core.code_generations.workflow_wrappers import ( # noqa: E402 from skyvern.core.script_generations.workflow_wrappers import ( # noqa: E402
workflow, # noqa: E402
task_block, # noqa: E402
file_download_block, # noqa: E402
email_block, # noqa: E402 email_block, # noqa: E402
file_download_block, # noqa: E402
navigation_block, # noqa: E402
task_block, # noqa: E402
url_block, # noqa: E402
wait_block, # noqa: E402 wait_block, # noqa: E402
workflow, # noqa: E402
) # noqa: E402 ) # noqa: E402
@ -38,10 +40,12 @@ __all__ = [
"Skyvern", "Skyvern",
"SkyvernPage", "SkyvernPage",
"RunContext", "RunContext",
"setup",
"workflow",
"task_block",
"file_download_block",
"email_block", "email_block",
"file_download_block",
"navigation_block",
"setup",
"task_block",
"url_block",
"wait_block", "wait_block",
"workflow",
] ]

View file

@ -1,59 +0,0 @@
from typing import Any, Callable
# Build a dummy workflow decorator
def workflow(
    title: str | None = None,
    totp_url: str | None = None,
    totp_identifier: str | None = None,
    webhook_url: str | None = None,
    max_steps: int | None = None,
) -> Callable:
    """Return a no-op decorator for a generated workflow function.

    None of the keyword arguments are read here; they only annotate the
    decorated function at its definition site.
    """

    def _identity(fn: Callable) -> Callable:
        # Placeholder behaviour: hand the function back untouched.
        return fn

    return _identity
def task_block(
    prompt: str | None = None,
    title: str | None = None,
    url: str | None = None,
    engine: str | None = None,
    model: dict[str, Any] | None = None,
    totp_url: str | None = None,
    totp_identifier: str | None = None,
    max_steps: int | None = None,
    navigation_payload: str | None = None,
    webhook_url: str | None = None,
) -> Callable:
    """Return a no-op decorator for a generated task block.

    The arguments are accepted as block metadata only; none of them is used.
    """

    def _identity(fn: Callable) -> Callable:
        # The block function is returned unchanged.
        return fn

    return _identity
def file_download_block(
    prompt: str | None = None,
    title: str | None = None,
    url: str | None = None,
    max_steps: int | None = None,
) -> Callable:
    """Return a no-op decorator for a generated file-download block.

    The arguments are metadata only and are not used.
    """

    def _identity(fn: Callable) -> Callable:
        # The block function is returned unchanged.
        return fn

    return _identity
def email_block(prompt: str | None = None, title: str | None = None, url: str | None = None) -> Callable:
    """Return a no-op decorator for a generated email block; arguments are unused."""

    def _identity(fn: Callable) -> Callable:
        # The block function is returned unchanged.
        return fn

    return _identity
def wait_block(seconds: int) -> Callable:
    """Return a no-op decorator for a generated wait block; ``seconds`` is unused."""

    def _identity(fn: Callable) -> Callable:
        # The block function is returned unchanged.
        return fn

    return _identity

View file

@ -16,15 +16,21 @@ Path("workflow.py").write_text(src)
from __future__ import annotations from __future__ import annotations
import hashlib
import keyword import keyword
from enum import StrEnum from enum import StrEnum
from typing import Any from typing import Any
import libcst as cst import libcst as cst
import structlog
from libcst import Attribute, Call, Dict, DictElement, FunctionDef, Name, Param from libcst import Attribute, Call, Dict, DictElement, FunctionDef, Name, Param
from skyvern.forge import app
from skyvern.webeye.actions.action_types import ActionType from skyvern.webeye.actions.action_types import ActionType
LOG = structlog.get_logger(__name__)
# --------------------------------------------------------------------- # # --------------------------------------------------------------------- #
# 1. helpers # # 1. helpers #
# --------------------------------------------------------------------- # # --------------------------------------------------------------------- #
@ -45,6 +51,12 @@ ACTION_MAP = {
"wait": "wait", "wait": "wait",
"extract": "extract", "extract": "extract",
} }
ACTIONS_WITH_XPATH = [
"click",
"input_text",
"upload_file",
"select_option",
]
INDENT = " " * 4 INDENT = " " * 4
@ -130,6 +142,12 @@ def _make_decorator(block: dict[str, Any]) -> cst.Decorator:
"send_email": "email_block", "send_email": "email_block",
"wait": "wait_block", "wait": "wait_block",
"navigation": "navigation_block", "navigation": "navigation_block",
"for_loop": "for_loop_block",
"action": "action_block",
"extraction": "extraction_block",
"login": "login_block",
"text_prompt": "text_prompt_block",
"goto_url": "url_block",
}[bt] }[bt]
kwargs = [] kwargs = []
@ -177,17 +195,28 @@ def _action_to_stmt(act: dict[str, Any]) -> cst.BaseStatement:
""" """
method = ACTION_MAP[act["action_type"]] method = ACTION_MAP[act["action_type"]]
args = [ args: list[cst.Arg] = []
cst.Arg(keyword=cst.Name("xpath"), value=_value(act["xpath"])), if method == "input_text":
cst.Arg( args.append(cst.Arg(keyword=cst.Name("text"), value=_value(act["text"])))
keyword=cst.Name("intention"), elif method == "select_option":
value=_value(act.get("intention") or act.get("reasoning") or ""), args.append(cst.Arg(keyword=cst.Name("option"), value=_value(act["option"]["value"])))
), elif method == "wait":
cst.Arg( args.append(cst.Arg(keyword=cst.Name("seconds"), value=_value(act["seconds"])))
keyword=cst.Name("data"),
value=cst.Attribute(value=cst.Name("context"), attr=cst.Name("parameters")), args.extend(
), [
] cst.Arg(
keyword=cst.Name("intention"),
value=_value(act.get("intention") or act.get("reasoning") or ""),
),
cst.Arg(
keyword=cst.Name("data"),
value=cst.Attribute(value=cst.Name("context"), attr=cst.Name("parameters")),
),
]
)
if method in ACTIONS_WITH_XPATH:
args.append(cst.Arg(keyword=cst.Name("xpath"), value=_value(act["xpath"])))
call = cst.Call( call = cst.Call(
func=cst.Attribute(value=cst.Name("page"), attr=cst.Name(method)), func=cst.Attribute(value=cst.Name("page"), attr=cst.Name(method)),
@ -209,7 +238,7 @@ def _build_block_fn(block: dict[str, Any], actions: list[dict[str, Any]]) -> Fun
body_stmts.append(cst.parse_statement(f"await page.goto({repr(block['url'])})")) body_stmts.append(cst.parse_statement(f"await page.goto({repr(block['url'])})"))
for act in actions: for act in actions:
if act["action_type"] in [ActionType.COMPLETE]: if act["action_type"] in [ActionType.COMPLETE, ActionType.TERMINATE, ActionType.NULL_ACTION]:
continue continue
body_stmts.append(_action_to_stmt(act)) body_stmts.append(_action_to_stmt(act))
@ -329,19 +358,22 @@ def _build_run_fn(task_titles: list[str], wf_req: dict[str, Any]) -> FunctionDef
# --------------------------------------------------------------------- # # --------------------------------------------------------------------- #
def generate_workflow_script( async def generate_workflow_script(
*, *,
file_name: str, file_name: str,
workflow_run_request: dict[str, Any], workflow_run_request: dict[str, Any],
workflow: dict[str, Any], workflow: dict[str, Any],
tasks: list[dict[str, Any]], tasks: list[dict[str, Any]],
actions_by_task: dict[str, list[dict[str, Any]]], actions_by_task: dict[str, list[dict[str, Any]]],
organization_id: str | None = None,
run_id: str | None = None,
) -> str: ) -> str:
""" """
Build a LibCST Module and emit .code (PEP-8-formatted source). Build a LibCST Module and emit .code (PEP-8-formatted source).
""" """
# --- imports -------------------------------------------------------- # --- imports --------------------------------------------------------
imports: list[cst.BaseStatement] = [ imports: list[cst.BaseStatement] = [
cst.SimpleStatementLine([cst.Import(names=[cst.ImportAlias(cst.Name("asyncio"))])]),
cst.SimpleStatementLine([cst.Import(names=[cst.ImportAlias(cst.Name("pydantic"))])]), cst.SimpleStatementLine([cst.Import(names=[cst.ImportAlias(cst.Name("pydantic"))])]),
cst.SimpleStatementLine( cst.SimpleStatementLine(
[ [
@ -372,8 +404,43 @@ def generate_workflow_script(
# --- blocks --------------------------------------------------------- # --- blocks ---------------------------------------------------------
block_fns = [] block_fns = []
length_of_tasks = len(tasks) length_of_tasks = len(tasks)
# Create script first if organization_id is provided
script_id = None
script_revision_id = None
if organization_id:
try:
script = await app.DATABASE.create_script(
organization_id=organization_id,
run_id=run_id,
)
script_id = script.script_id
script_revision_id = script.script_revision_id
except Exception as e:
LOG.error("Failed to create script", error=str(e), exc_info=True)
# Continue without script creation if it fails
for idx, task in enumerate(tasks): for idx, task in enumerate(tasks):
block_fns.append(_build_block_fn(task, actions_by_task.get(task.get("task_id", ""), []))) block_fn_def = _build_block_fn(task, actions_by_task.get(task.get("task_id", ""), []))
# Create script block if we have script context
if script_id and script_revision_id and organization_id:
try:
block_name = task.get("title") or task.get("label") or task.get("task_id") or f"task_{idx}"
block_description = f"Generated block for task: {block_name}"
await create_script_block(
block_fn_def=block_fn_def,
script_revision_id=script_revision_id,
script_id=script_id,
organization_id=organization_id,
block_name=block_name,
block_description=block_description,
)
except Exception as e:
LOG.error("Failed to create script block", error=str(e), exc_info=True)
# Continue without script block creation if it fails
block_fns.append(block_fn_def)
if idx < length_of_tasks - 1: if idx < length_of_tasks - 1:
block_fns.append(cst.EmptyLine()) block_fns.append(cst.EmptyLine())
block_fns.append(cst.EmptyLine()) block_fns.append(cst.EmptyLine())
@ -400,8 +467,122 @@ def generate_workflow_script(
cst.EmptyLine(), cst.EmptyLine(),
cst.EmptyLine(), cst.EmptyLine(),
run_fn, run_fn,
cst.EmptyLine(),
cst.EmptyLine(),
cst.parse_statement("if __name__ == '__main__':\n asyncio.run(run_workflow())"),
] ]
) )
# Create main script file if we have script context
if script_id and script_revision_id and organization_id:
try:
main_script_code = module.code
main_file_name = "main.py"
main_file_path = main_file_name
# Create artifact and upload to S3
artifact_id = await app.ARTIFACT_MANAGER.create_script_file_artifact(
organization_id=organization_id,
script_id=script_id,
script_version=1, # Assuming version 1 for now
file_path=main_file_path,
data=main_script_code.encode("utf-8"),
)
# Create script file record for main file
await app.DATABASE.create_script_file(
script_revision_id=script_revision_id,
script_id=script_id,
organization_id=organization_id,
file_path=main_file_path,
file_name=main_file_name,
file_type="file",
content_hash=f"sha256:{hashlib.sha256(main_script_code.encode('utf-8')).hexdigest()}",
file_size=len(main_script_code.encode("utf-8")),
mime_type="text/x-python",
artifact_id=artifact_id,
)
except Exception as e:
LOG.error("Failed to create main script file", error=str(e), exc_info=True)
# Continue without main script file creation if it fails
with open(file_name, "w") as f: with open(file_name, "w") as f:
f.write(module.code) f.write(module.code)
return module.code return module.code
async def create_script_block(
    block_fn_def: FunctionDef,
    script_revision_id: str,
    script_id: str,
    organization_id: str,
    block_name: str,
    block_description: str | None = None,
) -> None:
    """
    Create a script block in the database and save the block code to a script file.

    This is best-effort: any exception raised while persisting the block is
    logged and swallowed so that script generation can continue.

    Args:
        block_fn_def: The LibCST function definition to save
        script_revision_id: The script revision ID
        script_id: The script ID
        organization_id: The organization ID
        block_name: Label for the block; when empty, the function name is used
        block_description: Accepted for the caller's convenience; it is not
            persisted by this function
    """
    try:
        # Step 1: Render the function definition to source text.
        # Individual LibCST nodes have no ``.code`` attribute (only ``Module``
        # does), so the node is rendered through an empty module instead.
        block_code = cst.Module(body=[]).code_for_node(block_fn_def)
        # Encode once; the bytes are used for the upload, the hash and the size.
        block_bytes = block_code.encode("utf-8")

        # Step 2: Use the function name as block name if not provided
        if not block_name:
            block_name = block_fn_def.name.value

        # Step 3: Create script block in database
        script_block = await app.DATABASE.create_script_block(
            script_revision_id=script_revision_id,
            script_id=script_id,
            organization_id=organization_id,
            script_block_label=block_name,
        )

        # Step 4: Create script file for the block.
        # The path is namespaced by the block id, so two blocks with the same
        # label get different file paths.
        file_name = f"{block_name}.skyvern"
        file_path = f"blocks/{script_block.script_block_id}/{file_name}"

        # Create the artifact that stores the block source
        artifact_id = await app.ARTIFACT_MANAGER.create_script_file_artifact(
            organization_id=organization_id,
            script_id=script_id,
            script_version=1,  # Assuming version 1 for now
            file_path=file_path,
            data=block_bytes,
        )

        # Create script file record
        script_file = await app.DATABASE.create_script_file(
            script_revision_id=script_revision_id,
            script_id=script_id,
            organization_id=organization_id,
            file_path=file_path,
            file_name=file_name,
            file_type="file",
            content_hash=f"sha256:{hashlib.sha256(block_bytes).hexdigest()}",
            file_size=len(block_bytes),
            mime_type="text/x-python",
            artifact_id=artifact_id,
        )

        # Update script block with script file id
        await app.DATABASE.update_script_block(
            script_block_id=script_block.script_block_id,
            organization_id=organization_id,
            script_file_id=script_file.script_file_id,
        )

    except Exception as e:
        # Deliberate best-effort boundary: log the error but don't fail the
        # entire generation process.
        LOG.error("Failed to create script block", error=str(e), exc_info=True)

View file

@ -2,13 +2,13 @@ from typing import Any
from playwright.async_api import async_playwright from playwright.async_api import async_playwright
from skyvern.core.code_generations.skyvern_page import RunContext, SkyvernPage from skyvern.core.script_generations.skyvern_page import RunContext, SkyvernPage
from skyvern.forge.sdk.core import skyvern_context from skyvern.forge.sdk.core import skyvern_context
from skyvern.webeye.browser_factory import BrowserContextFactory from skyvern.webeye.browser_factory import BrowserContextFactory
# TODO: find a better name for this function # TODO: find a better name for this function
async def setup(parameters: dict[str, Any]) -> tuple[SkyvernPage, RunContext]: async def setup(parameters: dict[str, Any], generate_response: bool = False) -> tuple[SkyvernPage, RunContext]:
# set up skyvern context # set up skyvern context
skyvern_context.set(skyvern_context.SkyvernContext()) skyvern_context.set(skyvern_context.SkyvernContext())
# start playwright # start playwright
@ -19,5 +19,6 @@ async def setup(parameters: dict[str, Any]) -> tuple[SkyvernPage, RunContext]:
_, _,
) = await BrowserContextFactory.create_browser_context(playwright=pw) ) = await BrowserContextFactory.create_browser_context(playwright=pw)
new_page = await browser_context.new_page() new_page = await browser_context.new_page()
# skyvern_page = SkyvernPage(page=new_page, generate_response=generate_response)
skyvern_page = SkyvernPage(page=new_page) skyvern_page = SkyvernPage(page=new_page)
return skyvern_page, RunContext(parameters=parameters, page=skyvern_page) return skyvern_page, RunContext(parameters=parameters, page=skyvern_page)

View file

@ -1,7 +1,7 @@
from skyvern.core.code_generations.skyvern_page import RunContext from skyvern.core.script_generations.skyvern_page import RunContext
class CodeRunContextManager: class ScriptRunContextManager:
""" """
Manages the run context for code runs. Manages the run context for code runs.
""" """

View file

@ -1,8 +1,9 @@
from __future__ import annotations from __future__ import annotations
import asyncio
from dataclasses import dataclass from dataclasses import dataclass
from enum import StrEnum from enum import StrEnum
from typing import Any, Callable from typing import Any, Callable, Literal
from playwright.async_api import Page from playwright.async_api import Page
@ -48,6 +49,7 @@ class SkyvernPage:
driver: Driver = Driver.PLAYWRIGHT, driver: Driver = Driver.PLAYWRIGHT,
*, *,
recorder: Callable[[ActionCall], None] | None = None, recorder: Callable[[ActionCall], None] | None = None,
# generate_response: bool = False,
): ):
self.driver = driver self.driver = driver
self.page = page # e.g. Playwright's Page self.page = page # e.g. Playwright's Page
@ -95,7 +97,32 @@ class SkyvernPage:
######### Public Interfaces ######### ######### Public Interfaces #########
@action_wrap(ActionType.CLICK) @action_wrap(ActionType.CLICK)
async def click(self, xpath: str, intention: str | None = None, data: str | dict[str, Any] | None = None) -> None: async def click(self, xpath: str, intention: str | None = None, data: str | dict[str, Any] | None = None) -> None:
locator = self.page.locator(xpath) # if self.generate_response:
# # TODO: get element tree
# # generate click action based on the current html
# single_click_prompt = prompt_engine.load_prompt(
# template="single-click-action",
# navigation_goal=intention,
# navigation_payload_str=data,
# current_url=self.page.url,
# elements=element_tree,
# local_datetime=datetime.now(context.tz_info).isoformat(),
# user_context=context.prompt,
# )
# json_response = await app.SINGLE_CLICK_AGENT_LLM_API_HANDLER(
# prompt=single_click_prompt,
# prompt_name="single-click-action",
# step=step,
# )
# click_actions = parse_actions(new_task, step.step_id, step.order, scraped_page, json_response["actions"])
# if not click_actions:
# raise CachedActionPlanError("No click actions to execute")
# for click_action in click_actions:
# await _handle_action(
# click_action, step, new_task, scraped_page, current_page, detailed_output, browser_state, engine
# )
locator = self.page.locator(f"xpath={xpath}")
await locator.click(timeout=5000) await locator.click(timeout=5000)
@action_wrap(ActionType.INPUT_TEXT) @action_wrap(ActionType.INPUT_TEXT)
@ -107,53 +134,83 @@ class SkyvernPage:
data: str | dict[str, Any] | None = None, data: str | dict[str, Any] | None = None,
timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS, timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS,
) -> None: ) -> None:
locator = self.page.locator(xpath) # if self.generate_response:
# # TODO: regenerate text
# pass
locator = self.page.locator(f"xpath={xpath}")
await handler_utils.input_sequentially(locator, text, timeout=timeout) await handler_utils.input_sequentially(locator, text, timeout=timeout)
@action_wrap(ActionType.UPLOAD_FILE) @action_wrap(ActionType.UPLOAD_FILE)
async def upload_file( async def upload_file(
self, xpath: str, file_path: str, intention: str | None = None, data: str | dict[str, Any] | None = None self, xpath: str, file_path: str, intention: str | None = None, data: str | dict[str, Any] | None = None
) -> None: ) -> None:
# if self.generate_response:
# # TODO: regenerate file_path and xpath
# pass
file = await download_file(file_path) file = await download_file(file_path)
await self.page.set_input_files(xpath, file) await self.page.set_input_files(xpath, file)
@action_wrap(ActionType.SELECT_OPTION) @action_wrap(ActionType.SELECT_OPTION)
async def select_option( async def select_option(
self, xpath: str, option: str, intention: str | None = None, data: str | dict[str, Any] | None = None self,
xpath: str,
option: str,
intention: str | None = None,
data: str | dict[str, Any] | None = None,
timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS,
) -> None: ) -> None:
locator = self.page.locator(xpath) # if self.generate_response:
await locator.select_option(option, timeout=5000) # # TODO: regenerate option
# pass
locator = self.page.locator(f"xpath={xpath}")
try:
await locator.click(timeout=timeout)
except Exception:
print("Failed to click before select action")
return
await locator.select_option(option, timeout=timeout)
@action_wrap(ActionType.WAIT) @action_wrap(ActionType.WAIT)
async def wait( async def wait(
self, seconds: float, intention: str | None = None, data: str | dict[str, Any] | None = None self, seconds: float, intention: str | None = None, data: str | dict[str, Any] | None = None
) -> None: ... ) -> None:
await asyncio.sleep(seconds)
@action_wrap(ActionType.NULL_ACTION) @action_wrap(ActionType.NULL_ACTION)
async def null_action(self, intention: str | None = None, data: str | dict[str, Any] | None = None) -> None: ... async def null_action(self, intention: str | None = None, data: str | dict[str, Any] | None = None) -> None:
return
@action_wrap(ActionType.SOLVE_CAPTCHA) @action_wrap(ActionType.SOLVE_CAPTCHA)
async def solve_captcha( async def solve_captcha(
self, xpath: str, intention: str | None = None, data: str | dict[str, Any] | None = None self, xpath: str, intention: str | None = None, data: str | dict[str, Any] | None = None
) -> None: ... ) -> None:
await asyncio.sleep(30)
@action_wrap(ActionType.TERMINATE) @action_wrap(ActionType.TERMINATE)
async def terminate( async def terminate(
self, errors: list[str], intention: str | None = None, data: str | dict[str, Any] | None = None self, errors: list[str], intention: str | None = None, data: str | dict[str, Any] | None = None
) -> None: ... ) -> None:
# TODO: update the workflow run status to terminated
return
@action_wrap(ActionType.COMPLETE) @action_wrap(ActionType.COMPLETE)
async def complete( async def complete(
self, data_extraction_goal: str, intention: str | None = None, data: str | dict[str, Any] | None = None self, data_extraction_goal: str, intention: str | None = None, data: str | dict[str, Any] | None = None
) -> None: ... ) -> None:
# TODO: update the workflow run status to completed
return
@action_wrap(ActionType.RELOAD_PAGE) @action_wrap(ActionType.RELOAD_PAGE)
async def reload_page(self, intention: str | None = None, data: str | dict[str, Any] | None = None) -> None: ... async def reload_page(self, intention: str | None = None, data: str | dict[str, Any] | None = None) -> None:
await self.page.reload()
return
@action_wrap(ActionType.EXTRACT) @action_wrap(ActionType.EXTRACT)
async def extract( async def extract(
self, data_extraction_goal: str, intention: str | None = None, data: str | dict[str, Any] | None = None self, data_extraction_goal: str, intention: str | None = None, data: str | dict[str, Any] | None = None
) -> None: ... ) -> None:
# TODO: extract the data
return
@action_wrap(ActionType.VERIFICATION_CODE) @action_wrap(ActionType.VERIFICATION_CODE)
async def verification_code( async def verification_code(
@ -162,37 +219,48 @@ class SkyvernPage:
@action_wrap(ActionType.SCROLL) @action_wrap(ActionType.SCROLL)
async def scroll( async def scroll(
self, amount: int, intention: str | None = None, data: str | dict[str, Any] | None = None self, scroll_x: int, scroll_y: int, intention: str | None = None, data: str | dict[str, Any] | None = None
) -> None: ... ) -> None:
await self.page.evaluate(f"window.scrollBy({scroll_x}, {scroll_y})")
@action_wrap(ActionType.KEYPRESS) @action_wrap(ActionType.KEYPRESS)
async def keypress( async def keypress(
self, key: str, intention: str | None = None, data: str | dict[str, Any] | None = None self,
) -> None: ... keys: list[str],
hold: bool = False,
@action_wrap(ActionType.TYPE) duration: float = 0,
async def type(self, text: str, intention: str | None = None, data: str | dict[str, Any] | None = None) -> None: ... intention: str | None = None,
data: str | dict[str, Any] | None = None,
) -> None:
await handler_utils.keypress(self.page, keys, hold=hold, duration=duration)
@action_wrap(ActionType.MOVE) @action_wrap(ActionType.MOVE)
async def move( async def move(
self, x: int, y: int, intention: str | None = None, data: str | dict[str, Any] | None = None self, x: int, y: int, intention: str | None = None, data: str | dict[str, Any] | None = None
) -> None: ... ) -> None:
await self.page.mouse.move(x, y)
@action_wrap(ActionType.DRAG) @action_wrap(ActionType.DRAG)
async def drag( async def drag(
self, self,
start_x: int, start_x: int,
start_y: int, start_y: int,
end_x: int, path: list[tuple[int, int]],
end_y: int,
intention: str | None = None, intention: str | None = None,
data: str | dict[str, Any] | None = None, data: str | dict[str, Any] | None = None,
) -> None: ... ) -> None:
await handler_utils.drag(self.page, start_x, start_y, path)
@action_wrap(ActionType.LEFT_MOUSE) @action_wrap(ActionType.LEFT_MOUSE)
async def left_mouse( async def left_mouse(
self, x: int, y: int, intention: str | None = None, data: str | dict[str, Any] | None = None self,
) -> None: ... x: int,
y: int,
direction: Literal["down", "up"],
intention: str | None = None,
data: str | dict[str, Any] | None = None,
) -> None:
await handler_utils.left_mouse(self.page, x, y, direction)
class RunContext: class RunContext:
@ -204,3 +272,4 @@ class RunContext:
self.parameters = parameters self.parameters = parameters
self.page = page self.page = page
self.trace: list[ActionCall] = [] self.trace: list[ActionCall] = []
self.prompt: str | None = None

View file

@ -63,7 +63,9 @@ async def transform_workflow_run_to_code_gen_input(workflow_run_id: str, organiz
LOG.warning(f"Task {block.task_id} not found") LOG.warning(f"Task {block.task_id} not found")
continue continue
block_dump.update(task.model_dump()) block_dump.update(task.model_dump())
actions = await app.DATABASE.get_task_actions(task_id=block.task_id, organization_id=organization_id) actions = await app.DATABASE.get_task_actions_hydrated(
task_id=block.task_id, organization_id=organization_id
)
action_dumps = [] action_dumps = []
for action in actions: for action in actions:
action_dump = action.model_dump() action_dump = action.model_dump()

View file

@ -0,0 +1,195 @@
import functools
from typing import Any, Callable

from skyvern import RunContext, SkyvernPage
# Build a dummy workflow decorator
def workflow(
    title: str | None = None,
    totp_url: str | None = None,
    totp_identifier: str | None = None,
    webhook_url: str | None = None,
    max_steps: int | None = None,
) -> Callable:
    """Return a no-op decorator for a generated workflow function.

    None of the keyword arguments are read here; they only annotate the
    decorated function at its definition site.
    """

    def _identity(fn: Callable) -> Callable:
        # Placeholder behaviour: hand the function back untouched.
        return fn

    return _identity
def task_block(
    prompt: str | None = None,
    title: str | None = None,
    url: str | None = None,
    engine: str | None = None,
    model: dict[str, Any] | None = None,
    totp_url: str | None = None,
    totp_identifier: str | None = None,
    max_steps: int | None = None,
    navigation_payload: str | None = None,
    webhook_url: str | None = None,
) -> Callable:
    """Decorate a task-block coroutine so its prompt is stored on the run context.

    Only ``prompt`` is used by the wrapper; the other arguments are accepted
    as block metadata and ignored here.

    Returns:
        A decorator for ``async def block(page, context, ...)`` functions.
        Each call sets ``context.prompt`` to ``prompt`` and then awaits the
        wrapped function, returning its result.
    """

    def decorator(func: Callable) -> Callable:
        # functools.wraps keeps the block's __name__/__doc__ on the wrapper.
        @functools.wraps(func)
        async def wrapper(page: SkyvernPage, context: RunContext, *args: Any, **kwargs: Any) -> Any:
            # Store the prompt in the context
            context.prompt = prompt
            return await func(page, context, *args, **kwargs)

        return wrapper

    return decorator
def login_block(
    prompt: str | None = None,
    title: str | None = None,
    url: str | None = None,
    engine: str | None = None,
    model: dict[str, Any] | None = None,
    totp_url: str | None = None,
    totp_identifier: str | None = None,
    max_steps: int | None = None,
    navigation_payload: str | None = None,
    webhook_url: str | None = None,
) -> Callable:
    """Decorate a login-block coroutine so its prompt is stored on the run context.

    Only ``prompt`` is used by the wrapper; the other arguments are accepted
    as block metadata and ignored here.

    Returns:
        A decorator for ``async def block(page, context, ...)`` functions.
        Each call sets ``context.prompt`` to ``prompt`` and then awaits the
        wrapped function, returning its result.
    """

    def decorator(func: Callable) -> Callable:
        # functools.wraps keeps the block's __name__/__doc__ on the wrapper.
        @functools.wraps(func)
        async def wrapper(page: SkyvernPage, context: RunContext, *args: Any, **kwargs: Any) -> Any:
            # Store the prompt in the context
            context.prompt = prompt
            return await func(page, context, *args, **kwargs)

        return wrapper

    return decorator
def navigation_block(
    prompt: str | None = None,
    title: str | None = None,
    url: str | None = None,
    engine: str | None = None,
    model: dict[str, Any] | None = None,
    totp_url: str | None = None,
    totp_identifier: str | None = None,
    max_steps: int | None = None,
) -> Callable:
    """Decorate a navigation-block coroutine so its prompt is stored on the run context.

    Only ``prompt`` is used by the wrapper; the other arguments are accepted
    as block metadata and ignored here.

    Returns:
        A decorator for ``async def block(page, context, ...)`` functions.
        Each call sets ``context.prompt`` to ``prompt`` and then awaits the
        wrapped function, returning its result.
    """

    def decorator(func: Callable) -> Callable:
        # functools.wraps keeps the block's __name__/__doc__ on the wrapper.
        @functools.wraps(func)
        async def wrapper(page: SkyvernPage, context: RunContext, *args: Any, **kwargs: Any) -> Any:
            # Store the prompt in the context
            context.prompt = prompt
            return await func(page, context, *args, **kwargs)

        return wrapper

    return decorator
def action_block(
    prompt: str | None = None,
    title: str | None = None,
    url: str | None = None,
    engine: str | None = None,
    model: dict[str, Any] | None = None,
    totp_url: str | None = None,
    totp_identifier: str | None = None,
    max_steps: int | None = None,
) -> Callable:
    """Decorate an action-block coroutine so its prompt is stored on the run context.

    Only ``prompt`` is used by the wrapper; the other arguments are accepted
    as block metadata and ignored here.

    Returns:
        A decorator for ``async def block(page, context, ...)`` functions.
        Each call sets ``context.prompt`` to ``prompt`` and then awaits the
        wrapped function, returning its result.
    """

    def decorator(func: Callable) -> Callable:
        # functools.wraps keeps the block's __name__/__doc__ on the wrapper.
        @functools.wraps(func)
        async def wrapper(page: SkyvernPage, context: RunContext, *args: Any, **kwargs: Any) -> Any:
            # Store the prompt in the context
            context.prompt = prompt
            return await func(page, context, *args, **kwargs)

        return wrapper

    return decorator
def extraction_block(
    title: str | None = None,
    data_extraction_goal: str | None = None,
    data_extraction_schema: dict[str, Any] | list | str | None = None,
    model: dict[str, Any] | None = None,
) -> Callable:
    """Decorate an extraction-block coroutine so its goal is stored on the run context.

    Only ``data_extraction_goal`` is used by the wrapper; the other arguments
    are accepted as block metadata and ignored here.

    Returns:
        A decorator for ``async def block(page, context, ...)`` functions.
        Each call sets ``context.prompt`` to ``data_extraction_goal`` and then
        awaits the wrapped function, returning its result.
    """

    def decorator(func: Callable) -> Callable:
        # functools.wraps keeps the block's __name__/__doc__ on the wrapper.
        @functools.wraps(func)
        async def wrapper(page: SkyvernPage, context: RunContext, *args: Any, **kwargs: Any) -> Any:
            # Store the data_extraction_goal as prompt in the context
            context.prompt = data_extraction_goal
            return await func(page, context, *args, **kwargs)

        return wrapper

    return decorator
def url_block(
    title: str | None = None,
    url: str | None = None,
) -> Callable:
    """Decorate a URL-block coroutine, clearing the prompt on the run context.

    Neither ``title`` nor ``url`` is used by the wrapper.

    Returns:
        A decorator for ``async def block(page, context, ...)`` functions.
        Each call sets ``context.prompt`` to ``None`` and then awaits the
        wrapped function, returning its result.
    """

    def decorator(func: Callable) -> Callable:
        # functools.wraps keeps the block's __name__/__doc__ on the wrapper.
        @functools.wraps(func)
        async def wrapper(page: SkyvernPage, context: RunContext, *args: Any, **kwargs: Any) -> Any:
            # No prompt to store for url_block
            context.prompt = None
            return await func(page, context, *args, **kwargs)

        return wrapper

    return decorator
def file_download_block(
    prompt: str | None = None,
    title: str | None = None,
    url: str | None = None,
    max_steps: int | None = None,
    engine: str | None = None,
) -> Callable:
    """Decorate a file-download coroutine so its prompt is stored on the run context.

    Only ``prompt`` is used by the wrapper; the other arguments are accepted
    as block metadata and ignored here.

    Returns:
        A decorator for ``async def block(page, context, ...)`` functions.
        Each call sets ``context.prompt`` to ``prompt`` and then awaits the
        wrapped function, returning its result.
    """

    def decorator(func: Callable) -> Callable:
        # functools.wraps keeps the block's __name__/__doc__ on the wrapper.
        @functools.wraps(func)
        async def wrapper(page: SkyvernPage, context: RunContext, *args: Any, **kwargs: Any) -> Any:
            # Store the prompt in the context
            context.prompt = prompt
            return await func(page, context, *args, **kwargs)

        return wrapper

    return decorator
def email_block(prompt: str | None = None, title: str | None = None, url: str | None = None) -> Callable:
    """Decorate an email-block coroutine so its prompt is stored on the run context.

    Only ``prompt`` is used by the wrapper; ``title`` and ``url`` are ignored.

    Returns:
        A decorator for ``async def block(page, context, ...)`` functions.
        Each call sets ``context.prompt`` to ``prompt`` and then awaits the
        wrapped function, returning its result.
    """

    def decorator(func: Callable) -> Callable:
        # functools.wraps keeps the block's __name__/__doc__ on the wrapper.
        @functools.wraps(func)
        async def wrapper(page: SkyvernPage, context: RunContext, *args: Any, **kwargs: Any) -> Any:
            # Store the prompt in the context
            context.prompt = prompt
            return await func(page, context, *args, **kwargs)

        return wrapper

    return decorator
def wait_block(seconds: int, title: str | None = None) -> Callable:
    """Decorate a wait-block coroutine, clearing the prompt on the run context.

    Neither ``seconds`` nor ``title`` is used by the wrapper; no waiting is
    done here.

    Returns:
        A decorator for ``async def block(page, context, ...)`` functions.
        Each call sets ``context.prompt`` to ``None`` and then awaits the
        wrapped function, returning its result.
    """

    def decorator(func: Callable) -> Callable:
        # functools.wraps keeps the block's __name__/__doc__ on the wrapper.
        @functools.wraps(func)
        async def wrapper(page: SkyvernPage, context: RunContext, *args: Any, **kwargs: Any) -> Any:
            # No prompt to store for wait_block
            context.prompt = None
            return await func(page, context, *args, **kwargs)

        return wrapper

    return decorator
def text_prompt_block(
    prompt: str | None = None,
    title: str | None = None,
    json_schema: dict[str, Any] | list | str | None = None,
) -> Callable:
    """Decorate a text-prompt coroutine so its prompt is stored on the run context.

    Only ``prompt`` is used by the wrapper; ``title`` and ``json_schema`` are
    ignored here.

    Returns:
        A decorator for ``async def block(page, context, ...)`` functions.
        Each call sets ``context.prompt`` to ``prompt`` and then awaits the
        wrapped function, returning its result.
    """

    def decorator(func: Callable) -> Callable:
        # functools.wraps keeps the block's __name__/__doc__ on the wrapper.
        @functools.wraps(func)
        async def wrapper(page: SkyvernPage, context: RunContext, *args: Any, **kwargs: Any) -> Any:
            # Store the prompt in the context
            context.prompt = prompt
            return await func(page, context, *args, **kwargs)

        return wrapper

    return decorator

View file

@ -3742,31 +3742,71 @@ class AgentDB:
mime_type: str | None = None, mime_type: str | None = None,
encoding: str = "utf-8", encoding: str = "utf-8",
artifact_id: str | None = None, artifact_id: str | None = None,
) -> None: ) -> ScriptFile:
"""Create a script file record.""" """Create a script file."""
try: async with self.Session() as session:
async with self.Session() as session: script_file = ScriptFileModel(
script_file = ScriptFileModel( script_revision_id=script_revision_id,
script_revision_id=script_revision_id, script_id=script_id,
script_id=script_id, organization_id=organization_id,
organization_id=organization_id, file_path=file_path,
file_path=file_path, file_name=file_name,
file_name=file_name, file_type=file_type,
file_type=file_type, content_hash=content_hash,
content_hash=content_hash, file_size=file_size,
file_size=file_size, mime_type=mime_type,
mime_type=mime_type, encoding=encoding,
encoding=encoding, artifact_id=artifact_id,
artifact_id=artifact_id, )
session.add(script_file)
await session.commit()
await session.refresh(script_file)
return convert_to_script_file(script_file)
async def create_script_block(
    self,
    script_revision_id: str,
    script_id: str,
    organization_id: str,
    script_block_label: str,
    script_file_id: str | None = None,
) -> ScriptBlock:
    """Persist a new script block row and return it converted to ``ScriptBlock``.

    The row is added, committed, and refreshed inside one session before it is
    handed to ``convert_to_script_block``.

    Args:
        script_revision_id: Value stored in the row's ``script_revision_id``.
        script_id: Value stored in the row's ``script_id``.
        organization_id: Value stored in the row's ``organization_id``.
        script_block_label: Value stored in the row's ``script_block_label``.
        script_file_id: Optional value stored in the row's ``script_file_id``.

    Returns:
        The result of ``convert_to_script_block`` for the refreshed row.
    """
    column_values = {
        "script_revision_id": script_revision_id,
        "script_id": script_id,
        "organization_id": organization_id,
        "script_block_label": script_block_label,
        "script_file_id": script_file_id,
    }
    async with self.Session() as db:
        record = ScriptBlockModel(**column_values)
        db.add(record)
        await db.commit()
        # Refresh after the commit, before converting the row.
        await db.refresh(record)
        return convert_to_script_block(record)
async def update_script_block(
self,
script_block_id: str,
organization_id: str,
script_file_id: str | None = None,
) -> ScriptBlock:
async with self.Session() as session:
script_block = (
await session.scalars(
select(ScriptBlockModel)
.filter_by(script_block_id=script_block_id)
.filter_by(organization_id=organization_id)
) )
session.add(script_file) ).first()
if script_block:
if script_file_id:
script_block.script_file_id = script_file_id
await session.commit() await session.commit()
except SQLAlchemyError: await session.refresh(script_block)
LOG.error("SQLAlchemyError", exc_info=True) return convert_to_script_block(script_block)
raise else:
except Exception: raise NotFoundError("Script block not found")
LOG.error("UnexpectedError", exc_info=True)
raise
async def get_script_files(self, script_revision_id: str, organization_id: str) -> list[ScriptFile]: async def get_script_files(self, script_revision_id: str, organization_id: str) -> list[ScriptFile]:
async with self.Session() as session: async with self.Session() as session:

View file

@ -35,44 +35,12 @@ async def create_script(
current_org: Organization = Depends(org_auth_service.get_current_org), current_org: Organization = Depends(org_auth_service.get_current_org),
) -> CreateScriptResponse: ) -> CreateScriptResponse:
"""Create a new script with optional files and metadata.""" """Create a new script with optional files and metadata."""
organization_id = current_org.organization_id return await script_service.create_script(
LOG.info( organization_id=current_org.organization_id,
"Creating script", workflow_id=data.workflow_id,
organization_id=organization_id, run_id=data.run_id,
file_count=len(data.files) if data.files else 0, files=data.files,
) )
if data.run_id:
if not await app.DATABASE.get_run(run_id=data.run_id, organization_id=organization_id):
raise HTTPException(status_code=404, detail=f"Run_id {data.run_id} not found")
try:
# Create the script in the database
script = await app.DATABASE.create_script(
organization_id=organization_id,
run_id=data.run_id,
)
# Process files if provided
file_tree = {}
file_count = 0
if data.files:
file_tree = await script_service.build_file_tree(
data.files,
organization_id=organization_id,
script_id=script.script_id,
script_version=script.version,
script_revision_id=script.script_revision_id,
)
file_count = len(data.files)
return CreateScriptResponse(
script_id=script.script_id,
version=script.version,
run_id=script.run_id,
file_count=file_count,
created_at=script.created_at,
file_tree=file_tree,
)
except Exception as e:
LOG.error("Failed to create script", error=str(e), exc_info=True)
raise HTTPException(status_code=500, detail="Failed to create script")
@base_router.get( @base_router.get(

View file

@ -11,8 +11,8 @@ from jinja2.sandbox import SandboxedEnvironment
from skyvern import analytics from skyvern import analytics
from skyvern.config import settings from skyvern.config import settings
from skyvern.constants import GET_DOWNLOADED_FILES_TIMEOUT, SAVE_DOWNLOADED_FILES_TIMEOUT from skyvern.constants import GET_DOWNLOADED_FILES_TIMEOUT, SAVE_DOWNLOADED_FILES_TIMEOUT
from skyvern.core.code_generations.generate_code import generate_workflow_script as generate_python_workflow_script from skyvern.core.script_generations.generate_script import generate_workflow_script as generate_python_workflow_script
from skyvern.core.code_generations.transform_workflow_run import transform_workflow_run_to_code_gen_input from skyvern.core.script_generations.transform_workflow_run import transform_workflow_run_to_code_gen_input
from skyvern.exceptions import ( from skyvern.exceptions import (
BlockNotFound, BlockNotFound,
BrowserSessionNotFound, BrowserSessionNotFound,
@ -2287,7 +2287,7 @@ class WorkflowService:
workflow_run_id=workflow_run.workflow_run_id, workflow_run_id=workflow_run.workflow_run_id,
organization_id=workflow.organization_id, organization_id=workflow.organization_id,
) )
python_src = generate_python_workflow_script( python_src = await generate_python_workflow_script(
file_name=codegen_input.file_name, file_name=codegen_input.file_name,
workflow_run_request=codegen_input.workflow_run, workflow_run_request=codegen_input.workflow_run,
workflow=codegen_input.workflow, workflow=codegen_input.workflow,

View file

@ -5,11 +5,11 @@ import subprocess
from datetime import datetime from datetime import datetime
import structlog import structlog
from fastapi import BackgroundTasks from fastapi import BackgroundTasks, HTTPException
from skyvern.exceptions import ScriptNotFound from skyvern.exceptions import ScriptNotFound
from skyvern.forge import app from skyvern.forge import app
from skyvern.schemas.scripts import FileNode, ScriptFileCreate from skyvern.schemas.scripts import CreateScriptResponse, FileNode, ScriptFileCreate
LOG = structlog.get_logger(__name__) LOG = structlog.get_logger(__name__)
@ -96,6 +96,52 @@ async def build_file_tree(
return file_tree return file_tree
async def create_script(
    organization_id: str,
    workflow_id: str | None = None,
    run_id: str | None = None,
    files: list[ScriptFileCreate] | None = None,
) -> CreateScriptResponse:
    """Create a script record and, when files are supplied, build its file tree.

    Args:
        organization_id: Passed to every database call and to ``build_file_tree``.
        workflow_id: Accepted in the signature but not used by this function.
        run_id: Optional run to attach. When given, it must resolve through
            ``app.DATABASE.get_run`` for the same organization.
        files: Optional files forwarded to ``build_file_tree``.

    Returns:
        A ``CreateScriptResponse`` populated from the created script, the number
        of files passed in, and the resulting file tree (empty when no files).

    Raises:
        HTTPException: 404 when ``run_id`` is given but no run is found;
            500 for any other failure while creating the script.
    """
    # NOTE(review): workflow_id is never forwarded to app.DATABASE.create_script;
    # confirm whether the script should be linked to the workflow here.
    LOG.info(
        "Creating script",
        organization_id=organization_id,
        file_count=len(files) if files else 0,
    )

    try:
        if run_id and not await app.DATABASE.get_run(run_id=run_id, organization_id=organization_id):
            raise HTTPException(status_code=404, detail=f"Run_id {run_id} not found")

        script = await app.DATABASE.create_script(
            organization_id=organization_id,
            run_id=run_id,
        )

        file_tree: dict[str, FileNode] = {}
        file_count = 0
        if files:
            file_tree = await build_file_tree(
                files,
                organization_id=organization_id,
                script_id=script.script_id,
                script_version=script.version,
                script_revision_id=script.script_revision_id,
            )
            file_count = len(files)

        return CreateScriptResponse(
            script_id=script.script_id,
            version=script.version,
            run_id=script.run_id,
            file_count=file_count,
            created_at=script.created_at,
            file_tree=file_tree,
        )
    except HTTPException:
        # HTTPException is itself an Exception; without this clause the 404
        # raised above would be caught below and reported as a 500.
        raise
    except Exception as e:
        LOG.error("Failed to create script", error=str(e), exc_info=True)
        raise HTTPException(status_code=500, detail="Failed to create script") from e
async def execute_script( async def execute_script(
script_id: str, script_id: str,
organization_id: str, organization_id: str,

View file

@ -23,7 +23,6 @@ class ActionType(StrEnum):
SCROLL = "scroll" SCROLL = "scroll"
KEYPRESS = "keypress" KEYPRESS = "keypress"
TYPE = "type"
MOVE = "move" MOVE = "move"
DRAG = "drag" DRAG = "drag"
LEFT_MOUSE = "left_mouse" LEFT_MOUSE = "left_mouse"

View file

@ -1804,55 +1804,7 @@ async def handle_keypress_action(
task: Task, task: Task,
step: Step, step: Step,
) -> list[ActionResult]: ) -> list[ActionResult]:
updated_keys = [] await handler_utils.keypress(page, action.keys, hold=action.hold, duration=action.duration)
for key in action.keys:
key_lower_case = key.lower()
if key_lower_case in ("enter", "return"):
updated_keys.append("Enter")
elif key_lower_case == "space":
updated_keys.append(" ")
elif key_lower_case == "ctrl":
updated_keys.append("Control")
elif key_lower_case == "backspace":
updated_keys.append("Backspace")
elif key_lower_case == "pagedown":
updated_keys.append("PageDown")
elif key_lower_case == "pageup":
updated_keys.append("PageUp")
elif key_lower_case == "tab":
updated_keys.append("Tab")
elif key_lower_case == "shift":
updated_keys.append("Shift")
elif key_lower_case in ("arrowleft", "left"):
updated_keys.append("ArrowLeft")
elif key_lower_case in ("arrowright", "right"):
updated_keys.append("ArrowRight")
elif key_lower_case in ("arrowup", "up"):
updated_keys.append("ArrowUp")
elif key_lower_case in ("arrowdown", "down"):
updated_keys.append("ArrowDown")
elif key_lower_case == "home":
updated_keys.append("Home")
elif key_lower_case == "end":
updated_keys.append("End")
elif key_lower_case == "delete":
updated_keys.append("Delete")
elif key_lower_case == "ecs":
updated_keys.append("Escape")
elif key_lower_case == "alt":
updated_keys.append("Alt")
elif key_lower_case.startswith("f") and key_lower_case[1:].isdigit():
# Handle function keys: f1 -> F1, f5 -> F5, etc.
updated_keys.append(key_lower_case.upper())
else:
updated_keys.append(key)
keypress_str = "+".join(updated_keys)
if action.hold:
await page.keyboard.down(keypress_str)
await asyncio.sleep(action.duration)
await page.keyboard.up(keypress_str)
else:
await page.keyboard.press(keypress_str)
return [ActionSuccess()] return [ActionSuccess()]
@ -1876,13 +1828,7 @@ async def handle_drag_action(
task: Task, task: Task,
step: Step, step: Step,
) -> list[ActionResult]: ) -> list[ActionResult]:
if action.start_x and action.start_y: await handler_utils.drag(page, action.start_x, action.start_y, action.path)
await page.mouse.move(action.start_x, action.start_y)
await page.mouse.down()
for point in action.path:
x, y = point[0], point[1]
await page.mouse.move(x, y)
await page.mouse.up()
return [ActionSuccess()] return [ActionSuccess()]
@ -1913,12 +1859,7 @@ async def handle_left_mouse_action(
task: Task, task: Task,
step: Step, step: Step,
) -> list[ActionResult]: ) -> list[ActionResult]:
if action.x and action.y: await handler_utils.left_mouse(page, action.x, action.y, action.direction)
await page.mouse.move(action.x, action.y)
if action.direction == "down":
await page.mouse.down()
elif action.direction == "up":
await page.mouse.up()
return [ActionSuccess()] return [ActionSuccess()]

View file

@ -1,7 +1,8 @@
from typing import Any import asyncio
from typing import Any, Literal
import structlog import structlog
from playwright.async_api import Locator from playwright.async_api import Locator, Page
from skyvern.config import settings from skyvern.config import settings
from skyvern.constants import TEXT_INPUT_DELAY, TEXT_PRESS_MAX_LENGTH from skyvern.constants import TEXT_INPUT_DELAY, TEXT_PRESS_MAX_LENGTH
@ -31,3 +32,79 @@ async def input_sequentially(locator: Locator, text: str, timeout: float = setti
text = text[length - TEXT_PRESS_MAX_LENGTH :] text = text[length - TEXT_PRESS_MAX_LENGTH :]
await locator.press_sequentially(text, delay=TEXT_INPUT_DELAY, timeout=timeout) await locator.press_sequentially(text, delay=TEXT_INPUT_DELAY, timeout=timeout)
# Lower-cased key spellings mapped to the key names sent to the page keyboard.
_KEY_ALIASES: dict[str, str] = {
    "enter": "Enter",
    "return": "Enter",
    "space": " ",
    "ctrl": "Control",
    "control": "Control",
    "backspace": "Backspace",
    "pagedown": "PageDown",
    "pageup": "PageUp",
    "tab": "Tab",
    "shift": "Shift",
    "arrowleft": "ArrowLeft",
    "left": "ArrowLeft",
    "arrowright": "ArrowRight",
    "right": "ArrowRight",
    "arrowup": "ArrowUp",
    "up": "ArrowUp",
    "arrowdown": "ArrowDown",
    "down": "ArrowDown",
    "home": "Home",
    "end": "End",
    "delete": "Delete",
    "esc": "Escape",
    "escape": "Escape",
    "alt": "Alt",
}


def _normalize_key(key: str) -> str:
    """Map one key spelling to its canonical name; unknown keys pass through unchanged."""
    lowered = key.lower()
    alias = _KEY_ALIASES.get(lowered)
    if alias is not None:
        return alias
    if lowered.startswith("f") and lowered[1:].isdigit():
        # Handle function keys: f1 -> F1, f5 -> F5, etc.
        return lowered.upper()
    return key


async def keypress(page: Page, keys: list[str], hold: bool = False, duration: float = 0) -> None:
    """Press a key or key combination on the page keyboard.

    Each entry of ``keys`` is normalized case-insensitively (e.g. "ctrl" ->
    "Control", "esc" -> "Escape", "f5" -> "F5"); unrecognized keys are sent
    exactly as given.

    Args:
        page: Page whose ``keyboard`` receives the events.
        keys: Keys making up the combination, in press order.
        hold: When False, the keys are joined with "+" and sent through a
            single ``keyboard.press``. When True, every key is pressed down,
            held for ``duration`` seconds, then released.
        duration: Seconds to keep the keys down when ``hold`` is True.
    """
    updated_keys = [_normalize_key(key) for key in keys]

    if not hold:
        await page.keyboard.press("+".join(updated_keys))
        return

    # keyboard.down/up take one key name each; only keyboard.press parses
    # "A+B" combos. Press in order and release in reverse so modifiers stay
    # held for the whole duration.
    for key in updated_keys:
        await page.keyboard.down(key)
    await asyncio.sleep(duration)
    for key in reversed(updated_keys):
        await page.keyboard.up(key)
async def drag(
    page: Page, start_x: int | None = None, start_y: int | None = None, path: list[tuple[int, int]] | None = None
) -> None:
    """Perform a mouse drag: optional move to a start point, button down, follow ``path``, button up.

    Args:
        page: Page whose ``mouse`` receives the events.
        start_x: X coordinate to move to before pressing the button.
        start_y: Y coordinate to move to before pressing the button. The
            initial move happens only when both coordinates are given.
        path: Points visited in order while the button is held; the first two
            items of each point are used as ``(x, y)``. ``None`` means no
            intermediate moves.
    """
    # Compare against None instead of truthiness: 0 is a valid coordinate and
    # must not cause the starting move to be skipped.
    if start_x is not None and start_y is not None:
        await page.mouse.move(start_x, start_y)

    await page.mouse.down()
    for point in path or []:
        await page.mouse.move(point[0], point[1])
    await page.mouse.up()
async def left_mouse(page: Page, x: int | None, y: int | None, direction: Literal["down", "up"]) -> None:
    """Press or release the mouse button, optionally moving the pointer first.

    Args:
        page: Page whose ``mouse`` receives the events.
        x: X coordinate to move to before the button event.
        y: Y coordinate to move to before the button event. The move happens
            only when both coordinates are given.
        direction: "down" presses the button, "up" releases it. Any other
            value is logged and no button event is sent.
    """
    # Compare against None instead of truthiness: 0 is a valid coordinate and
    # must not cause the move to be skipped.
    if x is not None and y is not None:
        await page.mouse.move(x, y)

    if direction == "down":
        await page.mouse.down()
    elif direction == "up":
        await page.mouse.up()
    else:
        LOG.info("Invalid direction for left mouse action", direction=direction)