Use persistent browser session in runnables

Co-authored-by: Shuchang Zheng <wintonzheng0325@gmail.com>
Co-authored-by: Shuchang Zheng <shu@skyvern.com>
This commit is contained in:
Maksim Ivanov 2025-01-09 22:04:53 +01:00 committed by GitHub
parent 5ed7e5ad8e
commit a4744ed9f5
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
12 changed files with 506 additions and 59 deletions

View file

@ -1,11 +1,12 @@
import json
import os
from typing import Any, Optional
from typing import Any, Optional, cast
import requests
from dotenv import load_dotenv
from skyvern.forge import app
from skyvern.forge.sdk.schemas.tasks import TaskRequest
load_dotenv("./skyvern-frontend/.env")
API_KEY = os.getenv("VITE_SKYVERN_API_KEY")
@ -56,7 +57,7 @@ def list_sessions() -> None:
print(f"Error listing sessions: {str(e)}")
def create_session() -> Optional[str]:
def create_browser_session() -> Optional[str]:
"""Create a new browser session"""
try:
response = make_request("POST", "/browser_sessions")
@ -162,14 +163,128 @@ def close_session(session_id: str) -> None:
print(f"Error closing session: {str(e)}")
def create_task(
    url: str | None = None,
    goal: str | None = None,
    browser_session_id: str | None = None,
) -> Optional[str]:
    """Submit a new task to the HTTP API and report the outcome on stdout.

    Args:
        url: Page the task should start from. Falls back to the Hacker News
            front page when empty or omitted.
        goal: Instructions for the task. Falls back to "extract the top
            Hacker News post" when empty or omitted.
        browser_session_id: Optional browser session ID forwarded in the
            request body.

    Returns:
        The ``task_id`` field of the JSON response, or ``None`` when the
        request or the handling of its response raised an ``Exception``
        (the error is printed, not propagated).
    """
    fallback_url = "https://news.ycombinator.com"
    fallback_goal = (
        "Navigate to the Hacker News homepage and identify the top post. "
        "COMPLETE when the title and URL of the top post are extracted. "
        "Ensure that the top post is the first post listed on the page."
    )
    try:
        payload = TaskRequest(
            url=url if url else fallback_url,
            goal=goal if goal else fallback_goal,
            browser_session_id=browser_session_id,
        )
        reply = make_request("POST", "/tasks", data=payload.model_dump())
        created = cast(dict[str, Any], reply.json())
        print("\nCreated new task:")
        # Reporting is best-effort: a response body of an unexpected shape
        # is dumped verbatim instead of aborting with a traceback.
        try:
            print(f" ID: {created.get('task_id', 'N/A')}")
            print(f"Full response: {json.dumps(created, indent=2)}")
            task_id = created.get("task_id")
        except Exception as parse_error:
            print(f"Error parsing response: {created}")
            print(f"Error: {parse_error!s}")
            task_id = None
        return task_id
    except Exception as request_error:
        print(f"Error creating task: {request_error!s}")
        return None
def create_workflow_run(
    workflow_permanent_id: str = "wpid_346464432851787586",
    browser_session_id: str | None = None,
) -> Optional[str]:
    """Start a run of a workflow through the HTTP API and report it on stdout.

    Args:
        workflow_permanent_id: Permanent ID of the workflow to run; it is
            interpolated into the request path.
        browser_session_id: Optional browser session ID forwarded in the
            request body.

    Returns:
        The ``workflow_run_id`` field of the JSON response, or ``None`` when
        the request or the handling of its response raised an ``Exception``
        (the error is printed, not propagated).
    """
    try:
        payload: dict[str, Any] = {
            "parameters": {},  # Add parameters if needed
            "browser_session_id": browser_session_id,
        }
        endpoint = f"/workflows/{workflow_permanent_id}/run"
        reply = make_request("POST", endpoint, data=payload)
        run_info = reply.json()
        print("\nCreated new workflow run:")
        # Reporting is best-effort: a response body of an unexpected shape
        # is dumped verbatim instead of aborting with a traceback.
        try:
            print(f" Workflow Run ID: {run_info.get('workflow_run_id', 'N/A')}")
            print(f" Workflow ID: {run_info.get('workflow_id', 'N/A')}")
            print(f"Full response: {json.dumps(run_info, indent=2)}")
            run_id = run_info.get("workflow_run_id")
        except Exception as parse_error:
            print(f"Error parsing response: {run_info}")
            print(f"Error: {parse_error!s}")
            run_id = None
        return run_id
    except Exception as request_error:
        print(f"Error creating workflow run: {request_error!s}")
        return None
def create_cruise(
    prompt: str | None = None,
    url: str | None = None,
    browser_session_id: str | None = None,
) -> Optional[str]:
    """Create an observer cruise through the HTTP API and report it on stdout.

    Args:
        prompt: Instructions for the cruise. Falls back to "extract the top
            Hacker News post" when empty or omitted.
        url: Optional starting URL; sent as-is (``None`` when omitted).
        browser_session_id: Optional browser session ID forwarded in the
            request body.

    Returns:
        The ``observer_cruise_id`` field of the JSON response, or ``None``
        when the request or the handling of its response raised an
        ``Exception`` (the error is printed, not propagated).
    """
    fallback_prompt = (
        "Navigate to the Hacker News homepage and identify the top post. "
        "COMPLETE when the title and URL of the top post are extracted. "
        "Ensure that the top post is the first post listed on the page."
    )
    try:
        payload = {
            "user_prompt": prompt if prompt else fallback_prompt,
            "url": url,
            "browser_session_id": browser_session_id,
        }
        reply = make_request("POST", "/cruise", data=payload)
        cruise_info = reply.json()
        print("\nCreated new observer cruise:")
        # Reporting is best-effort: a response body of an unexpected shape
        # is dumped verbatim instead of aborting with a traceback.
        try:
            print(f" Cruise ID: {cruise_info.get('observer_cruise_id', 'N/A')}")
            print(f" URL: {cruise_info.get('url', 'N/A')}")
            print(f"Full response: {json.dumps(cruise_info, indent=2)}")
            cruise_id = cruise_info.get("observer_cruise_id")
        except Exception as parse_error:
            print(f"Error parsing response: {cruise_info}")
            print(f"Error: {parse_error!s}")
            cruise_id = None
        return cruise_id
    except Exception as request_error:
        print(f"Error creating cruise: {request_error!s}")
        return None
def print_help() -> None:
"""Print available commands"""
print("\nHTTP API Commands:")
print(" list - List all active browser sessions")
print(" create - Create a new browser session")
print(" get <session_id> - Get details of a specific session")
print(" close <session_id> - Close a specific session")
print(" close_all - Close all active browser sessions")
print(" list_sessions - List all active browser sessions")
print(" create_browser_session - Create a new browser session")
print(" get_session <session_id> - Get details of a specific session")
print(" close_session <session_id> - Close a specific session")
print(" close_all_sessions - Close all active browser sessions")
print(" create_task [args] - Create a new task")
print(" Optional args:")
print(" --url <url> - URL to navigate to")
print(" --goal <goal> - Task goal/instructions")
print(" --browser_session_id <id> - Browser session ID to use")
print(" create_workflow_run [args] - Create a new workflow run")
print(" Optional args:")
print(" --workflow_id <id> - Workflow permanent ID")
print(" --browser_session_id <id> - Browser session ID to use")
print(" create_cruise [args] - Create a new observer cruise")
print(" Optional args:")
print(" --prompt <prompt> - Task prompt/instructions")
print(" --url <url> - URL to navigate to")
print(" --browser_session_id <id> - Browser session ID to use")
print(" help - Show this help message")
print("\nDirect Method Commands:")
print(" direct_list <org_id> - List sessions directly")
@ -200,22 +315,75 @@ async def main() -> None:
await handle_direct_command(cmd, args)
elif cmd == "help":
print_help()
elif cmd == "list":
elif cmd == "list_sessions":
list_sessions()
elif cmd == "create":
create_session()
elif cmd == "get":
elif cmd == "create_browser_session":
create_browser_session()
elif cmd == "create_task":
# Parse optional args
url = None
goal = None
browser_session_id = None
i = 0
while i < len(args):
if args[i] == "--url" and i + 1 < len(args):
url = args[i + 1]
i += 2
elif args[i] == "--goal" and i + 1 < len(args):
goal = args[i + 1]
i += 2
elif args[i] == "--browser_session_id" and i + 1 < len(args):
browser_session_id = args[i + 1]
i += 2
else:
i += 1
create_task(url=url, goal=goal, browser_session_id=browser_session_id)
elif cmd == "get_session":
if not args:
print("Error: session_id required")
continue
get_session(args[0])
elif cmd == "close":
elif cmd == "close_session":
if not args:
print("Error: session_id required")
continue
close_session(args[0])
elif cmd == "close_all":
elif cmd == "close_all_sessions":
close_all_sessions()
elif cmd == "create_workflow_run":
# Parse optional args
workflow_id = "wpid_346464432851787586" # Default workflow ID
browser_session_id = None
i = 0
while i < len(args):
if args[i] == "--workflow_id" and i + 1 < len(args):
workflow_id = args[i + 1]
i += 2
elif args[i] == "--browser_session_id" and i + 1 < len(args):
browser_session_id = args[i + 1]
i += 2
else:
i += 1
create_workflow_run(workflow_permanent_id=workflow_id, browser_session_id=browser_session_id)
elif cmd == "create_cruise":
# Parse optional args
prompt = None
url = None
browser_session_id = None
i = 0
while i < len(args):
if args[i] == "--prompt" and i + 1 < len(args):
prompt = args[i + 1]
i += 2
elif args[i] == "--url" and i + 1 < len(args):
url = args[i + 1]
i += 2
elif args[i] == "--browser_session_id" and i + 1 < len(args):
browser_session_id = args[i + 1]
i += 2
else:
i += 1
create_cruise(prompt=prompt, url=url, browser_session_id=browser_session_id)
else:
print(f"Unknown command: {cmd}")
print("Type 'help' for available commands")

View file

@ -239,6 +239,7 @@ class ForgeAgent:
api_key: str | None = None,
close_browser_on_completion: bool = True,
task_block: BaseTaskBlock | None = None,
browser_session_id: str | None = None,
) -> Tuple[Step, DetailedAgentStepOutput | None, Step | None]:
workflow_run: WorkflowRun | None = None
if task.workflow_run_id:
@ -284,6 +285,8 @@ class ForgeAgent:
last_step=step,
api_key=api_key,
need_call_webhook=True,
browser_session_id=browser_session_id,
close_browser_on_completion=close_browser_on_completion,
)
return step, None, None
@ -316,7 +319,7 @@ class ForgeAgent:
step,
browser_state,
detailed_output,
) = await self._initialize_execution_state(task, step, workflow_run)
) = await self._initialize_execution_state(task, step, workflow_run, browser_session_id)
if page := await browser_state.get_working_page():
await self.register_async_operations(organization, task, page)
@ -366,6 +369,7 @@ class ForgeAgent:
last_step=last_step,
api_key=api_key,
close_browser_on_completion=close_browser_on_completion,
browser_session_id=browser_session_id,
)
return last_step, detailed_output, None
@ -382,6 +386,7 @@ class ForgeAgent:
last_step=step,
api_key=api_key,
close_browser_on_completion=close_browser_on_completion,
browser_session_id=browser_session_id,
)
return step, detailed_output, None
elif step.status == StepStatus.completed:
@ -404,6 +409,7 @@ class ForgeAgent:
last_step=last_step,
api_key=api_key,
close_browser_on_completion=close_browser_on_completion,
browser_session_id=browser_session_id,
)
return last_step, detailed_output, None
elif maybe_next_step:
@ -433,6 +439,7 @@ class ForgeAgent:
next_step,
api_key=api_key,
close_browser_on_completion=close_browser_on_completion,
browser_session_id=browser_session_id,
task_block=task_block,
)
elif settings.execute_all_steps() and next_step:
@ -442,6 +449,7 @@ class ForgeAgent:
next_step,
api_key=api_key,
close_browser_on_completion=close_browser_on_completion,
browser_session_id=browser_session_id,
task_block=task_block,
)
else:
@ -477,6 +485,7 @@ class ForgeAgent:
last_step=step,
api_key=api_key,
close_browser_on_completion=close_browser_on_completion,
browser_session_id=browser_session_id,
)
else:
LOG.warning(
@ -512,6 +521,7 @@ class ForgeAgent:
api_key=api_key,
close_browser_on_completion=close_browser_on_completion,
need_final_screenshot=False,
browser_session_id=browser_session_id,
)
else:
LOG.warning(
@ -530,6 +540,7 @@ class ForgeAgent:
last_step=step,
api_key=api_key,
need_call_webhook=False,
browser_session_id=browser_session_id,
)
return step, detailed_output, None
except InvalidTaskStatusTransition:
@ -544,6 +555,8 @@ class ForgeAgent:
last_step=step,
api_key=api_key,
need_call_webhook=False,
browser_session_id=browser_session_id,
close_browser_on_completion=close_browser_on_completion,
)
return step, detailed_output, None
except (UnsupportedActionType, UnsupportedTaskType, FailedToParseActionInstruction) as e:
@ -560,6 +573,8 @@ class ForgeAgent:
last_step=step,
api_key=api_key,
need_call_webhook=False,
browser_session_id=browser_session_id,
close_browser_on_completion=close_browser_on_completion,
)
return step, detailed_output, None
@ -581,6 +596,7 @@ class ForgeAgent:
last_step=step,
api_key=api_key,
close_browser_on_completion=close_browser_on_completion,
browser_session_id=browser_session_id,
)
else:
LOG.warning(
@ -1110,14 +1126,23 @@ class ForgeAgent:
)
async def _initialize_execution_state(
self, task: Task, step: Step, workflow_run: WorkflowRun | None = None
self,
task: Task,
step: Step,
workflow_run: WorkflowRun | None = None,
browser_session_id: str | None = None,
) -> tuple[Step, BrowserState, DetailedAgentStepOutput]:
if workflow_run:
browser_state = await app.BROWSER_MANAGER.get_or_create_for_workflow_run(
workflow_run=workflow_run, url=task.url
workflow_run=workflow_run,
url=task.url,
browser_session_id=browser_session_id,
)
else:
browser_state = await app.BROWSER_MANAGER.get_or_create_for_task(task)
browser_state = await app.BROWSER_MANAGER.get_or_create_for_task(
task=task,
browser_session_id=browser_session_id,
)
# Initialize video artifact for the task here, afterwards it'll only get updated
if browser_state and browser_state.browser_artifacts:
video_artifacts = await app.BROWSER_MANAGER.get_video_artifacts(
@ -1465,6 +1490,7 @@ class ForgeAgent:
need_call_webhook: bool = True,
close_browser_on_completion: bool = True,
need_final_screenshot: bool = True,
browser_session_id: str | None = None,
) -> None:
"""
send the task response to the webhook callback url
@ -1544,7 +1570,9 @@ class ForgeAgent:
)
await self.async_operation_pool.remove_task(task.task_id)
await self.cleanup_browser_and_create_artifacts(close_browser_on_completion, last_step, task)
await self.cleanup_browser_and_create_artifacts(
close_browser_on_completion, last_step, task, browser_session_id=browser_session_id
)
# Wait for all tasks to complete before generating the links for the artifacts
await app.ARTIFACT_MANAGER.wait_for_upload_aiotasks([task.task_id])
@ -1713,7 +1741,11 @@ class ForgeAgent:
)
async def cleanup_browser_and_create_artifacts(
self, close_browser_on_completion: bool, last_step: Step, task: Task
self,
close_browser_on_completion: bool,
last_step: Step,
task: Task,
browser_session_id: str | None = None,
) -> None:
"""
Developer notes: we should not expect any exception to be raised here.
@ -1721,7 +1753,12 @@ class ForgeAgent:
If errors are raised and not caught inside this function, please catch and handle them.
"""
# We need to close the browser even if there is no webhook callback url or api key
browser_state = await app.BROWSER_MANAGER.cleanup_for_task(task.task_id, close_browser_on_completion)
browser_state = await app.BROWSER_MANAGER.cleanup_for_task(
task.task_id,
close_browser_on_completion,
browser_session_id,
task.organization_id,
)
if browser_state:
# Update recording artifact after closing the browser, so we can get an accurate recording
video_artifacts = await app.BROWSER_MANAGER.get_video_artifacts(

View file

@ -25,6 +25,7 @@ class AsyncExecutor(abc.ABC):
organization_id: str,
max_steps_override: int | None,
api_key: str | None,
browser_session_id: str | None,
**kwargs: dict,
) -> None:
pass
@ -39,6 +40,7 @@ class AsyncExecutor(abc.ABC):
workflow_run_id: str,
max_steps_override: int | None,
api_key: str | None,
browser_session_id: str | None,
**kwargs: dict,
) -> None:
pass
@ -51,6 +53,7 @@ class AsyncExecutor(abc.ABC):
organization_id: str,
observer_cruise_id: str,
max_iterations_override: int | None,
browser_session_id: str | None,
**kwargs: dict,
) -> None:
pass
@ -65,10 +68,13 @@ class BackgroundTaskExecutor(AsyncExecutor):
organization_id: str,
max_steps_override: int | None,
api_key: str | None,
browser_session_id: str | None,
**kwargs: dict,
) -> None:
LOG.info("Executing task using background task executor", task_id=task_id)
close_browser_on_completion = browser_session_id is None
organization = await app.DATABASE.get_organization(organization_id)
if organization is None:
raise OrganizationNotFound(organization_id)
@ -98,6 +104,8 @@ class BackgroundTaskExecutor(AsyncExecutor):
task,
step,
api_key,
close_browser_on_completion=close_browser_on_completion,
browser_session_id=browser_session_id,
)
async def execute_workflow(
@ -109,6 +117,7 @@ class BackgroundTaskExecutor(AsyncExecutor):
workflow_run_id: str,
max_steps_override: int | None,
api_key: str | None,
browser_session_id: str | None,
**kwargs: dict,
) -> None:
LOG.info(
@ -126,6 +135,7 @@ class BackgroundTaskExecutor(AsyncExecutor):
workflow_run_id=workflow_run_id,
api_key=api_key,
organization=organization,
browser_session_id=browser_session_id,
)
async def execute_cruise(
@ -135,6 +145,7 @@ class BackgroundTaskExecutor(AsyncExecutor):
organization_id: str,
observer_cruise_id: str,
max_iterations_override: int | None,
browser_session_id: str | None,
**kwargs: dict,
) -> None:
LOG.info(
@ -169,4 +180,5 @@ class BackgroundTaskExecutor(AsyncExecutor):
organization=organization,
observer_cruise_id=observer_cruise_id,
max_iterations_override=max_iterations_override,
browser_session_id=browser_session_id,
)

View file

@ -156,6 +156,7 @@ async def create_agent_task(
task_id=created_task.task_id,
organization_id=current_org.organization_id,
max_steps_override=x_max_steps_override,
browser_session_id=task.browser_session_id,
api_key=x_api_key,
)
return CreateTaskResponse(task_id=created_task.task_id)
@ -653,6 +654,7 @@ async def execute_workflow(
workflow_id=workflow_run.workflow_id,
workflow_run_id=workflow_run.workflow_run_id,
max_steps_override=x_max_steps_override,
browser_session_id=workflow_request.browser_session_id,
api_key=x_api_key,
)
return RunWorkflowResponse(
@ -1143,6 +1145,7 @@ async def observer_cruise(
organization_id=organization.organization_id,
observer_cruise_id=observer_cruise.observer_cruise_id,
max_iterations_override=x_max_iterations_override,
browser_session_id=data.browser_session_id,
)
return observer_cruise

View file

@ -89,3 +89,4 @@ class ObserverMetadata(BaseModel):
class CruiseRequest(BaseModel):
user_prompt: str
url: HttpUrl | None = None
browser_session_id: str | None = None

View file

@ -155,6 +155,7 @@ class TaskRequest(TaskBase):
examples=["https://my-webhook.com"],
)
totp_verification_url: str | None = None
browser_session_id: str | None = None
@field_validator("url", "webhook_callback_url", "totp_verification_url")
@classmethod

View file

@ -173,6 +173,7 @@ async def run_observer_cruise(
observer_cruise_id: str,
request_id: str | None = None,
max_iterations_override: str | int | None = None,
browser_session_id: str | None = None,
) -> None:
organization_id = organization.organization_id
try:
@ -197,6 +198,7 @@ async def run_observer_cruise(
observer_cruise=observer_cruise,
request_id=request_id,
max_iterations_override=max_iterations_override,
browser_session_id=browser_session_id,
)
except OperationalError:
LOG.error("Database error when running observer cruise", exc_info=True)
@ -219,7 +221,12 @@ async def run_observer_cruise(
return
finally:
if workflow and workflow_run:
await app.WORKFLOW_SERVICE.clean_up_workflow(workflow=workflow, workflow_run=workflow_run)
await app.WORKFLOW_SERVICE.clean_up_workflow(
workflow=workflow,
workflow_run=workflow_run,
browser_session_id=browser_session_id,
close_browser_on_completion=browser_session_id is None,
)
else:
LOG.warning("Workflow or workflow run not found")
@ -231,6 +238,7 @@ async def run_observer_cruise_helper(
observer_cruise: ObserverCruise,
request_id: str | None = None,
max_iterations_override: str | int | None = None,
browser_session_id: str | None = None,
) -> tuple[Workflow, WorkflowRun] | tuple[None, None]:
organization_id = organization.organization_id
observer_cruise_id = observer_cruise.observer_cruise_id
@ -318,6 +326,7 @@ async def run_observer_cruise_helper(
browser_state = await app.BROWSER_MANAGER.get_or_create_for_workflow_run(
workflow_run=workflow_run,
url=url,
browser_session_id=browser_session_id,
)
scraped_page = await scrape_website(
browser_state,
@ -494,7 +503,13 @@ async def run_observer_cruise_helper(
LOG.info("Workflow created", workflow_id=workflow.workflow_id)
# execute the extraction task
workflow_run = await handle_block_result(block, block_result, workflow, workflow_run)
workflow_run = await handle_block_result(
block,
block_result,
workflow,
workflow_run,
browser_session_id=browser_session_id,
)
if workflow_run.status != WorkflowRunStatus.running:
LOG.info(
"Workflow run is not running anymore, stopping the observer",
@ -575,6 +590,7 @@ async def handle_block_result(
workflow: Workflow,
workflow_run: WorkflowRun,
is_last_block: bool = True,
browser_session_id: str | None = None,
) -> WorkflowRun:
workflow_run_id = workflow_run.workflow_run_id
if block_result.status == BlockStatus.canceled:
@ -593,6 +609,8 @@ async def handle_block_result(
workflow=workflow,
workflow_run=workflow_run,
need_call_webhook=False,
close_browser_on_completion=browser_session_id is None,
browser_session_id=browser_session_id,
)
elif block_result.status == BlockStatus.failed:
LOG.error(

View file

@ -187,7 +187,12 @@ class Block(BaseModel, abc.ABC):
@abc.abstractmethod
async def execute(
self, workflow_run_id: str, workflow_run_block_id: str, organization_id: str | None = None, **kwargs: dict
self,
workflow_run_id: str,
workflow_run_block_id: str,
organization_id: str | None = None,
browser_session_id: str | None = None,
**kwargs: dict,
) -> BlockResult:
pass
@ -196,6 +201,7 @@ class Block(BaseModel, abc.ABC):
workflow_run_id: str,
parent_workflow_run_block_id: str | None = None,
organization_id: str | None = None,
browser_session_id: str | None = None,
**kwargs: dict,
) -> BlockResult:
workflow_run_block_id = None
@ -267,7 +273,13 @@ class Block(BaseModel, abc.ABC):
LOG.info(
"Executing block", workflow_run_id=workflow_run_id, block_label=self.label, block_type=self.block_type
)
return await self.execute(workflow_run_id, workflow_run_block_id, organization_id=organization_id, **kwargs)
return await self.execute(
workflow_run_id,
workflow_run_block_id,
organization_id=organization_id,
browser_session_id=browser_session_id,
**kwargs,
)
except Exception as e:
LOG.exception(
"Block execution failed",
@ -409,7 +421,12 @@ class BaseTaskBlock(Block):
return order, retry + 1
async def execute(
self, workflow_run_id: str, workflow_run_block_id: str, organization_id: str | None = None, **kwargs: dict
self,
workflow_run_id: str,
workflow_run_block_id: str,
organization_id: str | None = None,
browser_session_id: str | None = None,
**kwargs: dict,
) -> BlockResult:
workflow_run_context = self.get_workflow_run_context(workflow_run_id)
current_retry = 0
@ -503,7 +520,7 @@ class BaseTaskBlock(Block):
# the first task block will create the browser state and do the navigation
try:
browser_state = await app.BROWSER_MANAGER.get_or_create_for_workflow_run(
workflow_run=workflow_run, url=self.url
workflow_run=workflow_run, url=self.url, browser_session_id=browser_session_id
)
# add screenshot artifact for the first task
screenshot = await browser_state.take_screenshot(full_page=True)
@ -568,6 +585,8 @@ class BaseTaskBlock(Block):
task=task,
step=step,
task_block=self,
browser_session_id=browser_session_id,
close_browser_on_completion=browser_session_id is None,
)
except Exception as e:
# Make sure the task is marked as failed in the database before raising the exception
@ -918,7 +937,12 @@ class ForLoopBlock(Block):
)
async def execute(
self, workflow_run_id: str, workflow_run_block_id: str, organization_id: str | None = None, **kwargs: dict
self,
workflow_run_id: str,
workflow_run_block_id: str,
organization_id: str | None = None,
browser_session_id: str | None = None,
**kwargs: dict,
) -> BlockResult:
workflow_run_context = self.get_workflow_run_context(workflow_run_id)
try:
@ -1025,7 +1049,12 @@ class CodeBlock(Block):
self.code = self.format_block_parameter_template_from_workflow_run_context(self.code, workflow_run_context)
async def execute(
self, workflow_run_id: str, workflow_run_block_id: str, organization_id: str | None = None, **kwargs: dict
self,
workflow_run_id: str,
workflow_run_block_id: str,
organization_id: str | None = None,
browser_session_id: str | None = None,
**kwargs: dict,
) -> BlockResult:
raise DisabledBlockExecutionError("CodeBlock is disabled")
# get workflow run context
@ -1145,7 +1174,12 @@ class TextPromptBlock(Block):
return response
async def execute(
self, workflow_run_id: str, workflow_run_block_id: str, organization_id: str | None = None, **kwargs: dict
self,
workflow_run_id: str,
workflow_run_block_id: str,
organization_id: str | None = None,
browser_session_id: str | None = None,
**kwargs: dict,
) -> BlockResult:
# get workflow run context
workflow_run_context = self.get_workflow_run_context(workflow_run_id)
@ -1215,7 +1249,12 @@ class DownloadToS3Block(Block):
os.unlink(file_path)
async def execute(
self, workflow_run_id: str, workflow_run_block_id: str, organization_id: str | None = None, **kwargs: dict
self,
workflow_run_id: str,
workflow_run_block_id: str,
organization_id: str | None = None,
browser_session_id: str | None = None,
**kwargs: dict,
) -> BlockResult:
# get workflow run context
workflow_run_context = self.get_workflow_run_context(workflow_run_id)
@ -1296,7 +1335,12 @@ class UploadToS3Block(Block):
return f"s3://{s3_bucket}/{s3_key}"
async def execute(
self, workflow_run_id: str, workflow_run_block_id: str, organization_id: str | None = None, **kwargs: dict
self,
workflow_run_id: str,
workflow_run_block_id: str,
organization_id: str | None = None,
browser_session_id: str | None = None,
**kwargs: dict,
) -> BlockResult:
# get workflow run context
workflow_run_context = self.get_workflow_run_context(workflow_run_id)
@ -1619,7 +1663,12 @@ class SendEmailBlock(Block):
return msg
async def execute(
self, workflow_run_id: str, workflow_run_block_id: str, organization_id: str | None = None, **kwargs: dict
self,
workflow_run_id: str,
workflow_run_block_id: str,
organization_id: str | None = None,
browser_session_id: str | None = None,
**kwargs: dict,
) -> BlockResult:
workflow_run_context = self.get_workflow_run_context(workflow_run_id)
await app.DATABASE.update_workflow_run_block(
@ -1716,7 +1765,12 @@ class FileParserBlock(Block):
raise InvalidFileType(file_url=file_url_used, file_type=self.file_type, error=str(e))
async def execute(
self, workflow_run_id: str, workflow_run_block_id: str, organization_id: str | None = None, **kwargs: dict
self,
workflow_run_id: str,
workflow_run_block_id: str,
organization_id: str | None = None,
browser_session_id: str | None = None,
**kwargs: dict,
) -> BlockResult:
workflow_run_context = self.get_workflow_run_context(workflow_run_id)
if (
@ -1784,7 +1838,12 @@ class WaitBlock(Block):
return self.parameters
async def execute(
self, workflow_run_id: str, workflow_run_block_id: str, organization_id: str | None = None, **kwargs: dict
self,
workflow_run_id: str,
workflow_run_block_id: str,
organization_id: str | None = None,
browser_session_id: str | None = None,
**kwargs: dict,
) -> BlockResult:
# TODO: we need to support to interrupt the sleep when the workflow run failed/cancelled/terminated
await app.DATABASE.update_workflow_run_block(
@ -1821,7 +1880,12 @@ class ValidationBlock(BaseTaskBlock):
return self.parameters
async def execute(
self, workflow_run_id: str, workflow_run_block_id: str, organization_id: str | None = None, **kwargs: dict
self,
workflow_run_id: str,
workflow_run_block_id: str,
organization_id: str | None = None,
browser_session_id: str | None = None,
**kwargs: dict,
) -> BlockResult:
task_order, _ = await self.get_task_order(workflow_run_id, 0)
is_first_task = task_order == 0

View file

@ -18,6 +18,7 @@ class WorkflowRequestBody(BaseModel):
webhook_callback_url: str | None = None
totp_verification_url: str | None = None
totp_identifier: str | None = None
browser_session_id: str | None = None
@field_validator("webhook_callback_url", "totp_verification_url")
@classmethod

View file

@ -189,9 +189,16 @@ class WorkflowService:
workflow_run_id: str,
api_key: str,
organization: Organization,
browser_session_id: str | None = None,
) -> WorkflowRun:
"""Execute a workflow."""
organization_id = organization.organization_id
LOG.info(
"Executing workflow",
workflow_run_id=workflow_run_id,
organization_id=organization_id,
browser_session_id=browser_session_id,
)
workflow_run = await self.get_workflow_run(workflow_run_id=workflow_run_id, organization_id=organization_id)
workflow = await self.get_workflow(workflow_id=workflow_run.workflow_id, organization_id=organization_id)
@ -236,6 +243,8 @@ class WorkflowService:
workflow_run=workflow_run,
api_key=api_key,
need_call_webhook=True,
close_browser_on_completion=browser_session_id is None,
browser_session_id=browser_session_id,
)
return workflow_run
parameters = block.get_all_parameters(workflow_run_id)
@ -253,6 +262,7 @@ class WorkflowService:
block_result = await block.execute_safe(
workflow_run_id=workflow_run_id,
organization_id=organization_id,
browser_session_id=browser_session_id,
)
if block_result.status == BlockStatus.canceled:
LOG.info(
@ -271,6 +281,8 @@ class WorkflowService:
workflow_run=workflow_run,
api_key=api_key,
need_call_webhook=False,
close_browser_on_completion=browser_session_id is None,
browser_session_id=browser_session_id,
)
return workflow_run
elif block_result.status == BlockStatus.failed:
@ -292,6 +304,8 @@ class WorkflowService:
workflow=workflow,
workflow_run=workflow_run,
api_key=api_key,
close_browser_on_completion=browser_session_id is None,
browser_session_id=browser_session_id,
)
return workflow_run
@ -326,6 +340,8 @@ class WorkflowService:
workflow=workflow,
workflow_run=workflow_run,
api_key=api_key,
close_browser_on_completion=browser_session_id is None,
browser_session_id=browser_session_id,
)
return workflow_run
@ -357,7 +373,13 @@ class WorkflowService:
await self.mark_workflow_run_as_failed(
workflow_run_id=workflow_run.workflow_run_id, failure_reason=failure_reason
)
await self.clean_up_workflow(workflow=workflow, workflow_run=workflow_run, api_key=api_key)
await self.clean_up_workflow(
workflow=workflow,
workflow_run=workflow_run,
api_key=api_key,
browser_session_id=browser_session_id,
close_browser_on_completion=browser_session_id is None,
)
return workflow_run
refreshed_workflow_run = await app.DATABASE.get_workflow_run(
@ -376,7 +398,13 @@ class WorkflowService:
workflow_run_id=workflow_run.workflow_run_id,
workflow_run_status=refreshed_workflow_run.status if refreshed_workflow_run else None,
)
await self.clean_up_workflow(workflow=workflow, workflow_run=workflow_run, api_key=api_key)
await self.clean_up_workflow(
workflow=workflow,
workflow_run=workflow_run,
api_key=api_key,
browser_session_id=browser_session_id,
close_browser_on_completion=browser_session_id is None,
)
return workflow_run
async def create_workflow(
@ -865,6 +893,7 @@ class WorkflowService:
api_key: str | None = None,
close_browser_on_completion: bool = True,
need_call_webhook: bool = True,
browser_session_id: str | None = None,
) -> None:
analytics.capture("skyvern-oss-agent-workflow-status", {"status": workflow_run.status})
tasks = await self.get_tasks_by_workflow_run_id(workflow_run.workflow_run_id)
@ -873,6 +902,8 @@ class WorkflowService:
workflow_run.workflow_run_id,
all_workflow_task_ids,
close_browser_on_completion,
browser_session_id,
organization_id=workflow_run.organization_id,
)
if browser_state:
await self.persist_video_data(browser_state, workflow, workflow_run)

View file

@ -6,6 +6,7 @@ import structlog
from playwright.async_api import async_playwright
from skyvern.exceptions import MissingBrowserState
from skyvern.forge import app
from skyvern.forge.sdk.schemas.tasks import ProxyLocation, Task
from skyvern.forge.sdk.workflow.models.workflow import WorkflowRun
from skyvern.webeye.browser_factory import BrowserContextFactory, BrowserState, VideoArtifact
@ -66,18 +67,52 @@ class BrowserManager:
return None
async def get_or_create_for_task(self, task: Task) -> BrowserState:
async def get_or_create_for_task(
self,
task: Task,
browser_session_id: str | None = None,
) -> BrowserState:
browser_state = self.get_for_task(task_id=task.task_id, workflow_run_id=task.workflow_run_id)
if browser_state is not None:
return browser_state
LOG.info("Creating browser state for task", task_id=task.task_id)
browser_state = await self._create_browser_state(
proxy_location=task.proxy_location,
url=task.url,
task_id=task.task_id,
organization_id=task.organization_id,
)
if browser_session_id:
LOG.info(
"Getting browser state for task from persistent sessions manager",
browser_session_id=browser_session_id,
)
browser_state = app.PERSISTENT_SESSIONS_MANAGER.get_browser_state(browser_session_id)
if browser_state is None:
LOG.warning(
"Browser state not found in persistent sessions manager",
browser_session_id=browser_session_id,
)
raise MissingBrowserState(task_id=task.task_id)
else:
if task.organization_id:
await app.PERSISTENT_SESSIONS_MANAGER.occupy_browser_session(
browser_session_id,
organization_id=task.organization_id,
runnable_type="task",
runnable_id=task.task_id,
)
else:
LOG.warning("Organization ID is not set for task", task_id=task.task_id)
page = await browser_state.get_working_page()
if page:
await browser_state.navigate_to_url(page=page, url=task.url)
else:
LOG.warning("Browser state has no page", workflow_run_id=task.workflow_run_id)
if browser_state is None:
LOG.info("Creating browser state for task", task_id=task.task_id)
browser_state = await self._create_browser_state(
proxy_location=task.proxy_location,
url=task.url,
task_id=task.task_id,
organization_id=task.organization_id,
)
self.pages[task.task_id] = browser_state
if task.workflow_run_id:
self.pages[task.workflow_run_id] = browser_state
@ -89,21 +124,53 @@ class BrowserManager:
)
return browser_state
async def get_or_create_for_workflow_run(self, workflow_run: WorkflowRun, url: str | None = None) -> BrowserState:
async def get_or_create_for_workflow_run(
self,
workflow_run: WorkflowRun,
url: str | None = None,
browser_session_id: str | None = None,
) -> BrowserState:
browser_state = self.get_for_workflow_run(workflow_run_id=workflow_run.workflow_run_id)
if browser_state is not None:
return browser_state
LOG.info(
"Creating browser state for workflow run",
workflow_run_id=workflow_run.workflow_run_id,
)
browser_state = await self._create_browser_state(
workflow_run.proxy_location,
url=url,
workflow_run_id=workflow_run.workflow_run_id,
organization_id=workflow_run.organization_id,
)
if browser_session_id:
LOG.info(
"Getting browser state for workflow run from persistent sessions manager",
browser_session_id=browser_session_id,
)
browser_state = app.PERSISTENT_SESSIONS_MANAGER.get_browser_state(browser_session_id)
if browser_state is None:
LOG.warning(
"Browser state not found in persistent sessions manager", browser_session_id=browser_session_id
)
raise MissingBrowserState(workflow_run_id=workflow_run.workflow_run_id)
else:
await app.PERSISTENT_SESSIONS_MANAGER.occupy_browser_session(
browser_session_id,
runnable_type="workflow_run",
runnable_id=workflow_run.workflow_run_id,
organization_id=workflow_run.organization_id,
)
page = await browser_state.get_working_page()
if page:
if url:
await browser_state.navigate_to_url(page=page, url=url)
else:
LOG.warning("Browser state has no page", workflow_run_id=workflow_run.workflow_run_id)
if browser_state is None:
LOG.info(
"Creating browser state for workflow run",
workflow_run_id=workflow_run.workflow_run_id,
)
browser_state = await self._create_browser_state(
proxy_location=workflow_run.proxy_location,
url=url,
workflow_run_id=workflow_run.workflow_run_id,
organization_id=workflow_run.organization_id,
)
self.pages[workflow_run.workflow_run_id] = browser_state
# The URL here is only used when creating a new page, and not when using an existing page.
@ -201,7 +268,13 @@ class BrowserManager:
cls.pages = dict()
LOG.info("BrowserManger is closed")
async def cleanup_for_task(self, task_id: str, close_browser_on_completion: bool = True) -> BrowserState | None:
async def cleanup_for_task(
self,
task_id: str,
close_browser_on_completion: bool = True,
browser_session_id: str | None = None,
organization_id: str | None = None,
) -> BrowserState | None:
"""
Developer notes: handle errors here. Do not raise error from this function.
If error occurs, log it and address the cleanup error.
@ -217,6 +290,15 @@ class BrowserManager:
await browser_state_to_close.close(close_browser_on_completion=close_browser_on_completion)
LOG.info("Task is cleaned up")
if browser_session_id:
if organization_id:
await app.PERSISTENT_SESSIONS_MANAGER.release_browser_session(
browser_session_id, organization_id=organization_id
)
LOG.info("Released browser session", browser_session_id=browser_session_id)
else:
LOG.warning("Organization ID not specified, cannot release browser session", task_id=task_id)
return browser_state_to_close
async def cleanup_for_workflow_run(
@ -224,6 +306,8 @@ class BrowserManager:
workflow_run_id: str,
task_ids: list[str],
close_browser_on_completion: bool = True,
browser_session_id: str | None = None,
organization_id: str | None = None,
) -> BrowserState | None:
LOG.info("Cleaning up for workflow run")
browser_state_to_close = self.pages.pop(workflow_run_id, None)
@ -250,4 +334,15 @@ class BrowserManager:
)
LOG.info("Workflow run is cleaned up")
if browser_session_id:
if organization_id:
await app.PERSISTENT_SESSIONS_MANAGER.release_browser_session(
browser_session_id, organization_id=organization_id
)
LOG.info("Released browser session", browser_session_id=browser_session_id)
else:
LOG.warning(
"Organization ID not specified, cannot release browser session", workflow_run_id=workflow_run_id
)
return browser_state_to_close

View file

@ -6,6 +6,7 @@ from dataclasses import dataclass
from typing import Dict, List, Optional, Tuple
import structlog
from playwright._impl._errors import TargetClosedError
from playwright.async_api import async_playwright
from skyvern.forge.sdk.db.client import AgentDB
@ -24,7 +25,7 @@ class BrowserSession:
class PersistentSessionsManager:
instance = None
instance: PersistentSessionsManager | None = None
_browser_sessions: Dict[str, BrowserSession] = dict()
database: AgentDB
@ -164,7 +165,22 @@ class PersistentSessionsManager:
session_id=session_id,
)
self._browser_sessions.pop(session_id, None)
await browser_session.browser_state.close()
try:
await browser_session.browser_state.close()
except TargetClosedError:
LOG.info(
"Browser context already closed",
organization_id=organization_id,
session_id=session_id,
)
except Exception:
LOG.warning(
"Error while closing browser session",
organization_id=organization_id,
session_id=session_id,
exc_info=True,
)
else:
LOG.info(
"Browser session not found in memory, marking as deleted in database",