[SKY-6974] Browser Profiles [2/3] Marc/backend browser session profiles (#3923)
Some checks are pending
Run tests and pre-commit / Run tests and pre-commit hooks (push) Waiting to run
Run tests and pre-commit / Frontend Lint and Build (push) Waiting to run
Publish Fern Docs / run (push) Waiting to run

This commit is contained in:
Marc Kelechava 2025-11-06 01:24:39 -08:00 committed by GitHub
parent f0172a22df
commit 3db5ec6cd7
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
24 changed files with 662 additions and 27 deletions

View file

@ -0,0 +1,33 @@
"""oss sync alembic
Revision ID: 89d531e8b4ed
Revises: 2c34dee3304e
Create Date: 2025-11-06 08:05:01.832765+00:00
"""
from typing import Sequence, Union
import sqlalchemy as sa
from alembic import op
# revision identifiers, used by Alembic.
revision: str = "89d531e8b4ed"
down_revision: Union[str, None] = "2c34dee3304e"
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
# ### commands auto generated by Alembic - please adjust! ###
op.add_column("workflow_runs", sa.Column("browser_profile_id", sa.String(), nullable=True))
op.create_index(op.f("ix_workflow_runs_browser_profile_id"), "workflow_runs", ["browser_profile_id"], unique=False)
# ### end Alembic commands ###
def downgrade() -> None:
# ### commands auto generated by Alembic - please adjust! ###
op.drop_index(op.f("ix_workflow_runs_browser_profile_id"), table_name="workflow_runs")
op.drop_column("workflow_runs", "browser_profile_id")
# ### end Alembic commands ###

View file

@ -0,0 +1,27 @@
"""oss sync alembic
Revision ID: 7581811d57b1
Revises: 89d531e8b4ed
Create Date: 2025-11-06 08:57:29.949001+00:00
"""
from typing import Sequence, Union
# revision identifiers, used by Alembic.
revision: str = "7581811d57b1"
down_revision: Union[str, None] = "89d531e8b4ed"
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
# ### commands auto generated by Alembic - please adjust! ###
pass
# ### end Alembic commands ###
def downgrade() -> None:
# ### commands auto generated by Alembic - please adjust! ###
pass
# ### end Alembic commands ###

View file

@ -0,0 +1,27 @@
"""oss sync alembic
Revision ID: 39e16cf92225
Revises: 7581811d57b1
Create Date: 2025-11-06 09:14:11.912223+00:00
"""
from typing import Sequence, Union
# revision identifiers, used by Alembic.
revision: str = "39e16cf92225"
down_revision: Union[str, None] = "7581811d57b1"
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
# ### commands auto generated by Alembic - please adjust! ###
pass
# ### end Alembic commands ###
def downgrade() -> None:
# ### commands auto generated by Alembic - please adjust! ###
pass
# ### end Alembic commands ###

View file

@ -83,6 +83,11 @@ class WorkflowRunResponse(UniversalBaseModel):
ID of the Skyvern persistent browser session used for this run ID of the Skyvern persistent browser session used for this run
""" """
browser_profile_id: typing.Optional[str] = pydantic.Field(default=None)
"""
ID of the browser profile used for this run
"""
max_screenshot_scrolls: typing.Optional[int] = pydantic.Field(default=None) max_screenshot_scrolls: typing.Optional[int] = pydantic.Field(default=None)
""" """
The maximum number of scrolls for the post action screenshot. When it's None or 0, it takes the current viewpoint screenshot The maximum number of scrolls for the post action screenshot. When it's None or 0, it takes the current viewpoint screenshot

View file

@ -95,6 +95,7 @@ class SkyvernPage:
browser_state = await app.BROWSER_MANAGER.get_or_create_for_workflow_run( browser_state = await app.BROWSER_MANAGER.get_or_create_for_workflow_run(
workflow_run=workflow_run, workflow_run=workflow_run,
browser_session_id=browser_session_id, browser_session_id=browser_session_id,
browser_profile_id=workflow_run.browser_profile_id,
) )
else: else:
raise WorkflowRunNotFound(workflow_run_id=context.workflow_run_id) raise WorkflowRunNotFound(workflow_run_id=context.workflow_run_id)

View file

@ -1918,6 +1918,7 @@ class ForgeAgent:
workflow_run=workflow_run, workflow_run=workflow_run,
url=task.url, url=task.url,
browser_session_id=browser_session_id, browser_session_id=browser_session_id,
browser_profile_id=workflow_run.browser_profile_id,
) )
else: else:
browser_state = await app.BROWSER_MANAGER.get_or_create_for_task( browser_state = await app.BROWSER_MANAGER.get_or_create_for_task(

View file

@ -180,58 +180,92 @@ class LocalStorage(BaseStorage):
return None return None
async def store_browser_session(self, organization_id: str, workflow_permanent_id: str, directory: str) -> None: async def store_browser_session(self, organization_id: str, workflow_permanent_id: str, directory: str) -> None:
stored_folder_path = Path(settings.BROWSER_SESSION_BASE_PATH) / organization_id / workflow_permanent_id stored_folder_path = self._resolve_browser_storage_path(organization_id, workflow_permanent_id)
if directory == str(stored_folder_path): if stored_folder_path is None:
LOG.warning(
"Refused to store browser session outside storage base path",
organization_id=organization_id,
workflow_permanent_id=workflow_permanent_id,
base_path=settings.BROWSER_SESSION_BASE_PATH,
)
return
source_directory = Path(directory).resolve()
if source_directory == stored_folder_path:
return return
self._create_directories_if_not_exists(stored_folder_path) self._create_directories_if_not_exists(stored_folder_path)
LOG.info( LOG.info(
"Storing browser session locally", "Storing browser session locally",
organization_id=organization_id, organization_id=organization_id,
workflow_permanent_id=workflow_permanent_id, workflow_permanent_id=workflow_permanent_id,
directory=directory, directory=str(source_directory),
browser_session_path=stored_folder_path, browser_session_path=str(stored_folder_path),
) )
# Copy all files from the directory to the stored folder # Copy all files from the directory to the stored folder
for root, _, files in os.walk(directory): for root, _, files in os.walk(source_directory):
for file in files: for file in files:
source_file_path = Path(root) / file source_file_path = Path(root) / file
relative_path = source_file_path.relative_to(directory) relative_path = source_file_path.relative_to(source_directory)
target_file_path = stored_folder_path / relative_path target_file_path = stored_folder_path / relative_path
self._create_directories_if_not_exists(target_file_path) self._create_directories_if_not_exists(target_file_path)
shutil.copy2(source_file_path, target_file_path) shutil.copy2(source_file_path, target_file_path)
async def retrieve_browser_session(self, organization_id: str, workflow_permanent_id: str) -> str | None: async def retrieve_browser_session(self, organization_id: str, workflow_permanent_id: str) -> str | None:
stored_folder_path = Path(settings.BROWSER_SESSION_BASE_PATH) / organization_id / workflow_permanent_id stored_folder_path = self._resolve_browser_storage_path(organization_id, workflow_permanent_id)
if stored_folder_path is None:
LOG.warning(
"Refused to retrieve browser session outside storage base path",
organization_id=organization_id,
workflow_permanent_id=workflow_permanent_id,
base_path=settings.BROWSER_SESSION_BASE_PATH,
)
return None
if not stored_folder_path.exists(): if not stored_folder_path.exists():
return None return None
return str(stored_folder_path) return str(stored_folder_path)
async def store_browser_profile(self, organization_id: str, profile_id: str, directory: str) -> None: async def store_browser_profile(self, organization_id: str, profile_id: str, directory: str) -> None:
"""Store browser profile locally.""" """Store browser profile locally."""
stored_folder_path = Path(settings.BROWSER_SESSION_BASE_PATH) / organization_id / "profiles" / profile_id stored_folder_path = self._resolve_browser_storage_path(organization_id, "profiles", profile_id)
if directory == str(stored_folder_path): if stored_folder_path is None:
LOG.warning(
"Refused to store browser profile outside storage base path",
organization_id=organization_id,
profile_id=profile_id,
base_path=settings.BROWSER_SESSION_BASE_PATH,
)
return
source_directory = Path(directory).resolve()
if source_directory == stored_folder_path:
return return
self._create_directories_if_not_exists(stored_folder_path) self._create_directories_if_not_exists(stored_folder_path)
LOG.info( LOG.info(
"Storing browser profile locally", "Storing browser profile locally",
organization_id=organization_id, organization_id=organization_id,
profile_id=profile_id, profile_id=profile_id,
directory=directory, directory=str(source_directory),
browser_profile_path=stored_folder_path, browser_profile_path=str(stored_folder_path),
) )
for root, _, files in os.walk(directory): for root, _, files in os.walk(source_directory):
for file in files: for file in files:
source_file_path = Path(root) / file source_file_path = Path(root) / file
relative_path = source_file_path.relative_to(directory) relative_path = source_file_path.relative_to(source_directory)
target_file_path = stored_folder_path / relative_path target_file_path = stored_folder_path / relative_path
self._create_directories_if_not_exists(target_file_path) self._create_directories_if_not_exists(target_file_path)
shutil.copy2(source_file_path, target_file_path) shutil.copy2(source_file_path, target_file_path)
async def retrieve_browser_profile(self, organization_id: str, profile_id: str) -> str | None: async def retrieve_browser_profile(self, organization_id: str, profile_id: str) -> str | None:
"""Retrieve browser profile from local storage.""" """Retrieve browser profile from local storage."""
stored_folder_path = Path(settings.BROWSER_SESSION_BASE_PATH) / organization_id / "profiles" / profile_id stored_folder_path = self._resolve_browser_storage_path(organization_id, "profiles", profile_id)
if stored_folder_path is None:
LOG.warning(
"Refused to retrieve browser profile outside storage base path",
organization_id=organization_id,
profile_id=profile_id,
base_path=settings.BROWSER_SESSION_BASE_PATH,
)
return None
if not stored_folder_path.exists(): if not stored_folder_path.exists():
return None return None
return str(stored_folder_path) return str(stored_folder_path)
@ -282,6 +316,30 @@ class LocalStorage(BaseStorage):
path = path_including_file_name.parent path = path_including_file_name.parent
path.mkdir(parents=True, exist_ok=True) path.mkdir(parents=True, exist_ok=True)
@staticmethod
def _resolve_browser_storage_path(*relative_parts: str) -> Path | None:
if not relative_parts:
return None
normalized_parts: list[str] = []
for part in relative_parts:
if part in {"", "."}:
return None
part_path = Path(part)
if part_path.is_absolute() or part_path.drive:
return None
if any(segment in {"", ".", ".."} for segment in part_path.parts):
return None
normalized_parts.extend(part_path.parts)
if not normalized_parts:
return None
base_path = Path(settings.BROWSER_SESSION_BASE_PATH).resolve()
candidate = base_path.joinpath(*normalized_parts).resolve()
try:
candidate.relative_to(base_path)
except ValueError:
return None
return candidate
async def save_legacy_file( async def save_legacy_file(
self, *, organization_id: str, filename: str, fileObj: BinaryIO self, *, organization_id: str, filename: str, fileObj: BinaryIO
) -> tuple[str, str] | None: ) -> tuple[str, str] | None:

View file

@ -2239,6 +2239,7 @@ class AgentDB:
workflow_id: str, workflow_id: str,
organization_id: str, organization_id: str,
browser_session_id: str | None = None, browser_session_id: str | None = None,
browser_profile_id: str | None = None,
proxy_location: ProxyLocation | None = None, proxy_location: ProxyLocation | None = None,
webhook_callback_url: str | None = None, webhook_callback_url: str | None = None,
totp_verification_url: str | None = None, totp_verification_url: str | None = None,
@ -2260,6 +2261,7 @@ class AgentDB:
workflow_id=workflow_id, workflow_id=workflow_id,
organization_id=organization_id, organization_id=organization_id,
browser_session_id=browser_session_id, browser_session_id=browser_session_id,
browser_profile_id=browser_profile_id,
proxy_location=proxy_location, proxy_location=proxy_location,
status="created", status="created",
webhook_callback_url=webhook_callback_url, webhook_callback_url=webhook_callback_url,

View file

@ -304,6 +304,7 @@ class WorkflowRunModel(Base):
parent_workflow_run_id = Column(String, nullable=True, index=True) parent_workflow_run_id = Column(String, nullable=True, index=True)
organization_id = Column(String, nullable=False, index=True) organization_id = Column(String, nullable=False, index=True)
browser_session_id = Column(String, nullable=True, index=True) browser_session_id = Column(String, nullable=True, index=True)
browser_profile_id = Column(String, nullable=True, index=True)
status = Column(String, nullable=False) status = Column(String, nullable=False)
failure_reason = Column(String) failure_reason = Column(String)
proxy_location = Column(String) proxy_location = Column(String)

View file

@ -309,6 +309,7 @@ def convert_to_workflow_run(
workflow_id=workflow_run_model.workflow_id, workflow_id=workflow_run_model.workflow_id,
organization_id=workflow_run_model.organization_id, organization_id=workflow_run_model.organization_id,
browser_session_id=workflow_run_model.browser_session_id, browser_session_id=workflow_run_model.browser_session_id,
browser_profile_id=workflow_run_model.browser_profile_id,
status=WorkflowRunStatus[workflow_run_model.status], status=WorkflowRunStatus[workflow_run_model.status],
failure_reason=workflow_run_model.failure_reason, failure_reason=workflow_run_model.failure_reason,
proxy_location=( proxy_location=(

View file

@ -1,4 +1,5 @@
from skyvern.forge.sdk.routes import agent_protocol # noqa: F401 from skyvern.forge.sdk.routes import agent_protocol # noqa: F401
from skyvern.forge.sdk.routes import browser_profiles # noqa: F401
from skyvern.forge.sdk.routes import browser_sessions # noqa: F401 from skyvern.forge.sdk.routes import browser_sessions # noqa: F401
from skyvern.forge.sdk.routes import credentials # noqa: F401 from skyvern.forge.sdk.routes import credentials # noqa: F401
from skyvern.forge.sdk.routes import debug_sessions # noqa: F401 from skyvern.forge.sdk.routes import debug_sessions # noqa: F401

View file

@ -348,6 +348,7 @@ async def run_workflow(
totp_identifier=workflow_run_request.totp_identifier, totp_identifier=workflow_run_request.totp_identifier,
totp_verification_url=workflow_run_request.totp_url, totp_verification_url=workflow_run_request.totp_url,
browser_session_id=workflow_run_request.browser_session_id, browser_session_id=workflow_run_request.browser_session_id,
browser_profile_id=workflow_run_request.browser_profile_id,
max_screenshot_scrolls=workflow_run_request.max_screenshot_scrolls, max_screenshot_scrolls=workflow_run_request.max_screenshot_scrolls,
extra_http_headers=workflow_run_request.extra_http_headers, extra_http_headers=workflow_run_request.extra_http_headers,
browser_address=workflow_run_request.browser_address, browser_address=workflow_run_request.browser_address,

View file

@ -0,0 +1,387 @@
import asyncio
from typing import NoReturn
import structlog
from fastapi import Depends, HTTPException, Path, Query, status
from sqlalchemy.exc import IntegrityError
from skyvern.exceptions import (
BrowserProfileNotFound,
BrowserSessionNotFound,
WorkflowNotFound,
WorkflowRunNotFound,
)
from skyvern.forge import app
from skyvern.forge.sdk.routes.routers import base_router
from skyvern.forge.sdk.schemas.browser_profiles import (
BrowserProfile,
CreateBrowserProfileRequest,
)
from skyvern.forge.sdk.schemas.organizations import Organization
from skyvern.forge.sdk.services import org_auth_service
LOG = structlog.get_logger()
def _handle_duplicate_profile_name(*, organization_id: str, name: str, exc: IntegrityError) -> NoReturn:
LOG.warning(
"Duplicate browser profile name",
organization_id=organization_id,
name=name,
exc_info=True,
)
raise HTTPException(
status_code=status.HTTP_409_CONFLICT,
detail=f"A browser profile named '{name}' already exists. Use a different name or delete the existing profile.",
) from exc
@base_router.post(
"/browser_profiles",
response_model=BrowserProfile,
tags=["Browser Profiles"],
summary="Create a browser profile",
)
@base_router.post(
"/browser_profiles/",
response_model=BrowserProfile,
include_in_schema=False,
)
async def create_browser_profile(
request: CreateBrowserProfileRequest,
current_org: Organization = Depends(org_auth_service.get_current_org),
) -> BrowserProfile:
organization_id = current_org.organization_id
LOG.info(
"Creating browser profile",
organization_id=organization_id,
browser_session_id=request.browser_session_id,
workflow_run_id=request.workflow_run_id,
)
if request.browser_session_id:
browser_session_id = request.browser_session_id
return await _create_profile_from_session(
organization_id=organization_id,
name=request.name,
description=request.description,
browser_session_id=browser_session_id,
)
workflow_run_id = request.workflow_run_id
assert workflow_run_id is not None # model validator guarantees one of the sources
return await _create_profile_from_workflow_run(
organization_id=organization_id,
name=request.name,
description=request.description,
workflow_run_id=workflow_run_id,
)
@base_router.get(
"/browser_profiles",
response_model=list[BrowserProfile],
tags=["Browser Profiles"],
summary="List browser profiles",
description="Get all browser profiles for the organization",
)
@base_router.get(
"/browser_profiles/",
response_model=list[BrowserProfile],
include_in_schema=False,
)
async def list_browser_profiles(
include_deleted: bool = Query(default=False, description="Include deleted browser profiles"),
current_org: Organization = Depends(org_auth_service.get_current_org),
) -> list[BrowserProfile]:
"""List all browser profiles for the current organization."""
organization_id = current_org.organization_id
LOG.info(
"Listing browser profiles",
organization_id=organization_id,
include_deleted=include_deleted,
)
profiles = await app.DATABASE.list_browser_profiles(
organization_id=organization_id,
include_deleted=include_deleted,
)
LOG.info(
"Listed browser profiles",
organization_id=organization_id,
count=len(profiles),
)
return profiles
@base_router.get(
"/browser_profiles/{profile_id}",
response_model=BrowserProfile,
tags=["Browser Profiles"],
summary="Get browser profile",
description="Get a specific browser profile by ID",
responses={
200: {"description": "Successfully retrieved browser profile"},
404: {"description": "Browser profile not found"},
},
)
@base_router.get(
"/browser_profiles/{profile_id}/",
response_model=BrowserProfile,
include_in_schema=False,
)
async def get_browser_profile(
profile_id: str = Path(
...,
description="The ID of the browser profile. browser_profile_id starts with `bp_`",
examples=["bp_123456"],
),
current_org: Organization = Depends(org_auth_service.get_current_org),
) -> BrowserProfile:
"""Get a browser profile for the current organization."""
organization_id = current_org.organization_id
LOG.info(
"Getting browser profile",
organization_id=organization_id,
browser_profile_id=profile_id,
)
profile = await app.DATABASE.get_browser_profile(
profile_id=profile_id,
organization_id=organization_id,
)
if not profile:
LOG.warning(
"Browser profile not found",
organization_id=organization_id,
browser_profile_id=profile_id,
)
raise BrowserProfileNotFound(profile_id=profile_id, organization_id=organization_id)
LOG.info(
"Retrieved browser profile",
organization_id=organization_id,
browser_profile_id=profile_id,
)
return profile
@base_router.delete(
"/browser_profiles/{profile_id}",
status_code=status.HTTP_204_NO_CONTENT,
tags=["Browser Profiles"],
summary="Delete browser profile",
description="Delete a browser profile (soft delete)",
responses={
204: {"description": "Successfully deleted browser profile"},
404: {"description": "Browser profile not found"},
},
)
@base_router.delete(
"/browser_profiles/{profile_id}/",
status_code=status.HTTP_204_NO_CONTENT,
include_in_schema=False,
)
async def delete_browser_profile(
profile_id: str = Path(
...,
description="The ID of the browser profile to delete. browser_profile_id starts with `bp_`",
examples=["bp_123456"],
),
current_org: Organization = Depends(org_auth_service.get_current_org),
) -> None:
"""Delete a browser profile for the current organization."""
organization_id = current_org.organization_id
LOG.info(
"Deleting browser profile",
organization_id=organization_id,
browser_profile_id=profile_id,
)
try:
await app.DATABASE.delete_browser_profile(
profile_id=profile_id,
organization_id=organization_id,
)
except BrowserProfileNotFound:
LOG.warning(
"Browser profile not found for deletion",
organization_id=organization_id,
browser_profile_id=profile_id,
)
raise
LOG.info(
"Deleted browser profile",
organization_id=organization_id,
browser_profile_id=profile_id,
)
async def _create_profile_from_session(
*,
organization_id: str,
name: str,
description: str | None,
browser_session_id: str,
) -> BrowserProfile:
browser_state = await app.PERSISTENT_SESSIONS_MANAGER.get_browser_state(browser_session_id, organization_id)
if browser_state is None:
LOG.warning(
"Browser session not found for profile creation",
organization_id=organization_id,
browser_session_id=browser_session_id,
)
raise BrowserSessionNotFound(browser_session_id)
session_dir = browser_state.browser_artifacts.browser_session_dir
if not session_dir:
LOG.warning(
"Browser session has no persisted data",
organization_id=organization_id,
browser_session_id=browser_session_id,
)
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail="Browser session does not have persisted data to store",
)
try:
profile = await app.DATABASE.create_browser_profile(
organization_id=organization_id,
name=name,
description=description,
)
except IntegrityError as exc:
_handle_duplicate_profile_name(organization_id=organization_id, name=name, exc=exc)
try:
await app.STORAGE.store_browser_profile(
organization_id=organization_id,
profile_id=profile.browser_profile_id,
directory=session_dir,
)
except Exception:
# Rollback: delete the profile if storage fails
await app.DATABASE.delete_browser_profile(profile.browser_profile_id, organization_id=organization_id)
LOG.error(
"Failed to store browser profile artifacts, rolled back profile creation",
organization_id=organization_id,
browser_profile_id=profile.browser_profile_id,
exc_info=True,
)
raise
LOG.info(
"Created browser profile from session",
organization_id=organization_id,
browser_profile_id=profile.browser_profile_id,
browser_session_id=browser_session_id,
)
return profile
async def _create_profile_from_workflow_run(
*,
organization_id: str,
name: str,
description: str | None,
workflow_run_id: str,
) -> BrowserProfile:
workflow_run = await app.DATABASE.get_workflow_run(workflow_run_id, organization_id=organization_id)
if not workflow_run:
LOG.warning(
"Workflow run not found for profile creation",
organization_id=organization_id,
workflow_run_id=workflow_run_id,
)
raise WorkflowRunNotFound(workflow_run_id)
workflow = await app.DATABASE.get_workflow(
workflow_id=workflow_run.workflow_id,
organization_id=organization_id,
)
if not workflow:
LOG.warning(
"Workflow not found for profile creation",
organization_id=organization_id,
workflow_id=workflow_run.workflow_id,
workflow_permanent_id=workflow_run.workflow_permanent_id,
)
raise WorkflowNotFound(workflow_id=workflow_run.workflow_id)
if not getattr(workflow, "persist_browser_session", False):
LOG.warning(
"Workflow does not persist browser sessions",
organization_id=organization_id,
workflow_run_id=workflow_run_id,
workflow_permanent_id=workflow.workflow_permanent_id,
)
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail="Workflow does not persist browser sessions",
)
# The session persistence task runs asynchronously after workflow completion.
# Poll for a short grace period so that immediate profile-creation requests
# succeed without forcing clients to implement retry loops.
poll_attempts = 30 # ~30 s max wait
session_dir: str | None = None
for attempt in range(poll_attempts):
session_dir = await app.STORAGE.retrieve_browser_session(
organization_id=organization_id,
workflow_permanent_id=workflow.workflow_permanent_id,
)
if session_dir:
break # session found
# Avoid busy-waiting; sleep 1 s between attempts (non-blocking asyncio sleep)
if attempt < poll_attempts - 1:
await asyncio.sleep(1)
if not session_dir:
LOG.warning(
"Workflow run has no persisted session after waiting",
organization_id=organization_id,
workflow_run_id=workflow_run_id,
)
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail="Workflow run does not have a persisted session",
)
try:
profile = await app.DATABASE.create_browser_profile(
organization_id=organization_id,
name=name,
description=description,
)
except IntegrityError as exc:
_handle_duplicate_profile_name(organization_id=organization_id, name=name, exc=exc)
try:
await app.STORAGE.store_browser_profile(
organization_id=organization_id,
profile_id=profile.browser_profile_id,
directory=session_dir,
)
LOG.info(
"Created browser profile from workflow run",
organization_id=organization_id,
browser_profile_id=profile.browser_profile_id,
workflow_run_id=workflow_run_id,
)
except Exception:
# Rollback: delete the profile if storage fails
await app.DATABASE.delete_browser_profile(profile.browser_profile_id, organization_id=organization_id)
LOG.error(
"Failed to store browser profile artifacts, rolled back profile creation",
organization_id=organization_id,
browser_profile_id=profile.browser_profile_id,
workflow_run_id=workflow_run_id,
exc_info=True,
)
raise
return profile

View file

@ -201,6 +201,7 @@ async def login(
totp_identifier=login_request.totp_identifier, totp_identifier=login_request.totp_identifier,
totp_verification_url=totp_verification_url, totp_verification_url=totp_verification_url,
browser_session_id=login_request.browser_session_id, browser_session_id=login_request.browser_session_id,
browser_profile_id=login_request.browser_profile_id,
browser_address=login_request.browser_address, browser_address=login_request.browser_address,
max_screenshot_scrolls=login_request.max_screenshot_scrolling_times, max_screenshot_scrolls=login_request.max_screenshot_scrolling_times,
extra_http_headers=login_request.extra_http_headers, extra_http_headers=login_request.extra_http_headers,
@ -236,8 +237,10 @@ async def login(
totp_url=totp_verification_url, totp_url=totp_verification_url,
totp_identifier=login_request.totp_identifier, totp_identifier=login_request.totp_identifier,
browser_session_id=login_request.browser_session_id, browser_session_id=login_request.browser_session_id,
browser_profile_id=login_request.browser_profile_id,
max_screenshot_scrolls=login_request.max_screenshot_scrolling_times, max_screenshot_scrolls=login_request.max_screenshot_scrolling_times,
), ),
app_url=f"{settings.SKYVERN_APP_URL.rstrip('/')}/runs/{workflow_run.workflow_run_id}", app_url=f"{settings.SKYVERN_APP_URL.rstrip('/')}/runs/{workflow_run.workflow_run_id}",
browser_session_id=login_request.browser_session_id, browser_session_id=login_request.browser_session_id,
browser_profile_id=login_request.browser_profile_id,
) )

View file

@ -1,6 +1,6 @@
from datetime import datetime from datetime import datetime
from pydantic import BaseModel, ConfigDict from pydantic import BaseModel, ConfigDict, Field, model_validator
class BrowserProfile(BaseModel): class BrowserProfile(BaseModel):
@ -13,3 +13,20 @@ class BrowserProfile(BaseModel):
created_at: datetime created_at: datetime
modified_at: datetime modified_at: datetime
deleted_at: datetime | None = None deleted_at: datetime | None = None
class CreateBrowserProfileRequest(BaseModel):
name: str = Field(..., description="Name for the browser profile")
description: str | None = Field(None, description="Optional profile description")
browser_session_id: str | None = Field(
default=None, description="Persistent browser session to convert into a profile"
)
workflow_run_id: str | None = Field(
default=None, description="Workflow run whose persisted session should be captured"
)
@model_validator(mode="after")
def _validate_source(self) -> "CreateBrowserProfileRequest":
if bool(self.browser_session_id) == bool(self.workflow_run_id):
raise ValueError("Provide either browser_session_id or workflow_run_id")
return self

View file

@ -645,7 +645,10 @@ class BaseTaskBlock(Block):
# the first task block will create the browser state and do the navigation # the first task block will create the browser state and do the navigation
try: try:
browser_state = await app.BROWSER_MANAGER.get_or_create_for_workflow_run( browser_state = await app.BROWSER_MANAGER.get_or_create_for_workflow_run(
workflow_run=workflow_run, url=self.url, browser_session_id=browser_session_id workflow_run=workflow_run,
url=self.url,
browser_session_id=browser_session_id,
browser_profile_id=workflow_run.browser_profile_id,
) )
working_page = await browser_state.get_working_page() working_page = await browser_state.get_working_page()
if not working_page: if not working_page:
@ -1586,6 +1589,7 @@ async def wrapper():
workflow_run=workflow_run, workflow_run=workflow_run,
url=None, # Code block doesn't need to navigate to a URL initially url=None, # Code block doesn't need to navigate to a URL initially
browser_session_id=browser_session_id, browser_session_id=browser_session_id,
browser_profile_id=workflow_run.browser_profile_id,
) )
# Ensure the browser state has a working page # Ensure the browser state has a working page
await browser_state.check_and_fix_state( await browser_state.check_and_fix_state(
@ -1595,6 +1599,7 @@ async def wrapper():
organization_id=workflow_run.organization_id, organization_id=workflow_run.organization_id,
extra_http_headers=workflow_run.extra_http_headers, extra_http_headers=workflow_run.extra_http_headers,
browser_address=workflow_run.browser_address, browser_address=workflow_run.browser_address,
browser_profile_id=workflow_run.browser_profile_id,
) )
except Exception as e: except Exception as e:
LOG.exception( LOG.exception(

View file

@ -2,7 +2,7 @@ from datetime import datetime
from enum import StrEnum from enum import StrEnum
from typing import Any, List from typing import Any, List
from pydantic import BaseModel, field_validator from pydantic import BaseModel, field_validator, model_validator
from typing_extensions import deprecated from typing_extensions import deprecated
from skyvern.forge.sdk.schemas.files import FileInfo from skyvern.forge.sdk.schemas.files import FileInfo
@ -23,6 +23,7 @@ class WorkflowRequestBody(BaseModel):
totp_verification_url: str | None = None totp_verification_url: str | None = None
totp_identifier: str | None = None totp_identifier: str | None = None
browser_session_id: str | None = None browser_session_id: str | None = None
browser_profile_id: str | None = None
max_screenshot_scrolls: int | None = None max_screenshot_scrolls: int | None = None
extra_http_headers: dict[str, str] | None = None extra_http_headers: dict[str, str] | None = None
browser_address: str | None = None browser_address: str | None = None
@ -36,6 +37,12 @@ class WorkflowRequestBody(BaseModel):
return None return None
return validate_url(url) return validate_url(url)
@model_validator(mode="after")
def validate_browser_reference(cls, values: "WorkflowRequestBody") -> "WorkflowRequestBody":
if values.browser_session_id and values.browser_profile_id:
raise ValueError("Cannot specify both browser_session_id and browser_profile_id")
return values
@deprecated("Use WorkflowRunResponse instead") @deprecated("Use WorkflowRunResponse instead")
class RunWorkflowResponse(BaseModel): class RunWorkflowResponse(BaseModel):
@ -130,6 +137,7 @@ class WorkflowRun(BaseModel):
workflow_permanent_id: str workflow_permanent_id: str
organization_id: str organization_id: str
browser_session_id: str | None = None browser_session_id: str | None = None
browser_profile_id: str | None = None
debug_session_id: str | None = None debug_session_id: str | None = None
status: WorkflowRunStatus status: WorkflowRunStatus
extra_http_headers: dict[str, str] | None = None extra_http_headers: dict[str, str] | None = None
@ -199,6 +207,7 @@ class WorkflowRunResponseBase(BaseModel):
task_v2: TaskV2 | None = None task_v2: TaskV2 | None = None
workflow_title: str | None = None workflow_title: str | None = None
browser_session_id: str | None = None browser_session_id: str | None = None
browser_profile_id: str | None = None
max_screenshot_scrolls: int | None = None max_screenshot_scrolls: int | None = None
browser_address: str | None = None browser_address: str | None = None
script_run: ScriptRunResponse | None = None script_run: ScriptRunResponse | None = None

View file

@ -20,6 +20,7 @@ from skyvern.config import settings
from skyvern.constants import GET_DOWNLOADED_FILES_TIMEOUT, SAVE_DOWNLOADED_FILES_TIMEOUT from skyvern.constants import GET_DOWNLOADED_FILES_TIMEOUT, SAVE_DOWNLOADED_FILES_TIMEOUT
from skyvern.exceptions import ( from skyvern.exceptions import (
BlockNotFound, BlockNotFound,
BrowserProfileNotFound,
BrowserSessionNotFound, BrowserSessionNotFound,
CannotUpdateWorkflowDueToCodeCache, CannotUpdateWorkflowDueToCodeCache,
FailedToSendWebhook, FailedToSendWebhook,
@ -540,6 +541,7 @@ class WorkflowService:
) )
workflow_run = await self.get_workflow_run(workflow_run_id=workflow_run_id, organization_id=organization_id) workflow_run = await self.get_workflow_run(workflow_run_id=workflow_run_id, organization_id=organization_id)
workflow = await self.get_workflow_by_permanent_id(workflow_permanent_id=workflow_run.workflow_permanent_id) workflow = await self.get_workflow_by_permanent_id(workflow_permanent_id=workflow_run.workflow_permanent_id)
browser_profile_id = workflow_run.browser_profile_id
close_browser_on_completion = browser_session_id is None and not workflow_run.browser_address close_browser_on_completion = browser_session_id is None and not workflow_run.browser_address
# Set workflow run status to running, create workflow run parameters # Set workflow run status to running, create workflow run parameters
@ -608,12 +610,14 @@ class WorkflowService:
) )
return workflow_run return workflow_run
browser_session = await self.auto_create_browser_session_if_needed( browser_session = None
organization.organization_id, if not browser_profile_id:
workflow, browser_session = await self.auto_create_browser_session_if_needed(
browser_session_id=browser_session_id, organization.organization_id,
proxy_location=workflow_run.proxy_location, workflow,
) browser_session_id=browser_session_id,
proxy_location=workflow_run.proxy_location,
)
if browser_session: if browser_session:
browser_session_id = browser_session.persistent_browser_session_id browser_session_id = browser_session.persistent_browser_session_id
@ -640,6 +644,7 @@ class WorkflowService:
workflow_run=workflow_run, workflow_run=workflow_run,
organization=organization, organization=organization,
browser_session_id=browser_session_id, browser_session_id=browser_session_id,
browser_profile_id=browser_profile_id,
block_labels=block_labels, block_labels=block_labels,
block_outputs=block_outputs, block_outputs=block_outputs,
workflow_script=workflow_script, workflow_script=workflow_script,
@ -688,6 +693,7 @@ class WorkflowService:
workflow_run: WorkflowRun, workflow_run: WorkflowRun,
organization: Organization, organization: Organization,
browser_session_id: str | None = None, browser_session_id: str | None = None,
browser_profile_id: str | None = None,
block_labels: list[str] | None = None, block_labels: list[str] | None = None,
block_outputs: dict[str, Any] | None = None, block_outputs: dict[str, Any] | None = None,
workflow_script: WorkflowScript | None = None, workflow_script: WorkflowScript | None = None,
@ -1669,7 +1675,7 @@ class WorkflowService:
debug_session_id: str | None = None, debug_session_id: str | None = None,
code_gen: bool | None = None, code_gen: bool | None = None,
) -> WorkflowRun: ) -> WorkflowRun:
# validate the browser session id # validate the browser session or profile id
if workflow_request.browser_session_id: if workflow_request.browser_session_id:
browser_session = await app.DATABASE.get_persistent_browser_session( browser_session = await app.DATABASE.get_persistent_browser_session(
session_id=workflow_request.browser_session_id, session_id=workflow_request.browser_session_id,
@ -1678,11 +1684,23 @@ class WorkflowService:
if not browser_session: if not browser_session:
raise BrowserSessionNotFound(browser_session_id=workflow_request.browser_session_id) raise BrowserSessionNotFound(browser_session_id=workflow_request.browser_session_id)
if workflow_request.browser_profile_id:
browser_profile = await app.DATABASE.get_browser_profile(
workflow_request.browser_profile_id,
organization_id=organization_id,
)
if not browser_profile:
raise BrowserProfileNotFound(
profile_id=workflow_request.browser_profile_id,
organization_id=organization_id,
)
return await app.DATABASE.create_workflow_run( return await app.DATABASE.create_workflow_run(
workflow_permanent_id=workflow_permanent_id, workflow_permanent_id=workflow_permanent_id,
workflow_id=workflow_id, workflow_id=workflow_id,
organization_id=organization_id, organization_id=organization_id,
browser_session_id=workflow_request.browser_session_id, browser_session_id=workflow_request.browser_session_id,
browser_profile_id=workflow_request.browser_profile_id,
proxy_location=workflow_request.proxy_location, proxy_location=workflow_request.proxy_location,
webhook_callback_url=workflow_request.webhook_callback_url, webhook_callback_url=workflow_request.webhook_callback_url,
totp_verification_url=workflow_request.totp_verification_url, totp_verification_url=workflow_request.totp_verification_url,
@ -2224,6 +2242,7 @@ class WorkflowService:
total_cost=total_cost, total_cost=total_cost,
workflow_title=workflow.title, workflow_title=workflow.title,
browser_session_id=workflow_run.browser_session_id, browser_session_id=workflow_run.browser_session_id,
browser_profile_id=workflow_run.browser_profile_id,
max_screenshot_scrolls=workflow_run.max_screenshot_scrolls, max_screenshot_scrolls=workflow_run.max_screenshot_scrolls,
task_v2=task_v2, task_v2=task_v2,
browser_address=workflow_run.browser_address, browser_address=workflow_run.browser_address,

View file

@ -5,7 +5,7 @@ from enum import StrEnum
from typing import Annotated, Any, Literal, Union from typing import Annotated, Any, Literal, Union
from zoneinfo import ZoneInfo from zoneinfo import ZoneInfo
from pydantic import BaseModel, Field, field_validator from pydantic import BaseModel, Field, field_validator, model_validator
from skyvern.forge.sdk.schemas.files import FileInfo from skyvern.forge.sdk.schemas.files import FileInfo
from skyvern.schemas.docs.doc_examples import ( from skyvern.schemas.docs.doc_examples import (
@ -387,6 +387,10 @@ class WorkflowRunRequest(BaseModel):
default=None, default=None,
description="ID of a Skyvern browser session to reuse, having it continue from the current screen state", description="ID of a Skyvern browser session to reuse, having it continue from the current screen state",
) )
browser_profile_id: str | None = Field(
default=None,
description="ID of a browser profile to reuse for this workflow run",
)
max_screenshot_scrolls: int | None = Field( max_screenshot_scrolls: int | None = Field(
default=None, default=None,
description="The maximum number of scrolls for the post action screenshot. When it's None or 0, it takes the current viewpoint screenshot.", description="The maximum number of scrolls for the post action screenshot. When it's None or 0, it takes the current viewpoint screenshot.",
@ -416,6 +420,12 @@ class WorkflowRunRequest(BaseModel):
return None return None
return validate_url(url) return validate_url(url)
@model_validator(mode="after")
def validate_browser_reference(cls, values: WorkflowRunRequest) -> WorkflowRunRequest:
if values.browser_session_id and values.browser_profile_id:
raise ValueError("Cannot specify both browser_session_id and browser_profile_id")
return values
class BlockRunRequest(WorkflowRunRequest): class BlockRunRequest(WorkflowRunRequest):
block_labels: list[str] = Field( block_labels: list[str] = Field(
@ -477,6 +487,11 @@ class BaseRunResponse(BaseModel):
browser_session_id: str | None = Field( browser_session_id: str | None = Field(
default=None, description="ID of the Skyvern persistent browser session used for this run", examples=["pbs_123"] default=None, description="ID of the Skyvern persistent browser session used for this run", examples=["pbs_123"]
) )
browser_profile_id: str | None = Field(
default=None,
description="ID of the browser profile used for this run",
examples=["bp_123"],
)
max_screenshot_scrolls: int | None = Field( max_screenshot_scrolls: int | None = Field(
default=None, default=None,
description="The maximum number of scrolls for the post action screenshot. When it's None or 0, it takes the current viewpoint screenshot", description="The maximum number of scrolls for the post action screenshot. When it's None or 0, it takes the current viewpoint screenshot",

View file

@ -32,6 +32,7 @@ async def ensure_workflow_run(
totp_identifier=block_run_request.totp_identifier, totp_identifier=block_run_request.totp_identifier,
totp_verification_url=block_run_request.totp_url, totp_verification_url=block_run_request.totp_url,
browser_session_id=block_run_request.browser_session_id, browser_session_id=block_run_request.browser_session_id,
browser_profile_id=block_run_request.browser_profile_id,
max_screenshot_scrolls=block_run_request.max_screenshot_scrolls, max_screenshot_scrolls=block_run_request.max_screenshot_scrolls,
extra_http_headers=block_run_request.extra_http_headers, extra_http_headers=block_run_request.extra_http_headers,
) )

View file

@ -553,7 +553,9 @@ async def run_task_v2_helper(
current_url: str | None = None current_url: str | None = None
browser_state = await app.BROWSER_MANAGER.get_or_create_for_workflow_run( browser_state = await app.BROWSER_MANAGER.get_or_create_for_workflow_run(
workflow_run=workflow_run, browser_session_id=browser_session_id workflow_run=workflow_run,
browser_session_id=browser_session_id,
browser_profile_id=workflow_run.browser_profile_id,
) )
page = await browser_state.get_working_page() page = await browser_state.get_working_page()
@ -609,7 +611,9 @@ async def run_task_v2_helper(
# Always ensure browser_state is available at the start of the loop # Always ensure browser_state is available at the start of the loop
fallback_url = settings.TASK_BLOCKED_SITE_FALLBACK_URL fallback_url = settings.TASK_BLOCKED_SITE_FALLBACK_URL
browser_state = await app.BROWSER_MANAGER.get_or_create_for_workflow_run( browser_state = await app.BROWSER_MANAGER.get_or_create_for_workflow_run(
workflow_run=workflow_run, browser_session_id=browser_session_id workflow_run=workflow_run,
browser_session_id=browser_session_id,
browser_profile_id=workflow_run.browser_profile_id,
) )
fallback_occurred = False fallback_occurred = False
@ -623,6 +627,7 @@ async def run_task_v2_helper(
script_id=task_v2.script_id, script_id=task_v2.script_id,
organization_id=organization_id, organization_id=organization_id,
extra_http_headers=task_v2.extra_http_headers, extra_http_headers=task_v2.extra_http_headers,
browser_profile_id=workflow_run.browser_profile_id,
) )
else: else:
await browser_state.navigate_to_url(page, url) await browser_state.navigate_to_url(page, url)
@ -895,6 +900,7 @@ async def run_task_v2_helper(
workflow_run=workflow_run, workflow_run=workflow_run,
url=url, url=url,
browser_session_id=browser_session_id, browser_session_id=browser_session_id,
browser_profile_id=workflow_run.browser_profile_id,
) )
scraped_page = await scrape_website( scraped_page = await scrape_website(
browser_state, browser_state,

View file

@ -130,6 +130,7 @@ async def get_workflow_run_response(
app_url=app_url, app_url=app_url,
created_at=workflow_run.created_at, created_at=workflow_run.created_at,
modified_at=workflow_run.modified_at, modified_at=workflow_run.modified_at,
browser_profile_id=workflow_run.browser_profile_id,
run_request=WorkflowRunRequest( run_request=WorkflowRunRequest(
workflow_id=workflow_run.workflow_permanent_id, workflow_id=workflow_run.workflow_permanent_id,
title=workflow_run_resp.workflow_title, title=workflow_run_resp.workflow_title,
@ -140,6 +141,7 @@ async def get_workflow_run_response(
totp_identifier=workflow_run.totp_identifier, totp_identifier=workflow_run.totp_identifier,
max_screenshot_scrolls=workflow_run.max_screenshot_scrolls, max_screenshot_scrolls=workflow_run.max_screenshot_scrolls,
browser_address=workflow_run.browser_address, browser_address=workflow_run.browser_address,
browser_profile_id=workflow_run.browser_profile_id,
# TODO: add browser session id # TODO: add browser session id
), ),
errors=workflow_run_resp.errors, errors=workflow_run_resp.errors,

View file

@ -664,6 +664,7 @@ class BrowserState:
organization_id: str | None = None, organization_id: str | None = None,
extra_http_headers: dict[str, str] | None = None, extra_http_headers: dict[str, str] | None = None,
browser_address: str | None = None, browser_address: str | None = None,
browser_profile_id: str | None = None,
) -> None: ) -> None:
if self.browser_context is None: if self.browser_context is None:
LOG.info("creating browser context") LOG.info("creating browser context")
@ -681,6 +682,7 @@ class BrowserState:
organization_id=organization_id, organization_id=organization_id,
extra_http_headers=extra_http_headers, extra_http_headers=extra_http_headers,
browser_address=browser_address, browser_address=browser_address,
browser_profile_id=browser_profile_id,
) )
self.browser_context = browser_context self.browser_context = browser_context
self.browser_artifacts = browser_artifacts self.browser_artifacts = browser_artifacts
@ -844,6 +846,7 @@ class BrowserState:
organization_id: str | None = None, organization_id: str | None = None,
extra_http_headers: dict[str, str] | None = None, extra_http_headers: dict[str, str] | None = None,
browser_address: str | None = None, browser_address: str | None = None,
browser_profile_id: str | None = None,
) -> Page: ) -> Page:
page = await self.get_working_page() page = await self.get_working_page()
if page is not None: if page is not None:
@ -859,6 +862,7 @@ class BrowserState:
organization_id=organization_id, organization_id=organization_id,
extra_http_headers=extra_http_headers, extra_http_headers=extra_http_headers,
browser_address=browser_address, browser_address=browser_address,
browser_profile_id=browser_profile_id,
) )
except Exception as e: except Exception as e:
error_message = str(e) error_message = str(e)
@ -876,6 +880,7 @@ class BrowserState:
organization_id=organization_id, organization_id=organization_id,
extra_http_headers=extra_http_headers, extra_http_headers=extra_http_headers,
browser_address=browser_address, browser_address=browser_address,
browser_profile_id=browser_profile_id,
) )
page = await self.__assert_page() page = await self.__assert_page()
@ -892,6 +897,7 @@ class BrowserState:
organization_id=organization_id, organization_id=organization_id,
extra_http_headers=extra_http_headers, extra_http_headers=extra_http_headers,
browser_address=browser_address, browser_address=browser_address,
browser_profile_id=browser_profile_id,
) )
page = await self.__assert_page() page = await self.__assert_page()
return page return page

View file

@ -34,6 +34,7 @@ class BrowserManager:
organization_id: str | None = None, organization_id: str | None = None,
extra_http_headers: dict[str, str] | None = None, extra_http_headers: dict[str, str] | None = None,
browser_address: str | None = None, browser_address: str | None = None,
browser_profile_id: str | None = None,
) -> BrowserState: ) -> BrowserState:
pw = await async_playwright().start() pw = await async_playwright().start()
( (
@ -50,6 +51,7 @@ class BrowserManager:
organization_id=organization_id, organization_id=organization_id,
extra_http_headers=extra_http_headers, extra_http_headers=extra_http_headers,
browser_address=browser_address, browser_address=browser_address,
browser_profile_id=browser_profile_id,
) )
return BrowserState( return BrowserState(
pw=pw, pw=pw,
@ -145,9 +147,12 @@ class BrowserManager:
workflow_run: WorkflowRun, workflow_run: WorkflowRun,
url: str | None = None, url: str | None = None,
browser_session_id: str | None = None, browser_session_id: str | None = None,
browser_profile_id: str | None = None,
) -> BrowserState: ) -> BrowserState:
parent_workflow_run_id = workflow_run.parent_workflow_run_id parent_workflow_run_id = workflow_run.parent_workflow_run_id
workflow_run_id = workflow_run.workflow_run_id workflow_run_id = workflow_run.workflow_run_id
if browser_profile_id is None:
browser_profile_id = workflow_run.browser_profile_id
browser_state = self.get_for_workflow_run( browser_state = self.get_for_workflow_run(
workflow_run_id=workflow_run_id, parent_workflow_run_id=parent_workflow_run_id workflow_run_id=workflow_run_id, parent_workflow_run_id=parent_workflow_run_id
) )
@ -192,6 +197,7 @@ class BrowserManager:
organization_id=workflow_run.organization_id, organization_id=workflow_run.organization_id,
extra_http_headers=workflow_run.extra_http_headers, extra_http_headers=workflow_run.extra_http_headers,
browser_address=workflow_run.browser_address, browser_address=workflow_run.browser_address,
browser_profile_id=browser_profile_id,
) )
if browser_session_id: if browser_session_id:
@ -213,6 +219,7 @@ class BrowserManager:
organization_id=workflow_run.organization_id, organization_id=workflow_run.organization_id,
extra_http_headers=workflow_run.extra_http_headers, extra_http_headers=workflow_run.extra_http_headers,
browser_address=workflow_run.browser_address, browser_address=workflow_run.browser_address,
browser_profile_id=browser_profile_id,
) )
return browser_state return browser_state