Mirror of https://github.com/Skyvern-AI/skyvern.git (synced 2025-09-15 17:59:42 +00:00)
fix task v2 block goto url issue (#1899)
This commit is contained in:
Parent: e720177735
Commit: fd934dcfe6
4 changed files with 45 additions and 18 deletions
|
@@ -6,6 +6,7 @@ from typing import Any
|
||||||
|
|
||||||
import httpx
|
import httpx
|
||||||
import structlog
|
import structlog
|
||||||
|
from playwright.async_api import Page
|
||||||
from sqlalchemy.exc import OperationalError
|
from sqlalchemy.exc import OperationalError
|
||||||
|
|
||||||
from skyvern.config import settings
|
from skyvern.config import settings
|
||||||
|
@@ -411,7 +412,15 @@ async def run_task_v2_helper(
|
||||||
task_history_record: dict[str, Any] = {}
|
task_history_record: dict[str, Any] = {}
|
||||||
context = skyvern_context.ensure_context()
|
context = skyvern_context.ensure_context()
|
||||||
|
|
||||||
if i == 0:
|
current_url: str | None = None
|
||||||
|
page: Page | None = None
|
||||||
|
browser_state = app.BROWSER_MANAGER.get_for_workflow_run(workflow_run_id, workflow_run.parent_workflow_run_id)
|
||||||
|
if browser_state:
|
||||||
|
page = await browser_state.get_working_page()
|
||||||
|
if page:
|
||||||
|
current_url = await SkyvernFrame.get_url(page)
|
||||||
|
|
||||||
|
if i == 0 and current_url != url:
|
||||||
# The first iteration is always a GOTO_URL task
|
# The first iteration is always a GOTO_URL task
|
||||||
task_type = "goto_url"
|
task_type = "goto_url"
|
||||||
plan = f"Go to this website: {url}"
|
plan = f"Go to this website: {url}"
|
||||||
|
@@ -422,11 +431,12 @@ async def run_task_v2_helper(
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
try:
|
try:
|
||||||
browser_state = await app.BROWSER_MANAGER.get_or_create_for_workflow_run(
|
if browser_state is None:
|
||||||
workflow_run=workflow_run,
|
browser_state = await app.BROWSER_MANAGER.get_or_create_for_workflow_run(
|
||||||
url=url,
|
workflow_run=workflow_run,
|
||||||
browser_session_id=browser_session_id,
|
url=url,
|
||||||
)
|
browser_session_id=browser_session_id,
|
||||||
|
)
|
||||||
scraped_page = await scrape_website(
|
scraped_page = await scrape_website(
|
||||||
browser_state,
|
browser_state,
|
||||||
url,
|
url,
|
||||||
|
@@ -434,15 +444,14 @@ async def run_task_v2_helper(
|
||||||
scrape_exclude=app.scrape_exclude,
|
scrape_exclude=app.scrape_exclude,
|
||||||
)
|
)
|
||||||
element_tree_in_prompt: str = scraped_page.build_element_tree(ElementTreeFormat.HTML)
|
element_tree_in_prompt: str = scraped_page.build_element_tree(ElementTreeFormat.HTML)
|
||||||
page = await browser_state.get_working_page()
|
if page is None:
|
||||||
|
page = await browser_state.get_working_page()
|
||||||
except Exception:
|
except Exception:
|
||||||
LOG.exception(
|
LOG.exception(
|
||||||
"Failed to get browser state or scrape website in task v2 iteration", iteration=i, url=url
|
"Failed to get browser state or scrape website in task v2 iteration", iteration=i, url=url
|
||||||
)
|
)
|
||||||
continue
|
continue
|
||||||
current_url = str(
|
current_url = current_url if current_url else str(await SkyvernFrame.get_url(frame=page) if page else url)
|
||||||
await SkyvernFrame.evaluate(frame=page, expression="() => document.location.href") if page else url
|
|
||||||
)
|
|
||||||
|
|
||||||
task_v2_prompt = prompt_engine.load_prompt(
|
task_v2_prompt = prompt_engine.load_prompt(
|
||||||
"task_v2",
|
"task_v2",
|
||||||
|
|
|
@@ -68,6 +68,7 @@ from skyvern.forge.sdk.workflow.models.parameter import (
|
||||||
WorkflowParameter,
|
WorkflowParameter,
|
||||||
)
|
)
|
||||||
from skyvern.webeye.browser_factory import BrowserState
|
from skyvern.webeye.browser_factory import BrowserState
|
||||||
|
from skyvern.webeye.utils.page import SkyvernFrame
|
||||||
|
|
||||||
LOG = structlog.get_logger()
|
LOG = structlog.get_logger()
|
||||||
|
|
||||||
|
@@ -2144,6 +2145,15 @@ class TaskV2Block(Block):
|
||||||
from skyvern.forge.sdk.services import task_v2_service
|
from skyvern.forge.sdk.services import task_v2_service
|
||||||
from skyvern.forge.sdk.workflow.models.workflow import WorkflowRunStatus
|
from skyvern.forge.sdk.workflow.models.workflow import WorkflowRunStatus
|
||||||
|
|
||||||
|
if not self.url:
|
||||||
|
browser_state = app.BROWSER_MANAGER.get_for_workflow_run(workflow_run_id)
|
||||||
|
if browser_state:
|
||||||
|
page = await browser_state.get_working_page()
|
||||||
|
if page:
|
||||||
|
current_url = await SkyvernFrame.get_url(frame=page)
|
||||||
|
if current_url != "about:blank":
|
||||||
|
self.url = current_url
|
||||||
|
|
||||||
if not organization_id:
|
if not organization_id:
|
||||||
raise ValueError("Running TaskV2Block requires organization_id")
|
raise ValueError("Running TaskV2Block requires organization_id")
|
||||||
|
|
||||||
|
|
|
@@ -132,13 +132,11 @@ class BrowserManager:
|
||||||
) -> BrowserState:
|
) -> BrowserState:
|
||||||
parent_workflow_run_id = workflow_run.parent_workflow_run_id
|
parent_workflow_run_id = workflow_run.parent_workflow_run_id
|
||||||
workflow_run_id = workflow_run.workflow_run_id
|
workflow_run_id = workflow_run.workflow_run_id
|
||||||
browser_state = self.get_for_workflow_run(workflow_run_id=workflow_run_id)
|
browser_state = self.get_for_workflow_run(
|
||||||
if parent_workflow_run_id:
|
workflow_run_id=workflow_run_id, parent_workflow_run_id=parent_workflow_run_id
|
||||||
browser_state = self.get_for_workflow_run(workflow_run_id=parent_workflow_run_id)
|
)
|
||||||
if browser_state:
|
if browser_state:
|
||||||
self.pages[workflow_run_id] = browser_state
|
self.pages[workflow_run_id] = browser_state
|
||||||
|
|
||||||
if browser_state is not None:
|
|
||||||
return browser_state
|
return browser_state
|
||||||
|
|
||||||
if browser_session_id:
|
if browser_session_id:
|
||||||
|
@@ -193,9 +191,15 @@ class BrowserManager:
|
||||||
)
|
)
|
||||||
return browser_state
|
return browser_state
|
||||||
|
|
||||||
def get_for_workflow_run(self, workflow_run_id: str) -> BrowserState | None:
|
def get_for_workflow_run(
|
||||||
|
self, workflow_run_id: str, parent_workflow_run_id: str | None = None
|
||||||
|
) -> BrowserState | None:
|
||||||
if workflow_run_id in self.pages:
|
if workflow_run_id in self.pages:
|
||||||
return self.pages[workflow_run_id]
|
return self.pages[workflow_run_id]
|
||||||
|
|
||||||
|
if parent_workflow_run_id and parent_workflow_run_id in self.pages:
|
||||||
|
return self.pages[parent_workflow_run_id]
|
||||||
|
|
||||||
return None
|
return None
|
||||||
|
|
||||||
def set_video_artifact_for_task(self, task: Task, artifacts: list[VideoArtifact]) -> None:
|
def set_video_artifact_for_task(self, task: Task, artifacts: list[VideoArtifact]) -> None:
|
||||||
|
|
|
@@ -46,6 +46,10 @@ class SkyvernFrame:
|
||||||
LOG.exception("Timeout to evaluate expression", expression=expression)
|
LOG.exception("Timeout to evaluate expression", expression=expression)
|
||||||
raise TimeoutError("timeout to evaluate expression")
|
raise TimeoutError("timeout to evaluate expression")
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
async def get_url(frame: Page | Frame) -> str:
|
||||||
|
return await SkyvernFrame.evaluate(frame=frame, expression="() => document.location.href")
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
async def take_screenshot(
|
async def take_screenshot(
|
||||||
page: Page,
|
page: Page,
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue