mirror of
https://github.com/Skyvern-AI/skyvern.git
synced 2025-09-02 18:50:24 +00:00
remove the old context tree (#268)
This commit is contained in:
parent
bce6326eef
commit
8d87e71891
4 changed files with 10 additions and 20 deletions
|
@ -136,13 +136,11 @@ class BrowserState:
|
||||||
browser_context: BrowserContext | None = None,
|
browser_context: BrowserContext | None = None,
|
||||||
page: Page | None = None,
|
page: Page | None = None,
|
||||||
browser_artifacts: BrowserArtifacts = BrowserArtifacts(),
|
browser_artifacts: BrowserArtifacts = BrowserArtifacts(),
|
||||||
new_context_tree: bool = False,
|
|
||||||
):
|
):
|
||||||
self.pw = pw
|
self.pw = pw
|
||||||
self.browser_context = browser_context
|
self.browser_context = browser_context
|
||||||
self.page = page
|
self.page = page
|
||||||
self.browser_artifacts = browser_artifacts
|
self.browser_artifacts = browser_artifacts
|
||||||
self.new_context_tree = new_context_tree
|
|
||||||
|
|
||||||
async def _close_all_other_pages(self) -> None:
|
async def _close_all_other_pages(self) -> None:
|
||||||
if not self.browser_context or not self.page:
|
if not self.browser_context or not self.page:
|
||||||
|
|
|
@ -25,7 +25,6 @@ class BrowserManager:
|
||||||
async def _create_browser_state(
|
async def _create_browser_state(
|
||||||
proxy_location: ProxyLocation | None = None,
|
proxy_location: ProxyLocation | None = None,
|
||||||
url: str | None = None,
|
url: str | None = None,
|
||||||
new_context_tree: bool = False,
|
|
||||||
task_id: str | None = None,
|
task_id: str | None = None,
|
||||||
) -> BrowserState:
|
) -> BrowserState:
|
||||||
pw = await async_playwright().start()
|
pw = await async_playwright().start()
|
||||||
|
@ -40,7 +39,6 @@ class BrowserManager:
|
||||||
browser_context=browser_context,
|
browser_context=browser_context,
|
||||||
page=None,
|
page=None,
|
||||||
browser_artifacts=browser_artifacts,
|
browser_artifacts=browser_artifacts,
|
||||||
new_context_tree=new_context_tree,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
async def get_or_create_for_task(self, task: Task) -> BrowserState:
|
async def get_or_create_for_task(self, task: Task) -> BrowserState:
|
||||||
|
@ -55,9 +53,8 @@ class BrowserManager:
|
||||||
self.pages[task.task_id] = self.pages[task.workflow_run_id]
|
self.pages[task.task_id] = self.pages[task.workflow_run_id]
|
||||||
return self.pages[task.task_id]
|
return self.pages[task.task_id]
|
||||||
|
|
||||||
new_ctx = True
|
LOG.info("Creating browser state for task", task_id=task.task_id)
|
||||||
LOG.info("Creating browser state for task", task_id=task.task_id, new_ctx=new_ctx)
|
browser_state = await self._create_browser_state(task.proxy_location, task.url, task.task_id)
|
||||||
browser_state = await self._create_browser_state(task.proxy_location, task.url, new_ctx, task.task_id)
|
|
||||||
|
|
||||||
# The URL here is only used when creating a new page, and not when using an existing page.
|
# The URL here is only used when creating a new page, and not when using an existing page.
|
||||||
# This will make sure browser_state.page is not None.
|
# This will make sure browser_state.page is not None.
|
||||||
|
|
|
@ -544,7 +544,7 @@ function getListboxOptions(element) {
|
||||||
return selectOptions;
|
return selectOptions;
|
||||||
}
|
}
|
||||||
|
|
||||||
function buildTreeFromBody(new_ctx = false) {
|
function buildTreeFromBody() {
|
||||||
var elements = [];
|
var elements = [];
|
||||||
var resultArray = [];
|
var resultArray = [];
|
||||||
|
|
||||||
|
@ -634,7 +634,6 @@ function buildTreeFromBody(new_ctx = false) {
|
||||||
}
|
}
|
||||||
|
|
||||||
if (
|
if (
|
||||||
new_ctx &&
|
|
||||||
checkRequiredFromStyle(element) &&
|
checkRequiredFromStyle(element) &&
|
||||||
!attrs["required"] &&
|
!attrs["required"] &&
|
||||||
!attrs["aria-required"]
|
!attrs["aria-required"]
|
||||||
|
@ -712,7 +711,7 @@ function buildTreeFromBody(new_ctx = false) {
|
||||||
elements[interactableParentId].children.push(elementObj);
|
elements[interactableParentId].children.push(elementObj);
|
||||||
}
|
}
|
||||||
// options already added to the select.options, no need to add options anymore
|
// options already added to the select.options, no need to add options anymore
|
||||||
if (new_ctx && elementObj.options && elementObj.options.length > 0) {
|
if (elementObj.options && elementObj.options.length > 0) {
|
||||||
return elementObj;
|
return elementObj;
|
||||||
}
|
}
|
||||||
// Recursively process the children of the element
|
// Recursively process the children of the element
|
||||||
|
@ -744,7 +743,7 @@ function buildTreeFromBody(new_ctx = false) {
|
||||||
if (parentEle) {
|
if (parentEle) {
|
||||||
if (
|
if (
|
||||||
targetParentElements.has(parentEle.tagName.toLowerCase()) ||
|
targetParentElements.has(parentEle.tagName.toLowerCase()) ||
|
||||||
(new_ctx && checkParentClass(parentEle.className.toLowerCase()))
|
checkParentClass(parentEle.className.toLowerCase())
|
||||||
) {
|
) {
|
||||||
targetContextualParent = parentEle;
|
targetContextualParent = parentEle;
|
||||||
}
|
}
|
||||||
|
@ -939,7 +938,7 @@ function buildTreeFromBody(new_ctx = false) {
|
||||||
element.context = context;
|
element.context = context;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (new_ctx && checkStringIncludeRequire(context)) {
|
if (checkStringIncludeRequire(context)) {
|
||||||
if (
|
if (
|
||||||
!element.attributes["required"] &&
|
!element.attributes["required"] &&
|
||||||
!element.attributes["aria-required"]
|
!element.attributes["aria-required"]
|
||||||
|
@ -949,10 +948,6 @@ function buildTreeFromBody(new_ctx = false) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!new_ctx) {
|
|
||||||
return [elements, resultArray];
|
|
||||||
}
|
|
||||||
|
|
||||||
resultArray = removeOrphanNode(resultArray);
|
resultArray = removeOrphanNode(resultArray);
|
||||||
resultArray.forEach((root) => {
|
resultArray.forEach((root) => {
|
||||||
trimDuplicatedText(root);
|
trimDuplicatedText(root);
|
||||||
|
|
|
@ -184,7 +184,7 @@ async def scrape_web_unsafe(
|
||||||
await remove_bounding_boxes(page)
|
await remove_bounding_boxes(page)
|
||||||
await scroll_to_top(page, drow_boxes=False)
|
await scroll_to_top(page, drow_boxes=False)
|
||||||
|
|
||||||
elements, element_tree = await get_interactable_element_tree(page, browser_state.new_context_tree)
|
elements, element_tree = await get_interactable_element_tree(page)
|
||||||
element_tree = cleanup_elements(copy.deepcopy(element_tree))
|
element_tree = cleanup_elements(copy.deepcopy(element_tree))
|
||||||
|
|
||||||
_build_element_links(elements)
|
_build_element_links(elements)
|
||||||
|
@ -211,15 +211,15 @@ async def scrape_web_unsafe(
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
async def get_interactable_element_tree(page: Page, new_context_tree: bool) -> tuple[list[dict], list[dict]]:
|
async def get_interactable_element_tree(page: Page) -> tuple[list[dict], list[dict]]:
|
||||||
"""
|
"""
|
||||||
Get the element tree of the page, including all the elements that are interactable.
|
Get the element tree of the page, including all the elements that are interactable.
|
||||||
:param page: Page instance to get the element tree from.
|
:param page: Page instance to get the element tree from.
|
||||||
:return: Tuple containing the element tree and a map of element IDs to elements.
|
:return: Tuple containing the element tree and a map of element IDs to elements.
|
||||||
"""
|
"""
|
||||||
await page.evaluate(JS_FUNCTION_DEFS)
|
await page.evaluate(JS_FUNCTION_DEFS)
|
||||||
js_script = "(new_ctx) => buildTreeFromBody(new_ctx)"
|
js_script = "() => buildTreeFromBody()"
|
||||||
elements, element_tree = await page.evaluate(js_script, new_context_tree)
|
elements, element_tree = await page.evaluate(js_script)
|
||||||
return elements, element_tree
|
return elements, element_tree
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Add table
Reference in a new issue