From eb5220b0581443af4dd4804fbf3ce7e65115f698 Mon Sep 17 00:00:00 2001 From: Alessandro <155005371+3clyp50@users.noreply.github.com> Date: Sat, 2 May 2026 12:23:31 +0200 Subject: [PATCH] fix(browser): read content inside shadow DOM Teach the browser page-content helper to traverse open shadow roots and assigned slot nodes when collecting text, rendering list/inline children, and resolving selectors. This lets Agent Zero inspect modern component-heavy pages more accurately without depending only on light-DOM textContent. Bump the injected helper version so existing browser contexts can refresh to the new DOM traversal behavior. --- .../_browser/assets/browser-page-content.js | 93 +++++++++++++++++-- 1 file changed, 86 insertions(+), 7 deletions(-) diff --git a/plugins/_browser/assets/browser-page-content.js b/plugins/_browser/assets/browser-page-content.js index edf9d9403..d31d70fe3 100644 --- a/plugins/_browser/assets/browser-page-content.js +++ b/plugins/_browser/assets/browser-page-content.js @@ -1,7 +1,7 @@ (() => { const GLOBAL_KEY = "__spaceBrowserPageContent__"; const DOM_HELPER_KEY = "__spaceBrowserDomHelper__"; - const VERSION = "7"; + const VERSION = "8"; const BLOCK_TAGS = new Set([ "ADDRESS", "ARTICLE", @@ -518,6 +518,82 @@ return String(element?.tagName || "").toUpperCase(); } + function expandSlotNodes(node) { + if (!isElementNode(node) || getTagName(node) !== "SLOT" || typeof node.assignedNodes !== "function") { + return [node]; + } + + try { + const assignedNodes = [...(node.assignedNodes({ flatten: true }) || [])].filter(Boolean); + if (assignedNodes.length) { + return assignedNodes.flatMap((assignedNode) => expandSlotNodes(assignedNode)); + } + } catch { + // Fall through to the slot's fallback children. + } + + return [...(node.childNodes || [])].flatMap((childNode) => expandSlotNodes(childNode)); + } + + function getReadableChildNodes(element) { + const shadowRoot = element?.shadowRoot; + if (shadowRoot) { + const shadowNodes = [...(shadowRoot.childNodes || [])].flatMap((childNode) => expandSlotNodes(childNode)); + if (shadowNodes.length) { + return shadowNodes; + } + } + + return [...(element?.childNodes || [])].flatMap((childNode) => expandSlotNodes(childNode)); + } + + function getReadableElementChildren(element) { + return getReadableChildNodes(element).filter((childNode) => isElementNode(childNode)); + } + + function getReadableNodeText(node) { + if (isTextNode(node)) { + return node.textContent || ""; + } + + if (!isElementNode(node) || isHiddenElement(node)) { + return ""; + } + + return getReadableChildNodes(node) + .map((childNode) => getReadableNodeText(childNode)) + .filter(Boolean) + .join(" "); + } + + function querySelectorAllDeep(selector, root = globalThis.document) { + const results = []; + const seen = new Set(); + + const addResult = (element) => { + if (element && !seen.has(element)) { + seen.add(element); + results.push(element); + } + }; + + const visitRoot = (scope) => { + if (!scope || typeof scope.querySelectorAll !== "function") { + return; + } + + [...(scope.querySelectorAll(selector) || [])].forEach(addResult); + [...(scope.querySelectorAll("*") || [])].forEach((element) => { + if (element.shadowRoot) { + visitRoot(element.shadowRoot); + } + }); + }; + + visitRoot(root); + return results; + } + function getAttributeNamesSafe(element) { try { if (typeof element?.getAttributeNames === "function") { @@ -968,7 +1044,8 @@ } function getElementText(element) { - return normalizeText(element?.textContent || ""); + const readableText = normalizeText(getReadableNodeText(element)); + return readableText || normalizeText(element?.textContent || ""); } function collectLabelCandidates(element, options = {}) { @@ -1473,7 +1550,7 @@ function renderInlineChildren(element, context) { const parts = []; - element.childNodes.forEach((childNode) => { + getReadableChildNodes(element).forEach((childNode) => { const renderedChild = renderInlineNode(childNode, context); if (renderedChild) { parts.push(renderedChild); @@ -1522,7 +1599,7 @@ const inlineParts = []; const nestedBlocks = []; - element.childNodes.forEach((childNode) => { + getReadableChildNodes(element).forEach((childNode) => { if (isElementNode(childNode) && (getTagName(childNode) === "UL" || getTagName(childNode) === "OL")) { const nestedList = renderList(childNode, context, depth + 1); if (nestedList) { @@ -1548,7 +1625,7 @@ function renderList(element, context, depth = 0) { const ordered = getTagName(element) === "OL"; - return [...element.children] + return getReadableElementChildren(element) .filter((child) => getTagName(child) === "LI" && !isHiddenElement(child)) .map((item, index) => renderListItem(item, context, depth, index, ordered)) .filter(Boolean) @@ -1661,7 +1738,7 @@ } }; - element.childNodes.forEach((childNode) => { + getReadableChildNodes(element).forEach((childNode) => { if (isTextNode(childNode)) { const rawTextContent = normalizeText(childNode.textContent || ""); if (shouldDropReadableText(rawTextContent)) { @@ -1732,7 +1809,9 @@ items: selectors.map((selector) => { let targets = []; try { - targets = [...(doc?.querySelectorAll?.(selector) || [])]; + targets = doc === globalThis.document + ? querySelectorAllDeep(selector, doc) + : [...(doc?.querySelectorAll?.(selector) || [])]; } catch (error) { throw createNamedError( "BrowserPageContentSelectorError",