New Case Study: See how Anthropic automated 95% of dependency reviews with Socket. Learn More
Socket
Sign in | Demo | Install
Socket

@browserbasehq/stagehand

Package Overview
Dependencies
Maintainers
0
Versions
101
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

@browserbasehq/stagehand - npm Package Compare versions

Comparing version 1.7.0-alpha-b902192bc7ff8eb02c85150c1fe6f89c2a95b211 to 1.7.0-alpha-ba4ec335a5323648c6016cc480300fd58868311a

dist/evals/tasks/allrecipes.d.ts

10

dist/index.d.ts

@@ -151,7 +151,4 @@ import { Page, BrowserContext, Browser } from '@playwright/test';

sessionUrl: string;
sessionId: string;
}
interface InitResult {
debugUrl: string;
sessionUrl: string;
}
interface InitFromPageOptions {

@@ -186,2 +183,3 @@ page: Page;

domSettleTimeoutMs?: number;
useTextExtract?: boolean;
}

@@ -207,2 +205,3 @@ type ExtractResult<T extends z.AnyZodObject> = z.infer<T>;

contextPath?: string;
sessionId?: string;
}

@@ -227,2 +226,3 @@

context: BrowserContext;
browserbaseSessionID?: string;
private env;

@@ -260,3 +260,3 @@ private apiKey;

act({ action, modelName, modelClientOptions, useVision, variables, domSettleTimeoutMs, }: ActOptions): Promise<ActResult>;
extract<T extends z.AnyZodObject>({ instruction, schema, modelName, modelClientOptions, domSettleTimeoutMs, }: ExtractOptions<T>): Promise<ExtractResult<T>>;
extract<T extends z.AnyZodObject>({ instruction, schema, modelName, modelClientOptions, domSettleTimeoutMs, useTextExtract, }: ExtractOptions<T>): Promise<ExtractResult<T>>;
observe(options?: ObserveOptions): Promise<ObserveResult[]>;

@@ -263,0 +263,0 @@ close(): Promise<void>;

@@ -1,1 +0,1 @@

export declare const scriptContent = "(() => {\n // lib/dom/xpathUtils.ts\n function getParentElement(node) {\n return isElementNode(node) ? node.parentElement : node.parentNode;\n }\n function getCombinations(attributes, size) {\n const results = [];\n function helper(start, combo) {\n if (combo.length === size) {\n results.push([...combo]);\n return;\n }\n for (let i = start; i < attributes.length; i++) {\n combo.push(attributes[i]);\n helper(i + 1, combo);\n combo.pop();\n }\n }\n helper(0, []);\n return results;\n }\n function isXPathFirstResultElement(xpath, target) {\n try {\n const result = document.evaluate(\n xpath,\n document.documentElement,\n null,\n XPathResult.ORDERED_NODE_SNAPSHOT_TYPE,\n null\n );\n return result.snapshotItem(0) === target;\n } catch (error) {\n console.warn(`Invalid XPath expression: ${xpath}`, error);\n return false;\n }\n }\n function escapeXPathString(value) {\n if (value.includes(\"'\")) {\n if (value.includes('\"')) {\n return \"concat(\" + value.split(/('+)/).map((part) => {\n if (part === \"'\") {\n return `\"'\"`;\n } else if (part.startsWith(\"'\") && part.endsWith(\"'\")) {\n return `\"${part}\"`;\n } else {\n return `'${part}'`;\n }\n }).join(\",\") + \")\";\n } else {\n return `\"${value}\"`;\n }\n } else {\n return `'${value}'`;\n }\n }\n async function generateXPathsForElement(element) {\n if (!element) return [];\n const [complexXPath, standardXPath, idBasedXPath] = await Promise.all([\n generateComplexXPath(element),\n generateStandardXPath(element),\n generatedIdBasedXPath(element)\n ]);\n return [standardXPath, ...idBasedXPath ? 
[idBasedXPath] : [], complexXPath];\n }\n async function generateComplexXPath(element) {\n const parts = [];\n let currentElement = element;\n while (currentElement && (isTextNode(currentElement) || isElementNode(currentElement))) {\n if (isElementNode(currentElement)) {\n const el = currentElement;\n let selector = el.tagName.toLowerCase();\n const attributePriority = [\n \"data-qa\",\n \"data-component\",\n \"data-role\",\n \"role\",\n \"aria-role\",\n \"type\",\n \"name\",\n \"aria-label\",\n \"placeholder\",\n \"title\",\n \"alt\"\n ];\n const attributes = attributePriority.map((attr) => {\n let value = el.getAttribute(attr);\n if (attr === \"href-full\" && value) {\n value = el.getAttribute(\"href\");\n }\n return value ? { attr: attr === \"href-full\" ? \"href\" : attr, value } : null;\n }).filter((attr) => attr !== null);\n let uniqueSelector = \"\";\n for (let i = 1; i <= attributes.length; i++) {\n const combinations = getCombinations(attributes, i);\n for (const combo of combinations) {\n const conditions = combo.map((a) => `@${a.attr}=${escapeXPathString(a.value)}`).join(\" and \");\n const xpath2 = `//${selector}[${conditions}]`;\n if (isXPathFirstResultElement(xpath2, el)) {\n uniqueSelector = xpath2;\n break;\n }\n }\n if (uniqueSelector) break;\n }\n if (uniqueSelector) {\n parts.unshift(uniqueSelector.replace(\"//\", \"\"));\n break;\n } else {\n const parent = getParentElement(el);\n if (parent) {\n const siblings = Array.from(parent.children).filter(\n (sibling) => sibling.tagName === el.tagName\n );\n const index = siblings.indexOf(el) + 1;\n selector += siblings.length > 1 ? 
`[${index}]` : \"\";\n }\n parts.unshift(selector);\n }\n }\n currentElement = getParentElement(currentElement);\n }\n const xpath = \"//\" + parts.join(\"/\");\n return xpath;\n }\n async function generateStandardXPath(element) {\n const parts = [];\n while (element && (isTextNode(element) || isElementNode(element))) {\n let index = 0;\n let hasSameTypeSiblings = false;\n const siblings = element.parentElement ? Array.from(element.parentElement.childNodes) : [];\n for (let i = 0; i < siblings.length; i++) {\n const sibling = siblings[i];\n if (sibling.nodeType === element.nodeType && sibling.nodeName === element.nodeName) {\n index = index + 1;\n hasSameTypeSiblings = true;\n if (sibling.isSameNode(element)) {\n break;\n }\n }\n }\n if (element.nodeName !== \"#text\") {\n const tagName = element.nodeName.toLowerCase();\n const pathIndex = hasSameTypeSiblings ? `[${index}]` : \"\";\n parts.unshift(`${tagName}${pathIndex}`);\n }\n element = element.parentElement;\n }\n return parts.length ? 
`/${parts.join(\"/\")}` : \"\";\n }\n async function generatedIdBasedXPath(element) {\n if (isElementNode(element) && element.id) {\n return `//*[@id='${element.id}']`;\n }\n return null;\n }\n\n // lib/dom/utils.ts\n async function waitForDomSettle() {\n return new Promise((resolve) => {\n const createTimeout = () => {\n return setTimeout(() => {\n resolve();\n }, 2e3);\n };\n let timeout = createTimeout();\n const observer = new MutationObserver(() => {\n clearTimeout(timeout);\n timeout = createTimeout();\n });\n observer.observe(window.document.body, { childList: true, subtree: true });\n });\n }\n window.waitForDomSettle = waitForDomSettle;\n function calculateViewportHeight() {\n return Math.ceil(window.innerHeight * 0.75);\n }\n\n // lib/dom/process.ts\n function isElementNode(node) {\n return node.nodeType === Node.ELEMENT_NODE;\n }\n function isTextNode(node) {\n return node.nodeType === Node.TEXT_NODE && Boolean(node.textContent?.trim());\n }\n async function processDom(chunksSeen) {\n const { chunk, chunksArray } = await pickChunk(chunksSeen);\n const { outputString, selectorMap } = await processElements(chunk);\n console.log(\n `Stagehand (Browser Process): Extracted dom elements:\n${outputString}`\n );\n return {\n outputString,\n selectorMap,\n chunk,\n chunks: chunksArray\n };\n }\n async function processAllOfDom() {\n console.log(\"Stagehand (Browser Process): Processing all of DOM\");\n const viewportHeight = calculateViewportHeight();\n const documentHeight = document.documentElement.scrollHeight;\n const totalChunks = Math.ceil(documentHeight / viewportHeight);\n let index = 0;\n const results = [];\n for (let chunk = 0; chunk < totalChunks; chunk++) {\n const result = await processElements(chunk, true, index);\n results.push(result);\n index += Object.keys(result.selectorMap).length;\n }\n await scrollToHeight(0);\n const allOutputString = results.map((result) => result.outputString).join(\"\");\n const allSelectorMap = results.reduce(\n (acc, 
result) => ({ ...acc, ...result.selectorMap }),\n {}\n );\n console.log(\n `Stagehand (Browser Process): All dom elements: ${allOutputString}`\n );\n return {\n outputString: allOutputString,\n selectorMap: allSelectorMap\n };\n }\n async function scrollToHeight(height) {\n window.scrollTo({ top: height, left: 0, behavior: \"smooth\" });\n await new Promise((resolve) => {\n let scrollEndTimer;\n const handleScrollEnd = () => {\n clearTimeout(scrollEndTimer);\n scrollEndTimer = window.setTimeout(() => {\n window.removeEventListener(\"scroll\", handleScrollEnd);\n resolve();\n }, 100);\n };\n window.addEventListener(\"scroll\", handleScrollEnd, { passive: true });\n handleScrollEnd();\n });\n }\n var xpathCache = /* @__PURE__ */ new Map();\n async function processElements(chunk, scrollToChunk = true, indexOffset = 0) {\n console.time(\"processElements:total\");\n const viewportHeight = calculateViewportHeight();\n const chunkHeight = viewportHeight * chunk;\n const maxScrollTop = document.documentElement.scrollHeight - viewportHeight;\n const offsetTop = Math.min(chunkHeight, maxScrollTop);\n if (scrollToChunk) {\n console.time(\"processElements:scroll\");\n await scrollToHeight(offsetTop);\n console.timeEnd(\"processElements:scroll\");\n }\n const candidateElements = [];\n const DOMQueue = [...document.body.childNodes];\n console.log(\"Stagehand (Browser Process): Generating candidate elements\");\n console.time(\"processElements:findCandidates\");\n while (DOMQueue.length > 0) {\n const element = DOMQueue.pop();\n let shouldAddElement = false;\n if (element && isElementNode(element)) {\n const childrenCount = element.childNodes.length;\n for (let i = childrenCount - 1; i >= 0; i--) {\n const child = element.childNodes[i];\n DOMQueue.push(child);\n }\n if (isInteractiveElement(element)) {\n if (isActive(element) && isVisible(element)) {\n shouldAddElement = true;\n }\n }\n if (isLeafElement(element)) {\n if (isActive(element) && isVisible(element)) {\n 
shouldAddElement = true;\n }\n }\n }\n if (element && isTextNode(element) && isTextVisible(element)) {\n shouldAddElement = true;\n }\n if (shouldAddElement) {\n candidateElements.push(element);\n }\n }\n console.timeEnd(\"processElements:findCandidates\");\n const selectorMap = {};\n let outputString = \"\";\n console.log(\n `Stagehand (Browser Process): Processing candidate elements: ${candidateElements.length}`\n );\n console.time(\"processElements:processCandidates\");\n console.time(\"processElements:generateXPaths\");\n const xpathLists = await Promise.all(\n candidateElements.map(async (element) => {\n if (xpathCache.has(element)) {\n return xpathCache.get(element);\n }\n const xpaths = await generateXPathsForElement(element);\n xpathCache.set(element, xpaths);\n return xpaths;\n })\n );\n console.timeEnd(\"processElements:generateXPaths\");\n candidateElements.forEach((element, index) => {\n const xpaths = xpathLists[index];\n let elementOutput = \"\";\n if (isTextNode(element)) {\n const textContent = element.textContent?.trim();\n if (textContent) {\n elementOutput += `${index + indexOffset}:${textContent}\n`;\n }\n } else if (isElementNode(element)) {\n const tagName = element.tagName.toLowerCase();\n const attributes = collectEssentialAttributes(element);\n const openingTag = `<${tagName}${attributes ? 
\" \" + attributes : \"\"}>`;\n const closingTag = `</${tagName}>`;\n const textContent = element.textContent?.trim() || \"\";\n elementOutput += `${index + indexOffset}:${openingTag}${textContent}${closingTag}\n`;\n }\n outputString += elementOutput;\n selectorMap[index + indexOffset] = xpaths;\n });\n console.timeEnd(\"processElements:processCandidates\");\n console.timeEnd(\"processElements:total\");\n return {\n outputString,\n selectorMap\n };\n }\n function collectEssentialAttributes(element) {\n const essentialAttributes = [\n \"id\",\n \"class\",\n \"href\",\n \"src\",\n \"aria-label\",\n \"aria-name\",\n \"aria-role\",\n \"aria-description\",\n \"aria-expanded\",\n \"aria-haspopup\",\n \"type\",\n \"value\"\n ];\n const attrs = essentialAttributes.map((attr) => {\n const value = element.getAttribute(attr);\n return value ? `${attr}=\"${value}\"` : \"\";\n }).filter((attr) => attr !== \"\");\n Array.from(element.attributes).forEach((attr) => {\n if (attr.name.startsWith(\"data-\")) {\n attrs.push(`${attr.name}=\"${attr.value}\"`);\n }\n });\n return attrs.join(\" \");\n }\n window.processDom = processDom;\n window.processAllOfDom = processAllOfDom;\n window.processElements = processElements;\n window.scrollToHeight = scrollToHeight;\n var leafElementDenyList = [\"SVG\", \"IFRAME\", \"SCRIPT\", \"STYLE\", \"LINK\"];\n var interactiveElementTypes = [\n \"A\",\n \"BUTTON\",\n \"DETAILS\",\n \"EMBED\",\n \"INPUT\",\n \"LABEL\",\n \"MENU\",\n \"MENUITEM\",\n \"OBJECT\",\n \"SELECT\",\n \"TEXTAREA\",\n \"SUMMARY\"\n ];\n var interactiveRoles = [\n \"button\",\n \"menu\",\n \"menuitem\",\n \"link\",\n \"checkbox\",\n \"radio\",\n \"slider\",\n \"tab\",\n \"tabpanel\",\n \"textbox\",\n \"combobox\",\n \"grid\",\n \"listbox\",\n \"option\",\n \"progressbar\",\n \"scrollbar\",\n \"searchbox\",\n \"switch\",\n \"tree\",\n \"treeitem\",\n \"spinbutton\",\n \"tooltip\"\n ];\n var interactiveAriaRoles = [\"menu\", \"menuitem\", \"button\"];\n var isVisible = (element) => 
{\n const rect = element.getBoundingClientRect();\n if (rect.width === 0 || rect.height === 0 || rect.top < 0 || rect.top > window.innerHeight) {\n return false;\n }\n if (!isTopElement(element, rect)) {\n return false;\n }\n const visible = element.checkVisibility({\n checkOpacity: true,\n checkVisibilityCSS: true\n });\n return visible;\n };\n var isTextVisible = (element) => {\n const range = document.createRange();\n range.selectNodeContents(element);\n const rect = range.getBoundingClientRect();\n if (rect.width === 0 || rect.height === 0 || rect.top < 0 || rect.top > window.innerHeight) {\n return false;\n }\n const parent = element.parentElement;\n if (!parent) {\n return false;\n }\n if (!isTopElement(parent, rect)) {\n return false;\n }\n const visible = parent.checkVisibility({\n checkOpacity: true,\n checkVisibilityCSS: true\n });\n return visible;\n };\n function isTopElement(elem, rect) {\n const points = [\n { x: rect.left + rect.width * 0.25, y: rect.top + rect.height * 0.25 },\n { x: rect.left + rect.width * 0.75, y: rect.top + rect.height * 0.25 },\n { x: rect.left + rect.width * 0.25, y: rect.top + rect.height * 0.75 },\n { x: rect.left + rect.width * 0.75, y: rect.top + rect.height * 0.75 },\n { x: rect.left + rect.width / 2, y: rect.top + rect.height / 2 }\n ];\n return points.some((point) => {\n const topEl = document.elementFromPoint(point.x, point.y);\n let current = topEl;\n while (current && current !== document.body) {\n if (current.isSameNode(elem)) {\n return true;\n }\n current = current.parentElement;\n }\n return false;\n });\n }\n var isActive = (element) => {\n if (element.hasAttribute(\"disabled\") || element.hasAttribute(\"hidden\") || element.getAttribute(\"aria-disabled\") === \"true\") {\n return false;\n }\n return true;\n };\n var isInteractiveElement = (element) => {\n const elementType = element.tagName;\n const elementRole = element.getAttribute(\"role\");\n const elementAriaRole = element.getAttribute(\"aria-role\");\n 
return elementType && interactiveElementTypes.includes(elementType) || elementRole && interactiveRoles.includes(elementRole) || elementAriaRole && interactiveAriaRoles.includes(elementAriaRole);\n };\n var isLeafElement = (element) => {\n if (element.textContent === \"\") {\n return false;\n }\n if (element.childNodes.length === 0) {\n return !leafElementDenyList.includes(element.tagName);\n }\n if (element.childNodes.length === 1 && isTextNode(element.childNodes[0])) {\n return true;\n }\n return false;\n };\n async function pickChunk(chunksSeen) {\n const viewportHeight = calculateViewportHeight();\n const documentHeight = document.documentElement.scrollHeight;\n const chunks = Math.ceil(documentHeight / viewportHeight);\n const chunksArray = Array.from({ length: chunks }, (_, i) => i);\n const chunksRemaining = chunksArray.filter((chunk2) => {\n return !chunksSeen.includes(chunk2);\n });\n const currentScrollPosition = window.scrollY;\n const closestChunk = chunksRemaining.reduce((closest, current) => {\n const currentChunkTop = viewportHeight * current;\n const closestChunkTop = viewportHeight * closest;\n return Math.abs(currentScrollPosition - currentChunkTop) < Math.abs(currentScrollPosition - closestChunkTop) ? 
current : closest;\n }, chunksRemaining[0]);\n const chunk = closestChunk;\n if (chunk === void 0) {\n throw new Error(`No chunks remaining to check: ${chunksRemaining}`);\n }\n return {\n chunk,\n chunksArray\n };\n }\n\n // lib/dom/debug.ts\n async function debugDom() {\n window.chunkNumber = 0;\n const { selectorMap: multiSelectorMap } = await window.processElements(\n window.chunkNumber\n );\n const selectorMap = multiSelectorMapToSelectorMap(multiSelectorMap);\n drawChunk(selectorMap);\n setupChunkNav();\n }\n function multiSelectorMapToSelectorMap(multiSelectorMap) {\n return Object.fromEntries(\n Object.entries(multiSelectorMap).map(([key, selectors]) => [\n Number(key),\n selectors[0]\n ])\n );\n }\n function drawChunk(selectorMap) {\n if (!window.showChunks) return;\n cleanupMarkers();\n Object.values(selectorMap).forEach((selector) => {\n const element = document.evaluate(\n selector,\n document,\n null,\n XPathResult.FIRST_ORDERED_NODE_TYPE,\n null\n ).singleNodeValue;\n if (element) {\n let rect;\n if (element.nodeType === Node.ELEMENT_NODE) {\n rect = element.getBoundingClientRect();\n } else {\n const range = document.createRange();\n range.selectNodeContents(element);\n rect = range.getBoundingClientRect();\n }\n const color = \"grey\";\n const overlay = document.createElement(\"div\");\n overlay.style.position = \"absolute\";\n overlay.style.left = `${rect.left + window.scrollX}px`;\n overlay.style.top = `${rect.top + window.scrollY}px`;\n overlay.style.padding = \"2px\";\n overlay.style.width = `${rect.width}px`;\n overlay.style.height = `${rect.height}px`;\n overlay.style.backgroundColor = color;\n overlay.className = \"stagehand-marker\";\n overlay.style.opacity = \"0.3\";\n overlay.style.zIndex = \"1000000000\";\n overlay.style.border = \"1px solid\";\n overlay.style.pointerEvents = \"none\";\n document.body.appendChild(overlay);\n }\n });\n }\n async function cleanupDebug() {\n cleanupMarkers();\n cleanupNav();\n }\n function cleanupMarkers() 
{\n const markers = document.querySelectorAll(\".stagehand-marker\");\n markers.forEach((marker) => {\n marker.remove();\n });\n }\n function cleanupNav() {\n const stagehandNavElements = document.querySelectorAll(\".stagehand-nav\");\n stagehandNavElements.forEach((element) => {\n element.remove();\n });\n }\n function setupChunkNav() {\n const viewportHeight = calculateViewportHeight();\n const documentHeight = document.documentElement.scrollHeight;\n const totalChunks = Math.ceil(documentHeight / viewportHeight);\n if (window.chunkNumber > 0) {\n const prevChunkButton = document.createElement(\"button\");\n prevChunkButton.className = \"stagehand-nav\";\n prevChunkButton.textContent = \"Previous\";\n prevChunkButton.style.marginLeft = \"50px\";\n prevChunkButton.style.position = \"fixed\";\n prevChunkButton.style.bottom = \"10px\";\n prevChunkButton.style.left = \"50%\";\n prevChunkButton.style.transform = \"translateX(-50%)\";\n prevChunkButton.style.zIndex = \"1000000000\";\n prevChunkButton.onclick = async () => {\n cleanupMarkers();\n cleanupNav();\n window.chunkNumber -= 1;\n window.scrollTo(0, window.chunkNumber * viewportHeight);\n await window.waitForDomSettle();\n const { selectorMap: multiSelectorMap } = await window.processElements(\n window.chunkNumber\n );\n const selectorMap = multiSelectorMapToSelectorMap(multiSelectorMap);\n drawChunk(selectorMap);\n setupChunkNav();\n };\n document.body.appendChild(prevChunkButton);\n }\n if (totalChunks > window.chunkNumber) {\n const nextChunkButton = document.createElement(\"button\");\n nextChunkButton.className = \"stagehand-nav\";\n nextChunkButton.textContent = \"Next\";\n nextChunkButton.style.marginRight = \"50px\";\n nextChunkButton.style.position = \"fixed\";\n nextChunkButton.style.bottom = \"10px\";\n nextChunkButton.style.right = \"50%\";\n nextChunkButton.style.transform = \"translateX(50%)\";\n nextChunkButton.style.zIndex = \"1000000000\";\n nextChunkButton.onclick = async () => {\n 
cleanupMarkers();\n cleanupNav();\n window.chunkNumber += 1;\n window.scrollTo(0, window.chunkNumber * viewportHeight);\n await window.waitForDomSettle();\n const { selectorMap: multiSelectorMap } = await window.processElements(\n window.chunkNumber\n );\n const selectorMap = multiSelectorMapToSelectorMap(multiSelectorMap);\n drawChunk(selectorMap);\n setupChunkNav();\n };\n document.body.appendChild(nextChunkButton);\n }\n }\n window.debugDom = debugDom;\n window.cleanupDebug = cleanupDebug;\n})();\n";
export declare const scriptContent = "(() => {\n // lib/dom/xpathUtils.ts\n function getParentElement(node) {\n return isElementNode(node) ? node.parentElement : node.parentNode;\n }\n function getCombinations(attributes, size) {\n const results = [];\n function helper(start, combo) {\n if (combo.length === size) {\n results.push([...combo]);\n return;\n }\n for (let i = start; i < attributes.length; i++) {\n combo.push(attributes[i]);\n helper(i + 1, combo);\n combo.pop();\n }\n }\n helper(0, []);\n return results;\n }\n function isXPathFirstResultElement(xpath, target) {\n try {\n const result = document.evaluate(\n xpath,\n document.documentElement,\n null,\n XPathResult.ORDERED_NODE_SNAPSHOT_TYPE,\n null\n );\n return result.snapshotItem(0) === target;\n } catch (error) {\n console.warn(`Invalid XPath expression: ${xpath}`, error);\n return false;\n }\n }\n function escapeXPathString(value) {\n if (value.includes(\"'\")) {\n if (value.includes('\"')) {\n return \"concat(\" + value.split(/('+)/).map((part) => {\n if (part === \"'\") {\n return `\"'\"`;\n } else if (part.startsWith(\"'\") && part.endsWith(\"'\")) {\n return `\"${part}\"`;\n } else {\n return `'${part}'`;\n }\n }).join(\",\") + \")\";\n } else {\n return `\"${value}\"`;\n }\n } else {\n return `'${value}'`;\n }\n }\n async function generateXPathsForElement(element) {\n if (!element) return [];\n const [complexXPath, standardXPath, idBasedXPath] = await Promise.all([\n generateComplexXPath(element),\n generateStandardXPath(element),\n generatedIdBasedXPath(element)\n ]);\n return [standardXPath, ...idBasedXPath ? 
[idBasedXPath] : [], complexXPath];\n }\n async function generateComplexXPath(element) {\n const parts = [];\n let currentElement = element;\n while (currentElement && (isTextNode(currentElement) || isElementNode(currentElement))) {\n if (isElementNode(currentElement)) {\n const el = currentElement;\n let selector = el.tagName.toLowerCase();\n const attributePriority = [\n \"data-qa\",\n \"data-component\",\n \"data-role\",\n \"role\",\n \"aria-role\",\n \"type\",\n \"name\",\n \"aria-label\",\n \"placeholder\",\n \"title\",\n \"alt\"\n ];\n const attributes = attributePriority.map((attr) => {\n let value = el.getAttribute(attr);\n if (attr === \"href-full\" && value) {\n value = el.getAttribute(\"href\");\n }\n return value ? { attr: attr === \"href-full\" ? \"href\" : attr, value } : null;\n }).filter((attr) => attr !== null);\n let uniqueSelector = \"\";\n for (let i = 1; i <= attributes.length; i++) {\n const combinations = getCombinations(attributes, i);\n for (const combo of combinations) {\n const conditions = combo.map((a) => `@${a.attr}=${escapeXPathString(a.value)}`).join(\" and \");\n const xpath2 = `//${selector}[${conditions}]`;\n if (isXPathFirstResultElement(xpath2, el)) {\n uniqueSelector = xpath2;\n break;\n }\n }\n if (uniqueSelector) break;\n }\n if (uniqueSelector) {\n parts.unshift(uniqueSelector.replace(\"//\", \"\"));\n break;\n } else {\n const parent = getParentElement(el);\n if (parent) {\n const siblings = Array.from(parent.children).filter(\n (sibling) => sibling.tagName === el.tagName\n );\n const index = siblings.indexOf(el) + 1;\n selector += siblings.length > 1 ? 
`[${index}]` : \"\";\n }\n parts.unshift(selector);\n }\n }\n currentElement = getParentElement(currentElement);\n }\n const xpath = \"//\" + parts.join(\"/\");\n return xpath;\n }\n async function generateStandardXPath(element) {\n const parts = [];\n while (element && (isTextNode(element) || isElementNode(element))) {\n let index = 0;\n let hasSameTypeSiblings = false;\n const siblings = element.parentElement ? Array.from(element.parentElement.childNodes) : [];\n for (let i = 0; i < siblings.length; i++) {\n const sibling = siblings[i];\n if (sibling.nodeType === element.nodeType && sibling.nodeName === element.nodeName) {\n index = index + 1;\n hasSameTypeSiblings = true;\n if (sibling.isSameNode(element)) {\n break;\n }\n }\n }\n if (element.nodeName !== \"#text\") {\n const tagName = element.nodeName.toLowerCase();\n const pathIndex = hasSameTypeSiblings ? `[${index}]` : \"\";\n parts.unshift(`${tagName}${pathIndex}`);\n }\n element = element.parentElement;\n }\n return parts.length ? 
`/${parts.join(\"/\")}` : \"\";\n }\n async function generatedIdBasedXPath(element) {\n if (isElementNode(element) && element.id) {\n return `//*[@id='${element.id}']`;\n }\n return null;\n }\n\n // lib/dom/utils.ts\n async function waitForDomSettle() {\n return new Promise((resolve) => {\n const createTimeout = () => {\n return setTimeout(() => {\n resolve();\n }, 2e3);\n };\n let timeout = createTimeout();\n const observer = new MutationObserver(() => {\n clearTimeout(timeout);\n timeout = createTimeout();\n });\n observer.observe(window.document.body, { childList: true, subtree: true });\n });\n }\n window.waitForDomSettle = waitForDomSettle;\n function calculateViewportHeight() {\n return Math.ceil(window.innerHeight * 0.75);\n }\n\n // lib/dom/process.ts\n function isElementNode(node) {\n return node.nodeType === Node.ELEMENT_NODE;\n }\n function isTextNode(node) {\n return node.nodeType === Node.TEXT_NODE && Boolean(node.textContent?.trim());\n }\n async function processDom(chunksSeen) {\n const { chunk, chunksArray } = await pickChunk(chunksSeen);\n const { outputString, selectorMap } = await processElements(chunk);\n console.log(\n `Stagehand (Browser Process): Extracted dom elements:\n${outputString}`\n );\n return {\n outputString,\n selectorMap,\n chunk,\n chunks: chunksArray\n };\n }\n async function processAllOfDom() {\n console.log(\"Stagehand (Browser Process): Processing all of DOM\");\n const viewportHeight = calculateViewportHeight();\n const documentHeight = document.documentElement.scrollHeight;\n const totalChunks = Math.ceil(documentHeight / viewportHeight);\n let index = 0;\n const results = [];\n for (let chunk = 0; chunk < totalChunks; chunk++) {\n const result = await processElements(chunk, true, index);\n results.push(result);\n index += Object.keys(result.selectorMap).length;\n }\n await scrollToHeight(0);\n const allOutputString = results.map((result) => result.outputString).join(\"\");\n const allSelectorMap = results.reduce(\n (acc, 
result) => ({ ...acc, ...result.selectorMap }),\n {}\n );\n console.log(\n `Stagehand (Browser Process): All dom elements: ${allOutputString}`\n );\n return {\n outputString: allOutputString,\n selectorMap: allSelectorMap\n };\n }\n async function scrollToHeight(height) {\n window.scrollTo({ top: height, left: 0, behavior: \"smooth\" });\n await new Promise((resolve) => {\n let scrollEndTimer;\n const handleScrollEnd = () => {\n clearTimeout(scrollEndTimer);\n scrollEndTimer = window.setTimeout(() => {\n window.removeEventListener(\"scroll\", handleScrollEnd);\n resolve();\n }, 100);\n };\n window.addEventListener(\"scroll\", handleScrollEnd, { passive: true });\n handleScrollEnd();\n });\n }\n var xpathCache = /* @__PURE__ */ new Map();\n async function processElements(chunk, scrollToChunk = true, indexOffset = 0) {\n console.time(\"processElements:total\");\n const viewportHeight = calculateViewportHeight();\n const chunkHeight = viewportHeight * chunk;\n const maxScrollTop = document.documentElement.scrollHeight - viewportHeight;\n const offsetTop = Math.min(chunkHeight, maxScrollTop);\n if (scrollToChunk) {\n console.time(\"processElements:scroll\");\n await scrollToHeight(offsetTop);\n console.timeEnd(\"processElements:scroll\");\n }\n const candidateElements = [];\n const DOMQueue = [...document.body.childNodes];\n console.log(\"Stagehand (Browser Process): Generating candidate elements\");\n console.time(\"processElements:findCandidates\");\n while (DOMQueue.length > 0) {\n const element = DOMQueue.pop();\n let shouldAddElement = false;\n if (element && isElementNode(element)) {\n const childrenCount = element.childNodes.length;\n for (let i = childrenCount - 1; i >= 0; i--) {\n const child = element.childNodes[i];\n DOMQueue.push(child);\n }\n if (isInteractiveElement(element)) {\n if (isActive(element) && isVisible(element)) {\n shouldAddElement = true;\n }\n }\n if (isLeafElement(element)) {\n if (isActive(element) && isVisible(element)) {\n 
shouldAddElement = true;\n }\n }\n }\n if (element && isTextNode(element) && isTextVisible(element)) {\n shouldAddElement = true;\n }\n if (shouldAddElement) {\n candidateElements.push(element);\n }\n }\n console.timeEnd(\"processElements:findCandidates\");\n const selectorMap = {};\n let outputString = \"\";\n console.log(\n `Stagehand (Browser Process): Processing candidate elements: ${candidateElements.length}`\n );\n console.time(\"processElements:processCandidates\");\n console.time(\"processElements:generateXPaths\");\n const xpathLists = await Promise.all(\n candidateElements.map(async (element) => {\n if (xpathCache.has(element)) {\n return xpathCache.get(element);\n }\n const xpaths = await generateXPathsForElement(element);\n xpathCache.set(element, xpaths);\n return xpaths;\n })\n );\n console.timeEnd(\"processElements:generateXPaths\");\n candidateElements.forEach((element, index) => {\n const xpaths = xpathLists[index];\n let elementOutput = \"\";\n if (isTextNode(element)) {\n const textContent = element.textContent?.trim();\n if (textContent) {\n elementOutput += `${index + indexOffset}:${textContent}\n`;\n }\n } else if (isElementNode(element)) {\n const tagName = element.tagName.toLowerCase();\n const attributes = collectEssentialAttributes(element);\n const openingTag = `<${tagName}${attributes ? 
\" \" + attributes : \"\"}>`;\n const closingTag = `</${tagName}>`;\n const textContent = element.textContent?.trim() || \"\";\n elementOutput += `${index + indexOffset}:${openingTag}${textContent}${closingTag}\n`;\n }\n outputString += elementOutput;\n selectorMap[index + indexOffset] = xpaths;\n });\n console.timeEnd(\"processElements:processCandidates\");\n console.timeEnd(\"processElements:total\");\n return {\n outputString,\n selectorMap\n };\n }\n function collectEssentialAttributes(element) {\n const essentialAttributes = [\n \"id\",\n \"class\",\n \"href\",\n \"src\",\n \"aria-label\",\n \"aria-name\",\n \"aria-role\",\n \"aria-description\",\n \"aria-expanded\",\n \"aria-haspopup\",\n \"type\",\n \"value\"\n ];\n const attrs = essentialAttributes.map((attr) => {\n const value = element.getAttribute(attr);\n return value ? `${attr}=\"${value}\"` : \"\";\n }).filter((attr) => attr !== \"\");\n Array.from(element.attributes).forEach((attr) => {\n if (attr.name.startsWith(\"data-\")) {\n attrs.push(`${attr.name}=\"${attr.value}\"`);\n }\n });\n return attrs.join(\" \");\n }\n function storeDOM() {\n const originalDOM = document.body.cloneNode(true);\n console.log(\"DOM state stored.\");\n return originalDOM.outerHTML;\n }\n function restoreDOM(storedDOM) {\n console.log(\"Restoring DOM\");\n if (storedDOM) {\n document.body.innerHTML = storedDOM;\n } else {\n console.error(\"No DOM state was provided.\");\n }\n }\n function createTextBoundingBoxes() {\n const style = document.createElement(\"style\");\n document.head.appendChild(style);\n if (style.sheet) {\n style.sheet.insertRule(\n `\n .stagehand-highlighted-word, .stagehand-space {\n border: 0px solid orange;\n display: inline-block !important;\n visibility: visible;\n }\n `,\n 0\n );\n style.sheet.insertRule(\n `\n code .stagehand-highlighted-word, code .stagehand-space,\n pre .stagehand-highlighted-word, pre .stagehand-space {\n white-space: pre-wrap;\n display: inline !important;\n }\n `,\n 1\n );\n 
}\n function applyHighlighting(root) {\n root.querySelectorAll(\"body *\").forEach((element) => {\n if (element.closest(\".stagehand-nav, .stagehand-marker\")) {\n return;\n }\n if ([\"SCRIPT\", \"STYLE\", \"IFRAME\", \"INPUT\", \"TEXTAREA\"].includes(\n element.tagName\n )) {\n return;\n }\n const childNodes = Array.from(element.childNodes);\n childNodes.forEach((node) => {\n if (node.nodeType === 3 && node.textContent?.trim().length > 0) {\n const textContent = node.textContent.replace(/\\u00A0/g, \" \");\n const tokens = textContent.split(/(\\s+)/g);\n const fragment = document.createDocumentFragment();\n const parentIsCode = element.tagName === \"CODE\";\n tokens.forEach((token) => {\n const span = document.createElement(\"span\");\n span.textContent = token;\n if (parentIsCode) {\n span.style.whiteSpace = \"pre-wrap\";\n span.style.display = \"inline\";\n }\n span.className = token.trim().length === 0 ? \"stagehand-space\" : \"stagehand-highlighted-word\";\n fragment.appendChild(span);\n });\n if (fragment.childNodes.length > 0 && node.parentNode) {\n element.insertBefore(fragment, node);\n node.remove();\n }\n }\n });\n });\n }\n applyHighlighting(document);\n document.querySelectorAll(\"iframe\").forEach((iframe) => {\n try {\n iframe.contentWindow?.postMessage({ action: \"highlight\" }, \"*\");\n } catch (error) {\n console.error(\"Error accessing iframe content: \", error);\n }\n });\n }\n function getElementBoundingBoxes(xpath) {\n const element = document.evaluate(\n xpath,\n document,\n null,\n XPathResult.FIRST_ORDERED_NODE_TYPE,\n null\n ).singleNodeValue;\n if (!element) return [];\n const isValidText = (text) => text && text.trim().length > 0;\n let dropDownElem = element.querySelector(\"option[selected]\");\n if (!dropDownElem) {\n dropDownElem = element.querySelector(\"option\");\n }\n if (dropDownElem) {\n const elemText = dropDownElem.textContent || \"\";\n if (isValidText(elemText)) {\n const parentRect = element.getBoundingClientRect();\n 
return [\n {\n text: elemText.trim(),\n top: parentRect.top + window.scrollY,\n left: parentRect.left + window.scrollX,\n width: parentRect.width,\n height: parentRect.height\n }\n ];\n } else {\n return [];\n }\n }\n let placeholderText = \"\";\n if ((element.tagName.toLowerCase() === \"input\" || element.tagName.toLowerCase() === \"textarea\") && element.placeholder) {\n placeholderText = element.placeholder;\n } else if (element.tagName.toLowerCase() === \"a\") {\n placeholderText = \"\";\n } else if (element.tagName.toLowerCase() === \"img\") {\n placeholderText = element.alt || \"\";\n }\n const words = element.querySelectorAll(\n \".stagehand-highlighted-word\"\n );\n const boundingBoxes = Array.from(words).map((word) => {\n const rect = word.getBoundingClientRect();\n return {\n text: word.innerText || \"\",\n top: rect.top + window.scrollY,\n left: rect.left + window.scrollX,\n width: rect.width,\n height: rect.height * 0.75\n };\n }).filter(\n (box) => box.width > 0 && box.height > 0 && box.top >= 0 && box.left >= 0 && isValidText(box.text)\n );\n if (boundingBoxes.length === 0) {\n const elementRect = element.getBoundingClientRect();\n return [\n {\n text: placeholderText,\n top: elementRect.top + window.scrollY,\n left: elementRect.left + window.scrollX,\n width: elementRect.width,\n height: elementRect.height * 0.75\n }\n ];\n }\n return boundingBoxes;\n }\n window.processDom = processDom;\n window.processAllOfDom = processAllOfDom;\n window.processElements = processElements;\n window.scrollToHeight = scrollToHeight;\n window.storeDOM = storeDOM;\n window.restoreDOM = restoreDOM;\n window.createTextBoundingBoxes = createTextBoundingBoxes;\n window.getElementBoundingBoxes = getElementBoundingBoxes;\n var leafElementDenyList = [\"SVG\", \"IFRAME\", \"SCRIPT\", \"STYLE\", \"LINK\"];\n var interactiveElementTypes = [\n \"A\",\n \"BUTTON\",\n \"DETAILS\",\n \"EMBED\",\n \"INPUT\",\n \"LABEL\",\n \"MENU\",\n \"MENUITEM\",\n \"OBJECT\",\n \"SELECT\",\n 
\"TEXTAREA\",\n \"SUMMARY\"\n ];\n var interactiveRoles = [\n \"button\",\n \"menu\",\n \"menuitem\",\n \"link\",\n \"checkbox\",\n \"radio\",\n \"slider\",\n \"tab\",\n \"tabpanel\",\n \"textbox\",\n \"combobox\",\n \"grid\",\n \"listbox\",\n \"option\",\n \"progressbar\",\n \"scrollbar\",\n \"searchbox\",\n \"switch\",\n \"tree\",\n \"treeitem\",\n \"spinbutton\",\n \"tooltip\"\n ];\n var interactiveAriaRoles = [\"menu\", \"menuitem\", \"button\"];\n var isVisible = (element) => {\n const rect = element.getBoundingClientRect();\n if (rect.width === 0 || rect.height === 0 || rect.top < 0 || rect.top > window.innerHeight) {\n return false;\n }\n if (!isTopElement(element, rect)) {\n return false;\n }\n const visible = element.checkVisibility({\n checkOpacity: true,\n checkVisibilityCSS: true\n });\n return visible;\n };\n var isTextVisible = (element) => {\n const range = document.createRange();\n range.selectNodeContents(element);\n const rect = range.getBoundingClientRect();\n if (rect.width === 0 || rect.height === 0 || rect.top < 0 || rect.top > window.innerHeight) {\n return false;\n }\n const parent = element.parentElement;\n if (!parent) {\n return false;\n }\n if (!isTopElement(parent, rect)) {\n return false;\n }\n const visible = parent.checkVisibility({\n checkOpacity: true,\n checkVisibilityCSS: true\n });\n return visible;\n };\n function isTopElement(elem, rect) {\n const points = [\n { x: rect.left + rect.width * 0.25, y: rect.top + rect.height * 0.25 },\n { x: rect.left + rect.width * 0.75, y: rect.top + rect.height * 0.25 },\n { x: rect.left + rect.width * 0.25, y: rect.top + rect.height * 0.75 },\n { x: rect.left + rect.width * 0.75, y: rect.top + rect.height * 0.75 },\n { x: rect.left + rect.width / 2, y: rect.top + rect.height / 2 }\n ];\n return points.some((point) => {\n const topEl = document.elementFromPoint(point.x, point.y);\n let current = topEl;\n while (current && current !== document.body) {\n if (current.isSameNode(elem)) {\n return 
true;\n }\n current = current.parentElement;\n }\n return false;\n });\n }\n var isActive = (element) => {\n if (element.hasAttribute(\"disabled\") || element.hasAttribute(\"hidden\") || element.getAttribute(\"aria-disabled\") === \"true\") {\n return false;\n }\n return true;\n };\n var isInteractiveElement = (element) => {\n const elementType = element.tagName;\n const elementRole = element.getAttribute(\"role\");\n const elementAriaRole = element.getAttribute(\"aria-role\");\n return elementType && interactiveElementTypes.includes(elementType) || elementRole && interactiveRoles.includes(elementRole) || elementAriaRole && interactiveAriaRoles.includes(elementAriaRole);\n };\n var isLeafElement = (element) => {\n if (element.textContent === \"\") {\n return false;\n }\n if (element.childNodes.length === 0) {\n return !leafElementDenyList.includes(element.tagName);\n }\n if (element.childNodes.length === 1 && isTextNode(element.childNodes[0])) {\n return true;\n }\n return false;\n };\n async function pickChunk(chunksSeen) {\n const viewportHeight = calculateViewportHeight();\n const documentHeight = document.documentElement.scrollHeight;\n const chunks = Math.ceil(documentHeight / viewportHeight);\n const chunksArray = Array.from({ length: chunks }, (_, i) => i);\n const chunksRemaining = chunksArray.filter((chunk2) => {\n return !chunksSeen.includes(chunk2);\n });\n const currentScrollPosition = window.scrollY;\n const closestChunk = chunksRemaining.reduce((closest, current) => {\n const currentChunkTop = viewportHeight * current;\n const closestChunkTop = viewportHeight * closest;\n return Math.abs(currentScrollPosition - currentChunkTop) < Math.abs(currentScrollPosition - closestChunkTop) ? 
current : closest;\n }, chunksRemaining[0]);\n const chunk = closestChunk;\n if (chunk === void 0) {\n throw new Error(`No chunks remaining to check: ${chunksRemaining}`);\n }\n return {\n chunk,\n chunksArray\n };\n }\n\n // lib/dom/debug.ts\n async function debugDom() {\n window.chunkNumber = 0;\n const { selectorMap: multiSelectorMap } = await window.processElements(\n window.chunkNumber\n );\n const selectorMap = multiSelectorMapToSelectorMap(multiSelectorMap);\n drawChunk(selectorMap);\n setupChunkNav();\n }\n function multiSelectorMapToSelectorMap(multiSelectorMap) {\n return Object.fromEntries(\n Object.entries(multiSelectorMap).map(([key, selectors]) => [\n Number(key),\n selectors[0]\n ])\n );\n }\n function drawChunk(selectorMap) {\n if (!window.showChunks) return;\n cleanupMarkers();\n Object.values(selectorMap).forEach((selector) => {\n const element = document.evaluate(\n selector,\n document,\n null,\n XPathResult.FIRST_ORDERED_NODE_TYPE,\n null\n ).singleNodeValue;\n if (element) {\n let rect;\n if (element.nodeType === Node.ELEMENT_NODE) {\n rect = element.getBoundingClientRect();\n } else {\n const range = document.createRange();\n range.selectNodeContents(element);\n rect = range.getBoundingClientRect();\n }\n const color = \"grey\";\n const overlay = document.createElement(\"div\");\n overlay.style.position = \"absolute\";\n overlay.style.left = `${rect.left + window.scrollX}px`;\n overlay.style.top = `${rect.top + window.scrollY}px`;\n overlay.style.padding = \"2px\";\n overlay.style.width = `${rect.width}px`;\n overlay.style.height = `${rect.height}px`;\n overlay.style.backgroundColor = color;\n overlay.className = \"stagehand-marker\";\n overlay.style.opacity = \"0.3\";\n overlay.style.zIndex = \"1000000000\";\n overlay.style.border = \"1px solid\";\n overlay.style.pointerEvents = \"none\";\n document.body.appendChild(overlay);\n }\n });\n }\n async function cleanupDebug() {\n cleanupMarkers();\n cleanupNav();\n }\n function cleanupMarkers() 
{\n const markers = document.querySelectorAll(\".stagehand-marker\");\n markers.forEach((marker) => {\n marker.remove();\n });\n }\n function cleanupNav() {\n const stagehandNavElements = document.querySelectorAll(\".stagehand-nav\");\n stagehandNavElements.forEach((element) => {\n element.remove();\n });\n }\n function setupChunkNav() {\n const viewportHeight = calculateViewportHeight();\n const documentHeight = document.documentElement.scrollHeight;\n const totalChunks = Math.ceil(documentHeight / viewportHeight);\n if (window.chunkNumber > 0) {\n const prevChunkButton = document.createElement(\"button\");\n prevChunkButton.className = \"stagehand-nav\";\n prevChunkButton.textContent = \"Previous\";\n prevChunkButton.style.marginLeft = \"50px\";\n prevChunkButton.style.position = \"fixed\";\n prevChunkButton.style.bottom = \"10px\";\n prevChunkButton.style.left = \"50%\";\n prevChunkButton.style.transform = \"translateX(-50%)\";\n prevChunkButton.style.zIndex = \"1000000000\";\n prevChunkButton.onclick = async () => {\n cleanupMarkers();\n cleanupNav();\n window.chunkNumber -= 1;\n window.scrollTo(0, window.chunkNumber * viewportHeight);\n await window.waitForDomSettle();\n const { selectorMap: multiSelectorMap } = await window.processElements(\n window.chunkNumber\n );\n const selectorMap = multiSelectorMapToSelectorMap(multiSelectorMap);\n drawChunk(selectorMap);\n setupChunkNav();\n };\n document.body.appendChild(prevChunkButton);\n }\n if (totalChunks > window.chunkNumber) {\n const nextChunkButton = document.createElement(\"button\");\n nextChunkButton.className = \"stagehand-nav\";\n nextChunkButton.textContent = \"Next\";\n nextChunkButton.style.marginRight = \"50px\";\n nextChunkButton.style.position = \"fixed\";\n nextChunkButton.style.bottom = \"10px\";\n nextChunkButton.style.right = \"50%\";\n nextChunkButton.style.transform = \"translateX(50%)\";\n nextChunkButton.style.zIndex = \"1000000000\";\n nextChunkButton.onclick = async () => {\n 
cleanupMarkers();\n cleanupNav();\n window.chunkNumber += 1;\n window.scrollTo(0, window.chunkNumber * viewportHeight);\n await window.waitForDomSettle();\n const { selectorMap: multiSelectorMap } = await window.processElements(\n window.chunkNumber\n );\n const selectorMap = multiSelectorMapToSelectorMap(multiSelectorMap);\n drawChunk(selectorMap);\n setupChunkNav();\n };\n document.body.appendChild(nextChunkButton);\n }\n }\n window.debugDom = debugDom;\n window.cleanupDebug = cleanupDebug;\n})();\n";

@@ -18,1 +18,11 @@ export declare function isElementNode(node: Node): node is Element;

}>;
/**
 * Captures the current page body as an HTML string so it can later be handed to `restoreDOM`.
 *
 * The bundled implementation deep-clones `document.body`, logs "DOM state stored.",
 * and returns the clone's `outerHTML`.
 *
 * @returns The serialized markup of the cloned `<body>` element.
 */
export declare function storeDOM(): string;
/**
 * Replaces the contents of `document.body` with previously captured markup.
 *
 * The bundled implementation assigns `storedDOM` to `document.body.innerHTML`.
 * When `storedDOM` is empty/falsy the page is left untouched and
 * "No DOM state was provided." is reported through `console.error`.
 *
 * @param storedDOM - HTML markup to put back into the body (e.g. the string returned by `storeDOM`).
 */
export declare function restoreDOM(storedDOM: string): void;
/**
 * Mutates the live page so that every piece of visible text is wrapped in its own `<span>`.
 *
 * As implemented in the bundled script, this:
 * - appends a `<style>` element to `document.head` with rules for the
 *   `.stagehand-highlighted-word` and `.stagehand-space` classes;
 * - walks every element under `<body>` (skipping anything inside `.stagehand-nav` /
 *   `.stagehand-marker`, and SCRIPT, STYLE, IFRAME, INPUT and TEXTAREA elements) and replaces
 *   each non-blank text node with one span per token, splitting on runs of whitespace
 *   (non-breaking spaces are converted to regular spaces first);
 * - posts a `{ action: "highlight" }` message to the `contentWindow` of every `<iframe>`.
 *
 * Whitespace tokens receive the class `stagehand-space`; all other tokens receive
 * `stagehand-highlighted-word`. The function does not undo these changes itself.
 */
export declare function createTextBoundingBoxes(): void;
/**
 * Returns positioned text boxes for the first node matched by `xpath`.
 *
 * Behavior of the bundled implementation:
 * - If the XPath matches nothing, the result is an empty array.
 * - If the matched element contains an `<option>` (the selected one, otherwise the first),
 *   the result is a single box carrying that option's trimmed text and the matched element's
 *   full rectangle, or an empty array when the option text is blank.
 * - Otherwise one box is produced per descendant `.stagehand-highlighted-word` span
 *   (the spans created by `createTextBoundingBoxes`), with the height scaled to 75% of the
 *   span's rectangle. Boxes with zero width/height, negative `top`/`left`, or blank text
 *   are dropped.
 * - If no word boxes remain, a single fallback box for the element itself is returned
 *   (height also scaled to 75%); its text is the `placeholder` of an input/textarea,
 *   the `alt` of an image, or an empty string.
 *
 * `top` and `left` are the client rectangle offsets plus `window.scrollY` / `window.scrollX`.
 *
 * @param xpath - XPath expression evaluated against `document`; only the first match is used.
 * @returns The text boxes for the matched element, possibly empty.
 */
export declare function getElementBoundingBoxes(xpath: string): Array<{
text: string;
top: number;
left: number;
width: number;
height: number;
}>;

@@ -5,2 +5,71 @@ import { LLMProvider } from "../llm/LLMProvider";

import { LLMClient } from "../llm/LLMClient";
/**
* The `StagehandExtractHandler` class is responsible for extracting structured data from a webpage.
* It provides two approaches: `textExtract` and `domExtract`. `textExtract` is used by default.
*
* Here is what `textExtract` does at a high level:
*
* **1. Wait for the DOM to settle and start DOM debugging.**
* - Ensures the page is fully loaded and stable before extraction.
*
* **2. Store the original DOM before any mutations.**
* - Preserves the initial state of the DOM to restore later.
* - We do this because creating spans around every word in the DOM (see step 4)
* becomes very difficult to revert. Text nodes can be finicky, and directly
* removing the added spans often corrupts the structure of the DOM.
*
* **3. Process the DOM to generate a selector map of candidate elements.**
* - Identifies potential elements that contain the data to extract.
*
* **4. Create text bounding boxes around every word in the webpage.**
* - Wraps words in spans so that their bounding boxes can be used to
* determine their positions on the text-rendered webpage.
*
* **5. Collect all text annotations (with positions and dimensions) from each of the candidate elements.**
* - Gathers text and positional data for each word.
*
* **6. Group annotations by text and deduplicate them based on proximity.**
* - There is no guarantee that the text annotations are unique (candidate elements can be nested).
* - Thus, we must remove duplicate words that are close to each other on the page.
*
* **7. Restore the original DOM after mutations.**
* - Returns the DOM to its original state after processing.
*
* **8. Format the deduplicated annotations into a text representation.**
* - Prepares the text data for the extraction process.
*
* **9. Pass the formatted text to an LLM for extraction according to the given instruction and schema.**
* - Uses a language model to extract structured data based on instructions.
*
* **10. Handle the extraction response and log the results.**
* - Processes the output from the LLM and logs relevant information.
*
*
* Here is what `domExtract` does at a high level:
*
* **1. Wait for the DOM to settle and start DOM debugging.**
* - Ensures the page is fully loaded and stable before extraction.
*
* **2. Process the DOM in chunks.**
* - The `processDom` function:
* - Divides the page into vertical "chunks" based on viewport height.
* - Picks the next chunk that hasn't been processed yet.
* - Scrolls to that chunk and extracts candidate elements.
* - Returns `outputString` (HTML snippets of candidate elements),
* `selectorMap` (the XPaths of the candidate elements),
* `chunk` (the current chunk index), and `chunks` (the array of all chunk indices).
* - This chunk-based approach ensures that large or lengthy pages can be processed in smaller, manageable sections.
*
* **3. Pass the extracted DOM elements (in `outputString`) to the LLM for structured data extraction.**
* - Uses the instructions, schema, and previously extracted content as context to
* guide the LLM in extracting the structured data.
*
* **4. Check if extraction is complete.**
* - If the extraction is complete (all chunks have been processed or the LLM determines
* that we do not need to continue), return the final result.
* - If not, repeat steps 1-4 with the next chunk until extraction is complete or no more chunks remain.
*
* @remarks
* Each step corresponds to specific code segments, as noted in the comments throughout the code.
*/
export declare class StagehandExtractHandler {

@@ -35,3 +104,3 @@ private readonly stagehand;

});
extract<T extends z.AnyZodObject>({ instruction, schema, content, chunksSeen, llmClient, requestId, domSettleTimeoutMs, }: {
extract<T extends z.AnyZodObject>({ instruction, schema, content, chunksSeen, llmClient, requestId, domSettleTimeoutMs, useTextExtract, }: {
instruction: string;

@@ -44,3 +113,6 @@ schema: T;

domSettleTimeoutMs?: number;
useTextExtract?: boolean;
}): Promise<z.infer<T>>;
private textExtract;
private domExtract;
}

@@ -10,2 +10,3 @@ import { type BrowserContext, type Page } from "@playwright/test";

context: BrowserContext;
browserbaseSessionID?: string;
private env;

@@ -43,3 +44,3 @@ private apiKey;

act({ action, modelName, modelClientOptions, useVision, variables, domSettleTimeoutMs, }: ActOptions): Promise<ActResult>;
extract<T extends z.AnyZodObject>({ instruction, schema, modelName, modelClientOptions, domSettleTimeoutMs, }: ExtractOptions<T>): Promise<ExtractResult<T>>;
extract<T extends z.AnyZodObject>({ instruction, schema, modelName, modelClientOptions, domSettleTimeoutMs, useTextExtract, }: ExtractOptions<T>): Promise<ExtractResult<T>>;
observe(options?: ObserveOptions): Promise<ObserveResult[]>;

@@ -46,0 +47,0 @@ close(): Promise<void>;

@@ -8,3 +8,3 @@ import { z } from "zod";

export declare function act({ action, domElements, steps, llmClient, screenshot, retries, logger, requestId, variables, }: ActParams): Promise<ActResult | null>;
export declare function extract({ instruction, previouslyExtractedContent, domElements, schema, llmClient, chunksSeen, chunksTotal, requestId, }: {
export declare function extract({ instruction, previouslyExtractedContent, domElements, schema, llmClient, chunksSeen, chunksTotal, requestId, isUsingTextExtract, }: {
instruction: string;

@@ -18,2 +18,3 @@ previouslyExtractedContent: object;

requestId: string;
isUsingTextExtract?: boolean;
}): Promise<{

@@ -20,0 +21,0 @@ metadata: {

@@ -8,3 +8,3 @@ import OpenAI from "openai";

export declare const actTools: Array<OpenAI.ChatCompletionTool>;
export declare function buildExtractSystemPrompt(isUsingPrintExtractedDataTool?: boolean): ChatMessage;
export declare function buildExtractSystemPrompt(isUsingPrintExtractedDataTool?: boolean, useTextExtract?: boolean): ChatMessage;
export declare function buildExtractUserPrompt(instruction: string, domElements: string, isUsingPrintExtractedDataTool?: boolean): ChatMessage;

@@ -11,0 +11,0 @@ export declare function buildRefineSystemPrompt(): ChatMessage;

import { LogLine } from "../types/log";
import { TextAnnotation } from "../types/textannotation";
import { z } from "zod";
/**
 * Produces a string identifier for the given operation.
 *
 * NOTE(review): the id format and any uniqueness/determinism guarantees are not visible
 * from this declaration — confirm against the implementation.
 *
 * @param operation - Name of the operation the id is generated for.
 * @returns The generated identifier.
 */
export declare function generateId(operation: string): string;
/**
* `formatText` converts a list of text annotations into a formatted text representation.
* Each annotation represents a piece of text at a certain position on a webpage.
* The formatting attempts to reconstruct a textual "screenshot" of the page by:
* - Grouping annotations into lines based on their vertical positions.
* - Adjusting spacing to reflect line gaps.
* - Attempting to preserve relative positions and formatting.
*
* The output is a text block, optionally surrounded by lines of dashes, that aims
* to closely mirror the visual layout of the text on the page.
*
* @param textAnnotations - An array of TextAnnotations describing text and their positions.
* @param pageWidth - The width of the page in pixels, used to normalize positions.
* @returns A string representing the text layout of the page.
*/
export declare function formatText(textAnnotations: TextAnnotation[], pageWidth: number): string;
/**
 * Converts a `LogLine` into a single string.
 *
 * NOTE(review): the exact output format is defined by the implementation, which is not
 * visible from this declaration.
 *
 * @param logLine - The log entry to render.
 * @returns The string form of `logLine`.
 */
export declare function logLineToString(logLine: LogLine): string;
/**
 * Checks `data` against a Zod schema and reports the outcome as a boolean.
 *
 * NOTE(review): presumably returns `true` when `data` conforms to `schema` and `false`
 * otherwise, without throwing — confirm against the implementation.
 *
 * @param schema - The Zod schema to validate against.
 * @param data - The value to check.
 */
export declare function validateZodSchema(schema: z.ZodTypeAny, data: unknown): boolean;

@@ -8,2 +8,3 @@ import { Browser, BrowserContext } from "@playwright/test";

contextPath?: string;
sessionId?: string;
}

@@ -9,2 +9,3 @@ import { EvalLogger } from "../evals/utils";

logger: EvalLogger;
useTextExtract: boolean;
}) => Promise<{

@@ -17,3 +18,3 @@ _success: boolean;

}>;
export declare const EvalCategorySchema: z.ZodEnum<["observe", "act", "combination", "extract", "experimental"]>;
export declare const EvalCategorySchema: z.ZodEnum<["observe", "act", "combination", "extract", "experimental", "text_extract"]>;
export type EvalCategory = z.infer<typeof EvalCategorySchema>;

@@ -20,0 +21,0 @@ export interface EvalInput {

@@ -23,6 +23,2 @@ import Browserbase from "@browserbasehq/sdk";

}
export interface InitResult {
debugUrl: string;
sessionUrl: string;
}
export interface InitOptions {

@@ -39,2 +35,3 @@ /** @deprecated Pass this into the Stagehand constructor instead. This will be removed in the next major version. */

sessionUrl: string;
sessionId: string;
}

@@ -70,2 +67,3 @@ export interface InitFromPageOptions {

domSettleTimeoutMs?: number;
useTextExtract?: boolean;
}

@@ -72,0 +70,0 @@ export type ExtractResult<T extends z.AnyZodObject> = z.infer<T>;

@@ -356,2 +356,152 @@ (() => {

}
/**
 * Captures the current markup of `document.body` as a string.
 *
 * The markup is read straight from the live body: reading `outerHTML` does not
 * mutate the node, so deep-cloning the whole subtree first only costs time and
 * memory on large pages without changing the result.
 *
 * @returns The `outerHTML` of `document.body` at the time of the call.
 */
function storeDOM() {
  const serializedBody = document.body.outerHTML;
  console.log("DOM state stored.");
  return serializedBody;
}
/**
 * Replaces the contents of `document.body` with previously captured markup.
 *
 * A falsy argument (empty string, `null`, `undefined`) leaves the page
 * untouched and is reported through `console.error`.
 *
 * NOTE(review): the markup is assigned to `innerHTML`, while `storeDOM` in
 * this bundle returns the body's `outerHTML` — confirm the enclosing `<body>`
 * tag in the stored string is handled as intended.
 *
 * @param storedDOM - Markup string to assign to `document.body.innerHTML`.
 */
function restoreDOM(storedDOM) {
  console.log("Restoring DOM");
  if (!storedDOM) {
    console.error("No DOM state was provided.");
    return;
  }
  document.body.innerHTML = storedDOM;
}
/**
 * Wraps the text of the page in per-token `<span>` elements.
 *
 * Steps, in order:
 * 1. Appends a `<style>` element to `document.head` and, when its sheet is
 *    available, inserts two rules for the `.stagehand-highlighted-word` and
 *    `.stagehand-space` classes (with `inline` overrides inside `code`/`pre`).
 * 2. For every element matched by `"body *"` — except those inside
 *    `.stagehand-nav` / `.stagehand-marker` and SCRIPT, STYLE, IFRAME, INPUT,
 *    TEXTAREA elements — replaces each non-blank direct text node with one
 *    span per token, where tokens are the pieces produced by splitting on
 *    whitespace runs (the runs themselves are kept as tokens).
 * 3. Posts `{ action: "highlight" }` to the content window of every iframe.
 */
function createTextBoundingBoxes() {
  const SKIPPED_TAGS = ["SCRIPT", "STYLE", "IFRAME", "INPUT", "TEXTAREA"];

  const styleEl = document.createElement("style");
  document.head.appendChild(styleEl);
  const sheet = styleEl.sheet;
  if (sheet) {
    // The rule text is kept flush-left so the inserted strings stay unchanged.
    const tokenRule = `
.stagehand-highlighted-word, .stagehand-space {
border: 0px solid orange;
display: inline-block !important;
visibility: visible;
}
`;
    const codeRule = `
code .stagehand-highlighted-word, code .stagehand-space,
pre .stagehand-highlighted-word, pre .stagehand-space {
white-space: pre-wrap;
display: inline !important;
}
`;
    sheet.insertRule(tokenRule, 0);
    sheet.insertRule(codeRule, 1);
  }

  // Builds the span that stands in for one token of a text node.
  const buildTokenSpan = (token, insideCode) => {
    const span = document.createElement("span");
    span.textContent = token;
    span.className = token.trim().length === 0 ? "stagehand-space" : "stagehand-highlighted-word";
    if (insideCode) {
      // Inline styles mirror the stylesheet override for direct children of CODE.
      span.style.whiteSpace = "pre-wrap";
      span.style.display = "inline";
    }
    return span;
  };

  function applyHighlighting(root) {
    for (const element of Array.from(root.querySelectorAll("body *"))) {
      if (element.closest(".stagehand-nav, .stagehand-marker")) {
        continue;
      }
      if (SKIPPED_TAGS.includes(element.tagName)) {
        continue;
      }
      const insideCode = element.tagName === "CODE";
      // Iterate over a copy: the child list is edited while we walk it.
      for (const node of Array.from(element.childNodes)) {
        const isTextNode = node.nodeType === 3;
        if (!isTextNode || !(node.textContent?.trim().length > 0)) {
          continue;
        }
        const fragment = document.createDocumentFragment();
        node.textContent
          .replace(/\u00A0/g, " ")
          .split(/(\s+)/g)
          .forEach((token) => {
            fragment.appendChild(buildTokenSpan(token, insideCode));
          });
        if (fragment.childNodes.length > 0 && node.parentNode) {
          element.insertBefore(fragment, node);
          node.remove();
        }
      }
    }
  }

  applyHighlighting(document);
  for (const iframe of Array.from(document.querySelectorAll("iframe"))) {
    try {
      iframe.contentWindow?.postMessage({ action: "highlight" }, "*");
    } catch (error) {
      console.error("Error accessing iframe content: ", error);
    }
  }
}
/**
 * Resolves `xpath` against the document and returns page-coordinate boxes
 * describing the text found in the matched element.
 *
 * Resolution order:
 * 1. No match: returns `[]`.
 * 2. The element contains an `<option>`: returns a single box sized to the
 *    whole element and labelled with one option's trimmed text, or `[]` when
 *    that text is blank. The option is the currently selected one
 *    (`option:checked`), falling back to the one carrying the `selected`
 *    attribute and then to the first option. `[selected]` alone only reflects
 *    the default selection from markup, so it goes stale once the user or a
 *    script changes the dropdown.
 * 3. Otherwise: one box per `.stagehand-highlighted-word` descendant with a
 *    positive size, non-negative page position and non-blank text.
 * 4. No such word survives the filter: a single box for the element itself,
 *    labelled with an input/textarea `placeholder`, an image `alt`, or `""`.
 *
 * Positions are the client rect offset by the window scroll position. Box
 * height is 75% of the rect height in cases 3 and 4 and the full rect height
 * in case 2.
 *
 * @param xpath - XPath expression evaluated with `document` as context node.
 * @returns Array of `{ text, top, left, width, height }` objects.
 */
function getElementBoundingBoxes(xpath) {
  const element = document.evaluate(
    xpath,
    document,
    null,
    XPathResult.FIRST_ORDERED_NODE_TYPE,
    null
  ).singleNodeValue;
  if (!element) return [];

  const isValidText = (text) => text && text.trim().length > 0;

  // Converts a client rect into a page-coordinate box; `heightScale` lets the
  // callers keep their differing height conventions.
  const toPageBox = (text, rect, heightScale) => ({
    text,
    top: rect.top + window.scrollY,
    left: rect.left + window.scrollX,
    width: rect.width,
    height: rect.height * heightScale
  });

  // Prefer the live selection; the attribute and first-option lookups remain
  // as fallbacks so results are unchanged when nothing is currently selected.
  const dropDownElem =
    element.querySelector("option:checked") ||
    element.querySelector("option[selected]") ||
    element.querySelector("option");
  if (dropDownElem) {
    const elemText = dropDownElem.textContent || "";
    if (!isValidText(elemText)) {
      return [];
    }
    return [toPageBox(elemText.trim(), element.getBoundingClientRect(), 1)];
  }

  // Label used only when the element has no usable highlighted words.
  const tagName = element.tagName.toLowerCase();
  let placeholderText = "";
  if ((tagName === "input" || tagName === "textarea") && element.placeholder) {
    placeholderText = element.placeholder;
  } else if (tagName === "img") {
    placeholderText = element.alt || "";
  }

  const words = element.querySelectorAll(".stagehand-highlighted-word");
  const boundingBoxes = Array.from(words)
    .map((word) => {
      const rect = word.getBoundingClientRect();
      return toPageBox(word.innerText || "", rect, 0.75);
    })
    .filter(
      (box) => box.width > 0 && box.height > 0 && box.top >= 0 && box.left >= 0 && isValidText(box.text)
    );
  if (boundingBoxes.length > 0) {
    return boundingBoxes;
  }
  return [toPageBox(placeholderText, element.getBoundingClientRect(), 0.75)];
}
window.processDom = processDom;

@@ -361,2 +511,6 @@ window.processAllOfDom = processAllOfDom;

window.scrollToHeight = scrollToHeight;
// Publish the DOM snapshot/restore helpers and the text bounding-box helpers
// as properties of `window`.
// NOTE(review): presumably so they can be invoked from outside this bundle
// (e.g. by code evaluating scripts in the page) — confirm against callers.
window.storeDOM = storeDOM;
window.restoreDOM = restoreDOM;
window.createTextBoundingBoxes = createTextBoundingBoxes;
window.getElementBoundingBoxes = getElementBoundingBoxes;
var leafElementDenyList = ["SVG", "IFRAME", "SCRIPT", "STYLE", "LINK"];

@@ -363,0 +517,0 @@ var interactiveElementTypes = [

@@ -1,1 +0,1 @@

export const scriptContent = "(() => {\n // lib/dom/xpathUtils.ts\n function getParentElement(node) {\n return isElementNode(node) ? node.parentElement : node.parentNode;\n }\n function getCombinations(attributes, size) {\n const results = [];\n function helper(start, combo) {\n if (combo.length === size) {\n results.push([...combo]);\n return;\n }\n for (let i = start; i < attributes.length; i++) {\n combo.push(attributes[i]);\n helper(i + 1, combo);\n combo.pop();\n }\n }\n helper(0, []);\n return results;\n }\n function isXPathFirstResultElement(xpath, target) {\n try {\n const result = document.evaluate(\n xpath,\n document.documentElement,\n null,\n XPathResult.ORDERED_NODE_SNAPSHOT_TYPE,\n null\n );\n return result.snapshotItem(0) === target;\n } catch (error) {\n console.warn(`Invalid XPath expression: ${xpath}`, error);\n return false;\n }\n }\n function escapeXPathString(value) {\n if (value.includes(\"'\")) {\n if (value.includes('\"')) {\n return \"concat(\" + value.split(/('+)/).map((part) => {\n if (part === \"'\") {\n return `\"'\"`;\n } else if (part.startsWith(\"'\") && part.endsWith(\"'\")) {\n return `\"${part}\"`;\n } else {\n return `'${part}'`;\n }\n }).join(\",\") + \")\";\n } else {\n return `\"${value}\"`;\n }\n } else {\n return `'${value}'`;\n }\n }\n async function generateXPathsForElement(element) {\n if (!element) return [];\n const [complexXPath, standardXPath, idBasedXPath] = await Promise.all([\n generateComplexXPath(element),\n generateStandardXPath(element),\n generatedIdBasedXPath(element)\n ]);\n return [standardXPath, ...idBasedXPath ? 
[idBasedXPath] : [], complexXPath];\n }\n async function generateComplexXPath(element) {\n const parts = [];\n let currentElement = element;\n while (currentElement && (isTextNode(currentElement) || isElementNode(currentElement))) {\n if (isElementNode(currentElement)) {\n const el = currentElement;\n let selector = el.tagName.toLowerCase();\n const attributePriority = [\n \"data-qa\",\n \"data-component\",\n \"data-role\",\n \"role\",\n \"aria-role\",\n \"type\",\n \"name\",\n \"aria-label\",\n \"placeholder\",\n \"title\",\n \"alt\"\n ];\n const attributes = attributePriority.map((attr) => {\n let value = el.getAttribute(attr);\n if (attr === \"href-full\" && value) {\n value = el.getAttribute(\"href\");\n }\n return value ? { attr: attr === \"href-full\" ? \"href\" : attr, value } : null;\n }).filter((attr) => attr !== null);\n let uniqueSelector = \"\";\n for (let i = 1; i <= attributes.length; i++) {\n const combinations = getCombinations(attributes, i);\n for (const combo of combinations) {\n const conditions = combo.map((a) => `@${a.attr}=${escapeXPathString(a.value)}`).join(\" and \");\n const xpath2 = `//${selector}[${conditions}]`;\n if (isXPathFirstResultElement(xpath2, el)) {\n uniqueSelector = xpath2;\n break;\n }\n }\n if (uniqueSelector) break;\n }\n if (uniqueSelector) {\n parts.unshift(uniqueSelector.replace(\"//\", \"\"));\n break;\n } else {\n const parent = getParentElement(el);\n if (parent) {\n const siblings = Array.from(parent.children).filter(\n (sibling) => sibling.tagName === el.tagName\n );\n const index = siblings.indexOf(el) + 1;\n selector += siblings.length > 1 ? 
`[${index}]` : \"\";\n }\n parts.unshift(selector);\n }\n }\n currentElement = getParentElement(currentElement);\n }\n const xpath = \"//\" + parts.join(\"/\");\n return xpath;\n }\n async function generateStandardXPath(element) {\n const parts = [];\n while (element && (isTextNode(element) || isElementNode(element))) {\n let index = 0;\n let hasSameTypeSiblings = false;\n const siblings = element.parentElement ? Array.from(element.parentElement.childNodes) : [];\n for (let i = 0; i < siblings.length; i++) {\n const sibling = siblings[i];\n if (sibling.nodeType === element.nodeType && sibling.nodeName === element.nodeName) {\n index = index + 1;\n hasSameTypeSiblings = true;\n if (sibling.isSameNode(element)) {\n break;\n }\n }\n }\n if (element.nodeName !== \"#text\") {\n const tagName = element.nodeName.toLowerCase();\n const pathIndex = hasSameTypeSiblings ? `[${index}]` : \"\";\n parts.unshift(`${tagName}${pathIndex}`);\n }\n element = element.parentElement;\n }\n return parts.length ? 
`/${parts.join(\"/\")}` : \"\";\n }\n async function generatedIdBasedXPath(element) {\n if (isElementNode(element) && element.id) {\n return `//*[@id='${element.id}']`;\n }\n return null;\n }\n\n // lib/dom/utils.ts\n async function waitForDomSettle() {\n return new Promise((resolve) => {\n const createTimeout = () => {\n return setTimeout(() => {\n resolve();\n }, 2e3);\n };\n let timeout = createTimeout();\n const observer = new MutationObserver(() => {\n clearTimeout(timeout);\n timeout = createTimeout();\n });\n observer.observe(window.document.body, { childList: true, subtree: true });\n });\n }\n window.waitForDomSettle = waitForDomSettle;\n function calculateViewportHeight() {\n return Math.ceil(window.innerHeight * 0.75);\n }\n\n // lib/dom/process.ts\n function isElementNode(node) {\n return node.nodeType === Node.ELEMENT_NODE;\n }\n function isTextNode(node) {\n return node.nodeType === Node.TEXT_NODE && Boolean(node.textContent?.trim());\n }\n async function processDom(chunksSeen) {\n const { chunk, chunksArray } = await pickChunk(chunksSeen);\n const { outputString, selectorMap } = await processElements(chunk);\n console.log(\n `Stagehand (Browser Process): Extracted dom elements:\n${outputString}`\n );\n return {\n outputString,\n selectorMap,\n chunk,\n chunks: chunksArray\n };\n }\n async function processAllOfDom() {\n console.log(\"Stagehand (Browser Process): Processing all of DOM\");\n const viewportHeight = calculateViewportHeight();\n const documentHeight = document.documentElement.scrollHeight;\n const totalChunks = Math.ceil(documentHeight / viewportHeight);\n let index = 0;\n const results = [];\n for (let chunk = 0; chunk < totalChunks; chunk++) {\n const result = await processElements(chunk, true, index);\n results.push(result);\n index += Object.keys(result.selectorMap).length;\n }\n await scrollToHeight(0);\n const allOutputString = results.map((result) => result.outputString).join(\"\");\n const allSelectorMap = results.reduce(\n (acc, 
result) => ({ ...acc, ...result.selectorMap }),\n {}\n );\n console.log(\n `Stagehand (Browser Process): All dom elements: ${allOutputString}`\n );\n return {\n outputString: allOutputString,\n selectorMap: allSelectorMap\n };\n }\n async function scrollToHeight(height) {\n window.scrollTo({ top: height, left: 0, behavior: \"smooth\" });\n await new Promise((resolve) => {\n let scrollEndTimer;\n const handleScrollEnd = () => {\n clearTimeout(scrollEndTimer);\n scrollEndTimer = window.setTimeout(() => {\n window.removeEventListener(\"scroll\", handleScrollEnd);\n resolve();\n }, 100);\n };\n window.addEventListener(\"scroll\", handleScrollEnd, { passive: true });\n handleScrollEnd();\n });\n }\n var xpathCache = /* @__PURE__ */ new Map();\n async function processElements(chunk, scrollToChunk = true, indexOffset = 0) {\n console.time(\"processElements:total\");\n const viewportHeight = calculateViewportHeight();\n const chunkHeight = viewportHeight * chunk;\n const maxScrollTop = document.documentElement.scrollHeight - viewportHeight;\n const offsetTop = Math.min(chunkHeight, maxScrollTop);\n if (scrollToChunk) {\n console.time(\"processElements:scroll\");\n await scrollToHeight(offsetTop);\n console.timeEnd(\"processElements:scroll\");\n }\n const candidateElements = [];\n const DOMQueue = [...document.body.childNodes];\n console.log(\"Stagehand (Browser Process): Generating candidate elements\");\n console.time(\"processElements:findCandidates\");\n while (DOMQueue.length > 0) {\n const element = DOMQueue.pop();\n let shouldAddElement = false;\n if (element && isElementNode(element)) {\n const childrenCount = element.childNodes.length;\n for (let i = childrenCount - 1; i >= 0; i--) {\n const child = element.childNodes[i];\n DOMQueue.push(child);\n }\n if (isInteractiveElement(element)) {\n if (isActive(element) && isVisible(element)) {\n shouldAddElement = true;\n }\n }\n if (isLeafElement(element)) {\n if (isActive(element) && isVisible(element)) {\n 
shouldAddElement = true;\n }\n }\n }\n if (element && isTextNode(element) && isTextVisible(element)) {\n shouldAddElement = true;\n }\n if (shouldAddElement) {\n candidateElements.push(element);\n }\n }\n console.timeEnd(\"processElements:findCandidates\");\n const selectorMap = {};\n let outputString = \"\";\n console.log(\n `Stagehand (Browser Process): Processing candidate elements: ${candidateElements.length}`\n );\n console.time(\"processElements:processCandidates\");\n console.time(\"processElements:generateXPaths\");\n const xpathLists = await Promise.all(\n candidateElements.map(async (element) => {\n if (xpathCache.has(element)) {\n return xpathCache.get(element);\n }\n const xpaths = await generateXPathsForElement(element);\n xpathCache.set(element, xpaths);\n return xpaths;\n })\n );\n console.timeEnd(\"processElements:generateXPaths\");\n candidateElements.forEach((element, index) => {\n const xpaths = xpathLists[index];\n let elementOutput = \"\";\n if (isTextNode(element)) {\n const textContent = element.textContent?.trim();\n if (textContent) {\n elementOutput += `${index + indexOffset}:${textContent}\n`;\n }\n } else if (isElementNode(element)) {\n const tagName = element.tagName.toLowerCase();\n const attributes = collectEssentialAttributes(element);\n const openingTag = `<${tagName}${attributes ? 
\" \" + attributes : \"\"}>`;\n const closingTag = `</${tagName}>`;\n const textContent = element.textContent?.trim() || \"\";\n elementOutput += `${index + indexOffset}:${openingTag}${textContent}${closingTag}\n`;\n }\n outputString += elementOutput;\n selectorMap[index + indexOffset] = xpaths;\n });\n console.timeEnd(\"processElements:processCandidates\");\n console.timeEnd(\"processElements:total\");\n return {\n outputString,\n selectorMap\n };\n }\n function collectEssentialAttributes(element) {\n const essentialAttributes = [\n \"id\",\n \"class\",\n \"href\",\n \"src\",\n \"aria-label\",\n \"aria-name\",\n \"aria-role\",\n \"aria-description\",\n \"aria-expanded\",\n \"aria-haspopup\",\n \"type\",\n \"value\"\n ];\n const attrs = essentialAttributes.map((attr) => {\n const value = element.getAttribute(attr);\n return value ? `${attr}=\"${value}\"` : \"\";\n }).filter((attr) => attr !== \"\");\n Array.from(element.attributes).forEach((attr) => {\n if (attr.name.startsWith(\"data-\")) {\n attrs.push(`${attr.name}=\"${attr.value}\"`);\n }\n });\n return attrs.join(\" \");\n }\n window.processDom = processDom;\n window.processAllOfDom = processAllOfDom;\n window.processElements = processElements;\n window.scrollToHeight = scrollToHeight;\n var leafElementDenyList = [\"SVG\", \"IFRAME\", \"SCRIPT\", \"STYLE\", \"LINK\"];\n var interactiveElementTypes = [\n \"A\",\n \"BUTTON\",\n \"DETAILS\",\n \"EMBED\",\n \"INPUT\",\n \"LABEL\",\n \"MENU\",\n \"MENUITEM\",\n \"OBJECT\",\n \"SELECT\",\n \"TEXTAREA\",\n \"SUMMARY\"\n ];\n var interactiveRoles = [\n \"button\",\n \"menu\",\n \"menuitem\",\n \"link\",\n \"checkbox\",\n \"radio\",\n \"slider\",\n \"tab\",\n \"tabpanel\",\n \"textbox\",\n \"combobox\",\n \"grid\",\n \"listbox\",\n \"option\",\n \"progressbar\",\n \"scrollbar\",\n \"searchbox\",\n \"switch\",\n \"tree\",\n \"treeitem\",\n \"spinbutton\",\n \"tooltip\"\n ];\n var interactiveAriaRoles = [\"menu\", \"menuitem\", \"button\"];\n var isVisible = (element) => 
{\n const rect = element.getBoundingClientRect();\n if (rect.width === 0 || rect.height === 0 || rect.top < 0 || rect.top > window.innerHeight) {\n return false;\n }\n if (!isTopElement(element, rect)) {\n return false;\n }\n const visible = element.checkVisibility({\n checkOpacity: true,\n checkVisibilityCSS: true\n });\n return visible;\n };\n var isTextVisible = (element) => {\n const range = document.createRange();\n range.selectNodeContents(element);\n const rect = range.getBoundingClientRect();\n if (rect.width === 0 || rect.height === 0 || rect.top < 0 || rect.top > window.innerHeight) {\n return false;\n }\n const parent = element.parentElement;\n if (!parent) {\n return false;\n }\n if (!isTopElement(parent, rect)) {\n return false;\n }\n const visible = parent.checkVisibility({\n checkOpacity: true,\n checkVisibilityCSS: true\n });\n return visible;\n };\n function isTopElement(elem, rect) {\n const points = [\n { x: rect.left + rect.width * 0.25, y: rect.top + rect.height * 0.25 },\n { x: rect.left + rect.width * 0.75, y: rect.top + rect.height * 0.25 },\n { x: rect.left + rect.width * 0.25, y: rect.top + rect.height * 0.75 },\n { x: rect.left + rect.width * 0.75, y: rect.top + rect.height * 0.75 },\n { x: rect.left + rect.width / 2, y: rect.top + rect.height / 2 }\n ];\n return points.some((point) => {\n const topEl = document.elementFromPoint(point.x, point.y);\n let current = topEl;\n while (current && current !== document.body) {\n if (current.isSameNode(elem)) {\n return true;\n }\n current = current.parentElement;\n }\n return false;\n });\n }\n var isActive = (element) => {\n if (element.hasAttribute(\"disabled\") || element.hasAttribute(\"hidden\") || element.getAttribute(\"aria-disabled\") === \"true\") {\n return false;\n }\n return true;\n };\n var isInteractiveElement = (element) => {\n const elementType = element.tagName;\n const elementRole = element.getAttribute(\"role\");\n const elementAriaRole = element.getAttribute(\"aria-role\");\n 
return elementType && interactiveElementTypes.includes(elementType) || elementRole && interactiveRoles.includes(elementRole) || elementAriaRole && interactiveAriaRoles.includes(elementAriaRole);\n };\n var isLeafElement = (element) => {\n if (element.textContent === \"\") {\n return false;\n }\n if (element.childNodes.length === 0) {\n return !leafElementDenyList.includes(element.tagName);\n }\n if (element.childNodes.length === 1 && isTextNode(element.childNodes[0])) {\n return true;\n }\n return false;\n };\n async function pickChunk(chunksSeen) {\n const viewportHeight = calculateViewportHeight();\n const documentHeight = document.documentElement.scrollHeight;\n const chunks = Math.ceil(documentHeight / viewportHeight);\n const chunksArray = Array.from({ length: chunks }, (_, i) => i);\n const chunksRemaining = chunksArray.filter((chunk2) => {\n return !chunksSeen.includes(chunk2);\n });\n const currentScrollPosition = window.scrollY;\n const closestChunk = chunksRemaining.reduce((closest, current) => {\n const currentChunkTop = viewportHeight * current;\n const closestChunkTop = viewportHeight * closest;\n return Math.abs(currentScrollPosition - currentChunkTop) < Math.abs(currentScrollPosition - closestChunkTop) ? 
current : closest;\n }, chunksRemaining[0]);\n const chunk = closestChunk;\n if (chunk === void 0) {\n throw new Error(`No chunks remaining to check: ${chunksRemaining}`);\n }\n return {\n chunk,\n chunksArray\n };\n }\n\n // lib/dom/debug.ts\n async function debugDom() {\n window.chunkNumber = 0;\n const { selectorMap: multiSelectorMap } = await window.processElements(\n window.chunkNumber\n );\n const selectorMap = multiSelectorMapToSelectorMap(multiSelectorMap);\n drawChunk(selectorMap);\n setupChunkNav();\n }\n function multiSelectorMapToSelectorMap(multiSelectorMap) {\n return Object.fromEntries(\n Object.entries(multiSelectorMap).map(([key, selectors]) => [\n Number(key),\n selectors[0]\n ])\n );\n }\n function drawChunk(selectorMap) {\n if (!window.showChunks) return;\n cleanupMarkers();\n Object.values(selectorMap).forEach((selector) => {\n const element = document.evaluate(\n selector,\n document,\n null,\n XPathResult.FIRST_ORDERED_NODE_TYPE,\n null\n ).singleNodeValue;\n if (element) {\n let rect;\n if (element.nodeType === Node.ELEMENT_NODE) {\n rect = element.getBoundingClientRect();\n } else {\n const range = document.createRange();\n range.selectNodeContents(element);\n rect = range.getBoundingClientRect();\n }\n const color = \"grey\";\n const overlay = document.createElement(\"div\");\n overlay.style.position = \"absolute\";\n overlay.style.left = `${rect.left + window.scrollX}px`;\n overlay.style.top = `${rect.top + window.scrollY}px`;\n overlay.style.padding = \"2px\";\n overlay.style.width = `${rect.width}px`;\n overlay.style.height = `${rect.height}px`;\n overlay.style.backgroundColor = color;\n overlay.className = \"stagehand-marker\";\n overlay.style.opacity = \"0.3\";\n overlay.style.zIndex = \"1000000000\";\n overlay.style.border = \"1px solid\";\n overlay.style.pointerEvents = \"none\";\n document.body.appendChild(overlay);\n }\n });\n }\n async function cleanupDebug() {\n cleanupMarkers();\n cleanupNav();\n }\n function cleanupMarkers() 
{\n const markers = document.querySelectorAll(\".stagehand-marker\");\n markers.forEach((marker) => {\n marker.remove();\n });\n }\n function cleanupNav() {\n const stagehandNavElements = document.querySelectorAll(\".stagehand-nav\");\n stagehandNavElements.forEach((element) => {\n element.remove();\n });\n }\n function setupChunkNav() {\n const viewportHeight = calculateViewportHeight();\n const documentHeight = document.documentElement.scrollHeight;\n const totalChunks = Math.ceil(documentHeight / viewportHeight);\n if (window.chunkNumber > 0) {\n const prevChunkButton = document.createElement(\"button\");\n prevChunkButton.className = \"stagehand-nav\";\n prevChunkButton.textContent = \"Previous\";\n prevChunkButton.style.marginLeft = \"50px\";\n prevChunkButton.style.position = \"fixed\";\n prevChunkButton.style.bottom = \"10px\";\n prevChunkButton.style.left = \"50%\";\n prevChunkButton.style.transform = \"translateX(-50%)\";\n prevChunkButton.style.zIndex = \"1000000000\";\n prevChunkButton.onclick = async () => {\n cleanupMarkers();\n cleanupNav();\n window.chunkNumber -= 1;\n window.scrollTo(0, window.chunkNumber * viewportHeight);\n await window.waitForDomSettle();\n const { selectorMap: multiSelectorMap } = await window.processElements(\n window.chunkNumber\n );\n const selectorMap = multiSelectorMapToSelectorMap(multiSelectorMap);\n drawChunk(selectorMap);\n setupChunkNav();\n };\n document.body.appendChild(prevChunkButton);\n }\n if (totalChunks > window.chunkNumber) {\n const nextChunkButton = document.createElement(\"button\");\n nextChunkButton.className = \"stagehand-nav\";\n nextChunkButton.textContent = \"Next\";\n nextChunkButton.style.marginRight = \"50px\";\n nextChunkButton.style.position = \"fixed\";\n nextChunkButton.style.bottom = \"10px\";\n nextChunkButton.style.right = \"50%\";\n nextChunkButton.style.transform = \"translateX(50%)\";\n nextChunkButton.style.zIndex = \"1000000000\";\n nextChunkButton.onclick = async () => {\n 
cleanupMarkers();\n cleanupNav();\n window.chunkNumber += 1;\n window.scrollTo(0, window.chunkNumber * viewportHeight);\n await window.waitForDomSettle();\n const { selectorMap: multiSelectorMap } = await window.processElements(\n window.chunkNumber\n );\n const selectorMap = multiSelectorMapToSelectorMap(multiSelectorMap);\n drawChunk(selectorMap);\n setupChunkNav();\n };\n document.body.appendChild(nextChunkButton);\n }\n }\n window.debugDom = debugDom;\n window.cleanupDebug = cleanupDebug;\n})();\n";
export const scriptContent = "(() => {\n // lib/dom/xpathUtils.ts\n function getParentElement(node) {\n return isElementNode(node) ? node.parentElement : node.parentNode;\n }\n function getCombinations(attributes, size) {\n const results = [];\n function helper(start, combo) {\n if (combo.length === size) {\n results.push([...combo]);\n return;\n }\n for (let i = start; i < attributes.length; i++) {\n combo.push(attributes[i]);\n helper(i + 1, combo);\n combo.pop();\n }\n }\n helper(0, []);\n return results;\n }\n function isXPathFirstResultElement(xpath, target) {\n try {\n const result = document.evaluate(\n xpath,\n document.documentElement,\n null,\n XPathResult.ORDERED_NODE_SNAPSHOT_TYPE,\n null\n );\n return result.snapshotItem(0) === target;\n } catch (error) {\n console.warn(`Invalid XPath expression: ${xpath}`, error);\n return false;\n }\n }\n function escapeXPathString(value) {\n if (value.includes(\"'\")) {\n if (value.includes('\"')) {\n return \"concat(\" + value.split(/('+)/).map((part) => {\n if (part === \"'\") {\n return `\"'\"`;\n } else if (part.startsWith(\"'\") && part.endsWith(\"'\")) {\n return `\"${part}\"`;\n } else {\n return `'${part}'`;\n }\n }).join(\",\") + \")\";\n } else {\n return `\"${value}\"`;\n }\n } else {\n return `'${value}'`;\n }\n }\n async function generateXPathsForElement(element) {\n if (!element) return [];\n const [complexXPath, standardXPath, idBasedXPath] = await Promise.all([\n generateComplexXPath(element),\n generateStandardXPath(element),\n generatedIdBasedXPath(element)\n ]);\n return [standardXPath, ...idBasedXPath ? 
[idBasedXPath] : [], complexXPath];\n }\n async function generateComplexXPath(element) {\n const parts = [];\n let currentElement = element;\n while (currentElement && (isTextNode(currentElement) || isElementNode(currentElement))) {\n if (isElementNode(currentElement)) {\n const el = currentElement;\n let selector = el.tagName.toLowerCase();\n const attributePriority = [\n \"data-qa\",\n \"data-component\",\n \"data-role\",\n \"role\",\n \"aria-role\",\n \"type\",\n \"name\",\n \"aria-label\",\n \"placeholder\",\n \"title\",\n \"alt\"\n ];\n const attributes = attributePriority.map((attr) => {\n let value = el.getAttribute(attr);\n if (attr === \"href-full\" && value) {\n value = el.getAttribute(\"href\");\n }\n return value ? { attr: attr === \"href-full\" ? \"href\" : attr, value } : null;\n }).filter((attr) => attr !== null);\n let uniqueSelector = \"\";\n for (let i = 1; i <= attributes.length; i++) {\n const combinations = getCombinations(attributes, i);\n for (const combo of combinations) {\n const conditions = combo.map((a) => `@${a.attr}=${escapeXPathString(a.value)}`).join(\" and \");\n const xpath2 = `//${selector}[${conditions}]`;\n if (isXPathFirstResultElement(xpath2, el)) {\n uniqueSelector = xpath2;\n break;\n }\n }\n if (uniqueSelector) break;\n }\n if (uniqueSelector) {\n parts.unshift(uniqueSelector.replace(\"//\", \"\"));\n break;\n } else {\n const parent = getParentElement(el);\n if (parent) {\n const siblings = Array.from(parent.children).filter(\n (sibling) => sibling.tagName === el.tagName\n );\n const index = siblings.indexOf(el) + 1;\n selector += siblings.length > 1 ? 
`[${index}]` : \"\";\n }\n parts.unshift(selector);\n }\n }\n currentElement = getParentElement(currentElement);\n }\n const xpath = \"//\" + parts.join(\"/\");\n return xpath;\n }\n async function generateStandardXPath(element) {\n const parts = [];\n while (element && (isTextNode(element) || isElementNode(element))) {\n let index = 0;\n let hasSameTypeSiblings = false;\n const siblings = element.parentElement ? Array.from(element.parentElement.childNodes) : [];\n for (let i = 0; i < siblings.length; i++) {\n const sibling = siblings[i];\n if (sibling.nodeType === element.nodeType && sibling.nodeName === element.nodeName) {\n index = index + 1;\n hasSameTypeSiblings = true;\n if (sibling.isSameNode(element)) {\n break;\n }\n }\n }\n if (element.nodeName !== \"#text\") {\n const tagName = element.nodeName.toLowerCase();\n const pathIndex = hasSameTypeSiblings ? `[${index}]` : \"\";\n parts.unshift(`${tagName}${pathIndex}`);\n }\n element = element.parentElement;\n }\n return parts.length ? 
`/${parts.join(\"/\")}` : \"\";\n }\n async function generatedIdBasedXPath(element) {\n if (isElementNode(element) && element.id) {\n return `//*[@id='${element.id}']`;\n }\n return null;\n }\n\n // lib/dom/utils.ts\n async function waitForDomSettle() {\n return new Promise((resolve) => {\n const createTimeout = () => {\n return setTimeout(() => {\n resolve();\n }, 2e3);\n };\n let timeout = createTimeout();\n const observer = new MutationObserver(() => {\n clearTimeout(timeout);\n timeout = createTimeout();\n });\n observer.observe(window.document.body, { childList: true, subtree: true });\n });\n }\n window.waitForDomSettle = waitForDomSettle;\n function calculateViewportHeight() {\n return Math.ceil(window.innerHeight * 0.75);\n }\n\n // lib/dom/process.ts\n function isElementNode(node) {\n return node.nodeType === Node.ELEMENT_NODE;\n }\n function isTextNode(node) {\n return node.nodeType === Node.TEXT_NODE && Boolean(node.textContent?.trim());\n }\n async function processDom(chunksSeen) {\n const { chunk, chunksArray } = await pickChunk(chunksSeen);\n const { outputString, selectorMap } = await processElements(chunk);\n console.log(\n `Stagehand (Browser Process): Extracted dom elements:\n${outputString}`\n );\n return {\n outputString,\n selectorMap,\n chunk,\n chunks: chunksArray\n };\n }\n async function processAllOfDom() {\n console.log(\"Stagehand (Browser Process): Processing all of DOM\");\n const viewportHeight = calculateViewportHeight();\n const documentHeight = document.documentElement.scrollHeight;\n const totalChunks = Math.ceil(documentHeight / viewportHeight);\n let index = 0;\n const results = [];\n for (let chunk = 0; chunk < totalChunks; chunk++) {\n const result = await processElements(chunk, true, index);\n results.push(result);\n index += Object.keys(result.selectorMap).length;\n }\n await scrollToHeight(0);\n const allOutputString = results.map((result) => result.outputString).join(\"\");\n const allSelectorMap = results.reduce(\n (acc, 
result) => ({ ...acc, ...result.selectorMap }),\n {}\n );\n console.log(\n `Stagehand (Browser Process): All dom elements: ${allOutputString}`\n );\n return {\n outputString: allOutputString,\n selectorMap: allSelectorMap\n };\n }\n async function scrollToHeight(height) {\n window.scrollTo({ top: height, left: 0, behavior: \"smooth\" });\n await new Promise((resolve) => {\n let scrollEndTimer;\n const handleScrollEnd = () => {\n clearTimeout(scrollEndTimer);\n scrollEndTimer = window.setTimeout(() => {\n window.removeEventListener(\"scroll\", handleScrollEnd);\n resolve();\n }, 100);\n };\n window.addEventListener(\"scroll\", handleScrollEnd, { passive: true });\n handleScrollEnd();\n });\n }\n var xpathCache = /* @__PURE__ */ new Map();\n async function processElements(chunk, scrollToChunk = true, indexOffset = 0) {\n console.time(\"processElements:total\");\n const viewportHeight = calculateViewportHeight();\n const chunkHeight = viewportHeight * chunk;\n const maxScrollTop = document.documentElement.scrollHeight - viewportHeight;\n const offsetTop = Math.min(chunkHeight, maxScrollTop);\n if (scrollToChunk) {\n console.time(\"processElements:scroll\");\n await scrollToHeight(offsetTop);\n console.timeEnd(\"processElements:scroll\");\n }\n const candidateElements = [];\n const DOMQueue = [...document.body.childNodes];\n console.log(\"Stagehand (Browser Process): Generating candidate elements\");\n console.time(\"processElements:findCandidates\");\n while (DOMQueue.length > 0) {\n const element = DOMQueue.pop();\n let shouldAddElement = false;\n if (element && isElementNode(element)) {\n const childrenCount = element.childNodes.length;\n for (let i = childrenCount - 1; i >= 0; i--) {\n const child = element.childNodes[i];\n DOMQueue.push(child);\n }\n if (isInteractiveElement(element)) {\n if (isActive(element) && isVisible(element)) {\n shouldAddElement = true;\n }\n }\n if (isLeafElement(element)) {\n if (isActive(element) && isVisible(element)) {\n 
shouldAddElement = true;\n }\n }\n }\n if (element && isTextNode(element) && isTextVisible(element)) {\n shouldAddElement = true;\n }\n if (shouldAddElement) {\n candidateElements.push(element);\n }\n }\n console.timeEnd(\"processElements:findCandidates\");\n const selectorMap = {};\n let outputString = \"\";\n console.log(\n `Stagehand (Browser Process): Processing candidate elements: ${candidateElements.length}`\n );\n console.time(\"processElements:processCandidates\");\n console.time(\"processElements:generateXPaths\");\n const xpathLists = await Promise.all(\n candidateElements.map(async (element) => {\n if (xpathCache.has(element)) {\n return xpathCache.get(element);\n }\n const xpaths = await generateXPathsForElement(element);\n xpathCache.set(element, xpaths);\n return xpaths;\n })\n );\n console.timeEnd(\"processElements:generateXPaths\");\n candidateElements.forEach((element, index) => {\n const xpaths = xpathLists[index];\n let elementOutput = \"\";\n if (isTextNode(element)) {\n const textContent = element.textContent?.trim();\n if (textContent) {\n elementOutput += `${index + indexOffset}:${textContent}\n`;\n }\n } else if (isElementNode(element)) {\n const tagName = element.tagName.toLowerCase();\n const attributes = collectEssentialAttributes(element);\n const openingTag = `<${tagName}${attributes ? 
\" \" + attributes : \"\"}>`;\n const closingTag = `</${tagName}>`;\n const textContent = element.textContent?.trim() || \"\";\n elementOutput += `${index + indexOffset}:${openingTag}${textContent}${closingTag}\n`;\n }\n outputString += elementOutput;\n selectorMap[index + indexOffset] = xpaths;\n });\n console.timeEnd(\"processElements:processCandidates\");\n console.timeEnd(\"processElements:total\");\n return {\n outputString,\n selectorMap\n };\n }\n function collectEssentialAttributes(element) {\n const essentialAttributes = [\n \"id\",\n \"class\",\n \"href\",\n \"src\",\n \"aria-label\",\n \"aria-name\",\n \"aria-role\",\n \"aria-description\",\n \"aria-expanded\",\n \"aria-haspopup\",\n \"type\",\n \"value\"\n ];\n const attrs = essentialAttributes.map((attr) => {\n const value = element.getAttribute(attr);\n return value ? `${attr}=\"${value}\"` : \"\";\n }).filter((attr) => attr !== \"\");\n Array.from(element.attributes).forEach((attr) => {\n if (attr.name.startsWith(\"data-\")) {\n attrs.push(`${attr.name}=\"${attr.value}\"`);\n }\n });\n return attrs.join(\" \");\n }\n function storeDOM() {\n const originalDOM = document.body.cloneNode(true);\n console.log(\"DOM state stored.\");\n return originalDOM.outerHTML;\n }\n function restoreDOM(storedDOM) {\n console.log(\"Restoring DOM\");\n if (storedDOM) {\n document.body.innerHTML = storedDOM;\n } else {\n console.error(\"No DOM state was provided.\");\n }\n }\n function createTextBoundingBoxes() {\n const style = document.createElement(\"style\");\n document.head.appendChild(style);\n if (style.sheet) {\n style.sheet.insertRule(\n `\n .stagehand-highlighted-word, .stagehand-space {\n border: 0px solid orange;\n display: inline-block !important;\n visibility: visible;\n }\n `,\n 0\n );\n style.sheet.insertRule(\n `\n code .stagehand-highlighted-word, code .stagehand-space,\n pre .stagehand-highlighted-word, pre .stagehand-space {\n white-space: pre-wrap;\n display: inline !important;\n }\n `,\n 1\n );\n 
}\n function applyHighlighting(root) {\n root.querySelectorAll(\"body *\").forEach((element) => {\n if (element.closest(\".stagehand-nav, .stagehand-marker\")) {\n return;\n }\n if ([\"SCRIPT\", \"STYLE\", \"IFRAME\", \"INPUT\", \"TEXTAREA\"].includes(\n element.tagName\n )) {\n return;\n }\n const childNodes = Array.from(element.childNodes);\n childNodes.forEach((node) => {\n if (node.nodeType === 3 && node.textContent?.trim().length > 0) {\n const textContent = node.textContent.replace(/\\u00A0/g, \" \");\n const tokens = textContent.split(/(\\s+)/g);\n const fragment = document.createDocumentFragment();\n const parentIsCode = element.tagName === \"CODE\";\n tokens.forEach((token) => {\n const span = document.createElement(\"span\");\n span.textContent = token;\n if (parentIsCode) {\n span.style.whiteSpace = \"pre-wrap\";\n span.style.display = \"inline\";\n }\n span.className = token.trim().length === 0 ? \"stagehand-space\" : \"stagehand-highlighted-word\";\n fragment.appendChild(span);\n });\n if (fragment.childNodes.length > 0 && node.parentNode) {\n element.insertBefore(fragment, node);\n node.remove();\n }\n }\n });\n });\n }\n applyHighlighting(document);\n document.querySelectorAll(\"iframe\").forEach((iframe) => {\n try {\n iframe.contentWindow?.postMessage({ action: \"highlight\" }, \"*\");\n } catch (error) {\n console.error(\"Error accessing iframe content: \", error);\n }\n });\n }\n function getElementBoundingBoxes(xpath) {\n const element = document.evaluate(\n xpath,\n document,\n null,\n XPathResult.FIRST_ORDERED_NODE_TYPE,\n null\n ).singleNodeValue;\n if (!element) return [];\n const isValidText = (text) => text && text.trim().length > 0;\n let dropDownElem = element.querySelector(\"option[selected]\");\n if (!dropDownElem) {\n dropDownElem = element.querySelector(\"option\");\n }\n if (dropDownElem) {\n const elemText = dropDownElem.textContent || \"\";\n if (isValidText(elemText)) {\n const parentRect = element.getBoundingClientRect();\n 
return [\n {\n text: elemText.trim(),\n top: parentRect.top + window.scrollY,\n left: parentRect.left + window.scrollX,\n width: parentRect.width,\n height: parentRect.height\n }\n ];\n } else {\n return [];\n }\n }\n let placeholderText = \"\";\n if ((element.tagName.toLowerCase() === \"input\" || element.tagName.toLowerCase() === \"textarea\") && element.placeholder) {\n placeholderText = element.placeholder;\n } else if (element.tagName.toLowerCase() === \"a\") {\n placeholderText = \"\";\n } else if (element.tagName.toLowerCase() === \"img\") {\n placeholderText = element.alt || \"\";\n }\n const words = element.querySelectorAll(\n \".stagehand-highlighted-word\"\n );\n const boundingBoxes = Array.from(words).map((word) => {\n const rect = word.getBoundingClientRect();\n return {\n text: word.innerText || \"\",\n top: rect.top + window.scrollY,\n left: rect.left + window.scrollX,\n width: rect.width,\n height: rect.height * 0.75\n };\n }).filter(\n (box) => box.width > 0 && box.height > 0 && box.top >= 0 && box.left >= 0 && isValidText(box.text)\n );\n if (boundingBoxes.length === 0) {\n const elementRect = element.getBoundingClientRect();\n return [\n {\n text: placeholderText,\n top: elementRect.top + window.scrollY,\n left: elementRect.left + window.scrollX,\n width: elementRect.width,\n height: elementRect.height * 0.75\n }\n ];\n }\n return boundingBoxes;\n }\n window.processDom = processDom;\n window.processAllOfDom = processAllOfDom;\n window.processElements = processElements;\n window.scrollToHeight = scrollToHeight;\n window.storeDOM = storeDOM;\n window.restoreDOM = restoreDOM;\n window.createTextBoundingBoxes = createTextBoundingBoxes;\n window.getElementBoundingBoxes = getElementBoundingBoxes;\n var leafElementDenyList = [\"SVG\", \"IFRAME\", \"SCRIPT\", \"STYLE\", \"LINK\"];\n var interactiveElementTypes = [\n \"A\",\n \"BUTTON\",\n \"DETAILS\",\n \"EMBED\",\n \"INPUT\",\n \"LABEL\",\n \"MENU\",\n \"MENUITEM\",\n \"OBJECT\",\n \"SELECT\",\n 
\"TEXTAREA\",\n \"SUMMARY\"\n ];\n var interactiveRoles = [\n \"button\",\n \"menu\",\n \"menuitem\",\n \"link\",\n \"checkbox\",\n \"radio\",\n \"slider\",\n \"tab\",\n \"tabpanel\",\n \"textbox\",\n \"combobox\",\n \"grid\",\n \"listbox\",\n \"option\",\n \"progressbar\",\n \"scrollbar\",\n \"searchbox\",\n \"switch\",\n \"tree\",\n \"treeitem\",\n \"spinbutton\",\n \"tooltip\"\n ];\n var interactiveAriaRoles = [\"menu\", \"menuitem\", \"button\"];\n var isVisible = (element) => {\n const rect = element.getBoundingClientRect();\n if (rect.width === 0 || rect.height === 0 || rect.top < 0 || rect.top > window.innerHeight) {\n return false;\n }\n if (!isTopElement(element, rect)) {\n return false;\n }\n const visible = element.checkVisibility({\n checkOpacity: true,\n checkVisibilityCSS: true\n });\n return visible;\n };\n var isTextVisible = (element) => {\n const range = document.createRange();\n range.selectNodeContents(element);\n const rect = range.getBoundingClientRect();\n if (rect.width === 0 || rect.height === 0 || rect.top < 0 || rect.top > window.innerHeight) {\n return false;\n }\n const parent = element.parentElement;\n if (!parent) {\n return false;\n }\n if (!isTopElement(parent, rect)) {\n return false;\n }\n const visible = parent.checkVisibility({\n checkOpacity: true,\n checkVisibilityCSS: true\n });\n return visible;\n };\n function isTopElement(elem, rect) {\n const points = [\n { x: rect.left + rect.width * 0.25, y: rect.top + rect.height * 0.25 },\n { x: rect.left + rect.width * 0.75, y: rect.top + rect.height * 0.25 },\n { x: rect.left + rect.width * 0.25, y: rect.top + rect.height * 0.75 },\n { x: rect.left + rect.width * 0.75, y: rect.top + rect.height * 0.75 },\n { x: rect.left + rect.width / 2, y: rect.top + rect.height / 2 }\n ];\n return points.some((point) => {\n const topEl = document.elementFromPoint(point.x, point.y);\n let current = topEl;\n while (current && current !== document.body) {\n if (current.isSameNode(elem)) {\n return 
true;\n }\n current = current.parentElement;\n }\n return false;\n });\n }\n var isActive = (element) => {\n if (element.hasAttribute(\"disabled\") || element.hasAttribute(\"hidden\") || element.getAttribute(\"aria-disabled\") === \"true\") {\n return false;\n }\n return true;\n };\n var isInteractiveElement = (element) => {\n const elementType = element.tagName;\n const elementRole = element.getAttribute(\"role\");\n const elementAriaRole = element.getAttribute(\"aria-role\");\n return elementType && interactiveElementTypes.includes(elementType) || elementRole && interactiveRoles.includes(elementRole) || elementAriaRole && interactiveAriaRoles.includes(elementAriaRole);\n };\n var isLeafElement = (element) => {\n if (element.textContent === \"\") {\n return false;\n }\n if (element.childNodes.length === 0) {\n return !leafElementDenyList.includes(element.tagName);\n }\n if (element.childNodes.length === 1 && isTextNode(element.childNodes[0])) {\n return true;\n }\n return false;\n };\n async function pickChunk(chunksSeen) {\n const viewportHeight = calculateViewportHeight();\n const documentHeight = document.documentElement.scrollHeight;\n const chunks = Math.ceil(documentHeight / viewportHeight);\n const chunksArray = Array.from({ length: chunks }, (_, i) => i);\n const chunksRemaining = chunksArray.filter((chunk2) => {\n return !chunksSeen.includes(chunk2);\n });\n const currentScrollPosition = window.scrollY;\n const closestChunk = chunksRemaining.reduce((closest, current) => {\n const currentChunkTop = viewportHeight * current;\n const closestChunkTop = viewportHeight * closest;\n return Math.abs(currentScrollPosition - currentChunkTop) < Math.abs(currentScrollPosition - closestChunkTop) ? 
current : closest;\n }, chunksRemaining[0]);\n const chunk = closestChunk;\n if (chunk === void 0) {\n throw new Error(`No chunks remaining to check: ${chunksRemaining}`);\n }\n return {\n chunk,\n chunksArray\n };\n }\n\n // lib/dom/debug.ts\n async function debugDom() {\n window.chunkNumber = 0;\n const { selectorMap: multiSelectorMap } = await window.processElements(\n window.chunkNumber\n );\n const selectorMap = multiSelectorMapToSelectorMap(multiSelectorMap);\n drawChunk(selectorMap);\n setupChunkNav();\n }\n function multiSelectorMapToSelectorMap(multiSelectorMap) {\n return Object.fromEntries(\n Object.entries(multiSelectorMap).map(([key, selectors]) => [\n Number(key),\n selectors[0]\n ])\n );\n }\n function drawChunk(selectorMap) {\n if (!window.showChunks) return;\n cleanupMarkers();\n Object.values(selectorMap).forEach((selector) => {\n const element = document.evaluate(\n selector,\n document,\n null,\n XPathResult.FIRST_ORDERED_NODE_TYPE,\n null\n ).singleNodeValue;\n if (element) {\n let rect;\n if (element.nodeType === Node.ELEMENT_NODE) {\n rect = element.getBoundingClientRect();\n } else {\n const range = document.createRange();\n range.selectNodeContents(element);\n rect = range.getBoundingClientRect();\n }\n const color = \"grey\";\n const overlay = document.createElement(\"div\");\n overlay.style.position = \"absolute\";\n overlay.style.left = `${rect.left + window.scrollX}px`;\n overlay.style.top = `${rect.top + window.scrollY}px`;\n overlay.style.padding = \"2px\";\n overlay.style.width = `${rect.width}px`;\n overlay.style.height = `${rect.height}px`;\n overlay.style.backgroundColor = color;\n overlay.className = \"stagehand-marker\";\n overlay.style.opacity = \"0.3\";\n overlay.style.zIndex = \"1000000000\";\n overlay.style.border = \"1px solid\";\n overlay.style.pointerEvents = \"none\";\n document.body.appendChild(overlay);\n }\n });\n }\n async function cleanupDebug() {\n cleanupMarkers();\n cleanupNav();\n }\n function cleanupMarkers() 
{\n const markers = document.querySelectorAll(\".stagehand-marker\");\n markers.forEach((marker) => {\n marker.remove();\n });\n }\n function cleanupNav() {\n const stagehandNavElements = document.querySelectorAll(\".stagehand-nav\");\n stagehandNavElements.forEach((element) => {\n element.remove();\n });\n }\n function setupChunkNav() {\n const viewportHeight = calculateViewportHeight();\n const documentHeight = document.documentElement.scrollHeight;\n const totalChunks = Math.ceil(documentHeight / viewportHeight);\n if (window.chunkNumber > 0) {\n const prevChunkButton = document.createElement(\"button\");\n prevChunkButton.className = \"stagehand-nav\";\n prevChunkButton.textContent = \"Previous\";\n prevChunkButton.style.marginLeft = \"50px\";\n prevChunkButton.style.position = \"fixed\";\n prevChunkButton.style.bottom = \"10px\";\n prevChunkButton.style.left = \"50%\";\n prevChunkButton.style.transform = \"translateX(-50%)\";\n prevChunkButton.style.zIndex = \"1000000000\";\n prevChunkButton.onclick = async () => {\n cleanupMarkers();\n cleanupNav();\n window.chunkNumber -= 1;\n window.scrollTo(0, window.chunkNumber * viewportHeight);\n await window.waitForDomSettle();\n const { selectorMap: multiSelectorMap } = await window.processElements(\n window.chunkNumber\n );\n const selectorMap = multiSelectorMapToSelectorMap(multiSelectorMap);\n drawChunk(selectorMap);\n setupChunkNav();\n };\n document.body.appendChild(prevChunkButton);\n }\n if (totalChunks > window.chunkNumber) {\n const nextChunkButton = document.createElement(\"button\");\n nextChunkButton.className = \"stagehand-nav\";\n nextChunkButton.textContent = \"Next\";\n nextChunkButton.style.marginRight = \"50px\";\n nextChunkButton.style.position = \"fixed\";\n nextChunkButton.style.bottom = \"10px\";\n nextChunkButton.style.right = \"50%\";\n nextChunkButton.style.transform = \"translateX(50%)\";\n nextChunkButton.style.zIndex = \"1000000000\";\n nextChunkButton.onclick = async () => {\n 
cleanupMarkers();\n cleanupNav();\n window.chunkNumber += 1;\n window.scrollTo(0, window.chunkNumber * viewportHeight);\n await window.waitForDomSettle();\n const { selectorMap: multiSelectorMap } = await window.processElements(\n window.chunkNumber\n );\n const selectorMap = multiSelectorMapToSelectorMap(multiSelectorMap);\n drawChunk(selectorMap);\n setupChunkNav();\n };\n document.body.appendChild(nextChunkButton);\n }\n }\n window.debugDom = debugDom;\n window.cleanupDebug = cleanupDebug;\n})();\n";

@@ -27,3 +27,13 @@ export {};

__PW_inspect?: unknown;
storeDOM: () => string;
restoreDOM: (storedDOM: string) => void;
createTextBoundingBoxes: () => void;
getElementBoundingBoxes: (xpath: string) => Array<{
text: string;
top: number;
left: number;
width: number;
height: number;
}>;
}
}

@@ -243,2 +243,195 @@ import { generateXPathsForElement as generateXPaths } from "./xpathUtils";

/**
 * Captures the current markup of `document.body` as a string so it can be
 * written back later (see `restoreDOM`).
 *
 * The markup is read directly from the live body. `outerHTML` already yields
 * an independent string snapshot, so deep-cloning the whole tree first would
 * only cost time and memory without changing the returned value.
 *
 * @returns The `outerHTML` of `document.body` at the time of the call.
 */
export function storeDOM(): string {
  const serializedBody = document.body.outerHTML;
  console.log("DOM state stored.");
  return serializedBody;
}
/**
 * Writes previously captured markup back into `document.body`.
 *
 * @param storedDOM - HTML string assigned to `document.body.innerHTML`.
 *   A falsy value (for example an empty string) is treated as "nothing to
 *   restore": the body is left untouched and an error is logged instead.
 */
export function restoreDOM(storedDOM: string): void {
  console.log("Restoring DOM");
  if (!storedDOM) {
    console.error("No DOM state was provided.");
    return;
  }
  document.body.innerHTML = storedDOM;
}
/**
 * Wraps the text of the page in `<span>` elements so that each word can be
 * measured individually.
 *
 * What it does, in order:
 * 1. Appends a `<style>` element to `document.head` and, when its sheet is
 *    available, inserts two rules for the wrapper classes (inline-block by
 *    default, inline with `pre-wrap` inside `code`/`pre`).
 * 2. For every element matched by `"body *"` (except Stagehand's own nav and
 *    marker overlays and SCRIPT/STYLE/IFRAME/INPUT/TEXTAREA elements), replaces
 *    each non-blank direct text node with a run of spans: words get the class
 *    `stagehand-highlighted-word`, whitespace runs get `stagehand-space`.
 * 3. Posts `{ action: "highlight" }` to every iframe's `contentWindow`.
 *    NOTE(review): presumably a listener inside the frame performs the same
 *    wrapping there; that listener is not visible in this file section.
 *
 * This mutates the DOM in place and does not undo itself.
 */
export function createTextBoundingBoxes(): void {
  const style = document.createElement("style");
  document.head.appendChild(style);
  if (style.sheet) {
    style.sheet.insertRule(
      `
        .stagehand-highlighted-word, .stagehand-space {
          border: 0px solid orange;
          display: inline-block !important;
          visibility: visible;
        }
      `,
      0,
    );
    style.sheet.insertRule(
      `
        code .stagehand-highlighted-word, code .stagehand-space,
        pre .stagehand-highlighted-word, pre .stagehand-space {
          white-space: pre-wrap;
          display: inline !important;
        }
      `,
      1,
    );
  }

  // Elements whose text must never be wrapped (non-rendered or form content).
  const skippedTags = ["SCRIPT", "STYLE", "IFRAME", "INPUT", "TEXTAREA"];

  function applyHighlighting(root: Document | HTMLElement): void {
    root.querySelectorAll("body *").forEach((element) => {
      // Leave Stagehand's own debug overlays alone.
      if (element.closest(".stagehand-nav, .stagehand-marker")) {
        return;
      }
      if (skippedTags.includes(element.tagName)) {
        return;
      }
      const parentIsCode = element.tagName === "CODE";
      // Snapshot the child list: it is mutated while we iterate.
      Array.from(element.childNodes).forEach((node) => {
        if (node.nodeType !== Node.TEXT_NODE) {
          return;
        }
        // `textContent` is nullable in the DOM typings; treat null as empty.
        const rawText = node.textContent ?? "";
        if (rawText.trim().length === 0) {
          return;
        }
        // Normalise non-breaking spaces so they split like ordinary spaces.
        const textContent = rawText.replace(/\u00A0/g, " ");
        const fragment = document.createDocumentFragment();
        // The capturing group keeps the whitespace runs as their own tokens.
        for (const token of textContent.split(/(\s+)/)) {
          // Leading/trailing whitespace makes split() emit "" tokens; wrapping
          // those would only add empty spans to the page.
          if (token === "") {
            continue;
          }
          const span = document.createElement("span");
          span.textContent = token;
          if (parentIsCode) {
            // Special handling for <code> tags
            span.style.whiteSpace = "pre-wrap";
            span.style.display = "inline";
          }
          span.className =
            token.trim().length === 0
              ? "stagehand-space"
              : "stagehand-highlighted-word";
          fragment.appendChild(span);
        }
        if (fragment.childNodes.length > 0 && node.parentNode) {
          element.insertBefore(fragment, node);
          node.remove();
        }
      });
    });
  }

  applyHighlighting(document);
  document.querySelectorAll("iframe").forEach((iframe) => {
    try {
      iframe.contentWindow?.postMessage({ action: "highlight" }, "*");
    } catch (error) {
      console.error("Error accessing iframe content: ", error);
    }
  });
}
/**
 * Returns page-coordinate boxes for the text of the first node matched by
 * `xpath`.
 *
 * Resolution order:
 * - No match: empty array.
 * - The element contains an `<option>` (preferring `option[selected]`): a
 *   single box covering the whole element, labelled with the option's trimmed
 *   text, or an empty array if that text is blank.
 * - Otherwise: one box per `.stagehand-highlighted-word` descendant that has
 *   non-blank text, a positive size and non-negative page coordinates. Word
 *   heights are scaled by 0.75.
 * - If no word qualifies: a single box for the element itself (height also
 *   scaled by 0.75), labelled with the placeholder of an input/textarea, the
 *   `alt` of an img, or an empty string.
 *
 * `top`/`left` are viewport coordinates plus the current window scroll offset.
 *
 * @param xpath - XPath expression evaluated against `document`.
 */
export function getElementBoundingBoxes(xpath: string): Array<{
  text: string;
  top: number;
  left: number;
  width: number;
  height: number;
}> {
  const target = document.evaluate(
    xpath,
    document,
    null,
    XPathResult.FIRST_ORDERED_NODE_TYPE,
    null,
  ).singleNodeValue as HTMLElement;
  if (!target) {
    return [];
  }

  const hasVisibleText = (text: string): boolean =>
    Boolean(text) && text.trim().length > 0;

  // Dropdowns: report the selected (or else the first) option's text.
  const option =
    target.querySelector("option[selected]") ?? target.querySelector("option");
  if (option) {
    const optionText = option.textContent || "";
    if (!hasVisibleText(optionText)) {
      return [];
    }
    const selectRect = target.getBoundingClientRect();
    return [
      {
        text: optionText.trim(),
        top: selectRect.top + window.scrollY,
        left: selectRect.left + window.scrollX,
        width: selectRect.width,
        height: selectRect.height,
      },
    ];
  }

  // Label used when the element has no measurable words of its own.
  const tag = target.tagName.toLowerCase();
  let fallbackText = "";
  if (
    (tag === "input" || tag === "textarea") &&
    (target as HTMLInputElement).placeholder
  ) {
    fallbackText = (target as HTMLInputElement).placeholder;
  } else if (tag === "img") {
    fallbackText = (target as HTMLImageElement).alt || "";
  }

  const wordSpans = target.querySelectorAll<HTMLElement>(
    ".stagehand-highlighted-word",
  );
  const wordBoxes: ReturnType<typeof getElementBoundingBoxes> = [];
  for (const span of Array.from(wordSpans)) {
    const spanRect = span.getBoundingClientRect();
    const candidate = {
      text: span.innerText || "",
      top: spanRect.top + window.scrollY,
      left: spanRect.left + window.scrollX,
      width: spanRect.width,
      height: spanRect.height * 0.75,
    };
    const isMeasurable =
      candidate.width > 0 &&
      candidate.height > 0 &&
      candidate.top >= 0 &&
      candidate.left >= 0 &&
      hasVisibleText(candidate.text);
    if (isMeasurable) {
      wordBoxes.push(candidate);
    }
  }
  if (wordBoxes.length > 0) {
    return wordBoxes;
  }

  const ownRect = target.getBoundingClientRect();
  return [
    {
      text: fallbackText,
      top: ownRect.top + window.scrollY,
      left: ownRect.left + window.scrollX,
      width: ownRect.width,
      height: ownRect.height * 0.75,
    },
  ];
}
window.processDom = processDom;

@@ -248,2 +441,6 @@ window.processAllOfDom = processAllOfDom;

// Expose the DOM helpers on `window` so that code evaluated in the page
// context can call them by name (e.g. `window.storeDOM()`).
window.scrollToHeight = scrollToHeight;
window.storeDOM = storeDOM;
window.restoreDOM = restoreDOM;
window.createTextBoundingBoxes = createTextBoundingBoxes;
window.getElementBoundingBoxes = getElementBoundingBoxes;

@@ -250,0 +447,0 @@ const leafElementDenyList = ["SVG", "IFRAME", "SCRIPT", "STYLE", "LINK"];

@@ -5,5 +5,79 @@ import { LLMProvider } from "../llm/LLMProvider";

import { LogLine } from "../../types/log";
import { TextAnnotation } from "../../types/textannotation";
import { extract } from "../inference";
import { LLMClient } from "../llm/LLMClient";
import { formatText } from "../utils";
// NOTE(review): presumably the maximum distance at which two annotations with
// the same text are treated as duplicates during proximity-based
// deduplication — the usage is not visible here; confirm meaning and units.
const PROXIMITY_THRESHOLD = 15;
/**
* The `StagehandExtractHandler` class is responsible for extracting structured data from a webpage.
* It provides two approaches: `textExtract` and `domExtract`. `domExtract` is used by default; pass `useTextExtract: true` to use `textExtract` instead.
*
* Here is what `textExtract` does at a high level:
*
* **1. Wait for the DOM to settle and start DOM debugging.**
* - Ensures the page is fully loaded and stable before extraction.
*
* **2. Store the original DOM before any mutations.**
* - Preserves the initial state of the DOM to restore later.
* - We do this because creating spans around every word in the DOM (see step 4)
* becomes very difficult to revert. Text nodes can be finicky, and directly
* removing the added spans often corrupts the structure of the DOM.
*
* **3. Process the DOM to generate a selector map of candidate elements.**
* - Identifies potential elements that contain the data to extract.
*
* **4. Create text bounding boxes around every word in the webpage.**
* - Wraps words in spans so that their bounding boxes can be used to
* determine their positions on the text-rendered-webpage.
*
* **5. Collect all text annotations (with positions and dimensions) from each of the candidate elements.**
* - Gathers text and positional data for each word.
*
* **6. Group annotations by text and deduplicate them based on proximity.**
* - There is no guarantee that the text annotations are unique (candidate elements can be nested).
* - Thus, we must remove duplicate words that are close to each other on the page.
*
* **7. Restore the original DOM after mutations.**
* - Returns the DOM to its original state after processing.
*
* **8. Format the deduplicated annotations into a text representation.**
* - Prepares the text data for the extraction process.
*
* **9. Pass the formatted text to an LLM for extraction according to the given instruction and schema.**
* - Uses a language model to extract structured data based on instructions.
*
* **10. Handle the extraction response and log the results.**
* - Processes the output from the LLM and logs relevant information.
*
*
* Here is what `domExtract` does at a high level:
*
* **1. Wait for the DOM to settle and start DOM debugging.**
* - Ensures the page is fully loaded and stable before extraction.
*
* **2. Process the DOM in chunks.**
* - The `processDom` function:
* - Divides the page into vertical "chunks" based on viewport height.
* - Picks the next chunk that hasn't been processed yet.
* - Scrolls to that chunk and extracts candidate elements.
* - Returns `outputString` (HTML snippets of candidate elements),
* `selectorMap` (the XPaths of the candidate elements),
* `chunk` (the current chunk index), and `chunks` (the array of all chunk indices).
* - This chunk-based approach ensures that large or lengthy pages can be processed in smaller, manageable sections.
*
* **3. Pass the extracted DOM elements (in `outputString`) to the LLM for structured data extraction.**
* - Uses the instructions, schema, and previously extracted content as context to
* guide the LLM in extracting the structured data.
*
* **4. Check if extraction is complete.**
* - If the extraction is complete (all chunks have been processed or the LLM determines
* that we do not need to continue), return the final result.
* - If not, repeat steps 1-4 with the next chunk until extraction is complete or no more chunks remain.
*
* @remarks
* Each step corresponds to specific code segments, as noted in the comments throughout the code.
*/
export class StagehandExtractHandler {

@@ -64,2 +138,3 @@ private readonly stagehand: Stagehand;

domSettleTimeoutMs,
useTextExtract = false,
}: {

@@ -73,3 +148,41 @@ instruction: string;

domSettleTimeoutMs?: number;
useTextExtract?: boolean;
}): Promise<z.infer<T>> {
if (useTextExtract) {
return this.textExtract({
instruction,
schema,
content,
llmClient,
requestId,
domSettleTimeoutMs,
});
} else {
return this.domExtract({
instruction,
schema,
content,
chunksSeen,
llmClient,
requestId,
domSettleTimeoutMs,
});
}
}
private async textExtract<T extends z.AnyZodObject>({
instruction,
schema,
content = {},
llmClient,
requestId,
domSettleTimeoutMs,
}: {
instruction: string;
schema: T;
content?: z.infer<T>;
llmClient: LLMClient;
requestId?: string;
domSettleTimeoutMs?: number;
}): Promise<z.infer<T>> {
this.logger({

@@ -87,4 +200,228 @@ category: "extraction",

// **1:** Wait for the DOM to settle and start DOM debugging
await this.waitForSettledDom(domSettleTimeoutMs);
await this.startDomDebug();
// **2:** Store the original DOM before any mutations
// we need to store the original DOM here because calling createTextBoundingBoxes()
// will mutate the DOM by adding spans around every word
const originalDOM = await this.stagehand.page.evaluate(() =>
window.storeDOM(),
);
// **3:** Process the DOM to generate a selector map of candidate elements
const { selectorMap }: { selectorMap: Record<number, string[]> } =
await this.stagehand.page.evaluate(() => window.processAllOfDom());
this.logger({
category: "extraction",
message: `received output from processAllOfDom. selectorMap has ${Object.keys(selectorMap).length} entries`,
level: 1,
});
// **4:** Create text bounding boxes around every word in the webpage
// calling createTextBoundingBoxes() will create a span around every word on the
// webpage. The bounding boxes of these spans will be used to determine their
// positions in the text rendered webpage
await this.stagehand.page.evaluate(() => window.createTextBoundingBoxes());
const pageWidth = await this.stagehand.page.evaluate(
() => window.innerWidth,
);
const pageHeight = await this.stagehand.page.evaluate(
() => window.innerHeight,
);
// **5:** Collect all text annotations (with positions and dimensions) from the candidate elements
// allAnnotations will store all the TextAnnotations BEFORE deduplication
const allAnnotations: TextAnnotation[] = [];
// here we will loop through all the xpaths in the selectorMap,
// and get the bounding boxes for each one. These are xpaths to "candidate elements"
for (const xpaths of Object.values(selectorMap)) {
const xpath = xpaths[0];
// boundingBoxes is an array because there may be multiple bounding boxes within a single element
// (since each bounding box is around a single word)
const boundingBoxes: Array<{
text: string;
left: number;
top: number;
width: number;
height: number;
}> = await this.stagehand.page.evaluate(
(xpath) => window.getElementBoundingBoxes(xpath),
xpath,
);
for (const box of boundingBoxes) {
const bottom_left = {
x: box.left,
y: box.top + box.height,
};
const bottom_left_normalized = {
x: box.left / pageWidth,
y: (box.top + box.height) / pageHeight,
};
const annotation: TextAnnotation = {
text: box.text,
bottom_left,
bottom_left_normalized,
width: box.width,
height: box.height,
};
allAnnotations.push(annotation);
}
}
// **6:** Group annotations by text and deduplicate them based on proximity
const annotationsGroupedByText = new Map<string, TextAnnotation[]>();
for (const annotation of allAnnotations) {
if (!annotationsGroupedByText.has(annotation.text)) {
annotationsGroupedByText.set(annotation.text, []);
}
annotationsGroupedByText.get(annotation.text)!.push(annotation);
}
const deduplicatedTextAnnotations: TextAnnotation[] = [];
// here, we deduplicate annotations per text group
for (const [text, annotations] of annotationsGroupedByText.entries()) {
for (const annotation of annotations) {
// check if this annotation is close to any existing deduplicated annotation
const isDuplicate = deduplicatedTextAnnotations.some(
(existingAnnotation) => {
if (existingAnnotation.text !== text) return false;
const dx =
existingAnnotation.bottom_left.x - annotation.bottom_left.x;
const dy =
existingAnnotation.bottom_left.y - annotation.bottom_left.y;
const distance = Math.hypot(dx, dy);
// the annotation is a duplicate if it has the same text and its bottom_left
// position is within the PROXIMITY_THRESHOLD of an existing annotation.
// we calculate the Euclidean distance between the two bottom_left points,
// and if the distance is less than PROXIMITY_THRESHOLD,
// the annotation is considered a duplicate.
return distance < PROXIMITY_THRESHOLD;
},
);
if (!isDuplicate) {
deduplicatedTextAnnotations.push(annotation);
}
}
}
// **7:** Restore the original DOM after mutations
await this.stagehand.page.evaluate(
(dom) => window.restoreDOM(dom),
originalDOM,
);
// **8:** Format the deduplicated annotations into a text representation
const formattedText = formatText(deduplicatedTextAnnotations, pageWidth);
// **9:** Pass the formatted text to an LLM for extraction according to the given instruction and schema
const extractionResponse = await extract({
instruction,
previouslyExtractedContent: content,
domElements: formattedText,
schema,
chunksSeen: 1,
chunksTotal: 1,
llmClient,
requestId,
});
const {
metadata: { completed },
...output
} = extractionResponse;
await this.cleanupDomDebug();
// **10:** Handle the extraction response and log the results
this.logger({
category: "extraction",
message: "received extraction response",
auxiliary: {
extraction_response: {
value: JSON.stringify(extractionResponse),
type: "object",
},
},
});
if (completed) {
this.logger({
category: "extraction",
message: "extraction completed successfully",
level: 1,
auxiliary: {
extraction_response: {
value: JSON.stringify(extractionResponse),
type: "object",
},
},
});
} else {
this.logger({
category: "extraction",
message: "extraction incomplete after processing all data",
level: 1,
auxiliary: {
extraction_response: {
value: JSON.stringify(extractionResponse),
type: "object",
},
},
});
}
return output;
}
private async domExtract<T extends z.AnyZodObject>({
instruction,
schema,
content = {},
chunksSeen = [],
llmClient,
requestId,
domSettleTimeoutMs,
}: {
instruction: string;
schema: T;
content?: z.infer<T>;
chunksSeen?: Array<number>;
llmClient: LLMClient;
requestId?: string;
domSettleTimeoutMs?: number;
}): Promise<z.infer<T>> {
this.logger({
category: "extraction",
message: "starting extraction using old approach",
level: 1,
auxiliary: {
instruction: {
value: instruction,
type: "string",
},
},
});
// **1:** Wait for the DOM to settle and start DOM debugging
// This ensures the page is stable before extracting any data.
await this.waitForSettledDom(domSettleTimeoutMs);
await this.startDomDebug();
// **2:** Call processDom() to handle chunk-based extraction
// processDom determines which chunk of the page to process next.
// It will:
// - Identify all chunks (vertical segments of the page),
// - Pick the next unprocessed chunk,
// - Scroll to that chunk's region,
// - Extract candidate elements and their text,
// - Return the extracted text (outputString), a selectorMap (for referencing elements),
// the current chunk index, and the full list of chunks.
const { outputString, chunk, chunks } = await this.stagehand.page.evaluate(

@@ -114,2 +451,5 @@ (chunksSeen?: number[]) => window.processDom(chunksSeen ?? []),

// **3:** Pass the list of candidate HTML snippets to the LLM
// The LLM uses the provided instruction and schema to parse and extract
// structured data.
const extractionResponse = await extract({

@@ -124,2 +464,3 @@ instruction,

requestId,
isUsingTextExtract: false,
});

@@ -131,2 +472,3 @@

} = extractionResponse;
await this.cleanupDomDebug();

@@ -145,4 +487,8 @@

// Mark the current chunk as processed by adding it to chunksSeen
chunksSeen.push(chunk);
// **4:** Check if extraction is complete
// If the LLM deems the extraction complete or we've processed all chunks, return the final result.
// Otherwise, call domExtract again for the next chunk.
if (completed || chunksSeen.length === chunks.length) {

@@ -173,3 +519,5 @@ this.logger({

await this.waitForSettledDom(domSettleTimeoutMs);
return this.extract({
// Recursively continue with the next chunk
return this.domExtract({
instruction,

@@ -176,0 +524,0 @@ schema,

@@ -198,3 +198,3 @@ import { Browserbase } from "@browserbasehq/sdk";

return { browser, context, debugUrl, sessionUrl };
return { browser, context, debugUrl, sessionUrl, sessionId };
} else {

@@ -311,2 +311,4 @@ logger({

public context: BrowserContext;
public browserbaseSessionID?: string;
private env: "LOCAL" | "BROWSERBASE";

@@ -382,19 +384,21 @@ private apiKey: string | undefined;

}
const { context, debugUrl, sessionUrl, contextPath } = await getBrowser(
this.apiKey,
this.projectId,
this.env,
this.headless,
this.logger,
this.browserbaseSessionCreateParams,
this.browserbaseResumeSessionID,
).catch((e) => {
console.error("Error in init:", e);
const br: BrowserResult = {
context: undefined,
debugUrl: undefined,
sessionUrl: undefined,
};
return br;
});
const { context, debugUrl, sessionUrl, contextPath, sessionId } =
await getBrowser(
this.apiKey,
this.projectId,
this.env,
this.headless,
this.logger,
this.browserbaseSessionCreateParams,
this.browserbaseResumeSessionID,
).catch((e) => {
console.error("Error in init:", e);
const br: BrowserResult = {
context: undefined,
debugUrl: undefined,
sessionUrl: undefined,
sessionId: undefined,
};
return br;
});
this.contextPath = contextPath;

@@ -461,4 +465,5 @@ this.context = context;

});
this.browserbaseSessionID = sessionId;
return { debugUrl, sessionUrl };
return { debugUrl, sessionUrl, sessionId };
}

@@ -772,2 +777,3 @@

domSettleTimeoutMs,
useTextExtract,
}: ExtractOptions<T>): Promise<ExtractResult<T>> {

@@ -810,2 +816,3 @@ if (!this.extractHandler) {

domSettleTimeoutMs,
useTextExtract,
})

@@ -812,0 +819,0 @@ .catch((e) => {

@@ -163,2 +163,3 @@ import {

requestId,
isUsingTextExtract,
}: {

@@ -173,2 +174,3 @@ instruction: string;

requestId: string;
isUsingTextExtract?: boolean;
}) {

@@ -181,3 +183,3 @@ type ExtractionResponse = z.infer<typeof schema>;

messages: [
buildExtractSystemPrompt(isUsingAnthropic),
buildExtractSystemPrompt(isUsingAnthropic, isUsingTextExtract),
buildExtractUserPrompt(instruction, domElements, isUsingAnthropic),

@@ -184,0 +186,0 @@ ],

@@ -225,3 +225,3 @@ import Anthropic, { ClientOptions } from "@anthropic-ai/sdk";

model: this.modelName,
max_tokens: options.maxTokens || 1500,
max_tokens: options.maxTokens || 8192,
messages: formattedMessages,

@@ -228,0 +228,0 @@ tools: anthropicTools,

@@ -204,19 +204,43 @@ import OpenAI from "openai";

// extract
const extractSystemPrompt = `You are extracting content on behalf of a user. You will be given:
export function buildExtractSystemPrompt(
isUsingPrintExtractedDataTool: boolean = false,
useTextExtract: boolean = true,
): ChatMessage {
const baseContent = `You are extracting content on behalf of a user.
If a user asks you to extract a 'list' of information, or 'all' information,
YOU MUST EXTRACT ALL OF THE INFORMATION THAT THE USER REQUESTS.
You will be given:
1. An instruction
2. A list of DOM elements to extract from
2. `;
Print the exact text from the DOM elements with all symbols, characters, and endlines as is.
const contentDetail = useTextExtract
? `A text representation of a webpage to extract information from.`
: `A list of DOM elements to extract from.`;
const instructions = `
Print the exact text from the ${
useTextExtract ? "text-rendered webpage" : "DOM elements"
} with all symbols, characters, and endlines as is.
Print null or an empty string if no new information is found.
`;
`.trim();
export function buildExtractSystemPrompt(
isUsingPrintExtractedDataTool: boolean = false,
): ChatMessage {
let content = extractSystemPrompt.replace(/\s+/g, " ");
if (isUsingPrintExtractedDataTool) {
content += `
const toolInstructions = isUsingPrintExtractedDataTool
? `
ONLY print the content using the print_extracted_data tool provided.
ONLY print the content using the print_extracted_data tool provided.`;
}
ONLY print the content using the print_extracted_data tool provided.
`.trim()
: "";
const additionalInstructions = useTextExtract
? `Once you are given the text-rendered webpage,
you must thoroughly and meticulously analyze it. Be very careful to ensure that you
do not miss any important information.`
: "";
const content =
`${baseContent}${contentDetail}\n\n${instructions}\n${toolInstructions}${
additionalInstructions ? `\n\n${additionalInstructions}` : ""
}`.replace(/\s+/g, " ");
return {

@@ -223,0 +247,0 @@ role: "system",

import crypto from "crypto";
import { LogLine } from "../types/log";
import { TextAnnotation } from "../types/textannotation";
import { z } from "zod";

@@ -9,2 +10,353 @@

/**
 * `formatText` renders positioned text annotations as a plain-text approximation
 * of the page, laid out on a fixed-width character grid.
 *
 * The procedure is:
 * - estimate a pixels-per-character width (falling back to 10 when the estimate is 0),
 * - bucket annotations into rows whose `bottom_left.y` values differ by less than 0.0001,
 * - order each row left-to-right and merge neighbouring words via `groupWordsInSentence`,
 * - size the grid from the right-most estimated text end plus 20 padding columns,
 * - insert blank rows where the gap to the previous row exceeds 1.2x the median gap,
 * - write each phrase at the column given by its normalized x position,
 * - trim trailing whitespace and frame the result with a rule of dashes above and below.
 *
 * @param textAnnotations - Annotations carrying text plus absolute and normalized positions.
 * @param pageWidth - Page width used to normalize estimated text lengths.
 * @returns The framed text layout; the dashed rules are always present.
 */
export function formatText(
  textAnnotations: TextAnnotation[],
  pageWidth: number,
): string {
  const fallbackCharWidthPx = 10;
  const sameLineTolerance = 0.0001;
  const canvasPaddingChars = 20;
  const largeGapFactor = 1.2;

  // Pixels per character; a 0 estimate (nothing measurable) falls back to the default.
  const pxPerChar =
    estimateCharacterWidth(textAnnotations) || fallbackCharWidthPx;

  // Work on a copy ordered by ascending y so the caller's array is left untouched.
  const topToBottom = Array.from(textAnnotations).sort(
    (upper, lower) => upper.bottom_left.y - lower.bottom_left.y,
  );

  // Bucket annotations into rows keyed by the y of the first annotation seen in that row.
  const rowsByY = new Map<number, TextAnnotation[]>();
  for (const item of topToBottom) {
    const itemY = item.bottom_left.y;
    let matchingRow: TextAnnotation[] | undefined;
    for (const [rowY, members] of rowsByY) {
      if (Math.abs(rowY - itemY) < sameLineTolerance) {
        matchingRow = members;
        break;
      }
    }
    if (matchingRow === undefined) {
      rowsByY.set(itemY, [item]);
    } else {
      matchingRow.push(item);
    }
  }

  // Visit rows in ascending y, merging words into phrases and tracking the
  // furthest normalized right edge reached by any phrase.
  const rowYsAscending = Array.from(rowsByY.keys()).sort((p, q) => p - q);
  let widestEnd = 0;
  const phraseRows: TextAnnotation[][] = [];
  for (const rowY of rowYsAscending) {
    const members = rowsByY.get(rowY)!;
    members.sort((left, right) => left.bottom_left.x - right.bottom_left.x);
    const phrases = groupWordsInSentence(members);
    for (const phrase of phrases) {
      const normalizedLength = (phrase.text.length * pxPerChar) / pageWidth;
      const normalizedEnd = phrase.bottom_left_normalized.x + normalizedLength;
      if (normalizedEnd > widestEnd) {
        widestEnd = normalizedEnd;
      }
    }
    phraseRows.push(phrases);
  }

  // Grid width in characters: widest text end converted to characters, plus padding, at least 1.
  const columns = Math.max(
    Math.ceil(widestEnd * (pageWidth / pxPerChar)) + canvasPaddingChars,
    1,
  );

  // Reference y for each row (the smallest y among its phrases) and the gaps between rows.
  const rowReferenceYs = phraseRows.map((phrases) =>
    Math.min(...phrases.map((phrase) => phrase.bottom_left.y)),
  );
  const gaps = rowReferenceYs
    .slice(1)
    .map((y, index) => y - rowReferenceYs[index]);
  const typicalGap = gaps.length > 0 ? median(gaps) : 0;

  // Paint the phrases onto the grid, one output row per input row plus any blank spacer rows.
  const grid: string[][] = [];
  let cursorRow = -1;
  for (const [rowNumber, phrases] of phraseRows.entries()) {
    let blankRows = 0;
    if (rowNumber > 0 && typicalGap > 0) {
      const gap = rowReferenceYs[rowNumber] - rowReferenceYs[rowNumber - 1];
      if (gap > largeGapFactor * typicalGap) {
        blankRows = Math.max(Math.round(gap / typicalGap) - 1, 0);
      }
    }
    // ensureLineExists back-fills every missing row up to cursorRow, so the
    // spacer rows and the text row are created in one call.
    cursorRow += blankRows + 1;
    ensureLineExists(grid, cursorRow, columns);

    const targetRow = grid[cursorRow];
    for (const phrase of phrases) {
      const { text } = phrase;
      const startColumn = Math.round(
        phrase.bottom_left_normalized.x * columns,
      );
      for (let offset = 0; offset < text.length; offset++) {
        const column = startColumn + offset;
        // characters that would land past the right edge are dropped
        if (column < columns) {
          targetRow[column] = text[offset];
        }
      }
    }
  }

  // Strip trailing whitespace per row, then from the whole body.
  const body = grid
    .map((row) => row.join("").trimEnd())
    .join("\n")
    .trimEnd();

  // Frame the body with a rule of dashes as wide as the grid.
  const rule = "-".repeat(columns);
  return rule + "\n" + body + "\n" + rule;
}
/**
 * `ensureLineExists` grows the canvas in place until `canvas[lineIndex]` exists.
 * Every row it appends is a fresh array of `width` single-space strings.
 * Rows already present are left untouched, even if their width differs.
 *
 * @param canvas - The 2D character canvas, mutated in place.
 * @param lineIndex - The row index that must be addressable afterwards.
 * @param width - Number of cells in each newly appended row.
 */
function ensureLineExists(
  canvas: string[][],
  lineIndex: number,
  width: number,
) {
  // number of rows still missing; zero or negative means nothing to do
  const missingRows = lineIndex + 1 - canvas.length;
  for (let added = 0; added < missingRows; added++) {
    const blankRow: string[] = new Array(width).fill(" ");
    canvas.push(blankRow);
  }
}
/**
 * `estimateCharacterWidth` estimates a typical per-character width from the annotations.
 * Each annotation with non-empty text contributes `width / text.length`; the result
 * is the median of those ratios, as computed by `median`.
 *
 * @param textAnnotations - Annotations providing `text` and `width`.
 * @returns The median width per character, or whatever `median` yields for an
 * empty list when no annotation has non-empty text.
 */
function estimateCharacterWidth(textAnnotations: TextAnnotation[]): number {
  const perCharacterWidths = textAnnotations
    // empty strings would divide by zero, so they are excluded
    .filter((entry) => entry.text.length > 0)
    .map((entry) => entry.width / entry.text.length);
  return median(perCharacterWidths);
}
/**
 * `groupWordsInSentence` merges neighbouring annotations of one line into phrases.
 *
 * Annotations are consumed in the order given. An annotation joins the phrase
 * being built when both of these hold:
 * - its height differs from the phrase's first annotation by at most 4, and
 * - its left edge is no further right than the previous annotation's right edge
 *   plus twice that annotation's average character width.
 * Otherwise the phrase is finalized with `createGroupedAnnotation` and a new one
 * starts with the current annotation.
 *
 * @param lineAnnotations - Annotations belonging to a single line, in reading order.
 * @returns One merged annotation per phrase, in the order the phrases were formed.
 */
function groupWordsInSentence(
  lineAnnotations: TextAnnotation[],
): TextAnnotation[] {
  const spacingFactor = 2;
  const maxHeightDelta = 4;
  const phrases: TextAnnotation[] = [];
  let pending: TextAnnotation[] = [];

  for (const candidate of lineAnnotations) {
    if (pending.length > 0) {
      const previous = pending[pending.length - 1];
      // allowed gap: the previous word's average character width, scaled
      const allowedGap =
        (previous.width / previous.text.length) * spacingFactor;
      const closeEnough =
        candidate.bottom_left.x <=
        previous.bottom_left.x + previous.width + allowedGap;
      const similarHeight =
        Math.abs(candidate.height - pending[0].height) <= maxHeightDelta;

      if (!(similarHeight && closeEnough)) {
        // the candidate does not fit: close the current phrase and start a new one
        phrases.push(createGroupedAnnotation(pending));
        pending = [candidate];
        continue;
      }
    }
    // either the first annotation of a phrase or one that fits the current phrase
    pending.push(candidate);
  }

  // flush the phrase still under construction, if any
  if (pending.length > 0) {
    phrases.push(createGroupedAnnotation(pending));
  }
  return phrases;
}
/**
 * `createGroupedAnnotation` collapses a group of annotations into a single annotation.
 *
 * Text is concatenated in order with a single space before each piece, except
 * that the first piece and any piece that is exactly one of the listed
 * punctuation marks are appended without a leading space. If the combined text
 * contains an ASCII letter or digit and the group's median height is above 25,
 * the text is wrapped in `**` markers.
 *
 * @param group - Annotations to merge; the first one supplies position and height.
 * @returns A new annotation with the combined text, the first annotation's
 * coordinates and height, and the sum of all widths.
 */
function createGroupedAnnotation(group: TextAnnotation[]): TextAnnotation {
  // pieces that attach directly to the preceding text
  const attachedPunctuation = [
    ".",
    ",",
    '"',
    "'",
    ":",
    ";",
    "!",
    "?",
    "{",
    "}",
    "’",
    "”",
  ];

  let combined = group.reduce((soFar, { text: piece }) => {
    const needsSpace = soFar !== "" && !attachedPunctuation.includes(piece);
    return needsSpace ? soFar + " " + piece : soFar + piece;
  }, "");

  // tall, word-like text is marked as emphasized
  const containsAlphanumeric = /[a-zA-Z0-9]/.test(combined);
  const typicalHeight = median(group.map((member) => member.height));
  if (containsAlphanumeric && typicalHeight > 25) {
    combined = "**" + combined + "**";
  }

  // widths are summed as-is; horizontal gaps between members are not included
  let totalWidth = 0;
  for (const member of group) {
    totalWidth += member.width;
  }

  const anchor = group[0];
  return {
    text: combined,
    bottom_left: {
      x: anchor.bottom_left.x,
      y: anchor.bottom_left.y,
    },
    bottom_left_normalized: {
      x: anchor.bottom_left_normalized.x,
      y: anchor.bottom_left_normalized.y,
    },
    width: totalWidth,
    height: anchor.height,
  };
}
/**
 * `median` returns the median of a list of numbers without modifying the input.
 * For an even count it is the mean of the two middle values.
 *
 * @param values - The numbers to summarize.
 * @returns The median, or 0 when `values` is empty.
 */
function median(values: number[]): number {
  const count = values.length;
  if (count === 0) {
    return 0;
  }
  // sort a copy numerically so the caller's array keeps its order
  const ascending = Array.from(values).sort((x, y) => x - y);
  const upperMiddle = Math.floor(count / 2);
  return count % 2 === 1
    ? ascending[upperMiddle]
    : (ascending[upperMiddle - 1] + ascending[upperMiddle]) / 2;
}
export function logLineToString(logLine: LogLine): string {

@@ -11,0 +363,0 @@ try {

{
"name": "@browserbasehq/stagehand",
"version": "1.7.0-alpha-b902192bc7ff8eb02c85150c1fe6f89c2a95b211",
"version": "1.7.0-alpha-ba4ec335a5323648c6016cc480300fd58868311a",
"description": "An AI web browsing framework focused on simplicity and extensibility.",

@@ -5,0 +5,0 @@ "main": "./dist/index.js",

@@ -201,2 +201,3 @@ <div id="toc" align="center">

- `sessionUrl`: a `string` representing the session URL. This is only available when using a Browserbase browser.
- `sessionId`: a `string` representing the session ID. This is only available when using a Browserbase browser.

@@ -263,2 +264,3 @@ - **Example:**

- `domSettleTimeoutMs`: (optional) timeout in milliseconds for waiting for the DOM to settle
- `useTextExtract`: (optional) a `boolean` to determine if text-based extraction should be used. Defaults to `false`

@@ -265,0 +267,0 @@ - **Returns:**

Sorry, the diff of this file is too big to display

SocketSocket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc