@browserbasehq/stagehand - npm Package Compare versions

dist/evals/tasks/allrecipes.d.ts

dist/evals/tasks/amazon_add_to_cart.d.ts

dist/evals/tasks/apple.d.ts

dist/evals/tasks/arxiv.d.ts

dist/evals/tasks/bidnet.d.ts

dist/evals/tasks/combination_sauce.d.ts

dist/evals/tasks/costar.d.ts

dist/evals/tasks/expedia_search.d.ts

dist/evals/tasks/expedia.d.ts

dist/evals/tasks/extract_aigrant_companies.d.ts

dist/evals/tasks/extract_area_codes.d.ts

dist/evals/tasks/extract_baptist_health.d.ts

dist/evals/tasks/extract_capacitor_info.d.ts

dist/evals/tasks/extract_collaborators.d.ts

dist/evals/tasks/extract_github_commits.d.ts

dist/evals/tasks/extract_github_stars.d.ts

dist/evals/tasks/extract_memorial_healthcare.d.ts

dist/evals/tasks/extract_nhl_stats.d.ts

dist/evals/tasks/extract_partners.d.ts

dist/evals/tasks/extract_press_releases.d.ts

dist/evals/tasks/extract_professional_info.d.ts

dist/evals/tasks/extract_public_notices.d.ts

dist/evals/tasks/extract_regulations.d.ts

dist/evals/tasks/extract_research_reports.d.ts

dist/evals/tasks/extract_resistor_info.d.ts

dist/evals/tasks/extract_rockauto.d.ts

dist/evals/tasks/extract_snowshoeing_destinations.d.ts

dist/evals/tasks/extract_staff_members.d.ts

dist/evals/tasks/google_jobs.d.ts

dist/evals/tasks/homedepot.d.ts

dist/evals/tasks/ibm.d.ts

dist/evals/tasks/imdb_movie_details.d.ts

dist/evals/tasks/ionwave_observe.d.ts

dist/evals/tasks/ionwave.d.ts

dist/evals/tasks/laroche_form.d.ts

dist/evals/tasks/nonsense_action.d.ts

dist/evals/tasks/panamcs.d.ts

dist/evals/tasks/peeler_complex.d.ts

dist/evals/tasks/peeler_simple.d.ts

dist/evals/tasks/rakuten_jp.d.ts

dist/evals/tasks/sciquest.d.ts

dist/evals/tasks/shopify_homepage.d.ts

dist/evals/tasks/simple_google_search.d.ts

dist/evals/tasks/stock_x.d.ts

dist/evals/tasks/ted_talk.d.ts

dist/evals/tasks/vanta_h.d.ts

dist/evals/tasks/vanta.d.ts

dist/evals/tasks/vantechjournal.d.ts

dist/evals/tasks/wichita.d.ts

dist/evals/tasks/wikipedia.d.ts

dist/examples/stagehand.config.d.ts

dist/types/textannotation.d.ts

10

dist/index.d.ts

		@@ -151,7 +151,4 @@ import { Page, BrowserContext, Browser } from '@playwright/test';
		sessionUrl: string;
		sessionId: string;
		}
		interface InitResult {
		debugUrl: string;
		sessionUrl: string;
		}
		interface InitFromPageOptions {
		@@ -186,2 +183,3 @@ page: Page;
		domSettleTimeoutMs?: number;
		useTextExtract?: boolean;
		}
		@@ -207,2 +205,3 @@ type ExtractResult<T extends z.AnyZodObject> = z.infer<T>;
		contextPath?: string;
		sessionId?: string;
		}
		@@ -227,2 +226,3 @@
		context: BrowserContext;
		browserbaseSessionID?: string;
		private env;
		@@ -260,3 +260,3 @@ private apiKey;
		act({ action, modelName, modelClientOptions, useVision, variables, domSettleTimeoutMs, }: ActOptions): Promise<ActResult>;
		extract<T extends z.AnyZodObject>({ instruction, schema, modelName, modelClientOptions, domSettleTimeoutMs, }: ExtractOptions<T>): Promise<ExtractResult<T>>;
		extract<T extends z.AnyZodObject>({ instruction, schema, modelName, modelClientOptions, domSettleTimeoutMs, useTextExtract, }: ExtractOptions<T>): Promise<ExtractResult<T>>;
		observe(options?: ObserveOptions): Promise<ObserveResult[]>;
		@@ -263,0 +263,0 @@ close(): Promise<void>;

2

dist/lib/dom/build/scriptContent.d.ts

		@@ -1,1 +0,1 @@
		export declare const scriptContent = "(() => {\n // lib/dom/xpathUtils.ts\n function getParentElement(node) {\n return isElementNode(node) ? node.parentElement : node.parentNode;\n }\n function getCombinations(attributes, size) {\n const results = [];\n function helper(start, combo) {\n if (combo.length === size) {\n results.push([...combo]);\n return;\n }\n for (let i = start; i < attributes.length; i++) {\n combo.push(attributes[i]);\n helper(i + 1, combo);\n combo.pop();\n }\n }\n helper(0, []);\n return results;\n }\n function isXPathFirstResultElement(xpath, target) {\n try {\n const result = document.evaluate(\n xpath,\n document.documentElement,\n null,\n XPathResult.ORDERED_NODE_SNAPSHOT_TYPE,\n null\n );\n return result.snapshotItem(0) === target;\n } catch (error) {\n console.warn(`Invalid XPath expression: ${xpath}`, error);\n return false;\n }\n }\n function escapeXPathString(value) {\n if (value.includes(\"'\")) {\n if (value.includes('\"')) {\n return \"concat(\" + value.split(/('+)/).map((part) => {\n if (part === \"'\") {\n return `\"'\"`;\n } else if (part.startsWith(\"'\") && part.endsWith(\"'\")) {\n return `\"${part}\"`;\n } else {\n return `'${part}'`;\n }\n }).join(\",\") + \")\";\n } else {\n return `\"${value}\"`;\n }\n } else {\n return `'${value}'`;\n }\n }\n async function generateXPathsForElement(element) {\n if (!element) return [];\n const [complexXPath, standardXPath, idBasedXPath] = await Promise.all([\n generateComplexXPath(element),\n generateStandardXPath(element),\n generatedIdBasedXPath(element)\n ]);\n return [standardXPath, ...idBasedXPath ? 
[idBasedXPath] : [], complexXPath];\n }\n async function generateComplexXPath(element) {\n const parts = [];\n let currentElement = element;\n while (currentElement && (isTextNode(currentElement) \|\| isElementNode(currentElement))) {\n if (isElementNode(currentElement)) {\n const el = currentElement;\n let selector = el.tagName.toLowerCase();\n const attributePriority = [\n \"data-qa\",\n \"data-component\",\n \"data-role\",\n \"role\",\n \"aria-role\",\n \"type\",\n \"name\",\n \"aria-label\",\n \"placeholder\",\n \"title\",\n \"alt\"\n ];\n const attributes = attributePriority.map((attr) => {\n let value = el.getAttribute(attr);\n if (attr === \"href-full\" && value) {\n value = el.getAttribute(\"href\");\n }\n return value ? { attr: attr === \"href-full\" ? \"href\" : attr, value } : null;\n }).filter((attr) => attr !== null);\n let uniqueSelector = \"\";\n for (let i = 1; i <= attributes.length; i++) {\n const combinations = getCombinations(attributes, i);\n for (const combo of combinations) {\n const conditions = combo.map((a) => `@${a.attr}=${escapeXPathString(a.value)}`).join(\" and \");\n const xpath2 = `//${selector}[${conditions}]`;\n if (isXPathFirstResultElement(xpath2, el)) {\n uniqueSelector = xpath2;\n break;\n }\n }\n if (uniqueSelector) break;\n }\n if (uniqueSelector) {\n parts.unshift(uniqueSelector.replace(\"//\", \"\"));\n break;\n } else {\n const parent = getParentElement(el);\n if (parent) {\n const siblings = Array.from(parent.children).filter(\n (sibling) => sibling.tagName === el.tagName\n );\n const index = siblings.indexOf(el) + 1;\n selector += siblings.length > 1 ? 
`[${index}]` : \"\";\n }\n parts.unshift(selector);\n }\n }\n currentElement = getParentElement(currentElement);\n }\n const xpath = \"//\" + parts.join(\"/\");\n return xpath;\n }\n async function generateStandardXPath(element) {\n const parts = [];\n while (element && (isTextNode(element) \|\| isElementNode(element))) {\n let index = 0;\n let hasSameTypeSiblings = false;\n const siblings = element.parentElement ? Array.from(element.parentElement.childNodes) : [];\n for (let i = 0; i < siblings.length; i++) {\n const sibling = siblings[i];\n if (sibling.nodeType === element.nodeType && sibling.nodeName === element.nodeName) {\n index = index + 1;\n hasSameTypeSiblings = true;\n if (sibling.isSameNode(element)) {\n break;\n }\n }\n }\n if (element.nodeName !== \"#text\") {\n const tagName = element.nodeName.toLowerCase();\n const pathIndex = hasSameTypeSiblings ? `[${index}]` : \"\";\n parts.unshift(`${tagName}${pathIndex}`);\n }\n element = element.parentElement;\n }\n return parts.length ? 
`/${parts.join(\"/\")}` : \"\";\n }\n async function generatedIdBasedXPath(element) {\n if (isElementNode(element) && element.id) {\n return `//[@id='${element.id}']`;\n }\n return null;\n }\n\n // lib/dom/utils.ts\n async function waitForDomSettle() {\n return new Promise((resolve) => {\n const createTimeout = () => {\n return setTimeout(() => {\n resolve();\n }, 2e3);\n };\n let timeout = createTimeout();\n const observer = new MutationObserver(() => {\n clearTimeout(timeout);\n timeout = createTimeout();\n });\n observer.observe(window.document.body, { childList: true, subtree: true });\n });\n }\n window.waitForDomSettle = waitForDomSettle;\n function calculateViewportHeight() {\n return Math.ceil(window.innerHeight 0.75);\n }\n\n // lib/dom/process.ts\n function isElementNode(node) {\n return node.nodeType === Node.ELEMENT_NODE;\n }\n function isTextNode(node) {\n return node.nodeType === Node.TEXT_NODE && Boolean(node.textContent?.trim());\n }\n async function processDom(chunksSeen) {\n const { chunk, chunksArray } = await pickChunk(chunksSeen);\n const { outputString, selectorMap } = await processElements(chunk);\n console.log(\n `Stagehand (Browser Process): Extracted dom elements:\n${outputString}`\n );\n return {\n outputString,\n selectorMap,\n chunk,\n chunks: chunksArray\n };\n }\n async function processAllOfDom() {\n console.log(\"Stagehand (Browser Process): Processing all of DOM\");\n const viewportHeight = calculateViewportHeight();\n const documentHeight = document.documentElement.scrollHeight;\n const totalChunks = Math.ceil(documentHeight / viewportHeight);\n let index = 0;\n const results = [];\n for (let chunk = 0; chunk < totalChunks; chunk++) {\n const result = await processElements(chunk, true, index);\n results.push(result);\n index += Object.keys(result.selectorMap).length;\n }\n await scrollToHeight(0);\n const allOutputString = results.map((result) => result.outputString).join(\"\");\n const allSelectorMap = results.reduce(\n (acc, 
result) => ({ ...acc, ...result.selectorMap }),\n {}\n );\n console.log(\n `Stagehand (Browser Process): All dom elements: ${allOutputString}`\n );\n return {\n outputString: allOutputString,\n selectorMap: allSelectorMap\n };\n }\n async function scrollToHeight(height) {\n window.scrollTo({ top: height, left: 0, behavior: \"smooth\" });\n await new Promise((resolve) => {\n let scrollEndTimer;\n const handleScrollEnd = () => {\n clearTimeout(scrollEndTimer);\n scrollEndTimer = window.setTimeout(() => {\n window.removeEventListener(\"scroll\", handleScrollEnd);\n resolve();\n }, 100);\n };\n window.addEventListener(\"scroll\", handleScrollEnd, { passive: true });\n handleScrollEnd();\n });\n }\n var xpathCache = /* @__PURE__ / new Map();\n async function processElements(chunk, scrollToChunk = true, indexOffset = 0) {\n console.time(\"processElements:total\");\n const viewportHeight = calculateViewportHeight();\n const chunkHeight = viewportHeight chunk;\n const maxScrollTop = document.documentElement.scrollHeight - viewportHeight;\n const offsetTop = Math.min(chunkHeight, maxScrollTop);\n if (scrollToChunk) {\n console.time(\"processElements:scroll\");\n await scrollToHeight(offsetTop);\n console.timeEnd(\"processElements:scroll\");\n }\n const candidateElements = [];\n const DOMQueue = [...document.body.childNodes];\n console.log(\"Stagehand (Browser Process): Generating candidate elements\");\n console.time(\"processElements:findCandidates\");\n while (DOMQueue.length > 0) {\n const element = DOMQueue.pop();\n let shouldAddElement = false;\n if (element && isElementNode(element)) {\n const childrenCount = element.childNodes.length;\n for (let i = childrenCount - 1; i >= 0; i--) {\n const child = element.childNodes[i];\n DOMQueue.push(child);\n }\n if (isInteractiveElement(element)) {\n if (isActive(element) && isVisible(element)) {\n shouldAddElement = true;\n }\n }\n if (isLeafElement(element)) {\n if (isActive(element) && isVisible(element)) {\n shouldAddElement 
= true;\n }\n }\n }\n if (element && isTextNode(element) && isTextVisible(element)) {\n shouldAddElement = true;\n }\n if (shouldAddElement) {\n candidateElements.push(element);\n }\n }\n console.timeEnd(\"processElements:findCandidates\");\n const selectorMap = {};\n let outputString = \"\";\n console.log(\n `Stagehand (Browser Process): Processing candidate elements: ${candidateElements.length}`\n );\n console.time(\"processElements:processCandidates\");\n console.time(\"processElements:generateXPaths\");\n const xpathLists = await Promise.all(\n candidateElements.map(async (element) => {\n if (xpathCache.has(element)) {\n return xpathCache.get(element);\n }\n const xpaths = await generateXPathsForElement(element);\n xpathCache.set(element, xpaths);\n return xpaths;\n })\n );\n console.timeEnd(\"processElements:generateXPaths\");\n candidateElements.forEach((element, index) => {\n const xpaths = xpathLists[index];\n let elementOutput = \"\";\n if (isTextNode(element)) {\n const textContent = element.textContent?.trim();\n if (textContent) {\n elementOutput += `${index + indexOffset}:${textContent}\n`;\n }\n } else if (isElementNode(element)) {\n const tagName = element.tagName.toLowerCase();\n const attributes = collectEssentialAttributes(element);\n const openingTag = `<${tagName}${attributes ? 
\" \" + attributes : \"\"}>`;\n const closingTag = `</${tagName}>`;\n const textContent = element.textContent?.trim() \|\| \"\";\n elementOutput += `${index + indexOffset}:${openingTag}${textContent}${closingTag}\n`;\n }\n outputString += elementOutput;\n selectorMap[index + indexOffset] = xpaths;\n });\n console.timeEnd(\"processElements:processCandidates\");\n console.timeEnd(\"processElements:total\");\n return {\n outputString,\n selectorMap\n };\n }\n function collectEssentialAttributes(element) {\n const essentialAttributes = [\n \"id\",\n \"class\",\n \"href\",\n \"src\",\n \"aria-label\",\n \"aria-name\",\n \"aria-role\",\n \"aria-description\",\n \"aria-expanded\",\n \"aria-haspopup\",\n \"type\",\n \"value\"\n ];\n const attrs = essentialAttributes.map((attr) => {\n const value = element.getAttribute(attr);\n return value ? `${attr}=\"${value}\"` : \"\";\n }).filter((attr) => attr !== \"\");\n Array.from(element.attributes).forEach((attr) => {\n if (attr.name.startsWith(\"data-\")) {\n attrs.push(`${attr.name}=\"${attr.value}\"`);\n }\n });\n return attrs.join(\" \");\n }\n window.processDom = processDom;\n window.processAllOfDom = processAllOfDom;\n window.processElements = processElements;\n window.scrollToHeight = scrollToHeight;\n var leafElementDenyList = [\"SVG\", \"IFRAME\", \"SCRIPT\", \"STYLE\", \"LINK\"];\n var interactiveElementTypes = [\n \"A\",\n \"BUTTON\",\n \"DETAILS\",\n \"EMBED\",\n \"INPUT\",\n \"LABEL\",\n \"MENU\",\n \"MENUITEM\",\n \"OBJECT\",\n \"SELECT\",\n \"TEXTAREA\",\n \"SUMMARY\"\n ];\n var interactiveRoles = [\n \"button\",\n \"menu\",\n \"menuitem\",\n \"link\",\n \"checkbox\",\n \"radio\",\n \"slider\",\n \"tab\",\n \"tabpanel\",\n \"textbox\",\n \"combobox\",\n \"grid\",\n \"listbox\",\n \"option\",\n \"progressbar\",\n \"scrollbar\",\n \"searchbox\",\n \"switch\",\n \"tree\",\n \"treeitem\",\n \"spinbutton\",\n \"tooltip\"\n ];\n var interactiveAriaRoles = [\"menu\", \"menuitem\", \"button\"];\n var isVisible = (element) 
=> {\n const rect = element.getBoundingClientRect();\n if (rect.width === 0 \|\| rect.height === 0 \|\| rect.top < 0 \|\| rect.top > window.innerHeight) {\n return false;\n }\n if (!isTopElement(element, rect)) {\n return false;\n }\n const visible = element.checkVisibility({\n checkOpacity: true,\n checkVisibilityCSS: true\n });\n return visible;\n };\n var isTextVisible = (element) => {\n const range = document.createRange();\n range.selectNodeContents(element);\n const rect = range.getBoundingClientRect();\n if (rect.width === 0 \|\| rect.height === 0 \|\| rect.top < 0 \|\| rect.top > window.innerHeight) {\n return false;\n }\n const parent = element.parentElement;\n if (!parent) {\n return false;\n }\n if (!isTopElement(parent, rect)) {\n return false;\n }\n const visible = parent.checkVisibility({\n checkOpacity: true,\n checkVisibilityCSS: true\n });\n return visible;\n };\n function isTopElement(elem, rect) {\n const points = [\n { x: rect.left + rect.width * 0.25, y: rect.top + rect.height * 0.25 },\n { x: rect.left + rect.width * 0.75, y: rect.top + rect.height * 0.25 },\n { x: rect.left + rect.width * 0.25, y: rect.top + rect.height * 0.75 },\n { x: rect.left + rect.width * 0.75, y: rect.top + rect.height * 0.75 },\n { x: rect.left + rect.width / 2, y: rect.top + rect.height / 2 }\n ];\n return points.some((point) => {\n const topEl = document.elementFromPoint(point.x, point.y);\n let current = topEl;\n while (current && current !== document.body) {\n if (current.isSameNode(elem)) {\n return true;\n }\n current = current.parentElement;\n }\n return false;\n });\n }\n var isActive = (element) => {\n if (element.hasAttribute(\"disabled\") \|\| element.hasAttribute(\"hidden\") \|\| element.getAttribute(\"aria-disabled\") === \"true\") {\n return false;\n }\n return true;\n };\n var isInteractiveElement = (element) => {\n const elementType = element.tagName;\n const elementRole = element.getAttribute(\"role\");\n const elementAriaRole = 
element.getAttribute(\"aria-role\");\n return elementType && interactiveElementTypes.includes(elementType) \|\| elementRole && interactiveRoles.includes(elementRole) \|\| elementAriaRole && interactiveAriaRoles.includes(elementAriaRole);\n };\n var isLeafElement = (element) => {\n if (element.textContent === \"\") {\n return false;\n }\n if (element.childNodes.length === 0) {\n return !leafElementDenyList.includes(element.tagName);\n }\n if (element.childNodes.length === 1 && isTextNode(element.childNodes[0])) {\n return true;\n }\n return false;\n };\n async function pickChunk(chunksSeen) {\n const viewportHeight = calculateViewportHeight();\n const documentHeight = document.documentElement.scrollHeight;\n const chunks = Math.ceil(documentHeight / viewportHeight);\n const chunksArray = Array.from({ length: chunks }, (_, i) => i);\n const chunksRemaining = chunksArray.filter((chunk2) => {\n return !chunksSeen.includes(chunk2);\n });\n const currentScrollPosition = window.scrollY;\n const closestChunk = chunksRemaining.reduce((closest, current) => {\n const currentChunkTop = viewportHeight * current;\n const closestChunkTop = viewportHeight * closest;\n return Math.abs(currentScrollPosition - currentChunkTop) < Math.abs(currentScrollPosition - closestChunkTop) ? 
current : closest;\n }, chunksRemaining[0]);\n const chunk = closestChunk;\n if (chunk === void 0) {\n throw new Error(`No chunks remaining to check: ${chunksRemaining}`);\n }\n return {\n chunk,\n chunksArray\n };\n }\n\n // lib/dom/debug.ts\n async function debugDom() {\n window.chunkNumber = 0;\n const { selectorMap: multiSelectorMap } = await window.processElements(\n window.chunkNumber\n );\n const selectorMap = multiSelectorMapToSelectorMap(multiSelectorMap);\n drawChunk(selectorMap);\n setupChunkNav();\n }\n function multiSelectorMapToSelectorMap(multiSelectorMap) {\n return Object.fromEntries(\n Object.entries(multiSelectorMap).map(([key, selectors]) => [\n Number(key),\n selectors[0]\n ])\n );\n }\n function drawChunk(selectorMap) {\n if (!window.showChunks) return;\n cleanupMarkers();\n Object.values(selectorMap).forEach((selector) => {\n const element = document.evaluate(\n selector,\n document,\n null,\n XPathResult.FIRST_ORDERED_NODE_TYPE,\n null\n ).singleNodeValue;\n if (element) {\n let rect;\n if (element.nodeType === Node.ELEMENT_NODE) {\n rect = element.getBoundingClientRect();\n } else {\n const range = document.createRange();\n range.selectNodeContents(element);\n rect = range.getBoundingClientRect();\n }\n const color = \"grey\";\n const overlay = document.createElement(\"div\");\n overlay.style.position = \"absolute\";\n overlay.style.left = `${rect.left + window.scrollX}px`;\n overlay.style.top = `${rect.top + window.scrollY}px`;\n overlay.style.padding = \"2px\";\n overlay.style.width = `${rect.width}px`;\n overlay.style.height = `${rect.height}px`;\n overlay.style.backgroundColor = color;\n overlay.className = \"stagehand-marker\";\n overlay.style.opacity = \"0.3\";\n overlay.style.zIndex = \"1000000000\";\n overlay.style.border = \"1px solid\";\n overlay.style.pointerEvents = \"none\";\n document.body.appendChild(overlay);\n }\n });\n }\n async function cleanupDebug() {\n cleanupMarkers();\n cleanupNav();\n }\n function cleanupMarkers() 
{\n const markers = document.querySelectorAll(\".stagehand-marker\");\n markers.forEach((marker) => {\n marker.remove();\n });\n }\n function cleanupNav() {\n const stagehandNavElements = document.querySelectorAll(\".stagehand-nav\");\n stagehandNavElements.forEach((element) => {\n element.remove();\n });\n }\n function setupChunkNav() {\n const viewportHeight = calculateViewportHeight();\n const documentHeight = document.documentElement.scrollHeight;\n const totalChunks = Math.ceil(documentHeight / viewportHeight);\n if (window.chunkNumber > 0) {\n const prevChunkButton = document.createElement(\"button\");\n prevChunkButton.className = \"stagehand-nav\";\n prevChunkButton.textContent = \"Previous\";\n prevChunkButton.style.marginLeft = \"50px\";\n prevChunkButton.style.position = \"fixed\";\n prevChunkButton.style.bottom = \"10px\";\n prevChunkButton.style.left = \"50%\";\n prevChunkButton.style.transform = \"translateX(-50%)\";\n prevChunkButton.style.zIndex = \"1000000000\";\n prevChunkButton.onclick = async () => {\n cleanupMarkers();\n cleanupNav();\n window.chunkNumber -= 1;\n window.scrollTo(0, window.chunkNumber * viewportHeight);\n await window.waitForDomSettle();\n const { selectorMap: multiSelectorMap } = await window.processElements(\n window.chunkNumber\n );\n const selectorMap = multiSelectorMapToSelectorMap(multiSelectorMap);\n drawChunk(selectorMap);\n setupChunkNav();\n };\n document.body.appendChild(prevChunkButton);\n }\n if (totalChunks > window.chunkNumber) {\n const nextChunkButton = document.createElement(\"button\");\n nextChunkButton.className = \"stagehand-nav\";\n nextChunkButton.textContent = \"Next\";\n nextChunkButton.style.marginRight = \"50px\";\n nextChunkButton.style.position = \"fixed\";\n nextChunkButton.style.bottom = \"10px\";\n nextChunkButton.style.right = \"50%\";\n nextChunkButton.style.transform = \"translateX(50%)\";\n nextChunkButton.style.zIndex = \"1000000000\";\n nextChunkButton.onclick = async () => {\n 
cleanupMarkers();\n cleanupNav();\n window.chunkNumber += 1;\n window.scrollTo(0, window.chunkNumber * viewportHeight);\n await window.waitForDomSettle();\n const { selectorMap: multiSelectorMap } = await window.processElements(\n window.chunkNumber\n );\n const selectorMap = multiSelectorMapToSelectorMap(multiSelectorMap);\n drawChunk(selectorMap);\n setupChunkNav();\n };\n document.body.appendChild(nextChunkButton);\n }\n }\n window.debugDom = debugDom;\n window.cleanupDebug = cleanupDebug;\n})();\n";
		export declare const scriptContent = "(() => {\n // lib/dom/xpathUtils.ts\n function getParentElement(node) {\n return isElementNode(node) ? node.parentElement : node.parentNode;\n }\n function getCombinations(attributes, size) {\n const results = [];\n function helper(start, combo) {\n if (combo.length === size) {\n results.push([...combo]);\n return;\n }\n for (let i = start; i < attributes.length; i++) {\n combo.push(attributes[i]);\n helper(i + 1, combo);\n combo.pop();\n }\n }\n helper(0, []);\n return results;\n }\n function isXPathFirstResultElement(xpath, target) {\n try {\n const result = document.evaluate(\n xpath,\n document.documentElement,\n null,\n XPathResult.ORDERED_NODE_SNAPSHOT_TYPE,\n null\n );\n return result.snapshotItem(0) === target;\n } catch (error) {\n console.warn(`Invalid XPath expression: ${xpath}`, error);\n return false;\n }\n }\n function escapeXPathString(value) {\n if (value.includes(\"'\")) {\n if (value.includes('\"')) {\n return \"concat(\" + value.split(/('+)/).map((part) => {\n if (part === \"'\") {\n return `\"'\"`;\n } else if (part.startsWith(\"'\") && part.endsWith(\"'\")) {\n return `\"${part}\"`;\n } else {\n return `'${part}'`;\n }\n }).join(\",\") + \")\";\n } else {\n return `\"${value}\"`;\n }\n } else {\n return `'${value}'`;\n }\n }\n async function generateXPathsForElement(element) {\n if (!element) return [];\n const [complexXPath, standardXPath, idBasedXPath] = await Promise.all([\n generateComplexXPath(element),\n generateStandardXPath(element),\n generatedIdBasedXPath(element)\n ]);\n return [standardXPath, ...idBasedXPath ? 
[idBasedXPath] : [], complexXPath];\n }\n async function generateComplexXPath(element) {\n const parts = [];\n let currentElement = element;\n while (currentElement && (isTextNode(currentElement) \|\| isElementNode(currentElement))) {\n if (isElementNode(currentElement)) {\n const el = currentElement;\n let selector = el.tagName.toLowerCase();\n const attributePriority = [\n \"data-qa\",\n \"data-component\",\n \"data-role\",\n \"role\",\n \"aria-role\",\n \"type\",\n \"name\",\n \"aria-label\",\n \"placeholder\",\n \"title\",\n \"alt\"\n ];\n const attributes = attributePriority.map((attr) => {\n let value = el.getAttribute(attr);\n if (attr === \"href-full\" && value) {\n value = el.getAttribute(\"href\");\n }\n return value ? { attr: attr === \"href-full\" ? \"href\" : attr, value } : null;\n }).filter((attr) => attr !== null);\n let uniqueSelector = \"\";\n for (let i = 1; i <= attributes.length; i++) {\n const combinations = getCombinations(attributes, i);\n for (const combo of combinations) {\n const conditions = combo.map((a) => `@${a.attr}=${escapeXPathString(a.value)}`).join(\" and \");\n const xpath2 = `//${selector}[${conditions}]`;\n if (isXPathFirstResultElement(xpath2, el)) {\n uniqueSelector = xpath2;\n break;\n }\n }\n if (uniqueSelector) break;\n }\n if (uniqueSelector) {\n parts.unshift(uniqueSelector.replace(\"//\", \"\"));\n break;\n } else {\n const parent = getParentElement(el);\n if (parent) {\n const siblings = Array.from(parent.children).filter(\n (sibling) => sibling.tagName === el.tagName\n );\n const index = siblings.indexOf(el) + 1;\n selector += siblings.length > 1 ? 
`[${index}]` : \"\";\n }\n parts.unshift(selector);\n }\n }\n currentElement = getParentElement(currentElement);\n }\n const xpath = \"//\" + parts.join(\"/\");\n return xpath;\n }\n async function generateStandardXPath(element) {\n const parts = [];\n while (element && (isTextNode(element) \|\| isElementNode(element))) {\n let index = 0;\n let hasSameTypeSiblings = false;\n const siblings = element.parentElement ? Array.from(element.parentElement.childNodes) : [];\n for (let i = 0; i < siblings.length; i++) {\n const sibling = siblings[i];\n if (sibling.nodeType === element.nodeType && sibling.nodeName === element.nodeName) {\n index = index + 1;\n hasSameTypeSiblings = true;\n if (sibling.isSameNode(element)) {\n break;\n }\n }\n }\n if (element.nodeName !== \"#text\") {\n const tagName = element.nodeName.toLowerCase();\n const pathIndex = hasSameTypeSiblings ? `[${index}]` : \"\";\n parts.unshift(`${tagName}${pathIndex}`);\n }\n element = element.parentElement;\n }\n return parts.length ? 
`/${parts.join(\"/\")}` : \"\";\n }\n async function generatedIdBasedXPath(element) {\n if (isElementNode(element) && element.id) {\n return `//[@id='${element.id}']`;\n }\n return null;\n }\n\n // lib/dom/utils.ts\n async function waitForDomSettle() {\n return new Promise((resolve) => {\n const createTimeout = () => {\n return setTimeout(() => {\n resolve();\n }, 2e3);\n };\n let timeout = createTimeout();\n const observer = new MutationObserver(() => {\n clearTimeout(timeout);\n timeout = createTimeout();\n });\n observer.observe(window.document.body, { childList: true, subtree: true });\n });\n }\n window.waitForDomSettle = waitForDomSettle;\n function calculateViewportHeight() {\n return Math.ceil(window.innerHeight 0.75);\n }\n\n // lib/dom/process.ts\n function isElementNode(node) {\n return node.nodeType === Node.ELEMENT_NODE;\n }\n function isTextNode(node) {\n return node.nodeType === Node.TEXT_NODE && Boolean(node.textContent?.trim());\n }\n async function processDom(chunksSeen) {\n const { chunk, chunksArray } = await pickChunk(chunksSeen);\n const { outputString, selectorMap } = await processElements(chunk);\n console.log(\n `Stagehand (Browser Process): Extracted dom elements:\n${outputString}`\n );\n return {\n outputString,\n selectorMap,\n chunk,\n chunks: chunksArray\n };\n }\n async function processAllOfDom() {\n console.log(\"Stagehand (Browser Process): Processing all of DOM\");\n const viewportHeight = calculateViewportHeight();\n const documentHeight = document.documentElement.scrollHeight;\n const totalChunks = Math.ceil(documentHeight / viewportHeight);\n let index = 0;\n const results = [];\n for (let chunk = 0; chunk < totalChunks; chunk++) {\n const result = await processElements(chunk, true, index);\n results.push(result);\n index += Object.keys(result.selectorMap).length;\n }\n await scrollToHeight(0);\n const allOutputString = results.map((result) => result.outputString).join(\"\");\n const allSelectorMap = results.reduce(\n (acc, 
result) => ({ ...acc, ...result.selectorMap }),\n {}\n );\n console.log(\n `Stagehand (Browser Process): All dom elements: ${allOutputString}`\n );\n return {\n outputString: allOutputString,\n selectorMap: allSelectorMap\n };\n }\n async function scrollToHeight(height) {\n window.scrollTo({ top: height, left: 0, behavior: \"smooth\" });\n await new Promise((resolve) => {\n let scrollEndTimer;\n const handleScrollEnd = () => {\n clearTimeout(scrollEndTimer);\n scrollEndTimer = window.setTimeout(() => {\n window.removeEventListener(\"scroll\", handleScrollEnd);\n resolve();\n }, 100);\n };\n window.addEventListener(\"scroll\", handleScrollEnd, { passive: true });\n handleScrollEnd();\n });\n }\n var xpathCache = /* @__PURE__ / new Map();\n async function processElements(chunk, scrollToChunk = true, indexOffset = 0) {\n console.time(\"processElements:total\");\n const viewportHeight = calculateViewportHeight();\n const chunkHeight = viewportHeight chunk;\n const maxScrollTop = document.documentElement.scrollHeight - viewportHeight;\n const offsetTop = Math.min(chunkHeight, maxScrollTop);\n if (scrollToChunk) {\n console.time(\"processElements:scroll\");\n await scrollToHeight(offsetTop);\n console.timeEnd(\"processElements:scroll\");\n }\n const candidateElements = [];\n const DOMQueue = [...document.body.childNodes];\n console.log(\"Stagehand (Browser Process): Generating candidate elements\");\n console.time(\"processElements:findCandidates\");\n while (DOMQueue.length > 0) {\n const element = DOMQueue.pop();\n let shouldAddElement = false;\n if (element && isElementNode(element)) {\n const childrenCount = element.childNodes.length;\n for (let i = childrenCount - 1; i >= 0; i--) {\n const child = element.childNodes[i];\n DOMQueue.push(child);\n }\n if (isInteractiveElement(element)) {\n if (isActive(element) && isVisible(element)) {\n shouldAddElement = true;\n }\n }\n if (isLeafElement(element)) {\n if (isActive(element) && isVisible(element)) {\n shouldAddElement 
= true;\n }\n }\n }\n if (element && isTextNode(element) && isTextVisible(element)) {\n shouldAddElement = true;\n }\n if (shouldAddElement) {\n candidateElements.push(element);\n }\n }\n console.timeEnd(\"processElements:findCandidates\");\n const selectorMap = {};\n let outputString = \"\";\n console.log(\n `Stagehand (Browser Process): Processing candidate elements: ${candidateElements.length}`\n );\n console.time(\"processElements:processCandidates\");\n console.time(\"processElements:generateXPaths\");\n const xpathLists = await Promise.all(\n candidateElements.map(async (element) => {\n if (xpathCache.has(element)) {\n return xpathCache.get(element);\n }\n const xpaths = await generateXPathsForElement(element);\n xpathCache.set(element, xpaths);\n return xpaths;\n })\n );\n console.timeEnd(\"processElements:generateXPaths\");\n candidateElements.forEach((element, index) => {\n const xpaths = xpathLists[index];\n let elementOutput = \"\";\n if (isTextNode(element)) {\n const textContent = element.textContent?.trim();\n if (textContent) {\n elementOutput += `${index + indexOffset}:${textContent}\n`;\n }\n } else if (isElementNode(element)) {\n const tagName = element.tagName.toLowerCase();\n const attributes = collectEssentialAttributes(element);\n const openingTag = `<${tagName}${attributes ? 
\" \" + attributes : \"\"}>`;\n const closingTag = `</${tagName}>`;\n const textContent = element.textContent?.trim() \|\| \"\";\n elementOutput += `${index + indexOffset}:${openingTag}${textContent}${closingTag}\n`;\n }\n outputString += elementOutput;\n selectorMap[index + indexOffset] = xpaths;\n });\n console.timeEnd(\"processElements:processCandidates\");\n console.timeEnd(\"processElements:total\");\n return {\n outputString,\n selectorMap\n };\n }\n function collectEssentialAttributes(element) {\n const essentialAttributes = [\n \"id\",\n \"class\",\n \"href\",\n \"src\",\n \"aria-label\",\n \"aria-name\",\n \"aria-role\",\n \"aria-description\",\n \"aria-expanded\",\n \"aria-haspopup\",\n \"type\",\n \"value\"\n ];\n const attrs = essentialAttributes.map((attr) => {\n const value = element.getAttribute(attr);\n return value ? `${attr}=\"${value}\"` : \"\";\n }).filter((attr) => attr !== \"\");\n Array.from(element.attributes).forEach((attr) => {\n if (attr.name.startsWith(\"data-\")) {\n attrs.push(`${attr.name}=\"${attr.value}\"`);\n }\n });\n return attrs.join(\" \");\n }\n function storeDOM() {\n const originalDOM = document.body.cloneNode(true);\n console.log(\"DOM state stored.\");\n return originalDOM.outerHTML;\n }\n function restoreDOM(storedDOM) {\n console.log(\"Restoring DOM\");\n if (storedDOM) {\n document.body.innerHTML = storedDOM;\n } else {\n console.error(\"No DOM state was provided.\");\n }\n }\n function createTextBoundingBoxes() {\n const style = document.createElement(\"style\");\n document.head.appendChild(style);\n if (style.sheet) {\n style.sheet.insertRule(\n `\n .stagehand-highlighted-word, .stagehand-space {\n border: 0px solid orange;\n display: inline-block !important;\n visibility: visible;\n }\n `,\n 0\n );\n style.sheet.insertRule(\n `\n code .stagehand-highlighted-word, code .stagehand-space,\n pre .stagehand-highlighted-word, pre .stagehand-space {\n white-space: pre-wrap;\n display: inline !important;\n }\n `,\n 1\n );\n 
}\n function applyHighlighting(root) {\n root.querySelectorAll(\"body \").forEach((element) => {\n if (element.closest(\".stagehand-nav, .stagehand-marker\")) {\n return;\n }\n if ([\"SCRIPT\", \"STYLE\", \"IFRAME\", \"INPUT\", \"TEXTAREA\"].includes(\n element.tagName\n )) {\n return;\n }\n const childNodes = Array.from(element.childNodes);\n childNodes.forEach((node) => {\n if (node.nodeType === 3 && node.textContent?.trim().length > 0) {\n const textContent = node.textContent.replace(/\\u00A0/g, \" \");\n const tokens = textContent.split(/(\\s+)/g);\n const fragment = document.createDocumentFragment();\n const parentIsCode = element.tagName === \"CODE\";\n tokens.forEach((token) => {\n const span = document.createElement(\"span\");\n span.textContent = token;\n if (parentIsCode) {\n span.style.whiteSpace = \"pre-wrap\";\n span.style.display = \"inline\";\n }\n span.className = token.trim().length === 0 ? \"stagehand-space\" : \"stagehand-highlighted-word\";\n fragment.appendChild(span);\n });\n if (fragment.childNodes.length > 0 && node.parentNode) {\n element.insertBefore(fragment, node);\n node.remove();\n }\n }\n });\n });\n }\n applyHighlighting(document);\n document.querySelectorAll(\"iframe\").forEach((iframe) => {\n try {\n iframe.contentWindow?.postMessage({ action: \"highlight\" }, \"\");\n } catch (error) {\n console.error(\"Error accessing iframe content: \", error);\n }\n });\n }\n function getElementBoundingBoxes(xpath) {\n const element = document.evaluate(\n xpath,\n document,\n null,\n XPathResult.FIRST_ORDERED_NODE_TYPE,\n null\n ).singleNodeValue;\n if (!element) return [];\n const isValidText = (text) => text && text.trim().length > 0;\n let dropDownElem = element.querySelector(\"option[selected]\");\n if (!dropDownElem) {\n dropDownElem = element.querySelector(\"option\");\n }\n if (dropDownElem) {\n const elemText = dropDownElem.textContent \|\| \"\";\n if (isValidText(elemText)) {\n const parentRect = element.getBoundingClientRect();\n 
return [\n {\n text: elemText.trim(),\n top: parentRect.top + window.scrollY,\n left: parentRect.left + window.scrollX,\n width: parentRect.width,\n height: parentRect.height\n }\n ];\n } else {\n return [];\n }\n }\n let placeholderText = \"\";\n if ((element.tagName.toLowerCase() === \"input\" \|\| element.tagName.toLowerCase() === \"textarea\") && element.placeholder) {\n placeholderText = element.placeholder;\n } else if (element.tagName.toLowerCase() === \"a\") {\n placeholderText = \"\";\n } else if (element.tagName.toLowerCase() === \"img\") {\n placeholderText = element.alt \|\| \"\";\n }\n const words = element.querySelectorAll(\n \".stagehand-highlighted-word\"\n );\n const boundingBoxes = Array.from(words).map((word) => {\n const rect = word.getBoundingClientRect();\n return {\n text: word.innerText \|\| \"\",\n top: rect.top + window.scrollY,\n left: rect.left + window.scrollX,\n width: rect.width,\n height: rect.height * 0.75\n };\n }).filter(\n (box) => box.width > 0 && box.height > 0 && box.top >= 0 && box.left >= 0 && isValidText(box.text)\n );\n if (boundingBoxes.length === 0) {\n const elementRect = element.getBoundingClientRect();\n return [\n {\n text: placeholderText,\n top: elementRect.top + window.scrollY,\n left: elementRect.left + window.scrollX,\n width: elementRect.width,\n height: elementRect.height * 0.75\n }\n ];\n }\n return boundingBoxes;\n }\n window.processDom = processDom;\n window.processAllOfDom = processAllOfDom;\n window.processElements = processElements;\n window.scrollToHeight = scrollToHeight;\n window.storeDOM = storeDOM;\n window.restoreDOM = restoreDOM;\n window.createTextBoundingBoxes = createTextBoundingBoxes;\n window.getElementBoundingBoxes = getElementBoundingBoxes;\n var leafElementDenyList = [\"SVG\", \"IFRAME\", \"SCRIPT\", \"STYLE\", \"LINK\"];\n var interactiveElementTypes = [\n \"A\",\n \"BUTTON\",\n \"DETAILS\",\n \"EMBED\",\n \"INPUT\",\n \"LABEL\",\n \"MENU\",\n \"MENUITEM\",\n \"OBJECT\",\n \"SELECT\",\n 
\"TEXTAREA\",\n \"SUMMARY\"\n ];\n var interactiveRoles = [\n \"button\",\n \"menu\",\n \"menuitem\",\n \"link\",\n \"checkbox\",\n \"radio\",\n \"slider\",\n \"tab\",\n \"tabpanel\",\n \"textbox\",\n \"combobox\",\n \"grid\",\n \"listbox\",\n \"option\",\n \"progressbar\",\n \"scrollbar\",\n \"searchbox\",\n \"switch\",\n \"tree\",\n \"treeitem\",\n \"spinbutton\",\n \"tooltip\"\n ];\n var interactiveAriaRoles = [\"menu\", \"menuitem\", \"button\"];\n var isVisible = (element) => {\n const rect = element.getBoundingClientRect();\n if (rect.width === 0 \|\| rect.height === 0 \|\| rect.top < 0 \|\| rect.top > window.innerHeight) {\n return false;\n }\n if (!isTopElement(element, rect)) {\n return false;\n }\n const visible = element.checkVisibility({\n checkOpacity: true,\n checkVisibilityCSS: true\n });\n return visible;\n };\n var isTextVisible = (element) => {\n const range = document.createRange();\n range.selectNodeContents(element);\n const rect = range.getBoundingClientRect();\n if (rect.width === 0 \|\| rect.height === 0 \|\| rect.top < 0 \|\| rect.top > window.innerHeight) {\n return false;\n }\n const parent = element.parentElement;\n if (!parent) {\n return false;\n }\n if (!isTopElement(parent, rect)) {\n return false;\n }\n const visible = parent.checkVisibility({\n checkOpacity: true,\n checkVisibilityCSS: true\n });\n return visible;\n };\n function isTopElement(elem, rect) {\n const points = [\n { x: rect.left + rect.width * 0.25, y: rect.top + rect.height * 0.25 },\n { x: rect.left + rect.width * 0.75, y: rect.top + rect.height * 0.25 },\n { x: rect.left + rect.width * 0.25, y: rect.top + rect.height * 0.75 },\n { x: rect.left + rect.width * 0.75, y: rect.top + rect.height * 0.75 },\n { x: rect.left + rect.width / 2, y: rect.top + rect.height / 2 }\n ];\n return points.some((point) => {\n const topEl = document.elementFromPoint(point.x, point.y);\n let current = topEl;\n while (current && current !== document.body) {\n if (current.isSameNode(elem)) 
{\n return true;\n }\n current = current.parentElement;\n }\n return false;\n });\n }\n var isActive = (element) => {\n if (element.hasAttribute(\"disabled\") \|\| element.hasAttribute(\"hidden\") \|\| element.getAttribute(\"aria-disabled\") === \"true\") {\n return false;\n }\n return true;\n };\n var isInteractiveElement = (element) => {\n const elementType = element.tagName;\n const elementRole = element.getAttribute(\"role\");\n const elementAriaRole = element.getAttribute(\"aria-role\");\n return elementType && interactiveElementTypes.includes(elementType) \|\| elementRole && interactiveRoles.includes(elementRole) \|\| elementAriaRole && interactiveAriaRoles.includes(elementAriaRole);\n };\n var isLeafElement = (element) => {\n if (element.textContent === \"\") {\n return false;\n }\n if (element.childNodes.length === 0) {\n return !leafElementDenyList.includes(element.tagName);\n }\n if (element.childNodes.length === 1 && isTextNode(element.childNodes[0])) {\n return true;\n }\n return false;\n };\n async function pickChunk(chunksSeen) {\n const viewportHeight = calculateViewportHeight();\n const documentHeight = document.documentElement.scrollHeight;\n const chunks = Math.ceil(documentHeight / viewportHeight);\n const chunksArray = Array.from({ length: chunks }, (_, i) => i);\n const chunksRemaining = chunksArray.filter((chunk2) => {\n return !chunksSeen.includes(chunk2);\n });\n const currentScrollPosition = window.scrollY;\n const closestChunk = chunksRemaining.reduce((closest, current) => {\n const currentChunkTop = viewportHeight * current;\n const closestChunkTop = viewportHeight * closest;\n return Math.abs(currentScrollPosition - currentChunkTop) < Math.abs(currentScrollPosition - closestChunkTop) ? 
current : closest;\n }, chunksRemaining[0]);\n const chunk = closestChunk;\n if (chunk === void 0) {\n throw new Error(`No chunks remaining to check: ${chunksRemaining}`);\n }\n return {\n chunk,\n chunksArray\n };\n }\n\n // lib/dom/debug.ts\n async function debugDom() {\n window.chunkNumber = 0;\n const { selectorMap: multiSelectorMap } = await window.processElements(\n window.chunkNumber\n );\n const selectorMap = multiSelectorMapToSelectorMap(multiSelectorMap);\n drawChunk(selectorMap);\n setupChunkNav();\n }\n function multiSelectorMapToSelectorMap(multiSelectorMap) {\n return Object.fromEntries(\n Object.entries(multiSelectorMap).map(([key, selectors]) => [\n Number(key),\n selectors[0]\n ])\n );\n }\n function drawChunk(selectorMap) {\n if (!window.showChunks) return;\n cleanupMarkers();\n Object.values(selectorMap).forEach((selector) => {\n const element = document.evaluate(\n selector,\n document,\n null,\n XPathResult.FIRST_ORDERED_NODE_TYPE,\n null\n ).singleNodeValue;\n if (element) {\n let rect;\n if (element.nodeType === Node.ELEMENT_NODE) {\n rect = element.getBoundingClientRect();\n } else {\n const range = document.createRange();\n range.selectNodeContents(element);\n rect = range.getBoundingClientRect();\n }\n const color = \"grey\";\n const overlay = document.createElement(\"div\");\n overlay.style.position = \"absolute\";\n overlay.style.left = `${rect.left + window.scrollX}px`;\n overlay.style.top = `${rect.top + window.scrollY}px`;\n overlay.style.padding = \"2px\";\n overlay.style.width = `${rect.width}px`;\n overlay.style.height = `${rect.height}px`;\n overlay.style.backgroundColor = color;\n overlay.className = \"stagehand-marker\";\n overlay.style.opacity = \"0.3\";\n overlay.style.zIndex = \"1000000000\";\n overlay.style.border = \"1px solid\";\n overlay.style.pointerEvents = \"none\";\n document.body.appendChild(overlay);\n }\n });\n }\n async function cleanupDebug() {\n cleanupMarkers();\n cleanupNav();\n }\n function cleanupMarkers() 
{\n const markers = document.querySelectorAll(\".stagehand-marker\");\n markers.forEach((marker) => {\n marker.remove();\n });\n }\n function cleanupNav() {\n const stagehandNavElements = document.querySelectorAll(\".stagehand-nav\");\n stagehandNavElements.forEach((element) => {\n element.remove();\n });\n }\n function setupChunkNav() {\n const viewportHeight = calculateViewportHeight();\n const documentHeight = document.documentElement.scrollHeight;\n const totalChunks = Math.ceil(documentHeight / viewportHeight);\n if (window.chunkNumber > 0) {\n const prevChunkButton = document.createElement(\"button\");\n prevChunkButton.className = \"stagehand-nav\";\n prevChunkButton.textContent = \"Previous\";\n prevChunkButton.style.marginLeft = \"50px\";\n prevChunkButton.style.position = \"fixed\";\n prevChunkButton.style.bottom = \"10px\";\n prevChunkButton.style.left = \"50%\";\n prevChunkButton.style.transform = \"translateX(-50%)\";\n prevChunkButton.style.zIndex = \"1000000000\";\n prevChunkButton.onclick = async () => {\n cleanupMarkers();\n cleanupNav();\n window.chunkNumber -= 1;\n window.scrollTo(0, window.chunkNumber * viewportHeight);\n await window.waitForDomSettle();\n const { selectorMap: multiSelectorMap } = await window.processElements(\n window.chunkNumber\n );\n const selectorMap = multiSelectorMapToSelectorMap(multiSelectorMap);\n drawChunk(selectorMap);\n setupChunkNav();\n };\n document.body.appendChild(prevChunkButton);\n }\n if (totalChunks > window.chunkNumber) {\n const nextChunkButton = document.createElement(\"button\");\n nextChunkButton.className = \"stagehand-nav\";\n nextChunkButton.textContent = \"Next\";\n nextChunkButton.style.marginRight = \"50px\";\n nextChunkButton.style.position = \"fixed\";\n nextChunkButton.style.bottom = \"10px\";\n nextChunkButton.style.right = \"50%\";\n nextChunkButton.style.transform = \"translateX(50%)\";\n nextChunkButton.style.zIndex = \"1000000000\";\n nextChunkButton.onclick = async () => {\n 
cleanupMarkers();\n cleanupNav();\n window.chunkNumber += 1;\n window.scrollTo(0, window.chunkNumber * viewportHeight);\n await window.waitForDomSettle();\n const { selectorMap: multiSelectorMap } = await window.processElements(\n window.chunkNumber\n );\n const selectorMap = multiSelectorMapToSelectorMap(multiSelectorMap);\n drawChunk(selectorMap);\n setupChunkNav();\n };\n document.body.appendChild(nextChunkButton);\n }\n }\n window.debugDom = debugDom;\n window.cleanupDebug = cleanupDebug;\n})();\n";

10

dist/lib/dom/process.d.ts

		@@ -18,1 +18,11 @@ export declare function isElementNode(node: Node): node is Element;
		}>;
		export declare function storeDOM(): string;
		export declare function restoreDOM(storedDOM: string): void;
		export declare function createTextBoundingBoxes(): void;
		export declare function getElementBoundingBoxes(xpath: string): Array<{
		text: string;
		top: number;
		left: number;
		width: number;
		height: number;
		}>;

74

dist/lib/handlers/extractHandler.d.ts

		@@ -5,2 +5,71 @@ import { LLMProvider } from "../llm/LLMProvider";
		import { LLMClient } from "../llm/LLMClient";
		/**
		* The `StagehandExtractHandler` class is responsible for extracting structured data from a webpage.
		* It provides two approaches: `textExtract` and `domExtract`. `textExtract` is used by default.
		*
		* Here is what `textExtract` does at a high level:
		*
		* 1. Wait for the DOM to settle and start DOM debugging.
		* - Ensures the page is fully loaded and stable before extraction.
		*
		* 2. Store the original DOM before any mutations.
		* - Preserves the initial state of the DOM to restore later.
		* - We do this because creating spans around every word in the DOM (see step 4)
		* becomes very difficult to revert. Text nodes can be finicky, and directly
		* removing the added spans often corrupts the structure of the DOM.
		*
		* 3. Process the DOM to generate a selector map of candidate elements.
		* - Identifies potential elements that contain the data to extract.
		*
		* 4. Create text bounding boxes around every word in the webpage.
		* - Wraps words in spans so that their bounding boxes can be used to
		* determine their positions on the text-rendered-webpage.
		*
		* 5. Collect all text annotations (with positions and dimensions) from each of the candidate elements.
		* - Gathers text and positional data for each word.
		*
		* 6. Group annotations by text and deduplicate them based on proximity.
		* - There is no guarantee that the text annotations are unique (candidate elements can be nested).
		* - Thus, we must remove duplicate words that are close to each other on the page.
		*
		* 7. Restore the original DOM after mutations.
		* - Returns the DOM to its original state after processing.
		*
		* 8. Format the deduplicated annotations into a text representation.
		* - Prepares the text data for the extraction process.
		*
		* 9. Pass the formatted text to an LLM for extraction according to the given instruction and schema.
		* - Uses a language model to extract structured data based on instructions.
		*
		* 10. Handle the extraction response and log the results.
		* - Processes the output from the LLM and logs relevant information.
		*
		*
		* Here is what `domExtract` does at a high level:
		*
		* 1. Wait for the DOM to settle and start DOM debugging.
		* - Ensures the page is fully loaded and stable before extraction.
		*
		* 2. Process the DOM in chunks.
		* - The `processDom` function:
		* - Divides the page into vertical "chunks" based on viewport height.
		* - Picks the next chunk that hasn't been processed yet.
		* - Scrolls to that chunk and extracts candidate elements.
		* - Returns `outputString` (HTML snippets of candidate elements),
		* `selectorMap` (the XPaths of the candidate elements),
		* `chunk` (the current chunk index), and `chunks` (the array of all chunk indices).
		* - This chunk-based approach ensures that large or lengthy pages can be processed in smaller, manageable sections.
		*
		* 3. Pass the extracted DOM elements (in `outputString`) to the LLM for structured data extraction.
		* - Uses the instructions, schema, and previously extracted content as context to
		* guide the LLM in extracting the structured data.
		*
		* 4. Check if extraction is complete.
		* - If the extraction is complete (all chunks have been processed or the LLM determines
		* that we do not need to continue), return the final result.
		* - If not, repeat steps 1-4 with the next chunk until extraction is complete or no more chunks remain.
		*
		* @remarks
		* Each step corresponds to specific code segments, as noted in the comments throughout the code.
		*/
		export declare class StagehandExtractHandler {
		@@ -35,3 +104,3 @@ private readonly stagehand;
		});
		extract<T extends z.AnyZodObject>({ instruction, schema, content, chunksSeen, llmClient, requestId, domSettleTimeoutMs, }: {
		extract<T extends z.AnyZodObject>({ instruction, schema, content, chunksSeen, llmClient, requestId, domSettleTimeoutMs, useTextExtract, }: {
		instruction: string;
		@@ -44,3 +113,6 @@ schema: T;
		domSettleTimeoutMs?: number;
		useTextExtract?: boolean;
		}): Promise<z.infer<T>>;
		private textExtract;
		private domExtract;
		}

3

dist/lib/index.d.ts

		@@ -10,2 +10,3 @@ import { type BrowserContext, type Page } from "@playwright/test";
		context: BrowserContext;
		browserbaseSessionID?: string;
		private env;
		@@ -43,3 +44,3 @@ private apiKey;
		act({ action, modelName, modelClientOptions, useVision, variables, domSettleTimeoutMs, }: ActOptions): Promise<ActResult>;
		extract<T extends z.AnyZodObject>({ instruction, schema, modelName, modelClientOptions, domSettleTimeoutMs, }: ExtractOptions<T>): Promise<ExtractResult<T>>;
		extract<T extends z.AnyZodObject>({ instruction, schema, modelName, modelClientOptions, domSettleTimeoutMs, useTextExtract, }: ExtractOptions<T>): Promise<ExtractResult<T>>;
		observe(options?: ObserveOptions): Promise<ObserveResult[]>;
		@@ -46,0 +47,0 @@ close(): Promise<void>;

3

dist/lib/inference.d.ts

		@@ -8,3 +8,3 @@ import { z } from "zod";
		export declare function act({ action, domElements, steps, llmClient, screenshot, retries, logger, requestId, variables, }: ActParams): Promise<ActResult \| null>;
		export declare function extract({ instruction, previouslyExtractedContent, domElements, schema, llmClient, chunksSeen, chunksTotal, requestId, }: {
		export declare function extract({ instruction, previouslyExtractedContent, domElements, schema, llmClient, chunksSeen, chunksTotal, requestId, isUsingTextExtract, }: {
		instruction: string;
		@@ -18,2 +18,3 @@ previouslyExtractedContent: object;
		requestId: string;
		isUsingTextExtract?: boolean;
		}): Promise<{
		@@ -20,0 +21,0 @@ metadata: {

2

dist/lib/prompt.d.ts

		@@ -8,3 +8,3 @@ import OpenAI from "openai";
		export declare const actTools: Array<OpenAI.ChatCompletionTool>;
		export declare function buildExtractSystemPrompt(isUsingPrintExtractedDataTool?: boolean): ChatMessage;
		export declare function buildExtractSystemPrompt(isUsingPrintExtractedDataTool?: boolean, useTextExtract?: boolean): ChatMessage;
		export declare function buildExtractUserPrompt(instruction: string, domElements: string, isUsingPrintExtractedDataTool?: boolean): ChatMessage;
		@@ -11,0 +11,0 @@ export declare function buildRefineSystemPrompt(): ChatMessage;

17

dist/lib/utils.d.ts

		import { LogLine } from "../types/log";
		import { TextAnnotation } from "../types/textannotation";
		import { z } from "zod";
		export declare function generateId(operation: string): string;
		/**
		* `formatText` converts a list of text annotations into a formatted text representation.
		* Each annotation represents a piece of text at a certain position on a webpage.
		* The formatting attempts to reconstruct a textual "screenshot" of the page by:
		* - Grouping annotations into lines based on their vertical positions.
		* - Adjusting spacing to reflect line gaps.
		* - Attempting to preserve relative positions and formatting.
		*
		* The output is a text block, optionally surrounded by lines of dashes, that aims
		* to closely mirror the visual layout of the text on the page.
		*
		* @param textAnnotations - An array of TextAnnotations describing text and their positions.
		* @param pageWidth - The width of the page in pixels, used to normalize positions.
		* @returns A string representing the text layout of the page.
		*/
		export declare function formatText(textAnnotations: TextAnnotation[], pageWidth: number): string;
		export declare function logLineToString(logLine: LogLine): string;
		export declare function validateZodSchema(schema: z.ZodTypeAny, data: unknown): boolean;

1

dist/types/browser.d.ts

		@@ -8,2 +8,3 @@ import { Browser, BrowserContext } from "@playwright/test";
		contextPath?: string;
		sessionId?: string;
		}

3

dist/types/evals.d.ts

		@@ -9,2 +9,3 @@ import { EvalLogger } from "../evals/utils";
		logger: EvalLogger;
		useTextExtract: boolean;
		}) => Promise<{
		@@ -17,3 +18,3 @@ _success: boolean;
		}>;
		export declare const EvalCategorySchema: z.ZodEnum<["observe", "act", "combination", "extract", "experimental"]>;
		export declare const EvalCategorySchema: z.ZodEnum<["observe", "act", "combination", "extract", "experimental", "text_extract"]>;
		export type EvalCategory = z.infer<typeof EvalCategorySchema>;
		@@ -20,0 +21,0 @@ export interface EvalInput {

6

dist/types/stagehand.d.ts

		@@ -23,6 +23,2 @@ import Browserbase from "@browserbasehq/sdk";
		}
		export interface InitResult {
		debugUrl: string;
		sessionUrl: string;
		}
		export interface InitOptions {
		@@ -39,2 +35,3 @@ /** @deprecated Pass this into the Stagehand constructor instead. This will be removed in the next major version. */
		sessionUrl: string;
		sessionId: string;
		}
		@@ -70,2 +67,3 @@ export interface InitFromPageOptions {
		domSettleTimeoutMs?: number;
		useTextExtract?: boolean;
		}
		@@ -72,0 +70,0 @@ export type ExtractResult<T extends z.AnyZodObject> = z.infer<T>;

154

lib/dom/build/index.js

		@@ -356,2 +356,152 @@ (() => {
		}
		/**
		 * Capture the current page body as an HTML string.
		 *
		 * A deep clone of `document.body` is taken first and the clone is serialized,
		 * so the returned string is a snapshot of the body at call time.
		 *
		 * @returns The `outerHTML` of the cloned `<body>` element.
		 */
		function storeDOM() {
		  const bodySnapshot = document.body.cloneNode(true);
		  console.log("DOM state stored.");
		  const { outerHTML } = bodySnapshot;
		  return outerHTML;
		}
		/**
		 * Replace the page body's markup with a previously captured HTML string.
		 *
		 * When `storedDOM` is falsy (empty string, `null`, `undefined`) the body is
		 * left untouched and an error is logged instead.
		 *
		 * @param storedDOM - HTML string assigned to `document.body.innerHTML`.
		 */
		function restoreDOM(storedDOM) {
		  console.log("Restoring DOM");
		  // Guard clause: nothing to restore from.
		  if (!storedDOM) {
		    console.error("No DOM state was provided.");
		    return;
		  }
		  document.body.innerHTML = storedDOM;
		}
		/**
		 * Wrap every whitespace-delimited token of the page's text nodes in its own
		 * <span> so that individual words can be measured later.
		 *
		 * Side effects:
		 * - Appends a <style> element to document.head with two rules for the
		 *   wrapper classes `.stagehand-highlighted-word` and `.stagehand-space`.
		 * - Replaces text nodes in the document with runs of <span> elements.
		 * - Posts `{ action: "highlight" }` to every iframe's contentWindow.
		 *
		 * Returns nothing.
		 */
		function createTextBoundingBoxes() {
		// Inject the stylesheet that controls how the wrapper spans are laid out.
		const style = document.createElement("style");
		document.head.appendChild(style);
		if (style.sheet) {
		// Rule 0: wrapper spans are rendered as visible inline-blocks.
		style.sheet.insertRule(
		`
		.stagehand-highlighted-word, .stagehand-space {
		border: 0px solid orange;
		display: inline-block !important;
		visibility: visible;
		}
		`,
		0
		);
		// Rule 1: inside <code>/<pre> the spans stay inline and keep their whitespace.
		style.sheet.insertRule(
		`
		code .stagehand-highlighted-word, code .stagehand-space,
		pre .stagehand-highlighted-word, pre .stagehand-space {
		white-space: pre-wrap;
		display: inline !important;
		}
		`,
		1
		);
		}
		// Replaces the direct text-node children of every element under `root`'s
		// <body> with one <span> per token.
		function applyHighlighting(root) {
		root.querySelectorAll("body *").forEach((element) => {
		// Leave Stagehand's own debug UI (nav buttons, markers) untouched.
		if (element.closest(".stagehand-nav, .stagehand-marker")) {
		return;
		}
		// Elements with these tag names are skipped entirely.
		if (["SCRIPT", "STYLE", "IFRAME", "INPUT", "TEXTAREA"].includes(
		element.tagName
		)) {
		return;
		}
		// Snapshot the children: the loop below inserts and removes nodes.
		const childNodes = Array.from(element.childNodes);
		childNodes.forEach((node) => {
		// nodeType 3 is a text node; whitespace-only text nodes are left alone.
		if (node.nodeType === 3 && node.textContent?.trim().length > 0) {
		// Non-breaking spaces become regular spaces so they split like whitespace.
		const textContent = node.textContent.replace(/\u00A0/g, " ");
		// The capture group keeps the whitespace runs as tokens of their own.
		const tokens = textContent.split(/(\s+)/g);
		const fragment = document.createDocumentFragment();
		const parentIsCode = element.tagName === "CODE";
		tokens.forEach((token) => {
		const span = document.createElement("span");
		span.textContent = token;
		if (parentIsCode) {
		// Inline styles for spans whose direct parent is a <code> element.
		span.style.whiteSpace = "pre-wrap";
		span.style.display = "inline";
		}
		// Whitespace-only (including empty) tokens are tagged as spaces, the rest as words.
		span.className = token.trim().length === 0 ? "stagehand-space" : "stagehand-highlighted-word";
		fragment.appendChild(span);
		});
		// Swap the original text node for the generated spans.
		if (fragment.childNodes.length > 0 && node.parentNode) {
		element.insertBefore(fragment, node);
		node.remove();
		}
		}
		});
		});
		}
		applyHighlighting(document);
		// Ask each iframe to highlight itself; the message is sent with target origin "*".
		// NOTE(review): presumably a listener inside the frame handles this message; it is not visible here.
		document.querySelectorAll("iframe").forEach((iframe) => {
		try {
		iframe.contentWindow?.postMessage({ action: "highlight" }, "*");
		} catch (error) {
		console.error("Error accessing iframe content: ", error);
		}
		});
		}
		/**
		 * Compute page-coordinate bounding boxes for the text of the element that
		 * `xpath` resolves to.
		 *
		 * Resolution order:
		 * 1. If the element contains an `<option>` (preferring `option[selected]`), a
		 *    single box with that option's trimmed text and the element's full
		 *    rectangle is returned, or `[]` when the option text is blank.
		 * 2. Otherwise one box per descendant `.stagehand-highlighted-word` span is
		 *    returned, dropping boxes that have no area, blank text, or a negative
		 *    page position.
		 * 3. If no word box survives, a single box covering the element is returned,
		 *    labelled with the `placeholder` (input/textarea) or `alt` (img) text, or
		 *    an empty string.
		 *
		 * `top`/`left` are offset by `window.scrollY`/`window.scrollX`. Word boxes and
		 * the fallback box report 75% of the measured height.
		 *
		 * @param xpath - XPath expression evaluated against `document`.
		 * @returns Array of `{ text, top, left, width, height }`; empty when the XPath
		 *   matches nothing or matches a node that is not an element.
		 */
		function getElementBoundingBoxes(xpath) {
		  const element = document.evaluate(
		    xpath,
		    document,
		    null,
		    XPathResult.FIRST_ORDERED_NODE_TYPE,
		    null
		  ).singleNodeValue;
		  // An XPath can also select text/attribute/comment nodes, which have no
		  // querySelector, tagName or getBoundingClientRect; treat them like "no match"
		  // instead of throwing.
		  if (!element || element.nodeType !== Node.ELEMENT_NODE) return [];
		  const isValidText = (text) => text && text.trim().length > 0;
		  // Translate a viewport rectangle into a page-coordinate box.
		  const toPageBox = (text, rect, heightScale) => ({
		    text,
		    top: rect.top + window.scrollY,
		    left: rect.left + window.scrollX,
		    width: rect.width,
		    height: rect.height * heightScale
		  });
		  // Dropdowns: report the selected option (or the first one) using the
		  // rectangle of the element itself.
		  const dropDownElem = element.querySelector("option[selected]") || element.querySelector("option");
		  if (dropDownElem) {
		    const elemText = dropDownElem.textContent || "";
		    if (!isValidText(elemText)) return [];
		    return [toPageBox(elemText.trim(), element.getBoundingClientRect(), 1)];
		  }
		  // Label used only by the whole-element fallback box below.
		  const tagName = element.tagName.toLowerCase();
		  let placeholderText = "";
		  if ((tagName === "input" || tagName === "textarea") && element.placeholder) {
		    placeholderText = element.placeholder;
		  } else if (tagName === "img") {
		    placeholderText = element.alt || "";
		  }
		  const words = element.querySelectorAll(".stagehand-highlighted-word");
		  const boundingBoxes = Array.from(words)
		    .map((word) => toPageBox(word.innerText || "", word.getBoundingClientRect(), 0.75))
		    .filter(
		      (box) => box.width > 0 && box.height > 0 && box.top >= 0 && box.left >= 0 && isValidText(box.text)
		    );
		  if (boundingBoxes.length === 0) {
		    // No measurable words: fall back to one box spanning the element.
		    return [toPageBox(placeholderText, element.getBoundingClientRect(), 0.75)];
		  }
		  return boundingBoxes;
		}
		window.processDom = processDom;
		@@ -361,2 +511,6 @@ window.processAllOfDom = processAllOfDom;
		window.scrollToHeight = scrollToHeight;
		window.storeDOM = storeDOM;
		window.restoreDOM = restoreDOM;
		window.createTextBoundingBoxes = createTextBoundingBoxes;
		window.getElementBoundingBoxes = getElementBoundingBoxes;
		var leafElementDenyList = ["SVG", "IFRAME", "SCRIPT", "STYLE", "LINK"];
		@@ -363,0 +517,0 @@ var interactiveElementTypes = [

2

lib/dom/build/scriptContent.ts

		@@ -1,1 +0,1 @@
		export const scriptContent = "(() => {\n // lib/dom/xpathUtils.ts\n function getParentElement(node) {\n return isElementNode(node) ? node.parentElement : node.parentNode;\n }\n function getCombinations(attributes, size) {\n const results = [];\n function helper(start, combo) {\n if (combo.length === size) {\n results.push([...combo]);\n return;\n }\n for (let i = start; i < attributes.length; i++) {\n combo.push(attributes[i]);\n helper(i + 1, combo);\n combo.pop();\n }\n }\n helper(0, []);\n return results;\n }\n function isXPathFirstResultElement(xpath, target) {\n try {\n const result = document.evaluate(\n xpath,\n document.documentElement,\n null,\n XPathResult.ORDERED_NODE_SNAPSHOT_TYPE,\n null\n );\n return result.snapshotItem(0) === target;\n } catch (error) {\n console.warn(`Invalid XPath expression: ${xpath}`, error);\n return false;\n }\n }\n function escapeXPathString(value) {\n if (value.includes(\"'\")) {\n if (value.includes('\"')) {\n return \"concat(\" + value.split(/('+)/).map((part) => {\n if (part === \"'\") {\n return `\"'\"`;\n } else if (part.startsWith(\"'\") && part.endsWith(\"'\")) {\n return `\"${part}\"`;\n } else {\n return `'${part}'`;\n }\n }).join(\",\") + \")\";\n } else {\n return `\"${value}\"`;\n }\n } else {\n return `'${value}'`;\n }\n }\n async function generateXPathsForElement(element) {\n if (!element) return [];\n const [complexXPath, standardXPath, idBasedXPath] = await Promise.all([\n generateComplexXPath(element),\n generateStandardXPath(element),\n generatedIdBasedXPath(element)\n ]);\n return [standardXPath, ...idBasedXPath ? 
[idBasedXPath] : [], complexXPath];\n }\n async function generateComplexXPath(element) {\n const parts = [];\n let currentElement = element;\n while (currentElement && (isTextNode(currentElement) \|\| isElementNode(currentElement))) {\n if (isElementNode(currentElement)) {\n const el = currentElement;\n let selector = el.tagName.toLowerCase();\n const attributePriority = [\n \"data-qa\",\n \"data-component\",\n \"data-role\",\n \"role\",\n \"aria-role\",\n \"type\",\n \"name\",\n \"aria-label\",\n \"placeholder\",\n \"title\",\n \"alt\"\n ];\n const attributes = attributePriority.map((attr) => {\n let value = el.getAttribute(attr);\n if (attr === \"href-full\" && value) {\n value = el.getAttribute(\"href\");\n }\n return value ? { attr: attr === \"href-full\" ? \"href\" : attr, value } : null;\n }).filter((attr) => attr !== null);\n let uniqueSelector = \"\";\n for (let i = 1; i <= attributes.length; i++) {\n const combinations = getCombinations(attributes, i);\n for (const combo of combinations) {\n const conditions = combo.map((a) => `@${a.attr}=${escapeXPathString(a.value)}`).join(\" and \");\n const xpath2 = `//${selector}[${conditions}]`;\n if (isXPathFirstResultElement(xpath2, el)) {\n uniqueSelector = xpath2;\n break;\n }\n }\n if (uniqueSelector) break;\n }\n if (uniqueSelector) {\n parts.unshift(uniqueSelector.replace(\"//\", \"\"));\n break;\n } else {\n const parent = getParentElement(el);\n if (parent) {\n const siblings = Array.from(parent.children).filter(\n (sibling) => sibling.tagName === el.tagName\n );\n const index = siblings.indexOf(el) + 1;\n selector += siblings.length > 1 ? 
`[${index}]` : \"\";\n }\n parts.unshift(selector);\n }\n }\n currentElement = getParentElement(currentElement);\n }\n const xpath = \"//\" + parts.join(\"/\");\n return xpath;\n }\n async function generateStandardXPath(element) {\n const parts = [];\n while (element && (isTextNode(element) \|\| isElementNode(element))) {\n let index = 0;\n let hasSameTypeSiblings = false;\n const siblings = element.parentElement ? Array.from(element.parentElement.childNodes) : [];\n for (let i = 0; i < siblings.length; i++) {\n const sibling = siblings[i];\n if (sibling.nodeType === element.nodeType && sibling.nodeName === element.nodeName) {\n index = index + 1;\n hasSameTypeSiblings = true;\n if (sibling.isSameNode(element)) {\n break;\n }\n }\n }\n if (element.nodeName !== \"#text\") {\n const tagName = element.nodeName.toLowerCase();\n const pathIndex = hasSameTypeSiblings ? `[${index}]` : \"\";\n parts.unshift(`${tagName}${pathIndex}`);\n }\n element = element.parentElement;\n }\n return parts.length ? 
`/${parts.join(\"/\")}` : \"\";\n }\n async function generatedIdBasedXPath(element) {\n if (isElementNode(element) && element.id) {\n return `//[@id='${element.id}']`;\n }\n return null;\n }\n\n // lib/dom/utils.ts\n async function waitForDomSettle() {\n return new Promise((resolve) => {\n const createTimeout = () => {\n return setTimeout(() => {\n resolve();\n }, 2e3);\n };\n let timeout = createTimeout();\n const observer = new MutationObserver(() => {\n clearTimeout(timeout);\n timeout = createTimeout();\n });\n observer.observe(window.document.body, { childList: true, subtree: true });\n });\n }\n window.waitForDomSettle = waitForDomSettle;\n function calculateViewportHeight() {\n return Math.ceil(window.innerHeight 0.75);\n }\n\n // lib/dom/process.ts\n function isElementNode(node) {\n return node.nodeType === Node.ELEMENT_NODE;\n }\n function isTextNode(node) {\n return node.nodeType === Node.TEXT_NODE && Boolean(node.textContent?.trim());\n }\n async function processDom(chunksSeen) {\n const { chunk, chunksArray } = await pickChunk(chunksSeen);\n const { outputString, selectorMap } = await processElements(chunk);\n console.log(\n `Stagehand (Browser Process): Extracted dom elements:\n${outputString}`\n );\n return {\n outputString,\n selectorMap,\n chunk,\n chunks: chunksArray\n };\n }\n async function processAllOfDom() {\n console.log(\"Stagehand (Browser Process): Processing all of DOM\");\n const viewportHeight = calculateViewportHeight();\n const documentHeight = document.documentElement.scrollHeight;\n const totalChunks = Math.ceil(documentHeight / viewportHeight);\n let index = 0;\n const results = [];\n for (let chunk = 0; chunk < totalChunks; chunk++) {\n const result = await processElements(chunk, true, index);\n results.push(result);\n index += Object.keys(result.selectorMap).length;\n }\n await scrollToHeight(0);\n const allOutputString = results.map((result) => result.outputString).join(\"\");\n const allSelectorMap = results.reduce(\n (acc, 
result) => ({ ...acc, ...result.selectorMap }),\n {}\n );\n console.log(\n `Stagehand (Browser Process): All dom elements: ${allOutputString}`\n );\n return {\n outputString: allOutputString,\n selectorMap: allSelectorMap\n };\n }\n async function scrollToHeight(height) {\n window.scrollTo({ top: height, left: 0, behavior: \"smooth\" });\n await new Promise((resolve) => {\n let scrollEndTimer;\n const handleScrollEnd = () => {\n clearTimeout(scrollEndTimer);\n scrollEndTimer = window.setTimeout(() => {\n window.removeEventListener(\"scroll\", handleScrollEnd);\n resolve();\n }, 100);\n };\n window.addEventListener(\"scroll\", handleScrollEnd, { passive: true });\n handleScrollEnd();\n });\n }\n var xpathCache = /* @__PURE__ / new Map();\n async function processElements(chunk, scrollToChunk = true, indexOffset = 0) {\n console.time(\"processElements:total\");\n const viewportHeight = calculateViewportHeight();\n const chunkHeight = viewportHeight chunk;\n const maxScrollTop = document.documentElement.scrollHeight - viewportHeight;\n const offsetTop = Math.min(chunkHeight, maxScrollTop);\n if (scrollToChunk) {\n console.time(\"processElements:scroll\");\n await scrollToHeight(offsetTop);\n console.timeEnd(\"processElements:scroll\");\n }\n const candidateElements = [];\n const DOMQueue = [...document.body.childNodes];\n console.log(\"Stagehand (Browser Process): Generating candidate elements\");\n console.time(\"processElements:findCandidates\");\n while (DOMQueue.length > 0) {\n const element = DOMQueue.pop();\n let shouldAddElement = false;\n if (element && isElementNode(element)) {\n const childrenCount = element.childNodes.length;\n for (let i = childrenCount - 1; i >= 0; i--) {\n const child = element.childNodes[i];\n DOMQueue.push(child);\n }\n if (isInteractiveElement(element)) {\n if (isActive(element) && isVisible(element)) {\n shouldAddElement = true;\n }\n }\n if (isLeafElement(element)) {\n if (isActive(element) && isVisible(element)) {\n shouldAddElement 
= true;\n }\n }\n }\n if (element && isTextNode(element) && isTextVisible(element)) {\n shouldAddElement = true;\n }\n if (shouldAddElement) {\n candidateElements.push(element);\n }\n }\n console.timeEnd(\"processElements:findCandidates\");\n const selectorMap = {};\n let outputString = \"\";\n console.log(\n `Stagehand (Browser Process): Processing candidate elements: ${candidateElements.length}`\n );\n console.time(\"processElements:processCandidates\");\n console.time(\"processElements:generateXPaths\");\n const xpathLists = await Promise.all(\n candidateElements.map(async (element) => {\n if (xpathCache.has(element)) {\n return xpathCache.get(element);\n }\n const xpaths = await generateXPathsForElement(element);\n xpathCache.set(element, xpaths);\n return xpaths;\n })\n );\n console.timeEnd(\"processElements:generateXPaths\");\n candidateElements.forEach((element, index) => {\n const xpaths = xpathLists[index];\n let elementOutput = \"\";\n if (isTextNode(element)) {\n const textContent = element.textContent?.trim();\n if (textContent) {\n elementOutput += `${index + indexOffset}:${textContent}\n`;\n }\n } else if (isElementNode(element)) {\n const tagName = element.tagName.toLowerCase();\n const attributes = collectEssentialAttributes(element);\n const openingTag = `<${tagName}${attributes ? 
\" \" + attributes : \"\"}>`;\n const closingTag = `</${tagName}>`;\n const textContent = element.textContent?.trim() \|\| \"\";\n elementOutput += `${index + indexOffset}:${openingTag}${textContent}${closingTag}\n`;\n }\n outputString += elementOutput;\n selectorMap[index + indexOffset] = xpaths;\n });\n console.timeEnd(\"processElements:processCandidates\");\n console.timeEnd(\"processElements:total\");\n return {\n outputString,\n selectorMap\n };\n }\n function collectEssentialAttributes(element) {\n const essentialAttributes = [\n \"id\",\n \"class\",\n \"href\",\n \"src\",\n \"aria-label\",\n \"aria-name\",\n \"aria-role\",\n \"aria-description\",\n \"aria-expanded\",\n \"aria-haspopup\",\n \"type\",\n \"value\"\n ];\n const attrs = essentialAttributes.map((attr) => {\n const value = element.getAttribute(attr);\n return value ? `${attr}=\"${value}\"` : \"\";\n }).filter((attr) => attr !== \"\");\n Array.from(element.attributes).forEach((attr) => {\n if (attr.name.startsWith(\"data-\")) {\n attrs.push(`${attr.name}=\"${attr.value}\"`);\n }\n });\n return attrs.join(\" \");\n }\n window.processDom = processDom;\n window.processAllOfDom = processAllOfDom;\n window.processElements = processElements;\n window.scrollToHeight = scrollToHeight;\n var leafElementDenyList = [\"SVG\", \"IFRAME\", \"SCRIPT\", \"STYLE\", \"LINK\"];\n var interactiveElementTypes = [\n \"A\",\n \"BUTTON\",\n \"DETAILS\",\n \"EMBED\",\n \"INPUT\",\n \"LABEL\",\n \"MENU\",\n \"MENUITEM\",\n \"OBJECT\",\n \"SELECT\",\n \"TEXTAREA\",\n \"SUMMARY\"\n ];\n var interactiveRoles = [\n \"button\",\n \"menu\",\n \"menuitem\",\n \"link\",\n \"checkbox\",\n \"radio\",\n \"slider\",\n \"tab\",\n \"tabpanel\",\n \"textbox\",\n \"combobox\",\n \"grid\",\n \"listbox\",\n \"option\",\n \"progressbar\",\n \"scrollbar\",\n \"searchbox\",\n \"switch\",\n \"tree\",\n \"treeitem\",\n \"spinbutton\",\n \"tooltip\"\n ];\n var interactiveAriaRoles = [\"menu\", \"menuitem\", \"button\"];\n var isVisible = (element) 
=> {\n const rect = element.getBoundingClientRect();\n if (rect.width === 0 \|\| rect.height === 0 \|\| rect.top < 0 \|\| rect.top > window.innerHeight) {\n return false;\n }\n if (!isTopElement(element, rect)) {\n return false;\n }\n const visible = element.checkVisibility({\n checkOpacity: true,\n checkVisibilityCSS: true\n });\n return visible;\n };\n var isTextVisible = (element) => {\n const range = document.createRange();\n range.selectNodeContents(element);\n const rect = range.getBoundingClientRect();\n if (rect.width === 0 \|\| rect.height === 0 \|\| rect.top < 0 \|\| rect.top > window.innerHeight) {\n return false;\n }\n const parent = element.parentElement;\n if (!parent) {\n return false;\n }\n if (!isTopElement(parent, rect)) {\n return false;\n }\n const visible = parent.checkVisibility({\n checkOpacity: true,\n checkVisibilityCSS: true\n });\n return visible;\n };\n function isTopElement(elem, rect) {\n const points = [\n { x: rect.left + rect.width * 0.25, y: rect.top + rect.height * 0.25 },\n { x: rect.left + rect.width * 0.75, y: rect.top + rect.height * 0.25 },\n { x: rect.left + rect.width * 0.25, y: rect.top + rect.height * 0.75 },\n { x: rect.left + rect.width * 0.75, y: rect.top + rect.height * 0.75 },\n { x: rect.left + rect.width / 2, y: rect.top + rect.height / 2 }\n ];\n return points.some((point) => {\n const topEl = document.elementFromPoint(point.x, point.y);\n let current = topEl;\n while (current && current !== document.body) {\n if (current.isSameNode(elem)) {\n return true;\n }\n current = current.parentElement;\n }\n return false;\n });\n }\n var isActive = (element) => {\n if (element.hasAttribute(\"disabled\") \|\| element.hasAttribute(\"hidden\") \|\| element.getAttribute(\"aria-disabled\") === \"true\") {\n return false;\n }\n return true;\n };\n var isInteractiveElement = (element) => {\n const elementType = element.tagName;\n const elementRole = element.getAttribute(\"role\");\n const elementAriaRole = 
element.getAttribute(\"aria-role\");\n return elementType && interactiveElementTypes.includes(elementType) \|\| elementRole && interactiveRoles.includes(elementRole) \|\| elementAriaRole && interactiveAriaRoles.includes(elementAriaRole);\n };\n var isLeafElement = (element) => {\n if (element.textContent === \"\") {\n return false;\n }\n if (element.childNodes.length === 0) {\n return !leafElementDenyList.includes(element.tagName);\n }\n if (element.childNodes.length === 1 && isTextNode(element.childNodes[0])) {\n return true;\n }\n return false;\n };\n async function pickChunk(chunksSeen) {\n const viewportHeight = calculateViewportHeight();\n const documentHeight = document.documentElement.scrollHeight;\n const chunks = Math.ceil(documentHeight / viewportHeight);\n const chunksArray = Array.from({ length: chunks }, (_, i) => i);\n const chunksRemaining = chunksArray.filter((chunk2) => {\n return !chunksSeen.includes(chunk2);\n });\n const currentScrollPosition = window.scrollY;\n const closestChunk = chunksRemaining.reduce((closest, current) => {\n const currentChunkTop = viewportHeight * current;\n const closestChunkTop = viewportHeight * closest;\n return Math.abs(currentScrollPosition - currentChunkTop) < Math.abs(currentScrollPosition - closestChunkTop) ? 
current : closest;\n }, chunksRemaining[0]);\n const chunk = closestChunk;\n if (chunk === void 0) {\n throw new Error(`No chunks remaining to check: ${chunksRemaining}`);\n }\n return {\n chunk,\n chunksArray\n };\n }\n\n // lib/dom/debug.ts\n async function debugDom() {\n window.chunkNumber = 0;\n const { selectorMap: multiSelectorMap } = await window.processElements(\n window.chunkNumber\n );\n const selectorMap = multiSelectorMapToSelectorMap(multiSelectorMap);\n drawChunk(selectorMap);\n setupChunkNav();\n }\n function multiSelectorMapToSelectorMap(multiSelectorMap) {\n return Object.fromEntries(\n Object.entries(multiSelectorMap).map(([key, selectors]) => [\n Number(key),\n selectors[0]\n ])\n );\n }\n function drawChunk(selectorMap) {\n if (!window.showChunks) return;\n cleanupMarkers();\n Object.values(selectorMap).forEach((selector) => {\n const element = document.evaluate(\n selector,\n document,\n null,\n XPathResult.FIRST_ORDERED_NODE_TYPE,\n null\n ).singleNodeValue;\n if (element) {\n let rect;\n if (element.nodeType === Node.ELEMENT_NODE) {\n rect = element.getBoundingClientRect();\n } else {\n const range = document.createRange();\n range.selectNodeContents(element);\n rect = range.getBoundingClientRect();\n }\n const color = \"grey\";\n const overlay = document.createElement(\"div\");\n overlay.style.position = \"absolute\";\n overlay.style.left = `${rect.left + window.scrollX}px`;\n overlay.style.top = `${rect.top + window.scrollY}px`;\n overlay.style.padding = \"2px\";\n overlay.style.width = `${rect.width}px`;\n overlay.style.height = `${rect.height}px`;\n overlay.style.backgroundColor = color;\n overlay.className = \"stagehand-marker\";\n overlay.style.opacity = \"0.3\";\n overlay.style.zIndex = \"1000000000\";\n overlay.style.border = \"1px solid\";\n overlay.style.pointerEvents = \"none\";\n document.body.appendChild(overlay);\n }\n });\n }\n async function cleanupDebug() {\n cleanupMarkers();\n cleanupNav();\n }\n function cleanupMarkers() 
{\n const markers = document.querySelectorAll(\".stagehand-marker\");\n markers.forEach((marker) => {\n marker.remove();\n });\n }\n function cleanupNav() {\n const stagehandNavElements = document.querySelectorAll(\".stagehand-nav\");\n stagehandNavElements.forEach((element) => {\n element.remove();\n });\n }\n function setupChunkNav() {\n const viewportHeight = calculateViewportHeight();\n const documentHeight = document.documentElement.scrollHeight;\n const totalChunks = Math.ceil(documentHeight / viewportHeight);\n if (window.chunkNumber > 0) {\n const prevChunkButton = document.createElement(\"button\");\n prevChunkButton.className = \"stagehand-nav\";\n prevChunkButton.textContent = \"Previous\";\n prevChunkButton.style.marginLeft = \"50px\";\n prevChunkButton.style.position = \"fixed\";\n prevChunkButton.style.bottom = \"10px\";\n prevChunkButton.style.left = \"50%\";\n prevChunkButton.style.transform = \"translateX(-50%)\";\n prevChunkButton.style.zIndex = \"1000000000\";\n prevChunkButton.onclick = async () => {\n cleanupMarkers();\n cleanupNav();\n window.chunkNumber -= 1;\n window.scrollTo(0, window.chunkNumber * viewportHeight);\n await window.waitForDomSettle();\n const { selectorMap: multiSelectorMap } = await window.processElements(\n window.chunkNumber\n );\n const selectorMap = multiSelectorMapToSelectorMap(multiSelectorMap);\n drawChunk(selectorMap);\n setupChunkNav();\n };\n document.body.appendChild(prevChunkButton);\n }\n if (totalChunks > window.chunkNumber) {\n const nextChunkButton = document.createElement(\"button\");\n nextChunkButton.className = \"stagehand-nav\";\n nextChunkButton.textContent = \"Next\";\n nextChunkButton.style.marginRight = \"50px\";\n nextChunkButton.style.position = \"fixed\";\n nextChunkButton.style.bottom = \"10px\";\n nextChunkButton.style.right = \"50%\";\n nextChunkButton.style.transform = \"translateX(50%)\";\n nextChunkButton.style.zIndex = \"1000000000\";\n nextChunkButton.onclick = async () => {\n 
cleanupMarkers();\n cleanupNav();\n window.chunkNumber += 1;\n window.scrollTo(0, window.chunkNumber * viewportHeight);\n await window.waitForDomSettle();\n const { selectorMap: multiSelectorMap } = await window.processElements(\n window.chunkNumber\n );\n const selectorMap = multiSelectorMapToSelectorMap(multiSelectorMap);\n drawChunk(selectorMap);\n setupChunkNav();\n };\n document.body.appendChild(nextChunkButton);\n }\n }\n window.debugDom = debugDom;\n window.cleanupDebug = cleanupDebug;\n})();\n";
		export const scriptContent = "(() => {\n // lib/dom/xpathUtils.ts\n function getParentElement(node) {\n return isElementNode(node) ? node.parentElement : node.parentNode;\n }\n function getCombinations(attributes, size) {\n const results = [];\n function helper(start, combo) {\n if (combo.length === size) {\n results.push([...combo]);\n return;\n }\n for (let i = start; i < attributes.length; i++) {\n combo.push(attributes[i]);\n helper(i + 1, combo);\n combo.pop();\n }\n }\n helper(0, []);\n return results;\n }\n function isXPathFirstResultElement(xpath, target) {\n try {\n const result = document.evaluate(\n xpath,\n document.documentElement,\n null,\n XPathResult.ORDERED_NODE_SNAPSHOT_TYPE,\n null\n );\n return result.snapshotItem(0) === target;\n } catch (error) {\n console.warn(`Invalid XPath expression: ${xpath}`, error);\n return false;\n }\n }\n function escapeXPathString(value) {\n if (value.includes(\"'\")) {\n if (value.includes('\"')) {\n return \"concat(\" + value.split(/('+)/).map((part) => {\n if (part === \"'\") {\n return `\"'\"`;\n } else if (part.startsWith(\"'\") && part.endsWith(\"'\")) {\n return `\"${part}\"`;\n } else {\n return `'${part}'`;\n }\n }).join(\",\") + \")\";\n } else {\n return `\"${value}\"`;\n }\n } else {\n return `'${value}'`;\n }\n }\n async function generateXPathsForElement(element) {\n if (!element) return [];\n const [complexXPath, standardXPath, idBasedXPath] = await Promise.all([\n generateComplexXPath(element),\n generateStandardXPath(element),\n generatedIdBasedXPath(element)\n ]);\n return [standardXPath, ...idBasedXPath ? 
[idBasedXPath] : [], complexXPath];\n }\n async function generateComplexXPath(element) {\n const parts = [];\n let currentElement = element;\n while (currentElement && (isTextNode(currentElement) \|\| isElementNode(currentElement))) {\n if (isElementNode(currentElement)) {\n const el = currentElement;\n let selector = el.tagName.toLowerCase();\n const attributePriority = [\n \"data-qa\",\n \"data-component\",\n \"data-role\",\n \"role\",\n \"aria-role\",\n \"type\",\n \"name\",\n \"aria-label\",\n \"placeholder\",\n \"title\",\n \"alt\"\n ];\n const attributes = attributePriority.map((attr) => {\n let value = el.getAttribute(attr);\n if (attr === \"href-full\" && value) {\n value = el.getAttribute(\"href\");\n }\n return value ? { attr: attr === \"href-full\" ? \"href\" : attr, value } : null;\n }).filter((attr) => attr !== null);\n let uniqueSelector = \"\";\n for (let i = 1; i <= attributes.length; i++) {\n const combinations = getCombinations(attributes, i);\n for (const combo of combinations) {\n const conditions = combo.map((a) => `@${a.attr}=${escapeXPathString(a.value)}`).join(\" and \");\n const xpath2 = `//${selector}[${conditions}]`;\n if (isXPathFirstResultElement(xpath2, el)) {\n uniqueSelector = xpath2;\n break;\n }\n }\n if (uniqueSelector) break;\n }\n if (uniqueSelector) {\n parts.unshift(uniqueSelector.replace(\"//\", \"\"));\n break;\n } else {\n const parent = getParentElement(el);\n if (parent) {\n const siblings = Array.from(parent.children).filter(\n (sibling) => sibling.tagName === el.tagName\n );\n const index = siblings.indexOf(el) + 1;\n selector += siblings.length > 1 ? 
`[${index}]` : \"\";\n }\n parts.unshift(selector);\n }\n }\n currentElement = getParentElement(currentElement);\n }\n const xpath = \"//\" + parts.join(\"/\");\n return xpath;\n }\n async function generateStandardXPath(element) {\n const parts = [];\n while (element && (isTextNode(element) \|\| isElementNode(element))) {\n let index = 0;\n let hasSameTypeSiblings = false;\n const siblings = element.parentElement ? Array.from(element.parentElement.childNodes) : [];\n for (let i = 0; i < siblings.length; i++) {\n const sibling = siblings[i];\n if (sibling.nodeType === element.nodeType && sibling.nodeName === element.nodeName) {\n index = index + 1;\n hasSameTypeSiblings = true;\n if (sibling.isSameNode(element)) {\n break;\n }\n }\n }\n if (element.nodeName !== \"#text\") {\n const tagName = element.nodeName.toLowerCase();\n const pathIndex = hasSameTypeSiblings ? `[${index}]` : \"\";\n parts.unshift(`${tagName}${pathIndex}`);\n }\n element = element.parentElement;\n }\n return parts.length ? 
`/${parts.join(\"/\")}` : \"\";\n }\n async function generatedIdBasedXPath(element) {\n if (isElementNode(element) && element.id) {\n return `//[@id='${element.id}']`;\n }\n return null;\n }\n\n // lib/dom/utils.ts\n async function waitForDomSettle() {\n return new Promise((resolve) => {\n const createTimeout = () => {\n return setTimeout(() => {\n resolve();\n }, 2e3);\n };\n let timeout = createTimeout();\n const observer = new MutationObserver(() => {\n clearTimeout(timeout);\n timeout = createTimeout();\n });\n observer.observe(window.document.body, { childList: true, subtree: true });\n });\n }\n window.waitForDomSettle = waitForDomSettle;\n function calculateViewportHeight() {\n return Math.ceil(window.innerHeight 0.75);\n }\n\n // lib/dom/process.ts\n function isElementNode(node) {\n return node.nodeType === Node.ELEMENT_NODE;\n }\n function isTextNode(node) {\n return node.nodeType === Node.TEXT_NODE && Boolean(node.textContent?.trim());\n }\n async function processDom(chunksSeen) {\n const { chunk, chunksArray } = await pickChunk(chunksSeen);\n const { outputString, selectorMap } = await processElements(chunk);\n console.log(\n `Stagehand (Browser Process): Extracted dom elements:\n${outputString}`\n );\n return {\n outputString,\n selectorMap,\n chunk,\n chunks: chunksArray\n };\n }\n async function processAllOfDom() {\n console.log(\"Stagehand (Browser Process): Processing all of DOM\");\n const viewportHeight = calculateViewportHeight();\n const documentHeight = document.documentElement.scrollHeight;\n const totalChunks = Math.ceil(documentHeight / viewportHeight);\n let index = 0;\n const results = [];\n for (let chunk = 0; chunk < totalChunks; chunk++) {\n const result = await processElements(chunk, true, index);\n results.push(result);\n index += Object.keys(result.selectorMap).length;\n }\n await scrollToHeight(0);\n const allOutputString = results.map((result) => result.outputString).join(\"\");\n const allSelectorMap = results.reduce(\n (acc, 
result) => ({ ...acc, ...result.selectorMap }),\n {}\n );\n console.log(\n `Stagehand (Browser Process): All dom elements: ${allOutputString}`\n );\n return {\n outputString: allOutputString,\n selectorMap: allSelectorMap\n };\n }\n async function scrollToHeight(height) {\n window.scrollTo({ top: height, left: 0, behavior: \"smooth\" });\n await new Promise((resolve) => {\n let scrollEndTimer;\n const handleScrollEnd = () => {\n clearTimeout(scrollEndTimer);\n scrollEndTimer = window.setTimeout(() => {\n window.removeEventListener(\"scroll\", handleScrollEnd);\n resolve();\n }, 100);\n };\n window.addEventListener(\"scroll\", handleScrollEnd, { passive: true });\n handleScrollEnd();\n });\n }\n var xpathCache = /* @__PURE__ / new Map();\n async function processElements(chunk, scrollToChunk = true, indexOffset = 0) {\n console.time(\"processElements:total\");\n const viewportHeight = calculateViewportHeight();\n const chunkHeight = viewportHeight chunk;\n const maxScrollTop = document.documentElement.scrollHeight - viewportHeight;\n const offsetTop = Math.min(chunkHeight, maxScrollTop);\n if (scrollToChunk) {\n console.time(\"processElements:scroll\");\n await scrollToHeight(offsetTop);\n console.timeEnd(\"processElements:scroll\");\n }\n const candidateElements = [];\n const DOMQueue = [...document.body.childNodes];\n console.log(\"Stagehand (Browser Process): Generating candidate elements\");\n console.time(\"processElements:findCandidates\");\n while (DOMQueue.length > 0) {\n const element = DOMQueue.pop();\n let shouldAddElement = false;\n if (element && isElementNode(element)) {\n const childrenCount = element.childNodes.length;\n for (let i = childrenCount - 1; i >= 0; i--) {\n const child = element.childNodes[i];\n DOMQueue.push(child);\n }\n if (isInteractiveElement(element)) {\n if (isActive(element) && isVisible(element)) {\n shouldAddElement = true;\n }\n }\n if (isLeafElement(element)) {\n if (isActive(element) && isVisible(element)) {\n shouldAddElement 
= true;\n }\n }\n }\n if (element && isTextNode(element) && isTextVisible(element)) {\n shouldAddElement = true;\n }\n if (shouldAddElement) {\n candidateElements.push(element);\n }\n }\n console.timeEnd(\"processElements:findCandidates\");\n const selectorMap = {};\n let outputString = \"\";\n console.log(\n `Stagehand (Browser Process): Processing candidate elements: ${candidateElements.length}`\n );\n console.time(\"processElements:processCandidates\");\n console.time(\"processElements:generateXPaths\");\n const xpathLists = await Promise.all(\n candidateElements.map(async (element) => {\n if (xpathCache.has(element)) {\n return xpathCache.get(element);\n }\n const xpaths = await generateXPathsForElement(element);\n xpathCache.set(element, xpaths);\n return xpaths;\n })\n );\n console.timeEnd(\"processElements:generateXPaths\");\n candidateElements.forEach((element, index) => {\n const xpaths = xpathLists[index];\n let elementOutput = \"\";\n if (isTextNode(element)) {\n const textContent = element.textContent?.trim();\n if (textContent) {\n elementOutput += `${index + indexOffset}:${textContent}\n`;\n }\n } else if (isElementNode(element)) {\n const tagName = element.tagName.toLowerCase();\n const attributes = collectEssentialAttributes(element);\n const openingTag = `<${tagName}${attributes ? 
\" \" + attributes : \"\"}>`;\n const closingTag = `</${tagName}>`;\n const textContent = element.textContent?.trim() \|\| \"\";\n elementOutput += `${index + indexOffset}:${openingTag}${textContent}${closingTag}\n`;\n }\n outputString += elementOutput;\n selectorMap[index + indexOffset] = xpaths;\n });\n console.timeEnd(\"processElements:processCandidates\");\n console.timeEnd(\"processElements:total\");\n return {\n outputString,\n selectorMap\n };\n }\n function collectEssentialAttributes(element) {\n const essentialAttributes = [\n \"id\",\n \"class\",\n \"href\",\n \"src\",\n \"aria-label\",\n \"aria-name\",\n \"aria-role\",\n \"aria-description\",\n \"aria-expanded\",\n \"aria-haspopup\",\n \"type\",\n \"value\"\n ];\n const attrs = essentialAttributes.map((attr) => {\n const value = element.getAttribute(attr);\n return value ? `${attr}=\"${value}\"` : \"\";\n }).filter((attr) => attr !== \"\");\n Array.from(element.attributes).forEach((attr) => {\n if (attr.name.startsWith(\"data-\")) {\n attrs.push(`${attr.name}=\"${attr.value}\"`);\n }\n });\n return attrs.join(\" \");\n }\n function storeDOM() {\n const originalDOM = document.body.cloneNode(true);\n console.log(\"DOM state stored.\");\n return originalDOM.outerHTML;\n }\n function restoreDOM(storedDOM) {\n console.log(\"Restoring DOM\");\n if (storedDOM) {\n document.body.innerHTML = storedDOM;\n } else {\n console.error(\"No DOM state was provided.\");\n }\n }\n function createTextBoundingBoxes() {\n const style = document.createElement(\"style\");\n document.head.appendChild(style);\n if (style.sheet) {\n style.sheet.insertRule(\n `\n .stagehand-highlighted-word, .stagehand-space {\n border: 0px solid orange;\n display: inline-block !important;\n visibility: visible;\n }\n `,\n 0\n );\n style.sheet.insertRule(\n `\n code .stagehand-highlighted-word, code .stagehand-space,\n pre .stagehand-highlighted-word, pre .stagehand-space {\n white-space: pre-wrap;\n display: inline !important;\n }\n `,\n 1\n );\n 
}\n function applyHighlighting(root) {\n root.querySelectorAll(\"body \").forEach((element) => {\n if (element.closest(\".stagehand-nav, .stagehand-marker\")) {\n return;\n }\n if ([\"SCRIPT\", \"STYLE\", \"IFRAME\", \"INPUT\", \"TEXTAREA\"].includes(\n element.tagName\n )) {\n return;\n }\n const childNodes = Array.from(element.childNodes);\n childNodes.forEach((node) => {\n if (node.nodeType === 3 && node.textContent?.trim().length > 0) {\n const textContent = node.textContent.replace(/\\u00A0/g, \" \");\n const tokens = textContent.split(/(\\s+)/g);\n const fragment = document.createDocumentFragment();\n const parentIsCode = element.tagName === \"CODE\";\n tokens.forEach((token) => {\n const span = document.createElement(\"span\");\n span.textContent = token;\n if (parentIsCode) {\n span.style.whiteSpace = \"pre-wrap\";\n span.style.display = \"inline\";\n }\n span.className = token.trim().length === 0 ? \"stagehand-space\" : \"stagehand-highlighted-word\";\n fragment.appendChild(span);\n });\n if (fragment.childNodes.length > 0 && node.parentNode) {\n element.insertBefore(fragment, node);\n node.remove();\n }\n }\n });\n });\n }\n applyHighlighting(document);\n document.querySelectorAll(\"iframe\").forEach((iframe) => {\n try {\n iframe.contentWindow?.postMessage({ action: \"highlight\" }, \"\");\n } catch (error) {\n console.error(\"Error accessing iframe content: \", error);\n }\n });\n }\n function getElementBoundingBoxes(xpath) {\n const element = document.evaluate(\n xpath,\n document,\n null,\n XPathResult.FIRST_ORDERED_NODE_TYPE,\n null\n ).singleNodeValue;\n if (!element) return [];\n const isValidText = (text) => text && text.trim().length > 0;\n let dropDownElem = element.querySelector(\"option[selected]\");\n if (!dropDownElem) {\n dropDownElem = element.querySelector(\"option\");\n }\n if (dropDownElem) {\n const elemText = dropDownElem.textContent \|\| \"\";\n if (isValidText(elemText)) {\n const parentRect = element.getBoundingClientRect();\n 
return [\n {\n text: elemText.trim(),\n top: parentRect.top + window.scrollY,\n left: parentRect.left + window.scrollX,\n width: parentRect.width,\n height: parentRect.height\n }\n ];\n } else {\n return [];\n }\n }\n let placeholderText = \"\";\n if ((element.tagName.toLowerCase() === \"input\" \|\| element.tagName.toLowerCase() === \"textarea\") && element.placeholder) {\n placeholderText = element.placeholder;\n } else if (element.tagName.toLowerCase() === \"a\") {\n placeholderText = \"\";\n } else if (element.tagName.toLowerCase() === \"img\") {\n placeholderText = element.alt \|\| \"\";\n }\n const words = element.querySelectorAll(\n \".stagehand-highlighted-word\"\n );\n const boundingBoxes = Array.from(words).map((word) => {\n const rect = word.getBoundingClientRect();\n return {\n text: word.innerText \|\| \"\",\n top: rect.top + window.scrollY,\n left: rect.left + window.scrollX,\n width: rect.width,\n height: rect.height * 0.75\n };\n }).filter(\n (box) => box.width > 0 && box.height > 0 && box.top >= 0 && box.left >= 0 && isValidText(box.text)\n );\n if (boundingBoxes.length === 0) {\n const elementRect = element.getBoundingClientRect();\n return [\n {\n text: placeholderText,\n top: elementRect.top + window.scrollY,\n left: elementRect.left + window.scrollX,\n width: elementRect.width,\n height: elementRect.height * 0.75\n }\n ];\n }\n return boundingBoxes;\n }\n window.processDom = processDom;\n window.processAllOfDom = processAllOfDom;\n window.processElements = processElements;\n window.scrollToHeight = scrollToHeight;\n window.storeDOM = storeDOM;\n window.restoreDOM = restoreDOM;\n window.createTextBoundingBoxes = createTextBoundingBoxes;\n window.getElementBoundingBoxes = getElementBoundingBoxes;\n var leafElementDenyList = [\"SVG\", \"IFRAME\", \"SCRIPT\", \"STYLE\", \"LINK\"];\n var interactiveElementTypes = [\n \"A\",\n \"BUTTON\",\n \"DETAILS\",\n \"EMBED\",\n \"INPUT\",\n \"LABEL\",\n \"MENU\",\n \"MENUITEM\",\n \"OBJECT\",\n \"SELECT\",\n 
\"TEXTAREA\",\n \"SUMMARY\"\n ];\n var interactiveRoles = [\n \"button\",\n \"menu\",\n \"menuitem\",\n \"link\",\n \"checkbox\",\n \"radio\",\n \"slider\",\n \"tab\",\n \"tabpanel\",\n \"textbox\",\n \"combobox\",\n \"grid\",\n \"listbox\",\n \"option\",\n \"progressbar\",\n \"scrollbar\",\n \"searchbox\",\n \"switch\",\n \"tree\",\n \"treeitem\",\n \"spinbutton\",\n \"tooltip\"\n ];\n var interactiveAriaRoles = [\"menu\", \"menuitem\", \"button\"];\n var isVisible = (element) => {\n const rect = element.getBoundingClientRect();\n if (rect.width === 0 \|\| rect.height === 0 \|\| rect.top < 0 \|\| rect.top > window.innerHeight) {\n return false;\n }\n if (!isTopElement(element, rect)) {\n return false;\n }\n const visible = element.checkVisibility({\n checkOpacity: true,\n checkVisibilityCSS: true\n });\n return visible;\n };\n var isTextVisible = (element) => {\n const range = document.createRange();\n range.selectNodeContents(element);\n const rect = range.getBoundingClientRect();\n if (rect.width === 0 \|\| rect.height === 0 \|\| rect.top < 0 \|\| rect.top > window.innerHeight) {\n return false;\n }\n const parent = element.parentElement;\n if (!parent) {\n return false;\n }\n if (!isTopElement(parent, rect)) {\n return false;\n }\n const visible = parent.checkVisibility({\n checkOpacity: true,\n checkVisibilityCSS: true\n });\n return visible;\n };\n function isTopElement(elem, rect) {\n const points = [\n { x: rect.left + rect.width * 0.25, y: rect.top + rect.height * 0.25 },\n { x: rect.left + rect.width * 0.75, y: rect.top + rect.height * 0.25 },\n { x: rect.left + rect.width * 0.25, y: rect.top + rect.height * 0.75 },\n { x: rect.left + rect.width * 0.75, y: rect.top + rect.height * 0.75 },\n { x: rect.left + rect.width / 2, y: rect.top + rect.height / 2 }\n ];\n return points.some((point) => {\n const topEl = document.elementFromPoint(point.x, point.y);\n let current = topEl;\n while (current && current !== document.body) {\n if (current.isSameNode(elem)) 
{\n return true;\n }\n current = current.parentElement;\n }\n return false;\n });\n }\n var isActive = (element) => {\n if (element.hasAttribute(\"disabled\") \|\| element.hasAttribute(\"hidden\") \|\| element.getAttribute(\"aria-disabled\") === \"true\") {\n return false;\n }\n return true;\n };\n var isInteractiveElement = (element) => {\n const elementType = element.tagName;\n const elementRole = element.getAttribute(\"role\");\n const elementAriaRole = element.getAttribute(\"aria-role\");\n return elementType && interactiveElementTypes.includes(elementType) \|\| elementRole && interactiveRoles.includes(elementRole) \|\| elementAriaRole && interactiveAriaRoles.includes(elementAriaRole);\n };\n var isLeafElement = (element) => {\n if (element.textContent === \"\") {\n return false;\n }\n if (element.childNodes.length === 0) {\n return !leafElementDenyList.includes(element.tagName);\n }\n if (element.childNodes.length === 1 && isTextNode(element.childNodes[0])) {\n return true;\n }\n return false;\n };\n async function pickChunk(chunksSeen) {\n const viewportHeight = calculateViewportHeight();\n const documentHeight = document.documentElement.scrollHeight;\n const chunks = Math.ceil(documentHeight / viewportHeight);\n const chunksArray = Array.from({ length: chunks }, (_, i) => i);\n const chunksRemaining = chunksArray.filter((chunk2) => {\n return !chunksSeen.includes(chunk2);\n });\n const currentScrollPosition = window.scrollY;\n const closestChunk = chunksRemaining.reduce((closest, current) => {\n const currentChunkTop = viewportHeight * current;\n const closestChunkTop = viewportHeight * closest;\n return Math.abs(currentScrollPosition - currentChunkTop) < Math.abs(currentScrollPosition - closestChunkTop) ? 
current : closest;\n }, chunksRemaining[0]);\n const chunk = closestChunk;\n if (chunk === void 0) {\n throw new Error(`No chunks remaining to check: ${chunksRemaining}`);\n }\n return {\n chunk,\n chunksArray\n };\n }\n\n // lib/dom/debug.ts\n async function debugDom() {\n window.chunkNumber = 0;\n const { selectorMap: multiSelectorMap } = await window.processElements(\n window.chunkNumber\n );\n const selectorMap = multiSelectorMapToSelectorMap(multiSelectorMap);\n drawChunk(selectorMap);\n setupChunkNav();\n }\n function multiSelectorMapToSelectorMap(multiSelectorMap) {\n return Object.fromEntries(\n Object.entries(multiSelectorMap).map(([key, selectors]) => [\n Number(key),\n selectors[0]\n ])\n );\n }\n function drawChunk(selectorMap) {\n if (!window.showChunks) return;\n cleanupMarkers();\n Object.values(selectorMap).forEach((selector) => {\n const element = document.evaluate(\n selector,\n document,\n null,\n XPathResult.FIRST_ORDERED_NODE_TYPE,\n null\n ).singleNodeValue;\n if (element) {\n let rect;\n if (element.nodeType === Node.ELEMENT_NODE) {\n rect = element.getBoundingClientRect();\n } else {\n const range = document.createRange();\n range.selectNodeContents(element);\n rect = range.getBoundingClientRect();\n }\n const color = \"grey\";\n const overlay = document.createElement(\"div\");\n overlay.style.position = \"absolute\";\n overlay.style.left = `${rect.left + window.scrollX}px`;\n overlay.style.top = `${rect.top + window.scrollY}px`;\n overlay.style.padding = \"2px\";\n overlay.style.width = `${rect.width}px`;\n overlay.style.height = `${rect.height}px`;\n overlay.style.backgroundColor = color;\n overlay.className = \"stagehand-marker\";\n overlay.style.opacity = \"0.3\";\n overlay.style.zIndex = \"1000000000\";\n overlay.style.border = \"1px solid\";\n overlay.style.pointerEvents = \"none\";\n document.body.appendChild(overlay);\n }\n });\n }\n async function cleanupDebug() {\n cleanupMarkers();\n cleanupNav();\n }\n function cleanupMarkers() 
{\n const markers = document.querySelectorAll(\".stagehand-marker\");\n markers.forEach((marker) => {\n marker.remove();\n });\n }\n function cleanupNav() {\n const stagehandNavElements = document.querySelectorAll(\".stagehand-nav\");\n stagehandNavElements.forEach((element) => {\n element.remove();\n });\n }\n function setupChunkNav() {\n const viewportHeight = calculateViewportHeight();\n const documentHeight = document.documentElement.scrollHeight;\n const totalChunks = Math.ceil(documentHeight / viewportHeight);\n if (window.chunkNumber > 0) {\n const prevChunkButton = document.createElement(\"button\");\n prevChunkButton.className = \"stagehand-nav\";\n prevChunkButton.textContent = \"Previous\";\n prevChunkButton.style.marginLeft = \"50px\";\n prevChunkButton.style.position = \"fixed\";\n prevChunkButton.style.bottom = \"10px\";\n prevChunkButton.style.left = \"50%\";\n prevChunkButton.style.transform = \"translateX(-50%)\";\n prevChunkButton.style.zIndex = \"1000000000\";\n prevChunkButton.onclick = async () => {\n cleanupMarkers();\n cleanupNav();\n window.chunkNumber -= 1;\n window.scrollTo(0, window.chunkNumber * viewportHeight);\n await window.waitForDomSettle();\n const { selectorMap: multiSelectorMap } = await window.processElements(\n window.chunkNumber\n );\n const selectorMap = multiSelectorMapToSelectorMap(multiSelectorMap);\n drawChunk(selectorMap);\n setupChunkNav();\n };\n document.body.appendChild(prevChunkButton);\n }\n if (totalChunks > window.chunkNumber) {\n const nextChunkButton = document.createElement(\"button\");\n nextChunkButton.className = \"stagehand-nav\";\n nextChunkButton.textContent = \"Next\";\n nextChunkButton.style.marginRight = \"50px\";\n nextChunkButton.style.position = \"fixed\";\n nextChunkButton.style.bottom = \"10px\";\n nextChunkButton.style.right = \"50%\";\n nextChunkButton.style.transform = \"translateX(50%)\";\n nextChunkButton.style.zIndex = \"1000000000\";\n nextChunkButton.onclick = async () => {\n 
cleanupMarkers();\n cleanupNav();\n window.chunkNumber += 1;\n window.scrollTo(0, window.chunkNumber * viewportHeight);\n await window.waitForDomSettle();\n const { selectorMap: multiSelectorMap } = await window.processElements(\n window.chunkNumber\n );\n const selectorMap = multiSelectorMapToSelectorMap(multiSelectorMap);\n drawChunk(selectorMap);\n setupChunkNav();\n };\n document.body.appendChild(nextChunkButton);\n }\n }\n window.debugDom = debugDom;\n window.cleanupDebug = cleanupDebug;\n})();\n";

10

lib/dom/global.d.ts

		@@ -27,3 +27,13 @@ export {};
		__PW_inspect?: unknown;
		storeDOM: () => string;
		restoreDOM: (storedDOM: string) => void;
		createTextBoundingBoxes: () => void;
		getElementBoundingBoxes: (xpath: string) => Array<{
		text: string;
		top: number;
		left: number;
		width: number;
		height: number;
		}>;
		}
		}

197

lib/dom/process.ts

		@@ -243,2 +243,195 @@ import { generateXPathsForElement as generateXPaths } from "./xpathUtils";

		/**
		 * Captures the current markup of `<body>` so it can be put back later with
		 * `restoreDOM`.
		 *
		 * The serialized string is read straight from the live `<body>` element.
		 * Serializing does not mutate the document, so there is no need to deep-clone
		 * the whole tree first just to read its `outerHTML`.
		 *
		 * @returns The `outerHTML` of `document.body` at the time of the call.
		 */
		export function storeDOM(): string {
		  const serializedBody = document.body.outerHTML;
		  console.log("DOM state stored.");
		  return serializedBody;
		}

		/**
		 * Replaces the contents of `<body>` with previously captured markup.
		 *
		 * @param storedDOM - Markup to assign to `document.body.innerHTML`. When it is
		 *   empty (or otherwise falsy) the document is left untouched and an error is
		 *   written to the console instead.
		 */
		export function restoreDOM(storedDOM: string): void {
		  console.log("Restoring DOM");

		  // Nothing to restore: report it and leave the current body as it is.
		  if (!storedDOM) {
		    console.error("No DOM state was provided.");
		    return;
		  }

		  document.body.innerHTML = storedDOM;
		}

		/**
		 * Wraps every word and every whitespace run of the page's text in its own
		 * `<span>`, so that the position of each word can be measured afterwards
		 * (words get the class `stagehand-highlighted-word`, whitespace runs get
		 * `stagehand-space`).
		 *
		 * What it does, in order:
		 * 1. Installs a small stylesheet for the two span classes. The `<style>`
		 *    element is tagged with an id and reused on later calls, so repeated
		 *    calls do not pile up duplicate `<style>` elements in `<head>`.
		 * 2. Walks every element under `<body>` (skipping Stagehand's own debug
		 *    overlays and SCRIPT/STYLE/IFRAME/INPUT/TEXTAREA elements) and replaces
		 *    each non-blank text node with the equivalent sequence of spans.
		 * 3. Posts a `{ action: "highlight" }` message to every iframe's window.
		 *
		 * This mutates the document; nothing here undoes the mutation.
		 */
		export function createTextBoundingBoxes(): void {
		  const STYLE_ID = "stagehand-text-bounding-box-style";
		  const TEXT_NODE = 3; // numeric value of Node.TEXT_NODE
		  const SKIPPED_TAGS = ["SCRIPT", "STYLE", "IFRAME", "INPUT", "TEXTAREA"];

		  // Install the helper rules only once per document.
		  if (!document.getElementById(STYLE_ID)) {
		    const style = document.createElement("style");
		    style.id = STYLE_ID;
		    document.head.appendChild(style);
		    if (style.sheet) {
		      style.sheet.insertRule(
		        `
		        .stagehand-highlighted-word, .stagehand-space {
		          border: 0px solid orange;
		          display: inline-block !important;
		          visibility: visible;
		        }
		        `,
		        0,
		      );

		      style.sheet.insertRule(
		        `
		        code .stagehand-highlighted-word, code .stagehand-space,
		        pre .stagehand-highlighted-word, pre .stagehand-space {
		          white-space: pre-wrap;
		          display: inline !important;
		        }
		        `,
		        1,
		      );
		    }
		  }

		  function applyHighlighting(root: Document | HTMLElement): void {
		    root.querySelectorAll("body *").forEach((element) => {
		      // Leave Stagehand's own debug UI alone.
		      if (element.closest(".stagehand-nav, .stagehand-marker")) {
		        return;
		      }
		      if (SKIPPED_TAGS.includes(element.tagName)) {
		        return;
		      }

		      const parentIsCode = element.tagName === "CODE";

		      // Snapshot the children first: the loop below replaces nodes in place.
		      Array.from(element.childNodes).forEach((node) => {
		        const rawText = node.textContent ?? "";
		        if (node.nodeType !== TEXT_NODE || rawText.trim().length === 0) {
		          return;
		        }

		        // Treat non-breaking spaces as ordinary spaces so they split words too.
		        const textContent = rawText.replace(/\u00A0/g, " ");
		        // The capture group keeps the whitespace runs as tokens of their own.
		        const tokens = textContent.split(/(\s+)/g);
		        const fragment = document.createDocumentFragment();

		        tokens.forEach((token) => {
		          // split() yields "" at the edges when the text starts or ends with
		          // whitespace; an empty span would carry no text at all, so skip it.
		          if (token.length === 0) {
		            return;
		          }
		          const span = document.createElement("span");
		          span.textContent = token;
		          if (parentIsCode) {
		            // Special handling for <code> tags
		            span.style.whiteSpace = "pre-wrap";
		            span.style.display = "inline";
		          }
		          span.className =
		            token.trim().length === 0
		              ? "stagehand-space"
		              : "stagehand-highlighted-word";
		          fragment.appendChild(span);
		        });

		        if (fragment.childNodes.length > 0 && node.parentNode) {
		          element.insertBefore(fragment, node);
		          node.remove();
		        }
		      });
		    });
		  }

		  applyHighlighting(document);

		  document.querySelectorAll("iframe").forEach((iframe) => {
		    try {
		      iframe.contentWindow?.postMessage({ action: "highlight" }, "*");
		    } catch (error) {
		      console.error("Error accessing iframe content: ", error);
		    }
		  });
		}

		/**
		 * Returns the text pieces found inside the element addressed by `xpath`,
		 * together with their page-absolute positions (viewport rect offset by the
		 * current scroll position).
		 *
		 * Resolution order:
		 * 1. If the XPath matches nothing, or matches a node that is not an element
		 *    (and therefore has no `querySelector`/`getBoundingClientRect`), the
		 *    result is `[]`.
		 * 2. If the element contains an `<option>` (preferring `option[selected]`),
		 *    a single box is returned carrying that option's trimmed text and the
		 *    rectangle of the element itself; `[]` if the option's text is blank.
		 * 3. Otherwise one box is returned per descendant
		 *    `.stagehand-highlighted-word` span that has a non-zero size,
		 *    non-negative page coordinates, and non-blank text. Box heights are
		 *    scaled to 75% of the measured height.
		 * 4. If no word box survives, a single box for the element itself is
		 *    returned. Its text is the `placeholder` for `<input>`/`<textarea>`, the
		 *    `alt` for `<img>`, and `""` for everything else.
		 *
		 * @param xpath - XPath expression evaluated against `document`; only the
		 *   first matching node is used.
		 * @returns The boxes described above; possibly empty.
		 */
		export function getElementBoundingBoxes(xpath: string): Array<{
		  text: string;
		  top: number;
		  left: number;
		  width: number;
		  height: number;
		}> {
		  const ELEMENT_NODE = 1; // numeric value of Node.ELEMENT_NODE

		  const node = document.evaluate(
		    xpath,
		    document,
		    null,
		    XPathResult.FIRST_ORDERED_NODE_TYPE,
		    null,
		  ).singleNodeValue;

		  // Everything below needs element-only APIs, so bail out for other node types
		  // instead of throwing on a missing method.
		  if (!node || node.nodeType !== ELEMENT_NODE) return [];
		  const element = node as HTMLElement;

		  const isValidText = (text: string): boolean => text.trim().length > 0;

		  // Dropdowns: report the selected option, or the first option if none is marked.
		  const dropDownElem =
		    element.querySelector("option[selected]") ??
		    element.querySelector("option");

		  if (dropDownElem) {
		    const elemText = dropDownElem.textContent || "";
		    if (!isValidText(elemText)) {
		      return [];
		    }
		    const parentRect = element.getBoundingClientRect();
		    return [
		      {
		        text: elemText.trim(),
		        top: parentRect.top + window.scrollY,
		        left: parentRect.left + window.scrollX,
		        width: parentRect.width,
		        height: parentRect.height,
		      },
		    ];
		  }

		  const words = element.querySelectorAll(
		    ".stagehand-highlighted-word",
		  ) as NodeListOf<HTMLElement>;

		  const boundingBoxes = Array.from(words)
		    .map((word) => {
		      const rect = word.getBoundingClientRect();
		      return {
		        text: word.innerText || "",
		        top: rect.top + window.scrollY,
		        left: rect.left + window.scrollX,
		        width: rect.width,
		        height: rect.height * 0.75,
		      };
		    })
		    .filter(
		      (box) =>
		        box.width > 0 &&
		        box.height > 0 &&
		        box.top >= 0 &&
		        box.left >= 0 &&
		        isValidText(box.text),
		    );

		  if (boundingBoxes.length > 0) {
		    return boundingBoxes;
		  }

		  // No measurable words: fall back to one box for the element itself, labelled
		  // with whatever stand-in text the element offers.
		  const tagName = element.tagName.toLowerCase();
		  let placeholderText = "";
		  if (tagName === "input" || tagName === "textarea") {
		    placeholderText = (element as HTMLInputElement).placeholder || "";
		  } else if (tagName === "img") {
		    placeholderText = (element as HTMLImageElement).alt || "";
		  }

		  const elementRect = element.getBoundingClientRect();
		  return [
		    {
		      text: placeholderText,
		      top: elementRect.top + window.scrollY,
		      left: elementRect.left + window.scrollX,
		      width: elementRect.width,
		      height: elementRect.height * 0.75,
		    },
		  ];
		}

		window.processDom = processDom;
		@@ -248,2 +441,6 @@ window.processAllOfDom = processAllOfDom;
		window.scrollToHeight = scrollToHeight;
		window.storeDOM = storeDOM;
		window.restoreDOM = restoreDOM;
		window.createTextBoundingBoxes = createTextBoundingBoxes;
		window.getElementBoundingBoxes = getElementBoundingBoxes;

		@@ -250,0 +447,0 @@ const leafElementDenyList = ["SVG", "IFRAME", "SCRIPT", "STYLE", "LINK"];

350

lib/handlers/extractHandler.ts

		@@ -5,5 +5,79 @@ import { LLMProvider } from "../llm/LLMProvider";
		import { LogLine } from "../../types/log";
		import { TextAnnotation } from "../../types/textannotation";
		import { extract } from "../inference";
		import { LLMClient } from "../llm/LLMClient";
		import { formatText } from "../utils";

		const PROXIMITY_THRESHOLD = 15;

		/**
		* The `StagehandExtractHandler` class is responsible for extracting structured data from a webpage.
		* It provides two approaches: `textExtract` and `domExtract`. `domExtract` is used by default; `textExtract` is used when `useTextExtract` is true.
		*
		* Here is what `textExtract` does at a high level:
		*
		* 1. Wait for the DOM to settle and start DOM debugging.
		* - Ensures the page is fully loaded and stable before extraction.
		*
		* 2. Store the original DOM before any mutations.
		* - Preserves the initial state of the DOM to restore later.
		* - We do this because creating spans around every word in the DOM (see step 4)
		* becomes very difficult to revert. Text nodes can be finicky, and directly
		* removing the added spans often corrupts the structure of the DOM.
		*
		* 3. Process the DOM to generate a selector map of candidate elements.
		* - Identifies potential elements that contain the data to extract.
		*
		* 4. Create text bounding boxes around every word in the webpage.
		* - Wraps words in spans so that their bounding boxes can be used to
		* determine their positions on the text-rendered-webpage.
		*
		* 5. Collect all text annotations (with positions and dimensions) from each of the candidate elements.
		* - Gathers text and positional data for each word.
		*
		* 6. Group annotations by text and deduplicate them based on proximity.
		* - There is no guarantee that the text annotations are unique (candidate elements can be nested).
		* - Thus, we must remove duplicate words that are close to each other on the page.
		*
		* 7. Restore the original DOM after mutations.
		* - Returns the DOM to its original state after processing.
		*
		* 8. Format the deduplicated annotations into a text representation.
		* - Prepares the text data for the extraction process.
		*
		* 9. Pass the formatted text to an LLM for extraction according to the given instruction and schema.
		* - Uses a language model to extract structured data based on instructions.
		*
		* 10. Handle the extraction response and log the results.
		* - Processes the output from the LLM and logs relevant information.
		*
		*
		* Here is what `domExtract` does at a high level:
		*
		* 1. Wait for the DOM to settle and start DOM debugging.
		* - Ensures the page is fully loaded and stable before extraction.
		*
		* 2. Process the DOM in chunks.
		* - The `processDom` function:
		* - Divides the page into vertical "chunks" based on viewport height.
		* - Picks the next chunk that hasn't been processed yet.
		* - Scrolls to that chunk and extracts candidate elements.
		* - Returns `outputString` (HTML snippets of candidate elements),
		* `selectorMap` (the XPaths of the candidate elements),
		* `chunk` (the current chunk index), and `chunks` (the array of all chunk indices).
		* - This chunk-based approach ensures that large or lengthy pages can be processed in smaller, manageable sections.
		*
		* 3. Pass the extracted DOM elements (in `outputString`) to the LLM for structured data extraction.
		* - Uses the instructions, schema, and previously extracted content as context to
		* guide the LLM in extracting the structured data.
		*
		* 4. Check if extraction is complete.
		* - If the extraction is complete (all chunks have been processed or the LLM determines
		* that we do not need to continue), return the final result.
		* - If not, repeat steps 1-4 with the next chunk until extraction is complete or no more chunks remain.
		*
		* @remarks
		* Each step corresponds to specific code segments, as noted in the comments throughout the code.
		*/

		export class StagehandExtractHandler {
		@@ -64,2 +138,3 @@ private readonly stagehand: Stagehand;
		domSettleTimeoutMs,
		useTextExtract = false,
		}: {
		@@ -73,3 +148,41 @@ instruction: string;
		domSettleTimeoutMs?: number;
		useTextExtract?: boolean;
		}): Promise<z.infer<T>> {
		if (useTextExtract) {
		return this.textExtract({
		instruction,
		schema,
		content,
		llmClient,
		requestId,
		domSettleTimeoutMs,
		});
		} else {
		return this.domExtract({
		instruction,
		schema,
		content,
		chunksSeen,
		llmClient,
		requestId,
		domSettleTimeoutMs,
		});
		}
		}

		private async textExtract<T extends z.AnyZodObject>({
		instruction,
		schema,
		content = {},
		llmClient,
		requestId,
		domSettleTimeoutMs,
		}: {
		instruction: string;
		schema: T;
		content?: z.infer<T>;
		llmClient: LLMClient;
		requestId?: string;
		domSettleTimeoutMs?: number;
		}): Promise<z.infer<T>> {
		this.logger({
		@@ -87,4 +200,228 @@ category: "extraction",

		// 1: Wait for the DOM to settle and start DOM debugging
		await this.waitForSettledDom(domSettleTimeoutMs);
		await this.startDomDebug();

		// 2: Store the original DOM before any mutations
		// we need to store the original DOM here because calling createTextBoundingBoxes()
		// will mutate the DOM by adding spans around every word
		const originalDOM = await this.stagehand.page.evaluate(() =>
		window.storeDOM(),
		);

		// 3: Process the DOM to generate a selector map of candidate elements
		const { selectorMap }: { selectorMap: Record<number, string[]> } =
		await this.stagehand.page.evaluate(() => window.processAllOfDom());

		this.logger({
		category: "extraction",
		message: `received output from processAllOfDom. selectorMap has ${Object.keys(selectorMap).length} entries`,
		level: 1,
		});

		// 4: Create text bounding boxes around every word in the webpage
		// calling createTextBoundingBoxes() will create a span around every word on the
		// webpage. The bounding boxes of these spans will be used to determine their
		// positions in the text rendered webpage
		await this.stagehand.page.evaluate(() => window.createTextBoundingBoxes());
		const pageWidth = await this.stagehand.page.evaluate(
		() => window.innerWidth,
		);
		const pageHeight = await this.stagehand.page.evaluate(
		() => window.innerHeight,
		);

		// 5: Collect all text annotations (with positions and dimensions) from the candidate elements
		// allAnnotations will store all the TextAnnotations BEFORE deduplication
		const allAnnotations: TextAnnotation[] = [];

		// here we will loop through all the xpaths in the selectorMap,
		// and get the bounding boxes for each one. These are xpaths to "candidate elements"
		for (const xpaths of Object.values(selectorMap)) {
		const xpath = xpaths[0];

		// boundingBoxes is an array because there may be multiple bounding boxes within a single element
		// (since each bounding box is around a single word)
		const boundingBoxes: Array<{
		text: string;
		left: number;
		top: number;
		width: number;
		height: number;
		}> = await this.stagehand.page.evaluate(
		(xpath) => window.getElementBoundingBoxes(xpath),
		xpath,
		);

		for (const box of boundingBoxes) {
		const bottom_left = {
		x: box.left,
		y: box.top + box.height,
		};
		const bottom_left_normalized = {
		x: box.left / pageWidth,
		y: (box.top + box.height) / pageHeight,
		};

		const annotation: TextAnnotation = {
		text: box.text,
		bottom_left,
		bottom_left_normalized,
		width: box.width,
		height: box.height,
		};
		allAnnotations.push(annotation);
		}
		}

		// 6: Group annotations by text and deduplicate them based on proximity
		const annotationsGroupedByText = new Map<string, TextAnnotation[]>();

		for (const annotation of allAnnotations) {
		if (!annotationsGroupedByText.has(annotation.text)) {
		annotationsGroupedByText.set(annotation.text, []);
		}
		annotationsGroupedByText.get(annotation.text)!.push(annotation);
		}

		const deduplicatedTextAnnotations: TextAnnotation[] = [];

		// here, we deduplicate annotations per text group
		for (const [text, annotations] of annotationsGroupedByText.entries()) {
		for (const annotation of annotations) {
		// check if this annotation is close to any existing deduplicated annotation
		const isDuplicate = deduplicatedTextAnnotations.some(
		(existingAnnotation) => {
		if (existingAnnotation.text !== text) return false;

		const dx =
		existingAnnotation.bottom_left.x - annotation.bottom_left.x;
		const dy =
		existingAnnotation.bottom_left.y - annotation.bottom_left.y;
		const distance = Math.hypot(dx, dy);
		// the annotation is a duplicate if it has the same text and its bottom_left
		// position is within the PROXIMITY_THRESHOLD of an existing annotation.
		// we calculate the Euclidean distance between the two bottom_left points,
		// and if the distance is less than PROXIMITY_THRESHOLD,
		// the annotation is considered a duplicate.
		return distance < PROXIMITY_THRESHOLD;
		},
		);

		if (!isDuplicate) {
		deduplicatedTextAnnotations.push(annotation);
		}
		}
		}

		// 7: Restore the original DOM after mutations
		await this.stagehand.page.evaluate(
		(dom) => window.restoreDOM(dom),
		originalDOM,
		);

		// 8: Format the deduplicated annotations into a text representation
		const formattedText = formatText(deduplicatedTextAnnotations, pageWidth);

		// 9: Pass the formatted text to an LLM for extraction according to the given instruction and schema
		const extractionResponse = await extract({
		instruction,
		previouslyExtractedContent: content,
		domElements: formattedText,
		schema,
		chunksSeen: 1,
		chunksTotal: 1,
		llmClient,
		requestId,
		});

		const {
		metadata: { completed },
		...output
		} = extractionResponse;
		await this.cleanupDomDebug();

		// 10: Handle the extraction response and log the results
		this.logger({
		category: "extraction",
		message: "received extraction response",
		auxiliary: {
		extraction_response: {
		value: JSON.stringify(extractionResponse),
		type: "object",
		},
		},
		});

		if (completed) {
		this.logger({
		category: "extraction",
		message: "extraction completed successfully",
		level: 1,
		auxiliary: {
		extraction_response: {
		value: JSON.stringify(extractionResponse),
		type: "object",
		},
		},
		});
		} else {
		this.logger({
		category: "extraction",
		message: "extraction incomplete after processing all data",
		level: 1,
		auxiliary: {
		extraction_response: {
		value: JSON.stringify(extractionResponse),
		type: "object",
		},
		},
		});
		}
		return output;
		}

		private async domExtract<T extends z.AnyZodObject>({
		instruction,
		schema,
		content = {},
		chunksSeen = [],
		llmClient,
		requestId,
		domSettleTimeoutMs,
		}: {
		instruction: string;
		schema: T;
		content?: z.infer<T>;
		chunksSeen?: Array<number>;
		llmClient: LLMClient;
		requestId?: string;
		domSettleTimeoutMs?: number;
		}): Promise<z.infer<T>> {
		this.logger({
		category: "extraction",
		message: "starting extraction using old approach",
		level: 1,
		auxiliary: {
		instruction: {
		value: instruction,
		type: "string",
		},
		},
		});

		// 1: Wait for the DOM to settle and start DOM debugging
		// This ensures the page is stable before extracting any data.
		await this.waitForSettledDom(domSettleTimeoutMs);
		await this.startDomDebug();

		// 2: Call processDom() to handle chunk-based extraction
		// processDom determines which chunk of the page to process next.
		// It will:
		// - Identify all chunks (vertical segments of the page),
		// - Pick the next unprocessed chunk,
		// - Scroll to that chunk's region,
		// - Extract candidate elements and their text,
		// - Return the extracted text (outputString), a selectorMap (for referencing elements),
		// the current chunk index, and the full list of chunks.
		const { outputString, chunk, chunks } = await this.stagehand.page.evaluate(
		@@ -114,2 +451,5 @@ (chunksSeen?: number[]) => window.processDom(chunksSeen ?? []),

		// 3: Pass the list of candidate HTML snippets to the LLM
		// The LLM uses the provided instruction and schema to parse and extract
		// structured data.
		const extractionResponse = await extract({
		@@ -124,2 +464,3 @@ instruction,
		requestId,
		isUsingTextExtract: false,
		});
		@@ -131,2 +472,3 @@
		} = extractionResponse;

		await this.cleanupDomDebug();
		@@ -145,4 +487,8 @@

		// Mark the current chunk as processed by adding it to chunksSeen
		chunksSeen.push(chunk);

		// 4: Check if extraction is complete
		// If the LLM deems the extraction complete or we've processed all chunks, return the final result.
		// Otherwise, call domExtract again for the next chunk.
		if (completed \|\| chunksSeen.length === chunks.length) {
		@@ -173,3 +519,5 @@ this.logger({
		await this.waitForSettledDom(domSettleTimeoutMs);
		return this.extract({

		// Recursively continue with the next chunk
		return this.domExtract({
		instruction,
		@@ -176,0 +524,0 @@ schema,

45

lib/index.ts

		@@ -198,3 +198,3 @@ import { Browserbase } from "@browserbasehq/sdk";

		return { browser, context, debugUrl, sessionUrl };
		return { browser, context, debugUrl, sessionUrl, sessionId };
		} else {
		@@ -311,2 +311,4 @@ logger({
		public context: BrowserContext;
		public browserbaseSessionID?: string;

		private env: "LOCAL" \| "BROWSERBASE";
		@@ -382,19 +384,21 @@ private apiKey: string \| undefined;
		}
		const { context, debugUrl, sessionUrl, contextPath } = await getBrowser(
		this.apiKey,
		this.projectId,
		this.env,
		this.headless,
		this.logger,
		this.browserbaseSessionCreateParams,
		this.browserbaseResumeSessionID,
		).catch((e) => {
		console.error("Error in init:", e);
		const br: BrowserResult = {
		context: undefined,
		debugUrl: undefined,
		sessionUrl: undefined,
		};
		return br;
		});
		const { context, debugUrl, sessionUrl, contextPath, sessionId } =
		await getBrowser(
		this.apiKey,
		this.projectId,
		this.env,
		this.headless,
		this.logger,
		this.browserbaseSessionCreateParams,
		this.browserbaseResumeSessionID,
		).catch((e) => {
		console.error("Error in init:", e);
		const br: BrowserResult = {
		context: undefined,
		debugUrl: undefined,
		sessionUrl: undefined,
		sessionId: undefined,
		};
		return br;
		});
		this.contextPath = contextPath;
		@@ -461,4 +465,5 @@ this.context = context;
		});
		this.browserbaseSessionID = sessionId;

		return { debugUrl, sessionUrl };
		return { debugUrl, sessionUrl, sessionId };
		}
		@@ -772,2 +777,3 @@
		domSettleTimeoutMs,
		useTextExtract,
		}: ExtractOptions<T>): Promise<ExtractResult<T>> {
		@@ -810,2 +816,3 @@ if (!this.extractHandler) {
		domSettleTimeoutMs,
		useTextExtract,
		})
		@@ -812,0 +819,0 @@ .catch((e) => {

4

lib/inference.ts

		@@ -163,2 +163,3 @@ import {
		requestId,
		isUsingTextExtract,
		}: {
		@@ -173,2 +174,3 @@ instruction: string;
		requestId: string;
		isUsingTextExtract?: boolean;
		}) {
		@@ -181,3 +183,3 @@ type ExtractionResponse = z.infer<typeof schema>;
		messages: [
		buildExtractSystemPrompt(isUsingAnthropic),
		buildExtractSystemPrompt(isUsingAnthropic, isUsingTextExtract),
		buildExtractUserPrompt(instruction, domElements, isUsingAnthropic),
		@@ -184,0 +186,0 @@ ],

2

lib/llm/AnthropicClient.ts

		@@ -225,3 +225,3 @@ import Anthropic, { ClientOptions } from "@anthropic-ai/sdk";
		model: this.modelName,
		max_tokens: options.maxTokens \|\| 1500,
		max_tokens: options.maxTokens \|\| 8192,
		messages: formattedMessages,
		@@ -228,0 +228,0 @@ tools: anthropicTools,

48

lib/prompt.ts

		@@ -204,19 +204,43 @@ import OpenAI from "openai";
		// extract
		const extractSystemPrompt = `You are extracting content on behalf of a user. You will be given:
		export function buildExtractSystemPrompt(
		isUsingPrintExtractedDataTool: boolean = false,
		useTextExtract: boolean = true,
		): ChatMessage {
		const baseContent = `You are extracting content on behalf of a user.
		If a user asks you to extract a 'list' of information, or 'all' information,
		YOU MUST EXTRACT ALL OF THE INFORMATION THAT THE USER REQUESTS.

		You will be given:
		1. An instruction
		2. A list of DOM elements to extract from
		2. `;

		Print the exact text from the DOM elements with all symbols, characters, and endlines as is.
		const contentDetail = useTextExtract
		? `A text representation of a webpage to extract information from.`
		: `A list of DOM elements to extract from.`;

		const instructions = `
		Print the exact text from the ${
		useTextExtract ? "text-rendered webpage" : "DOM elements"
		} with all symbols, characters, and endlines as is.
		Print null or an empty string if no new information is found.
		`;
		`.trim();

		export function buildExtractSystemPrompt(
		isUsingPrintExtractedDataTool: boolean = false,
		): ChatMessage {
		let content = extractSystemPrompt.replace(/\s+/g, " ");
		if (isUsingPrintExtractedDataTool) {
		content += `
		const toolInstructions = isUsingPrintExtractedDataTool
		? `
		ONLY print the content using the print_extracted_data tool provided.
		ONLY print the content using the print_extracted_data tool provided.`;
		}
		ONLY print the content using the print_extracted_data tool provided.
		`.trim()
		: "";

		const additionalInstructions = useTextExtract
		? `Once you are given the text-rendered webpage,
		you must thoroughly and meticulously analyze it. Be very careful to ensure that you
		do not miss any important information.`
		: "";

		const content =
		`${baseContent}${contentDetail}\n\n${instructions}\n${toolInstructions}${
		additionalInstructions ? `\n\n${additionalInstructions}` : ""
		}`.replace(/\s+/g, " ");

		return {
		@@ -223,0 +247,0 @@ role: "system",

352

lib/utils.ts

		import crypto from "crypto";
		import { LogLine } from "../types/log";
		import { TextAnnotation } from "../types/textannotation";
		import { z } from "zod";
		@@ -9,2 +10,353 @@

		/**
		* `formatText` converts a list of text annotations into a formatted text representation.
		* Each annotation represents a piece of text at a certain position on a webpage.
		* The formatting attempts to reconstruct a textual "screenshot" of the page by:
		* - Grouping annotations into lines based on their vertical positions.
		* - Adjusting spacing to reflect line gaps.
		* - Attempting to preserve relative positions and formatting.
		*
		* The output is a text block, optionally surrounded by lines of dashes, that aims
		* to closely mirror the visual layout of the text on the page.
		*
		* @param textAnnotations - An array of TextAnnotations describing text and their positions.
		* @param pageWidth - The width of the page in pixels, used to normalize positions.
		* @returns A string representing the text layout of the page.
		*/
		export function formatText(
		  textAnnotations: TextAnnotation[],
		  pageWidth: number,
		): string {
		  // Pixels per character. A falsy estimate (0 or NaN) falls back to 10.
		  const charWidth = estimateCharacterWidth(textAnnotations) || 10;

		  // --- Bucket annotations into lines -------------------------------------
		  // Annotations are visited in ascending y order. An annotation joins the
		  // first existing line whose key lies within `epsilon` of its y value;
		  // otherwise its own y becomes the key of a new line.
		  const epsilon = 0.0001;
		  const lineMap = new Map<number, TextAnnotation[]>();
		  const byAscendingY = Array.from(textAnnotations).sort(
		    (a, b) => a.bottom_left.y - b.bottom_left.y,
		  );

		  for (const annotation of byAscendingY) {
		    const y = annotation.bottom_left.y;
		    let placed = false;

		    for (const [key, members] of lineMap) {
		      if (Math.abs(key - y) < epsilon) {
		        members.push(annotation);
		        placed = true;
		        break;
		      }
		    }

		    if (!placed) {
		      lineMap.set(y, [annotation]);
		    }
		  }

		  // --- Build the final lines and measure the widest one ------------------
		  // Lines are processed in ascending key order. Within a line, annotations
		  // are ordered left-to-right and merged into phrases. While doing so we
		  // track the furthest normalized right edge reached by any phrase, using
		  // the estimated character width to approximate each phrase's length.
		  let maxNormalizedEndX = 0;
		  const finalLines: TextAnnotation[][] = Array.from(lineMap.entries())
		    .sort(([keyA], [keyB]) => keyA - keyB)
		    .map(([, members]) => {
		      members.sort((a, b) => a.bottom_left.x - b.bottom_left.x);
		      const phrases = groupWordsInSentence(members);

		      for (const phrase of phrases) {
		        const endX =
		          phrase.bottom_left_normalized.x +
		          (phrase.text.length * charWidth) / pageWidth;
		        if (endX > maxNormalizedEndX) {
		          maxNormalizedEndX = endX;
		        }
		      }

		      return phrases;
		    });

		  // Canvas width in characters: the widest extent converted to characters,
		  // plus a 20-character buffer, and never less than one column.
		  const canvasWidth = Math.max(
		    Math.ceil(maxNormalizedEndX * (pageWidth / charWidth)) + 20,
		    1,
		  );

		  // --- Vertical spacing --------------------------------------------------
		  // Each line's baseline is the smallest y among its phrases; the median
		  // distance between consecutive baselines is treated as "normal" spacing.
		  const lineBaselines = finalLines.map((line) =>
		    Math.min(...line.map((phrase) => phrase.bottom_left.y)),
		  );
		  const verticalGaps = lineBaselines
		    .slice(1)
		    .map((baseline, k) => baseline - lineBaselines[k]);
		  const normalLineSpacing =
		    verticalGaps.length > 0 ? median(verticalGaps) : 0;

		  // --- Render onto the character canvas ----------------------------------
		  const canvas: string[][] = [];
		  // Starts at -1 so that the first line lands on row 0.
		  let lineIndex = -1;

		  finalLines.forEach((line, i) => {
		    // From the second line on, a gap noticeably larger than normal
		    // (> 1.2x) yields blank rows proportional to the gap/normal ratio.
		    let extraLines = 0;
		    if (i > 0 && normalLineSpacing > 0) {
		      const gap = verticalGaps[i - 1];
		      if (gap > 1.2 * normalLineSpacing) {
		        extraLines = Math.max(Math.round(gap / normalLineSpacing) - 1, 0);
		      }
		    }

		    // Skip over the blank rows, then claim the row for this line.
		    // ensureLineExists creates every row up to and including lineIndex.
		    lineIndex += extraLines + 1;
		    ensureLineExists(canvas, lineIndex, canvasWidth);
		    const row = canvas[lineIndex];

		    for (const annotation of line) {
		      // Column where the phrase starts, derived from its normalized x.
		      const startXInChars = Math.round(
		        annotation.bottom_left_normalized.x * canvasWidth,
		      );

		      // Write the phrase one UTF-16 code unit at a time, dropping anything
		      // that would land at or past the right edge of the canvas.
		      annotation.text.split("").forEach((ch, offset) => {
		        const column = startXInChars + offset;
		        if (column < canvasWidth) {
		          row[column] = ch;
		        }
		      });
		    }
		  });

		  // --- Assemble the output -----------------------------------------------
		  // Strip trailing spaces from each row, join rows with newlines, strip
		  // trailing whitespace from the whole text, and frame it with dashed rules.
		  const body = canvas
		    .map((row) => row.join("").trimEnd())
		    .join("\n")
		    .trimEnd();
		  const rule = "-".repeat(canvasWidth);

		  return rule + "\n" + body + "\n" + rule;
		}

		/**
		 * Grows `canvas` in place until index `lineIndex` is a valid row.
		 * Every row that gets appended is a fresh array of `width` single-space
		 * strings; rows that already exist are left untouched.
		 *
		 * @param canvas - The 2D character canvas to extend.
		 * @param lineIndex - The row index that must exist after the call.
		 * @param width - The number of columns given to each newly created row.
		 */
		function ensureLineExists(
		  canvas: string[][],
		  lineIndex: number,
		  width: number,
		) {
		  // Append one blank row for each index from the current end of the canvas
		  // up to and including lineIndex. Nothing happens if the row is present.
		  for (let row = canvas.length; row <= lineIndex; row++) {
		    const blankRow = new Array(width).fill(" ");
		    canvas.push(blankRow);
		  }
		}

		/**
		 * Estimates a typical character width, in the same units as `width`,
		 * from a set of text annotations.
		 *
		 * Each annotation with non-empty text contributes one sample,
		 * `width / text.length`; the result is the median of those samples.
		 * Annotations whose text is empty contribute nothing.
		 *
		 * @param textAnnotations - Annotations providing `text` and `width`.
		 * @returns The median per-character width, or 0 when there are no samples.
		 */
		function estimateCharacterWidth(textAnnotations: TextAnnotation[]): number {
		  const perCharacterWidths = textAnnotations
		    // empty text would mean dividing by zero, so those are skipped
		    .filter((annotation) => annotation.text.length > 0)
		    .map((annotation) => annotation.width / annotation.text.length);

		  return median(perCharacterWidths);
		}

		/**
		 * Merges neighbouring annotations of one line into larger phrases.
		 *
		 * The annotations are walked in the order given. An annotation is added to
		 * the group being built when both of these hold:
		 *  - its height differs from the group's first annotation by at most 4, and
		 *  - its left edge is no further right than the previous annotation's right
		 *    edge plus two of that annotation's average character widths.
		 * Otherwise the group is closed (via `createGroupedAnnotation`) and the
		 * annotation starts a new group.
		 *
		 * @param lineAnnotations - Annotations belonging to a single line of text.
		 * @returns One merged annotation per group, in encounter order.
		 */
		function groupWordsInSentence(
		  lineAnnotations: TextAnnotation[],
		): TextAnnotation[] {
		  // how many average character widths of slack are allowed between pieces
		  const padding = 2;

		  const groupedAnnotations: TextAnnotation[] = [];
		  let currentGroup: TextAnnotation[] = [];

		  // true when `candidate` should be appended to the (non-empty) open group
		  const continuesGroup = (candidate: TextAnnotation): boolean => {
		    const lastAnn = currentGroup[currentGroup.length - 1];
		    const characterWidth = (lastAnn.width / lastAnn.text.length) * padding;
		    const isWithinHorizontalRange =
		      candidate.bottom_left.x <=
		      lastAnn.bottom_left.x + lastAnn.width + characterWidth;

		    return (
		      Math.abs(candidate.height - currentGroup[0].height) <= 4 &&
		      isWithinHorizontalRange
		    );
		  };

		  for (const annotation of lineAnnotations) {
		    // close the open group first if this annotation does not extend it
		    if (currentGroup.length > 0 && !continuesGroup(annotation)) {
		      groupedAnnotations.push(createGroupedAnnotation(currentGroup));
		      currentGroup = [];
		    }

		    // the annotation either extends the open group or seeds a new one
		    currentGroup.push(annotation);
		  }

		  // flush whatever is still open once the line is exhausted
		  if (currentGroup.length > 0) {
		    groupedAnnotations.push(createGroupedAnnotation(currentGroup));
		  }

		  return groupedAnnotations;
		}

		/**
		 * `createGroupedAnnotation` combines a group of annotations into a single annotation by concatenating their text.
		 * Pieces are separated by a single space, except that a piece consisting solely of one of the listed
		 * punctuation marks is appended directly to the text built so far.
		 * When the combined text contains an alphanumeric character and the group's median height exceeds 25,
		 * the text is wrapped in `**` markers so that it reads as bold.
		 *
		 * @param group - The annotations to merge, in reading order. Must contain at least one element,
		 * because the first element supplies the position and height of the result.
		 * @returns A new TextAnnotation carrying the combined text, the first annotation's position
		 * (absolute and normalized) and height, and the sum of all member widths.
		 */
		function createGroupedAnnotation(group: TextAnnotation[]): TextAnnotation {
		  // punctuation that attaches to the preceding text without a space
		  const attachedPunctuation = [
		    ".",
		    ",",
		    '"',
		    "'",
		    ":",
		    ";",
		    "!",
		    "?",
		    "{",
		    "}",
		    "’",
		    "”",
		  ];

		  // build the combined text piece by piece
		  let text = "";
		  for (const word of group) {
		    if (attachedPunctuation.includes(word.text)) {
		      text += word.text;
		    } else {
		      text += text !== "" ? " " + word.text : word.text;
		    }
		  }

		  // determine if the combined text qualifies as a "word" (contains alphanumeric chars)
		  // and whether its median height suggests emphasizing it (e.g., bold text).
		  const isWord = /[a-zA-Z0-9]/.test(text);
		  const medianHeight = median(group.map((word) => word.height));

		  // if it's considered a word and tall enough, surround it with `**` for bold formatting.
		  // (The markers had been reduced to empty strings, which made this branch a no-op.)
		  if (isWord && medianHeight > 25) {
		    text = "**" + text + "**";
		  }

		  // return a new annotation that represents the merged group.
		  // the first annotation provides the coordinates, normalized position and height;
		  // the width is the sum of the member widths (gaps between members are not included).
		  return {
		    text: text,
		    bottom_left: {
		      x: group[0].bottom_left.x,
		      y: group[0].bottom_left.y,
		    },
		    bottom_left_normalized: {
		      x: group[0].bottom_left_normalized.x,
		      y: group[0].bottom_left_normalized.y,
		    },
		    width: group.reduce((sum, a) => sum + a.width, 0),
		    height: group[0].height,
		  };
		}

		/**
		 * Returns the median of a list of numbers without modifying the input.
		 *
		 * @param values - The numbers to summarise.
		 * @returns 0 for an empty list; the middle value for an odd-length list;
		 * the mean of the two middle values for an even-length list.
		 */
		function median(values: number[]): number {
		  const count = values.length;
		  if (count === 0) {
		    return 0;
		  }

		  // sort a copy numerically so the caller's array keeps its order
		  const ordered = values.slice().sort((a, b) => a - b);
		  const upperMid = Math.floor(count / 2);

		  return count % 2 === 0
		    ? (ordered[upperMid - 1] + ordered[upperMid]) / 2
		    : ordered[upperMid];
		}

		export function logLineToString(logLine: LogLine): string {
		@@ -11,0 +363,0 @@ try {

2

package.json

		{
		"name": "@browserbasehq/stagehand",
		"version": "1.7.0-alpha-b902192bc7ff8eb02c85150c1fe6f89c2a95b211",
		"version": "1.7.0-alpha-ba4ec335a5323648c6016cc480300fd58868311a",
		"description": "An AI web browsing framework focused on simplicity and extensibility.",
		@@ -5,0 +5,0 @@ "main": "./dist/index.js",

2

README.md

		@@ -201,2 +201,3 @@ <div id="toc" align="center">
		- `sessionUrl`: a `string` representing the session URL. This is only available when using a Browserbase browser.
		- `sessionId`: a `string` representing the session ID. This is only available when using a Browserbase browser.

		@@ -263,2 +264,3 @@ - Example:
		- `domSettleTimeoutMs`: (optional) timeout in milliseconds for waiting for the DOM to settle
		- `useTextExtract`: (optional) a `boolean` to determine if text-based extraction should be used. Defaults to `false`

		@@ -265,0 +267,0 @@ - Returns:

dist/evals/act/amazon_add_to_cart.d.ts

dist/evals/act/bidnet.d.ts

dist/evals/act/ionwave.d.ts

dist/evals/act/laroche_form.d.ts

dist/evals/act/nonsense_action.d.ts

dist/evals/act/peeler_simple.d.ts

dist/evals/act/simple_google_search.d.ts

dist/evals/act/stock_x.d.ts

dist/evals/act/vantechjournal.d.ts

dist/evals/act/wikipedia.d.ts

dist/evals/combination/allrecipes.d.ts

dist/evals/combination/arxiv.d.ts

dist/evals/combination/extract_collaborators.d.ts

dist/evals/combination/extract_github_commits.d.ts

dist/evals/combination/imdb_movie_details.d.ts

dist/evals/combination/peeler_complex.d.ts

dist/evals/combination/sciquest.d.ts

dist/evals/experimental/apple.d.ts

dist/evals/experimental/combination_sauce.d.ts

dist/evals/experimental/costar.d.ts

dist/evals/experimental/expedia_search.d.ts

dist/evals/experimental/expedia.d.ts

dist/evals/experimental/extract_aigrant_companies.d.ts

dist/evals/experimental/extract_capacitor_info.d.ts

dist/evals/experimental/extract_partners.d.ts

dist/evals/experimental/extract_press_releases.d.ts

dist/evals/experimental/extract_snowshoeing_destinations.d.ts

dist/evals/experimental/google_jobs.d.ts

dist/evals/experimental/homedepot.d.ts

dist/evals/experimental/ibm.d.ts

dist/evals/experimental/rakuten_jp.d.ts

dist/evals/experimental/ted_talk.d.ts

dist/evals/extract/extract_baptist_health.d.ts

dist/evals/extract/extract_github_stars.d.ts

dist/evals/extract/extract_memorial_healthcare.d.ts

dist/evals/extract/extract_nhl_stats.d.ts

dist/evals/extract/extract_professional_info.d.ts

dist/evals/extract/extract_regulations.d.ts

dist/evals/extract/extract_resistor_info.d.ts

dist/evals/extract/extract_rockauto.d.ts

dist/evals/extract/extract_staff_members.d.ts

dist/evals/observe/shopify_homepage.d.ts

dist/evals/observe/vanta_h.d.ts

dist/evals/observe/vanta.d.ts

dist/index.js

Sorry, the diff of this file is too big to display

@browserbasehq/stagehand - npm Package Compare versions

New alerts

Fixed alerts

Improved metrics