@browserbasehq/stagehand
Advanced tools
Comparing version 1.10.0-alpha-d90a5b9dbb7ec564f30bef9c8d123e8efc530b6f to 1.10.0-alpha-e5db23c0cf75bee5c08b73f702eaf745309bb6d7
@@ -7,2 +7,3 @@ /** | ||
declare const useTextExtract: boolean; | ||
declare const useAccessibilityTree: boolean; | ||
/** | ||
@@ -15,2 +16,2 @@ * Variables for filtering which tasks to run: | ||
declare let filterByEvalName: string | null; | ||
export { filterByCategory, filterByEvalName, useTextExtract, DEFAULT_EVAL_CATEGORIES, }; | ||
export { filterByCategory, filterByEvalName, useTextExtract, useAccessibilityTree, DEFAULT_EVAL_CATEGORIES, }; |
@@ -1,3 +0,3 @@ | ||
import type { ConstructorParams } from "../../lib"; | ||
import type { ConstructorParams } from "@/dist"; | ||
declare const StagehandConfig: ConstructorParams; | ||
export default StagehandConfig; |
@@ -12,3 +12,3 @@ /** | ||
*/ | ||
import { AvailableModel, ConstructorParams, Stagehand } from "../lib"; | ||
import { AvailableModel, ConstructorParams, Stagehand } from "@/dist"; | ||
import { EvalLogger } from "./logger"; | ||
@@ -34,3 +34,3 @@ /** | ||
logger: EvalLogger; | ||
initResponse: import("../lib").InitResult; | ||
initResponse: import("@/dist").InitResult; | ||
}>; |
@@ -1,4 +0,3 @@ | ||
import { LogLine } from "../types/log"; | ||
import { LogLineEval } from "../types/evals"; | ||
import { Stagehand } from "../lib"; | ||
import { LogLineEval } from "@/types/evals"; | ||
import { Stagehand, LogLine } from "@/dist"; | ||
/** | ||
@@ -5,0 +4,0 @@ * EvalLogger: |
/** | ||
* This file implements scoring functions needed by braintrust. | ||
*/ | ||
import { EvalArgs, EvalInput, EvalResult } from "../types/evals"; | ||
import { EvalArgs, EvalInput, EvalResult } from "@/types/evals"; | ||
/** | ||
@@ -6,0 +6,0 @@ * Scoring function: exactMatch |
@@ -12,3 +12,3 @@ /** | ||
*/ | ||
import { AvailableModel } from "../types/model"; | ||
import { AvailableModel } from "@/dist"; | ||
declare const config: any; | ||
@@ -15,0 +15,0 @@ declare const tasksByName: Record<string, { |
@@ -1,2 +0,2 @@ | ||
import { EvalFunction } from "../../types/evals"; | ||
import { EvalFunction } from "@/types/evals"; | ||
export declare const allrecipes: EvalFunction; |
@@ -1,2 +0,2 @@ | ||
import { EvalFunction } from "../../types/evals"; | ||
import { EvalFunction } from "@/types/evals"; | ||
export declare const amazon_add_to_cart: EvalFunction; |
@@ -1,2 +0,2 @@ | ||
import { EvalFunction } from "../../types/evals"; | ||
import { EvalFunction } from "@/types/evals"; | ||
export declare const apple: EvalFunction; |
@@ -1,2 +0,2 @@ | ||
import { EvalFunction } from "../../types/evals"; | ||
import { EvalFunction } from "@/types/evals"; | ||
export declare const arxiv: EvalFunction; |
@@ -1,2 +0,2 @@ | ||
import { EvalFunction } from "../../types/evals"; | ||
import { EvalFunction } from "@/types/evals"; | ||
export declare const bidnet: EvalFunction; |
@@ -1,2 +0,2 @@ | ||
import { EvalFunction } from "../../types/evals"; | ||
import { EvalFunction } from "@/types/evals"; | ||
export declare const combination_sauce: EvalFunction; |
@@ -1,2 +0,2 @@ | ||
import { EvalFunction } from "../../types/evals"; | ||
import { EvalFunction } from "@/types/evals"; | ||
export declare const costar: EvalFunction; |
@@ -1,2 +0,2 @@ | ||
import { EvalFunction } from "../../types/evals"; | ||
import { EvalFunction } from "@/types/evals"; | ||
export declare const expedia_search: EvalFunction; |
@@ -1,2 +0,2 @@ | ||
import { EvalFunction } from "../../types/evals"; | ||
import { EvalFunction } from "@/types/evals"; | ||
export declare const expedia: EvalFunction; |
@@ -1,2 +0,2 @@ | ||
import { EvalFunction } from "../../types/evals"; | ||
import { EvalFunction } from "@/types/evals"; | ||
export declare const extract_aigrant_companies: EvalFunction; |
@@ -1,2 +0,2 @@ | ||
import { EvalFunction } from "../../types/evals"; | ||
import { EvalFunction } from "@/types/evals"; | ||
export declare const extract_area_codes: EvalFunction; |
@@ -1,2 +0,2 @@ | ||
import { EvalFunction } from "../../types/evals"; | ||
import { EvalFunction } from "@/types/evals"; | ||
export declare const extract_baptist_health: EvalFunction; |
@@ -1,2 +0,2 @@ | ||
import { EvalFunction } from "../../types/evals"; | ||
import { EvalFunction } from "@/types/evals"; | ||
export declare const extract_capacitor_info: EvalFunction; |
@@ -1,2 +0,2 @@ | ||
import { EvalFunction } from "../../types/evals"; | ||
import { EvalFunction } from "@/types/evals"; | ||
export declare const extract_collaborators: EvalFunction; |
@@ -1,2 +0,2 @@ | ||
import { EvalFunction } from "../../types/evals"; | ||
import { EvalFunction } from "@/types/evals"; | ||
export declare const extract_csa: EvalFunction; |
@@ -1,2 +0,2 @@ | ||
import { EvalFunction } from "../../types/evals"; | ||
import { EvalFunction } from "@/types/evals"; | ||
export declare const extract_github_commits: EvalFunction; |
@@ -1,2 +0,2 @@ | ||
import { EvalFunction } from "../../types/evals"; | ||
import { EvalFunction } from "@/types/evals"; | ||
export declare const extract_github_stars: EvalFunction; |
@@ -1,2 +0,2 @@ | ||
import { EvalFunction } from "../../types/evals"; | ||
import { EvalFunction } from "@/types/evals"; | ||
export declare const extract_memorial_healthcare: EvalFunction; |
@@ -1,2 +0,2 @@ | ||
import { EvalFunction } from "../../types/evals"; | ||
import { EvalFunction } from "@/types/evals"; | ||
export declare const extract_nhl_stats: EvalFunction; |
@@ -1,2 +0,2 @@ | ||
import { EvalFunction } from "../../types/evals"; | ||
import { EvalFunction } from "@/types/evals"; | ||
export declare const extract_partners: EvalFunction; |
@@ -1,2 +0,2 @@ | ||
import { EvalFunction } from "../../types/evals"; | ||
import { EvalFunction } from "@/types/evals"; | ||
export declare const extract_press_releases: EvalFunction; |
@@ -1,2 +0,2 @@ | ||
import { EvalFunction } from "../../types/evals"; | ||
import { EvalFunction } from "@/types/evals"; | ||
export declare const extract_professional_info: EvalFunction; |
@@ -1,2 +0,2 @@ | ||
import { EvalFunction } from "../../types/evals"; | ||
import { EvalFunction } from "@/types/evals"; | ||
export declare const extract_public_notices: EvalFunction; |
@@ -1,2 +0,2 @@ | ||
import { EvalFunction } from "../../types/evals"; | ||
import { EvalFunction } from "@/types/evals"; | ||
export declare const extract_resistor_info: EvalFunction; |
@@ -1,2 +0,2 @@ | ||
import { EvalFunction } from "../../types/evals"; | ||
import { EvalFunction } from "@/types/evals"; | ||
export declare const extract_rockauto: EvalFunction; |
@@ -1,2 +0,2 @@ | ||
import { EvalFunction } from "../../types/evals"; | ||
import { EvalFunction } from "@/types/evals"; | ||
export declare const extract_snowshoeing_destinations: EvalFunction; |
@@ -1,2 +0,2 @@ | ||
import { EvalFunction } from "../../types/evals"; | ||
import { EvalFunction } from "@/types/evals"; | ||
export declare const extract_staff_members: EvalFunction; |
@@ -1,2 +0,2 @@ | ||
import { EvalFunction } from "../../types/evals"; | ||
import { EvalFunction } from "@/types/evals"; | ||
export declare const google_jobs: EvalFunction; |
@@ -1,2 +0,2 @@ | ||
import { EvalFunction } from "../../types/evals"; | ||
import { EvalFunction } from "@/types/evals"; | ||
export declare const homedepot: EvalFunction; |
@@ -1,2 +0,2 @@ | ||
import { EvalFunction } from "../../types/evals"; | ||
import { EvalFunction } from "@/types/evals"; | ||
export declare const imdb_movie_details: EvalFunction; |
@@ -1,2 +0,2 @@ | ||
import { EvalFunction } from "../../types/evals"; | ||
import { EvalFunction } from "@/types/evals"; | ||
export declare const instructions: EvalFunction; |
@@ -1,2 +0,2 @@ | ||
import { EvalFunction } from "../../types/evals"; | ||
import { EvalFunction } from "@/types/evals"; | ||
export declare const ionwave_observe: EvalFunction; |
@@ -1,2 +0,2 @@ | ||
import { EvalFunction } from "../../types/evals"; | ||
import { EvalFunction } from "@/types/evals"; | ||
export declare const ionwave: EvalFunction; |
@@ -1,2 +0,2 @@ | ||
import { EvalFunction } from "../../types/evals"; | ||
import { EvalFunction } from "@/types/evals"; | ||
export declare const laroche_form: EvalFunction; |
@@ -1,2 +0,2 @@ | ||
import { EvalFunction } from "../../types/evals"; | ||
import { EvalFunction } from "@/types/evals"; | ||
export declare const nonsense_action: EvalFunction; |
@@ -1,2 +0,2 @@ | ||
import { EvalFunction } from "../../types/evals"; | ||
import { EvalFunction } from "@/types/evals"; | ||
export declare const panamcs: EvalFunction; |
@@ -1,2 +0,2 @@ | ||
import { EvalFunction } from "../../types/evals"; | ||
import { EvalFunction } from "@/types/evals"; | ||
export declare const peeler_complex: EvalFunction; |
@@ -1,2 +0,2 @@ | ||
import { EvalFunction } from "../../types/evals"; | ||
import { EvalFunction } from "@/types/evals"; | ||
export declare const peeler_simple: EvalFunction; |
@@ -1,2 +0,2 @@ | ||
import { EvalFunction } from "../../types/evals"; | ||
import { EvalFunction } from "@/types/evals"; | ||
export declare const rakuten_jp: EvalFunction; |
@@ -1,2 +0,2 @@ | ||
import { EvalFunction } from "../../types/evals"; | ||
import { EvalFunction } from "@/types/evals"; | ||
export declare const sciquest: EvalFunction; |
@@ -1,2 +0,2 @@ | ||
import { EvalFunction } from "../../types/evals"; | ||
import { EvalFunction } from "@/types/evals"; | ||
export declare const shopify_homepage: EvalFunction; |
@@ -1,2 +0,2 @@ | ||
import { EvalFunction } from "../../types/evals"; | ||
import { EvalFunction } from "@/types/evals"; | ||
export declare const simple_google_search: EvalFunction; |
@@ -1,2 +0,2 @@ | ||
import { EvalFunction } from "../../types/evals"; | ||
import { EvalFunction } from "@/types/evals"; | ||
export declare const stock_x: EvalFunction; |
@@ -1,2 +0,2 @@ | ||
import { EvalFunction } from "../../types/evals"; | ||
import { EvalFunction } from "@/types/evals"; | ||
export declare const ted_talk: EvalFunction; |
@@ -1,2 +0,2 @@ | ||
import { EvalFunction } from "../../types/evals"; | ||
import { EvalFunction } from "@/types/evals"; | ||
export declare const vanta_h: EvalFunction; |
@@ -1,2 +0,2 @@ | ||
import { EvalFunction } from "../../types/evals"; | ||
import { EvalFunction } from "@/types/evals"; | ||
export declare const vanta: EvalFunction; |
@@ -1,2 +0,2 @@ | ||
import { EvalFunction } from "../../types/evals"; | ||
import { EvalFunction } from "@/types/evals"; | ||
export declare const vantechjournal: EvalFunction; |
@@ -1,2 +0,2 @@ | ||
import { EvalFunction } from "../../types/evals"; | ||
import { EvalFunction } from "@/types/evals"; | ||
export declare const wichita: EvalFunction; |
@@ -1,2 +0,2 @@ | ||
import { EvalFunction } from "../../types/evals"; | ||
import { EvalFunction } from "@/types/evals"; | ||
export declare const wikipedia: EvalFunction; |
@@ -10,2 +10,3 @@ /** | ||
*/ | ||
import { LogLine } from "@/dist"; | ||
/** | ||
@@ -64,1 +65,2 @@ * normalizeString: | ||
}): string; | ||
export declare function logLineToString(logLine: LogLine): string; |
import { LanguageModel } from "ai"; | ||
import { ChatCompletion } from "openai/resources/chat/completions"; | ||
import { CreateChatCompletionOptions, LLMClient } from "../../lib/llm/LLMClient"; | ||
import { CreateChatCompletionOptions, LLMClient } from "@/dist"; | ||
export declare class AISdkClient extends LLMClient { | ||
@@ -5,0 +5,0 @@ type: "aisdk"; |
@@ -0,18 +1,26 @@ | ||
/** | ||
* Welcome to the Stagehand Ollama client! | ||
* | ||
* This is a client for the Ollama API. It is a wrapper around the OpenAI API | ||
* that allows you to create chat completions with Ollama. | ||
* | ||
* To use this client, you need to have an Ollama instance running. You can | ||
* start an Ollama instance by running the following command: | ||
* | ||
* ```bash | ||
* ollama run llama3.2 | ||
* ``` | ||
*/ | ||
import { CreateChatCompletionOptions, LLMClient } from "@/dist"; | ||
import { type ClientOptions } from "openai"; | ||
import type { ChatCompletion } from "openai/resources/chat"; | ||
import type { LLMCache } from "../../lib/cache/LLMCache"; | ||
import { CreateChatCompletionOptions, LLMClient } from "../../lib/llm/LLMClient"; | ||
import type { ChatCompletion } from "openai/resources/chat/completions"; | ||
export declare class OllamaClient extends LLMClient { | ||
type: "ollama"; | ||
private client; | ||
private cache; | ||
private enableCaching; | ||
clientOptions: ClientOptions; | ||
constructor({ enableCaching, cache, modelName, clientOptions, }: { | ||
enableCaching?: boolean; | ||
cache?: LLMCache; | ||
constructor({ modelName, clientOptions, enableCaching, }: { | ||
modelName?: string; | ||
clientOptions?: ClientOptions; | ||
enableCaching?: boolean; | ||
}); | ||
createChatCompletion<T = ChatCompletion>({ options, retries, logger, }: CreateChatCompletionOptions): Promise<T>; | ||
} |
@@ -204,2 +204,3 @@ import { z, ZodType } from 'zod'; | ||
domSettleTimeoutMs?: number; | ||
useAccessibilityTree?: boolean; | ||
} | ||
@@ -211,6 +212,17 @@ interface ObserveResult { | ||
declare const defaultExtractSchema: z.ZodObject<{ | ||
extraction: z.ZodString; | ||
}, "strip", z.ZodTypeAny, { | ||
extraction?: string; | ||
}, { | ||
extraction?: string; | ||
}>; | ||
interface Page extends Omit<Page$1, "on"> { | ||
act: (options: ActOptions) => Promise<ActResult>; | ||
extract: <T extends z.AnyZodObject>(options: ExtractOptions<T>) => Promise<ExtractResult<T>>; | ||
observe: (options?: ObserveOptions) => Promise<ObserveResult[]>; | ||
act(action: string): Promise<ActResult>; | ||
act(options: ActOptions): Promise<ActResult>; | ||
extract(instruction: string): Promise<ExtractResult<typeof defaultExtractSchema>>; | ||
extract<T extends z.AnyZodObject>(options: ExtractOptions<T>): Promise<ExtractResult<T>>; | ||
observe(): Promise<ObserveResult[]>; | ||
observe(instruction: string): Promise<ObserveResult[]>; | ||
observe(options?: ObserveOptions): Promise<ObserveResult[]>; | ||
on: { | ||
@@ -290,2 +302,2 @@ (event: "popup", listener: (page: Page) => unknown): Page; | ||
export { type ActOptions, type ActResult, AnnotatedScreenshotText, type AnthropicJsonSchemaObject, type AvailableModel, AvailableModelSchema, type Browser, type BrowserContext, type BrowserResult, type ChatCompletionOptions, type ChatMessage, type ChatMessageContent, type ChatMessageImageContent, type ChatMessageTextContent, type ClientOptions, type ConstructorParams, type CreateChatCompletionOptions, type ExtractOptions, type ExtractResult, type GotoOptions, type InitFromPageOptions, type InitFromPageResult, type InitOptions, type InitResult, LLMClient, type LLMResponse, type LogLine, type ModelProvider, type ObserveOptions, type ObserveResult, type Page, PlaywrightCommandException, PlaywrightCommandMethodNotSupportedException, Stagehand, modelsWithVision }; | ||
export { type ActOptions, type ActResult, AnnotatedScreenshotText, type AnthropicJsonSchemaObject, type AvailableModel, AvailableModelSchema, type Browser, type BrowserContext, type BrowserResult, type ChatCompletionOptions, type ChatMessage, type ChatMessageContent, type ChatMessageImageContent, type ChatMessageTextContent, type ClientOptions, type ConstructorParams, type CreateChatCompletionOptions, type ExtractOptions, type ExtractResult, type GotoOptions, type InitFromPageOptions, type InitFromPageResult, type InitOptions, type InitResult, LLMClient, type LLMResponse, type LogLine, type ModelProvider, type ObserveOptions, type ObserveResult, type Page, PlaywrightCommandException, PlaywrightCommandMethodNotSupportedException, Stagehand, defaultExtractSchema, modelsWithVision }; |
@@ -1,1 +0,1 @@ | ||
export declare const scriptContent = "(() => {\n // lib/dom/xpathUtils.ts\n function getParentElement(node) {\n return isElementNode(node) ? node.parentElement : node.parentNode;\n }\n function getCombinations(attributes, size) {\n const results = [];\n function helper(start, combo) {\n if (combo.length === size) {\n results.push([...combo]);\n return;\n }\n for (let i = start; i < attributes.length; i++) {\n combo.push(attributes[i]);\n helper(i + 1, combo);\n combo.pop();\n }\n }\n helper(0, []);\n return results;\n }\n function isXPathFirstResultElement(xpath, target) {\n try {\n const result = document.evaluate(\n xpath,\n document.documentElement,\n null,\n XPathResult.ORDERED_NODE_SNAPSHOT_TYPE,\n null\n );\n return result.snapshotItem(0) === target;\n } catch (error) {\n console.warn(`Invalid XPath expression: ${xpath}`, error);\n return false;\n }\n }\n function escapeXPathString(value) {\n if (value.includes(\"'\")) {\n if (value.includes('\"')) {\n return \"concat(\" + value.split(/('+)/).map((part) => {\n if (part === \"'\") {\n return `\"'\"`;\n } else if (part.startsWith(\"'\") && part.endsWith(\"'\")) {\n return `\"${part}\"`;\n } else {\n return `'${part}'`;\n }\n }).join(\",\") + \")\";\n } else {\n return `\"${value}\"`;\n }\n } else {\n return `'${value}'`;\n }\n }\n async function generateXPathsForElement(element) {\n if (!element) return [];\n const [complexXPath, standardXPath, idBasedXPath] = await Promise.all([\n generateComplexXPath(element),\n generateStandardXPath(element),\n generatedIdBasedXPath(element)\n ]);\n return [standardXPath, ...idBasedXPath ? 
[idBasedXPath] : [], complexXPath];\n }\n async function generateComplexXPath(element) {\n const parts = [];\n let currentElement = element;\n while (currentElement && (isTextNode(currentElement) || isElementNode(currentElement))) {\n if (isElementNode(currentElement)) {\n const el = currentElement;\n let selector = el.tagName.toLowerCase();\n const attributePriority = [\n \"data-qa\",\n \"data-component\",\n \"data-role\",\n \"role\",\n \"aria-role\",\n \"type\",\n \"name\",\n \"aria-label\",\n \"placeholder\",\n \"title\",\n \"alt\"\n ];\n const attributes = attributePriority.map((attr) => {\n let value = el.getAttribute(attr);\n if (attr === \"href-full\" && value) {\n value = el.getAttribute(\"href\");\n }\n return value ? { attr: attr === \"href-full\" ? \"href\" : attr, value } : null;\n }).filter((attr) => attr !== null);\n let uniqueSelector = \"\";\n for (let i = 1; i <= attributes.length; i++) {\n const combinations = getCombinations(attributes, i);\n for (const combo of combinations) {\n const conditions = combo.map((a) => `@${a.attr}=${escapeXPathString(a.value)}`).join(\" and \");\n const xpath2 = `//${selector}[${conditions}]`;\n if (isXPathFirstResultElement(xpath2, el)) {\n uniqueSelector = xpath2;\n break;\n }\n }\n if (uniqueSelector) break;\n }\n if (uniqueSelector) {\n parts.unshift(uniqueSelector.replace(\"//\", \"\"));\n break;\n } else {\n const parent = getParentElement(el);\n if (parent) {\n const siblings = Array.from(parent.children).filter(\n (sibling) => sibling.tagName === el.tagName\n );\n const index = siblings.indexOf(el) + 1;\n selector += siblings.length > 1 ? 
`[${index}]` : \"\";\n }\n parts.unshift(selector);\n }\n }\n currentElement = getParentElement(currentElement);\n }\n const xpath = \"//\" + parts.join(\"/\");\n return xpath;\n }\n async function generateStandardXPath(element) {\n const parts = [];\n while (element && (isTextNode(element) || isElementNode(element))) {\n let index = 0;\n let hasSameTypeSiblings = false;\n const siblings = element.parentElement ? Array.from(element.parentElement.childNodes) : [];\n for (let i = 0; i < siblings.length; i++) {\n const sibling = siblings[i];\n if (sibling.nodeType === element.nodeType && sibling.nodeName === element.nodeName) {\n index = index + 1;\n hasSameTypeSiblings = true;\n if (sibling.isSameNode(element)) {\n break;\n }\n }\n }\n if (element.nodeName !== \"#text\") {\n const tagName = element.nodeName.toLowerCase();\n const pathIndex = hasSameTypeSiblings ? `[${index}]` : \"\";\n parts.unshift(`${tagName}${pathIndex}`);\n }\n element = element.parentElement;\n }\n return parts.length ? 
`/${parts.join(\"/\")}` : \"\";\n }\n async function generatedIdBasedXPath(element) {\n if (isElementNode(element) && element.id) {\n return `//*[@id='${element.id}']`;\n }\n return null;\n }\n\n // lib/dom/utils.ts\n async function waitForDomSettle() {\n return new Promise((resolve) => {\n const createTimeout = () => {\n return setTimeout(() => {\n resolve();\n }, 2e3);\n };\n let timeout = createTimeout();\n const observer = new MutationObserver(() => {\n clearTimeout(timeout);\n timeout = createTimeout();\n });\n observer.observe(window.document.body, { childList: true, subtree: true });\n });\n }\n window.waitForDomSettle = waitForDomSettle;\n function calculateViewportHeight() {\n return Math.ceil(window.innerHeight * 0.75);\n }\n\n // lib/dom/process.ts\n function isElementNode(node) {\n return node.nodeType === Node.ELEMENT_NODE;\n }\n function isTextNode(node) {\n return node.nodeType === Node.TEXT_NODE && Boolean(node.textContent?.trim());\n }\n async function processDom(chunksSeen) {\n const { chunk, chunksArray } = await pickChunk(chunksSeen);\n const { outputString, selectorMap } = await processElements(chunk);\n console.log(\n `Stagehand (Browser Process): Extracted dom elements:\n${outputString}`\n );\n return {\n outputString,\n selectorMap,\n chunk,\n chunks: chunksArray\n };\n }\n async function processAllOfDom() {\n console.log(\"Stagehand (Browser Process): Processing all of DOM\");\n const viewportHeight = calculateViewportHeight();\n const documentHeight = document.documentElement.scrollHeight;\n const totalChunks = Math.ceil(documentHeight / viewportHeight);\n let index = 0;\n const results = [];\n for (let chunk = 0; chunk < totalChunks; chunk++) {\n const result = await processElements(chunk, true, index);\n results.push(result);\n index += Object.keys(result.selectorMap).length;\n }\n await scrollToHeight(0);\n const allOutputString = results.map((result) => result.outputString).join(\"\");\n const allSelectorMap = results.reduce(\n (acc, 
result) => ({ ...acc, ...result.selectorMap }),\n {}\n );\n console.log(\n `Stagehand (Browser Process): All dom elements: ${allOutputString}`\n );\n return {\n outputString: allOutputString,\n selectorMap: allSelectorMap\n };\n }\n async function scrollToHeight(height) {\n window.scrollTo({ top: height, left: 0, behavior: \"smooth\" });\n await new Promise((resolve) => {\n let scrollEndTimer;\n const handleScrollEnd = () => {\n clearTimeout(scrollEndTimer);\n scrollEndTimer = window.setTimeout(() => {\n window.removeEventListener(\"scroll\", handleScrollEnd);\n resolve();\n }, 100);\n };\n window.addEventListener(\"scroll\", handleScrollEnd, { passive: true });\n handleScrollEnd();\n });\n }\n var xpathCache = /* @__PURE__ */ new Map();\n async function processElements(chunk, scrollToChunk = true, indexOffset = 0) {\n console.time(\"processElements:total\");\n const viewportHeight = calculateViewportHeight();\n const chunkHeight = viewportHeight * chunk;\n const maxScrollTop = document.documentElement.scrollHeight - viewportHeight;\n const offsetTop = Math.min(chunkHeight, maxScrollTop);\n if (scrollToChunk) {\n console.time(\"processElements:scroll\");\n await scrollToHeight(offsetTop);\n console.timeEnd(\"processElements:scroll\");\n }\n const candidateElements = [];\n const DOMQueue = [...document.body.childNodes];\n console.log(\"Stagehand (Browser Process): Generating candidate elements\");\n console.time(\"processElements:findCandidates\");\n while (DOMQueue.length > 0) {\n const element = DOMQueue.pop();\n let shouldAddElement = false;\n if (element && isElementNode(element)) {\n const childrenCount = element.childNodes.length;\n for (let i = childrenCount - 1; i >= 0; i--) {\n const child = element.childNodes[i];\n DOMQueue.push(child);\n }\n if (isInteractiveElement(element)) {\n if (isActive(element) && isVisible(element)) {\n shouldAddElement = true;\n }\n }\n if (isLeafElement(element)) {\n if (isActive(element) && isVisible(element)) {\n 
shouldAddElement = true;\n }\n }\n }\n if (element && isTextNode(element) && isTextVisible(element)) {\n shouldAddElement = true;\n }\n if (shouldAddElement) {\n candidateElements.push(element);\n }\n }\n console.timeEnd(\"processElements:findCandidates\");\n const selectorMap = {};\n let outputString = \"\";\n console.log(\n `Stagehand (Browser Process): Processing candidate elements: ${candidateElements.length}`\n );\n console.time(\"processElements:processCandidates\");\n console.time(\"processElements:generateXPaths\");\n const xpathLists = await Promise.all(\n candidateElements.map(async (element) => {\n if (xpathCache.has(element)) {\n return xpathCache.get(element);\n }\n const xpaths = await generateXPathsForElement(element);\n xpathCache.set(element, xpaths);\n return xpaths;\n })\n );\n console.timeEnd(\"processElements:generateXPaths\");\n candidateElements.forEach((element, index) => {\n const xpaths = xpathLists[index];\n let elementOutput = \"\";\n if (isTextNode(element)) {\n const textContent = element.textContent?.trim();\n if (textContent) {\n elementOutput += `${index + indexOffset}:${textContent}\n`;\n }\n } else if (isElementNode(element)) {\n const tagName = element.tagName.toLowerCase();\n const attributes = collectEssentialAttributes(element);\n const openingTag = `<${tagName}${attributes ? 
\" \" + attributes : \"\"}>`;\n const closingTag = `</${tagName}>`;\n const textContent = element.textContent?.trim() || \"\";\n elementOutput += `${index + indexOffset}:${openingTag}${textContent}${closingTag}\n`;\n }\n outputString += elementOutput;\n selectorMap[index + indexOffset] = xpaths;\n });\n console.timeEnd(\"processElements:processCandidates\");\n console.timeEnd(\"processElements:total\");\n return {\n outputString,\n selectorMap\n };\n }\n function collectEssentialAttributes(element) {\n const essentialAttributes = [\n \"id\",\n \"class\",\n \"href\",\n \"src\",\n \"aria-label\",\n \"aria-name\",\n \"aria-role\",\n \"aria-description\",\n \"aria-expanded\",\n \"aria-haspopup\",\n \"type\",\n \"value\"\n ];\n const attrs = essentialAttributes.map((attr) => {\n const value = element.getAttribute(attr);\n return value ? `${attr}=\"${value}\"` : \"\";\n }).filter((attr) => attr !== \"\");\n Array.from(element.attributes).forEach((attr) => {\n if (attr.name.startsWith(\"data-\")) {\n attrs.push(`${attr.name}=\"${attr.value}\"`);\n }\n });\n return attrs.join(\" \");\n }\n function storeDOM() {\n const originalDOM = document.body.cloneNode(true);\n console.log(\"DOM state stored.\");\n return originalDOM.outerHTML;\n }\n function restoreDOM(storedDOM) {\n console.log(\"Restoring DOM\");\n if (storedDOM) {\n document.body.innerHTML = storedDOM;\n } else {\n console.error(\"No DOM state was provided.\");\n }\n }\n function createTextBoundingBoxes() {\n const style = document.createElement(\"style\");\n document.head.appendChild(style);\n if (style.sheet) {\n style.sheet.insertRule(\n `\n .stagehand-highlighted-word, .stagehand-space {\n border: 0px solid orange;\n display: inline-block !important;\n visibility: visible;\n }\n `,\n 0\n );\n style.sheet.insertRule(\n `\n code .stagehand-highlighted-word, code .stagehand-space,\n pre .stagehand-highlighted-word, pre .stagehand-space {\n white-space: pre-wrap;\n display: inline !important;\n }\n `,\n 1\n );\n 
}\n function applyHighlighting(root) {\n root.querySelectorAll(\"body *\").forEach((element) => {\n if (element.closest(\".stagehand-nav, .stagehand-marker\")) {\n return;\n }\n if ([\"SCRIPT\", \"STYLE\", \"IFRAME\", \"INPUT\", \"TEXTAREA\"].includes(\n element.tagName\n )) {\n return;\n }\n const childNodes = Array.from(element.childNodes);\n childNodes.forEach((node) => {\n if (node.nodeType === 3 && node.textContent?.trim().length > 0) {\n const textContent = node.textContent.replace(/\\u00A0/g, \" \");\n const tokens = textContent.split(/(\\s+)/g);\n const fragment = document.createDocumentFragment();\n const parentIsCode = element.tagName === \"CODE\";\n tokens.forEach((token) => {\n const span = document.createElement(\"span\");\n span.textContent = token;\n if (parentIsCode) {\n span.style.whiteSpace = \"pre-wrap\";\n span.style.display = \"inline\";\n }\n span.className = token.trim().length === 0 ? \"stagehand-space\" : \"stagehand-highlighted-word\";\n fragment.appendChild(span);\n });\n if (fragment.childNodes.length > 0 && node.parentNode) {\n element.insertBefore(fragment, node);\n node.remove();\n }\n }\n });\n });\n }\n applyHighlighting(document);\n document.querySelectorAll(\"iframe\").forEach((iframe) => {\n try {\n iframe.contentWindow?.postMessage({ action: \"highlight\" }, \"*\");\n } catch (error) {\n console.error(\"Error accessing iframe content: \", error);\n }\n });\n }\n function getElementBoundingBoxes(xpath) {\n const element = document.evaluate(\n xpath,\n document,\n null,\n XPathResult.FIRST_ORDERED_NODE_TYPE,\n null\n ).singleNodeValue;\n if (!element) return [];\n const isValidText = (text) => text && text.trim().length > 0;\n let dropDownElem = element.querySelector(\"option[selected]\");\n if (!dropDownElem) {\n dropDownElem = element.querySelector(\"option\");\n }\n if (dropDownElem) {\n const elemText = dropDownElem.textContent || \"\";\n if (isValidText(elemText)) {\n const parentRect = element.getBoundingClientRect();\n 
return [\n {\n text: elemText.trim(),\n top: parentRect.top + window.scrollY,\n left: parentRect.left + window.scrollX,\n width: parentRect.width,\n height: parentRect.height\n }\n ];\n } else {\n return [];\n }\n }\n let placeholderText = \"\";\n if ((element.tagName.toLowerCase() === \"input\" || element.tagName.toLowerCase() === \"textarea\") && element.placeholder) {\n placeholderText = element.placeholder;\n } else if (element.tagName.toLowerCase() === \"a\") {\n placeholderText = \"\";\n } else if (element.tagName.toLowerCase() === \"img\") {\n placeholderText = element.alt || \"\";\n }\n const words = element.querySelectorAll(\n \".stagehand-highlighted-word\"\n );\n const boundingBoxes = Array.from(words).map((word) => {\n const rect = word.getBoundingClientRect();\n return {\n text: word.innerText || \"\",\n top: rect.top + window.scrollY,\n left: rect.left + window.scrollX,\n width: rect.width,\n height: rect.height * 0.75\n };\n }).filter(\n (box) => box.width > 0 && box.height > 0 && box.top >= 0 && box.left >= 0 && isValidText(box.text)\n );\n if (boundingBoxes.length === 0) {\n const elementRect = element.getBoundingClientRect();\n return [\n {\n text: placeholderText,\n top: elementRect.top + window.scrollY,\n left: elementRect.left + window.scrollX,\n width: elementRect.width,\n height: elementRect.height * 0.75\n }\n ];\n }\n return boundingBoxes;\n }\n window.processDom = processDom;\n window.processAllOfDom = processAllOfDom;\n window.processElements = processElements;\n window.scrollToHeight = scrollToHeight;\n window.storeDOM = storeDOM;\n window.restoreDOM = restoreDOM;\n window.createTextBoundingBoxes = createTextBoundingBoxes;\n window.getElementBoundingBoxes = getElementBoundingBoxes;\n var leafElementDenyList = [\"SVG\", \"IFRAME\", \"SCRIPT\", \"STYLE\", \"LINK\"];\n var interactiveElementTypes = [\n \"A\",\n \"BUTTON\",\n \"DETAILS\",\n \"EMBED\",\n \"INPUT\",\n \"LABEL\",\n \"MENU\",\n \"MENUITEM\",\n \"OBJECT\",\n \"SELECT\",\n 
\"TEXTAREA\",\n \"SUMMARY\"\n ];\n var interactiveRoles = [\n \"button\",\n \"menu\",\n \"menuitem\",\n \"link\",\n \"checkbox\",\n \"radio\",\n \"slider\",\n \"tab\",\n \"tabpanel\",\n \"textbox\",\n \"combobox\",\n \"grid\",\n \"listbox\",\n \"option\",\n \"progressbar\",\n \"scrollbar\",\n \"searchbox\",\n \"switch\",\n \"tree\",\n \"treeitem\",\n \"spinbutton\",\n \"tooltip\"\n ];\n var interactiveAriaRoles = [\"menu\", \"menuitem\", \"button\"];\n var isVisible = (element) => {\n const rect = element.getBoundingClientRect();\n if (rect.width === 0 || rect.height === 0 || rect.top < 0 || rect.top > window.innerHeight) {\n return false;\n }\n if (!isTopElement(element, rect)) {\n return false;\n }\n const visible = element.checkVisibility({\n checkOpacity: true,\n checkVisibilityCSS: true\n });\n return visible;\n };\n var isTextVisible = (element) => {\n const range = document.createRange();\n range.selectNodeContents(element);\n const rect = range.getBoundingClientRect();\n if (rect.width === 0 || rect.height === 0 || rect.top < 0 || rect.top > window.innerHeight) {\n return false;\n }\n const parent = element.parentElement;\n if (!parent) {\n return false;\n }\n if (!isTopElement(parent, rect)) {\n return false;\n }\n const visible = parent.checkVisibility({\n checkOpacity: true,\n checkVisibilityCSS: true\n });\n return visible;\n };\n function isTopElement(elem, rect) {\n const points = [\n { x: rect.left + rect.width * 0.25, y: rect.top + rect.height * 0.25 },\n { x: rect.left + rect.width * 0.75, y: rect.top + rect.height * 0.25 },\n { x: rect.left + rect.width * 0.25, y: rect.top + rect.height * 0.75 },\n { x: rect.left + rect.width * 0.75, y: rect.top + rect.height * 0.75 },\n { x: rect.left + rect.width / 2, y: rect.top + rect.height / 2 }\n ];\n return points.some((point) => {\n const topEl = document.elementFromPoint(point.x, point.y);\n let current = topEl;\n while (current && current !== document.body) {\n if (current.isSameNode(elem)) {\n return 
true;\n }\n current = current.parentElement;\n }\n return false;\n });\n }\n var isActive = (element) => {\n if (element.hasAttribute(\"disabled\") || element.hasAttribute(\"hidden\") || element.getAttribute(\"aria-disabled\") === \"true\") {\n return false;\n }\n return true;\n };\n var isInteractiveElement = (element) => {\n const elementType = element.tagName;\n const elementRole = element.getAttribute(\"role\");\n const elementAriaRole = element.getAttribute(\"aria-role\");\n return elementType && interactiveElementTypes.includes(elementType) || elementRole && interactiveRoles.includes(elementRole) || elementAriaRole && interactiveAriaRoles.includes(elementAriaRole);\n };\n var isLeafElement = (element) => {\n if (element.textContent === \"\") {\n return false;\n }\n if (element.childNodes.length === 0) {\n return !leafElementDenyList.includes(element.tagName);\n }\n if (element.childNodes.length === 1 && isTextNode(element.childNodes[0])) {\n return true;\n }\n return false;\n };\n async function pickChunk(chunksSeen) {\n const viewportHeight = calculateViewportHeight();\n const documentHeight = document.documentElement.scrollHeight;\n const chunks = Math.ceil(documentHeight / viewportHeight);\n const chunksArray = Array.from({ length: chunks }, (_, i) => i);\n const chunksRemaining = chunksArray.filter((chunk2) => {\n return !chunksSeen.includes(chunk2);\n });\n const currentScrollPosition = window.scrollY;\n const closestChunk = chunksRemaining.reduce((closest, current) => {\n const currentChunkTop = viewportHeight * current;\n const closestChunkTop = viewportHeight * closest;\n return Math.abs(currentScrollPosition - currentChunkTop) < Math.abs(currentScrollPosition - closestChunkTop) ? 
current : closest;\n }, chunksRemaining[0]);\n const chunk = closestChunk;\n if (chunk === void 0) {\n throw new Error(`No chunks remaining to check: ${chunksRemaining}`);\n }\n return {\n chunk,\n chunksArray\n };\n }\n\n // lib/dom/debug.ts\n async function debugDom() {\n window.chunkNumber = 0;\n const { selectorMap: multiSelectorMap } = await window.processElements(\n window.chunkNumber\n );\n const selectorMap = multiSelectorMapToSelectorMap(multiSelectorMap);\n drawChunk(selectorMap);\n }\n function multiSelectorMapToSelectorMap(multiSelectorMap) {\n return Object.fromEntries(\n Object.entries(multiSelectorMap).map(([key, selectors]) => [\n Number(key),\n selectors[0]\n ])\n );\n }\n function drawChunk(selectorMap) {\n if (!window.showChunks) return;\n cleanupMarkers();\n Object.values(selectorMap).forEach((selector) => {\n const element = document.evaluate(\n selector,\n document,\n null,\n XPathResult.FIRST_ORDERED_NODE_TYPE,\n null\n ).singleNodeValue;\n if (element) {\n let rect;\n if (element.nodeType === Node.ELEMENT_NODE) {\n rect = element.getBoundingClientRect();\n } else {\n const range = document.createRange();\n range.selectNodeContents(element);\n rect = range.getBoundingClientRect();\n }\n const color = \"grey\";\n const overlay = document.createElement(\"div\");\n overlay.style.position = \"absolute\";\n overlay.style.left = `${rect.left + window.scrollX}px`;\n overlay.style.top = `${rect.top + window.scrollY}px`;\n overlay.style.padding = \"2px\";\n overlay.style.width = `${rect.width}px`;\n overlay.style.height = `${rect.height}px`;\n overlay.style.backgroundColor = color;\n overlay.className = \"stagehand-marker\";\n overlay.style.opacity = \"0.3\";\n overlay.style.zIndex = \"1000000000\";\n overlay.style.border = \"1px solid\";\n overlay.style.pointerEvents = \"none\";\n document.body.appendChild(overlay);\n }\n });\n }\n async function cleanupDebug() {\n cleanupMarkers();\n }\n function cleanupMarkers() {\n const markers = 
document.querySelectorAll(\".stagehand-marker\");\n markers.forEach((marker) => {\n marker.remove();\n });\n }\n window.debugDom = debugDom;\n window.cleanupDebug = cleanupDebug;\n})();\n"; | ||
export declare const scriptContent = "(() => {\n // lib/dom/xpathUtils.ts\n function getParentElement(node) {\n return isElementNode(node) ? node.parentElement : node.parentNode;\n }\n function getCombinations(attributes, size) {\n const results = [];\n function helper(start, combo) {\n if (combo.length === size) {\n results.push([...combo]);\n return;\n }\n for (let i = start; i < attributes.length; i++) {\n combo.push(attributes[i]);\n helper(i + 1, combo);\n combo.pop();\n }\n }\n helper(0, []);\n return results;\n }\n function isXPathFirstResultElement(xpath, target) {\n try {\n const result = document.evaluate(\n xpath,\n document.documentElement,\n null,\n XPathResult.ORDERED_NODE_SNAPSHOT_TYPE,\n null\n );\n return result.snapshotItem(0) === target;\n } catch (error) {\n console.warn(`Invalid XPath expression: ${xpath}`, error);\n return false;\n }\n }\n function escapeXPathString(value) {\n if (value.includes(\"'\")) {\n if (value.includes('\"')) {\n return \"concat(\" + value.split(/('+)/).map((part) => {\n if (part === \"'\") {\n return `\"'\"`;\n } else if (part.startsWith(\"'\") && part.endsWith(\"'\")) {\n return `\"${part}\"`;\n } else {\n return `'${part}'`;\n }\n }).join(\",\") + \")\";\n } else {\n return `\"${value}\"`;\n }\n } else {\n return `'${value}'`;\n }\n }\n async function generateXPathsForElement(element) {\n if (!element) return [];\n const [complexXPath, standardXPath, idBasedXPath] = await Promise.all([\n generateComplexXPath(element),\n generateStandardXPath(element),\n generatedIdBasedXPath(element)\n ]);\n return [standardXPath, ...idBasedXPath ? 
[idBasedXPath] : [], complexXPath];\n }\n async function generateComplexXPath(element) {\n const parts = [];\n let currentElement = element;\n while (currentElement && (isTextNode(currentElement) || isElementNode(currentElement))) {\n if (isElementNode(currentElement)) {\n const el = currentElement;\n let selector = el.tagName.toLowerCase();\n const attributePriority = [\n \"data-qa\",\n \"data-component\",\n \"data-role\",\n \"role\",\n \"aria-role\",\n \"type\",\n \"name\",\n \"aria-label\",\n \"placeholder\",\n \"title\",\n \"alt\"\n ];\n const attributes = attributePriority.map((attr) => {\n let value = el.getAttribute(attr);\n if (attr === \"href-full\" && value) {\n value = el.getAttribute(\"href\");\n }\n return value ? { attr: attr === \"href-full\" ? \"href\" : attr, value } : null;\n }).filter((attr) => attr !== null);\n let uniqueSelector = \"\";\n for (let i = 1; i <= attributes.length; i++) {\n const combinations = getCombinations(attributes, i);\n for (const combo of combinations) {\n const conditions = combo.map((a) => `@${a.attr}=${escapeXPathString(a.value)}`).join(\" and \");\n const xpath2 = `//${selector}[${conditions}]`;\n if (isXPathFirstResultElement(xpath2, el)) {\n uniqueSelector = xpath2;\n break;\n }\n }\n if (uniqueSelector) break;\n }\n if (uniqueSelector) {\n parts.unshift(uniqueSelector.replace(\"//\", \"\"));\n break;\n } else {\n const parent = getParentElement(el);\n if (parent) {\n const siblings = Array.from(parent.children).filter(\n (sibling) => sibling.tagName === el.tagName\n );\n const index = siblings.indexOf(el) + 1;\n selector += siblings.length > 1 ? 
`[${index}]` : \"\";\n }\n parts.unshift(selector);\n }\n }\n currentElement = getParentElement(currentElement);\n }\n const xpath = \"//\" + parts.join(\"/\");\n return xpath;\n }\n async function generateStandardXPath(element) {\n const parts = [];\n while (element && (isTextNode(element) || isElementNode(element))) {\n let index = 0;\n let hasSameTypeSiblings = false;\n const siblings = element.parentElement ? Array.from(element.parentElement.childNodes) : [];\n for (let i = 0; i < siblings.length; i++) {\n const sibling = siblings[i];\n if (sibling.nodeType === element.nodeType && sibling.nodeName === element.nodeName) {\n index = index + 1;\n hasSameTypeSiblings = true;\n if (sibling.isSameNode(element)) {\n break;\n }\n }\n }\n if (element.nodeName !== \"#text\") {\n const tagName = element.nodeName.toLowerCase();\n const pathIndex = hasSameTypeSiblings ? `[${index}]` : \"\";\n parts.unshift(`${tagName}${pathIndex}`);\n }\n element = element.parentElement;\n }\n return parts.length ? 
`/${parts.join(\"/\")}` : \"\";\n }\n async function generatedIdBasedXPath(element) {\n if (isElementNode(element) && element.id) {\n return `//*[@id='${element.id}']`;\n }\n return null;\n }\n\n // lib/dom/utils.ts\n async function waitForDomSettle() {\n return new Promise((resolve) => {\n const createTimeout = () => {\n return setTimeout(() => {\n resolve();\n }, 2e3);\n };\n let timeout = createTimeout();\n const observer = new MutationObserver(() => {\n clearTimeout(timeout);\n timeout = createTimeout();\n });\n observer.observe(window.document.body, { childList: true, subtree: true });\n });\n }\n window.waitForDomSettle = waitForDomSettle;\n function calculateViewportHeight() {\n return Math.ceil(window.innerHeight * 0.75);\n }\n function canElementScroll(elem) {\n if (typeof elem.scrollTo !== \"function\") {\n console.warn(\"canElementScroll: .scrollTo is not a function.\");\n return false;\n }\n try {\n const originalTop = elem.scrollTop;\n elem.scrollTo({\n top: originalTop + 100,\n left: 0,\n behavior: \"instant\"\n });\n if (elem.scrollTop === originalTop) {\n throw new Error(\"scrollTop did not change\");\n }\n elem.scrollTo({\n top: originalTop,\n left: 0,\n behavior: \"instant\"\n });\n return true;\n } catch (error) {\n console.warn(\"canElementScroll error:\", error.message || error);\n return false;\n }\n }\n\n // lib/dom/GlobalPageContainer.ts\n var GlobalPageContainer = class {\n getViewportHeight() {\n return calculateViewportHeight();\n }\n getScrollHeight() {\n return document.documentElement.scrollHeight;\n }\n async scrollTo(offset) {\n await new Promise((resolve) => setTimeout(resolve, 1500));\n window.scrollTo({ top: offset, left: 0, behavior: \"smooth\" });\n await this.waitForScrollEnd();\n }\n async waitForScrollEnd() {\n return new Promise((resolve) => {\n let scrollEndTimer;\n const handleScroll = () => {\n clearTimeout(scrollEndTimer);\n scrollEndTimer = window.setTimeout(() => {\n window.removeEventListener(\"scroll\", 
handleScroll);\n resolve();\n }, 100);\n };\n window.addEventListener(\"scroll\", handleScroll, { passive: true });\n handleScroll();\n });\n }\n };\n\n // lib/dom/ElementContainer.ts\n var ElementContainer = class {\n constructor(el) {\n this.el = el;\n }\n getViewportHeight() {\n return this.el.clientHeight;\n }\n getScrollHeight() {\n return this.el.scrollHeight;\n }\n async scrollTo(offset) {\n await new Promise((resolve) => setTimeout(resolve, 1500));\n this.el.scrollTo({ top: offset, left: 0, behavior: \"smooth\" });\n await this.waitForScrollEnd();\n }\n async waitForScrollEnd() {\n return new Promise((resolve) => {\n let scrollEndTimer;\n const handleScroll = () => {\n clearTimeout(scrollEndTimer);\n scrollEndTimer = window.setTimeout(() => {\n this.el.removeEventListener(\"scroll\", handleScroll);\n resolve();\n }, 100);\n };\n this.el.addEventListener(\"scroll\", handleScroll, { passive: true });\n handleScroll();\n });\n }\n };\n\n // lib/dom/containerFactory.ts\n function createStagehandContainer(obj) {\n if (obj instanceof Window) {\n return new GlobalPageContainer();\n } else {\n return new ElementContainer(obj);\n }\n }\n\n // lib/dom/process.ts\n function isElementNode(node) {\n return node.nodeType === Node.ELEMENT_NODE;\n }\n function isTextNode(node) {\n return node.nodeType === Node.TEXT_NODE && Boolean(node.textContent?.trim());\n }\n function getMainScrollableElement() {\n const docEl = document.documentElement;\n let mainScrollable = docEl;\n const rootScrollDiff = docEl.scrollHeight - docEl.clientHeight;\n let maxScrollDiff = rootScrollDiff;\n const allElements = document.querySelectorAll(\"*\");\n for (const elem of allElements) {\n const style = window.getComputedStyle(elem);\n const overflowY = style.overflowY;\n const isPotentiallyScrollable = overflowY === \"auto\" || overflowY === \"scroll\" || overflowY === \"overlay\";\n if (isPotentiallyScrollable) {\n const candidateScrollDiff = elem.scrollHeight - elem.clientHeight;\n if 
(candidateScrollDiff > maxScrollDiff) {\n maxScrollDiff = candidateScrollDiff;\n mainScrollable = elem;\n }\n }\n }\n if (mainScrollable !== docEl) {\n if (!canElementScroll(mainScrollable)) {\n console.log(\n \"Stagehand (Browser Process): Unable to scroll candidate. Fallback to <html>.\"\n );\n mainScrollable = docEl;\n }\n }\n return mainScrollable;\n }\n async function processDom(chunksSeen) {\n const { chunk, chunksArray } = await pickChunk(chunksSeen);\n const container = createStagehandContainer(window);\n const { outputString, selectorMap } = await processElements(\n chunk,\n true,\n 0,\n container\n );\n console.log(\n `Stagehand (Browser Process): Extracted dom elements:\n${outputString}`\n );\n return {\n outputString,\n selectorMap,\n chunk,\n chunks: chunksArray\n };\n }\n async function processAllOfDom() {\n console.log(\"Stagehand (Browser Process): Processing all of DOM\");\n const mainScrollable = getMainScrollableElement();\n const container = mainScrollable === document.documentElement ? 
createStagehandContainer(window) : createStagehandContainer(mainScrollable);\n const viewportHeight = container.getViewportHeight();\n const documentHeight = container.getScrollHeight();\n const totalChunks = Math.ceil(documentHeight / viewportHeight);\n let index = 0;\n const results = [];\n for (let chunk = 0; chunk < totalChunks; chunk++) {\n const result = await processElements(chunk, true, index, container);\n results.push(result);\n index += Object.keys(result.selectorMap).length;\n }\n await container.scrollTo(0);\n const allOutputString = results.map((result) => result.outputString).join(\"\");\n const allSelectorMap = results.reduce(\n (acc, result) => ({ ...acc, ...result.selectorMap }),\n {}\n );\n console.log(\n `Stagehand (Browser Process): All dom elements: ${allOutputString}`\n );\n return {\n outputString: allOutputString,\n selectorMap: allSelectorMap\n };\n }\n var xpathCache = /* @__PURE__ */ new Map();\n async function processElements(chunk, scrollToChunk = true, indexOffset = 0, container) {\n console.time(\"processElements:total\");\n const stagehandContainer = container ?? 
createStagehandContainer(window);\n const viewportHeight = stagehandContainer.getViewportHeight();\n const totalScrollHeight = stagehandContainer.getScrollHeight();\n const chunkHeight = viewportHeight * chunk;\n const maxScrollTop = totalScrollHeight - viewportHeight;\n const offsetTop = Math.min(chunkHeight, maxScrollTop);\n if (scrollToChunk) {\n console.time(\"processElements:scroll\");\n await stagehandContainer.scrollTo(offsetTop);\n console.timeEnd(\"processElements:scroll\");\n }\n console.log(\"Stagehand (Browser Process): Generating candidate elements\");\n console.time(\"processElements:findCandidates\");\n const DOMQueue = [...document.body.childNodes];\n const candidateElements = [];\n while (DOMQueue.length > 0) {\n const element = DOMQueue.pop();\n let shouldAddElement = false;\n if (element && isElementNode(element)) {\n const childrenCount = element.childNodes.length;\n for (let i = childrenCount - 1; i >= 0; i--) {\n const child = element.childNodes[i];\n DOMQueue.push(child);\n }\n if (isInteractiveElement(element)) {\n if (isActive(element) && isVisible(element)) {\n shouldAddElement = true;\n }\n }\n if (isLeafElement(element)) {\n if (isActive(element) && isVisible(element)) {\n shouldAddElement = true;\n }\n }\n }\n if (element && isTextNode(element) && isTextVisible(element)) {\n shouldAddElement = true;\n }\n if (shouldAddElement) {\n candidateElements.push(element);\n }\n }\n console.timeEnd(\"processElements:findCandidates\");\n const selectorMap = {};\n let outputString = \"\";\n console.log(\n `Stagehand (Browser Process): Processing candidate elements: ${candidateElements.length}`\n );\n console.time(\"processElements:processCandidates\");\n console.time(\"processElements:generateXPaths\");\n const xpathLists = await Promise.all(\n candidateElements.map(async (element) => {\n if (xpathCache.has(element)) {\n return xpathCache.get(element);\n }\n const xpaths = await generateXPathsForElement(element);\n xpathCache.set(element, 
xpaths);\n return xpaths;\n })\n );\n console.timeEnd(\"processElements:generateXPaths\");\n candidateElements.forEach((element, index) => {\n const xpaths = xpathLists[index];\n let elementOutput = \"\";\n if (isTextNode(element)) {\n const textContent = element.textContent?.trim();\n if (textContent) {\n elementOutput += `${index + indexOffset}:${textContent}\n`;\n }\n } else if (isElementNode(element)) {\n const tagName = element.tagName.toLowerCase();\n const attributes = collectEssentialAttributes(element);\n const openingTag = `<${tagName}${attributes ? \" \" + attributes : \"\"}>`;\n const closingTag = `</${tagName}>`;\n const textContent = element.textContent?.trim() || \"\";\n elementOutput += `${index + indexOffset}:${openingTag}${textContent}${closingTag}\n`;\n }\n outputString += elementOutput;\n selectorMap[index + indexOffset] = xpaths;\n });\n console.timeEnd(\"processElements:processCandidates\");\n console.timeEnd(\"processElements:total\");\n return {\n outputString,\n selectorMap\n };\n }\n function collectEssentialAttributes(element) {\n const essentialAttributes = [\n \"id\",\n \"class\",\n \"href\",\n \"src\",\n \"aria-label\",\n \"aria-name\",\n \"aria-role\",\n \"aria-description\",\n \"aria-expanded\",\n \"aria-haspopup\",\n \"type\",\n \"value\"\n ];\n const attrs = essentialAttributes.map((attr) => {\n const value = element.getAttribute(attr);\n return value ? 
`${attr}=\"${value}\"` : \"\";\n }).filter((attr) => attr !== \"\");\n Array.from(element.attributes).forEach((attr) => {\n if (attr.name.startsWith(\"data-\")) {\n attrs.push(`${attr.name}=\"${attr.value}\"`);\n }\n });\n return attrs.join(\" \");\n }\n function storeDOM() {\n const originalDOM = document.body.cloneNode(true);\n console.log(\"DOM state stored.\");\n return originalDOM.outerHTML;\n }\n function restoreDOM(storedDOM) {\n console.log(\"Restoring DOM\");\n if (storedDOM) {\n document.body.innerHTML = storedDOM;\n } else {\n console.error(\"No DOM state was provided.\");\n }\n }\n function createTextBoundingBoxes() {\n const style = document.createElement(\"style\");\n document.head.appendChild(style);\n if (style.sheet) {\n style.sheet.insertRule(\n `\n .stagehand-highlighted-word, .stagehand-space {\n border: 0px solid orange;\n display: inline-block !important;\n visibility: visible;\n }\n `,\n 0\n );\n style.sheet.insertRule(\n `\n code .stagehand-highlighted-word, code .stagehand-space,\n pre .stagehand-highlighted-word, pre .stagehand-space {\n white-space: pre-wrap;\n display: inline !important;\n }\n `,\n 1\n );\n }\n function applyHighlighting(root) {\n root.querySelectorAll(\"body *\").forEach((element) => {\n if (element.closest(\".stagehand-nav, .stagehand-marker\")) {\n return;\n }\n if ([\"SCRIPT\", \"STYLE\", \"IFRAME\", \"INPUT\"].includes(element.tagName)) {\n return;\n }\n const childNodes = Array.from(element.childNodes);\n childNodes.forEach((node) => {\n if (node.nodeType === 3 && node.textContent?.trim().length > 0) {\n const textContent = node.textContent.replace(/\\u00A0/g, \" \");\n const tokens = textContent.split(/(\\s+)/g);\n const fragment = document.createDocumentFragment();\n const parentIsCode = element.tagName === \"CODE\";\n tokens.forEach((token) => {\n const span = document.createElement(\"span\");\n span.textContent = token;\n if (parentIsCode) {\n span.style.whiteSpace = \"pre-wrap\";\n span.style.display = 
\"inline\";\n }\n span.className = token.trim().length === 0 ? \"stagehand-space\" : \"stagehand-highlighted-word\";\n fragment.appendChild(span);\n });\n if (fragment.childNodes.length > 0 && node.parentNode) {\n element.insertBefore(fragment, node);\n node.remove();\n }\n }\n });\n });\n }\n applyHighlighting(document);\n document.querySelectorAll(\"iframe\").forEach((iframe) => {\n try {\n iframe.contentWindow?.postMessage({ action: \"highlight\" }, \"*\");\n } catch (error) {\n console.error(\"Error accessing iframe content: \", error);\n }\n });\n }\n function getElementBoundingBoxes(xpath) {\n const element = document.evaluate(\n xpath,\n document,\n null,\n XPathResult.FIRST_ORDERED_NODE_TYPE,\n null\n ).singleNodeValue;\n if (!element) return [];\n const isValidText = (text) => text && text.trim().length > 0;\n let dropDownElem = element.querySelector(\"option[selected]\");\n if (!dropDownElem) {\n dropDownElem = element.querySelector(\"option\");\n }\n if (dropDownElem) {\n const elemText = dropDownElem.textContent || \"\";\n if (isValidText(elemText)) {\n const parentRect = element.getBoundingClientRect();\n return [\n {\n text: elemText.trim(),\n top: parentRect.top + window.scrollY,\n left: parentRect.left + window.scrollX,\n width: parentRect.width,\n height: parentRect.height\n }\n ];\n } else {\n return [];\n }\n }\n let placeholderText = \"\";\n if ((element.tagName.toLowerCase() === \"input\" || element.tagName.toLowerCase() === \"textarea\") && element.placeholder) {\n placeholderText = element.placeholder;\n } else if (element.tagName.toLowerCase() === \"a\") {\n placeholderText = \"\";\n } else if (element.tagName.toLowerCase() === \"img\") {\n placeholderText = element.alt || \"\";\n }\n const words = element.querySelectorAll(\n \".stagehand-highlighted-word\"\n );\n const boundingBoxes = Array.from(words).map((word) => {\n const rect = word.getBoundingClientRect();\n return {\n text: word.innerText || \"\",\n top: rect.top + window.scrollY,\n 
left: rect.left + window.scrollX,\n width: rect.width,\n height: rect.height * 0.75\n };\n }).filter(\n (box) => box.width > 0 && box.height > 0 && box.top >= 0 && box.left >= 0 && isValidText(box.text)\n );\n if (boundingBoxes.length === 0) {\n const elementRect = element.getBoundingClientRect();\n return [\n {\n text: placeholderText,\n top: elementRect.top + window.scrollY,\n left: elementRect.left + window.scrollX,\n width: elementRect.width,\n height: elementRect.height * 0.75\n }\n ];\n }\n return boundingBoxes;\n }\n window.processDom = processDom;\n window.processAllOfDom = processAllOfDom;\n window.processElements = processElements;\n window.storeDOM = storeDOM;\n window.restoreDOM = restoreDOM;\n window.createTextBoundingBoxes = createTextBoundingBoxes;\n window.getElementBoundingBoxes = getElementBoundingBoxes;\n window.createStagehandContainer = createStagehandContainer;\n var leafElementDenyList = [\"SVG\", \"IFRAME\", \"SCRIPT\", \"STYLE\", \"LINK\"];\n var interactiveElementTypes = [\n \"A\",\n \"BUTTON\",\n \"DETAILS\",\n \"EMBED\",\n \"INPUT\",\n \"LABEL\",\n \"MENU\",\n \"MENUITEM\",\n \"OBJECT\",\n \"SELECT\",\n \"TEXTAREA\",\n \"SUMMARY\"\n ];\n var interactiveRoles = [\n \"button\",\n \"menu\",\n \"menuitem\",\n \"link\",\n \"checkbox\",\n \"radio\",\n \"slider\",\n \"tab\",\n \"tabpanel\",\n \"textbox\",\n \"combobox\",\n \"grid\",\n \"listbox\",\n \"option\",\n \"progressbar\",\n \"scrollbar\",\n \"searchbox\",\n \"switch\",\n \"tree\",\n \"treeitem\",\n \"spinbutton\",\n \"tooltip\"\n ];\n var interactiveAriaRoles = [\"menu\", \"menuitem\", \"button\"];\n var isVisible = (element) => {\n const rect = element.getBoundingClientRect();\n if (rect.width === 0 || rect.height === 0 || rect.top < 0 || rect.top > window.innerHeight) {\n return false;\n }\n if (!isTopElement(element, rect)) {\n return false;\n }\n const visible = element.checkVisibility({\n checkOpacity: true,\n checkVisibilityCSS: true\n });\n return visible;\n };\n var 
isTextVisible = (element) => {\n const range = document.createRange();\n range.selectNodeContents(element);\n const rect = range.getBoundingClientRect();\n if (rect.width === 0 || rect.height === 0 || rect.top < 0 || rect.top > window.innerHeight) {\n return false;\n }\n const parent = element.parentElement;\n if (!parent) {\n return false;\n }\n const visible = parent.checkVisibility({\n checkOpacity: true,\n checkVisibilityCSS: true\n });\n return visible;\n };\n function isTopElement(elem, rect) {\n const points = [\n { x: rect.left + rect.width * 0.25, y: rect.top + rect.height * 0.25 },\n { x: rect.left + rect.width * 0.75, y: rect.top + rect.height * 0.25 },\n { x: rect.left + rect.width * 0.25, y: rect.top + rect.height * 0.75 },\n { x: rect.left + rect.width * 0.75, y: rect.top + rect.height * 0.75 },\n { x: rect.left + rect.width / 2, y: rect.top + rect.height / 2 }\n ];\n return points.some((point) => {\n const topEl = document.elementFromPoint(point.x, point.y);\n let current = topEl;\n while (current && current !== document.body) {\n if (current.isSameNode(elem)) {\n return true;\n }\n current = current.parentElement;\n }\n return false;\n });\n }\n var isActive = (element) => {\n if (element.hasAttribute(\"disabled\") || element.hasAttribute(\"hidden\") || element.getAttribute(\"aria-disabled\") === \"true\") {\n return false;\n }\n return true;\n };\n var isInteractiveElement = (element) => {\n const elementType = element.tagName;\n const elementRole = element.getAttribute(\"role\");\n const elementAriaRole = element.getAttribute(\"aria-role\");\n return elementType && interactiveElementTypes.includes(elementType) || elementRole && interactiveRoles.includes(elementRole) || elementAriaRole && interactiveAriaRoles.includes(elementAriaRole);\n };\n var isLeafElement = (element) => {\n if (element.textContent === \"\") {\n return false;\n }\n if (element.childNodes.length === 0) {\n return !leafElementDenyList.includes(element.tagName);\n }\n if 
(element.childNodes.length === 1 && isTextNode(element.childNodes[0])) {\n return true;\n }\n return false;\n };\n async function pickChunk(chunksSeen) {\n const viewportHeight = calculateViewportHeight();\n const documentHeight = document.documentElement.scrollHeight;\n const chunks = Math.ceil(documentHeight / viewportHeight);\n const chunksArray = Array.from({ length: chunks }, (_, i) => i);\n const chunksRemaining = chunksArray.filter((chunk2) => {\n return !chunksSeen.includes(chunk2);\n });\n const currentScrollPosition = window.scrollY;\n const closestChunk = chunksRemaining.reduce((closest, current) => {\n const currentChunkTop = viewportHeight * current;\n const closestChunkTop = viewportHeight * closest;\n return Math.abs(currentScrollPosition - currentChunkTop) < Math.abs(currentScrollPosition - closestChunkTop) ? current : closest;\n }, chunksRemaining[0]);\n const chunk = closestChunk;\n if (chunk === void 0) {\n throw new Error(`No chunks remaining to check: ${chunksRemaining}`);\n }\n return {\n chunk,\n chunksArray\n };\n }\n\n // lib/dom/debug.ts\n async function debugDom() {\n window.chunkNumber = 0;\n const { selectorMap: multiSelectorMap } = await window.processElements(\n window.chunkNumber\n );\n const selectorMap = multiSelectorMapToSelectorMap(multiSelectorMap);\n drawChunk(selectorMap);\n }\n function multiSelectorMapToSelectorMap(multiSelectorMap) {\n return Object.fromEntries(\n Object.entries(multiSelectorMap).map(([key, selectors]) => [\n Number(key),\n selectors[0]\n ])\n );\n }\n function drawChunk(selectorMap) {\n if (!window.showChunks) return;\n cleanupMarkers();\n Object.values(selectorMap).forEach((selector) => {\n const element = document.evaluate(\n selector,\n document,\n null,\n XPathResult.FIRST_ORDERED_NODE_TYPE,\n null\n ).singleNodeValue;\n if (element) {\n let rect;\n if (element.nodeType === Node.ELEMENT_NODE) {\n rect = element.getBoundingClientRect();\n } else {\n const range = document.createRange();\n 
range.selectNodeContents(element);\n rect = range.getBoundingClientRect();\n }\n const color = \"grey\";\n const overlay = document.createElement(\"div\");\n overlay.style.position = \"absolute\";\n overlay.style.left = `${rect.left + window.scrollX}px`;\n overlay.style.top = `${rect.top + window.scrollY}px`;\n overlay.style.padding = \"2px\";\n overlay.style.width = `${rect.width}px`;\n overlay.style.height = `${rect.height}px`;\n overlay.style.backgroundColor = color;\n overlay.className = \"stagehand-marker\";\n overlay.style.opacity = \"0.3\";\n overlay.style.zIndex = \"1000000000\";\n overlay.style.border = \"1px solid\";\n overlay.style.pointerEvents = \"none\";\n document.body.appendChild(overlay);\n }\n });\n }\n async function cleanupDebug() {\n cleanupMarkers();\n }\n function cleanupMarkers() {\n const markers = document.querySelectorAll(\".stagehand-marker\");\n markers.forEach((marker) => {\n marker.remove();\n });\n }\n window.debugDom = debugDom;\n window.cleanupDebug = cleanupDebug;\n})();\n"; |
@@ -0,1 +1,2 @@ | ||
import { StagehandContainer } from "./StagehandContainer"; | ||
/**
 * Type guard that narrows a DOM `Node` to `Element`.
 *
 * The bundled implementation is a plain `nodeType` comparison against
 * `Node.ELEMENT_NODE`; no other property of the node is inspected.
 *
 * @param node - Any DOM node.
 * @returns `true` when `node.nodeType === Node.ELEMENT_NODE`.
 */
export declare function isElementNode(node: Node): node is Element; | ||
@@ -13,4 +14,3 @@ export declare function isTextNode(node: Node): node is Text; | ||
}>; | ||
/**
 * Asynchronous scroll helper taking a numeric `height`.
 *
 * NOTE(review): the implementation is not visible here. The earlier bundle
 * registers this function as `window.scrollToHeight`, while the later bundle
 * no longer registers it and instead scrolls through a container's
 * `scrollTo(offset)` — confirm no caller still depends on this export.
 *
 * @param height - Presumably the vertical scroll offset in pixels — TODO confirm.
 * @returns A promise with no value.
 */
export declare function scrollToHeight(height: number): Promise<void>; | ||
export declare function processElements(chunk: number, scrollToChunk?: boolean, indexOffset?: number): Promise<{ | ||
export declare function processElements(chunk: number, scrollToChunk?: boolean, indexOffset?: number, container?: StagehandContainer): Promise<{ | ||
outputString: string; | ||
@@ -17,0 +17,0 @@ selectorMap: Record<number, string[]>; |
/**
 * Resolves once the DOM under `document.body` has been quiet for 2 seconds.
 *
 * The bundled implementation starts a 2000 ms timer and installs a
 * `MutationObserver` on `window.document.body` with
 * `{ childList: true, subtree: true }`; every observed mutation clears the
 * timer and starts a fresh one. Attribute and character-data changes are not
 * observed, so they do not delay settling.
 *
 * The promise never rejects. The observer is not disconnected after the
 * promise resolves in the bundled code.
 *
 * @returns A promise that resolves (with no value) when the timer fires.
 */
export declare function waitForDomSettle(): Promise<void>; | ||
/**
 * Returns the height used as one "chunk" when paging through the document.
 *
 * Computed in the bundled implementation as
 * `Math.ceil(window.innerHeight * 0.75)`, i.e. three quarters of the window's
 * inner height, rounded up. `pickChunk` and the global page container both
 * use this value as their viewport height.
 *
 * @returns The chunk height in CSS pixels.
 */
export declare function calculateViewportHeight(): number; | ||
/** | ||
* Tests if the element actually responds to .scrollTo(...) | ||
* and that scrollTop changes as expected. | ||
*
* How the bundled implementation probes:
* - returns `false` (after a console warning) when `elem.scrollTo` is not a function;
* - remembers `elem.scrollTop`, then calls `scrollTo` with `top` set to that value
*   plus 100, `left: 0` and `behavior: "instant"`;
* - if `scrollTop` is unchanged afterwards it reports failure: an error is thrown
*   internally, caught, logged with `console.warn`, and `false` is returned;
* - otherwise it scrolls back to the remembered position and returns `true`.
*
* The function does not throw to the caller; failures surface as `false`.
* NOTE(review): an element that is already scrolled to its end presumably
* cannot move a further 100px and would be reported as not scrollable — confirm
* whether callers rely on that.
*
* @param elem - Candidate scroll container.
* @returns `true` when the probe scroll changed `scrollTop`, otherwise `false`.
*/ | ||
export declare function canElementScroll(elem: HTMLElement): boolean; |
@@ -19,3 +19,3 @@ import { LogLine } from "../../types/log"; | ||
private _recordObservation; | ||
/**
 * Handler-level `observe`: takes a single options object and resolves to a
 * list of observed elements.
 *
 * This span shows two destructuring signatures back to back: one that names
 * `domSettleTimeoutMs` and one that names `useAccessibilityTree` instead.
 * `requestId` likewise appears both as optional and as required, and each
 * result entry lists `backendNodeId: number` next to `selector` and
 * `description`.
 *
 * NOTE(review): `domSettleTimeoutMs?: number` is still listed in the options
 * type even though the second signature no longer destructures it — confirm
 * whether it is still honoured.
 * NOTE(review): `backendNodeId` is presumably the Chrome DevTools Protocol
 * backend node id of the element — confirm against the implementation.
 *
 * @returns A promise of `{ selector, backendNodeId, description }` entries.
 */
observe({ instruction, useVision, fullPage, llmClient, requestId, domSettleTimeoutMs, }: { | ||
observe({ instruction, useVision, fullPage, llmClient, requestId, useAccessibilityTree, }: { | ||
instruction: string; | ||
@@ -25,8 +25,10 @@ useVision: boolean; | ||
llmClient: LLMClient; | ||
requestId?: string; | ||
requestId: string; | ||
domSettleTimeoutMs?: number; | ||
useAccessibilityTree?: boolean; | ||
}): Promise<{ | ||
selector: string; | ||
backendNodeId: number; | ||
description: string; | ||
}[]>; | ||
} |
@@ -23,7 +23,7 @@ import { z } from "zod"; | ||
metadata: { | ||
progress?: string; | ||
completed?: boolean; | ||
progress?: string; | ||
}; | ||
}>; | ||
export declare function observe({ instruction, domElements, llmClient, image, requestId, userProvidedInstructions, logger, }: { | ||
export declare function observe({ instruction, domElements, llmClient, image, requestId, isUsingAccessibilityTree, userProvidedInstructions, logger, }: { | ||
instruction: string; | ||
@@ -36,2 +36,3 @@ domElements: string; | ||
logger: (message: LogLine) => void; | ||
isUsingAccessibilityTree?: boolean; | ||
}): Promise<{ | ||
@@ -38,0 +39,0 @@ elements: { |
@@ -15,3 +15,3 @@ import { LLMTool } from "../types/llm"; | ||
/**
 * Builds a single `ChatMessage` from an instruction, an extraction response
 * object and two chunk counters.
 *
 * NOTE(review): the message role and wording are not visible from this
 * declaration. `chunksSeen` / `chunksTotal` presumably tell the model how
 * much of the page has been processed so far — confirm in the implementation.
 *
 * @param instruction - The instruction text passed through to the prompt.
 * @param extractionResponse - The extraction result so far (typed only as `object`).
 * @param chunksSeen - Number of chunks seen.
 * @param chunksTotal - Total number of chunks.
 * @returns The constructed chat message.
 */
export declare function buildMetadataPrompt(instruction: string, extractionResponse: object, chunksSeen: number, chunksTotal: number): ChatMessage; | ||
/**
 * Prompt builders for `observe`. Each function appears twice in this span:
 * first without, then with, an optional trailing `isUsingAccessibilityTree`
 * flag. Because the flag is optional, call sites written against the shorter
 * signatures still type-check against the longer ones.
 *
 * NOTE(review): how the flag changes the generated prompt text is not visible
 * from these declarations — confirm in the implementation.
 */
// System prompt; `userProvidedInstructions` is optional.
export declare function buildObserveSystemPrompt(userProvidedInstructions?: string): ChatMessage; | ||
// User message built from the instruction and a string of DOM elements.
export declare function buildObserveUserMessage(instruction: string, domElements: string): ChatMessage; | ||
// Same system prompt builder with the optional accessibility-tree flag added.
export declare function buildObserveSystemPrompt(userProvidedInstructions?: string, isUsingAccessibilityTree?: boolean): ChatMessage; | ||
// Same user message builder with the optional accessibility-tree flag added.
export declare function buildObserveUserMessage(instruction: string, domElements: string, isUsingAccessibilityTree?: boolean): ChatMessage; |
@@ -1,6 +0,6 @@ | ||
import type { Page as PlaywrightPage, BrowserContext as PlaywrightContext } from "@playwright/test"; | ||
import type { Page as PlaywrightPage, BrowserContext as PlaywrightContext, CDPSession } from "@playwright/test"; | ||
import { LLMClient } from "./llm/LLMClient"; | ||
import { ActOptions, ActResult, Stagehand } from "./index"; | ||
import { StagehandContext } from "./StagehandContext"; | ||
import { Page } from "../types/page"; | ||
import { Page, defaultExtractSchema } from "../types/page"; | ||
import { ExtractOptions, ExtractResult, ObserveOptions, ObserveResult } from "../types/stagehand"; | ||
@@ -16,2 +16,3 @@ import { z } from "zod"; | ||
private llmClient; | ||
private cdpClient; | ||
constructor(page: PlaywrightPage, stagehand: Stagehand, context: StagehandContext, llmClient: LLMClient, userProvidedInstructions?: string); | ||
@@ -24,5 +25,9 @@ init(): Promise<StagehandPage>; | ||
cleanupDomDebug(): Promise<void>; | ||
act({ action, modelName, modelClientOptions, useVision, variables, domSettleTimeoutMs, }: ActOptions): Promise<ActResult>; | ||
extract<T extends z.AnyZodObject>({ instruction, schema, modelName, modelClientOptions, domSettleTimeoutMs, useTextExtract, }: ExtractOptions<T>): Promise<ExtractResult<T>>; | ||
observe(options?: ObserveOptions): Promise<ObserveResult[]>; | ||
act(actionOrOptions: string | ActOptions): Promise<ActResult>; | ||
extract<T extends z.AnyZodObject = typeof defaultExtractSchema>(instructionOrOptions: string | ExtractOptions<T>): Promise<ExtractResult<T>>; | ||
observe(instructionOrOptions?: string | ObserveOptions): Promise<ObserveResult[]>; | ||
getCDPClient(): Promise<CDPSession>; | ||
sendCDP<T>(command: string, args?: Record<string, unknown>): Promise<T>; | ||
enableCDP(domain: string): Promise<void>; | ||
disableCDP(domain: string): Promise<void>; | ||
} |
@@ -1,6 +0,6 @@ | ||
import { EvalLogger } from "../evals/logger"; | ||
import { AvailableModel } from "../types/model"; | ||
import { LogLine } from "../types/log"; | ||
import { z } from "zod"; | ||
import { EvalCase } from "braintrust"; | ||
import type { EvalLogger } from "../evals/logger"; | ||
import type { AvailableModel } from "../types/model"; | ||
import type { LogLine } from "../types/log"; | ||
import type { EvalCase } from "braintrust"; | ||
export type EvalFunction = (args: { | ||
@@ -10,2 +10,3 @@ modelName: AvailableModel; | ||
useTextExtract: boolean; | ||
useAccessibilityTree: boolean; | ||
}) => Promise<{ | ||
@@ -12,0 +13,0 @@ _success: boolean; |
import type { Browser as PlaywrightBrowser, BrowserContext as PlaywrightContext, Page as PlaywrightPage } from "@playwright/test"; | ||
import type { z } from "zod"; | ||
import { z } from "zod"; | ||
import type { ActOptions, ActResult, ExtractOptions, ExtractResult, ObserveOptions, ObserveResult } from "./stagehand"; | ||
/**
 * Zod object schema with a single string field, `extraction`.
 * The string-instruction `extract(instruction)` overload declared on `Page`
 * in this file returns `ExtractResult<typeof defaultExtractSchema>`.
 */
export declare const defaultExtractSchema: z.ZodObject<{ | ||
extraction: z.ZodString; | ||
}, "strip", z.ZodTypeAny, { | ||
extraction?: string; | ||
}, { | ||
extraction?: string; | ||
}>; | ||
export interface Page extends Omit<PlaywrightPage, "on"> { | ||
act: (options: ActOptions) => Promise<ActResult>; | ||
extract: <T extends z.AnyZodObject>(options: ExtractOptions<T>) => Promise<ExtractResult<T>>; | ||
observe: (options?: ObserveOptions) => Promise<ObserveResult[]>; | ||
act(action: string): Promise<ActResult>; | ||
act(options: ActOptions): Promise<ActResult>; | ||
extract(instruction: string): Promise<ExtractResult<typeof defaultExtractSchema>>; | ||
extract<T extends z.AnyZodObject>(options: ExtractOptions<T>): Promise<ExtractResult<T>>; | ||
observe(): Promise<ObserveResult[]>; | ||
observe(instruction: string): Promise<ObserveResult[]>; | ||
observe(options?: ObserveOptions): Promise<ObserveResult[]>; | ||
on: { | ||
@@ -9,0 +20,0 @@ (event: "popup", listener: (page: Page) => unknown): Page; |
@@ -80,2 +80,3 @@ import Browserbase from "@browserbasehq/sdk"; | ||
domSettleTimeoutMs?: number; | ||
useAccessibilityTree?: boolean; | ||
} | ||
@@ -82,0 +83,0 @@ export interface ObserveResult { |
@@ -177,3 +177,99 @@ (() => { | ||
} | ||
/**
 * Probes whether an element can actually be scrolled vertically.
 *
 * The element is nudged by 100px with an instant scroll and its `scrollTop`
 * is compared against the starting value. The probe scrolls down first; if
 * that has no effect and the element is not at the top (for example because
 * it is already at the bottom), it scrolls up instead. Once a probe has
 * moved the element, it is scrolled back to its original vertical offset.
 *
 * @param elem Candidate element. Anything without a `scrollTo` function
 *   (including null/undefined) is rejected with a warning.
 * @returns `true` if a probe scroll changed `scrollTop`, otherwise `false`.
 *   Never throws: failures are logged with `console.warn` and reported as
 *   `false`.
 */
function canElementScroll(elem) {
  if (!elem || typeof elem.scrollTo !== "function") {
    console.warn("canElementScroll: .scrollTo is not a function.");
    return false;
  }
  try {
    const originalTop = elem.scrollTop;
    // Scroll to `top` instantly and report whether the offset changed.
    const probe = (top) => {
      elem.scrollTo({
        top,
        left: 0,
        behavior: "instant"
      });
      return elem.scrollTop !== originalTop;
    };
    // Downward probe first; fall back to an upward probe so that an element
    // sitting at its maximum scroll offset is not reported as unscrollable.
    const moved = probe(originalTop + 100) || (originalTop > 0 && probe(Math.max(0, originalTop - 100)));
    if (!moved) {
      throw new Error("scrollTop did not change");
    }
    // Put the element back where it was before probing.
    elem.scrollTo({
      top: originalTop,
      left: 0,
      behavior: "instant"
    });
    return true;
  } catch (error) {
    // `error` may be a non-Error value (even null), so guard the property read.
    console.warn("canElementScroll error:", (error && error.message) || error);
    return false;
  }
}
// lib/dom/GlobalPageContainer.ts | ||
/**
 * Scroll container backed by the top-level page (window / <html>).
 * Exposes the same four methods as ElementContainer.
 */
var GlobalPageContainer = class {
  /** Chunk height, as computed by calculateViewportHeight(). */
  getViewportHeight() {
    const height = calculateViewportHeight();
    return height;
  }
  /** Total scrollable height of the document element. */
  getScrollHeight() {
    const { scrollHeight } = document.documentElement;
    return scrollHeight;
  }
  /**
   * Waits 1500ms, smooth-scrolls the window to `offset`, then waits until
   * scrolling has gone quiet.
   */
  async scrollTo(offset) {
    await new Promise((done) => {
      setTimeout(done, 1500);
    });
    const target = { top: offset, left: 0, behavior: "smooth" };
    window.scrollTo(target);
    await this.waitForScrollEnd();
  }
  /**
   * Resolves once no window "scroll" event has fired for 100ms. The timer
   * is armed immediately, so it also resolves when no scroll happens.
   */
  async waitForScrollEnd() {
    return new Promise((done) => {
      let idleTimer;
      const onScroll = () => {
        // Every scroll event pushes the deadline back.
        clearTimeout(idleTimer);
        idleTimer = window.setTimeout(() => {
          window.removeEventListener("scroll", onScroll);
          done();
        }, 100);
      };
      window.addEventListener("scroll", onScroll, { passive: true });
      onScroll();
    });
  }
};
// lib/dom/ElementContainer.ts | ||
/**
 * Scroll container backed by a single scrollable element.
 * Exposes the same four methods as GlobalPageContainer.
 */
var ElementContainer = class {
  /** @param el The element whose scroll state this container wraps. */
  constructor(el) {
    this.el = el;
  }
  /** Visible height of the wrapped element (its clientHeight). */
  getViewportHeight() {
    const { clientHeight } = this.el;
    return clientHeight;
  }
  /** Full scrollable height of the wrapped element (its scrollHeight). */
  getScrollHeight() {
    const { scrollHeight } = this.el;
    return scrollHeight;
  }
  /**
   * Waits 1500ms, smooth-scrolls the element to `offset`, then waits until
   * scrolling has gone quiet.
   */
  async scrollTo(offset) {
    await new Promise((done) => {
      setTimeout(done, 1500);
    });
    const target = { top: offset, left: 0, behavior: "smooth" };
    this.el.scrollTo(target);
    await this.waitForScrollEnd();
  }
  /**
   * Resolves once the element has emitted no "scroll" event for 100ms. The
   * timer is armed immediately, so it also resolves when no scroll happens.
   */
  async waitForScrollEnd() {
    return new Promise((done) => {
      let idleTimer;
      const onScroll = () => {
        // Every scroll event pushes the deadline back.
        clearTimeout(idleTimer);
        idleTimer = window.setTimeout(() => {
          this.el.removeEventListener("scroll", onScroll);
          done();
        }, 100);
      };
      this.el.addEventListener("scroll", onScroll, { passive: true });
      onScroll();
    });
  }
};
// lib/dom/containerFactory.ts | ||
/**
 * Picks the scroll-container wrapper for `obj`: a GlobalPageContainer when
 * `obj` is a Window, otherwise an ElementContainer wrapping `obj`.
 */
function createStagehandContainer(obj) {
  return obj instanceof Window ? new GlobalPageContainer() : new ElementContainer(obj);
}
// lib/dom/process.ts | ||
@@ -186,5 +282,39 @@ function isElementNode(node) { | ||
} | ||
/**
 * Finds the element that should be scrolled to walk through the page.
 *
 * Starts from <html> and scans every element whose computed overflow-y is
 * "auto", "scroll" or "overlay". It keeps the one with the largest
 * `scrollHeight - clientHeight`, provided that is strictly greater than the
 * root's. If the winner is not <html> and canElementScroll() rejects it,
 * <html> is returned instead.
 *
 * @returns The chosen scrollable element (possibly document.documentElement).
 */
function getMainScrollableElement() {
  const root = document.documentElement;
  const scrollableOverflowValues = ["auto", "scroll", "overlay"];
  let best = root;
  let bestOverflowPx = root.scrollHeight - root.clientHeight;
  document.querySelectorAll("*").forEach((node) => {
    const { overflowY } = window.getComputedStyle(node);
    if (!scrollableOverflowValues.includes(overflowY)) {
      return;
    }
    const overflowPx = node.scrollHeight - node.clientHeight;
    // Strict comparison: on ties the earlier candidate is kept.
    if (overflowPx > bestOverflowPx) {
      bestOverflowPx = overflowPx;
      best = node;
    }
  });
  if (best === root) {
    return root;
  }
  if (canElementScroll(best)) {
    return best;
  }
  console.log(
    "Stagehand (Browser Process): Unable to scroll candidate. Fallback to <html>."
  );
  return root;
}
async function processDom(chunksSeen) { | ||
const { chunk, chunksArray } = await pickChunk(chunksSeen); | ||
const { outputString, selectorMap } = await processElements(chunk); | ||
const container = createStagehandContainer(window); | ||
const { outputString, selectorMap } = await processElements( | ||
chunk, | ||
true, | ||
0, | ||
container | ||
); | ||
console.log( | ||
@@ -203,4 +333,6 @@ `Stagehand (Browser Process): Extracted dom elements: | ||
console.log("Stagehand (Browser Process): Processing all of DOM"); | ||
const viewportHeight = calculateViewportHeight(); | ||
const documentHeight = document.documentElement.scrollHeight; | ||
const mainScrollable = getMainScrollableElement(); | ||
const container = mainScrollable === document.documentElement ? createStagehandContainer(window) : createStagehandContainer(mainScrollable); | ||
const viewportHeight = container.getViewportHeight(); | ||
const documentHeight = container.getScrollHeight(); | ||
const totalChunks = Math.ceil(documentHeight / viewportHeight); | ||
@@ -210,7 +342,7 @@ let index = 0; | ||
for (let chunk = 0; chunk < totalChunks; chunk++) { | ||
const result = await processElements(chunk, true, index); | ||
const result = await processElements(chunk, true, index, container); | ||
results.push(result); | ||
index += Object.keys(result.selectorMap).length; | ||
} | ||
await scrollToHeight(0); | ||
await container.scrollTo(0); | ||
const allOutputString = results.map((result) => result.outputString).join(""); | ||
@@ -229,33 +361,20 @@ const allSelectorMap = results.reduce( | ||
} | ||
/**
 * Smooth-scrolls the window to the vertical offset `height` and resolves
 * once no "scroll" event has fired for 100ms. The timer is armed
 * immediately, so it also resolves when no scroll happens.
 */
async function scrollToHeight(height) {
  const target = { top: height, left: 0, behavior: "smooth" };
  window.scrollTo(target);
  await new Promise((done) => {
    let idleTimer;
    const onScroll = () => {
      // Every scroll event pushes the deadline back.
      clearTimeout(idleTimer);
      idleTimer = window.setTimeout(() => {
        window.removeEventListener("scroll", onScroll);
        done();
      }, 100);
    };
    window.addEventListener("scroll", onScroll, { passive: true });
    onScroll();
  });
}
var xpathCache = /* @__PURE__ */ new Map(); | ||
async function processElements(chunk, scrollToChunk = true, indexOffset = 0) { | ||
async function processElements(chunk, scrollToChunk = true, indexOffset = 0, container) { | ||
console.time("processElements:total"); | ||
const viewportHeight = calculateViewportHeight(); | ||
const stagehandContainer = container ?? createStagehandContainer(window); | ||
const viewportHeight = stagehandContainer.getViewportHeight(); | ||
const totalScrollHeight = stagehandContainer.getScrollHeight(); | ||
const chunkHeight = viewportHeight * chunk; | ||
const maxScrollTop = document.documentElement.scrollHeight - viewportHeight; | ||
const maxScrollTop = totalScrollHeight - viewportHeight; | ||
const offsetTop = Math.min(chunkHeight, maxScrollTop); | ||
if (scrollToChunk) { | ||
console.time("processElements:scroll"); | ||
await scrollToHeight(offsetTop); | ||
await stagehandContainer.scrollTo(offsetTop); | ||
console.timeEnd("processElements:scroll"); | ||
} | ||
const candidateElements = []; | ||
const DOMQueue = [...document.body.childNodes]; | ||
console.log("Stagehand (Browser Process): Generating candidate elements"); | ||
console.time("processElements:findCandidates"); | ||
const DOMQueue = [...document.body.childNodes]; | ||
const candidateElements = []; | ||
while (DOMQueue.length > 0) { | ||
@@ -404,5 +523,3 @@ const element = DOMQueue.pop(); | ||
} | ||
if (["SCRIPT", "STYLE", "IFRAME", "INPUT", "TEXTAREA"].includes( | ||
element.tagName | ||
)) { | ||
if (["SCRIPT", "STYLE", "IFRAME", "INPUT"].includes(element.tagName)) { | ||
return; | ||
@@ -515,3 +632,2 @@ } | ||
window.processElements = processElements; | ||
window.scrollToHeight = scrollToHeight; | ||
window.storeDOM = storeDOM; | ||
@@ -521,2 +637,3 @@ window.restoreDOM = restoreDOM; | ||
window.getElementBoundingBoxes = getElementBoundingBoxes; | ||
window.createStagehandContainer = createStagehandContainer; | ||
var leafElementDenyList = ["SVG", "IFRAME", "SCRIPT", "STYLE", "LINK"]; | ||
@@ -587,5 +704,2 @@ var interactiveElementTypes = [ | ||
} | ||
if (!isTopElement(parent, rect)) { | ||
return false; | ||
} | ||
const visible = parent.checkVisibility({ | ||
@@ -592,0 +706,0 @@ checkOpacity: true, |
@@ -1,1 +0,1 @@ | ||
export const scriptContent = "(() => {\n // lib/dom/xpathUtils.ts\n function getParentElement(node) {\n return isElementNode(node) ? node.parentElement : node.parentNode;\n }\n function getCombinations(attributes, size) {\n const results = [];\n function helper(start, combo) {\n if (combo.length === size) {\n results.push([...combo]);\n return;\n }\n for (let i = start; i < attributes.length; i++) {\n combo.push(attributes[i]);\n helper(i + 1, combo);\n combo.pop();\n }\n }\n helper(0, []);\n return results;\n }\n function isXPathFirstResultElement(xpath, target) {\n try {\n const result = document.evaluate(\n xpath,\n document.documentElement,\n null,\n XPathResult.ORDERED_NODE_SNAPSHOT_TYPE,\n null\n );\n return result.snapshotItem(0) === target;\n } catch (error) {\n console.warn(`Invalid XPath expression: ${xpath}`, error);\n return false;\n }\n }\n function escapeXPathString(value) {\n if (value.includes(\"'\")) {\n if (value.includes('\"')) {\n return \"concat(\" + value.split(/('+)/).map((part) => {\n if (part === \"'\") {\n return `\"'\"`;\n } else if (part.startsWith(\"'\") && part.endsWith(\"'\")) {\n return `\"${part}\"`;\n } else {\n return `'${part}'`;\n }\n }).join(\",\") + \")\";\n } else {\n return `\"${value}\"`;\n }\n } else {\n return `'${value}'`;\n }\n }\n async function generateXPathsForElement(element) {\n if (!element) return [];\n const [complexXPath, standardXPath, idBasedXPath] = await Promise.all([\n generateComplexXPath(element),\n generateStandardXPath(element),\n generatedIdBasedXPath(element)\n ]);\n return [standardXPath, ...idBasedXPath ? 
[idBasedXPath] : [], complexXPath];\n }\n async function generateComplexXPath(element) {\n const parts = [];\n let currentElement = element;\n while (currentElement && (isTextNode(currentElement) || isElementNode(currentElement))) {\n if (isElementNode(currentElement)) {\n const el = currentElement;\n let selector = el.tagName.toLowerCase();\n const attributePriority = [\n \"data-qa\",\n \"data-component\",\n \"data-role\",\n \"role\",\n \"aria-role\",\n \"type\",\n \"name\",\n \"aria-label\",\n \"placeholder\",\n \"title\",\n \"alt\"\n ];\n const attributes = attributePriority.map((attr) => {\n let value = el.getAttribute(attr);\n if (attr === \"href-full\" && value) {\n value = el.getAttribute(\"href\");\n }\n return value ? { attr: attr === \"href-full\" ? \"href\" : attr, value } : null;\n }).filter((attr) => attr !== null);\n let uniqueSelector = \"\";\n for (let i = 1; i <= attributes.length; i++) {\n const combinations = getCombinations(attributes, i);\n for (const combo of combinations) {\n const conditions = combo.map((a) => `@${a.attr}=${escapeXPathString(a.value)}`).join(\" and \");\n const xpath2 = `//${selector}[${conditions}]`;\n if (isXPathFirstResultElement(xpath2, el)) {\n uniqueSelector = xpath2;\n break;\n }\n }\n if (uniqueSelector) break;\n }\n if (uniqueSelector) {\n parts.unshift(uniqueSelector.replace(\"//\", \"\"));\n break;\n } else {\n const parent = getParentElement(el);\n if (parent) {\n const siblings = Array.from(parent.children).filter(\n (sibling) => sibling.tagName === el.tagName\n );\n const index = siblings.indexOf(el) + 1;\n selector += siblings.length > 1 ? 
`[${index}]` : \"\";\n }\n parts.unshift(selector);\n }\n }\n currentElement = getParentElement(currentElement);\n }\n const xpath = \"//\" + parts.join(\"/\");\n return xpath;\n }\n async function generateStandardXPath(element) {\n const parts = [];\n while (element && (isTextNode(element) || isElementNode(element))) {\n let index = 0;\n let hasSameTypeSiblings = false;\n const siblings = element.parentElement ? Array.from(element.parentElement.childNodes) : [];\n for (let i = 0; i < siblings.length; i++) {\n const sibling = siblings[i];\n if (sibling.nodeType === element.nodeType && sibling.nodeName === element.nodeName) {\n index = index + 1;\n hasSameTypeSiblings = true;\n if (sibling.isSameNode(element)) {\n break;\n }\n }\n }\n if (element.nodeName !== \"#text\") {\n const tagName = element.nodeName.toLowerCase();\n const pathIndex = hasSameTypeSiblings ? `[${index}]` : \"\";\n parts.unshift(`${tagName}${pathIndex}`);\n }\n element = element.parentElement;\n }\n return parts.length ? 
`/${parts.join(\"/\")}` : \"\";\n }\n async function generatedIdBasedXPath(element) {\n if (isElementNode(element) && element.id) {\n return `//*[@id='${element.id}']`;\n }\n return null;\n }\n\n // lib/dom/utils.ts\n async function waitForDomSettle() {\n return new Promise((resolve) => {\n const createTimeout = () => {\n return setTimeout(() => {\n resolve();\n }, 2e3);\n };\n let timeout = createTimeout();\n const observer = new MutationObserver(() => {\n clearTimeout(timeout);\n timeout = createTimeout();\n });\n observer.observe(window.document.body, { childList: true, subtree: true });\n });\n }\n window.waitForDomSettle = waitForDomSettle;\n function calculateViewportHeight() {\n return Math.ceil(window.innerHeight * 0.75);\n }\n\n // lib/dom/process.ts\n function isElementNode(node) {\n return node.nodeType === Node.ELEMENT_NODE;\n }\n function isTextNode(node) {\n return node.nodeType === Node.TEXT_NODE && Boolean(node.textContent?.trim());\n }\n async function processDom(chunksSeen) {\n const { chunk, chunksArray } = await pickChunk(chunksSeen);\n const { outputString, selectorMap } = await processElements(chunk);\n console.log(\n `Stagehand (Browser Process): Extracted dom elements:\n${outputString}`\n );\n return {\n outputString,\n selectorMap,\n chunk,\n chunks: chunksArray\n };\n }\n async function processAllOfDom() {\n console.log(\"Stagehand (Browser Process): Processing all of DOM\");\n const viewportHeight = calculateViewportHeight();\n const documentHeight = document.documentElement.scrollHeight;\n const totalChunks = Math.ceil(documentHeight / viewportHeight);\n let index = 0;\n const results = [];\n for (let chunk = 0; chunk < totalChunks; chunk++) {\n const result = await processElements(chunk, true, index);\n results.push(result);\n index += Object.keys(result.selectorMap).length;\n }\n await scrollToHeight(0);\n const allOutputString = results.map((result) => result.outputString).join(\"\");\n const allSelectorMap = results.reduce(\n (acc, 
result) => ({ ...acc, ...result.selectorMap }),\n {}\n );\n console.log(\n `Stagehand (Browser Process): All dom elements: ${allOutputString}`\n );\n return {\n outputString: allOutputString,\n selectorMap: allSelectorMap\n };\n }\n async function scrollToHeight(height) {\n window.scrollTo({ top: height, left: 0, behavior: \"smooth\" });\n await new Promise((resolve) => {\n let scrollEndTimer;\n const handleScrollEnd = () => {\n clearTimeout(scrollEndTimer);\n scrollEndTimer = window.setTimeout(() => {\n window.removeEventListener(\"scroll\", handleScrollEnd);\n resolve();\n }, 100);\n };\n window.addEventListener(\"scroll\", handleScrollEnd, { passive: true });\n handleScrollEnd();\n });\n }\n var xpathCache = /* @__PURE__ */ new Map();\n async function processElements(chunk, scrollToChunk = true, indexOffset = 0) {\n console.time(\"processElements:total\");\n const viewportHeight = calculateViewportHeight();\n const chunkHeight = viewportHeight * chunk;\n const maxScrollTop = document.documentElement.scrollHeight - viewportHeight;\n const offsetTop = Math.min(chunkHeight, maxScrollTop);\n if (scrollToChunk) {\n console.time(\"processElements:scroll\");\n await scrollToHeight(offsetTop);\n console.timeEnd(\"processElements:scroll\");\n }\n const candidateElements = [];\n const DOMQueue = [...document.body.childNodes];\n console.log(\"Stagehand (Browser Process): Generating candidate elements\");\n console.time(\"processElements:findCandidates\");\n while (DOMQueue.length > 0) {\n const element = DOMQueue.pop();\n let shouldAddElement = false;\n if (element && isElementNode(element)) {\n const childrenCount = element.childNodes.length;\n for (let i = childrenCount - 1; i >= 0; i--) {\n const child = element.childNodes[i];\n DOMQueue.push(child);\n }\n if (isInteractiveElement(element)) {\n if (isActive(element) && isVisible(element)) {\n shouldAddElement = true;\n }\n }\n if (isLeafElement(element)) {\n if (isActive(element) && isVisible(element)) {\n 
shouldAddElement = true;\n }\n }\n }\n if (element && isTextNode(element) && isTextVisible(element)) {\n shouldAddElement = true;\n }\n if (shouldAddElement) {\n candidateElements.push(element);\n }\n }\n console.timeEnd(\"processElements:findCandidates\");\n const selectorMap = {};\n let outputString = \"\";\n console.log(\n `Stagehand (Browser Process): Processing candidate elements: ${candidateElements.length}`\n );\n console.time(\"processElements:processCandidates\");\n console.time(\"processElements:generateXPaths\");\n const xpathLists = await Promise.all(\n candidateElements.map(async (element) => {\n if (xpathCache.has(element)) {\n return xpathCache.get(element);\n }\n const xpaths = await generateXPathsForElement(element);\n xpathCache.set(element, xpaths);\n return xpaths;\n })\n );\n console.timeEnd(\"processElements:generateXPaths\");\n candidateElements.forEach((element, index) => {\n const xpaths = xpathLists[index];\n let elementOutput = \"\";\n if (isTextNode(element)) {\n const textContent = element.textContent?.trim();\n if (textContent) {\n elementOutput += `${index + indexOffset}:${textContent}\n`;\n }\n } else if (isElementNode(element)) {\n const tagName = element.tagName.toLowerCase();\n const attributes = collectEssentialAttributes(element);\n const openingTag = `<${tagName}${attributes ? 
\" \" + attributes : \"\"}>`;\n const closingTag = `</${tagName}>`;\n const textContent = element.textContent?.trim() || \"\";\n elementOutput += `${index + indexOffset}:${openingTag}${textContent}${closingTag}\n`;\n }\n outputString += elementOutput;\n selectorMap[index + indexOffset] = xpaths;\n });\n console.timeEnd(\"processElements:processCandidates\");\n console.timeEnd(\"processElements:total\");\n return {\n outputString,\n selectorMap\n };\n }\n function collectEssentialAttributes(element) {\n const essentialAttributes = [\n \"id\",\n \"class\",\n \"href\",\n \"src\",\n \"aria-label\",\n \"aria-name\",\n \"aria-role\",\n \"aria-description\",\n \"aria-expanded\",\n \"aria-haspopup\",\n \"type\",\n \"value\"\n ];\n const attrs = essentialAttributes.map((attr) => {\n const value = element.getAttribute(attr);\n return value ? `${attr}=\"${value}\"` : \"\";\n }).filter((attr) => attr !== \"\");\n Array.from(element.attributes).forEach((attr) => {\n if (attr.name.startsWith(\"data-\")) {\n attrs.push(`${attr.name}=\"${attr.value}\"`);\n }\n });\n return attrs.join(\" \");\n }\n function storeDOM() {\n const originalDOM = document.body.cloneNode(true);\n console.log(\"DOM state stored.\");\n return originalDOM.outerHTML;\n }\n function restoreDOM(storedDOM) {\n console.log(\"Restoring DOM\");\n if (storedDOM) {\n document.body.innerHTML = storedDOM;\n } else {\n console.error(\"No DOM state was provided.\");\n }\n }\n function createTextBoundingBoxes() {\n const style = document.createElement(\"style\");\n document.head.appendChild(style);\n if (style.sheet) {\n style.sheet.insertRule(\n `\n .stagehand-highlighted-word, .stagehand-space {\n border: 0px solid orange;\n display: inline-block !important;\n visibility: visible;\n }\n `,\n 0\n );\n style.sheet.insertRule(\n `\n code .stagehand-highlighted-word, code .stagehand-space,\n pre .stagehand-highlighted-word, pre .stagehand-space {\n white-space: pre-wrap;\n display: inline !important;\n }\n `,\n 1\n );\n 
}\n function applyHighlighting(root) {\n root.querySelectorAll(\"body *\").forEach((element) => {\n if (element.closest(\".stagehand-nav, .stagehand-marker\")) {\n return;\n }\n if ([\"SCRIPT\", \"STYLE\", \"IFRAME\", \"INPUT\", \"TEXTAREA\"].includes(\n element.tagName\n )) {\n return;\n }\n const childNodes = Array.from(element.childNodes);\n childNodes.forEach((node) => {\n if (node.nodeType === 3 && node.textContent?.trim().length > 0) {\n const textContent = node.textContent.replace(/\\u00A0/g, \" \");\n const tokens = textContent.split(/(\\s+)/g);\n const fragment = document.createDocumentFragment();\n const parentIsCode = element.tagName === \"CODE\";\n tokens.forEach((token) => {\n const span = document.createElement(\"span\");\n span.textContent = token;\n if (parentIsCode) {\n span.style.whiteSpace = \"pre-wrap\";\n span.style.display = \"inline\";\n }\n span.className = token.trim().length === 0 ? \"stagehand-space\" : \"stagehand-highlighted-word\";\n fragment.appendChild(span);\n });\n if (fragment.childNodes.length > 0 && node.parentNode) {\n element.insertBefore(fragment, node);\n node.remove();\n }\n }\n });\n });\n }\n applyHighlighting(document);\n document.querySelectorAll(\"iframe\").forEach((iframe) => {\n try {\n iframe.contentWindow?.postMessage({ action: \"highlight\" }, \"*\");\n } catch (error) {\n console.error(\"Error accessing iframe content: \", error);\n }\n });\n }\n function getElementBoundingBoxes(xpath) {\n const element = document.evaluate(\n xpath,\n document,\n null,\n XPathResult.FIRST_ORDERED_NODE_TYPE,\n null\n ).singleNodeValue;\n if (!element) return [];\n const isValidText = (text) => text && text.trim().length > 0;\n let dropDownElem = element.querySelector(\"option[selected]\");\n if (!dropDownElem) {\n dropDownElem = element.querySelector(\"option\");\n }\n if (dropDownElem) {\n const elemText = dropDownElem.textContent || \"\";\n if (isValidText(elemText)) {\n const parentRect = element.getBoundingClientRect();\n 
return [\n {\n text: elemText.trim(),\n top: parentRect.top + window.scrollY,\n left: parentRect.left + window.scrollX,\n width: parentRect.width,\n height: parentRect.height\n }\n ];\n } else {\n return [];\n }\n }\n let placeholderText = \"\";\n if ((element.tagName.toLowerCase() === \"input\" || element.tagName.toLowerCase() === \"textarea\") && element.placeholder) {\n placeholderText = element.placeholder;\n } else if (element.tagName.toLowerCase() === \"a\") {\n placeholderText = \"\";\n } else if (element.tagName.toLowerCase() === \"img\") {\n placeholderText = element.alt || \"\";\n }\n const words = element.querySelectorAll(\n \".stagehand-highlighted-word\"\n );\n const boundingBoxes = Array.from(words).map((word) => {\n const rect = word.getBoundingClientRect();\n return {\n text: word.innerText || \"\",\n top: rect.top + window.scrollY,\n left: rect.left + window.scrollX,\n width: rect.width,\n height: rect.height * 0.75\n };\n }).filter(\n (box) => box.width > 0 && box.height > 0 && box.top >= 0 && box.left >= 0 && isValidText(box.text)\n );\n if (boundingBoxes.length === 0) {\n const elementRect = element.getBoundingClientRect();\n return [\n {\n text: placeholderText,\n top: elementRect.top + window.scrollY,\n left: elementRect.left + window.scrollX,\n width: elementRect.width,\n height: elementRect.height * 0.75\n }\n ];\n }\n return boundingBoxes;\n }\n window.processDom = processDom;\n window.processAllOfDom = processAllOfDom;\n window.processElements = processElements;\n window.scrollToHeight = scrollToHeight;\n window.storeDOM = storeDOM;\n window.restoreDOM = restoreDOM;\n window.createTextBoundingBoxes = createTextBoundingBoxes;\n window.getElementBoundingBoxes = getElementBoundingBoxes;\n var leafElementDenyList = [\"SVG\", \"IFRAME\", \"SCRIPT\", \"STYLE\", \"LINK\"];\n var interactiveElementTypes = [\n \"A\",\n \"BUTTON\",\n \"DETAILS\",\n \"EMBED\",\n \"INPUT\",\n \"LABEL\",\n \"MENU\",\n \"MENUITEM\",\n \"OBJECT\",\n \"SELECT\",\n 
\"TEXTAREA\",\n \"SUMMARY\"\n ];\n var interactiveRoles = [\n \"button\",\n \"menu\",\n \"menuitem\",\n \"link\",\n \"checkbox\",\n \"radio\",\n \"slider\",\n \"tab\",\n \"tabpanel\",\n \"textbox\",\n \"combobox\",\n \"grid\",\n \"listbox\",\n \"option\",\n \"progressbar\",\n \"scrollbar\",\n \"searchbox\",\n \"switch\",\n \"tree\",\n \"treeitem\",\n \"spinbutton\",\n \"tooltip\"\n ];\n var interactiveAriaRoles = [\"menu\", \"menuitem\", \"button\"];\n var isVisible = (element) => {\n const rect = element.getBoundingClientRect();\n if (rect.width === 0 || rect.height === 0 || rect.top < 0 || rect.top > window.innerHeight) {\n return false;\n }\n if (!isTopElement(element, rect)) {\n return false;\n }\n const visible = element.checkVisibility({\n checkOpacity: true,\n checkVisibilityCSS: true\n });\n return visible;\n };\n var isTextVisible = (element) => {\n const range = document.createRange();\n range.selectNodeContents(element);\n const rect = range.getBoundingClientRect();\n if (rect.width === 0 || rect.height === 0 || rect.top < 0 || rect.top > window.innerHeight) {\n return false;\n }\n const parent = element.parentElement;\n if (!parent) {\n return false;\n }\n if (!isTopElement(parent, rect)) {\n return false;\n }\n const visible = parent.checkVisibility({\n checkOpacity: true,\n checkVisibilityCSS: true\n });\n return visible;\n };\n function isTopElement(elem, rect) {\n const points = [\n { x: rect.left + rect.width * 0.25, y: rect.top + rect.height * 0.25 },\n { x: rect.left + rect.width * 0.75, y: rect.top + rect.height * 0.25 },\n { x: rect.left + rect.width * 0.25, y: rect.top + rect.height * 0.75 },\n { x: rect.left + rect.width * 0.75, y: rect.top + rect.height * 0.75 },\n { x: rect.left + rect.width / 2, y: rect.top + rect.height / 2 }\n ];\n return points.some((point) => {\n const topEl = document.elementFromPoint(point.x, point.y);\n let current = topEl;\n while (current && current !== document.body) {\n if (current.isSameNode(elem)) {\n return 
true;\n }\n current = current.parentElement;\n }\n return false;\n });\n }\n var isActive = (element) => {\n if (element.hasAttribute(\"disabled\") || element.hasAttribute(\"hidden\") || element.getAttribute(\"aria-disabled\") === \"true\") {\n return false;\n }\n return true;\n };\n var isInteractiveElement = (element) => {\n const elementType = element.tagName;\n const elementRole = element.getAttribute(\"role\");\n const elementAriaRole = element.getAttribute(\"aria-role\");\n return elementType && interactiveElementTypes.includes(elementType) || elementRole && interactiveRoles.includes(elementRole) || elementAriaRole && interactiveAriaRoles.includes(elementAriaRole);\n };\n var isLeafElement = (element) => {\n if (element.textContent === \"\") {\n return false;\n }\n if (element.childNodes.length === 0) {\n return !leafElementDenyList.includes(element.tagName);\n }\n if (element.childNodes.length === 1 && isTextNode(element.childNodes[0])) {\n return true;\n }\n return false;\n };\n async function pickChunk(chunksSeen) {\n const viewportHeight = calculateViewportHeight();\n const documentHeight = document.documentElement.scrollHeight;\n const chunks = Math.ceil(documentHeight / viewportHeight);\n const chunksArray = Array.from({ length: chunks }, (_, i) => i);\n const chunksRemaining = chunksArray.filter((chunk2) => {\n return !chunksSeen.includes(chunk2);\n });\n const currentScrollPosition = window.scrollY;\n const closestChunk = chunksRemaining.reduce((closest, current) => {\n const currentChunkTop = viewportHeight * current;\n const closestChunkTop = viewportHeight * closest;\n return Math.abs(currentScrollPosition - currentChunkTop) < Math.abs(currentScrollPosition - closestChunkTop) ? 
current : closest;\n }, chunksRemaining[0]);\n const chunk = closestChunk;\n if (chunk === void 0) {\n throw new Error(`No chunks remaining to check: ${chunksRemaining}`);\n }\n return {\n chunk,\n chunksArray\n };\n }\n\n // lib/dom/debug.ts\n async function debugDom() {\n window.chunkNumber = 0;\n const { selectorMap: multiSelectorMap } = await window.processElements(\n window.chunkNumber\n );\n const selectorMap = multiSelectorMapToSelectorMap(multiSelectorMap);\n drawChunk(selectorMap);\n }\n function multiSelectorMapToSelectorMap(multiSelectorMap) {\n return Object.fromEntries(\n Object.entries(multiSelectorMap).map(([key, selectors]) => [\n Number(key),\n selectors[0]\n ])\n );\n }\n function drawChunk(selectorMap) {\n if (!window.showChunks) return;\n cleanupMarkers();\n Object.values(selectorMap).forEach((selector) => {\n const element = document.evaluate(\n selector,\n document,\n null,\n XPathResult.FIRST_ORDERED_NODE_TYPE,\n null\n ).singleNodeValue;\n if (element) {\n let rect;\n if (element.nodeType === Node.ELEMENT_NODE) {\n rect = element.getBoundingClientRect();\n } else {\n const range = document.createRange();\n range.selectNodeContents(element);\n rect = range.getBoundingClientRect();\n }\n const color = \"grey\";\n const overlay = document.createElement(\"div\");\n overlay.style.position = \"absolute\";\n overlay.style.left = `${rect.left + window.scrollX}px`;\n overlay.style.top = `${rect.top + window.scrollY}px`;\n overlay.style.padding = \"2px\";\n overlay.style.width = `${rect.width}px`;\n overlay.style.height = `${rect.height}px`;\n overlay.style.backgroundColor = color;\n overlay.className = \"stagehand-marker\";\n overlay.style.opacity = \"0.3\";\n overlay.style.zIndex = \"1000000000\";\n overlay.style.border = \"1px solid\";\n overlay.style.pointerEvents = \"none\";\n document.body.appendChild(overlay);\n }\n });\n }\n async function cleanupDebug() {\n cleanupMarkers();\n }\n function cleanupMarkers() {\n const markers = 
document.querySelectorAll(\".stagehand-marker\");\n markers.forEach((marker) => {\n marker.remove();\n });\n }\n window.debugDom = debugDom;\n window.cleanupDebug = cleanupDebug;\n})();\n"; | ||
export const scriptContent = "(() => {\n // lib/dom/xpathUtils.ts\n function getParentElement(node) {\n return isElementNode(node) ? node.parentElement : node.parentNode;\n }\n function getCombinations(attributes, size) {\n const results = [];\n function helper(start, combo) {\n if (combo.length === size) {\n results.push([...combo]);\n return;\n }\n for (let i = start; i < attributes.length; i++) {\n combo.push(attributes[i]);\n helper(i + 1, combo);\n combo.pop();\n }\n }\n helper(0, []);\n return results;\n }\n function isXPathFirstResultElement(xpath, target) {\n try {\n const result = document.evaluate(\n xpath,\n document.documentElement,\n null,\n XPathResult.ORDERED_NODE_SNAPSHOT_TYPE,\n null\n );\n return result.snapshotItem(0) === target;\n } catch (error) {\n console.warn(`Invalid XPath expression: ${xpath}`, error);\n return false;\n }\n }\n function escapeXPathString(value) {\n if (value.includes(\"'\")) {\n if (value.includes('\"')) {\n return \"concat(\" + value.split(/('+)/).map((part) => {\n if (part === \"'\") {\n return `\"'\"`;\n } else if (part.startsWith(\"'\") && part.endsWith(\"'\")) {\n return `\"${part}\"`;\n } else {\n return `'${part}'`;\n }\n }).join(\",\") + \")\";\n } else {\n return `\"${value}\"`;\n }\n } else {\n return `'${value}'`;\n }\n }\n async function generateXPathsForElement(element) {\n if (!element) return [];\n const [complexXPath, standardXPath, idBasedXPath] = await Promise.all([\n generateComplexXPath(element),\n generateStandardXPath(element),\n generatedIdBasedXPath(element)\n ]);\n return [standardXPath, ...idBasedXPath ? 
[idBasedXPath] : [], complexXPath];\n }\n async function generateComplexXPath(element) {\n const parts = [];\n let currentElement = element;\n while (currentElement && (isTextNode(currentElement) || isElementNode(currentElement))) {\n if (isElementNode(currentElement)) {\n const el = currentElement;\n let selector = el.tagName.toLowerCase();\n const attributePriority = [\n \"data-qa\",\n \"data-component\",\n \"data-role\",\n \"role\",\n \"aria-role\",\n \"type\",\n \"name\",\n \"aria-label\",\n \"placeholder\",\n \"title\",\n \"alt\"\n ];\n const attributes = attributePriority.map((attr) => {\n let value = el.getAttribute(attr);\n if (attr === \"href-full\" && value) {\n value = el.getAttribute(\"href\");\n }\n return value ? { attr: attr === \"href-full\" ? \"href\" : attr, value } : null;\n }).filter((attr) => attr !== null);\n let uniqueSelector = \"\";\n for (let i = 1; i <= attributes.length; i++) {\n const combinations = getCombinations(attributes, i);\n for (const combo of combinations) {\n const conditions = combo.map((a) => `@${a.attr}=${escapeXPathString(a.value)}`).join(\" and \");\n const xpath2 = `//${selector}[${conditions}]`;\n if (isXPathFirstResultElement(xpath2, el)) {\n uniqueSelector = xpath2;\n break;\n }\n }\n if (uniqueSelector) break;\n }\n if (uniqueSelector) {\n parts.unshift(uniqueSelector.replace(\"//\", \"\"));\n break;\n } else {\n const parent = getParentElement(el);\n if (parent) {\n const siblings = Array.from(parent.children).filter(\n (sibling) => sibling.tagName === el.tagName\n );\n const index = siblings.indexOf(el) + 1;\n selector += siblings.length > 1 ? 
`[${index}]` : \"\";\n }\n parts.unshift(selector);\n }\n }\n currentElement = getParentElement(currentElement);\n }\n const xpath = \"//\" + parts.join(\"/\");\n return xpath;\n }\n async function generateStandardXPath(element) {\n const parts = [];\n while (element && (isTextNode(element) || isElementNode(element))) {\n let index = 0;\n let hasSameTypeSiblings = false;\n const siblings = element.parentElement ? Array.from(element.parentElement.childNodes) : [];\n for (let i = 0; i < siblings.length; i++) {\n const sibling = siblings[i];\n if (sibling.nodeType === element.nodeType && sibling.nodeName === element.nodeName) {\n index = index + 1;\n hasSameTypeSiblings = true;\n if (sibling.isSameNode(element)) {\n break;\n }\n }\n }\n if (element.nodeName !== \"#text\") {\n const tagName = element.nodeName.toLowerCase();\n const pathIndex = hasSameTypeSiblings ? `[${index}]` : \"\";\n parts.unshift(`${tagName}${pathIndex}`);\n }\n element = element.parentElement;\n }\n return parts.length ? 
`/${parts.join(\"/\")}` : \"\";\n }\n async function generatedIdBasedXPath(element) {\n if (isElementNode(element) && element.id) {\n return `//*[@id='${element.id}']`;\n }\n return null;\n }\n\n // lib/dom/utils.ts\n async function waitForDomSettle() {\n return new Promise((resolve) => {\n const createTimeout = () => {\n return setTimeout(() => {\n resolve();\n }, 2e3);\n };\n let timeout = createTimeout();\n const observer = new MutationObserver(() => {\n clearTimeout(timeout);\n timeout = createTimeout();\n });\n observer.observe(window.document.body, { childList: true, subtree: true });\n });\n }\n window.waitForDomSettle = waitForDomSettle;\n function calculateViewportHeight() {\n return Math.ceil(window.innerHeight * 0.75);\n }\n function canElementScroll(elem) {\n if (typeof elem.scrollTo !== \"function\") {\n console.warn(\"canElementScroll: .scrollTo is not a function.\");\n return false;\n }\n try {\n const originalTop = elem.scrollTop;\n elem.scrollTo({\n top: originalTop + 100,\n left: 0,\n behavior: \"instant\"\n });\n if (elem.scrollTop === originalTop) {\n throw new Error(\"scrollTop did not change\");\n }\n elem.scrollTo({\n top: originalTop,\n left: 0,\n behavior: \"instant\"\n });\n return true;\n } catch (error) {\n console.warn(\"canElementScroll error:\", error.message || error);\n return false;\n }\n }\n\n // lib/dom/GlobalPageContainer.ts\n var GlobalPageContainer = class {\n getViewportHeight() {\n return calculateViewportHeight();\n }\n getScrollHeight() {\n return document.documentElement.scrollHeight;\n }\n async scrollTo(offset) {\n await new Promise((resolve) => setTimeout(resolve, 1500));\n window.scrollTo({ top: offset, left: 0, behavior: \"smooth\" });\n await this.waitForScrollEnd();\n }\n async waitForScrollEnd() {\n return new Promise((resolve) => {\n let scrollEndTimer;\n const handleScroll = () => {\n clearTimeout(scrollEndTimer);\n scrollEndTimer = window.setTimeout(() => {\n window.removeEventListener(\"scroll\", 
handleScroll);\n resolve();\n }, 100);\n };\n window.addEventListener(\"scroll\", handleScroll, { passive: true });\n handleScroll();\n });\n }\n };\n\n // lib/dom/ElementContainer.ts\n var ElementContainer = class {\n constructor(el) {\n this.el = el;\n }\n getViewportHeight() {\n return this.el.clientHeight;\n }\n getScrollHeight() {\n return this.el.scrollHeight;\n }\n async scrollTo(offset) {\n await new Promise((resolve) => setTimeout(resolve, 1500));\n this.el.scrollTo({ top: offset, left: 0, behavior: \"smooth\" });\n await this.waitForScrollEnd();\n }\n async waitForScrollEnd() {\n return new Promise((resolve) => {\n let scrollEndTimer;\n const handleScroll = () => {\n clearTimeout(scrollEndTimer);\n scrollEndTimer = window.setTimeout(() => {\n this.el.removeEventListener(\"scroll\", handleScroll);\n resolve();\n }, 100);\n };\n this.el.addEventListener(\"scroll\", handleScroll, { passive: true });\n handleScroll();\n });\n }\n };\n\n // lib/dom/containerFactory.ts\n function createStagehandContainer(obj) {\n if (obj instanceof Window) {\n return new GlobalPageContainer();\n } else {\n return new ElementContainer(obj);\n }\n }\n\n // lib/dom/process.ts\n function isElementNode(node) {\n return node.nodeType === Node.ELEMENT_NODE;\n }\n function isTextNode(node) {\n return node.nodeType === Node.TEXT_NODE && Boolean(node.textContent?.trim());\n }\n function getMainScrollableElement() {\n const docEl = document.documentElement;\n let mainScrollable = docEl;\n const rootScrollDiff = docEl.scrollHeight - docEl.clientHeight;\n let maxScrollDiff = rootScrollDiff;\n const allElements = document.querySelectorAll(\"*\");\n for (const elem of allElements) {\n const style = window.getComputedStyle(elem);\n const overflowY = style.overflowY;\n const isPotentiallyScrollable = overflowY === \"auto\" || overflowY === \"scroll\" || overflowY === \"overlay\";\n if (isPotentiallyScrollable) {\n const candidateScrollDiff = elem.scrollHeight - elem.clientHeight;\n if 
(candidateScrollDiff > maxScrollDiff) {\n maxScrollDiff = candidateScrollDiff;\n mainScrollable = elem;\n }\n }\n }\n if (mainScrollable !== docEl) {\n if (!canElementScroll(mainScrollable)) {\n console.log(\n \"Stagehand (Browser Process): Unable to scroll candidate. Fallback to <html>.\"\n );\n mainScrollable = docEl;\n }\n }\n return mainScrollable;\n }\n async function processDom(chunksSeen) {\n const { chunk, chunksArray } = await pickChunk(chunksSeen);\n const container = createStagehandContainer(window);\n const { outputString, selectorMap } = await processElements(\n chunk,\n true,\n 0,\n container\n );\n console.log(\n `Stagehand (Browser Process): Extracted dom elements:\n${outputString}`\n );\n return {\n outputString,\n selectorMap,\n chunk,\n chunks: chunksArray\n };\n }\n async function processAllOfDom() {\n console.log(\"Stagehand (Browser Process): Processing all of DOM\");\n const mainScrollable = getMainScrollableElement();\n const container = mainScrollable === document.documentElement ? 
createStagehandContainer(window) : createStagehandContainer(mainScrollable);\n const viewportHeight = container.getViewportHeight();\n const documentHeight = container.getScrollHeight();\n const totalChunks = Math.ceil(documentHeight / viewportHeight);\n let index = 0;\n const results = [];\n for (let chunk = 0; chunk < totalChunks; chunk++) {\n const result = await processElements(chunk, true, index, container);\n results.push(result);\n index += Object.keys(result.selectorMap).length;\n }\n await container.scrollTo(0);\n const allOutputString = results.map((result) => result.outputString).join(\"\");\n const allSelectorMap = results.reduce(\n (acc, result) => ({ ...acc, ...result.selectorMap }),\n {}\n );\n console.log(\n `Stagehand (Browser Process): All dom elements: ${allOutputString}`\n );\n return {\n outputString: allOutputString,\n selectorMap: allSelectorMap\n };\n }\n var xpathCache = /* @__PURE__ */ new Map();\n async function processElements(chunk, scrollToChunk = true, indexOffset = 0, container) {\n console.time(\"processElements:total\");\n const stagehandContainer = container ?? 
createStagehandContainer(window);\n const viewportHeight = stagehandContainer.getViewportHeight();\n const totalScrollHeight = stagehandContainer.getScrollHeight();\n const chunkHeight = viewportHeight * chunk;\n const maxScrollTop = totalScrollHeight - viewportHeight;\n const offsetTop = Math.min(chunkHeight, maxScrollTop);\n if (scrollToChunk) {\n console.time(\"processElements:scroll\");\n await stagehandContainer.scrollTo(offsetTop);\n console.timeEnd(\"processElements:scroll\");\n }\n console.log(\"Stagehand (Browser Process): Generating candidate elements\");\n console.time(\"processElements:findCandidates\");\n const DOMQueue = [...document.body.childNodes];\n const candidateElements = [];\n while (DOMQueue.length > 0) {\n const element = DOMQueue.pop();\n let shouldAddElement = false;\n if (element && isElementNode(element)) {\n const childrenCount = element.childNodes.length;\n for (let i = childrenCount - 1; i >= 0; i--) {\n const child = element.childNodes[i];\n DOMQueue.push(child);\n }\n if (isInteractiveElement(element)) {\n if (isActive(element) && isVisible(element)) {\n shouldAddElement = true;\n }\n }\n if (isLeafElement(element)) {\n if (isActive(element) && isVisible(element)) {\n shouldAddElement = true;\n }\n }\n }\n if (element && isTextNode(element) && isTextVisible(element)) {\n shouldAddElement = true;\n }\n if (shouldAddElement) {\n candidateElements.push(element);\n }\n }\n console.timeEnd(\"processElements:findCandidates\");\n const selectorMap = {};\n let outputString = \"\";\n console.log(\n `Stagehand (Browser Process): Processing candidate elements: ${candidateElements.length}`\n );\n console.time(\"processElements:processCandidates\");\n console.time(\"processElements:generateXPaths\");\n const xpathLists = await Promise.all(\n candidateElements.map(async (element) => {\n if (xpathCache.has(element)) {\n return xpathCache.get(element);\n }\n const xpaths = await generateXPathsForElement(element);\n xpathCache.set(element, 
xpaths);\n return xpaths;\n })\n );\n console.timeEnd(\"processElements:generateXPaths\");\n candidateElements.forEach((element, index) => {\n const xpaths = xpathLists[index];\n let elementOutput = \"\";\n if (isTextNode(element)) {\n const textContent = element.textContent?.trim();\n if (textContent) {\n elementOutput += `${index + indexOffset}:${textContent}\n`;\n }\n } else if (isElementNode(element)) {\n const tagName = element.tagName.toLowerCase();\n const attributes = collectEssentialAttributes(element);\n const openingTag = `<${tagName}${attributes ? \" \" + attributes : \"\"}>`;\n const closingTag = `</${tagName}>`;\n const textContent = element.textContent?.trim() || \"\";\n elementOutput += `${index + indexOffset}:${openingTag}${textContent}${closingTag}\n`;\n }\n outputString += elementOutput;\n selectorMap[index + indexOffset] = xpaths;\n });\n console.timeEnd(\"processElements:processCandidates\");\n console.timeEnd(\"processElements:total\");\n return {\n outputString,\n selectorMap\n };\n }\n function collectEssentialAttributes(element) {\n const essentialAttributes = [\n \"id\",\n \"class\",\n \"href\",\n \"src\",\n \"aria-label\",\n \"aria-name\",\n \"aria-role\",\n \"aria-description\",\n \"aria-expanded\",\n \"aria-haspopup\",\n \"type\",\n \"value\"\n ];\n const attrs = essentialAttributes.map((attr) => {\n const value = element.getAttribute(attr);\n return value ? 
`${attr}=\"${value}\"` : \"\";\n }).filter((attr) => attr !== \"\");\n Array.from(element.attributes).forEach((attr) => {\n if (attr.name.startsWith(\"data-\")) {\n attrs.push(`${attr.name}=\"${attr.value}\"`);\n }\n });\n return attrs.join(\" \");\n }\n function storeDOM() {\n const originalDOM = document.body.cloneNode(true);\n console.log(\"DOM state stored.\");\n return originalDOM.outerHTML;\n }\n function restoreDOM(storedDOM) {\n console.log(\"Restoring DOM\");\n if (storedDOM) {\n document.body.innerHTML = storedDOM;\n } else {\n console.error(\"No DOM state was provided.\");\n }\n }\n function createTextBoundingBoxes() {\n const style = document.createElement(\"style\");\n document.head.appendChild(style);\n if (style.sheet) {\n style.sheet.insertRule(\n `\n .stagehand-highlighted-word, .stagehand-space {\n border: 0px solid orange;\n display: inline-block !important;\n visibility: visible;\n }\n `,\n 0\n );\n style.sheet.insertRule(\n `\n code .stagehand-highlighted-word, code .stagehand-space,\n pre .stagehand-highlighted-word, pre .stagehand-space {\n white-space: pre-wrap;\n display: inline !important;\n }\n `,\n 1\n );\n }\n function applyHighlighting(root) {\n root.querySelectorAll(\"body *\").forEach((element) => {\n if (element.closest(\".stagehand-nav, .stagehand-marker\")) {\n return;\n }\n if ([\"SCRIPT\", \"STYLE\", \"IFRAME\", \"INPUT\"].includes(element.tagName)) {\n return;\n }\n const childNodes = Array.from(element.childNodes);\n childNodes.forEach((node) => {\n if (node.nodeType === 3 && node.textContent?.trim().length > 0) {\n const textContent = node.textContent.replace(/\\u00A0/g, \" \");\n const tokens = textContent.split(/(\\s+)/g);\n const fragment = document.createDocumentFragment();\n const parentIsCode = element.tagName === \"CODE\";\n tokens.forEach((token) => {\n const span = document.createElement(\"span\");\n span.textContent = token;\n if (parentIsCode) {\n span.style.whiteSpace = \"pre-wrap\";\n span.style.display = 
\"inline\";\n }\n span.className = token.trim().length === 0 ? \"stagehand-space\" : \"stagehand-highlighted-word\";\n fragment.appendChild(span);\n });\n if (fragment.childNodes.length > 0 && node.parentNode) {\n element.insertBefore(fragment, node);\n node.remove();\n }\n }\n });\n });\n }\n applyHighlighting(document);\n document.querySelectorAll(\"iframe\").forEach((iframe) => {\n try {\n iframe.contentWindow?.postMessage({ action: \"highlight\" }, \"*\");\n } catch (error) {\n console.error(\"Error accessing iframe content: \", error);\n }\n });\n }\n function getElementBoundingBoxes(xpath) {\n const element = document.evaluate(\n xpath,\n document,\n null,\n XPathResult.FIRST_ORDERED_NODE_TYPE,\n null\n ).singleNodeValue;\n if (!element) return [];\n const isValidText = (text) => text && text.trim().length > 0;\n let dropDownElem = element.querySelector(\"option[selected]\");\n if (!dropDownElem) {\n dropDownElem = element.querySelector(\"option\");\n }\n if (dropDownElem) {\n const elemText = dropDownElem.textContent || \"\";\n if (isValidText(elemText)) {\n const parentRect = element.getBoundingClientRect();\n return [\n {\n text: elemText.trim(),\n top: parentRect.top + window.scrollY,\n left: parentRect.left + window.scrollX,\n width: parentRect.width,\n height: parentRect.height\n }\n ];\n } else {\n return [];\n }\n }\n let placeholderText = \"\";\n if ((element.tagName.toLowerCase() === \"input\" || element.tagName.toLowerCase() === \"textarea\") && element.placeholder) {\n placeholderText = element.placeholder;\n } else if (element.tagName.toLowerCase() === \"a\") {\n placeholderText = \"\";\n } else if (element.tagName.toLowerCase() === \"img\") {\n placeholderText = element.alt || \"\";\n }\n const words = element.querySelectorAll(\n \".stagehand-highlighted-word\"\n );\n const boundingBoxes = Array.from(words).map((word) => {\n const rect = word.getBoundingClientRect();\n return {\n text: word.innerText || \"\",\n top: rect.top + window.scrollY,\n 
left: rect.left + window.scrollX,\n width: rect.width,\n height: rect.height * 0.75\n };\n }).filter(\n (box) => box.width > 0 && box.height > 0 && box.top >= 0 && box.left >= 0 && isValidText(box.text)\n );\n if (boundingBoxes.length === 0) {\n const elementRect = element.getBoundingClientRect();\n return [\n {\n text: placeholderText,\n top: elementRect.top + window.scrollY,\n left: elementRect.left + window.scrollX,\n width: elementRect.width,\n height: elementRect.height * 0.75\n }\n ];\n }\n return boundingBoxes;\n }\n window.processDom = processDom;\n window.processAllOfDom = processAllOfDom;\n window.processElements = processElements;\n window.storeDOM = storeDOM;\n window.restoreDOM = restoreDOM;\n window.createTextBoundingBoxes = createTextBoundingBoxes;\n window.getElementBoundingBoxes = getElementBoundingBoxes;\n window.createStagehandContainer = createStagehandContainer;\n var leafElementDenyList = [\"SVG\", \"IFRAME\", \"SCRIPT\", \"STYLE\", \"LINK\"];\n var interactiveElementTypes = [\n \"A\",\n \"BUTTON\",\n \"DETAILS\",\n \"EMBED\",\n \"INPUT\",\n \"LABEL\",\n \"MENU\",\n \"MENUITEM\",\n \"OBJECT\",\n \"SELECT\",\n \"TEXTAREA\",\n \"SUMMARY\"\n ];\n var interactiveRoles = [\n \"button\",\n \"menu\",\n \"menuitem\",\n \"link\",\n \"checkbox\",\n \"radio\",\n \"slider\",\n \"tab\",\n \"tabpanel\",\n \"textbox\",\n \"combobox\",\n \"grid\",\n \"listbox\",\n \"option\",\n \"progressbar\",\n \"scrollbar\",\n \"searchbox\",\n \"switch\",\n \"tree\",\n \"treeitem\",\n \"spinbutton\",\n \"tooltip\"\n ];\n var interactiveAriaRoles = [\"menu\", \"menuitem\", \"button\"];\n var isVisible = (element) => {\n const rect = element.getBoundingClientRect();\n if (rect.width === 0 || rect.height === 0 || rect.top < 0 || rect.top > window.innerHeight) {\n return false;\n }\n if (!isTopElement(element, rect)) {\n return false;\n }\n const visible = element.checkVisibility({\n checkOpacity: true,\n checkVisibilityCSS: true\n });\n return visible;\n };\n var 
isTextVisible = (element) => {\n const range = document.createRange();\n range.selectNodeContents(element);\n const rect = range.getBoundingClientRect();\n if (rect.width === 0 || rect.height === 0 || rect.top < 0 || rect.top > window.innerHeight) {\n return false;\n }\n const parent = element.parentElement;\n if (!parent) {\n return false;\n }\n const visible = parent.checkVisibility({\n checkOpacity: true,\n checkVisibilityCSS: true\n });\n return visible;\n };\n function isTopElement(elem, rect) {\n const points = [\n { x: rect.left + rect.width * 0.25, y: rect.top + rect.height * 0.25 },\n { x: rect.left + rect.width * 0.75, y: rect.top + rect.height * 0.25 },\n { x: rect.left + rect.width * 0.25, y: rect.top + rect.height * 0.75 },\n { x: rect.left + rect.width * 0.75, y: rect.top + rect.height * 0.75 },\n { x: rect.left + rect.width / 2, y: rect.top + rect.height / 2 }\n ];\n return points.some((point) => {\n const topEl = document.elementFromPoint(point.x, point.y);\n let current = topEl;\n while (current && current !== document.body) {\n if (current.isSameNode(elem)) {\n return true;\n }\n current = current.parentElement;\n }\n return false;\n });\n }\n var isActive = (element) => {\n if (element.hasAttribute(\"disabled\") || element.hasAttribute(\"hidden\") || element.getAttribute(\"aria-disabled\") === \"true\") {\n return false;\n }\n return true;\n };\n var isInteractiveElement = (element) => {\n const elementType = element.tagName;\n const elementRole = element.getAttribute(\"role\");\n const elementAriaRole = element.getAttribute(\"aria-role\");\n return elementType && interactiveElementTypes.includes(elementType) || elementRole && interactiveRoles.includes(elementRole) || elementAriaRole && interactiveAriaRoles.includes(elementAriaRole);\n };\n var isLeafElement = (element) => {\n if (element.textContent === \"\") {\n return false;\n }\n if (element.childNodes.length === 0) {\n return !leafElementDenyList.includes(element.tagName);\n }\n if 
(element.childNodes.length === 1 && isTextNode(element.childNodes[0])) {\n return true;\n }\n return false;\n };\n async function pickChunk(chunksSeen) {\n const viewportHeight = calculateViewportHeight();\n const documentHeight = document.documentElement.scrollHeight;\n const chunks = Math.ceil(documentHeight / viewportHeight);\n const chunksArray = Array.from({ length: chunks }, (_, i) => i);\n const chunksRemaining = chunksArray.filter((chunk2) => {\n return !chunksSeen.includes(chunk2);\n });\n const currentScrollPosition = window.scrollY;\n const closestChunk = chunksRemaining.reduce((closest, current) => {\n const currentChunkTop = viewportHeight * current;\n const closestChunkTop = viewportHeight * closest;\n return Math.abs(currentScrollPosition - currentChunkTop) < Math.abs(currentScrollPosition - closestChunkTop) ? current : closest;\n }, chunksRemaining[0]);\n const chunk = closestChunk;\n if (chunk === void 0) {\n throw new Error(`No chunks remaining to check: ${chunksRemaining}`);\n }\n return {\n chunk,\n chunksArray\n };\n }\n\n // lib/dom/debug.ts\n async function debugDom() {\n window.chunkNumber = 0;\n const { selectorMap: multiSelectorMap } = await window.processElements(\n window.chunkNumber\n );\n const selectorMap = multiSelectorMapToSelectorMap(multiSelectorMap);\n drawChunk(selectorMap);\n }\n function multiSelectorMapToSelectorMap(multiSelectorMap) {\n return Object.fromEntries(\n Object.entries(multiSelectorMap).map(([key, selectors]) => [\n Number(key),\n selectors[0]\n ])\n );\n }\n function drawChunk(selectorMap) {\n if (!window.showChunks) return;\n cleanupMarkers();\n Object.values(selectorMap).forEach((selector) => {\n const element = document.evaluate(\n selector,\n document,\n null,\n XPathResult.FIRST_ORDERED_NODE_TYPE,\n null\n ).singleNodeValue;\n if (element) {\n let rect;\n if (element.nodeType === Node.ELEMENT_NODE) {\n rect = element.getBoundingClientRect();\n } else {\n const range = document.createRange();\n 
range.selectNodeContents(element);\n rect = range.getBoundingClientRect();\n }\n const color = \"grey\";\n const overlay = document.createElement(\"div\");\n overlay.style.position = \"absolute\";\n overlay.style.left = `${rect.left + window.scrollX}px`;\n overlay.style.top = `${rect.top + window.scrollY}px`;\n overlay.style.padding = \"2px\";\n overlay.style.width = `${rect.width}px`;\n overlay.style.height = `${rect.height}px`;\n overlay.style.backgroundColor = color;\n overlay.className = \"stagehand-marker\";\n overlay.style.opacity = \"0.3\";\n overlay.style.zIndex = \"1000000000\";\n overlay.style.border = \"1px solid\";\n overlay.style.pointerEvents = \"none\";\n document.body.appendChild(overlay);\n }\n });\n }\n async function cleanupDebug() {\n cleanupMarkers();\n }\n function cleanupMarkers() {\n const markers = document.querySelectorAll(\".stagehand-marker\");\n markers.forEach((marker) => {\n marker.remove();\n });\n }\n window.debugDom = debugDom;\n window.cleanupDebug = cleanupDebug;\n})();\n"; |
@@ -0,1 +1,3 @@ | ||
import { StagehandContainer } from "./StagehandContainer"; | ||
export {}; | ||
@@ -22,3 +24,3 @@ declare global { | ||
cleanupDebug: () => void; | ||
scrollToHeight: (height: number) => Promise<void>; | ||
createStagehandContainer: (obj: Window | HTMLElement) => StagehandContainer; | ||
waitForDomSettle: () => Promise<void>; | ||
@@ -25,0 +27,0 @@ __playwright?: unknown; |
import { generateXPathsForElement as generateXPaths } from "./xpathUtils"; | ||
import { calculateViewportHeight } from "./utils"; | ||
import { calculateViewportHeight, canElementScroll } from "./utils"; | ||
import { createStagehandContainer } from "./containerFactory"; | ||
import { StagehandContainer } from "./StagehandContainer"; | ||
@@ -12,6 +14,56 @@ export function isElementNode(node: Node): node is Element { | ||
/**
 * Chooses the element to treat as the page's main vertical scroller.
 *
 * The root `<html>` element is the baseline. Any element whose computed
 * `overflow-y` is `auto`, `scroll` or `overlay` replaces the current pick when
 * its scrollable distance (`scrollHeight - clientHeight`) is strictly larger.
 * A non-root pick is then verified with `canElementScroll`; if that check
 * fails, the root element is returned instead.
 *
 * @returns The selected scroll container, or `document.documentElement` as the fallback.
 */
function getMainScrollableElement(): HTMLElement {
  const root = document.documentElement;
  const scrollableOverflowValues = ["auto", "scroll", "overlay"];

  // Start from the root's own scrollable distance; candidates must beat it.
  let best: HTMLElement = root;
  let bestDistance = root.scrollHeight - root.clientHeight;

  document.querySelectorAll<HTMLElement>("*").forEach((candidate) => {
    const { overflowY } = window.getComputedStyle(candidate);
    if (!scrollableOverflowValues.includes(overflowY)) {
      return;
    }
    const distance = candidate.scrollHeight - candidate.clientHeight;
    // Strictly greater: on a tie the earlier pick (ultimately the root) wins.
    if (distance > bestDistance) {
      bestDistance = distance;
      best = candidate;
    }
  });

  if (best === root) {
    return root;
  }
  if (canElementScroll(best)) {
    return best;
  }

  console.log(
    "Stagehand (Browser Process): Unable to scroll candidate. Fallback to <html>.",
  );
  return root;
}
export async function processDom(chunksSeen: Array<number>) { | ||
const { chunk, chunksArray } = await pickChunk(chunksSeen); | ||
const { outputString, selectorMap } = await processElements(chunk); | ||
const container = createStagehandContainer(window); | ||
const { outputString, selectorMap } = await processElements( | ||
chunk, | ||
true, | ||
0, | ||
container, | ||
); | ||
console.log( | ||
@@ -32,4 +84,11 @@ `Stagehand (Browser Process): Extracted dom elements:\n${outputString}`, | ||
const viewportHeight = calculateViewportHeight(); | ||
const documentHeight = document.documentElement.scrollHeight; | ||
const mainScrollable = getMainScrollableElement(); | ||
const container = | ||
mainScrollable === document.documentElement | ||
? createStagehandContainer(window) | ||
: createStagehandContainer(mainScrollable); | ||
const viewportHeight = container.getViewportHeight(); | ||
const documentHeight = container.getScrollHeight(); | ||
const totalChunks = Math.ceil(documentHeight / viewportHeight); | ||
@@ -40,3 +99,4 @@ | ||
for (let chunk = 0; chunk < totalChunks; chunk++) { | ||
const result = await processElements(chunk, true, index); | ||
// Pass the container to processElements | ||
const result = await processElements(chunk, true, index, container); | ||
results.push(result); | ||
@@ -46,3 +106,3 @@ index += Object.keys(result.selectorMap).length; | ||
await scrollToHeight(0); | ||
await container.scrollTo(0); | ||
@@ -65,21 +125,2 @@ const allOutputString = results.map((result) => result.outputString).join(""); | ||
/**
 * Smooth-scrolls the window to the given vertical offset and resolves once
 * scrolling has gone quiet.
 *
 * Completion is detected by debouncing `scroll` events rather than by a
 * `scrollend` event: every `scroll` event restarts a 100ms timer, and the
 * promise resolves when that timer fires. The handler is invoked once up
 * front so the promise still resolves if no `scroll` event is dispatched.
 *
 * @param height - Target vertical scroll offset passed as `top` to `window.scrollTo`.
 */
export async function scrollToHeight(height: number) {
  window.scrollTo({ top: height, left: 0, behavior: "smooth" });

  await new Promise<void>((resolve) => {
    let quietTimer: number;

    function onScroll() {
      // Restart the quiet-period timer on every scroll event.
      clearTimeout(quietTimer);
      quietTimer = window.setTimeout(() => {
        window.removeEventListener("scroll", onScroll);
        resolve();
      }, 100);
    }

    window.addEventListener("scroll", onScroll, { passive: true });
    // Arm the timer immediately in case no scroll event arrives.
    onScroll();
  });
}
const xpathCache: Map<Node, string[]> = new Map(); | ||
@@ -91,2 +132,3 @@ | ||
indexOffset: number = 0, | ||
container?: StagehandContainer, | ||
): Promise<{ | ||
@@ -97,9 +139,11 @@ outputString: string; | ||
console.time("processElements:total"); | ||
const viewportHeight = calculateViewportHeight(); | ||
const chunkHeight = viewportHeight * chunk; | ||
// Calculate the maximum scrollable offset | ||
const maxScrollTop = document.documentElement.scrollHeight - viewportHeight; | ||
// If no container given, default to the entire page | ||
const stagehandContainer = container ?? createStagehandContainer(window); | ||
// Adjust the offsetTop to not exceed the maximum scrollable offset | ||
const viewportHeight = stagehandContainer.getViewportHeight(); | ||
const totalScrollHeight = stagehandContainer.getScrollHeight(); | ||
const chunkHeight = viewportHeight * chunk; | ||
const maxScrollTop = totalScrollHeight - viewportHeight; | ||
const offsetTop = Math.min(chunkHeight, maxScrollTop); | ||
@@ -109,12 +153,12 @@ | ||
console.time("processElements:scroll"); | ||
await scrollToHeight(offsetTop); | ||
await stagehandContainer.scrollTo(offsetTop); | ||
console.timeEnd("processElements:scroll"); | ||
} | ||
const candidateElements: Array<ChildNode> = []; | ||
const DOMQueue: Array<ChildNode> = [...document.body.childNodes]; | ||
console.log("Stagehand (Browser Process): Generating candidate elements"); | ||
console.time("processElements:findCandidates"); | ||
// NOTE: we still gather candidate elems from the entire body | ||
const DOMQueue: ChildNode[] = [...document.body.childNodes]; | ||
const candidateElements: ChildNode[] = []; | ||
while (DOMQueue.length > 0) { | ||
@@ -298,7 +342,3 @@ const element = DOMQueue.pop(); | ||
} | ||
if ( | ||
["SCRIPT", "STYLE", "IFRAME", "INPUT", "TEXTAREA"].includes( | ||
element.tagName, | ||
) | ||
) { | ||
if (["SCRIPT", "STYLE", "IFRAME", "INPUT"].includes(element.tagName)) { | ||
return; | ||
@@ -448,3 +488,2 @@ } | ||
window.processElements = processElements; | ||
window.scrollToHeight = scrollToHeight; | ||
window.storeDOM = storeDOM; | ||
@@ -454,2 +493,3 @@ window.restoreDOM = restoreDOM; | ||
window.getElementBoundingBoxes = getElementBoundingBoxes; | ||
window.createStagehandContainer = createStagehandContainer; | ||
@@ -546,5 +586,2 @@ const leafElementDenyList = ["SVG", "IFRAME", "SCRIPT", "STYLE", "LINK"]; | ||
} | ||
if (!isTopElement(parent, rect)) { | ||
return false; | ||
} | ||
@@ -551,0 +588,0 @@ const visible = parent.checkVisibility({ |
@@ -22,1 +22,41 @@ export async function waitForDomSettle() { | ||
} | ||
/**
 * Tests whether an element actually responds to `.scrollTo(...)`, i.e. whether
 * its `scrollTop` changes when it is asked to scroll.
 *
 * The probe first nudges the element 100px down. If that has no effect and the
 * element is not at the top (it may already be at the bottom of its scroll
 * range, where `scrollTop` cannot increase), it nudges the element up instead.
 * When a probe succeeds the original scroll position is restored.
 *
 * @param elem - The element to probe.
 * @returns `true` if `scrollTop` changed in response to a probe; `false` if
 *   `.scrollTo` is missing, no probe moved the element, or probing threw.
 */
export function canElementScroll(elem: HTMLElement): boolean {
  // Quick check that scrollTo exists before probing.
  if (typeof elem.scrollTo !== "function") {
    console.warn("canElementScroll: .scrollTo is not a function.");
    return false;
  }

  const probeDistance = 100;

  try {
    const originalTop = elem.scrollTop;

    // Scrolls to `top` and reports whether the element actually moved.
    const probe = (top: number): boolean => {
      elem.scrollTo({ top, left: 0, behavior: "instant" });
      return elem.scrollTop !== originalTop;
    };

    let moved = probe(originalTop + probeDistance);
    if (!moved && originalTop > 0) {
      // Already at the bottom of the range: a downward nudge is a no-op,
      // so verify scrollability by nudging upward instead.
      moved = probe(Math.max(0, originalTop - probeDistance));
    }

    if (!moved) {
      throw new Error("scrollTop did not change");
    }

    // Scroll back to the original place.
    elem.scrollTo({ top: originalTop, left: 0, behavior: "instant" });
    return true;
  } catch (error: unknown) {
    console.warn(
      "canElementScroll error:",
      error instanceof Error ? error.message || error : error,
    );
    return false;
  }
}
@@ -1201,5 +1201,9 @@ import { Locator, Page } from "@playwright/test"; | ||
}); | ||
await this.stagehandPage.page.evaluate(() => | ||
window.scrollToHeight(0), | ||
); | ||
await this.stagehandPage.page.evaluate(() => { | ||
const container = window.createStagehandContainer( | ||
document.documentElement, | ||
); | ||
return container.scrollTo(0); | ||
}); | ||
return await this.act({ | ||
@@ -1206,0 +1210,0 @@ action, |
@@ -250,3 +250,5 @@ import { z } from "zod"; | ||
}; | ||
allAnnotations.push(annotation); | ||
if (annotation.text.length > 0) { | ||
allAnnotations.push(annotation); | ||
} | ||
} | ||
@@ -253,0 +255,0 @@ } |
@@ -8,2 +8,6 @@ import { LogLine } from "../../types/log"; | ||
import { ScreenshotService } from "../vision"; | ||
import { | ||
getAccessibilityTree, | ||
getXPathByResolvedObjectId, | ||
} from "../a11y/utils"; | ||
@@ -57,3 +61,3 @@ export class StagehandObserveHandler { | ||
requestId, | ||
domSettleTimeoutMs, | ||
useAccessibilityTree = false, | ||
}: { | ||
@@ -64,5 +68,6 @@ instruction: string; | ||
llmClient: LLMClient; | ||
requestId?: string; | ||
requestId: string; | ||
domSettleTimeoutMs?: number; | ||
}): Promise<{ selector: string; description: string }[]> { | ||
useAccessibilityTree?: boolean; | ||
}) { | ||
if (!instruction) { | ||
@@ -83,14 +88,64 @@ instruction = `Find elements that can be used for any future actions in the page. These may be navigation links, related pages, section/subsection links, buttons, or other interactive elements. Be comprehensive: if there are multiple elements that may be relevant for future actions, return all of them.`; | ||
await this.stagehandPage._waitForSettledDom(domSettleTimeoutMs); | ||
let outputString: string; | ||
let selectorMap: Record<string, string[]> = {}; | ||
const backendNodeIdMap: Record<string, number> = {}; | ||
await this.stagehandPage.startDomDebug(); | ||
const evalResult = await this.stagehand.page.evaluate( | ||
(fullPage: boolean) => | ||
fullPage ? window.processAllOfDom() : window.processDom([]), | ||
fullPage, | ||
); | ||
await this.stagehandPage.enableCDP("DOM"); | ||
const { selectorMap } = evalResult; | ||
// has to be like this atm because of the re-assignment | ||
let { outputString } = evalResult; | ||
const evalResult = await this.stagehand.page.evaluate(async () => { | ||
const result = await window.processAllOfDom(); | ||
return result; | ||
}); | ||
// For each element in the selector map, get its backendNodeId | ||
for (const [index, xpaths] of Object.entries(evalResult.selectorMap)) { | ||
try { | ||
// Use the first xpath to find the element | ||
const xpath = xpaths[0]; | ||
const { result } = await this.stagehandPage.sendCDP<{ | ||
result: { objectId: string }; | ||
}>("Runtime.evaluate", { | ||
expression: `document.evaluate('${xpath}', document, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue`, | ||
returnByValue: false, | ||
}); | ||
if (result.objectId) { | ||
// Get the node details using CDP | ||
const { node } = await this.stagehandPage.sendCDP<{ | ||
node: { backendNodeId: number }; | ||
}>("DOM.describeNode", { | ||
objectId: result.objectId, | ||
depth: -1, | ||
pierce: true, | ||
}); | ||
if (node.backendNodeId) { | ||
backendNodeIdMap[index] = node.backendNodeId; | ||
} | ||
} | ||
} catch (error) { | ||
console.warn( | ||
`Failed to get backendNodeId for element ${index}:`, | ||
error, | ||
); | ||
continue; | ||
} | ||
} | ||
await this.stagehandPage.disableCDP("DOM"); | ||
({ outputString, selectorMap } = evalResult); | ||
if (useAccessibilityTree) { | ||
const tree = await getAccessibilityTree(this.stagehandPage, this.logger); | ||
this.logger({ | ||
category: "observation", | ||
message: "Getting accessibility tree data", | ||
level: 1, | ||
}); | ||
outputString = tree.simplified; | ||
} | ||
let annotatedScreenshot: Buffer | undefined; | ||
@@ -132,13 +187,42 @@ if (useVision === true) { | ||
logger: this.logger, | ||
isUsingAccessibilityTree: useAccessibilityTree, | ||
}); | ||
const elementsWithSelectors = observationResponse.elements.map( | ||
(element) => { | ||
const elementsWithSelectors = await Promise.all( | ||
observationResponse.elements.map(async (element) => { | ||
const { elementId, ...rest } = element; | ||
if (useAccessibilityTree) { | ||
const index = Object.entries(backendNodeIdMap).find( | ||
([, value]) => value === elementId, | ||
)?.[0]; | ||
if (!index || !selectorMap[index]?.[0]) { | ||
// Generate xpath for the given element if not found in selectorMap | ||
const { object } = await this.stagehandPage.sendCDP<{ | ||
object: { objectId: string }; | ||
}>("DOM.resolveNode", { | ||
backendNodeId: elementId, | ||
}); | ||
const xpath = await getXPathByResolvedObjectId( | ||
await this.stagehandPage.getCDPClient(), | ||
object.objectId, | ||
); | ||
return { | ||
...rest, | ||
selector: xpath, | ||
backendNodeId: elementId, | ||
}; | ||
} | ||
return { | ||
...rest, | ||
selector: `xpath=${selectorMap[index][0]}`, | ||
backendNodeId: elementId, | ||
}; | ||
} | ||
return { | ||
...rest, | ||
selector: `xpath=${selectorMap[elementId][0]}`, | ||
backendNodeId: backendNodeIdMap[elementId], | ||
}; | ||
}, | ||
}), | ||
); | ||
@@ -145,0 +229,0 @@ |
@@ -287,2 +287,3 @@ import { z } from "zod"; | ||
requestId, | ||
isUsingAccessibilityTree, | ||
userProvidedInstructions, | ||
@@ -298,5 +299,4 @@ logger, | ||
logger: (message: LogLine) => void; | ||
}): Promise<{ | ||
elements: { elementId: number; description: string }[]; | ||
}> { | ||
isUsingAccessibilityTree?: boolean; | ||
}) { | ||
const observeSchema = z.object({ | ||
@@ -310,7 +310,13 @@ elements: z | ||
.describe( | ||
"a description of the element and what it is relevant for", | ||
isUsingAccessibilityTree | ||
? "a description of the accessible element and its purpose" | ||
: "a description of the element and what it is relevant for", | ||
), | ||
}), | ||
) | ||
.describe("an array of elements that match the instruction"), | ||
.describe( | ||
isUsingAccessibilityTree | ||
? "an array of accessible elements that match the instruction" | ||
: "an array of elements that match the instruction", | ||
), | ||
}); | ||
@@ -324,4 +330,11 @@ | ||
messages: [ | ||
buildObserveSystemPrompt(userProvidedInstructions), | ||
buildObserveUserMessage(instruction, domElements), | ||
buildObserveSystemPrompt( | ||
userProvidedInstructions, | ||
isUsingAccessibilityTree, | ||
), | ||
buildObserveUserMessage( | ||
instruction, | ||
domElements, | ||
isUsingAccessibilityTree, | ||
), | ||
], | ||
@@ -328,0 +341,0 @@ image: image |
@@ -355,13 +355,17 @@ import { LLMTool } from "../types/llm"; | ||
// observe | ||
const observeSystemPrompt = ` | ||
export function buildObserveSystemPrompt( | ||
userProvidedInstructions?: string, | ||
isUsingAccessibilityTree = false, | ||
): ChatMessage { | ||
const observeSystemPrompt = ` | ||
You are helping the user automate the browser by finding elements based on what the user wants to observe in the page. | ||
You will be given: | ||
1. a instruction of elements to observe | ||
2. a numbered list of possible elements or an annotated image of the page | ||
2. ${ | ||
isUsingAccessibilityTree | ||
? "a hierarchical accessibility tree showing the semantic structure of the page" | ||
: "a numbered list of possible elements or an annotated image of the page" | ||
} | ||
Return an array of elements that match the instruction. | ||
`; | ||
export function buildObserveSystemPrompt( | ||
userProvidedInstructions?: string, | ||
): ChatMessage { | ||
Return an array of elements that match the instruction if they exist, otherwise return an empty array.`; | ||
const content = observeSystemPrompt.replace(/\s+/g, " "); | ||
@@ -380,2 +384,3 @@ | ||
domElements: string, | ||
isUsingAccessibilityTree = false, | ||
): ChatMessage { | ||
@@ -385,4 +390,4 @@ return { | ||
content: `instruction: ${instruction} | ||
DOM: ${domElements}`, | ||
${isUsingAccessibilityTree ? "Accessibility Tree" : "DOM"}: ${domElements}`, | ||
}; | ||
} |
import type { | ||
Page as PlaywrightPage, | ||
BrowserContext as PlaywrightContext, | ||
CDPSession, | ||
} from "@playwright/test"; | ||
@@ -9,3 +10,3 @@ import { LLMClient } from "./llm/LLMClient"; | ||
import { StagehandContext } from "./StagehandContext"; | ||
import { Page } from "../types/page"; | ||
import { Page, defaultExtractSchema } from "../types/page"; | ||
import { | ||
@@ -29,2 +30,3 @@ ExtractOptions, | ||
private llmClient: LLMClient; | ||
private cdpClient: CDPSession | null = null; | ||
@@ -288,10 +290,3 @@ constructor( | ||
async act({ | ||
action, | ||
modelName, | ||
modelClientOptions, | ||
useVision = "fallback", | ||
variables = {}, | ||
domSettleTimeoutMs, | ||
}: ActOptions): Promise<ActResult> { | ||
async act(actionOrOptions: string | ActOptions): Promise<ActResult> { | ||
if (!this.actHandler) { | ||
@@ -301,3 +296,16 @@ throw new Error("Act handler not initialized"); | ||
useVision = useVision ?? "fallback"; | ||
const options: ActOptions = | ||
typeof actionOrOptions === "string" | ||
? { action: actionOrOptions } | ||
: actionOrOptions; | ||
const { | ||
action, | ||
modelName, | ||
modelClientOptions, | ||
useVision = "fallback", | ||
variables = {}, | ||
domSettleTimeoutMs, | ||
} = options; | ||
const requestId = Math.random().toString(36).substring(2); | ||
@@ -366,10 +374,5 @@ const llmClient: LLMClient = modelName | ||
async extract<T extends z.AnyZodObject>({ | ||
instruction, | ||
schema, | ||
modelName, | ||
modelClientOptions, | ||
domSettleTimeoutMs, | ||
useTextExtract, | ||
}: ExtractOptions<T>): Promise<ExtractResult<T>> { | ||
async extract<T extends z.AnyZodObject = typeof defaultExtractSchema>( | ||
instructionOrOptions: string | ExtractOptions<T>, | ||
): Promise<ExtractResult<T>> { | ||
if (!this.extractHandler) { | ||
@@ -379,2 +382,19 @@ throw new Error("Extract handler not initialized"); | ||
const options: ExtractOptions<T> = | ||
typeof instructionOrOptions === "string" | ||
? { | ||
instruction: instructionOrOptions, | ||
schema: defaultExtractSchema as T, | ||
} | ||
: instructionOrOptions; | ||
const { | ||
instruction, | ||
schema, | ||
modelName, | ||
modelClientOptions, | ||
domSettleTimeoutMs, | ||
useTextExtract, | ||
} = options; | ||
const requestId = Math.random().toString(36).substring(2); | ||
@@ -439,3 +459,5 @@ const llmClient = modelName | ||
async observe(options?: ObserveOptions): Promise<ObserveResult[]> { | ||
async observe( | ||
instructionOrOptions?: string | ObserveOptions, | ||
): Promise<ObserveResult[]> { | ||
if (!this.observeHandler) { | ||
@@ -445,8 +467,19 @@ throw new Error("Observe handler not initialized"); | ||
const options: ObserveOptions = | ||
typeof instructionOrOptions === "string" | ||
? { instruction: instructionOrOptions } | ||
: instructionOrOptions || {}; | ||
const { | ||
instruction = "Find actions that can be performed on this page.", | ||
modelName, | ||
modelClientOptions, | ||
useVision = false, | ||
domSettleTimeoutMs, | ||
useAccessibilityTree = false, | ||
} = options; | ||
const requestId = Math.random().toString(36).substring(2); | ||
const llmClient = options?.modelName | ||
? this.stagehand.llmProvider.getClient( | ||
options.modelName, | ||
options.modelClientOptions, | ||
) | ||
const llmClient = modelName | ||
? this.stagehand.llmProvider.getClient(modelName, modelClientOptions) | ||
: this.llmClient; | ||
@@ -460,3 +493,3 @@ | ||
instruction: { | ||
value: options?.instruction, | ||
value: instruction, | ||
type: "string", | ||
@@ -472,2 +505,6 @@ }, | ||
}, | ||
useAccessibilityTree: { | ||
value: useAccessibilityTree ? "true" : "false", | ||
type: "boolean", | ||
}, | ||
}, | ||
@@ -478,10 +515,9 @@ }); | ||
.observe({ | ||
instruction: | ||
options?.instruction ?? | ||
"Find actions that can be performed on this page.", | ||
instruction, | ||
llmClient, | ||
useVision: options?.useVision ?? false, | ||
useVision, | ||
fullPage: false, | ||
requestId, | ||
domSettleTimeoutMs: options?.domSettleTimeoutMs, | ||
domSettleTimeoutMs, | ||
useAccessibilityTree, | ||
}) | ||
@@ -507,3 +543,3 @@ .catch((e) => { | ||
instruction: { | ||
value: options?.instruction, | ||
value: instruction, | ||
type: "string", | ||
@@ -521,2 +557,29 @@ }, | ||
} | ||
/**
 * Returns the CDP session stored on this instance, creating it on first use
 * via `this.context.newCDPSession(this.page)` and caching it in
 * `this.cdpClient` for later calls.
 */
async getCDPClient(): Promise<CDPSession> {
  if (this.cdpClient) {
    return this.cdpClient;
  }
  const session = await this.context.newCDPSession(this.page);
  this.cdpClient = session;
  return session;
}
/**
 * Sends a CDP command through the session returned by `getCDPClient()`.
 *
 * @param command - The CDP method name, e.g. `"DOM.describeNode"`.
 * @param args - Optional command parameters; an empty object is sent when omitted.
 * @returns The command result, asserted (not validated) to be of type `T`.
 */
async sendCDP<T>(
  command: string,
  args?: Record<string, unknown>,
): Promise<T> {
  const session = await this.getCDPClient();
  // Type assertion needed because CDP command strings are not fully typed.
  const method = command as Parameters<CDPSession["send"]>[0];
  const params = args || {};
  return session.send(method, params) as Promise<T>;
}
/**
 * Sends the `<domain>.enable` CDP command with empty parameters.
 *
 * @param domain - The CDP domain name, e.g. `"DOM"`.
 */
async enableCDP(domain: string): Promise<void> {
  const method = `${domain}.enable`;
  await this.sendCDP(method, {});
}
/**
 * Sends the `<domain>.disable` CDP command with empty parameters.
 *
 * @param domain - The CDP domain name, e.g. `"DOM"`.
 */
async disableCDP(domain: string): Promise<void> {
  const method = `${domain}.disable`;
  await this.sendCDP(method, {});
}
} |
163
lib/utils.ts
@@ -6,2 +6,7 @@ import crypto from "crypto"; | ||
// This is a heuristic for the width of a character in pixels. It seems to work | ||
// better than attempting to calculate character widths dynamically, which sometimes | ||
// results in collisions when placing characters on the "canvas". | ||
const HEURISTIC_CHAR_WIDTH = 5; | ||
export function generateId(operation: string) { | ||
@@ -30,8 +35,4 @@ return crypto.createHash("sha256").update(operation).digest("hex"); | ||
): string { | ||
// **1:** Estimate the average character width in pixels by examining the text annotations. | ||
// If no reliable measurement is found, default to 10 pixels per character. | ||
const charWidth = estimateCharacterWidth(textAnnotations) || 10; | ||
// **2:** Create a copy of textAnnotations and sort them by their vertical position (y-coordinate), | ||
// ensuring that topmost annotations appear first and bottommost appear last. | ||
// **1: Sort annotations by vertical position (y-coordinate).** | ||
// The topmost annotations appear first, the bottommost last. | ||
const sortedAnnotations = [...textAnnotations].sort( | ||
@@ -41,6 +42,5 @@ (a, b) => a.bottom_left.y - b.bottom_left.y, | ||
// **3:** Group annotations by their line position. We use a small epsilon to handle | ||
// floating-point differences. Two annotations are considered on the same line if their | ||
// y-coordinates differ by less than epsilon. | ||
const epsilon = 0.0001; | ||
// **2: Group annotations by line based on their y-coordinate.** | ||
// We use an epsilon so that very close y-values are treated as the same line. | ||
const epsilon = 1; | ||
const lineMap: Map<number, TextAnnotation[]> = new Map(); | ||
@@ -50,5 +50,3 @@ | ||
let foundLineY: number | undefined; | ||
// **4:** Check if the annotation belongs to an existing line group. | ||
// If so, add it to that line. Otherwise, start a new line group. | ||
// **3: Check if this annotation belongs to any existing line group.** | ||
for (const key of lineMap.keys()) { | ||
@@ -61,2 +59,3 @@ if (Math.abs(key - annotation.bottom_left.y) < epsilon) { | ||
// If found, push into that line; otherwise, create a new line entry. | ||
if (foundLineY !== undefined) { | ||
@@ -69,8 +68,6 @@ lineMap.get(foundLineY)!.push(annotation); | ||
// **5:** Extract all line keys (y-coordinates) and sort them to process lines top-to-bottom. | ||
// **4: Get all unique y-coordinates for lines and sort them top-to-bottom.** | ||
const lineYs = Array.from(lineMap.keys()).sort((a, b) => a - b); | ||
// **6:** For each line, group words together and calculate the maximum normalized end position (maxNormalizedEndX). | ||
// This will help determine the necessary canvas width to accommodate all text. | ||
let maxNormalizedEndX = 0; | ||
// **5: Build an array of "final lines" (TextAnnotations[]) by grouping words for each line.** | ||
const finalLines: TextAnnotation[][] = []; | ||
@@ -81,30 +78,45 @@ | ||
// **7:** Sort annotations in the current line by their horizontal position (x-coordinate), | ||
// ensuring left-to-right ordering. | ||
// **6: Sort annotations in the current line left-to-right by x-coordinate.** | ||
lineAnnotations.sort((a, b) => a.bottom_left.x - b.bottom_left.x); | ||
// **8:** Group nearby annotations into word clusters, forming logical sentences or phrases. | ||
// **7: Group annotations into word clusters (sentences/phrases).** | ||
const groupedLineAnnotations = groupWordsInSentence(lineAnnotations); | ||
// **9:** Determine how far to the right the text in this line extends, normalized by page width. | ||
// Update maxNormalizedEndX to track the widest line encountered. | ||
for (const ann of groupedLineAnnotations) { | ||
const textLengthInPx = ann.text.length * charWidth; | ||
const normalizedTextLength = textLengthInPx / pageWidth; | ||
const endX = ann.bottom_left_normalized.x + normalizedTextLength; | ||
if (endX > maxNormalizedEndX) { | ||
maxNormalizedEndX = endX; | ||
// **8: Push the grouped annotations for this line into finalLines.** | ||
finalLines.push(groupedLineAnnotations); | ||
} | ||
// ------------------------- | ||
// **First Pass**: Calculate the width of the longest line (in characters) up front. | ||
// We will use this to set the width of the canvas, which will reduce likelihood of collisions. | ||
// ------------------------- | ||
let maxLineWidthInChars = 0; | ||
for (const line of finalLines) { | ||
let lineMaxEnd = 0; | ||
for (const ann of line) { | ||
// Convert normalized X to character index | ||
const startXInChars = Math.round( | ||
ann.bottom_left_normalized.x * (pageWidth / HEURISTIC_CHAR_WIDTH), | ||
); | ||
// Each annotation spans ann.text.length characters | ||
const endXInChars = startXInChars + ann.text.length; | ||
if (endXInChars > lineMaxEnd) { | ||
lineMaxEnd = endXInChars; | ||
} | ||
} | ||
// **10:** Save the processed line to finalLines for later rendering. | ||
finalLines.push(groupedLineAnnotations); | ||
// Track the largest width across all lines | ||
if (lineMaxEnd > maxLineWidthInChars) { | ||
maxLineWidthInChars = lineMaxEnd; | ||
} | ||
} | ||
// **11:** Determine the canvas width in characters. We scale according to maxNormalizedEndX and page width. | ||
// Add a small buffer (20 chars) to ensure no text overflows the canvas. | ||
let canvasWidth = Math.ceil(maxNormalizedEndX * (pageWidth / charWidth)) + 20; | ||
canvasWidth = Math.max(canvasWidth, 1); | ||
// **9: Add a 20-char buffer to ensure we don’t cut off text.** | ||
maxLineWidthInChars += 20; | ||
// **12:** Compute the baseline (lowest point) of each line. This helps us understand vertical spacing. | ||
// **10: Determine the canvas width based on the measured maxLineWidthInChars.** | ||
const canvasWidth = Math.max(maxLineWidthInChars, 1); | ||
// **11: Compute the baseline (lowest y) of each line to measure vertical spacing.** | ||
const lineBaselines = finalLines.map((line) => | ||
@@ -114,3 +126,3 @@ Math.min(...line.map((a) => a.bottom_left.y)), | ||
// **13:** Compute vertical gaps between consecutive lines to determine line spacing. | ||
// **12: Compute the gaps between consecutive lines.** | ||
const verticalGaps: number[] = []; | ||
@@ -121,31 +133,30 @@ for (let i = 1; i < lineBaselines.length; i++) { | ||
// **14:** Estimate what a "normal" line spacing is by taking the median of all vertical gaps. | ||
// **13: Estimate a "normal" line spacing via the median of these gaps.** | ||
const normalLineSpacing = verticalGaps.length > 0 ? median(verticalGaps) : 0; | ||
// **15:** Create a 2D character canvas initialized with spaces, onto which we'll "print" text lines. | ||
// **14: Create a 2D character canvas (array of arrays), filled with spaces.** | ||
let canvas: string[][] = []; | ||
// **16:** lineIndex represents the current line of the canvas. Initialize with -1 so the first line starts at 0. | ||
// **15: lineIndex tracks which row of the canvas we’re on; start at -1 so the first line is index 0.** | ||
let lineIndex = -1; | ||
// **17:** Iterate over each line of processed text. | ||
// **16: Render each line of text into our canvas.** | ||
for (let i = 0; i < finalLines.length; i++) { | ||
if (i === 0) { | ||
// **18:** For the first line, just increment lineIndex to start at 0 with no extra spacing. | ||
// **17: For the very first line, just increment lineIndex once.** | ||
lineIndex++; | ||
ensureLineExists(canvas, lineIndex, canvasWidth); | ||
} else { | ||
// **19:** For subsequent lines, calculate how many extra blank lines to insert based on spacing. | ||
// **18: For subsequent lines, figure out how many blank lines to insert | ||
// based on the gap between this line’s baseline and the previous line’s baseline.** | ||
const gap = lineBaselines[i] - lineBaselines[i - 1]; | ||
let extraLines = 0; | ||
// **20:** If we have a known normal line spacing, and the gap is larger than expected, | ||
// insert extra blank lines proportional to the ratio of gap to normal spacing. | ||
if (normalLineSpacing > 0) { | ||
if (gap > 1.2 * normalLineSpacing) { | ||
extraLines = Math.max(Math.round(gap / normalLineSpacing) - 1, 0); | ||
} | ||
// **19: If the gap is significantly larger than the "normal" spacing, | ||
// insert blank lines proportionally.** | ||
if (normalLineSpacing > 0 && gap > 1.2 * normalLineSpacing) { | ||
extraLines = Math.max(Math.round(gap / normalLineSpacing) - 1, 0); | ||
} | ||
// **21:** Insert the calculated extra blank lines to maintain approximate vertical spacing. | ||
// **20: Insert the calculated extra blank lines.** | ||
for (let e = 0; e < extraLines; e++) { | ||
@@ -156,3 +167,3 @@ lineIndex++; | ||
// **22:** After adjusting for spacing, increment lineIndex for the current line of text. | ||
// **21: Move to the next line (row) in the canvas for this line’s text.** | ||
lineIndex++; | ||
@@ -162,15 +173,17 @@ ensureLineExists(canvas, lineIndex, canvasWidth); | ||
// **23:** Now place the annotations for the current line onto the canvas at the appropriate horizontal positions. | ||
// **22: Place each annotation’s text in the correct horizontal position for this line.** | ||
const lineAnnotations = finalLines[i]; | ||
for (const annotation of lineAnnotations) { | ||
const text = annotation.text; | ||
// **24:** Calculate the starting x-position in the canvas based on normalized coordinates. | ||
// **23: Calculate the starting x-position in the canvas by converting normalized x to char space.** | ||
const startXInChars = Math.round( | ||
annotation.bottom_left_normalized.x * canvasWidth, | ||
annotation.bottom_left_normalized.x * | ||
(pageWidth / HEURISTIC_CHAR_WIDTH), | ||
); | ||
// **25:** Place each character of the annotation text into the canvas. | ||
// **24: Place each character of the annotation in the canvas.** | ||
for (let j = 0; j < text.length; j++) { | ||
const xPos = startXInChars + j; | ||
// **26:** Ensure we don't exceed the canvas width. | ||
// **25: Don’t write beyond the right edge of the canvas.** | ||
if (xPos < canvasWidth) { | ||
@@ -183,3 +196,3 @@ canvas[lineIndex][xPos] = text[j]; | ||
// **27:** Trim trailing whitespace from each line to create a cleaner output. | ||
// **26: Trim trailing whitespace from each line to clean up the output.** | ||
canvas = canvas.map((row) => { | ||
@@ -190,11 +203,11 @@ const lineStr = row.join(""); | ||
// **29:** Join all lines to form the final page text. Trim any trailing whitespace from the entire text. | ||
// **27: Combine all rows into a single string, separating rows with newlines.** | ||
let pageText = canvas.map((line) => line.join("")).join("\n"); | ||
pageText = pageText.trimEnd(); | ||
// **30:** Surround the page text with lines of dashes to clearly delineate the text block. | ||
// **28: Surround the rendered text with lines of dashes for clarity.** | ||
pageText = | ||
"-".repeat(canvasWidth) + "\n" + pageText + "\n" + "-".repeat(canvasWidth); | ||
// **31:** Return the fully formatted text. | ||
// **29: Return the final formatted text.** | ||
return pageText; | ||
@@ -225,24 +238,2 @@ } | ||
/**
 * `estimateCharacterWidth` derives a per-character pixel width from a set of text annotations.
 * Each annotation with non-empty text contributes `width / text.length`; the collected
 * values are handed to `median` and its result is returned unchanged.
 * NOTE(review): the result for an empty collection is whatever `median` returns for an
 * empty list (previously documented here as 0) — confirm against `median`.
 *
 * @param textAnnotations - An array of text annotations with text and width fields.
 * @returns The median of the per-annotation character widths, in pixels.
 */
function estimateCharacterWidth(textAnnotations: TextAnnotation[]): number {
  // skip empty-text annotations so we never divide by zero
  const perCharWidths = textAnnotations
    .filter((ann) => ann.text.length > 0)
    .map((ann) => ann.width / ann.text.length);

  return median(perCharWidths);
}
/** | ||
* `groupWordsInSentence` groups annotations within a single line into logical "words" or "sentences". | ||
@@ -270,3 +261,3 @@ * It uses a set of heuristics involving horizontal proximity and similar height | ||
// use a padding factor to allow slight spaces between words | ||
const padding = 2; | ||
const padding = 1; | ||
const lastAnn = currentGroup[currentGroup.length - 1]; | ||
@@ -295,4 +286,6 @@ const characterWidth = (lastAnn.width / lastAnn.text.length) * padding; | ||
const groupedAnnotation = createGroupedAnnotation(currentGroup); | ||
groupedAnnotations.push(groupedAnnotation); | ||
currentGroup = [annotation]; | ||
if (groupedAnnotation.text.length > 0) { | ||
groupedAnnotations.push(groupedAnnotation); | ||
currentGroup = [annotation]; | ||
} | ||
} | ||
@@ -299,0 +292,0 @@ } |
{ | ||
"name": "@browserbasehq/stagehand", | ||
"version": "1.10.0-alpha-d90a5b9dbb7ec564f30bef9c8d123e8efc530b6f", | ||
"version": "1.10.0-alpha-e5db23c0cf75bee5c08b73f702eaf745309bb6d7", | ||
"description": "An AI web browsing framework focused on simplicity and extensibility.", | ||
@@ -5,0 +5,0 @@ "main": "./dist/index.js", |
@@ -38,14 +38,33 @@ <div id="toc" align="center"> | ||
<p align="center"> | ||
<a href="https://trendshift.io/repositories/12122" target="_blank"><img src="https://trendshift.io/api/badge/repositories/12122" alt="browserbase%2Fstagehand | Trendshift" style="width: 250px; height: 55px;" width="250" height="55"/></a> | ||
</p> | ||
--- | ||
Stagehand is the easiest way to build browser automations. It is fully compatible with [Playwright](https://playwright.dev/), offering three simple AI APIs (`act`, `extract`, and `observe`) on top of the base Playwright `Page` class that provide the building blocks for web automation via natural language. It also makes Playwright more accessible to non-technical users and less vulnerable to minor changes in the UI/DOM. | ||
Stagehand is the easiest way to build browser automations. It is fully compatible with [Playwright](https://playwright.dev/), offering three simple AI APIs (`act`, `extract`, and `observe`) on top of the base Playwright `Page` class that provide the building blocks for web automation via natural language. | ||
Anything that can be done in a browser can be done with Stagehand. Consider: | ||
Here's a sample of what you can do with Stagehand: | ||
1. Go to Hacker News and extract the top stories of the day | ||
1. Log into Amazon, search for AirPods, and buy the most relevant product | ||
1. Go to ESPN, search for Steph Curry, and get stats for his last 10 games | ||
```typescript | ||
// Keep your existing Playwright code unchanged | ||
await page.goto("https://docs.stagehand.dev"); | ||
Stagehand makes it easier to write durable, performant browser automation code. When used with [Browserbase](https://browserbase.com/), it offers unparalleled debugging tools like session replay and step-by-step debugging. | ||
// Stagehand AI: Extract data from the page | ||
const { description } = await page.extract({ | ||
instruction: "Extract the description of the page", | ||
schema: z.object({ | ||
description: z.string(), | ||
}), | ||
}); | ||
// Stagehand AI: Act on the page | ||
await page.act({ action: "click on the 'Quickstart'" }); | ||
``` | ||
## Why? | ||
**Stagehand adds determinism to otherwise unpredictable agents.** | ||
While there's no limit to what you could instruct Stagehand to do, our primitives allow you to control how much you want to leave to an AI. It works best when your code is a sequence of atomic actions. Instead of writing a single script for a single website, Stagehand allows you to write durable, self-healing, and repeatable web automation workflows that actually work. | ||
> [!NOTE] | ||
@@ -52,0 +71,0 @@ > `Stagehand` is currently available as an early release, and we're actively seeking feedback from the community. Please join our [Slack community](https://join.slack.com/t/stagehand-dev/shared_invite/zt-2tdncfgkk-fF8y5U0uJzR2y2_M9c9OJA) to stay updated on the latest developments and provide feedback. |
Sorry, the diff of this file is too big to display
Long strings
Supply chain risk: Contains long string literals, which may be a sign of obfuscated or packed code.
Found 1 instance in 1 package
Long strings
Supply chain risk: Contains long string literals, which may be a sign of obfuscated or packed code.
Found 1 instance in 1 package
600794
176
15746
145