@browserbasehq/stagehand
Advanced tools
Comparing version 1.5.1-alpha-85483fe091544fc079015c62b6923b03f8b9caa7 to 1.6.0-alpha-474217cfaff8e68614212b66baa62d35493fd2ce
@@ -1,3 +0,13 @@ | ||
import { Stagehand } from "../lib"; | ||
import { AvailableModel, Stagehand } from "../lib"; | ||
import { LogLine } from "../types/log"; | ||
export declare const env: "BROWSERBASE" | "LOCAL"; | ||
export declare const initStagehand: ({ modelName, domSettleTimeoutMs, logger, }: { | ||
modelName: AvailableModel; | ||
domSettleTimeoutMs?: number; | ||
logger: EvalLogger; | ||
}) => Promise<{ | ||
stagehand: Stagehand; | ||
logger: EvalLogger; | ||
initResponse: import("../lib").InitResult; | ||
}>; | ||
type LogLineEval = LogLine & { | ||
@@ -16,2 +26,3 @@ parsedAuxiliary?: string | object; | ||
} | ||
export declare function normalizeString(str: string): string; | ||
export {}; |
import { Page, BrowserContext, Browser } from '@playwright/test'; | ||
import { z } from 'zod'; | ||
import { ZodType, z } from 'zod'; | ||
import Browserbase from '@browserbasehq/sdk'; | ||
import { ClientOptions as ClientOptions$2 } from '@anthropic-ai/sdk'; | ||
import { Tool } from '@anthropic-ai/sdk/resources'; | ||
import { ClientOptions as ClientOptions$1 } from 'openai'; | ||
import { ChatCompletionTool } from 'openai/resources'; | ||
import { ChatCompletionTool, ChatCompletionToolChoiceOption, ChatCompletion } from 'openai/resources'; | ||
@@ -26,3 +25,40 @@ type LogLine = { | ||
type ClientOptions = ClientOptions$1 | ClientOptions$2; | ||
type ToolCall = Tool | ChatCompletionTool; | ||
type ToolCall = ChatCompletionTool; | ||
type AnthropicTransformedResponse = { | ||
id: string; | ||
object: string; | ||
created: number; | ||
model: string; | ||
choices: { | ||
index: number; | ||
message: { | ||
role: string; | ||
content: string | null; | ||
tool_calls: { | ||
id: string; | ||
type: string; | ||
function: { | ||
name: string; | ||
arguments: string; | ||
}; | ||
}[]; | ||
}; | ||
finish_reason: string; | ||
}[]; | ||
usage: { | ||
prompt_tokens: number; | ||
completion_tokens: number; | ||
total_tokens: number; | ||
}; | ||
}; | ||
interface AnthropicJsonSchemaObject { | ||
definitions?: { | ||
MySchema?: { | ||
properties?: Record<string, unknown>; | ||
required?: string[]; | ||
}; | ||
}; | ||
properties?: Record<string, unknown>; | ||
required?: string[]; | ||
} | ||
@@ -57,9 +93,10 @@ interface ChatMessage { | ||
name: string; | ||
schema: any; | ||
schema: ZodType; | ||
}; | ||
tools?: ToolCall[]; | ||
tool_choice?: string; | ||
tool_choice?: "auto" | ChatCompletionToolChoiceOption; | ||
maxTokens?: number; | ||
requestId: string; | ||
} | ||
type LLMResponse = AnthropicTransformedResponse | ChatCompletion; | ||
declare abstract class LLMClient { | ||
@@ -69,3 +106,3 @@ modelName: AvailableModel; | ||
constructor(modelName: AvailableModel); | ||
abstract createChatCompletion(options: ChatCompletionOptions): Promise<any>; | ||
abstract createChatCompletion<T = LLMResponse>(options: ChatCompletionOptions): Promise<T>; | ||
abstract logger: (message: { | ||
@@ -171,2 +208,7 @@ category?: string; | ||
} | ||
interface GotoOptions { | ||
timeout?: number; | ||
waitUntil?: "load" | "domcontentloaded" | "networkidle" | "commit"; | ||
referer?: string; | ||
} | ||
@@ -212,2 +254,2 @@ declare class Stagehand { | ||
export { type ActOptions, type ActResult, type AvailableModel, type BrowserResult, type ClientOptions, type ConstructorParams, type ExtractOptions, type ExtractResult, type InitFromPageOptions, type InitFromPageResult, type InitOptions, type InitResult, type LogLine, type ModelProvider, type ObserveOptions, type ObserveResult, PlaywrightCommandException, PlaywrightCommandMethodNotSupportedException, Stagehand, type ToolCall }; | ||
export { type ActOptions, type ActResult, type AnthropicJsonSchemaObject, type AnthropicTransformedResponse, type AvailableModel, type BrowserResult, type ClientOptions, type ConstructorParams, type ExtractOptions, type ExtractResult, type GotoOptions, type InitFromPageOptions, type InitFromPageResult, type InitOptions, type InitResult, type LogLine, type ModelProvider, type ObserveOptions, type ObserveResult, PlaywrightCommandException, PlaywrightCommandMethodNotSupportedException, Stagehand, type ToolCall }; |
/** | ||
* A file system cache to skip inference when repeating steps | ||
* It also acts as the source of truth for identifying previously seen actions and observatiosn | ||
* It also acts as the source of truth for identifying previously seen actions and observations | ||
*/ | ||
@@ -5,0 +5,0 @@ declare class Cache { |
import { LogLine } from "../../types/log"; | ||
export interface CacheEntry { | ||
timestamp: number; | ||
data: any; | ||
data: unknown; | ||
requestId: string; | ||
@@ -26,3 +26,3 @@ } | ||
protected ensureCacheDirectory(): void; | ||
protected createHash(data: any): string; | ||
protected createHash(data: unknown): string; | ||
protected sleep(ms: number): Promise<void>; | ||
@@ -43,3 +43,3 @@ acquireLock(): Promise<boolean>; | ||
*/ | ||
get(hashObj: Record<string, any> | string, requestId: string): Promise<T["data"] | null>; | ||
get(hashObj: Record<string, unknown> | string, requestId: string): Promise<T["data"] | null>; | ||
/** | ||
@@ -51,4 +51,4 @@ * Stores data in the cache based on the provided options and requestId. | ||
*/ | ||
set(hashObj: Record<string, any>, data: T["data"], requestId: string): Promise<void>; | ||
delete(hashObj: Record<string, any>): Promise<void>; | ||
set(hashObj: Record<string, unknown>, data: T["data"], requestId: string): Promise<void>; | ||
delete(hashObj: Record<string, unknown>): Promise<void>; | ||
/** | ||
@@ -55,0 +55,0 @@ * Tracks the usage of a hash with a specific requestId. |
@@ -14,3 +14,3 @@ import { BaseCache, CacheEntry } from "./BaseCache"; | ||
*/ | ||
get(options: Record<string, any>, requestId: string): Promise<any | null>; | ||
get<T>(options: Record<string, unknown>, requestId: string): Promise<T | null>; | ||
/** | ||
@@ -22,3 +22,3 @@ * Overrides the set method to include cache cleanup logic. | ||
*/ | ||
set(options: Record<string, any>, data: any, requestId: string): Promise<void>; | ||
set(options: Record<string, unknown>, data: unknown, requestId: string): Promise<void>; | ||
} |
@@ -1,1 +0,1 @@ | ||
export declare const scriptContent = "(() => {\n // lib/dom/xpathUtils.ts\n function getParentElement(node) {\n return isElementNode(node) ? node.parentElement : node.parentNode;\n }\n function getCombinations(attributes, size) {\n const results = [];\n function helper(start, combo) {\n if (combo.length === size) {\n results.push([...combo]);\n return;\n }\n for (let i = start; i < attributes.length; i++) {\n combo.push(attributes[i]);\n helper(i + 1, combo);\n combo.pop();\n }\n }\n helper(0, []);\n return results;\n }\n function isXPathFirstResultElement(xpath, target) {\n try {\n const result = document.evaluate(\n xpath,\n document.documentElement,\n null,\n XPathResult.ORDERED_NODE_SNAPSHOT_TYPE,\n null\n );\n return result.snapshotItem(0) === target;\n } catch (error) {\n console.warn(`Invalid XPath expression: ${xpath}`, error);\n return false;\n }\n }\n function escapeXPathString(value) {\n if (value.includes(\"'\")) {\n if (value.includes('\"')) {\n return \"concat(\" + value.split(/('+)/).map((part) => {\n if (part === \"'\") {\n return `\"'\"`;\n } else if (part.startsWith(\"'\") && part.endsWith(\"'\")) {\n return `\"${part}\"`;\n } else {\n return `'${part}'`;\n }\n }).join(\",\") + \")\";\n } else {\n return `\"${value}\"`;\n }\n } else {\n return `'${value}'`;\n }\n }\n async function generateXPathsForElement(element) {\n if (!element) return [];\n const [complexXPath, standardXPath, idBasedXPath] = await Promise.all([\n generateComplexXPath(element),\n generateStandardXPath(element),\n generatedIdBasedXPath(element)\n ]);\n return [standardXPath, ...idBasedXPath ? 
[idBasedXPath] : [], complexXPath];\n }\n async function generateComplexXPath(element) {\n const parts = [];\n let currentElement = element;\n while (currentElement && (isTextNode(currentElement) || isElementNode(currentElement))) {\n if (isElementNode(currentElement)) {\n const el = currentElement;\n let selector = el.tagName.toLowerCase();\n const attributePriority = [\n \"data-qa\",\n \"data-component\",\n \"data-role\",\n \"role\",\n \"aria-role\",\n \"type\",\n \"name\",\n \"aria-label\",\n \"placeholder\",\n \"title\",\n \"alt\"\n ];\n const attributes = attributePriority.map((attr) => {\n let value = el.getAttribute(attr);\n if (attr === \"href-full\" && value) {\n value = el.getAttribute(\"href\");\n }\n return value ? { attr: attr === \"href-full\" ? \"href\" : attr, value } : null;\n }).filter((attr) => attr !== null);\n let uniqueSelector = \"\";\n for (let i = 1; i <= attributes.length; i++) {\n const combinations = getCombinations(attributes, i);\n for (const combo of combinations) {\n const conditions = combo.map((a) => `@${a.attr}=${escapeXPathString(a.value)}`).join(\" and \");\n const xpath2 = `//${selector}[${conditions}]`;\n if (isXPathFirstResultElement(xpath2, el)) {\n uniqueSelector = xpath2;\n break;\n }\n }\n if (uniqueSelector) break;\n }\n if (uniqueSelector) {\n parts.unshift(uniqueSelector.replace(\"//\", \"\"));\n break;\n } else {\n const parent = getParentElement(el);\n if (parent) {\n const siblings = Array.from(parent.children).filter(\n (sibling) => sibling.tagName === el.tagName\n );\n const index = siblings.indexOf(el) + 1;\n selector += siblings.length > 1 ? 
`[${index}]` : \"\";\n }\n parts.unshift(selector);\n }\n }\n currentElement = getParentElement(currentElement);\n }\n const xpath = \"//\" + parts.join(\"/\");\n return xpath;\n }\n async function generateStandardXPath(element) {\n const parts = [];\n while (element && (isTextNode(element) || isElementNode(element))) {\n let index = 0;\n let hasSameTypeSiblings = false;\n const siblings = element.parentElement ? Array.from(element.parentElement.childNodes) : [];\n for (let i = 0; i < siblings.length; i++) {\n const sibling = siblings[i];\n if (sibling.nodeType === element.nodeType && sibling.nodeName === element.nodeName) {\n index = index + 1;\n hasSameTypeSiblings = true;\n if (sibling.isSameNode(element)) {\n break;\n }\n }\n }\n if (element.nodeName !== \"#text\") {\n const tagName = element.nodeName.toLowerCase();\n const pathIndex = hasSameTypeSiblings ? `[${index}]` : \"\";\n parts.unshift(`${tagName}${pathIndex}`);\n }\n element = element.parentElement;\n }\n return parts.length ? 
`/${parts.join(\"/\")}` : \"\";\n }\n async function generatedIdBasedXPath(element) {\n if (isElementNode(element) && element.id) {\n return `//*[@id='${element.id}']`;\n }\n return null;\n }\n\n // lib/dom/utils.ts\n async function waitForDomSettle() {\n return new Promise((resolve) => {\n const createTimeout = () => {\n return setTimeout(() => {\n resolve();\n }, 2e3);\n };\n let timeout = createTimeout();\n const observer = new MutationObserver(() => {\n clearTimeout(timeout);\n timeout = createTimeout();\n });\n observer.observe(window.document.body, { childList: true, subtree: true });\n });\n }\n window.waitForDomSettle = waitForDomSettle;\n function calculateViewportHeight() {\n return Math.ceil(window.innerHeight * 0.75);\n }\n\n // lib/dom/process.ts\n function isElementNode(node) {\n return node.nodeType === Node.ELEMENT_NODE;\n }\n function isTextNode(node) {\n return node.nodeType === Node.TEXT_NODE && Boolean(node.textContent?.trim());\n }\n async function processDom(chunksSeen) {\n const { chunk, chunksArray } = await pickChunk(chunksSeen);\n const { outputString, selectorMap } = await processElements(chunk);\n console.log(\n `Stagehand (Browser Process): Extracted dom elements:\n${outputString}`\n );\n return {\n outputString,\n selectorMap,\n chunk,\n chunks: chunksArray\n };\n }\n async function processAllOfDom() {\n console.log(\"Stagehand (Browser Process): Processing all of DOM\");\n const viewportHeight = calculateViewportHeight();\n const documentHeight = document.documentElement.scrollHeight;\n const totalChunks = Math.ceil(documentHeight / viewportHeight);\n let index = 0;\n const results = [];\n for (let chunk = 0; chunk < totalChunks; chunk++) {\n const result = await processElements(chunk, true, index);\n results.push(result);\n index += Object.keys(result.selectorMap).length;\n }\n await scrollToHeight(0);\n const allOutputString = results.map((result) => result.outputString).join(\"\");\n const allSelectorMap = results.reduce(\n (acc, 
result) => ({ ...acc, ...result.selectorMap }),\n {}\n );\n console.log(\n `Stagehand (Browser Process): All dom elements: ${allOutputString}`\n );\n return {\n outputString: allOutputString,\n selectorMap: allSelectorMap\n };\n }\n async function scrollToHeight(height) {\n window.scrollTo({ top: height, left: 0, behavior: \"smooth\" });\n await new Promise((resolve) => {\n let scrollEndTimer;\n const handleScrollEnd = () => {\n clearTimeout(scrollEndTimer);\n scrollEndTimer = window.setTimeout(() => {\n window.removeEventListener(\"scroll\", handleScrollEnd);\n resolve();\n }, 100);\n };\n window.addEventListener(\"scroll\", handleScrollEnd, { passive: true });\n handleScrollEnd();\n });\n }\n var xpathCache = /* @__PURE__ */ new Map();\n async function processElements(chunk, scrollToChunk = true, indexOffset = 0) {\n console.time(\"processElements:total\");\n const viewportHeight = calculateViewportHeight();\n const chunkHeight = viewportHeight * chunk;\n const maxScrollTop = document.documentElement.scrollHeight - viewportHeight;\n const offsetTop = Math.min(chunkHeight, maxScrollTop);\n if (scrollToChunk) {\n console.time(\"processElements:scroll\");\n await scrollToHeight(offsetTop);\n console.timeEnd(\"processElements:scroll\");\n }\n const candidateElements = [];\n const DOMQueue = [...document.body.childNodes];\n console.log(\"Stagehand (Browser Process): Generating candidate elements\");\n console.time(\"processElements:findCandidates\");\n while (DOMQueue.length > 0) {\n const element = DOMQueue.pop();\n let shouldAddElement = false;\n if (element && isElementNode(element)) {\n const childrenCount = element.childNodes.length;\n for (let i = childrenCount - 1; i >= 0; i--) {\n const child = element.childNodes[i];\n DOMQueue.push(child);\n }\n if (isInteractiveElement(element)) {\n if (isActive(element) && isVisible(element)) {\n shouldAddElement = true;\n }\n }\n if (isLeafElement(element)) {\n if (isActive(element) && isVisible(element)) {\n 
shouldAddElement = true;\n }\n }\n }\n if (element && isTextNode(element) && isTextVisible(element)) {\n shouldAddElement = true;\n }\n if (shouldAddElement) {\n candidateElements.push(element);\n }\n }\n console.timeEnd(\"processElements:findCandidates\");\n const selectorMap = {};\n let outputString = \"\";\n console.log(\n `Stagehand (Browser Process): Processing candidate elements: ${candidateElements.length}`\n );\n console.time(\"processElements:processCandidates\");\n console.time(\"processElements:generateXPaths\");\n const xpathLists = await Promise.all(\n candidateElements.map(async (element) => {\n if (xpathCache.has(element)) {\n return xpathCache.get(element);\n }\n const xpaths = await generateXPathsForElement(element);\n xpathCache.set(element, xpaths);\n return xpaths;\n })\n );\n console.timeEnd(\"processElements:generateXPaths\");\n candidateElements.forEach((element, index) => {\n const xpaths = xpathLists[index];\n let elementOutput = \"\";\n if (isTextNode(element)) {\n const textContent = element.textContent?.trim();\n if (textContent) {\n elementOutput += `${index + indexOffset}:${textContent}\n`;\n }\n } else if (isElementNode(element)) {\n const tagName = element.tagName.toLowerCase();\n const attributes = collectEssentialAttributes(element);\n const openingTag = `<${tagName}${attributes ? 
\" \" + attributes : \"\"}>`;\n const closingTag = `</${tagName}>`;\n const textContent = element.textContent?.trim() || \"\";\n elementOutput += `${index + indexOffset}:${openingTag}${textContent}${closingTag}\n`;\n }\n outputString += elementOutput;\n selectorMap[index + indexOffset] = xpaths;\n });\n console.timeEnd(\"processElements:processCandidates\");\n console.timeEnd(\"processElements:total\");\n return {\n outputString,\n selectorMap\n };\n }\n function collectEssentialAttributes(element) {\n const essentialAttributes = [\n \"id\",\n \"class\",\n \"href\",\n \"src\",\n \"aria-label\",\n \"aria-name\",\n \"aria-role\",\n \"aria-description\",\n \"aria-expanded\",\n \"aria-haspopup\",\n \"type\",\n \"value\"\n ];\n const attrs = essentialAttributes.map((attr) => {\n const value = element.getAttribute(attr);\n return value ? `${attr}=\"${value}\"` : \"\";\n }).filter((attr) => attr !== \"\");\n Array.from(element.attributes).forEach((attr) => {\n if (attr.name.startsWith(\"data-\")) {\n attrs.push(`${attr.name}=\"${attr.value}\"`);\n }\n });\n return attrs.join(\" \");\n }\n window.processDom = processDom;\n window.processAllOfDom = processAllOfDom;\n window.processElements = processElements;\n window.scrollToHeight = scrollToHeight;\n var leafElementDenyList = [\"SVG\", \"IFRAME\", \"SCRIPT\", \"STYLE\", \"LINK\"];\n var interactiveElementTypes = [\n \"A\",\n \"BUTTON\",\n \"DETAILS\",\n \"EMBED\",\n \"INPUT\",\n \"LABEL\",\n \"MENU\",\n \"MENUITEM\",\n \"OBJECT\",\n \"SELECT\",\n \"TEXTAREA\",\n \"SUMMARY\"\n ];\n var interactiveRoles = [\n \"button\",\n \"menu\",\n \"menuitem\",\n \"link\",\n \"checkbox\",\n \"radio\",\n \"slider\",\n \"tab\",\n \"tabpanel\",\n \"textbox\",\n \"combobox\",\n \"grid\",\n \"listbox\",\n \"option\",\n \"progressbar\",\n \"scrollbar\",\n \"searchbox\",\n \"switch\",\n \"tree\",\n \"treeitem\",\n \"spinbutton\",\n \"tooltip\"\n ];\n var interactiveAriaRoles = [\"menu\", \"menuitem\", \"button\"];\n var isVisible = (element) => 
{\n const rect = element.getBoundingClientRect();\n if (rect.width === 0 || rect.height === 0 || rect.top < 0 || rect.top > window.innerHeight) {\n return false;\n }\n if (!isTopElement(element, rect)) {\n return false;\n }\n const visible = element.checkVisibility({\n checkOpacity: true,\n checkVisibilityCSS: true\n });\n return visible;\n };\n var isTextVisible = (element) => {\n const range = document.createRange();\n range.selectNodeContents(element);\n const rect = range.getBoundingClientRect();\n if (rect.width === 0 || rect.height === 0 || rect.top < 0 || rect.top > window.innerHeight) {\n return false;\n }\n const parent = element.parentElement;\n if (!parent) {\n return false;\n }\n if (!isTopElement(parent, rect)) {\n return false;\n }\n const visible = parent.checkVisibility({\n checkOpacity: true,\n checkVisibilityCSS: true\n });\n return visible;\n };\n function isTopElement(elem, rect) {\n const points = [\n { x: rect.left + rect.width * 0.25, y: rect.top + rect.height * 0.25 },\n { x: rect.left + rect.width * 0.75, y: rect.top + rect.height * 0.25 },\n { x: rect.left + rect.width * 0.25, y: rect.top + rect.height * 0.75 },\n { x: rect.left + rect.width * 0.75, y: rect.top + rect.height * 0.75 },\n { x: rect.left + rect.width / 2, y: rect.top + rect.height / 2 }\n ];\n return points.some((point) => {\n const topEl = document.elementFromPoint(point.x, point.y);\n let current = topEl;\n while (current && current !== document.body) {\n if (current.isSameNode(elem)) {\n return true;\n }\n current = current.parentElement;\n }\n return false;\n });\n }\n var isActive = (element) => {\n if (element.hasAttribute(\"disabled\") || element.hasAttribute(\"hidden\") || element.getAttribute(\"aria-disabled\") === \"true\") {\n return false;\n }\n return true;\n };\n var isInteractiveElement = (element) => {\n const elementType = element.tagName;\n const elementRole = element.getAttribute(\"role\");\n const elementAriaRole = element.getAttribute(\"aria-role\");\n 
return elementType && interactiveElementTypes.includes(elementType) || elementRole && interactiveRoles.includes(elementRole) || elementAriaRole && interactiveAriaRoles.includes(elementAriaRole);\n };\n var isLeafElement = (element) => {\n if (element.textContent === \"\") {\n return false;\n }\n if (element.childNodes.length === 0) {\n return !leafElementDenyList.includes(element.tagName);\n }\n if (element.childNodes.length === 1 && isTextNode(element.childNodes[0])) {\n return true;\n }\n return false;\n };\n async function pickChunk(chunksSeen) {\n const viewportHeight = calculateViewportHeight();\n const documentHeight = document.documentElement.scrollHeight;\n const chunks = Math.ceil(documentHeight / viewportHeight);\n const chunksArray = Array.from({ length: chunks }, (_, i) => i);\n const chunksRemaining = chunksArray.filter((chunk2) => {\n return !chunksSeen.includes(chunk2);\n });\n const currentScrollPosition = window.scrollY;\n const closestChunk = chunksRemaining.reduce((closest, current) => {\n const currentChunkTop = viewportHeight * current;\n const closestChunkTop = viewportHeight * closest;\n return Math.abs(currentScrollPosition - currentChunkTop) < Math.abs(currentScrollPosition - closestChunkTop) ? 
current : closest;\n }, chunksRemaining[0]);\n const chunk = closestChunk;\n if (chunk === void 0) {\n throw new Error(`No chunks remaining to check: ${chunksRemaining}`);\n }\n return {\n chunk,\n chunksArray\n };\n }\n\n // lib/dom/debug.ts\n async function debugDom() {\n window.chunkNumber = 0;\n const { selectorMap: multiSelectorMap, outputString } = await window.processElements(window.chunkNumber);\n const selectorMap = multiSelectorMapToSelectorMap(multiSelectorMap);\n drawChunk(selectorMap);\n setupChunkNav();\n }\n function multiSelectorMapToSelectorMap(multiSelectorMap) {\n return Object.fromEntries(\n Object.entries(multiSelectorMap).map(([key, selectors]) => [\n Number(key),\n selectors[0]\n ])\n );\n }\n function drawChunk(selectorMap) {\n if (!window.showChunks) return;\n cleanupMarkers();\n Object.entries(selectorMap).forEach(([_index, selector]) => {\n const element = document.evaluate(\n selector,\n document,\n null,\n XPathResult.FIRST_ORDERED_NODE_TYPE,\n null\n ).singleNodeValue;\n if (element) {\n let rect;\n if (element.nodeType === Node.ELEMENT_NODE) {\n rect = element.getBoundingClientRect();\n } else {\n const range = document.createRange();\n range.selectNodeContents(element);\n rect = range.getBoundingClientRect();\n }\n const color = \"grey\";\n const overlay = document.createElement(\"div\");\n overlay.style.position = \"absolute\";\n overlay.style.left = `${rect.left + window.scrollX}px`;\n overlay.style.top = `${rect.top + window.scrollY}px`;\n overlay.style.padding = \"2px\";\n overlay.style.width = `${rect.width}px`;\n overlay.style.height = `${rect.height}px`;\n overlay.style.backgroundColor = color;\n overlay.className = \"stagehand-marker\";\n overlay.style.opacity = \"0.3\";\n overlay.style.zIndex = \"1000000000\";\n overlay.style.border = \"1px solid\";\n overlay.style.pointerEvents = \"none\";\n document.body.appendChild(overlay);\n }\n });\n }\n async function cleanupDebug() {\n cleanupMarkers();\n cleanupNav();\n }\n function 
cleanupMarkers() {\n const markers = document.querySelectorAll(\".stagehand-marker\");\n markers.forEach((marker) => {\n marker.remove();\n });\n }\n function cleanupNav() {\n const stagehandNavElements = document.querySelectorAll(\".stagehand-nav\");\n stagehandNavElements.forEach((element) => {\n element.remove();\n });\n }\n function setupChunkNav() {\n const viewportHeight = calculateViewportHeight();\n const documentHeight = document.documentElement.scrollHeight;\n const totalChunks = Math.ceil(documentHeight / viewportHeight);\n if (window.chunkNumber > 0) {\n const prevChunkButton = document.createElement(\"button\");\n prevChunkButton.className = \"stagehand-nav\";\n prevChunkButton.textContent = \"Previous\";\n prevChunkButton.style.marginLeft = \"50px\";\n prevChunkButton.style.position = \"fixed\";\n prevChunkButton.style.bottom = \"10px\";\n prevChunkButton.style.left = \"50%\";\n prevChunkButton.style.transform = \"translateX(-50%)\";\n prevChunkButton.style.zIndex = \"1000000000\";\n prevChunkButton.onclick = async () => {\n cleanupMarkers();\n cleanupNav();\n window.chunkNumber -= 1;\n window.scrollTo(0, window.chunkNumber * viewportHeight);\n await window.waitForDomSettle();\n const { selectorMap: multiSelectorMap } = await window.processElements(\n window.chunkNumber\n );\n const selectorMap = multiSelectorMapToSelectorMap(multiSelectorMap);\n drawChunk(selectorMap);\n setupChunkNav();\n };\n document.body.appendChild(prevChunkButton);\n }\n if (totalChunks > window.chunkNumber) {\n const nextChunkButton = document.createElement(\"button\");\n nextChunkButton.className = \"stagehand-nav\";\n nextChunkButton.textContent = \"Next\";\n nextChunkButton.style.marginRight = \"50px\";\n nextChunkButton.style.position = \"fixed\";\n nextChunkButton.style.bottom = \"10px\";\n nextChunkButton.style.right = \"50%\";\n nextChunkButton.style.transform = \"translateX(50%)\";\n nextChunkButton.style.zIndex = \"1000000000\";\n nextChunkButton.onclick = async () => 
{\n cleanupMarkers();\n cleanupNav();\n window.chunkNumber += 1;\n window.scrollTo(0, window.chunkNumber * viewportHeight);\n await window.waitForDomSettle();\n const { selectorMap: multiSelectorMap } = await window.processElements(\n window.chunkNumber\n );\n const selectorMap = multiSelectorMapToSelectorMap(multiSelectorMap);\n drawChunk(selectorMap);\n setupChunkNav();\n };\n document.body.appendChild(nextChunkButton);\n }\n }\n window.debugDom = debugDom;\n window.cleanupDebug = cleanupDebug;\n})();\n"; | ||
export declare const scriptContent = "(() => {\n // lib/dom/xpathUtils.ts\n function getParentElement(node) {\n return isElementNode(node) ? node.parentElement : node.parentNode;\n }\n function getCombinations(attributes, size) {\n const results = [];\n function helper(start, combo) {\n if (combo.length === size) {\n results.push([...combo]);\n return;\n }\n for (let i = start; i < attributes.length; i++) {\n combo.push(attributes[i]);\n helper(i + 1, combo);\n combo.pop();\n }\n }\n helper(0, []);\n return results;\n }\n function isXPathFirstResultElement(xpath, target) {\n try {\n const result = document.evaluate(\n xpath,\n document.documentElement,\n null,\n XPathResult.ORDERED_NODE_SNAPSHOT_TYPE,\n null\n );\n return result.snapshotItem(0) === target;\n } catch (error) {\n console.warn(`Invalid XPath expression: ${xpath}`, error);\n return false;\n }\n }\n function escapeXPathString(value) {\n if (value.includes(\"'\")) {\n if (value.includes('\"')) {\n return \"concat(\" + value.split(/('+)/).map((part) => {\n if (part === \"'\") {\n return `\"'\"`;\n } else if (part.startsWith(\"'\") && part.endsWith(\"'\")) {\n return `\"${part}\"`;\n } else {\n return `'${part}'`;\n }\n }).join(\",\") + \")\";\n } else {\n return `\"${value}\"`;\n }\n } else {\n return `'${value}'`;\n }\n }\n async function generateXPathsForElement(element) {\n if (!element) return [];\n const [complexXPath, standardXPath, idBasedXPath] = await Promise.all([\n generateComplexXPath(element),\n generateStandardXPath(element),\n generatedIdBasedXPath(element)\n ]);\n return [standardXPath, ...idBasedXPath ? 
[idBasedXPath] : [], complexXPath];\n }\n async function generateComplexXPath(element) {\n const parts = [];\n let currentElement = element;\n while (currentElement && (isTextNode(currentElement) || isElementNode(currentElement))) {\n if (isElementNode(currentElement)) {\n const el = currentElement;\n let selector = el.tagName.toLowerCase();\n const attributePriority = [\n \"data-qa\",\n \"data-component\",\n \"data-role\",\n \"role\",\n \"aria-role\",\n \"type\",\n \"name\",\n \"aria-label\",\n \"placeholder\",\n \"title\",\n \"alt\"\n ];\n const attributes = attributePriority.map((attr) => {\n let value = el.getAttribute(attr);\n if (attr === \"href-full\" && value) {\n value = el.getAttribute(\"href\");\n }\n return value ? { attr: attr === \"href-full\" ? \"href\" : attr, value } : null;\n }).filter((attr) => attr !== null);\n let uniqueSelector = \"\";\n for (let i = 1; i <= attributes.length; i++) {\n const combinations = getCombinations(attributes, i);\n for (const combo of combinations) {\n const conditions = combo.map((a) => `@${a.attr}=${escapeXPathString(a.value)}`).join(\" and \");\n const xpath2 = `//${selector}[${conditions}]`;\n if (isXPathFirstResultElement(xpath2, el)) {\n uniqueSelector = xpath2;\n break;\n }\n }\n if (uniqueSelector) break;\n }\n if (uniqueSelector) {\n parts.unshift(uniqueSelector.replace(\"//\", \"\"));\n break;\n } else {\n const parent = getParentElement(el);\n if (parent) {\n const siblings = Array.from(parent.children).filter(\n (sibling) => sibling.tagName === el.tagName\n );\n const index = siblings.indexOf(el) + 1;\n selector += siblings.length > 1 ? 
`[${index}]` : \"\";\n }\n parts.unshift(selector);\n }\n }\n currentElement = getParentElement(currentElement);\n }\n const xpath = \"//\" + parts.join(\"/\");\n return xpath;\n }\n async function generateStandardXPath(element) {\n const parts = [];\n while (element && (isTextNode(element) || isElementNode(element))) {\n let index = 0;\n let hasSameTypeSiblings = false;\n const siblings = element.parentElement ? Array.from(element.parentElement.childNodes) : [];\n for (let i = 0; i < siblings.length; i++) {\n const sibling = siblings[i];\n if (sibling.nodeType === element.nodeType && sibling.nodeName === element.nodeName) {\n index = index + 1;\n hasSameTypeSiblings = true;\n if (sibling.isSameNode(element)) {\n break;\n }\n }\n }\n if (element.nodeName !== \"#text\") {\n const tagName = element.nodeName.toLowerCase();\n const pathIndex = hasSameTypeSiblings ? `[${index}]` : \"\";\n parts.unshift(`${tagName}${pathIndex}`);\n }\n element = element.parentElement;\n }\n return parts.length ? 
`/${parts.join(\"/\")}` : \"\";\n }\n async function generatedIdBasedXPath(element) {\n if (isElementNode(element) && element.id) {\n return `//*[@id='${element.id}']`;\n }\n return null;\n }\n\n // lib/dom/utils.ts\n async function waitForDomSettle() {\n return new Promise((resolve) => {\n const createTimeout = () => {\n return setTimeout(() => {\n resolve();\n }, 2e3);\n };\n let timeout = createTimeout();\n const observer = new MutationObserver(() => {\n clearTimeout(timeout);\n timeout = createTimeout();\n });\n observer.observe(window.document.body, { childList: true, subtree: true });\n });\n }\n window.waitForDomSettle = waitForDomSettle;\n function calculateViewportHeight() {\n return Math.ceil(window.innerHeight * 0.75);\n }\n\n // lib/dom/process.ts\n function isElementNode(node) {\n return node.nodeType === Node.ELEMENT_NODE;\n }\n function isTextNode(node) {\n return node.nodeType === Node.TEXT_NODE && Boolean(node.textContent?.trim());\n }\n async function processDom(chunksSeen) {\n const { chunk, chunksArray } = await pickChunk(chunksSeen);\n const { outputString, selectorMap } = await processElements(chunk);\n console.log(\n `Stagehand (Browser Process): Extracted dom elements:\n${outputString}`\n );\n return {\n outputString,\n selectorMap,\n chunk,\n chunks: chunksArray\n };\n }\n async function processAllOfDom() {\n console.log(\"Stagehand (Browser Process): Processing all of DOM\");\n const viewportHeight = calculateViewportHeight();\n const documentHeight = document.documentElement.scrollHeight;\n const totalChunks = Math.ceil(documentHeight / viewportHeight);\n let index = 0;\n const results = [];\n for (let chunk = 0; chunk < totalChunks; chunk++) {\n const result = await processElements(chunk, true, index);\n results.push(result);\n index += Object.keys(result.selectorMap).length;\n }\n await scrollToHeight(0);\n const allOutputString = results.map((result) => result.outputString).join(\"\");\n const allSelectorMap = results.reduce(\n (acc, 
result) => ({ ...acc, ...result.selectorMap }),\n {}\n );\n console.log(\n `Stagehand (Browser Process): All dom elements: ${allOutputString}`\n );\n return {\n outputString: allOutputString,\n selectorMap: allSelectorMap\n };\n }\n async function scrollToHeight(height) {\n window.scrollTo({ top: height, left: 0, behavior: \"smooth\" });\n await new Promise((resolve) => {\n let scrollEndTimer;\n const handleScrollEnd = () => {\n clearTimeout(scrollEndTimer);\n scrollEndTimer = window.setTimeout(() => {\n window.removeEventListener(\"scroll\", handleScrollEnd);\n resolve();\n }, 100);\n };\n window.addEventListener(\"scroll\", handleScrollEnd, { passive: true });\n handleScrollEnd();\n });\n }\n var xpathCache = /* @__PURE__ */ new Map();\n async function processElements(chunk, scrollToChunk = true, indexOffset = 0) {\n console.time(\"processElements:total\");\n const viewportHeight = calculateViewportHeight();\n const chunkHeight = viewportHeight * chunk;\n const maxScrollTop = document.documentElement.scrollHeight - viewportHeight;\n const offsetTop = Math.min(chunkHeight, maxScrollTop);\n if (scrollToChunk) {\n console.time(\"processElements:scroll\");\n await scrollToHeight(offsetTop);\n console.timeEnd(\"processElements:scroll\");\n }\n const candidateElements = [];\n const DOMQueue = [...document.body.childNodes];\n console.log(\"Stagehand (Browser Process): Generating candidate elements\");\n console.time(\"processElements:findCandidates\");\n while (DOMQueue.length > 0) {\n const element = DOMQueue.pop();\n let shouldAddElement = false;\n if (element && isElementNode(element)) {\n const childrenCount = element.childNodes.length;\n for (let i = childrenCount - 1; i >= 0; i--) {\n const child = element.childNodes[i];\n DOMQueue.push(child);\n }\n if (isInteractiveElement(element)) {\n if (isActive(element) && isVisible(element)) {\n shouldAddElement = true;\n }\n }\n if (isLeafElement(element)) {\n if (isActive(element) && isVisible(element)) {\n 
shouldAddElement = true;\n }\n }\n }\n if (element && isTextNode(element) && isTextVisible(element)) {\n shouldAddElement = true;\n }\n if (shouldAddElement) {\n candidateElements.push(element);\n }\n }\n console.timeEnd(\"processElements:findCandidates\");\n const selectorMap = {};\n let outputString = \"\";\n console.log(\n `Stagehand (Browser Process): Processing candidate elements: ${candidateElements.length}`\n );\n console.time(\"processElements:processCandidates\");\n console.time(\"processElements:generateXPaths\");\n const xpathLists = await Promise.all(\n candidateElements.map(async (element) => {\n if (xpathCache.has(element)) {\n return xpathCache.get(element);\n }\n const xpaths = await generateXPathsForElement(element);\n xpathCache.set(element, xpaths);\n return xpaths;\n })\n );\n console.timeEnd(\"processElements:generateXPaths\");\n candidateElements.forEach((element, index) => {\n const xpaths = xpathLists[index];\n let elementOutput = \"\";\n if (isTextNode(element)) {\n const textContent = element.textContent?.trim();\n if (textContent) {\n elementOutput += `${index + indexOffset}:${textContent}\n`;\n }\n } else if (isElementNode(element)) {\n const tagName = element.tagName.toLowerCase();\n const attributes = collectEssentialAttributes(element);\n const openingTag = `<${tagName}${attributes ? 
\" \" + attributes : \"\"}>`;\n const closingTag = `</${tagName}>`;\n const textContent = element.textContent?.trim() || \"\";\n elementOutput += `${index + indexOffset}:${openingTag}${textContent}${closingTag}\n`;\n }\n outputString += elementOutput;\n selectorMap[index + indexOffset] = xpaths;\n });\n console.timeEnd(\"processElements:processCandidates\");\n console.timeEnd(\"processElements:total\");\n return {\n outputString,\n selectorMap\n };\n }\n function collectEssentialAttributes(element) {\n const essentialAttributes = [\n \"id\",\n \"class\",\n \"href\",\n \"src\",\n \"aria-label\",\n \"aria-name\",\n \"aria-role\",\n \"aria-description\",\n \"aria-expanded\",\n \"aria-haspopup\",\n \"type\",\n \"value\"\n ];\n const attrs = essentialAttributes.map((attr) => {\n const value = element.getAttribute(attr);\n return value ? `${attr}=\"${value}\"` : \"\";\n }).filter((attr) => attr !== \"\");\n Array.from(element.attributes).forEach((attr) => {\n if (attr.name.startsWith(\"data-\")) {\n attrs.push(`${attr.name}=\"${attr.value}\"`);\n }\n });\n return attrs.join(\" \");\n }\n window.processDom = processDom;\n window.processAllOfDom = processAllOfDom;\n window.processElements = processElements;\n window.scrollToHeight = scrollToHeight;\n var leafElementDenyList = [\"SVG\", \"IFRAME\", \"SCRIPT\", \"STYLE\", \"LINK\"];\n var interactiveElementTypes = [\n \"A\",\n \"BUTTON\",\n \"DETAILS\",\n \"EMBED\",\n \"INPUT\",\n \"LABEL\",\n \"MENU\",\n \"MENUITEM\",\n \"OBJECT\",\n \"SELECT\",\n \"TEXTAREA\",\n \"SUMMARY\"\n ];\n var interactiveRoles = [\n \"button\",\n \"menu\",\n \"menuitem\",\n \"link\",\n \"checkbox\",\n \"radio\",\n \"slider\",\n \"tab\",\n \"tabpanel\",\n \"textbox\",\n \"combobox\",\n \"grid\",\n \"listbox\",\n \"option\",\n \"progressbar\",\n \"scrollbar\",\n \"searchbox\",\n \"switch\",\n \"tree\",\n \"treeitem\",\n \"spinbutton\",\n \"tooltip\"\n ];\n var interactiveAriaRoles = [\"menu\", \"menuitem\", \"button\"];\n var isVisible = (element) => 
{\n const rect = element.getBoundingClientRect();\n if (rect.width === 0 || rect.height === 0 || rect.top < 0 || rect.top > window.innerHeight) {\n return false;\n }\n if (!isTopElement(element, rect)) {\n return false;\n }\n const visible = element.checkVisibility({\n checkOpacity: true,\n checkVisibilityCSS: true\n });\n return visible;\n };\n var isTextVisible = (element) => {\n const range = document.createRange();\n range.selectNodeContents(element);\n const rect = range.getBoundingClientRect();\n if (rect.width === 0 || rect.height === 0 || rect.top < 0 || rect.top > window.innerHeight) {\n return false;\n }\n const parent = element.parentElement;\n if (!parent) {\n return false;\n }\n if (!isTopElement(parent, rect)) {\n return false;\n }\n const visible = parent.checkVisibility({\n checkOpacity: true,\n checkVisibilityCSS: true\n });\n return visible;\n };\n function isTopElement(elem, rect) {\n const points = [\n { x: rect.left + rect.width * 0.25, y: rect.top + rect.height * 0.25 },\n { x: rect.left + rect.width * 0.75, y: rect.top + rect.height * 0.25 },\n { x: rect.left + rect.width * 0.25, y: rect.top + rect.height * 0.75 },\n { x: rect.left + rect.width * 0.75, y: rect.top + rect.height * 0.75 },\n { x: rect.left + rect.width / 2, y: rect.top + rect.height / 2 }\n ];\n return points.some((point) => {\n const topEl = document.elementFromPoint(point.x, point.y);\n let current = topEl;\n while (current && current !== document.body) {\n if (current.isSameNode(elem)) {\n return true;\n }\n current = current.parentElement;\n }\n return false;\n });\n }\n var isActive = (element) => {\n if (element.hasAttribute(\"disabled\") || element.hasAttribute(\"hidden\") || element.getAttribute(\"aria-disabled\") === \"true\") {\n return false;\n }\n return true;\n };\n var isInteractiveElement = (element) => {\n const elementType = element.tagName;\n const elementRole = element.getAttribute(\"role\");\n const elementAriaRole = element.getAttribute(\"aria-role\");\n 
return elementType && interactiveElementTypes.includes(elementType) || elementRole && interactiveRoles.includes(elementRole) || elementAriaRole && interactiveAriaRoles.includes(elementAriaRole);\n };\n var isLeafElement = (element) => {\n if (element.textContent === \"\") {\n return false;\n }\n if (element.childNodes.length === 0) {\n return !leafElementDenyList.includes(element.tagName);\n }\n if (element.childNodes.length === 1 && isTextNode(element.childNodes[0])) {\n return true;\n }\n return false;\n };\n async function pickChunk(chunksSeen) {\n const viewportHeight = calculateViewportHeight();\n const documentHeight = document.documentElement.scrollHeight;\n const chunks = Math.ceil(documentHeight / viewportHeight);\n const chunksArray = Array.from({ length: chunks }, (_, i) => i);\n const chunksRemaining = chunksArray.filter((chunk2) => {\n return !chunksSeen.includes(chunk2);\n });\n const currentScrollPosition = window.scrollY;\n const closestChunk = chunksRemaining.reduce((closest, current) => {\n const currentChunkTop = viewportHeight * current;\n const closestChunkTop = viewportHeight * closest;\n return Math.abs(currentScrollPosition - currentChunkTop) < Math.abs(currentScrollPosition - closestChunkTop) ? 
current : closest;\n }, chunksRemaining[0]);\n const chunk = closestChunk;\n if (chunk === void 0) {\n throw new Error(`No chunks remaining to check: ${chunksRemaining}`);\n }\n return {\n chunk,\n chunksArray\n };\n }\n\n // lib/dom/debug.ts\n async function debugDom() {\n window.chunkNumber = 0;\n const { selectorMap: multiSelectorMap } = await window.processElements(\n window.chunkNumber\n );\n const selectorMap = multiSelectorMapToSelectorMap(multiSelectorMap);\n drawChunk(selectorMap);\n setupChunkNav();\n }\n function multiSelectorMapToSelectorMap(multiSelectorMap) {\n return Object.fromEntries(\n Object.entries(multiSelectorMap).map(([key, selectors]) => [\n Number(key),\n selectors[0]\n ])\n );\n }\n function drawChunk(selectorMap) {\n if (!window.showChunks) return;\n cleanupMarkers();\n Object.values(selectorMap).forEach((selector) => {\n const element = document.evaluate(\n selector,\n document,\n null,\n XPathResult.FIRST_ORDERED_NODE_TYPE,\n null\n ).singleNodeValue;\n if (element) {\n let rect;\n if (element.nodeType === Node.ELEMENT_NODE) {\n rect = element.getBoundingClientRect();\n } else {\n const range = document.createRange();\n range.selectNodeContents(element);\n rect = range.getBoundingClientRect();\n }\n const color = \"grey\";\n const overlay = document.createElement(\"div\");\n overlay.style.position = \"absolute\";\n overlay.style.left = `${rect.left + window.scrollX}px`;\n overlay.style.top = `${rect.top + window.scrollY}px`;\n overlay.style.padding = \"2px\";\n overlay.style.width = `${rect.width}px`;\n overlay.style.height = `${rect.height}px`;\n overlay.style.backgroundColor = color;\n overlay.className = \"stagehand-marker\";\n overlay.style.opacity = \"0.3\";\n overlay.style.zIndex = \"1000000000\";\n overlay.style.border = \"1px solid\";\n overlay.style.pointerEvents = \"none\";\n document.body.appendChild(overlay);\n }\n });\n }\n async function cleanupDebug() {\n cleanupMarkers();\n cleanupNav();\n }\n function cleanupMarkers() 
{\n const markers = document.querySelectorAll(\".stagehand-marker\");\n markers.forEach((marker) => {\n marker.remove();\n });\n }\n function cleanupNav() {\n const stagehandNavElements = document.querySelectorAll(\".stagehand-nav\");\n stagehandNavElements.forEach((element) => {\n element.remove();\n });\n }\n function setupChunkNav() {\n const viewportHeight = calculateViewportHeight();\n const documentHeight = document.documentElement.scrollHeight;\n const totalChunks = Math.ceil(documentHeight / viewportHeight);\n if (window.chunkNumber > 0) {\n const prevChunkButton = document.createElement(\"button\");\n prevChunkButton.className = \"stagehand-nav\";\n prevChunkButton.textContent = \"Previous\";\n prevChunkButton.style.marginLeft = \"50px\";\n prevChunkButton.style.position = \"fixed\";\n prevChunkButton.style.bottom = \"10px\";\n prevChunkButton.style.left = \"50%\";\n prevChunkButton.style.transform = \"translateX(-50%)\";\n prevChunkButton.style.zIndex = \"1000000000\";\n prevChunkButton.onclick = async () => {\n cleanupMarkers();\n cleanupNav();\n window.chunkNumber -= 1;\n window.scrollTo(0, window.chunkNumber * viewportHeight);\n await window.waitForDomSettle();\n const { selectorMap: multiSelectorMap } = await window.processElements(\n window.chunkNumber\n );\n const selectorMap = multiSelectorMapToSelectorMap(multiSelectorMap);\n drawChunk(selectorMap);\n setupChunkNav();\n };\n document.body.appendChild(prevChunkButton);\n }\n if (totalChunks > window.chunkNumber) {\n const nextChunkButton = document.createElement(\"button\");\n nextChunkButton.className = \"stagehand-nav\";\n nextChunkButton.textContent = \"Next\";\n nextChunkButton.style.marginRight = \"50px\";\n nextChunkButton.style.position = \"fixed\";\n nextChunkButton.style.bottom = \"10px\";\n nextChunkButton.style.right = \"50%\";\n nextChunkButton.style.transform = \"translateX(50%)\";\n nextChunkButton.style.zIndex = \"1000000000\";\n nextChunkButton.onclick = async () => {\n 
cleanupMarkers();\n cleanupNav();\n window.chunkNumber += 1;\n window.scrollTo(0, window.chunkNumber * viewportHeight);\n await window.waitForDomSettle();\n const { selectorMap: multiSelectorMap } = await window.processElements(\n window.chunkNumber\n );\n const selectorMap = multiSelectorMapToSelectorMap(multiSelectorMap);\n drawChunk(selectorMap);\n setupChunkNav();\n };\n document.body.appendChild(nextChunkButton);\n }\n }\n window.debugDom = debugDom;\n window.cleanupDebug = cleanupDebug;\n})();\n"; |
@@ -0,5 +1,5 @@ | ||
import { LogLine } from "../../types/log"; | ||
import { Stagehand } from "../index"; | ||
import { LLMClient } from "../llm/LLMClient"; | ||
import { LLMProvider } from "../llm/LLMProvider"; | ||
import { LLMClient } from "../llm/LLMClient"; | ||
import { LogLine } from "../../types/log"; | ||
export declare class StagehandActHandler { | ||
@@ -13,7 +13,6 @@ private readonly stagehand; | ||
private readonly actionCache; | ||
private readonly llmClient; | ||
private readonly startDomDebug; | ||
private readonly cleanupDomDebug; | ||
private actions; | ||
constructor({ stagehand, verbose, llmProvider, enableCaching, logger, waitForSettledDom, llmClient, startDomDebug, cleanupDomDebug, }: { | ||
constructor({ stagehand, verbose, llmProvider, enableCaching, logger, waitForSettledDom, startDomDebug, cleanupDomDebug, }: { | ||
stagehand: Stagehand; | ||
@@ -20,0 +19,0 @@ verbose: 0 | 1 | 2; |
@@ -34,6 +34,5 @@ import { LLMProvider } from "../llm/LLMProvider"; | ||
}); | ||
extract<T extends z.AnyZodObject>({ instruction, schema, progress, content, chunksSeen, llmClient, requestId, domSettleTimeoutMs, }: { | ||
extract<T extends z.AnyZodObject>({ instruction, schema, content, chunksSeen, llmClient, requestId, domSettleTimeoutMs, }: { | ||
instruction: string; | ||
schema: T; | ||
progress?: string; | ||
content?: z.infer<T>; | ||
@@ -40,0 +39,0 @@ chunksSeen?: Array<number>; |
@@ -8,8 +8,7 @@ import { z } from "zod"; | ||
export declare function act({ action, domElements, steps, llmClient, screenshot, retries, logger, requestId, variables, }: ActParams): Promise<ActResult | null>; | ||
export declare function extract({ instruction, progress, previouslyExtractedContent, domElements, schema, llmClient, chunksSeen, chunksTotal, requestId, }: { | ||
export declare function extract({ instruction, previouslyExtractedContent, domElements, schema, llmClient, chunksSeen, chunksTotal, requestId, }: { | ||
instruction: string; | ||
progress: string; | ||
previouslyExtractedContent: any; | ||
previouslyExtractedContent: object; | ||
domElements: string; | ||
schema: z.ZodObject<any>; | ||
schema: z.ZodObject<z.ZodRawShape>; | ||
llmClient: LLMClient; | ||
@@ -19,3 +18,8 @@ chunksSeen: number; | ||
requestId: string; | ||
}): Promise<any>; | ||
}): Promise<{ | ||
metadata: { | ||
completed?: boolean; | ||
progress?: string; | ||
}; | ||
}>; | ||
export declare function observe({ instruction, domElements, llmClient, image, requestId, }: { | ||
@@ -37,2 +41,2 @@ instruction: string; | ||
requestId: string; | ||
}): Promise<any>; | ||
}): Promise<string>; |
import { ClientOptions } from "@anthropic-ai/sdk"; | ||
import { LogLine } from "../../types/log"; | ||
import { AvailableModel } from "../../types/model"; | ||
import { AnthropicTransformedResponse, AvailableModel } from "../../types/model"; | ||
import { LLMCache } from "../cache/LLMCache"; | ||
@@ -12,5 +12,5 @@ import { ChatCompletionOptions, LLMClient } from "./LLMClient"; | ||
constructor(logger: (message: LogLine) => void, enableCaching: boolean, cache: LLMCache | undefined, modelName: AvailableModel, clientOptions?: ClientOptions); | ||
createChatCompletion(options: ChatCompletionOptions & { | ||
createChatCompletion<T = AnthropicTransformedResponse>(options: ChatCompletionOptions & { | ||
retries?: number; | ||
}): Promise<any>; | ||
}): Promise<T>; | ||
} |
@@ -1,2 +0,4 @@ | ||
import { AvailableModel, ToolCall } from "../../types/model"; | ||
import { ZodType } from "zod"; | ||
import { AnthropicTransformedResponse, AvailableModel, ToolCall } from "../../types/model"; | ||
import { ChatCompletion, ChatCompletionToolChoiceOption } from "openai/resources"; | ||
export interface ChatMessage { | ||
@@ -32,9 +34,10 @@ role: "system" | "user" | "assistant"; | ||
name: string; | ||
schema: any; | ||
schema: ZodType; | ||
}; | ||
tools?: ToolCall[]; | ||
tool_choice?: string; | ||
tool_choice?: "auto" | ChatCompletionToolChoiceOption; | ||
maxTokens?: number; | ||
requestId: string; | ||
} | ||
export type LLMResponse = AnthropicTransformedResponse | ChatCompletion; | ||
export declare abstract class LLMClient { | ||
@@ -44,3 +47,3 @@ modelName: AvailableModel; | ||
constructor(modelName: AvailableModel); | ||
abstract createChatCompletion(options: ChatCompletionOptions): Promise<any>; | ||
abstract createChatCompletion<T = LLMResponse>(options: ChatCompletionOptions): Promise<T>; | ||
abstract logger: (message: { | ||
@@ -47,0 +50,0 @@ category?: string; |
@@ -1,4 +0,4 @@ | ||
import { LLMClient } from "./LLMClient"; | ||
import { LogLine } from "../../types/log"; | ||
import { AvailableModel, ClientOptions } from "../../types/model"; | ||
import { LLMClient } from "./LLMClient"; | ||
export declare class LLMProvider { | ||
@@ -5,0 +5,0 @@ private modelToProviderMap; |
import { ClientOptions } from "openai"; | ||
import { ChatCompletion } from "openai/resources/chat"; | ||
import { LogLine } from "../../types/log"; | ||
@@ -13,3 +14,3 @@ import { AvailableModel } from "../../types/model"; | ||
constructor(logger: (message: LogLine) => void, enableCaching: boolean, cache: LLMCache | undefined, modelName: AvailableModel, clientOptions?: ClientOptions); | ||
createChatCompletion(options: ChatCompletionOptions): Promise<any>; | ||
createChatCompletion<T = ChatCompletion>(options: ChatCompletionOptions): Promise<T>; | ||
} |
@@ -20,3 +20,3 @@ import { Buffer } from "buffer"; | ||
element: number; | ||
args: any[]; | ||
args: unknown[]; | ||
completed: boolean; | ||
@@ -23,0 +23,0 @@ step: string; |
import type { ClientOptions as AnthropicClientOptions } from "@anthropic-ai/sdk"; | ||
import { Tool as AnthropicTool } from "@anthropic-ai/sdk/resources"; | ||
import type { ClientOptions as OpenAIClientOptions } from "openai"; | ||
@@ -8,2 +7,39 @@ import { ChatCompletionTool as OpenAITool } from "openai/resources"; | ||
export type ClientOptions = OpenAIClientOptions | AnthropicClientOptions; | ||
export type ToolCall = AnthropicTool | OpenAITool; | ||
export type ToolCall = OpenAITool; | ||
export type AnthropicTransformedResponse = { | ||
id: string; | ||
object: string; | ||
created: number; | ||
model: string; | ||
choices: { | ||
index: number; | ||
message: { | ||
role: string; | ||
content: string | null; | ||
tool_calls: { | ||
id: string; | ||
type: string; | ||
function: { | ||
name: string; | ||
arguments: string; | ||
}; | ||
}[]; | ||
}; | ||
finish_reason: string; | ||
}[]; | ||
usage: { | ||
prompt_tokens: number; | ||
completion_tokens: number; | ||
total_tokens: number; | ||
}; | ||
}; | ||
export interface AnthropicJsonSchemaObject { | ||
definitions?: { | ||
MySchema?: { | ||
properties?: Record<string, unknown>; | ||
required?: string[]; | ||
}; | ||
}; | ||
properties?: Record<string, unknown>; | ||
required?: string[]; | ||
} |
@@ -7,1 +7,6 @@ export declare class PlaywrightCommandException extends Error { | ||
} | ||
export interface GotoOptions { | ||
timeout?: number; | ||
waitUntil?: "load" | "domcontentloaded" | "networkidle" | "commit"; | ||
referer?: string; | ||
} |
@@ -1,3 +0,2 @@ | ||
const fs = require("fs"); | ||
const crypto = require("crypto"); | ||
import fs from "fs"; | ||
const observationsPath = "./.cache/observations.json"; | ||
@@ -8,3 +7,3 @@ const actionsPath = "./.cache/actions.json"; | ||
* A file system cache to skip inference when repeating steps | ||
* It also acts as the source of truth for identifying previously seen actions and observatiosn | ||
* It also acts as the source of truth for identifying previously seen actions and observations | ||
*/ | ||
@@ -11,0 +10,0 @@ class Cache { |
@@ -8,3 +8,3 @@ import * as fs from "fs"; | ||
timestamp: number; | ||
data: any; | ||
data: unknown; | ||
requestId: string; | ||
@@ -94,3 +94,3 @@ } | ||
protected createHash(data: any): string { | ||
protected createHash(data: unknown): string { | ||
const hash = crypto.createHash("sha256"); | ||
@@ -129,3 +129,18 @@ return hash.update(JSON.stringify(data)).digest("hex"); | ||
return true; | ||
} catch (error) { | ||
} catch (e) { | ||
this.logger({ | ||
category: "base_cache", | ||
message: "error acquiring lock", | ||
level: 2, | ||
auxiliary: { | ||
trace: { | ||
value: e.stack, | ||
type: "string", | ||
}, | ||
message: { | ||
value: e.message, | ||
type: "string", | ||
}, | ||
}, | ||
}); | ||
await this.sleep(5); | ||
@@ -306,3 +321,3 @@ } | ||
public async get( | ||
hashObj: Record<string, any> | string, | ||
hashObj: Record<string, unknown> | string, | ||
requestId: string, | ||
@@ -359,3 +374,3 @@ ): Promise<T["data"] | null> { | ||
public async set( | ||
hashObj: Record<string, any>, | ||
hashObj: Record<string, unknown>, | ||
data: T["data"], | ||
@@ -411,3 +426,3 @@ requestId: string, | ||
public async delete(hashObj: Record<string, any>): Promise<void> { | ||
public async delete(hashObj: Record<string, unknown>): Promise<void> { | ||
if (!(await this.acquireLock())) { | ||
@@ -414,0 +429,0 @@ this.logger({ |
@@ -22,8 +22,8 @@ import { BaseCache, CacheEntry } from "./BaseCache"; | ||
*/ | ||
public async get( | ||
options: Record<string, any>, | ||
public async get<T>( | ||
options: Record<string, unknown>, | ||
requestId: string, | ||
): Promise<any | null> { | ||
): Promise<T | null> { | ||
const data = await super.get(options, requestId); | ||
return data; | ||
return data as T | null; // TODO: remove this cast | ||
} | ||
@@ -38,4 +38,4 @@ | ||
public async set( | ||
options: Record<string, any>, | ||
data: any, | ||
options: Record<string, unknown>, | ||
data: unknown, | ||
requestId: string, | ||
@@ -42,0 +42,0 @@ ): Promise<void> { |
@@ -505,3 +505,5 @@ (() => { | ||
window.chunkNumber = 0; | ||
const { selectorMap: multiSelectorMap, outputString } = await window.processElements(window.chunkNumber); | ||
const { selectorMap: multiSelectorMap } = await window.processElements( | ||
window.chunkNumber | ||
); | ||
const selectorMap = multiSelectorMapToSelectorMap(multiSelectorMap); | ||
@@ -522,3 +524,3 @@ drawChunk(selectorMap); | ||
cleanupMarkers(); | ||
Object.entries(selectorMap).forEach(([_index, selector]) => { | ||
Object.values(selectorMap).forEach((selector) => { | ||
const element = document.evaluate( | ||
@@ -525,0 +527,0 @@ selector, |
@@ -1,1 +0,1 @@ | ||
export const scriptContent = "(() => {\n // lib/dom/xpathUtils.ts\n function getParentElement(node) {\n return isElementNode(node) ? node.parentElement : node.parentNode;\n }\n function getCombinations(attributes, size) {\n const results = [];\n function helper(start, combo) {\n if (combo.length === size) {\n results.push([...combo]);\n return;\n }\n for (let i = start; i < attributes.length; i++) {\n combo.push(attributes[i]);\n helper(i + 1, combo);\n combo.pop();\n }\n }\n helper(0, []);\n return results;\n }\n function isXPathFirstResultElement(xpath, target) {\n try {\n const result = document.evaluate(\n xpath,\n document.documentElement,\n null,\n XPathResult.ORDERED_NODE_SNAPSHOT_TYPE,\n null\n );\n return result.snapshotItem(0) === target;\n } catch (error) {\n console.warn(`Invalid XPath expression: ${xpath}`, error);\n return false;\n }\n }\n function escapeXPathString(value) {\n if (value.includes(\"'\")) {\n if (value.includes('\"')) {\n return \"concat(\" + value.split(/('+)/).map((part) => {\n if (part === \"'\") {\n return `\"'\"`;\n } else if (part.startsWith(\"'\") && part.endsWith(\"'\")) {\n return `\"${part}\"`;\n } else {\n return `'${part}'`;\n }\n }).join(\",\") + \")\";\n } else {\n return `\"${value}\"`;\n }\n } else {\n return `'${value}'`;\n }\n }\n async function generateXPathsForElement(element) {\n if (!element) return [];\n const [complexXPath, standardXPath, idBasedXPath] = await Promise.all([\n generateComplexXPath(element),\n generateStandardXPath(element),\n generatedIdBasedXPath(element)\n ]);\n return [standardXPath, ...idBasedXPath ? 
[idBasedXPath] : [], complexXPath];\n }\n async function generateComplexXPath(element) {\n const parts = [];\n let currentElement = element;\n while (currentElement && (isTextNode(currentElement) || isElementNode(currentElement))) {\n if (isElementNode(currentElement)) {\n const el = currentElement;\n let selector = el.tagName.toLowerCase();\n const attributePriority = [\n \"data-qa\",\n \"data-component\",\n \"data-role\",\n \"role\",\n \"aria-role\",\n \"type\",\n \"name\",\n \"aria-label\",\n \"placeholder\",\n \"title\",\n \"alt\"\n ];\n const attributes = attributePriority.map((attr) => {\n let value = el.getAttribute(attr);\n if (attr === \"href-full\" && value) {\n value = el.getAttribute(\"href\");\n }\n return value ? { attr: attr === \"href-full\" ? \"href\" : attr, value } : null;\n }).filter((attr) => attr !== null);\n let uniqueSelector = \"\";\n for (let i = 1; i <= attributes.length; i++) {\n const combinations = getCombinations(attributes, i);\n for (const combo of combinations) {\n const conditions = combo.map((a) => `@${a.attr}=${escapeXPathString(a.value)}`).join(\" and \");\n const xpath2 = `//${selector}[${conditions}]`;\n if (isXPathFirstResultElement(xpath2, el)) {\n uniqueSelector = xpath2;\n break;\n }\n }\n if (uniqueSelector) break;\n }\n if (uniqueSelector) {\n parts.unshift(uniqueSelector.replace(\"//\", \"\"));\n break;\n } else {\n const parent = getParentElement(el);\n if (parent) {\n const siblings = Array.from(parent.children).filter(\n (sibling) => sibling.tagName === el.tagName\n );\n const index = siblings.indexOf(el) + 1;\n selector += siblings.length > 1 ? 
`[${index}]` : \"\";\n }\n parts.unshift(selector);\n }\n }\n currentElement = getParentElement(currentElement);\n }\n const xpath = \"//\" + parts.join(\"/\");\n return xpath;\n }\n async function generateStandardXPath(element) {\n const parts = [];\n while (element && (isTextNode(element) || isElementNode(element))) {\n let index = 0;\n let hasSameTypeSiblings = false;\n const siblings = element.parentElement ? Array.from(element.parentElement.childNodes) : [];\n for (let i = 0; i < siblings.length; i++) {\n const sibling = siblings[i];\n if (sibling.nodeType === element.nodeType && sibling.nodeName === element.nodeName) {\n index = index + 1;\n hasSameTypeSiblings = true;\n if (sibling.isSameNode(element)) {\n break;\n }\n }\n }\n if (element.nodeName !== \"#text\") {\n const tagName = element.nodeName.toLowerCase();\n const pathIndex = hasSameTypeSiblings ? `[${index}]` : \"\";\n parts.unshift(`${tagName}${pathIndex}`);\n }\n element = element.parentElement;\n }\n return parts.length ? 
`/${parts.join(\"/\")}` : \"\";\n }\n async function generatedIdBasedXPath(element) {\n if (isElementNode(element) && element.id) {\n return `//*[@id='${element.id}']`;\n }\n return null;\n }\n\n // lib/dom/utils.ts\n async function waitForDomSettle() {\n return new Promise((resolve) => {\n const createTimeout = () => {\n return setTimeout(() => {\n resolve();\n }, 2e3);\n };\n let timeout = createTimeout();\n const observer = new MutationObserver(() => {\n clearTimeout(timeout);\n timeout = createTimeout();\n });\n observer.observe(window.document.body, { childList: true, subtree: true });\n });\n }\n window.waitForDomSettle = waitForDomSettle;\n function calculateViewportHeight() {\n return Math.ceil(window.innerHeight * 0.75);\n }\n\n // lib/dom/process.ts\n function isElementNode(node) {\n return node.nodeType === Node.ELEMENT_NODE;\n }\n function isTextNode(node) {\n return node.nodeType === Node.TEXT_NODE && Boolean(node.textContent?.trim());\n }\n async function processDom(chunksSeen) {\n const { chunk, chunksArray } = await pickChunk(chunksSeen);\n const { outputString, selectorMap } = await processElements(chunk);\n console.log(\n `Stagehand (Browser Process): Extracted dom elements:\n${outputString}`\n );\n return {\n outputString,\n selectorMap,\n chunk,\n chunks: chunksArray\n };\n }\n async function processAllOfDom() {\n console.log(\"Stagehand (Browser Process): Processing all of DOM\");\n const viewportHeight = calculateViewportHeight();\n const documentHeight = document.documentElement.scrollHeight;\n const totalChunks = Math.ceil(documentHeight / viewportHeight);\n let index = 0;\n const results = [];\n for (let chunk = 0; chunk < totalChunks; chunk++) {\n const result = await processElements(chunk, true, index);\n results.push(result);\n index += Object.keys(result.selectorMap).length;\n }\n await scrollToHeight(0);\n const allOutputString = results.map((result) => result.outputString).join(\"\");\n const allSelectorMap = results.reduce(\n (acc, 
result) => ({ ...acc, ...result.selectorMap }),\n {}\n );\n console.log(\n `Stagehand (Browser Process): All dom elements: ${allOutputString}`\n );\n return {\n outputString: allOutputString,\n selectorMap: allSelectorMap\n };\n }\n async function scrollToHeight(height) {\n window.scrollTo({ top: height, left: 0, behavior: \"smooth\" });\n await new Promise((resolve) => {\n let scrollEndTimer;\n const handleScrollEnd = () => {\n clearTimeout(scrollEndTimer);\n scrollEndTimer = window.setTimeout(() => {\n window.removeEventListener(\"scroll\", handleScrollEnd);\n resolve();\n }, 100);\n };\n window.addEventListener(\"scroll\", handleScrollEnd, { passive: true });\n handleScrollEnd();\n });\n }\n var xpathCache = /* @__PURE__ */ new Map();\n async function processElements(chunk, scrollToChunk = true, indexOffset = 0) {\n console.time(\"processElements:total\");\n const viewportHeight = calculateViewportHeight();\n const chunkHeight = viewportHeight * chunk;\n const maxScrollTop = document.documentElement.scrollHeight - viewportHeight;\n const offsetTop = Math.min(chunkHeight, maxScrollTop);\n if (scrollToChunk) {\n console.time(\"processElements:scroll\");\n await scrollToHeight(offsetTop);\n console.timeEnd(\"processElements:scroll\");\n }\n const candidateElements = [];\n const DOMQueue = [...document.body.childNodes];\n console.log(\"Stagehand (Browser Process): Generating candidate elements\");\n console.time(\"processElements:findCandidates\");\n while (DOMQueue.length > 0) {\n const element = DOMQueue.pop();\n let shouldAddElement = false;\n if (element && isElementNode(element)) {\n const childrenCount = element.childNodes.length;\n for (let i = childrenCount - 1; i >= 0; i--) {\n const child = element.childNodes[i];\n DOMQueue.push(child);\n }\n if (isInteractiveElement(element)) {\n if (isActive(element) && isVisible(element)) {\n shouldAddElement = true;\n }\n }\n if (isLeafElement(element)) {\n if (isActive(element) && isVisible(element)) {\n 
shouldAddElement = true;\n }\n }\n }\n if (element && isTextNode(element) && isTextVisible(element)) {\n shouldAddElement = true;\n }\n if (shouldAddElement) {\n candidateElements.push(element);\n }\n }\n console.timeEnd(\"processElements:findCandidates\");\n const selectorMap = {};\n let outputString = \"\";\n console.log(\n `Stagehand (Browser Process): Processing candidate elements: ${candidateElements.length}`\n );\n console.time(\"processElements:processCandidates\");\n console.time(\"processElements:generateXPaths\");\n const xpathLists = await Promise.all(\n candidateElements.map(async (element) => {\n if (xpathCache.has(element)) {\n return xpathCache.get(element);\n }\n const xpaths = await generateXPathsForElement(element);\n xpathCache.set(element, xpaths);\n return xpaths;\n })\n );\n console.timeEnd(\"processElements:generateXPaths\");\n candidateElements.forEach((element, index) => {\n const xpaths = xpathLists[index];\n let elementOutput = \"\";\n if (isTextNode(element)) {\n const textContent = element.textContent?.trim();\n if (textContent) {\n elementOutput += `${index + indexOffset}:${textContent}\n`;\n }\n } else if (isElementNode(element)) {\n const tagName = element.tagName.toLowerCase();\n const attributes = collectEssentialAttributes(element);\n const openingTag = `<${tagName}${attributes ? 
\" \" + attributes : \"\"}>`;\n const closingTag = `</${tagName}>`;\n const textContent = element.textContent?.trim() || \"\";\n elementOutput += `${index + indexOffset}:${openingTag}${textContent}${closingTag}\n`;\n }\n outputString += elementOutput;\n selectorMap[index + indexOffset] = xpaths;\n });\n console.timeEnd(\"processElements:processCandidates\");\n console.timeEnd(\"processElements:total\");\n return {\n outputString,\n selectorMap\n };\n }\n function collectEssentialAttributes(element) {\n const essentialAttributes = [\n \"id\",\n \"class\",\n \"href\",\n \"src\",\n \"aria-label\",\n \"aria-name\",\n \"aria-role\",\n \"aria-description\",\n \"aria-expanded\",\n \"aria-haspopup\",\n \"type\",\n \"value\"\n ];\n const attrs = essentialAttributes.map((attr) => {\n const value = element.getAttribute(attr);\n return value ? `${attr}=\"${value}\"` : \"\";\n }).filter((attr) => attr !== \"\");\n Array.from(element.attributes).forEach((attr) => {\n if (attr.name.startsWith(\"data-\")) {\n attrs.push(`${attr.name}=\"${attr.value}\"`);\n }\n });\n return attrs.join(\" \");\n }\n window.processDom = processDom;\n window.processAllOfDom = processAllOfDom;\n window.processElements = processElements;\n window.scrollToHeight = scrollToHeight;\n var leafElementDenyList = [\"SVG\", \"IFRAME\", \"SCRIPT\", \"STYLE\", \"LINK\"];\n var interactiveElementTypes = [\n \"A\",\n \"BUTTON\",\n \"DETAILS\",\n \"EMBED\",\n \"INPUT\",\n \"LABEL\",\n \"MENU\",\n \"MENUITEM\",\n \"OBJECT\",\n \"SELECT\",\n \"TEXTAREA\",\n \"SUMMARY\"\n ];\n var interactiveRoles = [\n \"button\",\n \"menu\",\n \"menuitem\",\n \"link\",\n \"checkbox\",\n \"radio\",\n \"slider\",\n \"tab\",\n \"tabpanel\",\n \"textbox\",\n \"combobox\",\n \"grid\",\n \"listbox\",\n \"option\",\n \"progressbar\",\n \"scrollbar\",\n \"searchbox\",\n \"switch\",\n \"tree\",\n \"treeitem\",\n \"spinbutton\",\n \"tooltip\"\n ];\n var interactiveAriaRoles = [\"menu\", \"menuitem\", \"button\"];\n var isVisible = (element) => 
{\n const rect = element.getBoundingClientRect();\n if (rect.width === 0 || rect.height === 0 || rect.top < 0 || rect.top > window.innerHeight) {\n return false;\n }\n if (!isTopElement(element, rect)) {\n return false;\n }\n const visible = element.checkVisibility({\n checkOpacity: true,\n checkVisibilityCSS: true\n });\n return visible;\n };\n var isTextVisible = (element) => {\n const range = document.createRange();\n range.selectNodeContents(element);\n const rect = range.getBoundingClientRect();\n if (rect.width === 0 || rect.height === 0 || rect.top < 0 || rect.top > window.innerHeight) {\n return false;\n }\n const parent = element.parentElement;\n if (!parent) {\n return false;\n }\n if (!isTopElement(parent, rect)) {\n return false;\n }\n const visible = parent.checkVisibility({\n checkOpacity: true,\n checkVisibilityCSS: true\n });\n return visible;\n };\n function isTopElement(elem, rect) {\n const points = [\n { x: rect.left + rect.width * 0.25, y: rect.top + rect.height * 0.25 },\n { x: rect.left + rect.width * 0.75, y: rect.top + rect.height * 0.25 },\n { x: rect.left + rect.width * 0.25, y: rect.top + rect.height * 0.75 },\n { x: rect.left + rect.width * 0.75, y: rect.top + rect.height * 0.75 },\n { x: rect.left + rect.width / 2, y: rect.top + rect.height / 2 }\n ];\n return points.some((point) => {\n const topEl = document.elementFromPoint(point.x, point.y);\n let current = topEl;\n while (current && current !== document.body) {\n if (current.isSameNode(elem)) {\n return true;\n }\n current = current.parentElement;\n }\n return false;\n });\n }\n var isActive = (element) => {\n if (element.hasAttribute(\"disabled\") || element.hasAttribute(\"hidden\") || element.getAttribute(\"aria-disabled\") === \"true\") {\n return false;\n }\n return true;\n };\n var isInteractiveElement = (element) => {\n const elementType = element.tagName;\n const elementRole = element.getAttribute(\"role\");\n const elementAriaRole = element.getAttribute(\"aria-role\");\n 
return elementType && interactiveElementTypes.includes(elementType) || elementRole && interactiveRoles.includes(elementRole) || elementAriaRole && interactiveAriaRoles.includes(elementAriaRole);\n };\n var isLeafElement = (element) => {\n if (element.textContent === \"\") {\n return false;\n }\n if (element.childNodes.length === 0) {\n return !leafElementDenyList.includes(element.tagName);\n }\n if (element.childNodes.length === 1 && isTextNode(element.childNodes[0])) {\n return true;\n }\n return false;\n };\n async function pickChunk(chunksSeen) {\n const viewportHeight = calculateViewportHeight();\n const documentHeight = document.documentElement.scrollHeight;\n const chunks = Math.ceil(documentHeight / viewportHeight);\n const chunksArray = Array.from({ length: chunks }, (_, i) => i);\n const chunksRemaining = chunksArray.filter((chunk2) => {\n return !chunksSeen.includes(chunk2);\n });\n const currentScrollPosition = window.scrollY;\n const closestChunk = chunksRemaining.reduce((closest, current) => {\n const currentChunkTop = viewportHeight * current;\n const closestChunkTop = viewportHeight * closest;\n return Math.abs(currentScrollPosition - currentChunkTop) < Math.abs(currentScrollPosition - closestChunkTop) ? 
current : closest;\n }, chunksRemaining[0]);\n const chunk = closestChunk;\n if (chunk === void 0) {\n throw new Error(`No chunks remaining to check: ${chunksRemaining}`);\n }\n return {\n chunk,\n chunksArray\n };\n }\n\n // lib/dom/debug.ts\n async function debugDom() {\n window.chunkNumber = 0;\n const { selectorMap: multiSelectorMap, outputString } = await window.processElements(window.chunkNumber);\n const selectorMap = multiSelectorMapToSelectorMap(multiSelectorMap);\n drawChunk(selectorMap);\n setupChunkNav();\n }\n function multiSelectorMapToSelectorMap(multiSelectorMap) {\n return Object.fromEntries(\n Object.entries(multiSelectorMap).map(([key, selectors]) => [\n Number(key),\n selectors[0]\n ])\n );\n }\n function drawChunk(selectorMap) {\n if (!window.showChunks) return;\n cleanupMarkers();\n Object.entries(selectorMap).forEach(([_index, selector]) => {\n const element = document.evaluate(\n selector,\n document,\n null,\n XPathResult.FIRST_ORDERED_NODE_TYPE,\n null\n ).singleNodeValue;\n if (element) {\n let rect;\n if (element.nodeType === Node.ELEMENT_NODE) {\n rect = element.getBoundingClientRect();\n } else {\n const range = document.createRange();\n range.selectNodeContents(element);\n rect = range.getBoundingClientRect();\n }\n const color = \"grey\";\n const overlay = document.createElement(\"div\");\n overlay.style.position = \"absolute\";\n overlay.style.left = `${rect.left + window.scrollX}px`;\n overlay.style.top = `${rect.top + window.scrollY}px`;\n overlay.style.padding = \"2px\";\n overlay.style.width = `${rect.width}px`;\n overlay.style.height = `${rect.height}px`;\n overlay.style.backgroundColor = color;\n overlay.className = \"stagehand-marker\";\n overlay.style.opacity = \"0.3\";\n overlay.style.zIndex = \"1000000000\";\n overlay.style.border = \"1px solid\";\n overlay.style.pointerEvents = \"none\";\n document.body.appendChild(overlay);\n }\n });\n }\n async function cleanupDebug() {\n cleanupMarkers();\n cleanupNav();\n }\n function 
cleanupMarkers() {\n const markers = document.querySelectorAll(\".stagehand-marker\");\n markers.forEach((marker) => {\n marker.remove();\n });\n }\n function cleanupNav() {\n const stagehandNavElements = document.querySelectorAll(\".stagehand-nav\");\n stagehandNavElements.forEach((element) => {\n element.remove();\n });\n }\n function setupChunkNav() {\n const viewportHeight = calculateViewportHeight();\n const documentHeight = document.documentElement.scrollHeight;\n const totalChunks = Math.ceil(documentHeight / viewportHeight);\n if (window.chunkNumber > 0) {\n const prevChunkButton = document.createElement(\"button\");\n prevChunkButton.className = \"stagehand-nav\";\n prevChunkButton.textContent = \"Previous\";\n prevChunkButton.style.marginLeft = \"50px\";\n prevChunkButton.style.position = \"fixed\";\n prevChunkButton.style.bottom = \"10px\";\n prevChunkButton.style.left = \"50%\";\n prevChunkButton.style.transform = \"translateX(-50%)\";\n prevChunkButton.style.zIndex = \"1000000000\";\n prevChunkButton.onclick = async () => {\n cleanupMarkers();\n cleanupNav();\n window.chunkNumber -= 1;\n window.scrollTo(0, window.chunkNumber * viewportHeight);\n await window.waitForDomSettle();\n const { selectorMap: multiSelectorMap } = await window.processElements(\n window.chunkNumber\n );\n const selectorMap = multiSelectorMapToSelectorMap(multiSelectorMap);\n drawChunk(selectorMap);\n setupChunkNav();\n };\n document.body.appendChild(prevChunkButton);\n }\n if (totalChunks > window.chunkNumber) {\n const nextChunkButton = document.createElement(\"button\");\n nextChunkButton.className = \"stagehand-nav\";\n nextChunkButton.textContent = \"Next\";\n nextChunkButton.style.marginRight = \"50px\";\n nextChunkButton.style.position = \"fixed\";\n nextChunkButton.style.bottom = \"10px\";\n nextChunkButton.style.right = \"50%\";\n nextChunkButton.style.transform = \"translateX(50%)\";\n nextChunkButton.style.zIndex = \"1000000000\";\n nextChunkButton.onclick = async () => 
{\n cleanupMarkers();\n cleanupNav();\n window.chunkNumber += 1;\n window.scrollTo(0, window.chunkNumber * viewportHeight);\n await window.waitForDomSettle();\n const { selectorMap: multiSelectorMap } = await window.processElements(\n window.chunkNumber\n );\n const selectorMap = multiSelectorMapToSelectorMap(multiSelectorMap);\n drawChunk(selectorMap);\n setupChunkNav();\n };\n document.body.appendChild(nextChunkButton);\n }\n }\n window.debugDom = debugDom;\n window.cleanupDebug = cleanupDebug;\n})();\n"; | ||
export const scriptContent = "(() => {\n // lib/dom/xpathUtils.ts\n function getParentElement(node) {\n return isElementNode(node) ? node.parentElement : node.parentNode;\n }\n function getCombinations(attributes, size) {\n const results = [];\n function helper(start, combo) {\n if (combo.length === size) {\n results.push([...combo]);\n return;\n }\n for (let i = start; i < attributes.length; i++) {\n combo.push(attributes[i]);\n helper(i + 1, combo);\n combo.pop();\n }\n }\n helper(0, []);\n return results;\n }\n function isXPathFirstResultElement(xpath, target) {\n try {\n const result = document.evaluate(\n xpath,\n document.documentElement,\n null,\n XPathResult.ORDERED_NODE_SNAPSHOT_TYPE,\n null\n );\n return result.snapshotItem(0) === target;\n } catch (error) {\n console.warn(`Invalid XPath expression: ${xpath}`, error);\n return false;\n }\n }\n function escapeXPathString(value) {\n if (value.includes(\"'\")) {\n if (value.includes('\"')) {\n return \"concat(\" + value.split(/('+)/).map((part) => {\n if (part === \"'\") {\n return `\"'\"`;\n } else if (part.startsWith(\"'\") && part.endsWith(\"'\")) {\n return `\"${part}\"`;\n } else {\n return `'${part}'`;\n }\n }).join(\",\") + \")\";\n } else {\n return `\"${value}\"`;\n }\n } else {\n return `'${value}'`;\n }\n }\n async function generateXPathsForElement(element) {\n if (!element) return [];\n const [complexXPath, standardXPath, idBasedXPath] = await Promise.all([\n generateComplexXPath(element),\n generateStandardXPath(element),\n generatedIdBasedXPath(element)\n ]);\n return [standardXPath, ...idBasedXPath ? 
[idBasedXPath] : [], complexXPath];\n }\n async function generateComplexXPath(element) {\n const parts = [];\n let currentElement = element;\n while (currentElement && (isTextNode(currentElement) || isElementNode(currentElement))) {\n if (isElementNode(currentElement)) {\n const el = currentElement;\n let selector = el.tagName.toLowerCase();\n const attributePriority = [\n \"data-qa\",\n \"data-component\",\n \"data-role\",\n \"role\",\n \"aria-role\",\n \"type\",\n \"name\",\n \"aria-label\",\n \"placeholder\",\n \"title\",\n \"alt\"\n ];\n const attributes = attributePriority.map((attr) => {\n let value = el.getAttribute(attr);\n if (attr === \"href-full\" && value) {\n value = el.getAttribute(\"href\");\n }\n return value ? { attr: attr === \"href-full\" ? \"href\" : attr, value } : null;\n }).filter((attr) => attr !== null);\n let uniqueSelector = \"\";\n for (let i = 1; i <= attributes.length; i++) {\n const combinations = getCombinations(attributes, i);\n for (const combo of combinations) {\n const conditions = combo.map((a) => `@${a.attr}=${escapeXPathString(a.value)}`).join(\" and \");\n const xpath2 = `//${selector}[${conditions}]`;\n if (isXPathFirstResultElement(xpath2, el)) {\n uniqueSelector = xpath2;\n break;\n }\n }\n if (uniqueSelector) break;\n }\n if (uniqueSelector) {\n parts.unshift(uniqueSelector.replace(\"//\", \"\"));\n break;\n } else {\n const parent = getParentElement(el);\n if (parent) {\n const siblings = Array.from(parent.children).filter(\n (sibling) => sibling.tagName === el.tagName\n );\n const index = siblings.indexOf(el) + 1;\n selector += siblings.length > 1 ? 
`[${index}]` : \"\";\n }\n parts.unshift(selector);\n }\n }\n currentElement = getParentElement(currentElement);\n }\n const xpath = \"//\" + parts.join(\"/\");\n return xpath;\n }\n async function generateStandardXPath(element) {\n const parts = [];\n while (element && (isTextNode(element) || isElementNode(element))) {\n let index = 0;\n let hasSameTypeSiblings = false;\n const siblings = element.parentElement ? Array.from(element.parentElement.childNodes) : [];\n for (let i = 0; i < siblings.length; i++) {\n const sibling = siblings[i];\n if (sibling.nodeType === element.nodeType && sibling.nodeName === element.nodeName) {\n index = index + 1;\n hasSameTypeSiblings = true;\n if (sibling.isSameNode(element)) {\n break;\n }\n }\n }\n if (element.nodeName !== \"#text\") {\n const tagName = element.nodeName.toLowerCase();\n const pathIndex = hasSameTypeSiblings ? `[${index}]` : \"\";\n parts.unshift(`${tagName}${pathIndex}`);\n }\n element = element.parentElement;\n }\n return parts.length ? 
`/${parts.join(\"/\")}` : \"\";\n }\n async function generatedIdBasedXPath(element) {\n if (isElementNode(element) && element.id) {\n return `//*[@id='${element.id}']`;\n }\n return null;\n }\n\n // lib/dom/utils.ts\n async function waitForDomSettle() {\n return new Promise((resolve) => {\n const createTimeout = () => {\n return setTimeout(() => {\n resolve();\n }, 2e3);\n };\n let timeout = createTimeout();\n const observer = new MutationObserver(() => {\n clearTimeout(timeout);\n timeout = createTimeout();\n });\n observer.observe(window.document.body, { childList: true, subtree: true });\n });\n }\n window.waitForDomSettle = waitForDomSettle;\n function calculateViewportHeight() {\n return Math.ceil(window.innerHeight * 0.75);\n }\n\n // lib/dom/process.ts\n function isElementNode(node) {\n return node.nodeType === Node.ELEMENT_NODE;\n }\n function isTextNode(node) {\n return node.nodeType === Node.TEXT_NODE && Boolean(node.textContent?.trim());\n }\n async function processDom(chunksSeen) {\n const { chunk, chunksArray } = await pickChunk(chunksSeen);\n const { outputString, selectorMap } = await processElements(chunk);\n console.log(\n `Stagehand (Browser Process): Extracted dom elements:\n${outputString}`\n );\n return {\n outputString,\n selectorMap,\n chunk,\n chunks: chunksArray\n };\n }\n async function processAllOfDom() {\n console.log(\"Stagehand (Browser Process): Processing all of DOM\");\n const viewportHeight = calculateViewportHeight();\n const documentHeight = document.documentElement.scrollHeight;\n const totalChunks = Math.ceil(documentHeight / viewportHeight);\n let index = 0;\n const results = [];\n for (let chunk = 0; chunk < totalChunks; chunk++) {\n const result = await processElements(chunk, true, index);\n results.push(result);\n index += Object.keys(result.selectorMap).length;\n }\n await scrollToHeight(0);\n const allOutputString = results.map((result) => result.outputString).join(\"\");\n const allSelectorMap = results.reduce(\n (acc, 
result) => ({ ...acc, ...result.selectorMap }),\n {}\n );\n console.log(\n `Stagehand (Browser Process): All dom elements: ${allOutputString}`\n );\n return {\n outputString: allOutputString,\n selectorMap: allSelectorMap\n };\n }\n async function scrollToHeight(height) {\n window.scrollTo({ top: height, left: 0, behavior: \"smooth\" });\n await new Promise((resolve) => {\n let scrollEndTimer;\n const handleScrollEnd = () => {\n clearTimeout(scrollEndTimer);\n scrollEndTimer = window.setTimeout(() => {\n window.removeEventListener(\"scroll\", handleScrollEnd);\n resolve();\n }, 100);\n };\n window.addEventListener(\"scroll\", handleScrollEnd, { passive: true });\n handleScrollEnd();\n });\n }\n var xpathCache = /* @__PURE__ */ new Map();\n async function processElements(chunk, scrollToChunk = true, indexOffset = 0) {\n console.time(\"processElements:total\");\n const viewportHeight = calculateViewportHeight();\n const chunkHeight = viewportHeight * chunk;\n const maxScrollTop = document.documentElement.scrollHeight - viewportHeight;\n const offsetTop = Math.min(chunkHeight, maxScrollTop);\n if (scrollToChunk) {\n console.time(\"processElements:scroll\");\n await scrollToHeight(offsetTop);\n console.timeEnd(\"processElements:scroll\");\n }\n const candidateElements = [];\n const DOMQueue = [...document.body.childNodes];\n console.log(\"Stagehand (Browser Process): Generating candidate elements\");\n console.time(\"processElements:findCandidates\");\n while (DOMQueue.length > 0) {\n const element = DOMQueue.pop();\n let shouldAddElement = false;\n if (element && isElementNode(element)) {\n const childrenCount = element.childNodes.length;\n for (let i = childrenCount - 1; i >= 0; i--) {\n const child = element.childNodes[i];\n DOMQueue.push(child);\n }\n if (isInteractiveElement(element)) {\n if (isActive(element) && isVisible(element)) {\n shouldAddElement = true;\n }\n }\n if (isLeafElement(element)) {\n if (isActive(element) && isVisible(element)) {\n 
shouldAddElement = true;\n }\n }\n }\n if (element && isTextNode(element) && isTextVisible(element)) {\n shouldAddElement = true;\n }\n if (shouldAddElement) {\n candidateElements.push(element);\n }\n }\n console.timeEnd(\"processElements:findCandidates\");\n const selectorMap = {};\n let outputString = \"\";\n console.log(\n `Stagehand (Browser Process): Processing candidate elements: ${candidateElements.length}`\n );\n console.time(\"processElements:processCandidates\");\n console.time(\"processElements:generateXPaths\");\n const xpathLists = await Promise.all(\n candidateElements.map(async (element) => {\n if (xpathCache.has(element)) {\n return xpathCache.get(element);\n }\n const xpaths = await generateXPathsForElement(element);\n xpathCache.set(element, xpaths);\n return xpaths;\n })\n );\n console.timeEnd(\"processElements:generateXPaths\");\n candidateElements.forEach((element, index) => {\n const xpaths = xpathLists[index];\n let elementOutput = \"\";\n if (isTextNode(element)) {\n const textContent = element.textContent?.trim();\n if (textContent) {\n elementOutput += `${index + indexOffset}:${textContent}\n`;\n }\n } else if (isElementNode(element)) {\n const tagName = element.tagName.toLowerCase();\n const attributes = collectEssentialAttributes(element);\n const openingTag = `<${tagName}${attributes ? 
\" \" + attributes : \"\"}>`;\n const closingTag = `</${tagName}>`;\n const textContent = element.textContent?.trim() || \"\";\n elementOutput += `${index + indexOffset}:${openingTag}${textContent}${closingTag}\n`;\n }\n outputString += elementOutput;\n selectorMap[index + indexOffset] = xpaths;\n });\n console.timeEnd(\"processElements:processCandidates\");\n console.timeEnd(\"processElements:total\");\n return {\n outputString,\n selectorMap\n };\n }\n function collectEssentialAttributes(element) {\n const essentialAttributes = [\n \"id\",\n \"class\",\n \"href\",\n \"src\",\n \"aria-label\",\n \"aria-name\",\n \"aria-role\",\n \"aria-description\",\n \"aria-expanded\",\n \"aria-haspopup\",\n \"type\",\n \"value\"\n ];\n const attrs = essentialAttributes.map((attr) => {\n const value = element.getAttribute(attr);\n return value ? `${attr}=\"${value}\"` : \"\";\n }).filter((attr) => attr !== \"\");\n Array.from(element.attributes).forEach((attr) => {\n if (attr.name.startsWith(\"data-\")) {\n attrs.push(`${attr.name}=\"${attr.value}\"`);\n }\n });\n return attrs.join(\" \");\n }\n window.processDom = processDom;\n window.processAllOfDom = processAllOfDom;\n window.processElements = processElements;\n window.scrollToHeight = scrollToHeight;\n var leafElementDenyList = [\"SVG\", \"IFRAME\", \"SCRIPT\", \"STYLE\", \"LINK\"];\n var interactiveElementTypes = [\n \"A\",\n \"BUTTON\",\n \"DETAILS\",\n \"EMBED\",\n \"INPUT\",\n \"LABEL\",\n \"MENU\",\n \"MENUITEM\",\n \"OBJECT\",\n \"SELECT\",\n \"TEXTAREA\",\n \"SUMMARY\"\n ];\n var interactiveRoles = [\n \"button\",\n \"menu\",\n \"menuitem\",\n \"link\",\n \"checkbox\",\n \"radio\",\n \"slider\",\n \"tab\",\n \"tabpanel\",\n \"textbox\",\n \"combobox\",\n \"grid\",\n \"listbox\",\n \"option\",\n \"progressbar\",\n \"scrollbar\",\n \"searchbox\",\n \"switch\",\n \"tree\",\n \"treeitem\",\n \"spinbutton\",\n \"tooltip\"\n ];\n var interactiveAriaRoles = [\"menu\", \"menuitem\", \"button\"];\n var isVisible = (element) => 
{\n const rect = element.getBoundingClientRect();\n if (rect.width === 0 || rect.height === 0 || rect.top < 0 || rect.top > window.innerHeight) {\n return false;\n }\n if (!isTopElement(element, rect)) {\n return false;\n }\n const visible = element.checkVisibility({\n checkOpacity: true,\n checkVisibilityCSS: true\n });\n return visible;\n };\n var isTextVisible = (element) => {\n const range = document.createRange();\n range.selectNodeContents(element);\n const rect = range.getBoundingClientRect();\n if (rect.width === 0 || rect.height === 0 || rect.top < 0 || rect.top > window.innerHeight) {\n return false;\n }\n const parent = element.parentElement;\n if (!parent) {\n return false;\n }\n if (!isTopElement(parent, rect)) {\n return false;\n }\n const visible = parent.checkVisibility({\n checkOpacity: true,\n checkVisibilityCSS: true\n });\n return visible;\n };\n function isTopElement(elem, rect) {\n const points = [\n { x: rect.left + rect.width * 0.25, y: rect.top + rect.height * 0.25 },\n { x: rect.left + rect.width * 0.75, y: rect.top + rect.height * 0.25 },\n { x: rect.left + rect.width * 0.25, y: rect.top + rect.height * 0.75 },\n { x: rect.left + rect.width * 0.75, y: rect.top + rect.height * 0.75 },\n { x: rect.left + rect.width / 2, y: rect.top + rect.height / 2 }\n ];\n return points.some((point) => {\n const topEl = document.elementFromPoint(point.x, point.y);\n let current = topEl;\n while (current && current !== document.body) {\n if (current.isSameNode(elem)) {\n return true;\n }\n current = current.parentElement;\n }\n return false;\n });\n }\n var isActive = (element) => {\n if (element.hasAttribute(\"disabled\") || element.hasAttribute(\"hidden\") || element.getAttribute(\"aria-disabled\") === \"true\") {\n return false;\n }\n return true;\n };\n var isInteractiveElement = (element) => {\n const elementType = element.tagName;\n const elementRole = element.getAttribute(\"role\");\n const elementAriaRole = element.getAttribute(\"aria-role\");\n 
return elementType && interactiveElementTypes.includes(elementType) || elementRole && interactiveRoles.includes(elementRole) || elementAriaRole && interactiveAriaRoles.includes(elementAriaRole);\n };\n var isLeafElement = (element) => {\n if (element.textContent === \"\") {\n return false;\n }\n if (element.childNodes.length === 0) {\n return !leafElementDenyList.includes(element.tagName);\n }\n if (element.childNodes.length === 1 && isTextNode(element.childNodes[0])) {\n return true;\n }\n return false;\n };\n async function pickChunk(chunksSeen) {\n const viewportHeight = calculateViewportHeight();\n const documentHeight = document.documentElement.scrollHeight;\n const chunks = Math.ceil(documentHeight / viewportHeight);\n const chunksArray = Array.from({ length: chunks }, (_, i) => i);\n const chunksRemaining = chunksArray.filter((chunk2) => {\n return !chunksSeen.includes(chunk2);\n });\n const currentScrollPosition = window.scrollY;\n const closestChunk = chunksRemaining.reduce((closest, current) => {\n const currentChunkTop = viewportHeight * current;\n const closestChunkTop = viewportHeight * closest;\n return Math.abs(currentScrollPosition - currentChunkTop) < Math.abs(currentScrollPosition - closestChunkTop) ? 
current : closest;\n }, chunksRemaining[0]);\n const chunk = closestChunk;\n if (chunk === void 0) {\n throw new Error(`No chunks remaining to check: ${chunksRemaining}`);\n }\n return {\n chunk,\n chunksArray\n };\n }\n\n // lib/dom/debug.ts\n async function debugDom() {\n window.chunkNumber = 0;\n const { selectorMap: multiSelectorMap } = await window.processElements(\n window.chunkNumber\n );\n const selectorMap = multiSelectorMapToSelectorMap(multiSelectorMap);\n drawChunk(selectorMap);\n setupChunkNav();\n }\n function multiSelectorMapToSelectorMap(multiSelectorMap) {\n return Object.fromEntries(\n Object.entries(multiSelectorMap).map(([key, selectors]) => [\n Number(key),\n selectors[0]\n ])\n );\n }\n function drawChunk(selectorMap) {\n if (!window.showChunks) return;\n cleanupMarkers();\n Object.values(selectorMap).forEach((selector) => {\n const element = document.evaluate(\n selector,\n document,\n null,\n XPathResult.FIRST_ORDERED_NODE_TYPE,\n null\n ).singleNodeValue;\n if (element) {\n let rect;\n if (element.nodeType === Node.ELEMENT_NODE) {\n rect = element.getBoundingClientRect();\n } else {\n const range = document.createRange();\n range.selectNodeContents(element);\n rect = range.getBoundingClientRect();\n }\n const color = \"grey\";\n const overlay = document.createElement(\"div\");\n overlay.style.position = \"absolute\";\n overlay.style.left = `${rect.left + window.scrollX}px`;\n overlay.style.top = `${rect.top + window.scrollY}px`;\n overlay.style.padding = \"2px\";\n overlay.style.width = `${rect.width}px`;\n overlay.style.height = `${rect.height}px`;\n overlay.style.backgroundColor = color;\n overlay.className = \"stagehand-marker\";\n overlay.style.opacity = \"0.3\";\n overlay.style.zIndex = \"1000000000\";\n overlay.style.border = \"1px solid\";\n overlay.style.pointerEvents = \"none\";\n document.body.appendChild(overlay);\n }\n });\n }\n async function cleanupDebug() {\n cleanupMarkers();\n cleanupNav();\n }\n function cleanupMarkers() 
{\n const markers = document.querySelectorAll(\".stagehand-marker\");\n markers.forEach((marker) => {\n marker.remove();\n });\n }\n function cleanupNav() {\n const stagehandNavElements = document.querySelectorAll(\".stagehand-nav\");\n stagehandNavElements.forEach((element) => {\n element.remove();\n });\n }\n function setupChunkNav() {\n const viewportHeight = calculateViewportHeight();\n const documentHeight = document.documentElement.scrollHeight;\n const totalChunks = Math.ceil(documentHeight / viewportHeight);\n if (window.chunkNumber > 0) {\n const prevChunkButton = document.createElement(\"button\");\n prevChunkButton.className = \"stagehand-nav\";\n prevChunkButton.textContent = \"Previous\";\n prevChunkButton.style.marginLeft = \"50px\";\n prevChunkButton.style.position = \"fixed\";\n prevChunkButton.style.bottom = \"10px\";\n prevChunkButton.style.left = \"50%\";\n prevChunkButton.style.transform = \"translateX(-50%)\";\n prevChunkButton.style.zIndex = \"1000000000\";\n prevChunkButton.onclick = async () => {\n cleanupMarkers();\n cleanupNav();\n window.chunkNumber -= 1;\n window.scrollTo(0, window.chunkNumber * viewportHeight);\n await window.waitForDomSettle();\n const { selectorMap: multiSelectorMap } = await window.processElements(\n window.chunkNumber\n );\n const selectorMap = multiSelectorMapToSelectorMap(multiSelectorMap);\n drawChunk(selectorMap);\n setupChunkNav();\n };\n document.body.appendChild(prevChunkButton);\n }\n if (totalChunks > window.chunkNumber) {\n const nextChunkButton = document.createElement(\"button\");\n nextChunkButton.className = \"stagehand-nav\";\n nextChunkButton.textContent = \"Next\";\n nextChunkButton.style.marginRight = \"50px\";\n nextChunkButton.style.position = \"fixed\";\n nextChunkButton.style.bottom = \"10px\";\n nextChunkButton.style.right = \"50%\";\n nextChunkButton.style.transform = \"translateX(50%)\";\n nextChunkButton.style.zIndex = \"1000000000\";\n nextChunkButton.onclick = async () => {\n 
cleanupMarkers();\n cleanupNav();\n window.chunkNumber += 1;\n window.scrollTo(0, window.chunkNumber * viewportHeight);\n await window.waitForDomSettle();\n const { selectorMap: multiSelectorMap } = await window.processElements(\n window.chunkNumber\n );\n const selectorMap = multiSelectorMapToSelectorMap(multiSelectorMap);\n drawChunk(selectorMap);\n setupChunkNav();\n };\n document.body.appendChild(nextChunkButton);\n }\n }\n window.debugDom = debugDom;\n window.cleanupDebug = cleanupDebug;\n})();\n"; |
@@ -6,4 +6,5 @@ import { calculateViewportHeight } from "./utils"; | ||
const { selectorMap: multiSelectorMap, outputString } = | ||
await window.processElements(window.chunkNumber); | ||
const { selectorMap: multiSelectorMap } = await window.processElements( | ||
window.chunkNumber, | ||
); | ||
@@ -30,3 +31,3 @@ const selectorMap = multiSelectorMapToSelectorMap(multiSelectorMap); | ||
cleanupMarkers(); | ||
Object.entries(selectorMap).forEach(([_index, selector]) => { | ||
Object.values(selectorMap).forEach((selector) => { | ||
const element = document.evaluate( | ||
@@ -33,0 +34,0 @@ selector as string, |
@@ -24,3 +24,6 @@ export {}; | ||
waitForDomSettle: () => Promise<void>; | ||
__playwright?: unknown; | ||
__pw_manual?: unknown; | ||
__PW_inspect?: unknown; | ||
} | ||
} |
@@ -95,4 +95,3 @@ import { generateXPathsForElement as generateXPaths } from "./xpathUtils"; | ||
// Calculate the maximum scrollable offset | ||
const maxScrollTop = | ||
document.documentElement.scrollHeight - viewportHeight; | ||
const maxScrollTop = document.documentElement.scrollHeight - viewportHeight; | ||
@@ -99,0 +98,0 @@ // Adjust the offsetTop to not exceed the maximum scrollable offset |
@@ -21,2 +21,2 @@ export async function waitForDomSettle() { | ||
return Math.ceil(window.innerHeight * 0.75); | ||
} | ||
} |
@@ -1,9 +0,2 @@ | ||
import { Stagehand } from "../index"; | ||
import { LLMProvider } from "../llm/LLMProvider"; | ||
import { ScreenshotService } from "../vision"; | ||
import { verifyActCompletion, act, fillInVariables } from "../inference"; | ||
import { Locator, Page } from "@playwright/test"; | ||
import { ActionCache } from "../cache/ActionCache"; | ||
import { LLMClient, modelsWithVision } from "../llm/LLMClient"; | ||
import { generateId } from "../utils"; | ||
import { LogLine } from "../../types/log"; | ||
@@ -14,2 +7,9 @@ import { | ||
} from "../../types/playwright"; | ||
import { ActionCache } from "../cache/ActionCache"; | ||
import { Stagehand } from "../index"; | ||
import { act, fillInVariables, verifyActCompletion } from "../inference"; | ||
import { LLMClient } from "../llm/LLMClient"; | ||
import { LLMProvider } from "../llm/LLMProvider"; | ||
import { generateId } from "../utils"; | ||
import { ScreenshotService } from "../vision"; | ||
@@ -26,3 +26,2 @@ export class StagehandActHandler { | ||
private readonly actionCache: ActionCache | undefined; | ||
private readonly llmClient: LLMClient; | ||
private readonly startDomDebug: () => Promise<void>; | ||
@@ -39,3 +38,2 @@ private readonly cleanupDomDebug: () => Promise<void>; | ||
waitForSettledDom, | ||
llmClient, | ||
startDomDebug, | ||
@@ -61,3 +59,2 @@ cleanupDomDebug, | ||
this.actionCache = enableCaching ? new ActionCache(this.logger) : undefined; | ||
this.llmClient = llmClient; | ||
this.startDomDebug = startDomDebug; | ||
@@ -194,3 +191,3 @@ this.cleanupDomDebug = cleanupDomDebug; | ||
method: string, | ||
args: string[], | ||
args: unknown[], | ||
xpath: string, | ||
@@ -215,3 +212,3 @@ domSettleTimeoutMs?: number, | ||
await locator | ||
.evaluate((element: any) => { | ||
.evaluate((element: HTMLElement) => { | ||
element.scrollIntoView({ behavior: "smooth", block: "center" }); | ||
@@ -267,3 +264,3 @@ }) | ||
await locator.click(); | ||
const text = args[0]; | ||
const text = args[0]?.toString(); | ||
for (const char of text) { | ||
@@ -299,3 +296,3 @@ await this.stagehand.page.keyboard.type(char, { | ||
try { | ||
const key = args[0]; | ||
const key = args[0]?.toString(); | ||
await this.stagehand.page.keyboard.press(key); | ||
@@ -341,4 +338,7 @@ } catch (e) { | ||
try { | ||
// @ts-ignore | ||
await locator[method](...args); | ||
await ( | ||
locator[method as keyof Locator] as unknown as ( | ||
...args: string[] | ||
) => Promise<void> | ||
)(...args.map((arg) => arg?.toString() || "")); | ||
} catch (e) { | ||
@@ -433,3 +433,3 @@ this.logger({ | ||
new Promise((resolve) => setTimeout(resolve, 5_000)), | ||
]).catch((e: Error) => { | ||
]).catch((e) => { | ||
this.logger({ | ||
@@ -439,2 +439,12 @@ category: "action", | ||
level: 1, | ||
auxiliary: { | ||
trace: { | ||
value: e.stack, | ||
type: "string", | ||
}, | ||
message: { | ||
value: e.message, | ||
type: "string", | ||
}, | ||
}, | ||
}); | ||
@@ -631,3 +641,3 @@ }); | ||
// First try to get the value (for input/textarea elements) | ||
let currentComponent = await this._getComponentString(locator); | ||
const currentComponent = await this._getComponentString(locator); | ||
@@ -859,5 +869,4 @@ this.logger({ | ||
steps = steps + cachedStep.newStepString; | ||
const { outputString, selectorMap } = await this.stagehand.page.evaluate( | ||
await this.stagehand.page.evaluate( | ||
({ chunksSeen }: { chunksSeen: number[] }) => { | ||
// @ts-ignore | ||
return window.processDom(chunksSeen); | ||
@@ -870,3 +879,3 @@ }, | ||
// Verify the action was completed successfully | ||
let actionCompleted = await this._verifyActionCompletion({ | ||
const actionCompleted = await this._verifyActionCompletion({ | ||
completed: true, | ||
@@ -1050,3 +1059,2 @@ verifierUseVision, | ||
({ chunksSeen }: { chunksSeen: number[] }) => { | ||
// @ts-ignore | ||
return window.processDom(chunksSeen); | ||
@@ -1283,3 +1291,3 @@ }, | ||
method, | ||
args: responseArgs, | ||
args: responseArgs.map((arg) => arg?.toString() || ""), | ||
}, | ||
@@ -1286,0 +1294,0 @@ componentString, |
@@ -58,3 +58,2 @@ import { LLMProvider } from "../llm/LLMProvider"; | ||
schema, | ||
progress = "", | ||
content = {}, | ||
@@ -68,3 +67,2 @@ chunksSeen = [], | ||
schema: T; | ||
progress?: string; | ||
content?: z.infer<T>; | ||
@@ -116,3 +114,2 @@ chunksSeen?: Array<number>; | ||
instruction, | ||
progress, | ||
previouslyExtractedContent: content, | ||
@@ -128,3 +125,3 @@ domElements: outputString, | ||
const { | ||
metadata: { progress: newProgress, completed }, | ||
metadata: { completed }, | ||
...output | ||
@@ -175,3 +172,2 @@ } = extractionResponse; | ||
schema, | ||
progress: newProgress, | ||
content: output, | ||
@@ -178,0 +174,0 @@ chunksSeen, |
@@ -100,3 +100,3 @@ import { LogLine } from "../../types/log"; | ||
await this.startDomDebug(); | ||
let { outputString, selectorMap } = await this.stagehand.page.evaluate( | ||
const evalResult = await this.stagehand.page.evaluate( | ||
(fullPage: boolean) => | ||
@@ -107,2 +107,6 @@ fullPage ? window.processAllOfDom() : window.processDom([]), | ||
const { selectorMap } = evalResult; | ||
// has to be like this atm because of the re-assignment | ||
let { outputString } = evalResult; | ||
let annotatedScreenshot: Buffer | undefined; | ||
@@ -109,0 +113,0 @@ if (useVision === true) { |
import { Browserbase } from "@browserbasehq/sdk"; | ||
import { type BrowserContext, chromium, type Page } from "@playwright/test"; | ||
import { randomUUID } from "crypto"; | ||
import dotenv from "dotenv"; | ||
import fs from "fs"; | ||
@@ -10,2 +11,3 @@ import os from "os"; | ||
import { LogLine } from "../types/log"; | ||
import { GotoOptions } from "../types/playwright"; | ||
import { | ||
@@ -32,3 +34,3 @@ ActOptions, | ||
require("dotenv").config({ path: ".env" }); | ||
dotenv.config({ path: ".env" }); | ||
@@ -276,5 +278,5 @@ const DEFAULT_MODEL_NAME = "gpt-4o"; | ||
// Remove Playwright-specific properties | ||
delete (window as any).__playwright; | ||
delete (window as any).__pw_manual; | ||
delete (window as any).__PW_inspect; | ||
delete window.__playwright; | ||
delete window.__pw_manual; | ||
delete window.__PW_inspect; | ||
@@ -288,3 +290,3 @@ // Redefine the headless property | ||
const originalQuery = window.navigator.permissions.query; | ||
window.navigator.permissions.query = (parameters: any) => | ||
window.navigator.permissions.query = (parameters) => | ||
parameters.name === "notifications" | ||
@@ -314,3 +316,3 @@ ? Promise.resolve({ | ||
private enableCaching: boolean; | ||
private variables: { [key: string]: any }; | ||
private variables: { [key: string]: unknown }; | ||
private browserbaseResumeSessionID?: string; | ||
@@ -383,7 +385,8 @@ private contextPath?: string; | ||
console.error("Error in init:", e); | ||
return { | ||
const br: BrowserResult = { | ||
context: undefined, | ||
debugUrl: undefined, | ||
sessionUrl: undefined, | ||
} as BrowserResult; | ||
}; | ||
return br; | ||
}); | ||
@@ -400,3 +403,3 @@ this.contextPath = contextPath; | ||
const originalGoto = this.page.goto.bind(this.page); | ||
this.page.goto = async (url: string, options?: any) => { | ||
this.page.goto = async (url: string, options: GotoOptions) => { | ||
const result = await originalGoto(url, options); | ||
@@ -470,3 +473,3 @@ if (this.debugDom) { | ||
const originalGoto = this.page.goto.bind(this.page); | ||
this.page.goto = async (url: string, options?: any) => { | ||
this.page.goto = async (url: string, options?: GotoOptions) => { | ||
const result = await originalGoto(url, options); | ||
@@ -555,6 +558,21 @@ if (this.debugDom) { | ||
}) | ||
.catch((e) => { | ||
.catch(() => { | ||
// NAVIDTODO: Rerun the log call on the new page | ||
// This is expected to happen when the user is changing pages | ||
// console.error("Logging Error:", e); | ||
// this.log({ | ||
// category: "browserbase", | ||
// message: "error logging to browserbase", | ||
// level: 1, | ||
// auxiliary: { | ||
// trace: { | ||
// value: e.stack, | ||
// type: "string", | ||
// }, | ||
// message: { | ||
// value: e.message, | ||
// type: "string", | ||
// }, | ||
// }, | ||
// }); | ||
}); | ||
@@ -569,3 +587,3 @@ } | ||
const timeoutPromise = new Promise<void>((resolve, reject) => { | ||
const timeoutPromise = new Promise<void>((resolve) => { | ||
timeoutHandle = setTimeout(() => { | ||
@@ -572,0 +590,0 @@ this.log({ |
@@ -36,9 +36,13 @@ import { | ||
}: VerifyActCompletionParams): Promise<boolean> { | ||
const messages: ChatMessage[] = [ | ||
buildVerifyActCompletionSystemPrompt(), | ||
buildVerifyActCompletionUserPrompt(goal, steps, domElements), | ||
]; | ||
const verificationSchema = z.object({ | ||
completed: z.boolean().describe("true if the goal is accomplished"), | ||
}); | ||
const response = await llmClient.createChatCompletion({ | ||
messages, | ||
type VerificationResponse = z.infer<typeof verificationSchema>; | ||
const response = await llmClient.createChatCompletion<VerificationResponse>({ | ||
messages: [ | ||
buildVerifyActCompletionSystemPrompt(), | ||
buildVerifyActCompletionUserPrompt(goal, steps, domElements), | ||
], | ||
temperature: 0.1, | ||
@@ -56,5 +60,3 @@ top_p: 1, | ||
name: "Verification", | ||
schema: z.object({ | ||
completed: z.boolean().describe("true if the goal is accomplished"), | ||
}), | ||
schema: verificationSchema, | ||
}, | ||
@@ -156,3 +158,2 @@ requestId, | ||
instruction, | ||
progress, | ||
previouslyExtractedContent, | ||
@@ -167,6 +168,5 @@ domElements, | ||
instruction: string; | ||
progress: string; | ||
previouslyExtractedContent: any; | ||
previouslyExtractedContent: object; | ||
domElements: string; | ||
schema: z.ZodObject<any>; | ||
schema: z.ZodObject<z.ZodRawShape>; | ||
llmClient: LLMClient; | ||
@@ -177,38 +177,43 @@ chunksSeen: number; | ||
}) { | ||
const extractionResponse = await llmClient.createChatCompletion({ | ||
messages: [ | ||
buildExtractSystemPrompt(), | ||
buildExtractUserPrompt(instruction, domElements), | ||
], | ||
response_model: { | ||
schema: schema, | ||
name: "Extraction", | ||
}, | ||
temperature: 0.1, | ||
top_p: 1, | ||
frequency_penalty: 0, | ||
presence_penalty: 0, | ||
requestId, | ||
}); | ||
type ExtractionResponse = z.infer<typeof schema>; | ||
type MetadataResponse = z.infer<typeof metadataSchema>; | ||
const refinedResponse = await llmClient.createChatCompletion({ | ||
messages: [ | ||
buildRefineSystemPrompt(), | ||
buildRefineUserPrompt( | ||
instruction, | ||
previouslyExtractedContent, | ||
extractionResponse, | ||
), | ||
], | ||
response_model: { | ||
schema: schema, | ||
name: "RefinedExtraction", | ||
}, | ||
temperature: 0.1, | ||
top_p: 1, | ||
frequency_penalty: 0, | ||
presence_penalty: 0, | ||
requestId, | ||
}); | ||
const extractionResponse = | ||
await llmClient.createChatCompletion<ExtractionResponse>({ | ||
messages: [ | ||
buildExtractSystemPrompt(), | ||
buildExtractUserPrompt(instruction, domElements), | ||
], | ||
response_model: { | ||
schema: schema, | ||
name: "Extraction", | ||
}, | ||
temperature: 0.1, | ||
top_p: 1, | ||
frequency_penalty: 0, | ||
presence_penalty: 0, | ||
requestId, | ||
}); | ||
const refinedResponse = | ||
await llmClient.createChatCompletion<ExtractionResponse>({ | ||
messages: [ | ||
buildRefineSystemPrompt(), | ||
buildRefineUserPrompt( | ||
instruction, | ||
previouslyExtractedContent, | ||
extractionResponse, | ||
), | ||
], | ||
response_model: { | ||
schema: schema, | ||
name: "RefinedExtraction", | ||
}, | ||
temperature: 0.1, | ||
top_p: 1, | ||
frequency_penalty: 0, | ||
presence_penalty: 0, | ||
requestId, | ||
}); | ||
const metadataSchema = z.object({ | ||
@@ -227,26 +232,28 @@ progress: z | ||
const metadataResponse = await llmClient.createChatCompletion({ | ||
messages: [ | ||
buildMetadataSystemPrompt(), | ||
buildMetadataPrompt( | ||
instruction, | ||
refinedResponse, | ||
chunksSeen, | ||
chunksTotal, | ||
), | ||
], | ||
response_model: { | ||
name: "Metadata", | ||
schema: metadataSchema, | ||
}, | ||
temperature: 0.1, | ||
top_p: 1, | ||
frequency_penalty: 0, | ||
presence_penalty: 0, | ||
requestId, | ||
}); | ||
const metadataResponse = | ||
await llmClient.createChatCompletion<MetadataResponse>({ | ||
messages: [ | ||
buildMetadataSystemPrompt(), | ||
buildMetadataPrompt( | ||
instruction, | ||
refinedResponse, | ||
chunksSeen, | ||
chunksTotal, | ||
), | ||
], | ||
response_model: { | ||
name: "Metadata", | ||
schema: metadataSchema, | ||
}, | ||
temperature: 0.1, | ||
top_p: 1, | ||
frequency_penalty: 0, | ||
presence_penalty: 0, | ||
requestId, | ||
}); | ||
refinedResponse.metadata = metadataResponse; | ||
return refinedResponse; | ||
return { | ||
...refinedResponse, | ||
metadata: metadataResponse, | ||
}; | ||
} | ||
@@ -284,26 +291,33 @@ | ||
const observationResponse = await llmClient.createChatCompletion({ | ||
messages: [ | ||
buildObserveSystemPrompt(), | ||
buildObserveUserMessage(instruction, domElements), | ||
], | ||
image: image | ||
? { buffer: image, description: AnnotatedScreenshotText } | ||
: undefined, | ||
response_model: { | ||
schema: observeSchema, | ||
name: "Observation", | ||
}, | ||
temperature: 0.1, | ||
top_p: 1, | ||
frequency_penalty: 0, | ||
presence_penalty: 0, | ||
requestId, | ||
}); | ||
type ObserveResponse = z.infer<typeof observeSchema>; | ||
if (!observationResponse) { | ||
throw new Error("no response when finding a selector"); | ||
} | ||
const observationResponse = | ||
await llmClient.createChatCompletion<ObserveResponse>({ | ||
messages: [ | ||
buildObserveSystemPrompt(), | ||
buildObserveUserMessage(instruction, domElements), | ||
], | ||
image: image | ||
? { buffer: image, description: AnnotatedScreenshotText } | ||
: undefined, | ||
response_model: { | ||
schema: observeSchema, | ||
name: "Observation", | ||
}, | ||
temperature: 0.1, | ||
top_p: 1, | ||
frequency_penalty: 0, | ||
presence_penalty: 0, | ||
requestId, | ||
}); | ||
return observationResponse; | ||
const parsedResponse = { | ||
elements: | ||
observationResponse.elements?.map((el) => ({ | ||
elementId: Number(el.elementId), | ||
description: String(el.description), | ||
})) ?? [], | ||
} satisfies { elements: { elementId: number; description: string }[] }; | ||
return parsedResponse; | ||
} | ||
@@ -310,0 +324,0 @@ |
import Anthropic, { ClientOptions } from "@anthropic-ai/sdk"; | ||
import { Message, MessageCreateParams } from "@anthropic-ai/sdk/resources"; | ||
import { | ||
ImageBlockParam, | ||
MessageParam, | ||
TextBlockParam, | ||
Tool, | ||
} from "@anthropic-ai/sdk/resources"; | ||
import { zodToJsonSchema } from "zod-to-json-schema"; | ||
import { LogLine } from "../../types/log"; | ||
import { AvailableModel } from "../../types/model"; | ||
import { | ||
AnthropicJsonSchemaObject, | ||
AnthropicTransformedResponse, | ||
AvailableModel, | ||
} from "../../types/model"; | ||
import { LLMCache } from "../cache/LLMCache"; | ||
@@ -30,7 +39,8 @@ import { ChatCompletionOptions, LLMClient } from "./LLMClient"; | ||
async createChatCompletion( | ||
async createChatCompletion<T = AnthropicTransformedResponse>( | ||
options: ChatCompletionOptions & { retries?: number }, | ||
): Promise<any> { | ||
// TODO (kamath): remove this forced typecast | ||
const { image: _, ...optionsWithoutImage } = options; | ||
): Promise<T> { | ||
const optionsWithoutImage = { ...options }; | ||
delete optionsWithoutImage.image; | ||
this.logger({ | ||
@@ -59,3 +69,3 @@ category: "anthropic", | ||
if (this.enableCaching) { | ||
const cachedResponse = await this.cache.get( | ||
const cachedResponse = await this.cache.get<T>( | ||
cacheOptions, | ||
@@ -84,3 +94,3 @@ options.requestId, | ||
}); | ||
return cachedResponse; | ||
return cachedResponse as T; | ||
} else { | ||
@@ -105,3 +115,13 @@ this.logger({ | ||
const systemMessage = options.messages.find((msg) => msg.role === "system"); | ||
// Pick the first system message that can be handed to Anthropic's `system`
// field: its content is either a plain string or an array of parts in which
// no part is an image.
const systemMessage = options.messages.find((msg) => {
  if (msg.role !== "system") {
    return false;
  }
  if (typeof msg.content === "string") {
    return true;
  }
  return (
    Array.isArray(msg.content) &&
    msg.content.every((part) => part.type !== "image_url")
  );
});
const userMessages = options.messages.filter( | ||
@@ -111,4 +131,33 @@ (msg) => msg.role !== "system", | ||
// Convert the non-system messages into Anthropic `MessageParam`s.
const formattedMessages: MessageParam[] = userMessages.map((msg) => {
  // `userMessages` excludes system messages, so the role is narrowed to the
  // two values a MessageParam accepts.
  const role = msg.role as "user" | "assistant";

  // Plain-string content is passed through unchanged.
  if (typeof msg.content === "string") {
    return { role, content: msg.content };
  }

  // Multi-part content: image parts become base64 image blocks, everything
  // else becomes a text block.
  const blocks = msg.content.map((part): ImageBlockParam | TextBlockParam => {
    if ("image_url" in part) {
      const imageBlock: ImageBlockParam = {
        type: "image",
        source: {
          type: "base64",
          media_type: "image/jpeg",
          data: part.image_url.url,
        },
      };
      return imageBlock;
    }
    const textBlock: TextBlockParam = { type: "text", text: part.text };
    return textBlock;
  });

  return { role, content: blocks };
});
if (options.image) { | ||
const screenshotMessage: any = { | ||
const screenshotMessage: MessageParam = { | ||
role: "user", | ||
@@ -124,13 +173,19 @@ content: [ | ||
}, | ||
...(options.image.description | ||
? [{ type: "text", text: options.image.description }] | ||
: []), | ||
], | ||
}; | ||
options.messages = [...options.messages, screenshotMessage]; | ||
if ( | ||
options.image.description && | ||
Array.isArray(screenshotMessage.content) | ||
) { | ||
screenshotMessage.content.push({ | ||
type: "text", | ||
text: options.image.description, | ||
}); | ||
} | ||
formattedMessages.push(screenshotMessage); | ||
} | ||
// Transform tools to Anthropic's format | ||
let anthropicTools = options.tools?.map((tool: any) => { | ||
let anthropicTools: Tool[] = options.tools?.map((tool) => { | ||
if (tool.type === "function") { | ||
@@ -147,22 +202,10 @@ return { | ||
} | ||
return tool; | ||
}); | ||
let toolDefinition; | ||
let toolDefinition: Tool | undefined; | ||
if (options.response_model) { | ||
const jsonSchema = zodToJsonSchema(options.response_model.schema); | ||
const { properties: schemaProperties, required: schemaRequired } = | ||
extractSchemaProperties(jsonSchema); | ||
// Extract the actual schema properties | ||
// TODO (kamath): fix this forced typecast | ||
const schemaProperties = | ||
( | ||
jsonSchema.definitions?.MySchema as { | ||
properties?: Record<string, any>; | ||
} | ||
)?.properties || | ||
(jsonSchema as { properties?: Record<string, any> }).properties; | ||
const schemaRequired = | ||
(jsonSchema.definitions?.MySchema as { required?: string[] }) | ||
?.required || (jsonSchema as { required?: string[] }).required; | ||
toolDefinition = { | ||
@@ -184,13 +227,12 @@ name: "print_extracted_data", | ||
const response = (await this.client.messages.create({ | ||
const response = await this.client.messages.create({ | ||
model: this.modelName, | ||
max_tokens: options.maxTokens || 3000, | ||
messages: userMessages.map((msg) => ({ | ||
role: msg.role, | ||
content: msg.content, | ||
})), | ||
max_tokens: options.maxTokens || 1500, | ||
messages: formattedMessages, | ||
tools: anthropicTools, | ||
system: systemMessage?.content, | ||
system: systemMessage | ||
? (systemMessage.content as string | TextBlockParam[]) // we can cast because we already filtered out image content | ||
: undefined, | ||
temperature: options.temperature, | ||
} as MessageCreateParams)) as Message; // TODO (kamath): remove this forced typecast | ||
}); | ||
@@ -213,4 +255,3 @@ this.logger({ | ||
// Parse the response here | ||
const transformedResponse = { | ||
const transformedResponse: AnthropicTransformedResponse = { | ||
id: response.id, | ||
@@ -229,3 +270,3 @@ object: "chat.completion", | ||
.filter((c) => c.type === "tool_use") | ||
.map((toolUse: any) => ({ | ||
.map((toolUse) => ({ | ||
id: toolUse.id, | ||
@@ -274,3 +315,3 @@ type: "function", | ||
return result; | ||
return result as T; // anthropic returns this as `unknown`, so we need to cast | ||
} else { | ||
@@ -323,4 +364,15 @@ if (!options.retries || options.retries < 5) { | ||
return transformedResponse; | ||
// if the function was called with a response model, it would have returned earlier | ||
// so we can safely cast here to T, which defaults to AnthropicTransformedResponse | ||
return transformedResponse as T; | ||
} | ||
} | ||
/**
 * Picks the `properties` and `required` entries out of the JSON schema that
 * `zodToJsonSchema` generated for a response model.
 *
 * If the schema carries a `definitions.MySchema` entry, that entry is used as
 * the root; otherwise the schema object itself is the root.
 *
 * @param jsonSchema - JSON schema generated from the response model's Zod schema.
 * @returns The `properties` and `required` values of the selected root. Either
 *   may be `undefined` when the root does not define it.
 */
const extractSchemaProperties = (jsonSchema: AnthropicJsonSchemaObject) => {
  // Fall back to the top-level schema only when no `MySchema` definition exists.
  const schemaRoot = jsonSchema.definitions?.MySchema ?? jsonSchema;
  const { properties, required } = schemaRoot;

  return { properties, required };
};
@@ -1,2 +0,11 @@ | ||
import { AvailableModel, ToolCall } from "../../types/model"; | ||
import { ZodType } from "zod"; | ||
import { | ||
AnthropicTransformedResponse, | ||
AvailableModel, | ||
ToolCall, | ||
} from "../../types/model"; | ||
import { | ||
ChatCompletion, | ||
ChatCompletionToolChoiceOption, | ||
} from "openai/resources"; | ||
@@ -47,6 +56,6 @@ export interface ChatMessage { | ||
name: string; | ||
schema: any; | ||
schema: ZodType; | ||
}; | ||
tools?: ToolCall[]; | ||
tool_choice?: string; | ||
tool_choice?: "auto" | ChatCompletionToolChoiceOption; | ||
maxTokens?: number; | ||
@@ -56,2 +65,4 @@ requestId: string; | ||
export type LLMResponse = AnthropicTransformedResponse | ChatCompletion; | ||
export abstract class LLMClient { | ||
@@ -66,4 +77,6 @@ public modelName: AvailableModel; | ||
abstract createChatCompletion(options: ChatCompletionOptions): Promise<any>; | ||
abstract createChatCompletion<T = LLMResponse>( | ||
options: ChatCompletionOptions, | ||
): Promise<T>; | ||
abstract logger: (message: { category?: string; message: string }) => void; | ||
} |
@@ -1,11 +0,11 @@ | ||
import { OpenAIClient } from "./OpenAIClient"; | ||
import { AnthropicClient } from "./AnthropicClient"; | ||
import { LLMClient } from "./LLMClient"; | ||
import { LLMCache } from "../cache/LLMCache"; | ||
import { LogLine } from "../../types/log"; | ||
import { | ||
AvailableModel, | ||
ClientOptions, | ||
ModelProvider, | ||
ClientOptions, | ||
} from "../../types/model"; | ||
import { LLMCache } from "../cache/LLMCache"; | ||
import { AnthropicClient } from "./AnthropicClient"; | ||
import { LLMClient } from "./LLMClient"; | ||
import { OpenAIClient } from "./OpenAIClient"; | ||
@@ -12,0 +12,0 @@ export class LLMProvider { |
import OpenAI, { ClientOptions } from "openai"; | ||
import { zodResponseFormat } from "openai/helpers/zod"; | ||
import { ChatCompletionCreateParamsNonStreaming } from "openai/resources/chat"; | ||
import { | ||
ChatCompletion, | ||
ChatCompletionAssistantMessageParam, | ||
ChatCompletionContentPartImage, | ||
ChatCompletionContentPartText, | ||
ChatCompletionCreateParamsNonStreaming, | ||
ChatCompletionMessageParam, | ||
ChatCompletionSystemMessageParam, | ||
ChatCompletionUserMessageParam, | ||
} from "openai/resources/chat"; | ||
import { LogLine } from "../../types/log"; | ||
@@ -31,4 +40,6 @@ import { AvailableModel } from "../../types/model"; | ||
async createChatCompletion(options: ChatCompletionOptions) { | ||
const { image: _, ...optionsWithoutImage } = options; | ||
async createChatCompletion<T = ChatCompletion>( | ||
options: ChatCompletionOptions, | ||
): Promise<T> { | ||
const { image, requestId, ...optionsWithoutImageAndRequestId } = options; | ||
this.logger({ | ||
@@ -40,3 +51,6 @@ category: "openai", | ||
options: { | ||
value: JSON.stringify(optionsWithoutImage), | ||
value: JSON.stringify({ | ||
...optionsWithoutImageAndRequestId, | ||
requestId, | ||
}), | ||
type: "object", | ||
@@ -57,3 +71,3 @@ }, | ||
presence_penalty: options.presence_penalty, | ||
image: options.image, | ||
image: image, | ||
response_model: options.response_model, | ||
@@ -63,7 +77,6 @@ }; | ||
if (this.enableCaching) { | ||
const cachedResponse = await this.cache.get( | ||
const cachedResponse = await this.cache.get<T>( | ||
cacheOptions, | ||
options.requestId, | ||
); | ||
if (cachedResponse) { | ||
@@ -117,7 +130,7 @@ this.logger({ | ||
options.messages = [...options.messages, screenshotMessage]; | ||
options.messages.push(screenshotMessage); | ||
} | ||
const { image, response_model, requestId, ...openAiOptions } = { | ||
...options, | ||
const { response_model, ...openAiOptions } = { | ||
...optionsWithoutImageAndRequestId, | ||
model: this.modelName, | ||
@@ -146,7 +159,72 @@ }; | ||
const response = await this.client.chat.completions.create({ | ||
// Translate Stagehand chat messages into the OpenAI SDK's message params,
// keeping each message's own role for both multi-part and plain-string content.
const formattedMessages: ChatCompletionMessageParam[] = options.messages.map(
  (message) => {
    if (Array.isArray(message.content)) {
      // Multi-part content: rebuild every part as the matching OpenAI part type.
      const contentParts = message.content.map((content) => {
        if ("image_url" in content) {
          const imageContent: ChatCompletionContentPartImage = {
            image_url: {
              url: content.image_url.url,
            },
            type: "image_url",
          };
          return imageContent;
        } else {
          const textContent: ChatCompletionContentPartText = {
            text: content.text,
            type: "text",
          };
          return textContent;
        }
      });

      if (message.role === "system") {
        const formattedMessage: ChatCompletionSystemMessageParam = {
          ...message,
          role: "system",
          // Only text parts are kept for system messages.
          content: contentParts.filter(
            (content): content is ChatCompletionContentPartText =>
              content.type === "text",
          ),
        };
        return formattedMessage;
      } else if (message.role === "user") {
        const formattedMessage: ChatCompletionUserMessageParam = {
          ...message,
          role: "user",
          content: contentParts,
        };
        return formattedMessage;
      } else {
        const formattedMessage: ChatCompletionAssistantMessageParam = {
          ...message,
          role: "assistant",
          // Only text parts are kept for assistant messages.
          content: contentParts.filter(
            (content): content is ChatCompletionContentPartText =>
              content.type === "text",
          ),
        };
        return formattedMessage;
      }
    }

    // Plain-string content. Previously every such message was emitted with
    // role "user", which demoted string system prompts and assistant turns to
    // user messages. Keep the original role instead; any role other than
    // "system" or "assistant" still maps to "user" as before.
    if (message.role === "system") {
      const formattedMessage: ChatCompletionSystemMessageParam = {
        role: "system",
        content: message.content,
      };
      return formattedMessage;
    }

    if (message.role === "assistant") {
      const formattedMessage: ChatCompletionAssistantMessageParam = {
        role: "assistant",
        content: message.content,
      };
      return formattedMessage;
    }

    const formattedMessage: ChatCompletionUserMessageParam = {
      role: "user",
      content: message.content,
    };
    return formattedMessage;
  },
);
const body: ChatCompletionCreateParamsNonStreaming = { | ||
...openAiOptions, | ||
model: this.modelName, | ||
messages: formattedMessages, | ||
response_format: responseFormat, | ||
} as unknown as ChatCompletionCreateParamsNonStreaming); // TODO (kamath): remove this forced typecast | ||
stream: false, | ||
tools: options.tools?.filter((tool) => "function" in tool), // ensure only OpenAI tools are used | ||
}; | ||
const response = await this.client.chat.completions.create(body); | ||
this.logger({ | ||
@@ -182,5 +260,3 @@ category: "openai", | ||
return { | ||
...parsedData, | ||
}; | ||
return parsedData; | ||
} | ||
@@ -211,4 +287,6 @@ | ||
return response; | ||
// if the function was called with a response model, it would have returned earlier | ||
// so we can safely cast here to T, which defaults to ChatCompletion | ||
return response as T; | ||
} | ||
} |
@@ -125,4 +125,4 @@ import OpenAI from "openai"; | ||
# Variables | ||
${Object.entries(variables) | ||
.map(([key, value]) => `<|${key.toUpperCase()}|>`) | ||
${Object.keys(variables) | ||
.map((key) => `<|${key.toUpperCase()}|>`) | ||
.join("\n")} | ||
@@ -129,0 +129,0 @@ `; |
@@ -165,4 +165,4 @@ import { Page } from "@playwright/test"; | ||
// Try each selector until one works | ||
const selectorPromises: Promise<any | null>[] = selectors.map( | ||
async (selector) => { | ||
const selectorPromises: Promise<Omit<AnnotationBox, "id"> | null>[] = | ||
selectors.map(async (selector) => { | ||
try { | ||
@@ -172,7 +172,6 @@ element = await this.page.locator(`xpath=${selector}`).first(); | ||
return box; | ||
} catch (e) { | ||
} catch { | ||
return null; | ||
} | ||
}, | ||
); | ||
}); | ||
@@ -240,3 +239,3 @@ const boxes = await Promise.all(selectorPromises); | ||
const circleRadius = 12; | ||
let position: NumberPosition = { | ||
const position: NumberPosition = { | ||
x: box.x - circleRadius, | ||
@@ -243,0 +242,0 @@ y: box.y - circleRadius, |
{ | ||
"name": "@browserbasehq/stagehand", | ||
"version": "1.5.1-alpha-85483fe091544fc079015c62b6923b03f8b9caa7", | ||
"version": "1.6.0-alpha-474217cfaff8e68614212b66baa62d35493fd2ce", | ||
"description": "An AI web browsing framework focused on simplicity and extensibility.", | ||
@@ -13,2 +13,5 @@ "main": "./dist/index.js", | ||
"format": "prettier --write .", | ||
"prettier": "prettier --check .", | ||
"prettier:fix": "prettier --write .", | ||
"eslint": "eslint .", | ||
"cache:clear": "rm -rf .cache", | ||
@@ -19,3 +22,4 @@ "evals": "npm run build-dom-scripts && tsx evals/index.eval.ts", | ||
"build-js": "tsup lib/index.ts --dts", | ||
"build": "npm run build-dom-scripts && npm run build-js && npm run build-types", | ||
"build": "npm run lint && npm run build-dom-scripts && npm run build-js && npm run build-types", | ||
"lint": "npm run prettier && npm run eslint", | ||
"release": "npm run build && changeset publish", | ||
@@ -34,2 +38,3 @@ "release-canary": "npm run build && changeset version --snapshot && changeset publish --tag alpha" | ||
"@changesets/cli": "^2.27.9", | ||
"@eslint/js": "^9.16.0", | ||
"@types/cheerio": "^0.22.35", | ||
@@ -43,3 +48,5 @@ "@types/express": "^4.17.21", | ||
"esbuild": "^0.21.4", | ||
"eslint": "^9.16.0", | ||
"express": "^4.21.0", | ||
"globals": "^15.13.0", | ||
"multer": "^1.4.5-lts.1", | ||
@@ -49,3 +56,4 @@ "prettier": "^3.2.5", | ||
"tsx": "^4.10.5", | ||
"typescript": "^5.2.2" | ||
"typescript": "^5.2.2", | ||
"typescript-eslint": "^8.17.0" | ||
}, | ||
@@ -52,0 +60,0 @@ "peerDependencies": { |
@@ -37,3 +37,3 @@ <div id="toc" align="center"> | ||
> [!NOTE] | ||
> [!NOTE] | ||
> `Stagehand` is currently available as an early release, and we're actively seeking feedback from the community. Please join our [Slack community](https://join.slack.com/t/stagehand-dev/shared_invite/zt-2tdncfgkk-fF8y5U0uJzR2y2_M9c9OJA) to stay updated on the latest developments and provide feedback. | ||
@@ -277,3 +277,3 @@ | ||
> [!NOTE] | ||
> [!NOTE] | ||
> `observe()` currently only evaluates the first chunk in the page. | ||
@@ -280,0 +280,0 @@ |
Sorry, the diff of this file is too big to display
Long strings
Supply chain risk: Contains long string literals, which may be a sign of obfuscated or packed code.
Found 1 instance in 1 package
Long strings
Supply chain risk: Contains long string literals, which may be a sign of obfuscated or packed code.
Found 1 instance in 1 package
456983
112
11929
20