@browserbasehq/stagehand
Advanced tools
Comparing version 1.6.0-alpha-9605836ee6b8207ed7dc9146e12ced1c78630d59 to 1.6.0-alpha-d6d70570623a718354797ef83aa8489eacc085d1
@@ -22,3 +22,3 @@ import { Page, BrowserContext, Browser } from '@playwright/test'; | ||
type AvailableModel = "gpt-4o" | "gpt-4o-mini" | "gpt-4o-2024-08-06" | "claude-3-5-sonnet-latest" | "claude-3-5-sonnet-20241022" | "claude-3-5-sonnet-20240620"; | ||
type AvailableModel = "gpt-4o" | "gpt-4o-mini" | "gpt-4o-2024-08-06" | "claude-3-5-sonnet-latest" | "claude-3-5-sonnet-20241022" | "claude-3-5-sonnet-20240620" | "o1-mini" | "o1-preview"; | ||
type ModelProvider = "openai" | "anthropic"; | ||
@@ -102,4 +102,6 @@ type ClientOptions = ClientOptions$1 | ClientOptions$2; | ||
declare abstract class LLMClient { | ||
type: "openai" | "anthropic"; | ||
modelName: AvailableModel; | ||
hasVision: boolean; | ||
clientOptions: ClientOptions; | ||
constructor(modelName: AvailableModel); | ||
@@ -106,0 +108,0 @@ abstract createChatCompletion<T = LLMResponse>(options: ChatCompletionOptions): Promise<T>; |
@@ -7,2 +7,3 @@ import { ClientOptions } from "@anthropic-ai/sdk"; | ||
export declare class AnthropicClient extends LLMClient { | ||
type: "anthropic"; | ||
private client; | ||
@@ -12,2 +13,3 @@ private cache; | ||
private enableCaching; | ||
clientOptions: ClientOptions; | ||
constructor(logger: (message: LogLine) => void, enableCaching: boolean, cache: LLMCache | undefined, modelName: AvailableModel, clientOptions?: ClientOptions); | ||
@@ -14,0 +16,0 @@ createChatCompletion<T = AnthropicTransformedResponse>(options: ChatCompletionOptions & { |
import { ZodType } from "zod"; | ||
import { AnthropicTransformedResponse, AvailableModel, ToolCall } from "../../types/model"; | ||
import { AnthropicTransformedResponse, AvailableModel, ClientOptions, ToolCall } from "../../types/model"; | ||
import { ChatCompletion, ChatCompletionToolChoiceOption } from "openai/resources"; | ||
@@ -43,4 +43,6 @@ export interface ChatMessage { | ||
export declare abstract class LLMClient { | ||
type: "openai" | "anthropic"; | ||
modelName: AvailableModel; | ||
hasVision: boolean; | ||
clientOptions: ClientOptions; | ||
constructor(modelName: AvailableModel); | ||
@@ -47,0 +49,0 @@ abstract createChatCompletion<T = LLMResponse>(options: ChatCompletionOptions): Promise<T>; |
@@ -8,2 +8,3 @@ import { ClientOptions } from "openai"; | ||
export declare class OpenAIClient extends LLMClient { | ||
type: "openai"; | ||
private client; | ||
@@ -13,5 +14,5 @@ private cache; | ||
private enableCaching; | ||
private clientOptions; | ||
clientOptions: ClientOptions; | ||
constructor(logger: (message: LogLine) => void, enableCaching: boolean, cache: LLMCache | undefined, modelName: AvailableModel, clientOptions?: ClientOptions); | ||
createChatCompletion<T = ChatCompletion>(options: ChatCompletionOptions): Promise<T>; | ||
createChatCompletion<T = ChatCompletion>(optionsInitial: ChatCompletionOptions, retries?: number): Promise<T>; | ||
} |
@@ -8,4 +8,4 @@ import OpenAI from "openai"; | ||
export declare const actTools: Array<OpenAI.ChatCompletionTool>; | ||
export declare function buildExtractSystemPrompt(): ChatMessage; | ||
export declare function buildExtractUserPrompt(instruction: string, domElements: string): ChatMessage; | ||
export declare function buildExtractSystemPrompt(isUsingPrintExtractedDataTool?: boolean): ChatMessage; | ||
export declare function buildExtractUserPrompt(instruction: string, domElements: string, isUsingPrintExtractedDataTool?: boolean): ChatMessage; | ||
export declare function buildRefineSystemPrompt(): ChatMessage; | ||
@@ -12,0 +12,0 @@ export declare function buildRefineUserPrompt(instruction: string, previouslyExtractedContent: object, newlyExtractedContent: object): ChatMessage; |
import { LogLine } from "../types/log"; | ||
import { z } from "zod"; | ||
export declare function generateId(operation: string): string; | ||
export declare function logLineToString(logLine: LogLine): string; | ||
export declare function validateZodSchema(schema: z.ZodTypeAny, data: unknown): boolean; |
import type { ClientOptions as AnthropicClientOptions } from "@anthropic-ai/sdk"; | ||
import type { ClientOptions as OpenAIClientOptions } from "openai"; | ||
import { ChatCompletionTool as OpenAITool } from "openai/resources"; | ||
export type AvailableModel = "gpt-4o" | "gpt-4o-mini" | "gpt-4o-2024-08-06" | "claude-3-5-sonnet-latest" | "claude-3-5-sonnet-20241022" | "claude-3-5-sonnet-20240620"; | ||
export type AvailableModel = "gpt-4o" | "gpt-4o-mini" | "gpt-4o-2024-08-06" | "claude-3-5-sonnet-latest" | "claude-3-5-sonnet-20241022" | "claude-3-5-sonnet-20240620" | "o1-mini" | "o1-preview"; | ||
export type ModelProvider = "openai" | "anthropic"; | ||
@@ -6,0 +6,0 @@ export type ClientOptions = OpenAIClientOptions | AnthropicClientOptions; |
@@ -88,2 +88,15 @@ import { Locator, Page } from "@playwright/test"; | ||
// o1 is overkill for this task, and this task uses a lot of tokens, so we switch to gpt-4o for verification.
let verifyLLmClient = llmClient; | ||
if ( | ||
llmClient.modelName === "o1-mini" || | ||
llmClient.modelName === "o1-preview" || | ||
llmClient.modelName.startsWith("o1-") | ||
) { | ||
verifyLLmClient = this.llmProvider.getClient( | ||
"gpt-4o", | ||
llmClient.clientOptions, | ||
); | ||
} | ||
const { selectorMap } = await this.stagehand.page.evaluate(() => { | ||
@@ -159,3 +172,3 @@ return window.processAllOfDom(); | ||
llmProvider: this.llmProvider, | ||
llmClient, | ||
llmClient: verifyLLmClient, | ||
screenshot: fullpageScreenshot, | ||
@@ -1317,2 +1330,17 @@ domElements, | ||
domSettleTimeoutMs, | ||
}).catch((error) => { | ||
this.logger({ | ||
category: "action", | ||
message: | ||
"error verifying action completion. Assuming action completed.", | ||
level: 1, | ||
auxiliary: { | ||
error: { | ||
value: error.message, | ||
type: "string", | ||
}, | ||
}, | ||
}); | ||
return true; | ||
}); | ||
@@ -1319,0 +1347,0 @@ |
@@ -36,2 +36,8 @@ import { Browserbase } from "@browserbasehq/sdk"; | ||
const DEFAULT_MODEL_NAME = "gpt-4o"; | ||
/**
 * Maps a region identifier to the WebSocket endpoint used to connect to a
 * session in that region.
 *
 * Typed as a string-keyed record so it can be indexed with a region value that
 * is only known at runtime; a lookup for a region that is not listed yields
 * `undefined`, which lets the caller fall back to a default endpoint.
 */
const BROWSERBASE_REGION_DOMAIN: Record<string, string> = {
  "us-west-2": "wss://connect.usw2.browserbase.com",
  "us-east-1": "wss://connect.use1.browserbase.com",
  "eu-central-1": "wss://connect.euc1.browserbase.com",
  "ap-southeast-1": "wss://connect.apse1.browserbase.com",
};
@@ -95,3 +101,6 @@ async function getBrowser( | ||
sessionId = browserbaseResumeSessionID; | ||
connectUrl = `wss://connect.browserbase.com?apiKey=${apiKey}&sessionId=${sessionId}`; | ||
const browserbaseDomain = | ||
BROWSERBASE_REGION_DOMAIN[sessionStatus.region] || | ||
"wss://connect.browserbase.com"; | ||
connectUrl = `${browserbaseDomain}?apiKey=${apiKey}&sessionId=${sessionId}`; | ||
@@ -98,0 +107,0 @@ logger({ |
@@ -175,19 +175,19 @@ import { | ||
type MetadataResponse = z.infer<typeof metadataSchema>; | ||
const isUsingAnthropic = llmClient.type === "anthropic"; | ||
const extractionResponse = | ||
await llmClient.createChatCompletion<ExtractionResponse>({ | ||
messages: [ | ||
buildExtractSystemPrompt(), | ||
buildExtractUserPrompt(instruction, domElements), | ||
], | ||
response_model: { | ||
schema: schema, | ||
name: "Extraction", | ||
}, | ||
temperature: 0.1, | ||
top_p: 1, | ||
frequency_penalty: 0, | ||
presence_penalty: 0, | ||
requestId, | ||
}); | ||
const extractionResponse = await llmClient.createChatCompletion({ | ||
messages: [ | ||
buildExtractSystemPrompt(isUsingAnthropic), | ||
buildExtractUserPrompt(instruction, domElements, isUsingAnthropic), | ||
], | ||
response_model: { | ||
schema: schema, | ||
name: "Extraction", | ||
}, | ||
temperature: 0.1, | ||
top_p: 1, | ||
frequency_penalty: 0, | ||
presence_penalty: 0, | ||
requestId, | ||
}); | ||
@@ -194,0 +194,0 @@ const refinedResponse = |
@@ -19,2 +19,3 @@ import Anthropic, { ClientOptions } from "@anthropic-ai/sdk"; | ||
export class AnthropicClient extends LLMClient { | ||
public type = "anthropic" as const; | ||
private client: Anthropic; | ||
@@ -24,2 +25,3 @@ private cache: LLMCache | undefined; | ||
private enableCaching: boolean; | ||
public clientOptions: ClientOptions; | ||
@@ -39,2 +41,3 @@ constructor( | ||
this.modelName = modelName; | ||
this.clientOptions = clientOptions; | ||
} | ||
@@ -41,0 +44,0 @@ |
@@ -5,2 +5,3 @@ import { ZodType } from "zod"; | ||
AvailableModel, | ||
ClientOptions, | ||
ToolCall, | ||
@@ -68,4 +69,6 @@ } from "../../types/model"; | ||
export abstract class LLMClient { | ||
public type: "openai" | "anthropic"; | ||
public modelName: AvailableModel; | ||
public hasVision: boolean; | ||
public clientOptions: ClientOptions; | ||
@@ -72,0 +75,0 @@ constructor(modelName: AvailableModel) { |
@@ -17,2 +17,4 @@ import { LogLine } from "../../types/log"; | ||
"gpt-4o-2024-08-06": "openai", | ||
"o1-mini": "openai", | ||
"o1-preview": "openai", | ||
"claude-3-5-sonnet-latest": "anthropic", | ||
@@ -19,0 +21,0 @@ "claude-3-5-sonnet-20240620": "anthropic", |
@@ -17,4 +17,7 @@ import OpenAI, { ClientOptions } from "openai"; | ||
import { ChatCompletionOptions, ChatMessage, LLMClient } from "./LLMClient"; | ||
import { validateZodSchema } from "../utils"; | ||
import zodToJsonSchema from "zod-to-json-schema"; | ||
export class OpenAIClient extends LLMClient { | ||
public type = "openai" as const; | ||
private client: OpenAI; | ||
@@ -24,3 +27,3 @@ private cache: LLMCache | undefined; | ||
private enableCaching: boolean; | ||
private clientOptions: ClientOptions; | ||
public clientOptions: ClientOptions; | ||
@@ -35,2 +38,3 @@ constructor( | ||
super(modelName); | ||
this.clientOptions = clientOptions; | ||
this.client = new OpenAI(clientOptions); | ||
@@ -44,5 +48,69 @@ this.logger = logger; | ||
async createChatCompletion<T = ChatCompletion>( | ||
options: ChatCompletionOptions, | ||
optionsInitial: ChatCompletionOptions, | ||
retries: number = 3, | ||
): Promise<T> { | ||
let options: Partial<ChatCompletionOptions> = optionsInitial; | ||
// O1 models do not support most of the options. So we override them. | ||
// For schema and tools, we add them as user messages. | ||
let isToolsOverridedForO1 = false; | ||
if (this.modelName === "o1-mini" || this.modelName === "o1-preview") { | ||
/* eslint-disable */ | ||
// Remove unsupported options | ||
let { | ||
tool_choice, | ||
top_p, | ||
frequency_penalty, | ||
presence_penalty, | ||
temperature, | ||
} = options; | ||
({ | ||
tool_choice, | ||
top_p, | ||
frequency_penalty, | ||
presence_penalty, | ||
temperature, | ||
...options | ||
} = options); | ||
/* eslint-enable */ | ||
// Send every message with the "user" role (presumably because o1 models do not accept other roles; confirm against the OpenAI docs).
options.messages = options.messages.map((message) => ({ | ||
...message, | ||
role: "user", | ||
})); | ||
if (options.tools && options.response_model) { | ||
throw new Error( | ||
"Cannot use both tool and response_model for o1 models", | ||
); | ||
} | ||
if (options.tools) { | ||
// Remove unsupported options | ||
let { tools } = options; | ||
({ tools, ...options } = options); | ||
isToolsOverridedForO1 = true; | ||
options.messages.push({ | ||
role: "user", | ||
content: `You have the following tools available to you:\n${JSON.stringify( | ||
tools, | ||
)} | ||
Respond with the following zod schema format to use a method: { | ||
"name": "<tool_name>", | ||
"arguments": <tool_args> | ||
} | ||
Do not include any other text or formattings like \`\`\` in your response. Just the JSON object.`, | ||
}); | ||
} | ||
} | ||
if ( | ||
options.temperature && | ||
(this.modelName === "o1-mini" || this.modelName === "o1-preview") | ||
) { | ||
throw new Error("Temperature is not supported for o1 models"); | ||
} | ||
const { image, requestId, ...optionsWithoutImageAndRequestId } = options; | ||
this.logger({ | ||
@@ -66,2 +134,3 @@ category: "openai", | ||
}); | ||
const cacheOptions = { | ||
@@ -134,2 +203,43 @@ model: this.modelName, | ||
let responseFormat = undefined; | ||
if (options.response_model) { | ||
// For O1 models, we need to add the schema as a user message. | ||
if (this.modelName === "o1-mini" || this.modelName === "o1-preview") { | ||
try { | ||
const parsedSchema = JSON.stringify( | ||
zodToJsonSchema(options.response_model.schema), | ||
); | ||
options.messages.push({ | ||
role: "user", | ||
content: `Respond in this zod schema format:\n${parsedSchema}\n | ||
Do not include any other text, formating or markdown in your output. Do not include \`\`\` or \`\`\`json in your response. Only the JSON object itself.`, | ||
}); | ||
} catch (error) { | ||
this.logger({ | ||
category: "openai", | ||
message: "Failed to parse response model schema", | ||
level: 0, | ||
}); | ||
if (retries > 0) { | ||
// as-casting to account for o1 models not supporting all options | ||
return this.createChatCompletion( | ||
options as ChatCompletionOptions, | ||
retries - 1, | ||
); | ||
} | ||
throw error; | ||
} | ||
} else { | ||
responseFormat = zodResponseFormat( | ||
options.response_model.schema, | ||
options.response_model.name, | ||
); | ||
} | ||
} | ||
/* eslint-disable */ | ||
// Remove unsupported options | ||
const { response_model, ...openAiOptions } = { | ||
@@ -139,11 +249,4 @@ ...optionsWithoutImageAndRequestId, | ||
}; | ||
/* eslint-enable */ | ||
let responseFormat = undefined; | ||
if (options.response_model) { | ||
responseFormat = zodResponseFormat( | ||
options.response_model.schema, | ||
options.response_model.name, | ||
); | ||
} | ||
this.logger({ | ||
@@ -231,2 +334,47 @@ category: "openai", | ||
// For O1 models, we need to parse the tool call response manually and add it to the response. | ||
if (isToolsOverridedForO1) { | ||
try { | ||
const parsedContent = JSON.parse(response.choices[0].message.content); | ||
response.choices[0].message.tool_calls = [ | ||
{ | ||
function: { | ||
name: parsedContent["name"], | ||
arguments: JSON.stringify(parsedContent["arguments"]), | ||
}, | ||
type: "function", | ||
id: "-1", | ||
}, | ||
]; | ||
response.choices[0].message.content = null; | ||
} catch (error) { | ||
this.logger({ | ||
category: "openai", | ||
message: "Failed to parse tool call response", | ||
level: 0, | ||
auxiliary: { | ||
error: { | ||
value: error.message, | ||
type: "string", | ||
}, | ||
content: { | ||
value: response.choices[0].message.content, | ||
type: "string", | ||
}, | ||
}, | ||
}); | ||
if (retries > 0) { | ||
// as-casting to account for o1 models not supporting all options | ||
return this.createChatCompletion( | ||
options as ChatCompletionOptions, | ||
retries - 1, | ||
); | ||
} | ||
throw error; | ||
} | ||
} | ||
this.logger({ | ||
@@ -242,3 +390,3 @@ category: "openai", | ||
requestId: { | ||
value: options.requestId, | ||
value: requestId, | ||
type: "string", | ||
@@ -249,6 +397,18 @@ }, | ||
if (response_model) { | ||
if (options.response_model) { | ||
const extractedData = response.choices[0].message.content; | ||
const parsedData = JSON.parse(extractedData); | ||
if (!validateZodSchema(options.response_model.schema, parsedData)) { | ||
if (retries > 0) { | ||
// as-casting to account for o1 models not supporting all options | ||
return this.createChatCompletion( | ||
options as ChatCompletionOptions, | ||
retries - 1, | ||
); | ||
} | ||
throw new Error("Invalid response schema"); | ||
} | ||
if (this.enableCaching) { | ||
@@ -255,0 +415,0 @@ this.cache.set( |
@@ -210,9 +210,13 @@ import OpenAI from "openai"; | ||
Print null or an empty string if no new information is found. | ||
`; | ||
export function buildExtractSystemPrompt( | ||
isUsingPrintExtractedDataTool: boolean = false, | ||
): ChatMessage { | ||
let content = extractSystemPrompt.replace(/\s+/g, " "); | ||
if (isUsingPrintExtractedDataTool) { | ||
content += ` | ||
ONLY print the content using the print_extracted_data tool provided. | ||
ONLY print the content using the print_extracted_data tool provided. | ||
`; | ||
export function buildExtractSystemPrompt(): ChatMessage { | ||
const content = extractSystemPrompt.replace(/\s+/g, " "); | ||
ONLY print the content using the print_extracted_data tool provided.`; | ||
} | ||
return { | ||
@@ -227,10 +231,16 @@ role: "system", | ||
domElements: string, | ||
isUsingPrintExtractedDataTool: boolean = false, | ||
): ChatMessage { | ||
let content = `Instruction: ${instruction} | ||
DOM: ${domElements}`; | ||
if (isUsingPrintExtractedDataTool) { | ||
content += ` | ||
ONLY print the content using the print_extracted_data tool provided. | ||
ONLY print the content using the print_extracted_data tool provided.`; | ||
} | ||
return { | ||
role: "user", | ||
content: `Instruction: ${instruction} | ||
DOM: ${domElements} | ||
ONLY print the content using the print_extracted_data tool provided. | ||
ONLY print the content using the print_extracted_data tool provided.`, | ||
content, | ||
}; | ||
@@ -237,0 +247,0 @@ } |
import crypto from "crypto"; | ||
import { LogLine } from "../types/log"; | ||
import { z } from "zod"; | ||
@@ -9,9 +10,23 @@ export function generateId(operation: string) { | ||
export function logLineToString(logLine: LogLine): string { | ||
const timestamp = logLine.timestamp || new Date().toISOString(); | ||
if (logLine.auxiliary?.error) { | ||
return `${timestamp}::[stagehand:${logLine.category}] ${logLine.message}\n ${logLine.auxiliary.error.value}\n ${logLine.auxiliary.trace.value}`; | ||
try { | ||
const timestamp = logLine.timestamp || new Date().toISOString(); | ||
if (logLine.auxiliary?.error) { | ||
return `${timestamp}::[stagehand:${logLine.category}] ${logLine.message}\n ${logLine.auxiliary.error.value}\n ${logLine.auxiliary.trace.value}`; | ||
} | ||
return `${timestamp}::[stagehand:${logLine.category}] ${logLine.message} ${ | ||
logLine.auxiliary ? JSON.stringify(logLine.auxiliary) : "" | ||
}`; | ||
} catch (error) { | ||
console.error(`Error logging line:`, error); | ||
return "error logging line"; | ||
} | ||
return `${timestamp}::[stagehand:${logLine.category}] ${logLine.message} ${ | ||
logLine.auxiliary ? JSON.stringify(logLine.auxiliary) : "" | ||
}`; | ||
} | ||
/**
 * Reports whether `data` conforms to `schema`.
 *
 * Calls `schema.parse(data)` and turns the outcome into a boolean: `true` when
 * parsing returns normally, `false` when it throws. Any exception raised by
 * `parse` is deliberately swallowed so callers can branch on the result
 * instead of handling exceptions themselves.
 *
 * @param schema - Zod schema to validate against.
 * @param data - Value to check; may be anything.
 * @returns `true` if parsing succeeded, `false` otherwise.
 */
export function validateZodSchema(
  schema: z.ZodTypeAny,
  data: unknown,
): boolean {
  try {
    schema.parse(data);
    return true;
  } catch {
    return false;
  }
}
@@ -127,7 +127,26 @@ import { Page } from "@playwright/test"; | ||
const svgAnnotations = await Promise.all( | ||
Object.entries(this.selectorMap).map(async ([id, selectors]) => | ||
this.createElementAnnotation(id, selectors), | ||
), | ||
); | ||
const svgAnnotations = ( | ||
await Promise.all( | ||
Object.entries(this.selectorMap).map(async ([id, selectors]) => | ||
this.createElementAnnotation(id, selectors).catch((error: Error) => { | ||
this.log({ | ||
category: "screenshotService", | ||
message: | ||
"warning: failed to create screenshot annotation for element", | ||
level: 2, | ||
auxiliary: { | ||
message: { | ||
value: error.message, | ||
type: "string", | ||
}, | ||
trace: { | ||
value: error.stack, | ||
type: "string", | ||
}, | ||
}, | ||
}); | ||
}), | ||
), | ||
) | ||
).filter((annotation) => annotation !== null); | ||
@@ -134,0 +153,0 @@ const scrollPosition = await this.page.evaluate(() => { |
{ | ||
"name": "@browserbasehq/stagehand", | ||
"version": "1.6.0-alpha-9605836ee6b8207ed7dc9146e12ced1c78630d59", | ||
"version": "1.6.0-alpha-d6d70570623a718354797ef83aa8489eacc085d1", | ||
"description": "An AI web browsing framework focused on simplicity and extensibility.", | ||
@@ -66,3 +66,3 @@ "main": "./dist/index.js", | ||
"sharp": "^0.33.5", | ||
"zod-to-json-schema": "^3.23.3" | ||
"zod-to-json-schema": "^3.23.5" | ||
}, | ||
@@ -69,0 +69,0 @@ "directories": { |
Sorry, the diff of this file is too big to display
472162
12370
112
Updated zod-to-json-schema@^3.23.5