@browserbasehq/stagehand
Advanced tools
Comparing version 1.3.0 to 1.3.1-alpha-a855e1f8d85293182b22854ab5f95514f45bee1b
@@ -1,17 +0,45 @@ | ||
import { Page, BrowserContext } from '@playwright/test'; | ||
import { Page, BrowserContext, Browser } from '@playwright/test'; | ||
import { z } from 'zod'; | ||
import { Browserbase } from '@browserbasehq/sdk'; | ||
import Browserbase from '@browserbasehq/sdk'; | ||
import { ClientOptions as ClientOptions$2 } from '@anthropic-ai/sdk'; | ||
import { Tool } from '@anthropic-ai/sdk/resources'; | ||
import { ClientOptions as ClientOptions$1 } from 'openai'; | ||
import { ChatCompletionTool } from 'openai/resources'; | ||
type LogLine = { | ||
id?: string; | ||
category?: string; | ||
message: string; | ||
level?: 0 | 1 | 2; | ||
timestamp?: string; | ||
auxiliary?: { | ||
[key: string]: { | ||
value: string; | ||
type: "object" | "string" | "html" | "integer" | "float" | "boolean"; | ||
}; | ||
}; | ||
}; | ||
type AvailableModel = "gpt-4o" | "gpt-4o-mini" | "gpt-4o-2024-08-06" | "claude-3-5-sonnet-latest" | "claude-3-5-sonnet-20241022" | "claude-3-5-sonnet-20240620"; | ||
type ModelProvider = "openai" | "anthropic"; | ||
type ClientOptions = ClientOptions$1 | ClientOptions$2; | ||
type ToolCall = Tool | ChatCompletionTool; | ||
interface ChatMessage { | ||
role: "system" | "user" | "assistant"; | ||
content: string | { | ||
type: "image_url" | "text"; | ||
image_url?: { | ||
url: string; | ||
}; | ||
text?: string; | ||
}[]; | ||
content: ChatMessageContent; | ||
} | ||
type ChatMessageContent = string | (ChatMessageImageContent | ChatMessageTextContent)[]; | ||
interface ChatMessageImageContent { | ||
type: "image_url"; | ||
image_url: { | ||
url: string; | ||
}; | ||
text?: string; | ||
} | ||
interface ChatMessageTextContent { | ||
type: string; | ||
text: string; | ||
} | ||
interface ChatCompletionOptions { | ||
model: string; | ||
messages: ChatMessage[]; | ||
@@ -26,3 +54,2 @@ temperature?: number; | ||
}; | ||
[key: string]: any; | ||
response_model?: { | ||
@@ -32,6 +59,13 @@ name: string; | ||
}; | ||
tools?: ToolCall[]; | ||
tool_choice?: string; | ||
maxTokens?: number; | ||
requestId: string; | ||
} | ||
interface LLMClient { | ||
createChatCompletion(options: ChatCompletionOptions): Promise<any>; | ||
logger: (message: { | ||
declare abstract class LLMClient { | ||
modelName: AvailableModel; | ||
hasVision: boolean; | ||
constructor(modelName: AvailableModel); | ||
abstract createChatCompletion(options: ChatCompletionOptions): Promise<any>; | ||
abstract logger: (message: { | ||
category?: string; | ||
@@ -42,17 +76,2 @@ message: string; | ||
type LogLine = { | ||
id?: string; | ||
category?: string; | ||
message: string; | ||
level?: 0 | 1 | 2; | ||
timestamp?: string; | ||
auxiliary?: { | ||
[key: string]: { | ||
value: string; | ||
type: "object" | "string" | "html" | "integer" | "float" | "boolean"; | ||
}; | ||
}; | ||
}; | ||
type AvailableModel = "gpt-4o" | "gpt-4o-mini" | "gpt-4o-2024-08-06" | "claude-3-5-sonnet-latest" | "claude-3-5-sonnet-20241022" | "claude-3-5-sonnet-20240620"; | ||
declare class LLMProvider { | ||
@@ -65,8 +84,92 @@ private modelToProviderMap; | ||
cleanRequestCache(requestId: string): void; | ||
getClient(modelName: AvailableModel, requestId: string): LLMClient; | ||
getClient(modelName: AvailableModel, clientOptions?: ClientOptions): LLMClient; | ||
} | ||
interface ConstructorParams { | ||
env: "LOCAL" | "BROWSERBASE"; | ||
apiKey?: string; | ||
projectId?: string; | ||
verbose?: 0 | 1 | 2; | ||
debugDom?: boolean; | ||
llmProvider?: LLMProvider; | ||
headless?: boolean; | ||
logger?: (message: LogLine) => void; | ||
domSettleTimeoutMs?: number; | ||
browserBaseSessionCreateParams?: Browserbase.Sessions.SessionCreateParams; | ||
enableCaching?: boolean; | ||
browserbaseResumeSessionID?: string; | ||
modelName?: AvailableModel; | ||
modelClientOptions?: ClientOptions; | ||
} | ||
interface InitOptions { | ||
modelName?: AvailableModel; | ||
modelClientOptions?: ClientOptions; | ||
domSettleTimeoutMs?: number; | ||
} | ||
interface InitResult { | ||
debugUrl: string; | ||
sessionUrl: string; | ||
} | ||
interface InitResult { | ||
debugUrl: string; | ||
sessionUrl: string; | ||
} | ||
interface InitFromPageOptions { | ||
page: Page; | ||
modelName?: AvailableModel; | ||
modelClientOptions?: ClientOptions; | ||
} | ||
interface InitFromPageResult { | ||
context: BrowserContext; | ||
} | ||
interface ActOptions { | ||
action: string; | ||
modelName?: AvailableModel; | ||
modelClientOptions?: ClientOptions; | ||
useVision?: "fallback" | boolean; | ||
variables?: Record<string, string>; | ||
domSettleTimeoutMs?: number; | ||
} | ||
interface ActResult { | ||
success: boolean; | ||
message: string; | ||
action: string; | ||
} | ||
interface ExtractOptions<T extends z.AnyZodObject> { | ||
instruction: string; | ||
schema: T; | ||
modelName?: AvailableModel; | ||
modelClientOptions?: ClientOptions; | ||
domSettleTimeoutMs?: number; | ||
} | ||
type ExtractResult<T extends z.AnyZodObject> = z.infer<T>; | ||
interface ObserveOptions { | ||
instruction?: string; | ||
modelName?: AvailableModel; | ||
modelClientOptions?: ClientOptions; | ||
useVision?: boolean; | ||
domSettleTimeoutMs?: number; | ||
} | ||
interface ObserveResult { | ||
selector: string; | ||
description: string; | ||
} | ||
interface BrowserResult { | ||
browser?: Browser; | ||
context: BrowserContext; | ||
debugUrl?: string; | ||
sessionUrl?: string; | ||
} | ||
declare class PlaywrightCommandException extends Error { | ||
constructor(message: string); | ||
} | ||
declare class PlaywrightCommandMethodNotSupportedException extends Error { | ||
constructor(message: string); | ||
} | ||
declare class Stagehand { | ||
private llmProvider; | ||
private observations; | ||
private llmClient; | ||
page: Page; | ||
@@ -79,3 +182,2 @@ context: BrowserContext; | ||
private debugDom; | ||
private defaultModelName; | ||
private headless; | ||
@@ -88,28 +190,9 @@ private logger; | ||
private variables; | ||
private actHandler; | ||
private browserbaseResumeSessionID?; | ||
constructor({ env, apiKey, projectId, verbose, debugDom, llmProvider, headless, logger, browserBaseSessionCreateParams, domSettleTimeoutMs, enableCaching, browserbaseResumeSessionID, }?: { | ||
env: "LOCAL" | "BROWSERBASE"; | ||
apiKey?: string; | ||
projectId?: string; | ||
verbose?: 0 | 1 | 2; | ||
debugDom?: boolean; | ||
llmProvider?: LLMProvider; | ||
headless?: boolean; | ||
logger?: (message: LogLine) => void; | ||
domSettleTimeoutMs?: number; | ||
browserBaseSessionCreateParams?: Browserbase.Sessions.SessionCreateParams; | ||
enableCaching?: boolean; | ||
browserbaseResumeSessionID?: string; | ||
}); | ||
init({ modelName, domSettleTimeoutMs, }?: { | ||
modelName?: AvailableModel; | ||
domSettleTimeoutMs?: number; | ||
}): Promise<{ | ||
debugUrl: string; | ||
sessionUrl: string; | ||
}>; | ||
initFromPage(page: Page, modelName?: AvailableModel): Promise<{ | ||
context: BrowserContext; | ||
}>; | ||
private actHandler?; | ||
private extractHandler?; | ||
private observeHandler?; | ||
constructor({ env, apiKey, projectId, verbose, debugDom, llmProvider, headless, logger, browserBaseSessionCreateParams, domSettleTimeoutMs, enableCaching, browserbaseResumeSessionID, modelName, modelClientOptions, }?: ConstructorParams); | ||
init({ modelName, modelClientOptions, domSettleTimeoutMs, }?: InitOptions): Promise<InitResult>; | ||
initFromPage({ page, modelName, modelClientOptions, }: InitFromPageOptions): Promise<InitFromPageResult>; | ||
private pending_logs_to_send_to_browserbase; | ||
@@ -123,33 +206,7 @@ private is_processing_browserbase_logs; | ||
private cleanupDomDebug; | ||
private _recordObservation; | ||
private _extract; | ||
private _observe; | ||
act({ action, modelName, useVision, variables, domSettleTimeoutMs, }: { | ||
action: string; | ||
modelName?: AvailableModel; | ||
useVision?: "fallback" | boolean; | ||
variables?: Record<string, string>; | ||
domSettleTimeoutMs?: number; | ||
}): Promise<{ | ||
success: boolean; | ||
message: string; | ||
action: string; | ||
}>; | ||
extract<T extends z.AnyZodObject>({ instruction, schema, modelName, domSettleTimeoutMs, }: { | ||
instruction: string; | ||
schema: T; | ||
modelName?: AvailableModel; | ||
domSettleTimeoutMs?: number; | ||
}): Promise<z.infer<T>>; | ||
observe(options?: { | ||
instruction?: string; | ||
modelName?: AvailableModel; | ||
useVision?: boolean; | ||
domSettleTimeoutMs?: number; | ||
}): Promise<{ | ||
selector: string; | ||
description: string; | ||
}[]>; | ||
act({ action, modelName, modelClientOptions, useVision, variables, domSettleTimeoutMs, }: ActOptions): Promise<ActResult>; | ||
extract<T extends z.AnyZodObject>({ instruction, schema, modelName, modelClientOptions, domSettleTimeoutMs, }: ExtractOptions<T>): Promise<ExtractResult<T>>; | ||
observe(options?: ObserveOptions): Promise<ObserveResult[]>; | ||
} | ||
export { Stagehand }; | ||
export { type ActOptions, type ActResult, type AvailableModel, type BrowserResult, type ClientOptions, type ConstructorParams, type ExtractOptions, type ExtractResult, type InitFromPageOptions, type InitFromPageResult, type InitOptions, type InitResult, type LogLine, type ModelProvider, type ObserveOptions, type ObserveResult, PlaywrightCommandException, PlaywrightCommandMethodNotSupportedException, Stagehand, type ToolCall }; |
{ | ||
"name": "@browserbasehq/stagehand", | ||
"version": "1.3.0", | ||
"version": "1.3.1-alpha-a855e1f8d85293182b22854ab5f95514f45bee1b", | ||
"description": "An AI web browsing framework focused on simplicity and extensibility.", | ||
@@ -14,10 +14,13 @@ "main": "./dist/index.js", | ||
"cache:clear": "rm -rf .cache", | ||
"evals": "npm run build-dom-scripts && npx braintrust eval evals/index.eval.ts", | ||
"evals": "npm run build-dom-scripts && tsx evals/index.eval.ts", | ||
"build-dom-scripts": "tsx lib/dom/genDomScripts.ts", | ||
"build": "npm run build-dom-scripts && tsup lib/index.ts --dts", | ||
"release": "changeset publish" | ||
"build-types": "tsc --emitDeclarationOnly --outDir dist", | ||
"build-js": "tsup lib/index.ts --dts", | ||
"build": "npm run build-dom-scripts && npm run build-js && npm run build-types", | ||
"postinstall": "npm run build", | ||
"release": "npm run build && changeset publish", | ||
"release-canary": "npm run build && changeset version --snapshot && changeset publish --tag alpha" | ||
}, | ||
"files": [ | ||
"dist/**", | ||
"patches/**" | ||
"dist/**" | ||
], | ||
@@ -34,4 +37,5 @@ "keywords": [], | ||
"autoevals": "^0.0.64", | ||
"braintrust": "^0.0.127", | ||
"braintrust": "^0.0.171", | ||
"cheerio": "^1.0.0", | ||
"chromium-bidi": "^0.10.0", | ||
"esbuild": "^0.21.4", | ||
@@ -41,3 +45,3 @@ "express": "^4.21.0", | ||
"prettier": "^3.2.5", | ||
"tsup": "^8.1.0", | ||
"tsup": "^8.2.1", | ||
"tsx": "^4.10.5", | ||
@@ -56,4 +60,2 @@ "typescript": "^5.2.2" | ||
"@browserbasehq/sdk": "^2.0.0", | ||
"anthropic": "^0.0.0", | ||
"anthropic-ai": "^0.0.10", | ||
"sharp": "^0.33.5", | ||
@@ -60,0 +62,0 @@ "zod-to-json-schema": "^3.23.3" |
103
README.md
@@ -14,3 +14,2 @@ <div id="toc" align="center"> | ||
<p align="center"> | ||
<a href="https://github.com/browserbase/stagehand/actions/workflows/ci.yml"><img alt="Build Status" src="https://github.com/browserbase/stagehand/actions/workflows/ci.yml/badge.svg" /></a> | ||
<a href="https://www.npmjs.com/package/@browserbasehq/stagehand"><img alt="NPM" src="https://img.shields.io/npm/v/@browserbasehq/stagehand.svg" /></a> | ||
@@ -31,2 +30,3 @@ <a href="https://github.com/browserbase/stagehand/blob/main/license"><img alt="MIT License" src="https://img.shields.io/badge/license-MIT-blue" /></a> | ||
- [How It Works](#how-it-works) | ||
- [Stagehand vs Playwright](#stagehand-vs-playwright) | ||
- [Prompting Tips](#prompting-tips) | ||
@@ -38,3 +38,3 @@ - [Roadmap](#roadmap) | ||
> [!NOTE] | ||
> [!NOTE] | ||
> `Stagehand` is currently available as an early release, and we're actively seeking feedback from the community. Please join our [Slack community](https://join.slack.com/t/stagehand-dev/shared_invite/zt-2tdncfgkk-fF8y5U0uJzR2y2_M9c9OJA) to stay updated on the latest developments and provide feedback. | ||
@@ -137,3 +137,13 @@ | ||
- `env`: `'LOCAL'` or `'BROWSERBASE'`. | ||
- `env`: `'LOCAL'` or `'BROWSERBASE'`. Defaults to `'BROWSERBASE'`. | ||
- `modelName`: (optional) an `AvailableModel` string to specify the default model to use. | ||
- `modelClientOptions`: (optional) configuration options for the model client. | ||
- `enableCaching`: a `boolean` that enables caching of LLM responses. When set to `true`, the LLM requests will be cached on disk and reused for identical requests. Defaults to `false`. | ||
- `headless`: a `boolean` that determines if the browser runs in headless mode. Defaults to `false`. When the env is set to `BROWSERBASE`, this will be ignored. | ||
- `domSettleTimeoutMs`: an `integer` that specifies the timeout in milliseconds for waiting for the DOM to settle. Defaults to 30000 (30 seconds). | ||
- `apiKey`: (optional) your Browserbase API key. Defaults to `BROWSERBASE_API_KEY` environment variable. | ||
- `projectId`: (optional) your Browserbase project ID. Defaults to `BROWSERBASE_PROJECT_ID` environment variable. | ||
- `browserBaseSessionCreateParams`: configuration options for creating new Browserbase sessions. | ||
- `browserbaseResumeSessionID`: ID of an existing Browserbase session to resume. | ||
- `logger`: a function that handles log messages. Useful for custom logging implementations. | ||
- `verbose`: an `integer` that enables several levels of logging during automation: | ||
@@ -144,4 +154,2 @@ - `0`: limited to no logging | ||
- `debugDom`: a `boolean` that draws bounding boxes around elements presented to the LLM during automation. | ||
- `domSettleTimeoutMs`: an `integer` that specifies the timeout in milliseconds for waiting for the DOM to settle. It can be overridden in individual function calls if needed. Defaults to 30000 (30 seconds). | ||
- `enableCaching`: a `boolean` that enables caching of LLM responses. When set to `true`, the LLM requests will be cached on disk and reused for identical requests. Defaults to `false`. | ||
@@ -153,4 +161,23 @@ - **Returns:** | ||
- **Example:** | ||
```javascript | ||
// Basic usage | ||
const stagehand = new Stagehand(); | ||
// Custom configuration | ||
const stagehand = new Stagehand({ | ||
env: "LOCAL", | ||
verbose: 1, | ||
headless: true, | ||
enableCaching: true, | ||
logger: (logLine) => { | ||
console.log(`[${logLine.category}] ${logLine.message}`); | ||
}, | ||
}); | ||
// Resume existing Browserbase session | ||
const stagehand = new Stagehand({ | ||
env: "BROWSERBASE", | ||
browserbaseResumeSessionID: "existing-session-id", | ||
}); | ||
``` | ||
@@ -166,3 +193,5 @@ | ||
- `modelName`: (optional) an `AvailableModel` string to specify the model to use. This will be used for all other methods unless overridden. Defaults to `"gpt-4o"`. | ||
- `modelName`: (optional) an `AvailableModel` string to specify the model to use. This will be used for all other methods unless overridden. | ||
- `modelClientOptions`: (optional) configuration options for the model client | ||
- `domSettleTimeoutMs`: (optional) timeout in milliseconds for waiting for the DOM to settle | ||
@@ -186,6 +215,8 @@ - **Returns:** | ||
- `action`: a `string` describing the action to perform, e.g., `"search for 'x'"`. | ||
- `modelName`: (optional) an `AvailableModel` string to specify the model to use. | ||
- `useVision`: (optional) a `boolean` or `"fallback"` to determine if vision-based processing should be used. Defaults to `"fallback"`. | ||
- `domSettleTimeoutMs`: (optional) an `integer` that specifies the timeout in milliseconds for waiting for the DOM to settle. If not set, defaults to the timeout value specified during initialization. | ||
- `action`: a `string` describing the action to perform | ||
- `modelName`: (optional) an `AvailableModel` string to specify the model to use | ||
- `modelClientOptions`: (optional) configuration options for the model client | ||
- `useVision`: (optional) a `boolean` or `"fallback"` to determine if vision-based processing should be used. Defaults to `"fallback"` | ||
- `variables`: (optional) a `Record<string, string>` of variables to use in the action. Variables in the action string are referenced using `%variable_name%` | ||
- `domSettleTimeoutMs`: (optional) timeout in milliseconds for waiting for the DOM to settle | ||
@@ -200,4 +231,23 @@ - **Returns:** | ||
- **Example:** | ||
```javascript | ||
// Basic usage | ||
await stagehand.act({ action: "click on add to cart" }); | ||
// Using variables | ||
await stagehand.act({ | ||
action: "enter %username% into the username field", | ||
variables: { | ||
username: "john.doe@example.com", | ||
}, | ||
}); | ||
// Multiple variables | ||
await stagehand.act({ | ||
action: "fill in the form with %username% and %password%", | ||
variables: { | ||
username: "john.doe", | ||
password: "secretpass123", | ||
}, | ||
}); | ||
``` | ||
@@ -211,6 +261,7 @@ | ||
- `instruction`: a `string` providing instructions for extraction. | ||
- `schema`: a `z.AnyZodObject` defining the structure of the data to extract. | ||
- `modelName`: (optional) an `AvailableModel` string to specify the model to use. | ||
- `domSettleTimeoutMs`: (optional) an `integer` that specifies the timeout in milliseconds for waiting for the DOM to settle. If not set, defaults to the timeout value specified during initialization. | ||
- `instruction`: a `string` providing instructions for extraction | ||
- `schema`: a `z.AnyZodObject` defining the structure of the data to extract | ||
- `modelName`: (optional) an `AvailableModel` string to specify the model to use | ||
- `modelClientOptions`: (optional) configuration options for the model client | ||
- `domSettleTimeoutMs`: (optional) timeout in milliseconds for waiting for the DOM to settle | ||
@@ -242,9 +293,13 @@ - **Returns:** | ||
- `instruction`: a `string` providing instructions for the observation. | ||
- `useVision`: (optional) a `boolean` or `"fallback"` to determine if vision-based processing should be used. Defaults to `"fallback"`. | ||
- `domSettleTimeoutMs`: (optional) an `integer` that specifies the timeout in milliseconds for waiting for the DOM to settle. If not set, defaults to the timeout value specified during initialization. | ||
- `instruction`: (optional) a `string` providing instructions for the observation. Defaults to "Find actions that can be performed on this page." | ||
- `modelName`: (optional) an `AvailableModel` string to specify the model to use | ||
- `modelClientOptions`: (optional) configuration options for the model client | ||
- `useVision`: (optional) a `boolean` to determine if vision-based processing should be used. Defaults to `false` | ||
- `domSettleTimeoutMs`: (optional) timeout in milliseconds for waiting for the DOM to settle | ||
- **Returns:** | ||
- A `Promise` that resolves to an array of `string`s representing the actions that can be taken on the current page. | ||
- A `Promise` that resolves to an array of objects containing: | ||
- `selector`: a `string` representing the element selector | ||
- `description`: a `string` describing the possible action | ||
@@ -337,2 +392,8 @@ - **Example:** | ||
### Stagehand vs Playwright | ||
Below is an example of how to extract a list of companies from the AI Grant website using both Stagehand and Playwright. | ||
![](./docs/media/stagehand-playwright.png) | ||
## Prompting Tips | ||
@@ -455,4 +516,6 @@ | ||
After that, you can run the eval using `npm run evals` | ||
After that, you can run all evals at once using `npm run evals` | ||
You can also run individual evals using `npm run evals -- your_eval_name`. | ||
### Adding new evals | ||
@@ -478,3 +541,3 @@ | ||
Stagehand uses [tsup](https://github.com/egoist/tsup) to build the SDK and vanilla `esbuild` to build scripts that run in the DOM. | ||
Stagehand uses [tsup](https://github.com/egoist/tsup) to build the SDK and vanilla [esbuild](https://esbuild.github.io/) to build the scripts that run in the DOM. | ||
@@ -481,0 +544,0 @@ 1. run `npm run build` |
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is not supported yet
Install scripts
Supply chain risk: Install scripts are run when the package is installed. The majority of malware in npm is hidden in install scripts.
Found 1 instance in 1 package
Long strings
Supply chain risk: Contains long string literals, which may be a sign of obfuscated or packed code.
Found 1 instance in 1 package
No v1
Quality: Package is not semver >=1. This means it is not stable and does not support ^ ranges.
Found 1 instance in 1 package
Environment variable access
Supply chain risk: Package accesses environment variables, which may be a sign of credential stuffing or data theft.
Found 1 instance in 1 package
Long strings
Supply chain risk: Contains long string literals, which may be a sign of obfuscated or packed code.
Found 1 instance in 1 package
228899
9
42
5191
544
8
16
1
1
- Removed anthropic@^0.0.0
- Removed anthropic-ai@^0.0.10
- Removed anthropic@0.0.0 (transitive)
- Removed anthropic-ai@0.0.10 (transitive)