@@ -1,17 +0,45 @@
		import { Page, BrowserContext } from '@playwright/test';
		import { Page, BrowserContext, Browser } from '@playwright/test';
		import { z } from 'zod';
		import { Browserbase } from '@browserbasehq/sdk';
		import Browserbase from '@browserbasehq/sdk';
		import { ClientOptions as ClientOptions$2 } from '@anthropic-ai/sdk';
		import { Tool } from '@anthropic-ai/sdk/resources';
		import { ClientOptions as ClientOptions$1 } from 'openai';
		import { ChatCompletionTool } from 'openai/resources';

		type LogLine = {
		id?: string;
		category?: string;
		message: string;
		level?: 0 | 1 | 2;
		timestamp?: string;
		auxiliary?: {
		[key: string]: {
		value: string;
		type: "object" | "string" | "html" | "integer" | "float" | "boolean";
		};
		};
		};

		type AvailableModel = "gpt-4o" | "gpt-4o-mini" | "gpt-4o-2024-08-06" | "claude-3-5-sonnet-latest" | "claude-3-5-sonnet-20241022" | "claude-3-5-sonnet-20240620";
		type ModelProvider = "openai" | "anthropic";
		type ClientOptions = ClientOptions$1 | ClientOptions$2;
		type ToolCall = Tool | ChatCompletionTool;

		interface ChatMessage {
		role: "system" | "user" | "assistant";
		content: string | {
		type: "image_url" | "text";
		image_url?: {
		url: string;
		};
		text?: string;
		}[];
		content: ChatMessageContent;
		}
		type ChatMessageContent = string | (ChatMessageImageContent | ChatMessageTextContent)[];
		interface ChatMessageImageContent {
		type: "image_url";
		image_url: {
		url: string;
		};
		text?: string;
		}
		interface ChatMessageTextContent {
		type: string;
		text: string;
		}
		interface ChatCompletionOptions {
		model: string;
		messages: ChatMessage[];
		@@ -26,3 +54,2 @@ temperature?: number;
		};
		[key: string]: any;
		response_model?: {
		@@ -32,6 +59,13 @@ name: string;
		};
		tools?: ToolCall[];
		tool_choice?: string;
		maxTokens?: number;
		requestId: string;
		}
		interface LLMClient {
		createChatCompletion(options: ChatCompletionOptions): Promise<any>;
		logger: (message: {
		declare abstract class LLMClient {
		modelName: AvailableModel;
		hasVision: boolean;
		constructor(modelName: AvailableModel);
		abstract createChatCompletion(options: ChatCompletionOptions): Promise<any>;
		abstract logger: (message: {
		category?: string;
		@@ -42,17 +76,2 @@ message: string;

		type LogLine = {
		id?: string;
		category?: string;
		message: string;
		level?: 0 | 1 | 2;
		timestamp?: string;
		auxiliary?: {
		[key: string]: {
		value: string;
		type: "object" | "string" | "html" | "integer" | "float" | "boolean";
		};
		};
		};

		type AvailableModel = "gpt-4o" | "gpt-4o-mini" | "gpt-4o-2024-08-06" | "claude-3-5-sonnet-latest" | "claude-3-5-sonnet-20241022" | "claude-3-5-sonnet-20240620";
		declare class LLMProvider {
		@@ -65,8 +84,92 @@ private modelToProviderMap;
		cleanRequestCache(requestId: string): void;
		getClient(modelName: AvailableModel, requestId: string): LLMClient;
		getClient(modelName: AvailableModel, clientOptions?: ClientOptions): LLMClient;
		}

		interface ConstructorParams {
		env: "LOCAL" | "BROWSERBASE";
		apiKey?: string;
		projectId?: string;
		verbose?: 0 | 1 | 2;
		debugDom?: boolean;
		llmProvider?: LLMProvider;
		headless?: boolean;
		logger?: (message: LogLine) => void;
		domSettleTimeoutMs?: number;
		browserBaseSessionCreateParams?: Browserbase.Sessions.SessionCreateParams;
		enableCaching?: boolean;
		browserbaseResumeSessionID?: string;
		modelName?: AvailableModel;
		modelClientOptions?: ClientOptions;
		}
		interface InitOptions {
		modelName?: AvailableModel;
		modelClientOptions?: ClientOptions;
		domSettleTimeoutMs?: number;
		}
		interface InitResult {
		debugUrl: string;
		sessionUrl: string;
		}
		interface InitResult {
		debugUrl: string;
		sessionUrl: string;
		}
		interface InitFromPageOptions {
		page: Page;
		modelName?: AvailableModel;
		modelClientOptions?: ClientOptions;
		}
		interface InitFromPageResult {
		context: BrowserContext;
		}
		interface ActOptions {
		action: string;
		modelName?: AvailableModel;
		modelClientOptions?: ClientOptions;
		useVision?: "fallback" | boolean;
		variables?: Record<string, string>;
		domSettleTimeoutMs?: number;
		}
		interface ActResult {
		success: boolean;
		message: string;
		action: string;
		}
		interface ExtractOptions<T extends z.AnyZodObject> {
		instruction: string;
		schema: T;
		modelName?: AvailableModel;
		modelClientOptions?: ClientOptions;
		domSettleTimeoutMs?: number;
		}
		type ExtractResult<T extends z.AnyZodObject> = z.infer<T>;
		interface ObserveOptions {
		instruction?: string;
		modelName?: AvailableModel;
		modelClientOptions?: ClientOptions;
		useVision?: boolean;
		domSettleTimeoutMs?: number;
		}
		interface ObserveResult {
		selector: string;
		description: string;
		}

		interface BrowserResult {
		browser?: Browser;
		context: BrowserContext;
		debugUrl?: string;
		sessionUrl?: string;
		}

		declare class PlaywrightCommandException extends Error {
		constructor(message: string);
		}
		declare class PlaywrightCommandMethodNotSupportedException extends Error {
		constructor(message: string);
		}

		declare class Stagehand {
		private llmProvider;
		private observations;
		private llmClient;
		page: Page;
		@@ -79,3 +182,2 @@ context: BrowserContext;
		private debugDom;
		private defaultModelName;
		private headless;
		@@ -88,28 +190,9 @@ private logger;
		private variables;
		private actHandler;
		private browserbaseResumeSessionID?;
		constructor({ env, apiKey, projectId, verbose, debugDom, llmProvider, headless, logger, browserBaseSessionCreateParams, domSettleTimeoutMs, enableCaching, browserbaseResumeSessionID, }?: {
		env: "LOCAL" | "BROWSERBASE";
		apiKey?: string;
		projectId?: string;
		verbose?: 0 | 1 | 2;
		debugDom?: boolean;
		llmProvider?: LLMProvider;
		headless?: boolean;
		logger?: (message: LogLine) => void;
		domSettleTimeoutMs?: number;
		browserBaseSessionCreateParams?: Browserbase.Sessions.SessionCreateParams;
		enableCaching?: boolean;
		browserbaseResumeSessionID?: string;
		});
		init({ modelName, domSettleTimeoutMs, }?: {
		modelName?: AvailableModel;
		domSettleTimeoutMs?: number;
		}): Promise<{
		debugUrl: string;
		sessionUrl: string;
		}>;
		initFromPage(page: Page, modelName?: AvailableModel): Promise<{
		context: BrowserContext;
		}>;
		private actHandler?;
		private extractHandler?;
		private observeHandler?;
		constructor({ env, apiKey, projectId, verbose, debugDom, llmProvider, headless, logger, browserBaseSessionCreateParams, domSettleTimeoutMs, enableCaching, browserbaseResumeSessionID, modelName, modelClientOptions, }?: ConstructorParams);
		init({ modelName, modelClientOptions, domSettleTimeoutMs, }?: InitOptions): Promise<InitResult>;
		initFromPage({ page, modelName, modelClientOptions, }: InitFromPageOptions): Promise<InitFromPageResult>;
		private pending_logs_to_send_to_browserbase;
		@@ -123,33 +206,7 @@ private is_processing_browserbase_logs;
		private cleanupDomDebug;
		private _recordObservation;
		private _extract;
		private _observe;
		act({ action, modelName, useVision, variables, domSettleTimeoutMs, }: {
		action: string;
		modelName?: AvailableModel;
		useVision?: "fallback" | boolean;
		variables?: Record<string, string>;
		domSettleTimeoutMs?: number;
		}): Promise<{
		success: boolean;
		message: string;
		action: string;
		}>;
		extract<T extends z.AnyZodObject>({ instruction, schema, modelName, domSettleTimeoutMs, }: {
		instruction: string;
		schema: T;
		modelName?: AvailableModel;
		domSettleTimeoutMs?: number;
		}): Promise<z.infer<T>>;
		observe(options?: {
		instruction?: string;
		modelName?: AvailableModel;
		useVision?: boolean;
		domSettleTimeoutMs?: number;
		}): Promise<{
		selector: string;
		description: string;
		}[]>;
		act({ action, modelName, modelClientOptions, useVision, variables, domSettleTimeoutMs, }: ActOptions): Promise<ActResult>;
		extract<T extends z.AnyZodObject>({ instruction, schema, modelName, modelClientOptions, domSettleTimeoutMs, }: ExtractOptions<T>): Promise<ExtractResult<T>>;
		observe(options?: ObserveOptions): Promise<ObserveResult[]>;
		}

		export { Stagehand };
		export { type ActOptions, type ActResult, type AvailableModel, type BrowserResult, type ClientOptions, type ConstructorParams, type ExtractOptions, type ExtractResult, type InitFromPageOptions, type InitFromPageResult, type InitOptions, type InitResult, type LogLine, type ModelProvider, type ObserveOptions, type ObserveResult, PlaywrightCommandException, PlaywrightCommandMethodNotSupportedException, Stagehand, type ToolCall };

package.json

		{
		"name": "@browserbasehq/stagehand",
		"version": "1.3.0",
		"version": "1.3.1-alpha-a855e1f8d85293182b22854ab5f95514f45bee1b",
		"description": "An AI web browsing framework focused on simplicity and extensibility.",
		@@ -14,10 +14,13 @@ "main": "./dist/index.js",
		"cache:clear": "rm -rf .cache",
		"evals": "npm run build-dom-scripts && npx braintrust eval evals/index.eval.ts",
		"evals": "npm run build-dom-scripts && tsx evals/index.eval.ts",
		"build-dom-scripts": "tsx lib/dom/genDomScripts.ts",
		"build": "npm run build-dom-scripts && tsup lib/index.ts --dts",
		"release": "changeset publish"
		"build-types": "tsc --emitDeclarationOnly --outDir dist",
		"build-js": "tsup lib/index.ts --dts",
		"build": "npm run build-dom-scripts && npm run build-js && npm run build-types",
		"postinstall": "npm run build",
		"release": "npm run build && changeset publish",
		"release-canary": "npm run build && changeset version --snapshot && changeset publish --tag alpha"
		},
		"files": [
		"dist/**",
		"patches/**"
		"dist/**"
		],
		@@ -34,4 +37,5 @@ "keywords": [],
		"autoevals": "^0.0.64",
		"braintrust": "^0.0.127",
		"braintrust": "^0.0.171",
		"cheerio": "^1.0.0",
		"chromium-bidi": "^0.10.0",
		"esbuild": "^0.21.4",
		@@ -41,3 +45,3 @@ "express": "^4.21.0",
		"prettier": "^3.2.5",
		"tsup": "^8.1.0",
		"tsup": "^8.2.1",
		"tsx": "^4.10.5",
		@@ -56,4 +60,2 @@ "typescript": "^5.2.2"
		"@browserbasehq/sdk": "^2.0.0",
		"anthropic": "^0.0.0",
		"anthropic-ai": "^0.0.10",
		"sharp": "^0.33.5",
		@@ -60,0 +62,0 @@ "zod-to-json-schema": "^3.23.3"

103

README.md

		@@ -14,3 +14,2 @@ <div id="toc" align="center">
		<p align="center">
		<a href="https://github.com/browserbase/stagehand/actions/workflows/ci.yml"><img alt="Build Status" src="https://github.com/browserbase/stagehand/actions/workflows/ci.yml/badge.svg" /></a>
		<a href="https://www.npmjs.com/package/@browserbasehq/stagehand"><img alt="NPM" src="https://img.shields.io/npm/v/@browserbasehq/stagehand.svg" /></a>
		@@ -31,2 +30,3 @@ <a href="https://github.com/browserbase/stagehand/blob/main/license"><img alt="MIT License" src="https://img.shields.io/badge/license-MIT-blue" /></a>
		- [How It Works](#how-it-works)
		- [Stagehand vs Playwright](#stagehand-vs-playwright)
		- [Prompting Tips](#prompting-tips)
		@@ -38,3 +38,3 @@ - [Roadmap](#roadmap)

		> [!NOTE]
		> [!NOTE]
		> `Stagehand` is currently available as an early release, and we're actively seeking feedback from the community. Please join our [Slack community](https://join.slack.com/t/stagehand-dev/shared_invite/zt-2tdncfgkk-fF8y5U0uJzR2y2_M9c9OJA) to stay updated on the latest developments and provide feedback.
		@@ -137,3 +137,13 @@

		- `env`: `'LOCAL'` or `'BROWSERBASE'`.
		- `env`: `'LOCAL'` or `'BROWSERBASE'`. Defaults to `'BROWSERBASE'`.
		- `modelName`: (optional) an `AvailableModel` string to specify the default model to use.
		- `modelClientOptions`: (optional) configuration options for the model client.
		- `enableCaching`: a `boolean` that enables caching of LLM responses. When set to `true`, the LLM requests will be cached on disk and reused for identical requests. Defaults to `false`.
		- `headless`: a `boolean` that determines if the browser runs in headless mode. Defaults to `false`. When the env is set to `BROWSERBASE`, this will be ignored.
		- `domSettleTimeoutMs`: an `integer` that specifies the timeout in milliseconds for waiting for the DOM to settle. Defaults to 30000 (30 seconds).
		- `apiKey`: (optional) your Browserbase API key. Defaults to `BROWSERBASE_API_KEY` environment variable.
		- `projectId`: (optional) your Browserbase project ID. Defaults to `BROWSERBASE_PROJECT_ID` environment variable.
		- `browserBaseSessionCreateParams`: configuration options for creating new Browserbase sessions.
		- `browserbaseResumeSessionID`: ID of an existing Browserbase session to resume.
		- `logger`: a function that handles log messages. Useful for custom logging implementations.
		- `verbose`: an `integer` that enables several levels of logging during automation:
		@@ -144,4 +154,2 @@ - `0`: limited to no logging
		- `debugDom`: a `boolean` that draws bounding boxes around elements presented to the LLM during automation.
		- `domSettleTimeoutMs`: an `integer` that specifies the timeout in milliseconds for waiting for the DOM to settle. It can be overridden in individual function calls if needed. Defaults to 30000 (30 seconds).
		- `enableCaching`: a `boolean` that enables caching of LLM responses. When set to `true`, the LLM requests will be cached on disk and reused for identical requests. Defaults to `false`.

		@@ -153,4 +161,23 @@ - Returns:
		- Example:

		```javascript
		// Basic usage
		const stagehand = new Stagehand();

		// Custom configuration
		const stagehand = new Stagehand({
		env: "LOCAL",
		verbose: 1,
		headless: true,
		enableCaching: true,
		logger: (logLine) => {
		console.log(`[${logLine.category}] ${logLine.message}`);
		},
		});

		// Resume existing Browserbase session
		const stagehand = new Stagehand({
		env: "BROWSERBASE",
		browserbaseResumeSessionID: "existing-session-id",
		});
		```
		@@ -166,3 +193,5 @@

		- `modelName`: (optional) an `AvailableModel` string to specify the model to use. This will be used for all other methods unless overridden. Defaults to `"gpt-4o"`.
		- `modelName`: (optional) an `AvailableModel` string to specify the model to use. This will be used for all other methods unless overridden.
		- `modelClientOptions`: (optional) configuration options for the model client
		- `domSettleTimeoutMs`: (optional) timeout in milliseconds for waiting for the DOM to settle

		@@ -186,6 +215,8 @@ - Returns:

		- `action`: a `string` describing the action to perform, e.g., `"search for 'x'"`.
		- `modelName`: (optional) an `AvailableModel` string to specify the model to use.
		- `useVision`: (optional) a `boolean` or `"fallback"` to determine if vision-based processing should be used. Defaults to `"fallback"`.
		- `domSettleTimeoutMs`: (optional) an `integer` that specifies the timeout in milliseconds for waiting for the DOM to settle. If not set, defaults to the timeout value specified during initialization.
		- `action`: a `string` describing the action to perform
		- `modelName`: (optional) an `AvailableModel` string to specify the model to use
		- `modelClientOptions`: (optional) configuration options for the model client
		- `useVision`: (optional) a `boolean` or `"fallback"` to determine if vision-based processing should be used. Defaults to `"fallback"`
		- `variables`: (optional) a `Record<string, string>` of variables to use in the action. Variables in the action string are referenced using `%variable_name%`
		- `domSettleTimeoutMs`: (optional) timeout in milliseconds for waiting for the DOM to settle

		@@ -200,4 +231,23 @@ - Returns:
		- Example:

		```javascript
		// Basic usage
		await stagehand.act({ action: "click on add to cart" });

		// Using variables
		await stagehand.act({
		action: "enter %username% into the username field",
		variables: {
		username: "john.doe@example.com",
		},
		});

		// Multiple variables
		await stagehand.act({
		action: "fill in the form with %username% and %password%",
		variables: {
		username: "john.doe",
		password: "secretpass123",
		},
		});
		```
		@@ -211,6 +261,7 @@

		- `instruction`: a `string` providing instructions for extraction.
		- `schema`: a `z.AnyZodObject` defining the structure of the data to extract.
		- `modelName`: (optional) an `AvailableModel` string to specify the model to use.
		- `domSettleTimeoutMs`: (optional) an `integer` that specifies the timeout in milliseconds for waiting for the DOM to settle. If not set, defaults to the timeout value specified during initialization.
		- `instruction`: a `string` providing instructions for extraction
		- `schema`: a `z.AnyZodObject` defining the structure of the data to extract
		- `modelName`: (optional) an `AvailableModel` string to specify the model to use
		- `modelClientOptions`: (optional) configuration options for the model client
		- `domSettleTimeoutMs`: (optional) timeout in milliseconds for waiting for the DOM to settle

		@@ -242,9 +293,13 @@ - Returns:

		- `instruction`: a `string` providing instructions for the observation.
		- `useVision`: (optional) a `boolean` or `"fallback"` to determine if vision-based processing should be used. Defaults to `"fallback"`.
		- `domSettleTimeoutMs`: (optional) an `integer` that specifies the timeout in milliseconds for waiting for the DOM to settle. If not set, defaults to the timeout value specified during initialization.
		- `instruction`: (optional) a `string` providing instructions for the observation. Defaults to "Find actions that can be performed on this page."
		- `modelName`: (optional) an `AvailableModel` string to specify the model to use
		- `modelClientOptions`: (optional) configuration options for the model client
		- `useVision`: (optional) a `boolean` to determine if vision-based processing should be used. Defaults to `false`
		- `domSettleTimeoutMs`: (optional) timeout in milliseconds for waiting for the DOM to settle

		- Returns:

		- A `Promise` that resolves to an array of `string`s representing the actions that can be taken on the current page.
		- A `Promise` that resolves to an array of objects containing:
		- `selector`: a `string` representing the element selector
		- `description`: a `string` describing the possible action

		@@ -337,2 +392,8 @@ - Example:

		### Stagehand vs Playwright

		Below is an example of how to extract a list of companies from the AI Grant website using both Stagehand and Playwright.

		![](./docs/media/stagehand-playwright.png)

		## Prompting Tips
		@@ -455,4 +516,6 @@

		After that, you can run the eval using `npm run evals`
		After that, you can run all evals at once using `npm run evals`

		You can also run individual evals using `npm run evals -- your_eval_name`.

		### Adding new evals
		@@ -478,3 +541,3 @@

		Stagehand uses [tsup](https://github.com/egoist/tsup) to build the SDK and vanilla `esbuild` to build scripts that run in the DOM.
		Stagehand uses [tsup](https://github.com/egoist/tsup) to build the SDK and vanilla [esbuild](https://esbuild.github.io/) to build the scripts that run in the DOM.

		@@ -481,0 +544,0 @@ 1. run `npm run build`

dist/index.js

Sorry, the diff of this file is too big to display

LICENSE

Sorry, the diff of this file is not supported yet

@browserbasehq/stagehand - npm Package Compare versions

New alerts

Fixed alerts

Improved metrics

Worsened metrics

Dependency changes