@browserbasehq/stagehand - npm Package Compare versions

Comparing version 1.10.0-alpha-7c484124db7bb587ea1b23631c29e3940cdf3a4e to 1.10.0-alpha-9c20de3e66f0ac20374d5e5e02eb107c620a2263

lib/handlers/extractHandler.ts

		@@ -250,3 +250,5 @@ import { z } from "zod";
		};
		allAnnotations.push(annotation);
		if (annotation.text.length > 0) {
		allAnnotations.push(annotation);
		}
		}
		@@ -253,0 +255,0 @@ }

lib/llm/OpenAIClient.ts

		@@ -219,3 +219,3 @@ import OpenAI, { ClientOptions } from "openai";

		Do not include any other text, formating or markdown in your output. Do not include \`\`\` or \`\`\`json in your response. Only the JSON object itself.`,
		Do not include any other text, formatting or markdown in your output. Do not include \`\`\` or \`\`\`json in your response. Only the JSON object itself.`,
		});
		@@ -222,0 +222,0 @@ } catch (error) {

163

lib/utils.ts

		@@ -6,2 +6,7 @@ import crypto from "crypto";

		// This is a heuristic for the width of a character in pixels. It seems to work
		// better than attempting to calculate character widths dynamically, which sometimes
		// results in collisions when placing characters on the "canvas".
		const HEURISTIC_CHAR_WIDTH = 5;

		export function generateId(operation: string) {
		@@ -30,8 +35,4 @@ return crypto.createHash("sha256").update(operation).digest("hex");
		): string {
		// 1: Estimate the average character width in pixels by examining the text annotations.
		// If no reliable measurement is found, default to 10 pixels per character.
		const charWidth = estimateCharacterWidth(textAnnotations) || 10;

		// 2: Create a copy of textAnnotations and sort them by their vertical position (y-coordinate),
		// ensuring that topmost annotations appear first and bottommost appear last.
		// 1: Sort annotations by vertical position (y-coordinate).
		// The topmost annotations appear first, the bottommost last.
		const sortedAnnotations = [...textAnnotations].sort(
		@@ -41,6 +42,5 @@ (a, b) => a.bottom_left.y - b.bottom_left.y,

		// 3: Group annotations by their line position. We use a small epsilon to handle
		// floating-point differences. Two annotations are considered on the same line if their
		// y-coordinates differ by less than epsilon.
		const epsilon = 0.0001;
		// 2: Group annotations by line based on their y-coordinate.
		// We use an epsilon so that very close y-values are treated as the same line.
		const epsilon = 1;
		const lineMap: Map<number, TextAnnotation[]> = new Map();
		@@ -50,5 +50,3 @@
		let foundLineY: number | undefined;

		// 4: Check if the annotation belongs to an existing line group.
		// If so, add it to that line. Otherwise, start a new line group.
		// 3: Check if this annotation belongs to any existing line group.
		for (const key of lineMap.keys()) {
		@@ -61,2 +59,3 @@ if (Math.abs(key - annotation.bottom_left.y) < epsilon) {

		// If found, push into that line; otherwise, create a new line entry.
		if (foundLineY !== undefined) {
		@@ -69,8 +68,6 @@ lineMap.get(foundLineY)!.push(annotation);

		// 5: Extract all line keys (y-coordinates) and sort them to process lines top-to-bottom.
		// 4: Get all unique y-coordinates for lines and sort them top-to-bottom.
		const lineYs = Array.from(lineMap.keys()).sort((a, b) => a - b);

		// 6: For each line, group words together and calculate the maximum normalized end position (maxNormalizedEndX).
		// This will help determine the necessary canvas width to accommodate all text.
		let maxNormalizedEndX = 0;
		// 5: Build an array of "final lines" (TextAnnotations[]) by grouping words for each line.
		const finalLines: TextAnnotation[][] = [];
		@@ -81,30 +78,45 @@

		// 7: Sort annotations in the current line by their horizontal position (x-coordinate),
		// ensuring left-to-right ordering.
		// 6: Sort annotations in the current line left-to-right by x-coordinate.
		lineAnnotations.sort((a, b) => a.bottom_left.x - b.bottom_left.x);

		// 8: Group nearby annotations into word clusters, forming logical sentences or phrases.
		// 7: Group annotations into word clusters (sentences/phrases).
		const groupedLineAnnotations = groupWordsInSentence(lineAnnotations);

		// 9: Determine how far to the right the text in this line extends, normalized by page width.
		// Update maxNormalizedEndX to track the widest line encountered.
		for (const ann of groupedLineAnnotations) {
		const textLengthInPx = ann.text.length * charWidth;
		const normalizedTextLength = textLengthInPx / pageWidth;
		const endX = ann.bottom_left_normalized.x + normalizedTextLength;
		if (endX > maxNormalizedEndX) {
		maxNormalizedEndX = endX;
		// 8: Push the grouped annotations for this line into finalLines.
		finalLines.push(groupedLineAnnotations);
		}

		// -------------------------
		// First Pass: Calculate the width of the longest line (in characters) up front.
		// We will use this to set the width of the canvas, which will reduce likelihood of collisions.
		// -------------------------
		let maxLineWidthInChars = 0;

		for (const line of finalLines) {
		let lineMaxEnd = 0;
		for (const ann of line) {
		// Convert normalized X to character index
		const startXInChars = Math.round(
		ann.bottom_left_normalized.x * (pageWidth / HEURISTIC_CHAR_WIDTH),
		);
		// Each annotation spans ann.text.length characters
		const endXInChars = startXInChars + ann.text.length;

		if (endXInChars > lineMaxEnd) {
		lineMaxEnd = endXInChars;
		}
		}

		// 10: Save the processed line to finalLines for later rendering.
		finalLines.push(groupedLineAnnotations);
		// Track the largest width across all lines
		if (lineMaxEnd > maxLineWidthInChars) {
		maxLineWidthInChars = lineMaxEnd;
		}
		}

		// 11: Determine the canvas width in characters. We scale according to maxNormalizedEndX and page width.
		// Add a small buffer (20 chars) to ensure no text overflows the canvas.
		let canvasWidth = Math.ceil(maxNormalizedEndX * (pageWidth / charWidth)) + 20;
		canvasWidth = Math.max(canvasWidth, 1);
		// 9: Add a 20-char buffer to ensure we don’t cut off text.
		maxLineWidthInChars += 20;

		// 12: Compute the baseline (lowest point) of each line. This helps us understand vertical spacing.
		// 10: Determine the canvas width based on the measured maxLineWidthInChars.
		const canvasWidth = Math.max(maxLineWidthInChars, 1);

		// 11: Compute the baseline (lowest y) of each line to measure vertical spacing.
		const lineBaselines = finalLines.map((line) =>
		@@ -114,3 +126,3 @@ Math.min(...line.map((a) => a.bottom_left.y)),

		// 13: Compute vertical gaps between consecutive lines to determine line spacing.
		// 12: Compute the gaps between consecutive lines.
		const verticalGaps: number[] = [];
		@@ -121,31 +133,30 @@ for (let i = 1; i < lineBaselines.length; i++) {

		// 14: Estimate what a "normal" line spacing is by taking the median of all vertical gaps.
		// 13: Estimate a "normal" line spacing via the median of these gaps.
		const normalLineSpacing = verticalGaps.length > 0 ? median(verticalGaps) : 0;

		// 15: Create a 2D character canvas initialized with spaces, onto which we'll "print" text lines.
		// 14: Create a 2D character canvas (array of arrays), filled with spaces.
		let canvas: string[][] = [];

		// 16: lineIndex represents the current line of the canvas. Initialize with -1 so the first line starts at 0.
		// 15: lineIndex tracks which row of the canvas we’re on; start at -1 so the first line is index 0.
		let lineIndex = -1;

		// 17: Iterate over each line of processed text.
		// 16: Render each line of text into our canvas.
		for (let i = 0; i < finalLines.length; i++) {
		if (i === 0) {
		// 18: For the first line, just increment lineIndex to start at 0 with no extra spacing.
		// 17: For the very first line, just increment lineIndex once.
		lineIndex++;
		ensureLineExists(canvas, lineIndex, canvasWidth);
		} else {
		// 19: For subsequent lines, calculate how many extra blank lines to insert based on spacing.
		// **18: For subsequent lines, figure out how many blank lines to insert
		// based on the gap between this line’s baseline and the previous line’s baseline.**
		const gap = lineBaselines[i] - lineBaselines[i - 1];

		let extraLines = 0;
		// 20: If we have a known normal line spacing, and the gap is larger than expected,
		// insert extra blank lines proportional to the ratio of gap to normal spacing.
		if (normalLineSpacing > 0) {
		if (gap > 1.2 * normalLineSpacing) {
		extraLines = Math.max(Math.round(gap / normalLineSpacing) - 1, 0);
		}
		// **19: If the gap is significantly larger than the "normal" spacing,
		// insert blank lines proportionally.**
		if (normalLineSpacing > 0 && gap > 1.2 * normalLineSpacing) {
		extraLines = Math.max(Math.round(gap / normalLineSpacing) - 1, 0);
		}

		// 21: Insert the calculated extra blank lines to maintain approximate vertical spacing.
		// 20: Insert the calculated extra blank lines.
		for (let e = 0; e < extraLines; e++) {
		@@ -156,3 +167,3 @@ lineIndex++;

		// 22: After adjusting for spacing, increment lineIndex for the current line of text.
		// 21: Move to the next line (row) in the canvas for this line’s text.
		lineIndex++;
		@@ -162,15 +173,17 @@ ensureLineExists(canvas, lineIndex, canvasWidth);

		// 23: Now place the annotations for the current line onto the canvas at the appropriate horizontal positions.
		// 22: Place each annotation’s text in the correct horizontal position for this line.
		const lineAnnotations = finalLines[i];
		for (const annotation of lineAnnotations) {
		const text = annotation.text;
		// 24: Calculate the starting x-position in the canvas based on normalized coordinates.

		// 23: Calculate the starting x-position in the canvas by converting normalized x to char space.
		const startXInChars = Math.round(
		annotation.bottom_left_normalized.x * canvasWidth,
		annotation.bottom_left_normalized.x *
		(pageWidth / HEURISTIC_CHAR_WIDTH),
		);

		// 25: Place each character of the annotation text into the canvas.
		// 24: Place each character of the annotation in the canvas.
		for (let j = 0; j < text.length; j++) {
		const xPos = startXInChars + j;
		// 26: Ensure we don't exceed the canvas width.
		// 25: Don’t write beyond the right edge of the canvas.
		if (xPos < canvasWidth) {
		@@ -183,3 +196,3 @@ canvas[lineIndex][xPos] = text[j];

		// 27: Trim trailing whitespace from each line to create a cleaner output.
		// 26: Trim trailing whitespace from each line to clean up the output.
		canvas = canvas.map((row) => {
		@@ -190,11 +203,11 @@ const lineStr = row.join("");

		// 29: Join all lines to form the final page text. Trim any trailing whitespace from the entire text.
		// 27: Combine all rows into a single string, separating rows with newlines.
		let pageText = canvas.map((line) => line.join("")).join("\n");
		pageText = pageText.trimEnd();

		// 30: Surround the page text with lines of dashes to clearly delineate the text block.
		// 28: Surround the rendered text with lines of dashes for clarity.
		pageText =
		"-".repeat(canvasWidth) + "\n" + pageText + "\n" + "-".repeat(canvasWidth);

		// 31: Return the fully formatted text.
		// 29: Return the final formatted text.
		return pageText;
		@@ -225,24 +238,2 @@ }
		/**
		* `estimateCharacterWidth` estimates a typical character width (in pixels) from a collection of text annotations.
		* For every annotation with non-empty text it computes `width / text.length`, then passes the collected
		* per-character widths to `median` and returns that value.
		*
		* NOTE(review): `median` is defined outside this block. The "0 when nothing can be measured" behaviour
		* described below is what `median` presumably returns for an empty array - confirm against its implementation.
		*
		* @param textAnnotations - An array of text annotations; only their `text` and `width` fields are read.
		* @returns The median per-character width in pixels, or (presumably) 0 if no annotation has non-empty text.
		*/
		function estimateCharacterWidth(textAnnotations: TextAnnotation[]): number {
		// collect width-per-character measurements from each annotation
		const charWidths: number[] = [];
		for (const annotation of textAnnotations) {
		const length = annotation.text.length;
		// skip empty-text annotations so we never divide by zero
		if (length > 0) {
		charWidths.push(annotation.width / length);
		}
		}

		// return the median of all collected measurements (may be called with an empty array)
		return median(charWidths);
		}

		/**
		* `groupWordsInSentence` groups annotations within a single line into logical "words" or "sentences".
		@@ -270,3 +261,3 @@ * It uses a set of heuristics involving horizontal proximity and similar height
		// use a padding factor to allow slight spaces between words
		const padding = 2;
		const padding = 1;
		const lastAnn = currentGroup[currentGroup.length - 1];
		@@ -295,4 +286,6 @@ const characterWidth = (lastAnn.width / lastAnn.text.length) * padding;
		const groupedAnnotation = createGroupedAnnotation(currentGroup);
		groupedAnnotations.push(groupedAnnotation);
		currentGroup = [annotation];
		if (groupedAnnotation.text.length > 0) {
		groupedAnnotations.push(groupedAnnotation);
		currentGroup = [annotation];
		}
		}
		@@ -299,0 +292,0 @@ }

package.json

		{
		"name": "@browserbasehq/stagehand",
		"version": "1.10.0-alpha-7c484124db7bb587ea1b23631c29e3940cdf3a4e",
		"version": "1.10.0-alpha-9c20de3e66f0ac20374d5e5e02eb107c620a2263",
		"description": "An AI web browsing framework focused on simplicity and extensibility.",
		@@ -5,0 +5,0 @@ "main": "./dist/index.js",

README.md

		@@ -38,2 +38,6 @@ <div id="toc" align="center">

		<p align="center">
		<a href="https://trendshift.io/repositories/12122" target="_blank"><img src="https://trendshift.io/api/badge/repositories/12122" alt="browserbase%2Fstagehand | Trendshift" style="width: 250px; height: 55px;" width="250" height="55"/></a>
		</p>

		---
		@@ -40,0 +44,0 @@

dist/index.js

Sorry, the diff of this file is too big to display

@browserbasehq/stagehand - npm Package Compare versions

Improved metrics

Worsened metrics