@empiricalrun/llm - npm Package Compare versions

Comparing version 0.9.4 to 0.9.5

CHANGELOG.md

		# @empiricalrun/llm

		## 0.9.5

		### Patch Changes

		- 0eb7700: feat: get annotated image for bbox
		- 55f21cf: feat: simpler user inputs for bounding box method

		## 0.9.4
		@@ -4,0 +11,0 @@

dist/vision/bbox/index.d.ts

		@@ -20,4 +20,5 @@ interface BoundingBox {
		};
		annotatedImage?: string;
		}
		export declare function getBoundingBox(base64Image: string, instruction: string, options?: {
		export declare function getBoundingBox(base64Image: string, elementDescription: string, options?: {
		debug?: true;
		@@ -24,0 +25,0 @@ }): Promise<BoundingBox>;

dist/vision/bbox/index.js

		@@ -7,7 +7,7 @@ "use strict";
		exports.getBoundingBox = void 0;
		const child_process_1 = require("child_process");
		const canvas_1 = require("canvas");
		const image_size_1 = __importDefault(require("image-size"));
		const __1 = require("../..");
		const utils_1 = require("../utils");
		async function getBoundingBox(base64Image, instruction, options = {}) {
		async function getBoundingBox(base64Image, elementDescription, options = {}) {
		if (!process.env.GOOGLE_API_KEY) {
		@@ -20,3 +20,3 @@ throw new Error("Missing GOOGLE_API_KEY: Required for bounding box.");
		});
		const common = `Give me the output as an array [y_min, x_min, y_max, x_max]. Return only the array and nothing else`;
		const content = `You are given a screenshot and a description of an element on the screen. You are an expert vision model that can identify bounding box for the element on the screen. Return the output as an array [y_min, x_min, y_max, x_max]. Return only the array and nothing else.\n\nElement description:`;
		const llmResponse = await llm.createChatCompletion({
		@@ -26,3 +26,3 @@ messages: [
		role: "system",
		content: `${instruction}\n${common}`,
		content: `${content}\n${elementDescription}`,
		},
		@@ -64,4 +64,3 @@ {
		try {
		const url = browserUrl(base64Image, bbox);
		copyToClipboard(url);
		bbox.annotatedImage = await annotatedImage(base64Image, bbox);
		}
		@@ -114,30 +113,20 @@ catch (e) {
		}
		function browserUrl(base64Image, bbox) {
		async function annotatedImage(base64Image, bbox) {
		const { corners, center } = bbox;
		const width = corners.max.x - corners.min.x;
		const height = corners.max.y - corners.min.y;
		return `
		data:text/html,<html><body><canvas id="c"></canvas><script>
		const canvas = document.getElementById('c');
		const ctx = canvas.getContext('2d');
		const img = new Image();
		img.onload = function() {
		canvas.width = img.width;
		canvas.height = img.height;
		ctx.drawImage(img, 0, 0);
		ctx.strokeStyle = 'red';
		ctx.lineWidth = 2;
		ctx.strokeRect(${corners.min.x}, ${corners.min.y}, ${width}, ${height});
		ctx.fillStyle = 'red';
		ctx.beginPath();
		ctx.arc(${center.x}, ${center.y}, 5, 0, 2 * Math.PI);
		ctx.fill();
		};
		img.src = 'data:image/png;base64,${base64Image}';
		</script></body></html>
		`;
		const imgBuffer = Buffer.from(base64Image, "base64");
		const dimensions = (0, image_size_1.default)(imgBuffer);
		const canvas = (0, canvas_1.createCanvas)(dimensions.width, dimensions.height);
		const ctx = canvas.getContext("2d");
		const image = await (0, canvas_1.loadImage)(imgBuffer);
		ctx.drawImage(image, 0, 0);
		ctx.strokeStyle = "red";
		ctx.lineWidth = 2;
		ctx.strokeRect(corners.min.x, corners.min.y, width, height);
		ctx.fillStyle = "red";
		ctx.beginPath();
		ctx.arc(center.x, center.y, 5, 0, 2 * Math.PI);
		ctx.fill();
		return canvas.toDataURL();
		}
		function copyToClipboard(text) {
		const textString = String(text);
		(0, child_process_1.execSync)(`echo "${textString.replace(/"/g, '\\"')}" | pbcopy`);
		}

package.json

		{
		"name": "@empiricalrun/llm",
		"version": "0.9.4",
		"version": "0.9.5",
		"main": "dist/index.js",
		@@ -34,2 +34,5 @@ "exports": {
		},
		"peerDependencies": {
		"canvas": "^2.11.2"
		},
		"scripts": {
		@@ -36,0 +39,0 @@ "dev": "tsc --build --watch",

README.md

		@@ -54,28 +54,13 @@ # llm
		Bounding box can require some prompt iterations, and you can do that with a `debug` flag. This flag
		copies the output of the operation to your clipboard (macOS only.)
		returns a base64 image that has the bounding box drawn on top of the original image.

		Paste this output in the address bar of your browser to visualize the output.

		```ts
		const bbox = await getBoundingBox(data.toString('base64'), instruction, { debug: true });
		console.log(bbox.annotatedImage);
		```

		Example script for prompt iterations. The package should be installed and available to this script.
		To use this, you need to install the canvas npm package, which is a peer dependency.

		```js
		// Create script.mjs
		// Run as `node script.mjs`

		import fs from "fs";
		import { getBoundingBox } from "@empiricalrun/llm/vision";

		async function main() {
		const prompt = "What is the bounding box for the first dropdown menu?";
		const imagePath = "/path/to/the/image.png";
		const imageData = fs.readFileSync(imagePath).toString("base64");
		await getBoundingBox(imageData, prompt, { debug: true });
		// The debug output has been copied -- paste it in the web browser
		}

		main();
		```sh
		npm install canvas
		```

dist/vision/bbox/index.d.ts.map

Sorry, the diff of this file is not supported yet

@empiricalrun/llm - npm Package Compare versions

Fixed alerts

Improved metrics

Worsened metrics