@empiricalrun/llm
Advanced tools
Comparing version 0.9.4 to 0.9.5
# @empiricalrun/llm | ||
## 0.9.5 | ||
### Patch Changes | ||
- 0eb7700: feat: get annotated image for bbox | ||
- 55f21cf: feat: simpler user inputs for bounding box method | ||
## 0.9.4 | ||
@@ -4,0 +11,0 @@ |
@@ -20,4 +20,5 @@ interface BoundingBox { | ||
}; | ||
annotatedImage?: string; | ||
} | ||
export declare function getBoundingBox(base64Image: string, instruction: string, options?: { | ||
export declare function getBoundingBox(base64Image: string, elementDescription: string, options?: { | ||
debug?: true; | ||
@@ -24,0 +25,0 @@ }): Promise<BoundingBox>; |
@@ -7,7 +7,7 @@ "use strict"; | ||
exports.getBoundingBox = void 0; | ||
const child_process_1 = require("child_process"); | ||
const canvas_1 = require("canvas"); | ||
const image_size_1 = __importDefault(require("image-size")); | ||
const __1 = require("../.."); | ||
const utils_1 = require("../utils"); | ||
async function getBoundingBox(base64Image, instruction, options = {}) { | ||
async function getBoundingBox(base64Image, elementDescription, options = {}) { | ||
if (!process.env.GOOGLE_API_KEY) { | ||
@@ -20,3 +20,3 @@ throw new Error("Missing GOOGLE_API_KEY: Required for bounding box."); | ||
}); | ||
const common = `Give me the output as an array [y_min, x_min, y_max, x_max]. Return only the array and nothing else`; | ||
const content = `You are given a screenshot and a description of an element on the screen. You are an expert vision model that can identify bounding box for the element on the screen. Return the output as an array [y_min, x_min, y_max, x_max]. Return only the array and nothing else.\n\nElement description:`; | ||
const llmResponse = await llm.createChatCompletion({ | ||
@@ -26,3 +26,3 @@ messages: [ | ||
role: "system", | ||
content: `${instruction}\n${common}`, | ||
content: `${content}\n${elementDescription}`, | ||
}, | ||
@@ -64,4 +64,3 @@ { | ||
try { | ||
const url = browserUrl(base64Image, bbox); | ||
copyToClipboard(url); | ||
bbox.annotatedImage = await annotatedImage(base64Image, bbox); | ||
} | ||
@@ -114,30 +113,20 @@ catch (e) { | ||
} | ||
function browserUrl(base64Image, bbox) { | ||
async function annotatedImage(base64Image, bbox) { | ||
const { corners, center } = bbox; | ||
const width = corners.max.x - corners.min.x; | ||
const height = corners.max.y - corners.min.y; | ||
return ` | ||
data:text/html,<html><body><canvas id="c"></canvas><script> | ||
const canvas = document.getElementById('c'); | ||
const ctx = canvas.getContext('2d'); | ||
const img = new Image(); | ||
img.onload = function() { | ||
canvas.width = img.width; | ||
canvas.height = img.height; | ||
ctx.drawImage(img, 0, 0); | ||
ctx.strokeStyle = 'red'; | ||
ctx.lineWidth = 2; | ||
ctx.strokeRect(${corners.min.x}, ${corners.min.y}, ${width}, ${height}); | ||
ctx.fillStyle = 'red'; | ||
ctx.beginPath(); | ||
ctx.arc(${center.x}, ${center.y}, 5, 0, 2 * Math.PI); | ||
ctx.fill(); | ||
}; | ||
img.src = 'data:image/png;base64,${base64Image}'; | ||
</script></body></html> | ||
`; | ||
const imgBuffer = Buffer.from(base64Image, "base64"); | ||
const dimensions = (0, image_size_1.default)(imgBuffer); | ||
const canvas = (0, canvas_1.createCanvas)(dimensions.width, dimensions.height); | ||
const ctx = canvas.getContext("2d"); | ||
const image = await (0, canvas_1.loadImage)(imgBuffer); | ||
ctx.drawImage(image, 0, 0); | ||
ctx.strokeStyle = "red"; | ||
ctx.lineWidth = 2; | ||
ctx.strokeRect(corners.min.x, corners.min.y, width, height); | ||
ctx.fillStyle = "red"; | ||
ctx.beginPath(); | ||
ctx.arc(center.x, center.y, 5, 0, 2 * Math.PI); | ||
ctx.fill(); | ||
return canvas.toDataURL(); | ||
} | ||
function copyToClipboard(text) { | ||
const textString = String(text); | ||
(0, child_process_1.execSync)(`echo "${textString.replace(/"/g, '\\"')}" | pbcopy`); | ||
} |
{ | ||
"name": "@empiricalrun/llm", | ||
"version": "0.9.4", | ||
"version": "0.9.5", | ||
"main": "dist/index.js", | ||
@@ -34,2 +34,5 @@ "exports": { | ||
}, | ||
"peerDependencies": { | ||
"canvas": "^2.11.2" | ||
}, | ||
"scripts": { | ||
@@ -36,0 +39,0 @@ "dev": "tsc --build --watch", |
@@ -54,28 +54,13 @@ # llm | ||
Bounding box can require some prompt iterations, and you can do that with a `debug` flag. This flag | ||
copies the output of the operation to your clipboard (macOS only.) | ||
returns a base64 image that has the bounding box drawn on top of the original image. | ||
Paste this output in the address bar of your browser to visualize the output. | ||
```ts | ||
const bbox = await getBoundingBox(data.toString('base64'), instruction, { debug: true }); | ||
console.log(bbox.annotatedImage); | ||
``` | ||
Example script for prompt iterations. The package should be installed and available to this script. | ||
To use this, you need to install the canvas npm package, which is a peer dependency. | ||
```js | ||
// Create script.mjs | ||
// Run as `node script.mjs` | ||
import fs from "fs"; | ||
import { getBoundingBox } from "@empiricalrun/llm/vision"; | ||
async function main() { | ||
const prompt = "What is the bounding box for the first dropdown menu?"; | ||
const imagePath = "/path/to/the/image.png"; | ||
const imageData = fs.readFileSync(imagePath).toString("base64"); | ||
await getBoundingBox(imageData, prompt, { debug: true }); | ||
// The debug output has been copied -- paste it in the web browser | ||
} | ||
main(); | ||
```sh | ||
npm install canvas | ||
``` |
Sorry, the diff of this file is not supported yet
Shell access
Supply chain risk: This module accesses the system shell. Accessing the system shell increases the risk of executing arbitrary code.
Found 1 instance in 1 package
0
151387
6
546
66