@empiricalrun/llm
Advanced tools
Comparing version 0.9.13 to 0.9.14
# @empiricalrun/llm | ||
## 0.9.14 | ||
### Patch Changes | ||
- b1343f9: feat: use sharp instead of canvas for image manipulation | ||
## 0.9.13 | ||
@@ -4,0 +10,0 @@ |
@@ -1,26 +0,5 @@ | ||
interface BoundingBox { | ||
corners: { | ||
min: { | ||
x: number; | ||
y: number; | ||
}; | ||
max: { | ||
x: number; | ||
y: number; | ||
}; | ||
}; | ||
center: { | ||
x: number; | ||
y: number; | ||
}; | ||
container: { | ||
width: number; | ||
height: number; | ||
}; | ||
annotatedImage?: string; | ||
} | ||
import { BoundingBox } from "../types"; | ||
export declare function getBoundingBox(base64Image: string, elementDescription: string, options?: { | ||
debug?: true; | ||
}): Promise<BoundingBox>; | ||
export {}; | ||
//# sourceMappingURL=index.d.ts.map |
"use strict"; | ||
var __importDefault = (this && this.__importDefault) || function (mod) { | ||
return (mod && mod.__esModule) ? mod : { "default": mod }; | ||
}; | ||
Object.defineProperty(exports, "__esModule", { value: true }); | ||
exports.getBoundingBox = void 0; | ||
const canvas_1 = require("canvas"); | ||
const image_size_1 = __importDefault(require("image-size")); | ||
const __1 = require("../.."); | ||
const image_1 = require("../image"); | ||
const utils_1 = require("../utils"); | ||
@@ -61,3 +57,3 @@ async function getBoundingBox(base64Image, elementDescription, options = {}) { | ||
try { | ||
bbox.annotatedImage = await annotatedImage(base64Image, bbox); | ||
bbox.annotatedImage = await (0, image_1.drawBoxOnImage)(base64Image, bbox); | ||
} | ||
@@ -74,7 +70,3 @@ catch (e) { | ||
// Docs: https://ai.google.dev/gemini-api/docs/vision?lang=python#bbox | ||
const dimensions = (0, image_size_1.default)(Buffer.from(base64Image, "base64")); | ||
const { width, height } = dimensions; | ||
if (!width || !height) { | ||
throw new Error("Failed to get image dimensions"); | ||
} | ||
const { width, height } = (0, image_1.dimensions)(base64Image); | ||
const [y_min, x_min, y_max, x_max] = coordinates; | ||
@@ -111,20 +103,1 @@ function sy(value) { | ||
} | ||
async function annotatedImage(base64Image, bbox) { | ||
const { corners, center } = bbox; | ||
const width = corners.max.x - corners.min.x; | ||
const height = corners.max.y - corners.min.y; | ||
const imgBuffer = Buffer.from(base64Image, "base64"); | ||
const dimensions = (0, image_size_1.default)(imgBuffer); | ||
const canvas = (0, canvas_1.createCanvas)(dimensions.width, dimensions.height); | ||
const ctx = canvas.getContext("2d"); | ||
const image = await (0, canvas_1.loadImage)(imgBuffer); | ||
ctx.drawImage(image, 0, 0); | ||
ctx.strokeStyle = "red"; | ||
ctx.lineWidth = 2; | ||
ctx.strokeRect(corners.min.x, corners.min.y, width, height); | ||
ctx.fillStyle = "red"; | ||
ctx.beginPath(); | ||
ctx.arc(center.x, center.y, 5, 0, 2 * Math.PI); | ||
ctx.fill(); | ||
return canvas.toDataURL(); | ||
} |
@@ -1,16 +0,4 @@ | ||
export type Point = { | ||
x: number; | ||
y: number; | ||
}; | ||
export interface Coordinates { | ||
x: number; | ||
y: number; | ||
container: { | ||
width: number; | ||
height: number; | ||
}; | ||
annotatedImage: string; | ||
} | ||
import { Coordinates, Point } from "../types"; | ||
export declare function getCoordinatesFor(prompt: string, base64Image: string): Promise<Coordinates>; | ||
export declare function extractTapCoordinateFromString(inputString: string): Point; | ||
//# sourceMappingURL=index.d.ts.map |
"use strict"; | ||
var __importDefault = (this && this.__importDefault) || function (mod) { | ||
return (mod && mod.__esModule) ? mod : { "default": mod }; | ||
}; | ||
Object.defineProperty(exports, "__esModule", { value: true }); | ||
exports.extractTapCoordinateFromString = exports.getCoordinatesFor = void 0; | ||
const canvas_1 = require("canvas"); | ||
const image_size_1 = __importDefault(require("image-size")); | ||
const image_1 = require("../image"); | ||
const API_BASE_URL = process.env.VISION_MODEL_ENDPOINT; | ||
async function getCoordinatesFor(prompt, base64Image) { | ||
const llmResponse = await getCoordinateForElement(base64Image, prompt); | ||
const coordinates = extractTapCoordinateFromString(llmResponse); | ||
return drawRedDotAtPoint(base64Image, coordinates); | ||
const llmResponse = await getLlmResponse(base64Image, prompt); | ||
const pointFromLlm = extractTapCoordinateFromString(llmResponse); | ||
const scaledPoint = scaleForImage(pointFromLlm, base64Image); | ||
const annotatedImage = await (0, image_1.drawRedDotAtPoint)(base64Image, scaledPoint); | ||
const dims = (0, image_1.dimensions)(base64Image); | ||
return { | ||
x: scaledPoint.x, | ||
y: scaledPoint.y, | ||
container: { | ||
width: dims.width, | ||
height: dims.height, | ||
}, | ||
annotatedImage, | ||
}; | ||
} | ||
exports.getCoordinatesFor = getCoordinatesFor; | ||
function scaleForImage(coordinates, base64Image) { | ||
const dimensions = (0, image_size_1.default)(Buffer.from(base64Image, "base64")); | ||
const { width, height } = dimensions; | ||
if (!width || !height) { | ||
throw new Error("Failed to get image dimensions"); | ||
} | ||
const { width, height } = (0, image_1.dimensions)(base64Image); | ||
function sy(value) { | ||
// Molmo returns coordinates as percentages | ||
return (value * height) / 100; | ||
@@ -57,29 +61,3 @@ } | ||
exports.extractTapCoordinateFromString = extractTapCoordinateFromString; | ||
async function drawRedDotAtPoint(base64Image, coordinate) { | ||
const point = scaleForImage(coordinate, base64Image); | ||
const imgBuffer = Buffer.from(base64Image, "base64"); | ||
const dimensions = (0, image_size_1.default)(imgBuffer); | ||
if (!dimensions.width || !dimensions.height) { | ||
throw new Error("Failed to get image dimensions"); | ||
} | ||
const { width, height } = dimensions; | ||
const canvas = (0, canvas_1.createCanvas)(dimensions.width, dimensions.height); | ||
const ctx = canvas.getContext("2d"); | ||
const image = await (0, canvas_1.loadImage)(imgBuffer); | ||
ctx.drawImage(image, 0, 0); | ||
ctx.fillStyle = "red"; | ||
ctx.beginPath(); | ||
ctx.arc(point.x, point.y, 5, 0, 2 * Math.PI); | ||
ctx.fill(); | ||
return { | ||
x: point.x, | ||
y: point.y, | ||
container: { | ||
width, | ||
height, | ||
}, | ||
annotatedImage: canvas.toDataURL().split(",")[1], | ||
}; | ||
} | ||
async function getCoordinateForElement(base64Image, prompt) { | ||
async function getLlmResponse(base64Image, prompt) { | ||
const url = `${API_BASE_URL}/predict`; | ||
@@ -86,0 +64,0 @@ const response = await fetch(url, { |
{ | ||
"name": "@empiricalrun/llm", | ||
"version": "0.9.13", | ||
"version": "0.9.14", | ||
"main": "dist/index.js", | ||
@@ -38,2 +38,3 @@ "exports": { | ||
"portkey-ai": "^1.3.2", | ||
"sharp": "^0.33.5", | ||
"zod": "^3.23.8" | ||
@@ -44,5 +45,2 @@ }, | ||
}, | ||
"peerDependencies": { | ||
"canvas": "^2.11.2" | ||
}, | ||
"scripts": { | ||
@@ -49,0 +47,0 @@ "dev": "tsc --build --watch", |
@@ -64,7 +64,1 @@ # llm | ||
``` | ||
To use this, you need to install the canvas npm package, which is a peer dependency. | ||
```sh | ||
npm install canvas | ||
``` |
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
7463276
54
665
64
+ Added sharp@^0.33.5
+ Added @emnapi/runtime@1.3.1 (transitive)
+ Added @img/sharp-darwin-arm64@0.33.5 (transitive)
+ Added @img/sharp-darwin-x64@0.33.5 (transitive)
+ Added @img/sharp-libvips-darwin-arm64@1.0.4 (transitive)
+ Added @img/sharp-libvips-darwin-x64@1.0.4 (transitive)
+ Added @img/sharp-libvips-linux-arm@1.0.5 (transitive)
+ Added @img/sharp-libvips-linux-arm64@1.0.4 (transitive)
+ Added @img/sharp-libvips-linux-s390x@1.0.4 (transitive)
+ Added @img/sharp-libvips-linux-x64@1.0.4 (transitive)
+ Added @img/sharp-libvips-linuxmusl-arm64@1.0.4 (transitive)
+ Added @img/sharp-libvips-linuxmusl-x64@1.0.4 (transitive)
+ Added @img/sharp-linux-arm@0.33.5 (transitive)
+ Added @img/sharp-linux-arm64@0.33.5 (transitive)
+ Added @img/sharp-linux-s390x@0.33.5 (transitive)
+ Added @img/sharp-linux-x64@0.33.5 (transitive)
+ Added @img/sharp-linuxmusl-arm64@0.33.5 (transitive)
+ Added @img/sharp-linuxmusl-x64@0.33.5 (transitive)
+ Added @img/sharp-wasm32@0.33.5 (transitive)
+ Added @img/sharp-win32-ia32@0.33.5 (transitive)
+ Added @img/sharp-win32-x64@0.33.5 (transitive)
+ Added color@4.2.3 (transitive)
+ Added color-convert@2.0.1 (transitive)
+ Added color-name@1.1.4 (transitive)
+ Added color-string@1.9.1 (transitive)
+ Added is-arrayish@0.3.2 (transitive)
+ Added sharp@0.33.5 (transitive)
+ Added simple-swizzle@0.2.2 (transitive)
+ Added tslib@2.8.1 (transitive)
- Removed @mapbox/node-pre-gyp@1.0.11 (transitive)
- Removed abbrev@1.1.1 (transitive)
- Removed agent-base@6.0.2 (transitive)
- Removed ansi-regex@5.0.1 (transitive)
- Removed aproba@2.0.0 (transitive)
- Removed are-we-there-yet@2.0.0 (transitive)
- Removed balanced-match@1.0.2 (transitive)
- Removed brace-expansion@1.1.11 (transitive)
- Removed canvas@2.11.2 (transitive)
- Removed chownr@2.0.0 (transitive)
- Removed color-support@1.1.3 (transitive)
- Removed concat-map@0.0.1 (transitive)
- Removed console-control-strings@1.1.0 (transitive)
- Removed debug@4.4.0 (transitive)
- Removed decompress-response@4.2.1 (transitive)
- Removed delegates@1.0.0 (transitive)
- Removed emoji-regex@8.0.0 (transitive)
- Removed fs-minipass@2.1.0 (transitive)
- Removed fs.realpath@1.0.0 (transitive)
- Removed gauge@3.0.2 (transitive)
- Removed glob@7.2.3 (transitive)
- Removed has-unicode@2.0.1 (transitive)
- Removed https-proxy-agent@5.0.1 (transitive)
- Removed inflight@1.0.6 (transitive)
- Removed is-fullwidth-code-point@3.0.0 (transitive)
- Removed make-dir@3.1.0 (transitive)
- Removed mimic-response@2.1.0 (transitive)
- Removed minimatch@3.1.2 (transitive)
- Removed minipass@3.3.6, minipass@5.0.0 (transitive)
- Removed minizlib@2.1.2 (transitive)
- Removed mkdirp@1.0.4 (transitive)
- Removed nan@2.22.0 (transitive)
- Removed nopt@5.0.0 (transitive)
- Removed npmlog@5.0.1 (transitive)
- Removed object-assign@4.1.1 (transitive)
- Removed once@1.4.0 (transitive)
- Removed path-is-absolute@1.0.1 (transitive)
- Removed readable-stream@3.6.2 (transitive)
- Removed rimraf@3.0.2 (transitive)
- Removed safe-buffer@5.2.1 (transitive)
- Removed semver@6.3.1 (transitive)
- Removed set-blocking@2.0.0 (transitive)
- Removed signal-exit@3.0.7 (transitive)
- Removed simple-concat@1.0.1 (transitive)
- Removed simple-get@3.1.1 (transitive)
- Removed string-width@4.2.3 (transitive)
- Removed string_decoder@1.3.0 (transitive)
- Removed strip-ansi@6.0.1 (transitive)
- Removed tar@6.2.1 (transitive)
- Removed util-deprecate@1.0.2 (transitive)
- Removed wide-align@1.1.5 (transitive)
- Removed wrappy@1.0.2 (transitive)
- Removed yallist@4.0.0 (transitive)