@zumer/snapdom-plugins
Advanced tools
+376
| /** | ||
| * agentMap – Official SnapDOM Plugin | ||
| * | ||
| * Produces a Set-of-Mark package for visual agents: an annotated screenshot | ||
| * with numbered badges on interactive elements, plus a compact JSON map from | ||
| * badge index → role / name / bbox / state. Designed for one-call capture | ||
| * on the client side — visual agents, computer-use harnesses, dataset | ||
| * generation for vision training, visual QA. | ||
| * | ||
| * Usage: | ||
| * import { agentMap } from '@zumer/snapdom-plugins/agent-map'; | ||
| * const result = await snapdom(el, { plugins: [agentMap()] }); | ||
| * const { image, map, dimensions } = await result.toAgentMap(); | ||
| * | ||
| * // model reply: "click element 2" → map[2].b gives [x, y, w, h] | ||
| * | ||
| * @param {Object} [options] | ||
| * @param {'annotated'|'raw'|false} [options.image='annotated'] Image output | ||
| * mode. 'annotated' draws numbered badges on the rendered image, 'raw' | ||
| * returns the image without badges, false skips image generation (cheapest). | ||
| * @param {'minimal'|'full'} [options.fields='minimal'] Per-entry shape. | ||
| * 'minimal' returns {i, n, r, b, s?}. 'full' adds {t (text), a (attrs)}. | ||
| * @param {boolean} [options.semantic=false] Include non-interactive semantic | ||
| * elements (headings, paragraphs, nav, main, landmarks). Off by default — | ||
| * agents typically act only on interactive elements. | ||
| * @param {number} [options.maxImageWidth=1024] Downscale target for the image. | ||
| * @param {'png'|'jpg'|'webp'} [options.imageFormat='png'] Image format. | ||
| * @param {number} [options.imageQuality=0.8] Quality for lossy formats. | ||
| * @param {string} [options.interactiveSelector] CSS selector (default below). | ||
| * @param {string} [options.semanticSelector] CSS selector (default below). | ||
| * @param {Object} [options.labelStyle={}] Override badge styles. | ||
| * @returns {Object} SnapDOM plugin | ||
| */ | ||
// Default selector for elements an agent can act on: native controls, the
// explicit ARIA widget roles listed below, focusable elements, <summary>,
// and editable regions.
const DEFAULT_INTERACTIVE = [
  'a[href]',
  'button',
  'input',
  'select',
  'textarea',
  ...[
    'button',
    'link',
    'tab',
    'menuitem',
    'checkbox',
    'radio',
    'switch',
    'slider',
    'combobox',
    'textbox',
  ].map((role) => `[role="${role}"]`),
  '[tabindex]:not([tabindex="-1"])',
  'summary',
  '[contenteditable="true"]',
].join(', ');

// Default selector for non-interactive structural elements; only used when
// the `semantic` option is enabled.
const DEFAULT_SEMANTIC = [
  'h1',
  'h2',
  'h3',
  'h4',
  'h5',
  'h6',
  'nav',
  'main',
  'article',
  'section',
  'header',
  'footer',
  'figcaption',
  'blockquote',
  'legend',
  'p',
].join(', ');
/**
 * Creates the agent-map SnapDOM plugin.
 *
 * `afterClone` builds the element map from the live element and, when the
 * constructor-level `image` option is 'annotated', draws numbered badges onto
 * the clone. `defineExports` registers the `agentMap` export, which resolves
 * to `{ dimensions, map, image? }` with bboxes rescaled to the output size.
 *
 * Note: badges are drawn during capture, so a per-call `image: 'raw'` cannot
 * remove badges that a constructor-level `image: 'annotated'` already added.
 *
 * @param {Object} [options] Constructor options (see the option list in the
 *   file header comment).
 * @returns {Object} SnapDOM plugin (`name`, `afterClone`, `defineExports`).
 */
export function agentMap(options = {}) {
  const {
    image = 'annotated',
    fields = 'minimal',
    semantic = false,
    maxImageWidth = 1024,
    imageFormat = 'png',
    imageQuality = 0.8,
    interactiveSelector = DEFAULT_INTERACTIVE,
    semanticSelector = DEFAULT_SEMANTIC,
    labelStyle = {},
  } = options;

  return {
    name: 'agent-map',

    afterClone(ctx) {
      const meta = extractMap(
        ctx.element,
        interactiveSelector,
        semantic ? semanticSelector : null,
        fields
      );
      // snapdom's export ctx is a fresh spread of ctx.options, so we stash on
      // both for the agentMap() export below to find it.
      ctx.__agentMapMeta = meta;
      if (ctx.options) ctx.options.__agentMapMeta = meta;
      if (image === 'annotated') {
        addAnnotations(ctx.clone, meta.map, labelStyle);
      }
    },

    defineExports() {
      return {
        /**
         * @param {Object} ctx Export context carrying the stashed map meta and
         *   `ctx.export.url` (the rendered capture).
         * @param {Object} [opts] Per-call overrides: `image`, `imageFormat`,
         *   `imageQuality`, `maxImageWidth`.
         * @returns {Promise<{dimensions: {width: number, height: number}, map: Array, image?: string}>}
         */
        agentMap: async (ctx, opts = {}) => {
          // Look in both places afterClone stashed the meta.
          const meta = ctx.__agentMapMeta || (ctx.options && ctx.options.__agentMapMeta);
          const wantImage = opts.image !== undefined ? opts.image : image;

          // No capture metadata at all: nothing to scale against.
          if (!meta) {
            const out = { dimensions: { width: 0, height: 0 }, map: [] };
            if (wantImage) out.image = ctx.export.url;
            return out;
          }

          const format = opts.imageFormat || imageFormat;
          // 0 is a valid quality, so only fall back when no number was passed.
          const quality = typeof opts.imageQuality === 'number' ? opts.imageQuality : imageQuality;
          const maxWidth = opts.maxImageWidth || maxImageWidth;

          // Scale dimensions — whether we rasterize or not, bboxes get resized
          // to the target output size so callers can overlay them on the image.
          // An empty map takes the same path so the image still honors
          // format / maxImageWidth and `dimensions` reflects the real output.
          let w;
          let h;
          let dataURL;
          if (wantImage) {
            const img = new Image();
            // Attach handlers before assigning src so a fast load is not missed.
            await new Promise((resolve, reject) => {
              img.onload = resolve;
              img.onerror = () =>
                reject(new Error('[snapdom] agent-map: failed to load the rendered capture'));
              img.src = ctx.export.url;
            });
            const ratio = img.naturalWidth > maxWidth ? maxWidth / img.naturalWidth : 1;
            w = Math.round(img.naturalWidth * ratio);
            h = Math.round(img.naturalHeight * ratio);
            const canvas = document.createElement('canvas');
            canvas.width = w;
            canvas.height = h;
            canvas.getContext('2d').drawImage(img, 0, 0, w, h);
            const mime =
              format === 'jpg' || format === 'jpeg' ? 'image/jpeg'
                : format === 'webp' ? 'image/webp'
                  : 'image/png';
            dataURL = canvas.toDataURL(mime, quality);
          } else {
            const sourceW = meta.dimensions.width || 1;
            const ratio = sourceW > maxWidth ? maxWidth / sourceW : 1;
            w = Math.round(sourceW * ratio);
            h = Math.round(meta.dimensions.height * ratio);
          }

          const sx = w / (meta.dimensions.width || 1);
          const sy = h / (meta.dimensions.height || 1);
          const scaledMap = meta.map.map((e) => ({
            ...e,
            b: [
              Math.round(e.b[0] * sx),
              Math.round(e.b[1] * sy),
              Math.round(e.b[2] * sx),
              Math.round(e.b[3] * sy),
            ],
          }));

          const out = { dimensions: { width: w, height: h }, map: scaledMap };
          if (wantImage) out.image = dataURL;
          return out;
        },
      };
    },
  };
}
| /* ── Role derivation ────────────────────────────── */ | ||
// Implicit roles for <input>, keyed by lower-cased `type`. Any type not listed
// here is reported as 'textbox'.
const INPUT_TYPE_ROLES = new Map([
  ['checkbox', 'checkbox'],
  ['radio', 'radio'],
  ['range', 'slider'],
  ['file', 'file'],
  ['submit', 'button'],
  ['button', 'button'],
  ['reset', 'button'],
  ['image', 'button'],
]);

// Implicit roles for other tags, keyed by lower-cased tag name. A Map is used
// so unusual tag names can never hit Object.prototype members.
const TAG_ROLES = new Map([
  ['button', 'button'],
  ['select', 'combobox'],
  ['textarea', 'textbox'],
  ['summary', 'button'],
  ['details', 'group'],
  ['nav', 'navigation'],
  ['main', 'main'],
  ['header', 'banner'],
  ['footer', 'contentinfo'],
  ['article', 'article'],
  ['section', 'region'],
  ['p', 'paragraph'],
  ['img', 'image'],
]);

/**
 * Derives an ARIA-style role for an element.
 *
 * An explicit `role` attribute wins (trimmed, so whitespace-only values are
 * ignored). Otherwise the role is inferred from the tag / input type. Elements
 * with no mapping fall back to 'textbox' when they carry
 * `contenteditable="true"` (the default interactive selector matches those),
 * and finally to the lower-cased tag name.
 *
 * @param {Element} el Element to classify.
 * @returns {string} Role name, or the lower-cased tag name when none applies.
 */
function deriveRole(el) {
  const explicit = (el.getAttribute('role') || '').trim();
  if (explicit) return explicit;

  const tag = el.tagName.toLowerCase();
  if (tag === 'a' && el.hasAttribute('href')) return 'link';
  if (tag === 'input') {
    const type = (el.type || '').toLowerCase();
    return INPUT_TYPE_ROLES.get(type) || 'textbox';
  }
  if (/^h[1-6]$/.test(tag)) return 'heading';

  const implicit = TAG_ROLES.get(tag);
  if (implicit) return implicit;

  // An editable generic element behaves like a text field for an agent;
  // reporting it as "div" would hide that it accepts typing.
  if (el.getAttribute('contenteditable') === 'true') return 'textbox';
  return tag;
}
| /* ── Accessible name ────────────────────────────── */ | ||
/**
 * Computes a short accessible name for an element.
 *
 * Lookup order: aria-label → aria-labelledby → alt (img / input[type=image])
 * → title → first associated <label> → collapsed textContent (truncated to
 * 60 chars). When all of those are empty, form controls fall back to the
 * button caption (`value` of input[type=submit|button|reset]) and then to
 * `placeholder`, so such controls are not reported as unnamed.
 *
 * @param {Element} el Element to name.
 * @returns {string} The name, or '' when nothing usable is found.
 */
function accessibleName(el) {
  const clean = (value) => String(value == null ? '' : value).trim();
  const truncate = (text) => (text.length > 60 ? text.slice(0, 59) + '…' : text);

  const ariaLabel = clean(el.getAttribute('aria-label'));
  if (ariaLabel) return ariaLabel;

  const labelledBy = el.getAttribute('aria-labelledby');
  if (labelledBy) {
    // Resolve ids against the element's own root first so references inside
    // a shadow tree work; fall back to the document otherwise.
    const root = el.getRootNode();
    const getById = (id) =>
      root && typeof root.getElementById === 'function'
        ? root.getElementById(id)
        : document.getElementById(id);
    const parts = labelledBy
      .trim()
      .split(/\s+/)
      .map((id) => {
        const ref = getById(id);
        return ref ? clean(ref.textContent) : '';
      })
      .filter(Boolean);
    if (parts.length) return parts.join(' ');
  }

  const tagName = el.tagName;
  const type = (el.type || '').toLowerCase();

  if (tagName === 'IMG' || (tagName === 'INPUT' && type === 'image')) {
    const alt = clean(el.getAttribute('alt'));
    if (alt) return alt;
  }

  const title = clean(el.getAttribute('title'));
  if (title) return title;

  if (el.labels && el.labels[0]) {
    const labelText = clean(el.labels[0].textContent);
    if (labelText) return labelText;
  }

  const text = (el.textContent || '').replace(/\s+/g, ' ').trim();
  if (text) return truncate(text);

  // Fallbacks: <input> has no textContent, so without these a
  // `<input type="submit" value="Pay">` or a placeholder-only field would
  // come back with an empty name.
  if (tagName === 'INPUT' && (type === 'submit' || type === 'button' || type === 'reset')) {
    const caption = clean(el.value).replace(/\s+/g, ' ');
    if (caption) return truncate(caption);
  }
  if (tagName === 'INPUT' || tagName === 'TEXTAREA') {
    const placeholder = clean(el.getAttribute('placeholder')).replace(/\s+/g, ' ');
    if (placeholder) return truncate(placeholder);
  }
  return '';
}
| /* ── State extraction ───────────────────────────── */ | ||
/**
 * Builds the `s` (state) object. Only meaningful states — never default
 * values that add no signal for an agent. Critically, aria-expanded and
 * aria-pressed are included for BOTH values (true and false) because
 * "pressed: false" on a toggle is meaningful information.
 *
 * Checked / disabled state is read from the native pseudo-classes and, for
 * non-<input> elements, from `aria-checked` / `aria-disabled`, so custom ARIA
 * widgets report their real state. Password field values are never exported.
 *
 * @param {Element} el Element to inspect.
 * @param {string} role Role previously derived for `el`.
 * @param {DOMRect|null} rect Viewport rect of `el`; used for the occlusion
 *   probe. Pass a falsy value to skip it.
 * @returns {Object|null} State object, or null when no key applies.
 */
function deriveState(el, role, rect) {
  const s = {};

  // Probe each pseudo-class independently; a throw on an exotic node only
  // loses that one signal.
  const matches = (selector) => {
    try {
      return el.matches(selector);
    } catch {
      return false;
    }
  };

  // Native inputs own their checked state; aria-checked is only consulted for
  // custom widgets (role="checkbox" / "radio" / "switch" on generic elements).
  const ariaChecked = el.tagName === 'INPUT' ? null : el.getAttribute('aria-checked');
  if (matches(':checked') || ariaChecked === 'true') {
    s.checked = true;
  } else if (ariaChecked === 'mixed') {
    s.checked = 'mixed';
  } else if (role === 'checkbox' || role === 'radio' || role === 'switch') {
    // include checked:false for form groups where an agent needs to
    // know "unchecked" is a valid state distinct from "not a checkbox".
    s.checked = false;
  }
  if (matches(':disabled') || el.getAttribute('aria-disabled') === 'true') s.disabled = true;
  if (matches(':focus')) s.focus = true;

  const expanded = el.getAttribute('aria-expanded');
  if (expanded === 'true') s.expanded = true;
  else if (expanded === 'false') s.expanded = false;

  const pressed = el.getAttribute('aria-pressed');
  if (pressed === 'true') s.pressed = true;
  else if (pressed === 'false') s.pressed = false;

  const selected = el.getAttribute('aria-selected');
  if (selected === 'true') s.selected = true;
  else if (selected === 'false' && (role === 'tab' || role === 'option')) s.selected = false;

  if (el.tagName === 'INPUT') {
    const type = (el.type || 'text').toLowerCase();
    // Toggles and buttons carry no useful value; password values are secrets
    // and must not end up in a map that is handed to a model.
    const skipValue =
      type === 'checkbox' || type === 'radio' || type === 'submit' ||
      type === 'button' || type === 'reset' || type === 'password';
    if (!skipValue && el.value) s.value = el.value;
  } else if (el.tagName === 'TEXTAREA') {
    if (el.value) s.value = el.value;
  } else if (el.tagName === 'SELECT') {
    s.value = el.value;
    const opt = el.options && el.options[el.selectedIndex];
    if (opt) s.selectedText = opt.text || '';
  } else if (el.tagName === 'DETAILS') {
    s.open = !!el.open;
  }

  // Covered — element visually occluded by something else at its center.
  if (rect && rect.width && rect.height) {
    const cx = rect.left + rect.width / 2;
    const cy = rect.top + rect.height / 2;
    if (cx >= 0 && cy >= 0) {
      const doc = el.ownerDocument || document;
      if (doc.elementFromPoint) {
        const top = doc.elementFromPoint(cx, cy);
        // A hit on the element itself or on one of its descendants is not occlusion.
        if (top && top !== el && !(el.contains && el.contains(top))) {
          s.covered = true;
        }
      }
    }
  }

  return Object.keys(s).length ? s : null;
}
| /* ── Map extraction ─────────────────────────────── */ | ||
/**
 * Collects map entries for every element under `element` matching the
 * interactive selector, followed (when a semantic selector is given) by
 * semantic matches that were not already recorded as interactive.
 *
 * Entry indices are assigned in push order, so interactive entries always
 * come first. Elements for which `buildEntry` returns null are skipped and
 * consume no index.
 *
 * @param {Element} element Capture root; bboxes are relative to its rect.
 * @param {string} interactiveSelector CSS selector for interactive elements.
 * @param {string|null} semanticSelector CSS selector for semantic elements,
 *   or null to skip the semantic pass.
 * @param {'minimal'|'full'} fields Entry shape forwarded to `buildEntry`.
 * @returns {{map: Array<Object>, dimensions: {width: number, height: number}}}
 */
function extractMap(element, interactiveSelector, semanticSelector, fields) {
  const rootRect = element.getBoundingClientRect();
  const map = [];
  const interactiveEls = new Set();

  element.querySelectorAll(interactiveSelector).forEach((node) => {
    // map.length is the next free index: it grows only when an entry is kept.
    const entry = buildEntry(node, rootRect, map.length, fields, 'interactive');
    if (!entry) return;
    map.push(entry);
    interactiveEls.add(node);
  });

  if (semanticSelector) {
    element.querySelectorAll(semanticSelector).forEach((node) => {
      if (interactiveEls.has(node)) return;
      const entry = buildEntry(node, rootRect, map.length, fields, 'semantic');
      if (entry) map.push(entry);
    });
  }

  const { width, height } = rootRect;
  return { map, dimensions: { width, height } };
}
/**
 * Builds one map entry for an element, or null when the element has no box
 * (both rounded width and height are <= 0).
 *
 * Entry shape: `{ i, n, r, b, s? }`, plus `t` (text) and `a` (attributes)
 * when `fields === 'full'`. State (`s`) is only computed for interactive
 * entries.
 *
 * Non-interactive entries additionally get a NON-ENUMERABLE `isSemanticOnly`
 * marker. `addAnnotations` filters on that flag to keep badges off semantic
 * elements; being non-enumerable, it is dropped by object spread and
 * JSON.stringify, so the public entry shape is unchanged.
 *
 * @param {Element} el Element to describe.
 * @param {DOMRect} rootRect Rect of the capture root; bbox is relative to it.
 * @param {number} i Index to assign (matches the badge number).
 * @param {'minimal'|'full'} fields Entry shape.
 * @param {'interactive'|'semantic'} kind Which pass found the element.
 * @returns {Object|null} The entry, or null for box-less elements.
 */
function buildEntry(el, rootRect, i, fields, kind) {
  const rect = el.getBoundingClientRect();
  const b = [
    Math.round(rect.left - rootRect.left),
    Math.round(rect.top - rootRect.top),
    Math.round(rect.width),
    Math.round(rect.height),
  ];
  if (b[2] <= 0 && b[3] <= 0) return null;

  const role = deriveRole(el);
  const n = accessibleName(el);
  const entry = { i, n, r: role, b };

  if (kind === 'interactive') {
    const s = deriveState(el, role, rect);
    if (s) entry.s = s;
  } else {
    // Without this marker the `!e.isSemanticOnly` filter in addAnnotations is
    // a no-op and semantic elements receive badges as well.
    Object.defineProperty(entry, 'isSemanticOnly', { value: true });
  }

  if (fields === 'full') {
    const t = (el.textContent || '').replace(/\s+/g, ' ').trim();
    // Skip text that merely repeats the accessible name.
    if (t && t !== n) entry.t = t.length > 160 ? t.slice(0, 159) + '…' : t;
    const a = {};
    for (const name of ['href', 'type', 'name', 'placeholder', 'alt', 'title', 'role', 'aria-label']) {
      const v = el.getAttribute(name);
      if (v && v !== 'false') a[name] = v;
    }
    if (Object.keys(a).length) entry.a = a;
  }
  return entry;
}
| /* ── Annotations ────────────────────────────────── */ | ||
/**
 * Appends a full-size overlay to the clone containing one numbered badge per
 * interactive entry, centered on the entry's bbox.
 *
 * Entries flagged `isSemanticOnly` are skipped. The overlay is absolutely
 * positioned, so the clone must be a positioned element: it is switched to
 * `position: relative` only when its inline position is unset or 'static',
 * leaving an existing absolute / fixed / sticky position intact.
 *
 * @param {HTMLElement} clone Cloned capture root to decorate (mutated).
 * @param {Array<Object>} entries Map entries (`i` = badge number, `b` = bbox
 *   relative to the capture root).
 * @param {Object} customStyle Style overrides applied last to every badge.
 * @returns {void}
 */
function addAnnotations(clone, entries, customStyle) {
  const interactive = entries.filter((e) => !e.isSemanticOnly);
  if (!interactive.length) return;

  const overlay = document.createElement('div');
  overlay.setAttribute('data-snap-agent-overlay', 'true');
  Object.assign(overlay.style, {
    position: 'absolute',
    top: '0', left: '0', width: '100%', height: '100%',
    pointerEvents: 'none',
    zIndex: '2147483647',
    overflow: 'visible',
  });

  for (const e of interactive) {
    const [x, y, width, height] = e.b;
    const badge = document.createElement('span');
    badge.textContent = String(e.i);
    Object.assign(badge.style, {
      position: 'absolute',
      // Anchor at the bbox center; the transform re-centers the badge itself.
      left: (x + width / 2) + 'px', top: (y + height / 2) + 'px',
      transform: 'translate(-50%, -50%)',
      minWidth: '18px', height: '18px',
      lineHeight: '18px', fontSize: '11px', fontWeight: '700',
      fontFamily: 'system-ui, -apple-system, sans-serif',
      color: '#fff', backgroundColor: 'rgba(220, 38, 38, 0.92)',
      borderRadius: '9px', textAlign: 'center', padding: '0 4px',
      boxSizing: 'border-box', boxShadow: '0 1px 3px rgba(0,0,0,0.3)',
      ...customStyle,
    });
    overlay.appendChild(badge);
  }

  // Any non-static position already anchors the overlay; overwriting it
  // would move an absolutely / fixed positioned clone.
  const currentPosition = clone.style.position;
  if (!currentPosition || currentPosition === 'static') {
    clone.style.position = 'relative';
  }
  clone.appendChild(overlay);
}
+33
-17
@@ -12,9 +12,17 @@ /** | ||
| function isDrawElementImageAvailable() { | ||
| /** | ||
| * The WICG canvas-place-element spec evolved: Chrome ~130+ exposes drawElement(), | ||
| * earlier flagged builds shipped drawElementImage(). Detect either. | ||
| * @returns {'drawElement'|'drawElementImage'|null} | ||
| */ | ||
| function detectDrawApi() { | ||
| try { | ||
| const c = document.createElement('canvas') | ||
| const ctx = c.getContext('2d') | ||
| return ctx && typeof ctx.drawElementImage === 'function' | ||
| if (!ctx) return null | ||
| if (typeof ctx.drawElement === 'function') return 'drawElement' | ||
| if (typeof ctx.drawElementImage === 'function') return 'drawElementImage' | ||
| return null | ||
| } catch { | ||
| return false | ||
| return null | ||
| } | ||
@@ -27,5 +35,6 @@ } | ||
| export function htmlInCanvasPlugin() { | ||
| const available = isDrawElementImageAvailable() | ||
| const drawApi = detectDrawApi() | ||
| const available = !!drawApi | ||
| if (!available) { | ||
| console.warn('[snapdom] html-in-canvas plugin: drawElementImage not available. Enable chrome://flags/#canvas-draw-element') | ||
| console.warn('[snapdom] html-in-canvas plugin: drawElement / drawElementImage not available. Enable chrome://flags/#canvas-draw-element') | ||
| } | ||
@@ -97,23 +106,30 @@ | ||
| const container = document.createElement('div') | ||
| container.id = 'snapdom-html-in-canvas-temp' | ||
| container.style.cssText = 'position:fixed;left:-9999px;top:0;visibility:hidden;' | ||
| container.appendChild(canvas) | ||
| document.body.appendChild(container) | ||
| // Append directly to body, taken out of flow with position:fixed + z-index:-1 | ||
| // so it sits behind the page's content (covered by body/main backgrounds) | ||
| // while still being painted. visibility:hidden / opacity:0 / left:-9999px | ||
| // skip the paint pass and trigger "No cached paint record". | ||
| canvas.style.cssText = 'position:fixed;top:0;left:0;z-index:-1;' | ||
| document.body.appendChild(canvas) | ||
| try { | ||
| await new Promise(r => requestAnimationFrame(r)) | ||
| canvas.getBoundingClientRect() | ||
| await new Promise(r => requestAnimationFrame(() => requestAnimationFrame(r))) | ||
| const ctx2d = canvas.getContext('2d') | ||
| if (!ctx2d || typeof ctx2d.drawElementImage !== 'function') { | ||
| throw new Error('drawElementImage not available') | ||
| const fn = ctx2d && (ctx2d[drawApi] || ctx2d.drawElement || ctx2d.drawElementImage) | ||
| if (typeof fn !== 'function') { | ||
| throw new Error('drawElement / drawElementImage not available on this canvas context') | ||
| } | ||
| ctx2d.save() | ||
| ctx2d.scale(dpr * scale, dpr * scale) | ||
| ctx2d.drawElementImage(wrapper, 0, 0, width, height) | ||
| fn.call(ctx2d, wrapper, 0, 0, width, height) | ||
| ctx2d.restore() | ||
| return canvas | ||
| } catch (e) { | ||
| if (e && /paint record/i.test(e.message || '')) { | ||
| throw new Error('Browser had no paint record for the element. Make sure the document is fully loaded and visible before calling html-in-canvas (drawElement requires a real paint pass).') | ||
| } | ||
| throw e | ||
| } finally { | ||
| try { | ||
| document.body.removeChild(container) | ||
| } catch {} | ||
| try { canvas.remove() } catch {} | ||
| } | ||
@@ -120,0 +136,0 @@ } |
+1
-1
@@ -15,3 +15,3 @@ /** | ||
| export { pdfImage } from './pdf-image.js'; | ||
| export { promptExport } from './prompt-export.js'; | ||
| export { agentMap } from './agent-map.js'; | ||
| // export { htmlInCanvas } from './html-in-canvas.js'; |
+2
-2
| { | ||
| "name": "@zumer/snapdom-plugins", | ||
| "version": "1.2.0", | ||
| "version": "2.1.0", | ||
| "description": "Official plugins for SnapDOM", | ||
@@ -15,3 +15,3 @@ "type": "module", | ||
| "./pdf-image": "./pdf-image.js", | ||
| "./prompt-export": "./prompt-export.js" | ||
| "./agent-map": "./agent-map.js" | ||
| }, | ||
@@ -18,0 +18,0 @@ "files": [ |
+44
-60
@@ -173,83 +173,67 @@ # @zumer/snapdom-plugins | ||
| ### `prompt-export` | ||
| ### `agent-map` | ||
| Adds a `toPrompt()` export method that returns an LLM-ready package: a structured element map with bounding boxes, a pre-formatted prompt text, and (optionally) an annotated screenshot. Tuned for vision-language models, browser-agent pipelines, visual QA, and any workflow that pairs a capture with structured metadata. | ||
| Produces a Set-of-Mark package for **visual agents**: an annotated screenshot with numbered badges on interactive elements, plus a compact JSON map from badge index → role / accessible name / bbox / state. One call, fully client-side. | ||
| ```js | ||
| import { promptExport } from '@zumer/snapdom-plugins/prompt-export'; | ||
| import { agentMap } from '@zumer/snapdom-plugins/agent-map'; | ||
| const result = await snapdom(el, { plugins: [promptExport()] }); | ||
| // Default: no image, just the structured map + prompt text (cheapest) | ||
| const { elements, prompt, dimensions } = await result.toPrompt(); | ||
| ``` | ||
| const result = await snapdom(el, { plugins: [agentMap()] }); | ||
| const { image, map, dimensions } = await result.toAgentMap(); | ||
| To also include the annotated image (for tasks that truly depend on vision): | ||
| ```js | ||
| const result = await snapdom(el, { | ||
| plugins: [promptExport({ include: ['image', 'elements', 'prompt'] })] | ||
| }); | ||
| const { image, elements, prompt, dimensions } = await result.toPrompt(); | ||
| // image: data URL of the screenshot with numbered red badges overlaid | ||
| // map: [{ i, n, r, b, s? }, …] — index, name, role, bbox, state | ||
| // Agent says "click element 2" → map[2].b gives [x, y, w, h] | ||
| ``` | ||
| The returned object (fields present only if requested via `include`): | ||
| Map entry shape (default `fields: 'minimal'`): | ||
| | Field | Type | Description | | ||
| |-------|------|-------------| | ||
| | `elements` | `Array` | One entry per detected element: `{ id, tag, type, name, text, bbox, attributes, state?, styles?, covered? }` | | ||
| | `prompt` | `string` | Pre-formatted text describing interactive + semantic elements | | ||
| | `image` | `string` | Data URL of the (optionally annotated) screenshot — **only when `include` contains `'image'`** | | ||
| | `dimensions` | `{width, height}` | Scaled dimensions (always present) | | ||
| | Key | Type | Description | | ||
| |-----|------|-------------| | ||
| | `i` | `number` | Index matching the badge drawn on the image | | ||
| | `n` | `string` | Accessible name (aria-label → labelledby → alt → title → labels → textContent, truncated to 60 chars) | | ||
| | `r` | `string` | ARIA-style role (`button`, `link`, `checkbox`, `radio`, `textbox`, `combobox`, `slider`, `heading`, …) — derived from `role` attribute or implicit role of the element | | ||
| | `b` | `[x, y, w, h]` | Bounding box in pixels, scaled against `maxImageWidth` | | ||
| | `s` | `object?` | State: included only when at least one key is meaningful — `checked`, `disabled`, `focus`, `expanded`, `pressed`, `selected`, `value`, `open`, `selectedText`, `covered` | | ||
| `elements` is split into two `type`s: | ||
| - `'interactive'` — buttons, links, inputs, `[role]`/`[tabindex]` targets. These get numbered badges overlaid on the screenshot when `annotate` is on. | ||
| - `'semantic'` — headings, paragraphs, `<nav>`, `<main>`, images with `alt`, table cells, etc. Structural context, not overlaid. | ||
| Example map for a checkout form: | ||
| Each `bbox` is in pixel coordinates of the returned image (scaled against `maxImageWidth`). | ||
| Each interactive entry also carries: | ||
| - `name` — the computed accessible name (aria-label → labelledby → alt → title → labels[0] → textContent) | ||
| - `state` — runtime state: `{ checked, disabled, focus, open, value, selectedText }` (only keys that apply) | ||
| - `styles` — visually-meaningful computed props filtered to drop defaults | ||
| - `covered: true` when another element is painted on top of the bbox center (an agent won't click through a modal) | ||
| ```js | ||
| // Example — feed a vision-capable LLM | ||
| const { image, elements } = await result.toPrompt({ | ||
| include: ['image', 'elements', 'prompt'] | ||
| }); | ||
| // image is a data URL → pass as image input | ||
| // elements is JSON → pass as structured context alongside the image | ||
| // "Click element [3]" → look up elements[3].bbox for real coordinates | ||
| [ | ||
| { i: 0, n: 'Email', r: 'textbox', b: [28, 80, 280, 34], s: { value: 'ada@example.com' } }, | ||
| { i: 1, n: 'Send product updates', r: 'checkbox', b: [28, 134, 13, 13], s: { checked: true } }, | ||
| { i: 2, n: 'Apply coupon', r: 'button', b: [28, 176, 114, 38], s: { expanded: false } }, | ||
| { i: 3, n: 'Remove coupon', r: 'button', b: [150, 176, 140, 38], s: { disabled: true } }, | ||
| { i: 4, n: 'Pay $53.90', r: 'button', b: [28, 220, 97, 38] } | ||
| ] | ||
| ``` | ||
| #### Options | ||
| | Option | Type | Default | Description | | ||
| |--------|------|---------|-------------| | ||
| | `include` | `string[]` | `['elements', 'prompt']` | Fields to return. Add `'image'` for tasks that need vision (chart content, layout QA, canvas). Use `['prompt']` for the cheapest text-only mode. | | ||
| | `annotate` | `boolean` | `true` | Overlay numbered badges on interactive elements (only affects the image when included) | | ||
| | `promptMode` | `'compact' \| 'verbose'` | `'compact'` | Prompt text verbosity. Compact omits coords when badges are on the image. | | ||
| | `includeCoords` | `boolean` | `true` | Include bbox in the prompt text | | ||
| | `imageFormat` | `'png' \| 'jpg' \| 'webp'` | `'png'` | Output image format (only used when `image` is included) | | ||
| | `imageQuality` | `number` | `0.8` | Quality for lossy formats (0–1) | | ||
| | `maxImageWidth` | `number` | `1024` | Max width in px; downscales and rescales bboxes if larger | | ||
| | `interactiveSelector` | `string` | see below | CSS selector for the interactive element set | | ||
| | `semanticSelector` | `string` | see below | CSS selector for the semantic element set | | ||
| | `labelStyle` | `object` | `{}` | Override styles for the numbered badges (`position`, `color`, `backgroundColor`, etc.) | | ||
| | `image` | `'annotated' \| 'raw' \| false` | `'annotated'` | `'annotated'` overlays numbered badges; `'raw'` skips badges; `false` skips image generation entirely (no canvas draw, no toDataURL — cheapest path). | | ||
| | `fields` | `'minimal' \| 'full'` | `'minimal'` | `'full'` adds `t` (raw text content) and `a` (meaningful attributes) per entry. | | ||
| | `semantic` | `boolean` | `false` | Include non-interactive structural elements (headings, paragraphs, landmarks). Off by default — agents typically act only on interactive elements. | | ||
| | `maxImageWidth` | `number` | `1024` | Downscale target for the image; bboxes rescale to match. | | ||
| | `imageFormat` | `'png' \| 'jpg' \| 'webp'` | `'png'` | Image format (only used when image is rendered). | | ||
| | `imageQuality` | `number` | `0.8` | Quality for lossy formats. | | ||
| | `interactiveSelector` | `string` | see below | CSS selector for interactive elements. | | ||
| | `semanticSelector` | `string` | see below | CSS selector for semantic elements (used when `semantic: true`). | | ||
| | `labelStyle` | `object` | `{}` | Override badge styles. | | ||
| Defaults: | ||
| - **interactive**: `a[href], button, input, select, textarea, [role="button"|"link"|"tab"|"menuitem"|"checkbox"|"radio"], [tabindex]:not([tabindex="-1"]), summary, [contenteditable="true"]` | ||
| - **semantic**: `h1–h6, p, li, img[alt], nav, main, article, section, header, footer, label, td, th, figcaption, blockquote, legend` | ||
| - **interactive**: `a[href], button, input, select, textarea, [role="button"|"link"|"tab"|"menuitem"|"checkbox"|"radio"|"switch"|"slider"|"combobox"|"textbox"], [tabindex]:not([tabindex="-1"]), summary, [contenteditable="true"]` | ||
| - **semantic**: `h1–h6, nav, main, article, section, header, footer, figcaption, blockquote, legend, p` | ||
| Both per-call options (`opts.include`, `opts.imageFormat`, etc.) and constructor options are supported; per-call wins. | ||
| Per-call options override constructor options (e.g. `result.toAgentMap({ image: false })`). | ||
| The image is the most expensive part of `toPrompt()` to produce (canvas draw + data-URL serialization), so the default skips it. Add `'image'` to `include` when the task actually uses vision: | ||
| #### When to use | ||
| ```js | ||
| // Vision-dependent task (chart content, layout QA, visual diff) | ||
| await result.toPrompt({ include: ['image', 'elements', 'prompt'] }); | ||
| - Visual agents using Set-of-Mark prompting — one call gives you both the labelled image and the coordinate lookup table. | ||
| - Computer-use / browser-agent harnesses that need click coordinates for a vision model's output. | ||
| - Visual QA with an LLM judge — compare before/after captures with structured element identity. | ||
| - Dataset generation for vision-LLM fine-tuning — (image, map) pairs. | ||
| // Pure structured agent loop (cheapest) | ||
| await result.toPrompt({ include: ['prompt'] }); | ||
| ``` | ||
| Because it runs entirely in the browser, it works in contexts where Playwright / Puppeteer can't: Chrome extensions, SaaS web apps capturing the user's own page, Electron apps capturing their own window. | ||
@@ -256,0 +240,0 @@ --- |
-550
| /** | ||
| * promptExport – Official SnapDOM Plugin | ||
| * Produces an LLM-ready package: annotated screenshot + structured element | ||
| * map + prompt text. Tuned for vision-capable LLMs reading the image + map | ||
| * together (Set-of-Mark pattern). | ||
| * | ||
| * Usage: | ||
| * import { promptExport } from '@zumer/snapdom-plugins/prompt-export'; | ||
| * const result = await snapdom(el, { plugins: [promptExport()] }); | ||
| * const { image, elements, dimensions, prompt } = await result.toPrompt(); | ||
| * | ||
| * @param {Object} [options] | ||
| * @param {boolean} [options.annotate=true] - Overlay numbered badges on interactive elements | ||
| * @param {string} [options.imageFormat='png'] - Output image format ('png'|'jpg'|'webp') | ||
| * @param {number} [options.imageQuality=0.8] - Quality for lossy formats (0..1) | ||
| * @param {number} [options.maxImageWidth=1024] - Max width in px (downscales if larger) | ||
| * @param {string} [options.interactiveSelector] - Custom CSS selector for interactive elements | ||
| * @param {string} [options.semanticSelector] - Custom CSS selector for semantic elements | ||
| * @param {Object} [options.labelStyle={}] - Override styles for annotation badges | ||
| * @param {'compact'|'verbose'} [options.promptMode='compact'] - Prompt text verbosity | ||
| * @param {boolean} [options.includeCoords=true] - Include bbox in the prompt text | ||
| * @param {string[]} [options.include] - Which fields to return. Default | ||
| * ['elements', 'prompt']. For vision-dependent tasks (chart content, layout QA, | ||
| * canvas) pass ['image', 'elements', 'prompt'] or add 'image' to the array. For | ||
| * text-only agent prompts pass ['prompt'] (cheapest — skips canvas draw entirely). | ||
| * Accepted values: 'image', 'elements', 'prompt'. | ||
| * @returns {Object} SnapDOM plugin | ||
| */ | ||
/** Default CSS selector for elements classified as `interactive` (used when no `interactiveSelector` option is given). */
const DEFAULT_INTERACTIVE = [
  'a[href]',
  'button',
  'input',
  'select',
  'textarea',
  '[role="button"]',
  '[role="link"]',
  '[role="tab"]',
  '[role="menuitem"]',
  '[role="checkbox"]',
  '[role="radio"]',
  '[tabindex]:not([tabindex="-1"])',
  'summary',
  '[contenteditable="true"]',
].join(', ');

/** Default CSS selector for elements classified as `semantic` (used when no `semanticSelector` option is given). */
const DEFAULT_SEMANTIC = [
  'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
  'p', 'li', 'img[alt]',
  'nav', 'main', 'article', 'section', 'header', 'footer',
  'label', 'td', 'th',
  'figcaption', 'blockquote', 'legend',
].join(', ');

/** Attribute names copied verbatim from each element into its `attributes` map. */
const COLLECTED_ATTRS = [
  'role',
  'aria-label',
  'aria-expanded',
  'aria-checked',
  'aria-disabled',
  'alt',
  'href',
  'placeholder',
  'name',
  'type',
  'value',
  'title',
  'disabled',
];

/** Computed-style properties sampled for each element's `styles` map. */
const VISUAL_FIELDS = [
  'display',
  'visibility',
  'opacity',
  'color',
  'backgroundColor',
  'fontSize',
  'fontWeight',
  'cursor',
  'overflow',
];
// Common computed-style values that carry no information. Keeping the
// `styles` object small is the difference between a useful LLM input and
// token bloat on every element.
const VISUAL_SKIP = new Set(['initial', 'normal', 'visible', 'auto', 'static', '0']);

/**
 * Decide whether a computed-style value is uninformative and should be
 * left out of an element's `styles` map.
 *
 * @param {string} prop - Style property name in camelCase (e.g. 'fontWeight').
 * @param {string} value - Computed value for that property.
 * @returns {boolean} true when the value should be omitted.
 */
function isDefaultStyleValue(prop, value) {
  if (!value) return true;
  // `opacity: 0` means the element is fully transparent. It must not be
  // swallowed by the generic '0' entry in VISUAL_SKIP.
  const isZeroOpacity = prop === 'opacity' && value === '0';
  if (!isZeroOpacity && VISUAL_SKIP.has(value)) return true;
  if (prop === 'cursor' && value === 'none') return true;
  if (prop === 'color' && value === 'rgb(0, 0, 0)') return true;
  if (prop === 'backgroundColor' && value === 'rgba(0, 0, 0, 0)') return true;
  if (prop === 'fontWeight' && (value === '400' || value === 'normal')) return true;
  if (prop === 'opacity' && value === '1') return true;
  return false;
}
// Default omits 'image'. Benchmarking showed the text + JSON map is enough
// to answer most UI-inspection questions and uses ~14× fewer tokens. Pass
// `include: ['image', 'elements', 'prompt']` explicitly when the task truly
// depends on vision (charts, canvas content, layout QA).
const DEFAULT_INCLUDE = ['elements', 'prompt'];

/**
 * Create the prompt-export plugin.
 *
 * The returned object exposes an `afterClone` hook, which extracts element
 * metadata and optionally adds numbered badges to the clone, and a `prompt`
 * export that returns `{ dimensions, image?, elements?, prompt? }`.
 * Per-call options passed to the export take precedence over the
 * constructor options.
 *
 * @param {Object} [options] - Constructor options (see the file header).
 * @returns {Object} SnapDOM plugin with `name`, `afterClone` and `defineExports`.
 */
export function promptExport(options = {}) {
  const {
    annotate = true,
    imageFormat = 'png',
    imageQuality = 0.8,
    maxImageWidth = 1024,
    interactiveSelector = DEFAULT_INTERACTIVE,
    semanticSelector = DEFAULT_SEMANTIC,
    labelStyle = {},
    promptMode = 'compact',
    includeCoords = true,
    include = DEFAULT_INCLUDE,
  } = options;

  return {
    name: 'prompt-export',

    afterClone(ctx) {
      const meta = extractMetadata(ctx.element, interactiveSelector, semanticSelector);
      // snapdom spreads a fresh ctx for the export phase from ctx.options,
      // so write to both so the prompt() call below can read it.
      ctx.__promptMetadata = meta;
      if (ctx.options) ctx.options.__promptMetadata = meta;
      if (annotate) {
        addAnnotations(ctx.clone, meta.elements, labelStyle);
      }
    },

    defineExports() {
      return {
        prompt: async (ctx, opts = {}) => {
          const meta = ctx.__promptMetadata;
          const wantSet = new Set(opts.include || include || DEFAULT_INCLUDE);
          const wantImage = wantSet.has('image');
          const wantElements = wantSet.has('elements');
          const wantPrompt = wantSet.has('prompt');

          // Nothing was extracted: return an empty package of the requested shape.
          if (!meta || !meta.elements.length) {
            const empty = { dimensions: { width: 0, height: 0 } };
            if (wantImage) empty.image = ctx.export.url;
            if (wantElements) empty.elements = [];
            if (wantPrompt) empty.prompt = '';
            return empty;
          }

          const format = opts.imageFormat || imageFormat;
          // 0 is a valid quality for lossy formats, so only fall back to the
          // constructor value when the caller did not supply one at all.
          const quality = opts.imageQuality != null ? opts.imageQuality : imageQuality;
          const maxWidth = opts.maxImageWidth || maxImageWidth;
          const mode = opts.promptMode || promptMode;
          const withCoords = opts.includeCoords !== undefined ? opts.includeCoords : includeCoords;

          // Only load + rasterize the SVG when the caller actually wants the
          // image. Skipping saves the img decode + canvas draw + toDataURL —
          // the most expensive steps of this export.
          let w;
          let h;
          let dataURL;
          if (wantImage) {
            const img = new Image();
            await new Promise((res, rej) => {
              // Attach handlers before assigning `src` so a load/error event
              // can never be missed, and reject with a real Error.
              img.onload = res;
              img.onerror = () => rej(new Error('promptExport: failed to load the captured image'));
              img.src = ctx.export.url;
            });
            const ratio = img.naturalWidth > maxWidth ? maxWidth / img.naturalWidth : 1;
            w = Math.round(img.naturalWidth * ratio);
            h = Math.round(img.naturalHeight * ratio);
            const canvas = document.createElement('canvas');
            canvas.width = w;
            canvas.height = h;
            canvas.getContext('2d').drawImage(img, 0, 0, w, h);
            const mime =
              format === 'jpg' || format === 'jpeg' ? 'image/jpeg'
                : format === 'webp' ? 'image/webp'
                  : 'image/png';
            dataURL = canvas.toDataURL(mime, quality);
          } else {
            // No image — scale bboxes to the same target width the image would
            // have used, so downstream callers can still render the map over
            // a separately-rendered screenshot at the same scale.
            const sourceW = meta.dimensions.width || 1;
            const ratio = sourceW > maxWidth ? maxWidth / sourceW : 1;
            w = Math.round(sourceW * ratio);
            h = Math.round(meta.dimensions.height * ratio);
          }

          // Map source-space bboxes into output-image space.
          const sx = w / (meta.dimensions.width || 1);
          const sy = h / (meta.dimensions.height || 1);
          const scaledElements = meta.elements.map((el) => ({
            ...el,
            bbox: {
              x: Math.round(el.bbox.x * sx),
              y: Math.round(el.bbox.y * sy),
              width: Math.round(el.bbox.width * sx),
              height: Math.round(el.bbox.height * sy),
            },
          }));

          // Verbose mode always prints coordinates; compact mode prints them
          // only when requested and no badges were drawn.
          const emitCoords = mode === 'verbose' ? true : (withCoords && !annotate);
          const promptText = (wantPrompt)
            ? (mode === 'verbose'
              ? formatPromptVerbose(scaledElements, { width: w, height: h }, emitCoords)
              : formatPromptCompact(scaledElements, { width: w, height: h }, emitCoords))
            : null;

          const out = { dimensions: { width: w, height: h } };
          if (wantImage) out.image = dataURL;
          if (wantElements) out.elements = scaledElements;
          if (wantPrompt) out.prompt = promptText;
          return out;
        },
      };
    },
  };
}
/* ── Accessible name ──────────────────────────── */

/**
 * Shorten a string to at most `max` characters, replacing the tail with an
 * ellipsis when it had to be cut.
 *
 * @param {string} str - Input text (falsy values yield '').
 * @param {number} max - Maximum length of the result, ellipsis included.
 * @returns {string}
 */
function truncate(str, max) {
  if (!str) return '';
  if (str.length <= max) return str;
  return str.slice(0, max - 1) + '…';
}

/**
 * Compute the element's accessible name following a simplified WAI-ARIA order.
 * This is what an LLM agent reads first to know what the element IS.
 *
 * Precedence: aria-labelledby → aria-label → native label (`alt` on <img>,
 * first associated <label>) → text content (truncated to 40 chars) → title.
 * Runs of whitespace are collapsed so a name always fits on one prompt line.
 *
 * @param {Element} el - Element to name.
 * @returns {string} The name, or '' when none can be derived.
 */
function accessibleName(el) {
  const clean = (value) => (value || '').replace(/\s+/g, ' ').trim();

  // aria-labelledby has the highest precedence and may reference several ids.
  const labelledBy = clean(el.getAttribute('aria-labelledby'));
  if (labelledBy) {
    const root = el.getRootNode();
    const getById = (id) => {
      if (root && typeof root.getElementById === 'function') return root.getElementById(id);
      // Fall back to the global document only when one exists.
      return typeof document !== 'undefined' ? document.getElementById(id) : null;
    };
    const parts = labelledBy.split(' ')
      .map((id) => {
        const ref = getById(id);
        return ref ? clean(ref.textContent) : '';
      })
      .filter(Boolean);
    if (parts.length) return parts.join(' ');
  }

  const ariaLabel = clean(el.getAttribute('aria-label'));
  if (ariaLabel) return ariaLabel;

  if (el.tagName === 'IMG') {
    const alt = clean(el.getAttribute('alt'));
    if (alt) return alt;
  }

  if (el.labels && el.labels[0]) {
    const labelText = clean(el.labels[0].textContent);
    if (labelText) return labelText;
  }

  const text = clean(el.textContent);
  if (text) return truncate(text, 40);

  // The tooltip is the last resort: visible content names the element better.
  return clean(el.getAttribute('title'));
}
/* ── DOM state ────────────────────────────────── */

/**
 * Capture real runtime state (not just ARIA attributes). This is what
 * separates a useful map from a static screenshot annotation: an agent can
 * see "checkbox is unchecked / select has value=X / details is closed".
 *
 * @param {Element} el - Live element to inspect.
 * @returns {Object|null} Subset of `{ checked, disabled, focus, value,
 *   selectedText, open }`, or null when no state was recorded.
 */
function computeState(el) {
  const state = {};
  try {
    if (el.matches(':checked')) state.checked = true;
    if (el.matches(':disabled')) state.disabled = true;
    if (el.matches(':focus')) state.focus = true;
  } catch { /* some selectors may fail on exotic nodes */ }

  const tag = el.tagName;
  if (tag === 'INPUT') {
    const type = (el.type || 'text').toLowerCase();
    // Checkbox/radio: `value` defaults to "on" when no explicit value attr
    // is set — noise. The meaningful signal is `checked`.
    // Password: the secret must never be copied into the exported map or
    // prompt text.
    const skipValue = type === 'checkbox' || type === 'radio' || type === 'password';
    if (!skipValue && el.value) {
      state.value = el.value;
    }
  } else if (tag === 'TEXTAREA') {
    if (el.value) state.value = el.value;
  } else if (tag === 'SELECT') {
    state.value = el.value;
    const opt = el.options && el.options[el.selectedIndex];
    if (opt) state.selectedText = opt.text || '';
  } else if (tag === 'DETAILS') {
    state.open = !!el.open;
  } else if (el.hasAttribute && el.hasAttribute('open')) {
    // Any other element carrying an `open` attribute.
    state.open = true;
  }
  return Object.keys(state).length ? state : null;
}
/* ── Visual styles ────────────────────────────── */

/**
 * Sample the VISUAL_FIELDS properties from the element's computed style and
 * keep only the values that `isDefaultStyleValue` does not classify as noise.
 *
 * Callers pass the live element rather than the clone: per the original
 * authors' note, the clone relies on class-based styling, so computing its
 * style while detached would only yield initial values.
 *
 * @param {Element} el - Element whose computed style is read.
 * @returns {Object|null} Map of property → value, or null when the style
 *   cannot be read or every sampled value is a default.
 */
function computeVisualStyles(el) {
  let computed;
  try {
    computed = getComputedStyle(el);
  } catch {
    return null;
  }
  if (!computed) return null;

  const picked = {};
  VISUAL_FIELDS.forEach((field) => {
    const value = computed[field];
    if (!isDefaultStyleValue(field, value)) {
      picked[field] = value;
    }
  });

  return Object.keys(picked).length ? picked : null;
}
/* ── Covered detection ────────────────────────── */

/**
 * True when another element is painted on top of the center of this one.
 * An agent that knows a button is covered by a modal won't try to click it.
 *
 * The check hit-tests the center of `rect` with `elementFromPoint`. A hit on
 * the element itself, on one of its descendants, or on one of its ancestors
 * is not treated as covering. An ancestor hit means the element does not
 * receive the hit test at that point, not that something sits above it.
 *
 * @param {Element} el - Element being tested.
 * @param {{left: number, top: number, width: number, height: number}} rect -
 *   The element's bounding client rect.
 * @returns {boolean}
 */
function isCovered(el, rect) {
  if (!rect.width || !rect.height) return false;
  const cx = rect.left + rect.width / 2;
  const cy = rect.top + rect.height / 2;
  if (cx < 0 || cy < 0) return false;
  const doc = el.ownerDocument || document;
  if (!doc.elementFromPoint) return false;
  const top = doc.elementFromPoint(cx, cy);
  if (!top || top === el) return false;
  if (el.contains && el.contains(top)) return false;
  // Hit landed on an ancestor (e.g. the gap inside a wrapped inline link, or
  // an element with pointer-events: none): nothing is stacked above `el`.
  if (top.contains && top.contains(el)) return false;
  return true;
}
/* ── Metadata extraction ──────────────────────── */

/**
 * Walk the subtree of `element` and build one entry per matched node:
 * first everything matching `interactiveSelector`, then everything matching
 * `semanticSelector` that was not already recorded as interactive. Ids are
 * assigned sequentially in that order, counting only nodes that produced an
 * entry.
 *
 * @param {Element} element - Capture root.
 * @param {string} interactiveSelector - CSS selector for interactive nodes.
 * @param {string} semanticSelector - CSS selector for semantic nodes.
 * @returns {{elements: Object[], dimensions: {width: number, height: number}}}
 */
function extractMetadata(element, interactiveSelector, semanticSelector) {
  const rootRect = element.getBoundingClientRect();
  const elements = [];
  const recordedInteractive = new Set();

  // The next id always equals the number of entries produced so far.
  const collect = (selector, type) => {
    const isInteractive = type === 'interactive';
    for (const node of element.querySelectorAll(selector)) {
      if (!isInteractive && recordedInteractive.has(node)) continue;
      const entry = buildEntry(node, rootRect, elements.length, type);
      if (!entry) continue;
      elements.push(entry);
      if (isInteractive) recordedInteractive.add(node);
    }
  };

  collect(interactiveSelector, 'interactive');
  collect(semanticSelector, 'semantic');

  const { width, height } = rootRect;
  return {
    elements,
    dimensions: { width, height },
  };
}
/**
 * Build the metadata entry for one element, or return null when its rounded
 * bounding box has neither width nor height.
 *
 * @param {Element} el - Live element to describe.
 * @param {{left: number, top: number}} rootRect - Bounding rect of the
 *   capture root; bbox coordinates are made relative to it.
 * @param {number} id - Identifier to store on the entry.
 * @param {'interactive'|'semantic'} type - Classification of the element.
 * @returns {Object|null} `{ id, tag, type, name, text, bbox, attributes }`
 *   plus optional `styles`, and for interactive elements optional `state`
 *   and `covered`.
 */
function buildEntry(el, rootRect, id, type) {
  const rect = el.getBoundingClientRect();
  const width = Math.round(rect.width);
  const height = Math.round(rect.height);
  if (width <= 0 && height <= 0) return null;

  const bbox = {
    x: Math.round(rect.left - rootRect.left),
    y: Math.round(rect.top - rootRect.top),
    width,
    height,
  };

  const isInteractive = type === 'interactive';
  const tag = el.tagName.toLowerCase();
  const textLimit = isInteractive ? 200 : 120;
  const text = (el.textContent || '').trim().slice(0, textLimit);

  // Copy the whitelisted attributes, dropping absent, empty and "false" ones.
  const attributes = {};
  COLLECTED_ATTRS.forEach((attrName) => {
    const raw = el.getAttribute(attrName);
    const isNoise = raw == null || raw === '' || raw === 'false';
    if (!isNoise) attributes[attrName] = raw;
  });

  // For <img>, replace the (potentially long, cache-busted) src with just the
  // filename — "logo.svg" is more useful to an LLM than the full URL.
  if (tag === 'img' && el.src) {
    try {
      let base;
      if (el.ownerDocument && el.ownerDocument.location) {
        base = el.ownerDocument.location.href;
      } else if (typeof location !== 'undefined') {
        base = location.href;
      }
      const parsed = base ? new URL(el.src, base) : new URL(el.src);
      attributes.src = parsed.pathname.split('/').pop() || el.src;
    } catch {
      // Unparseable URL: keep the raw value.
      attributes.src = el.src;
    }
  }

  const entry = {
    id,
    tag,
    type,
    name: accessibleName(el),
    text,
    bbox,
    attributes,
  };

  const styles = computeVisualStyles(el);
  if (styles) entry.styles = styles;

  if (!isInteractive) return entry;

  const state = computeState(el);
  if (state) {
    // Drop state.value when it just echoes attributes.value — no
    // divergence between the initial HTML attribute and the current
    // property, nothing for the LLM to learn from the repeat.
    const echoesAttribute = state.value !== undefined && state.value === attributes.value;
    if (echoesAttribute) delete state.value;
    if (Object.keys(state).length) entry.state = state;
  }
  if (isCovered(el, rect)) entry.covered = true;

  return entry;
}
/* ── Visual annotations ───────────────────────── */

/**
 * Append an overlay of numbered badges to the clone, one badge per
 * interactive entry, each centered on that entry's bbox. Does nothing when
 * there are no interactive entries. Sets the clone's inline `position` to
 * `relative` so the absolutely positioned overlay is anchored to it.
 *
 * @param {HTMLElement} clone - Cloned root that receives the overlay.
 * @param {Object[]} elements - Entries with `id`, `type` and `bbox`.
 * @param {Object} customStyle - Style overrides applied last to every badge.
 */
function addAnnotations(clone, elements, customStyle) {
  const targets = elements.filter(({ type }) => type === 'interactive');
  if (targets.length === 0) return;

  const overlay = document.createElement('div');
  overlay.setAttribute('data-snap-prompt-overlay', 'true');
  const overlayStyle = {
    position: 'absolute',
    top: '0',
    left: '0',
    width: '100%',
    height: '100%',
    pointerEvents: 'none',
    zIndex: '2147483647',
    overflow: 'visible',
  };
  Object.assign(overlay.style, overlayStyle);

  // Badge look shared by every label; position is added per element.
  const badgeLook = {
    transform: 'translate(-50%, -50%)',
    minWidth: '18px',
    height: '18px',
    lineHeight: '18px',
    fontSize: '11px',
    fontWeight: '700',
    fontFamily: 'system-ui, -apple-system, sans-serif',
    color: '#fff',
    backgroundColor: 'rgba(220, 38, 38, 0.92)',
    borderRadius: '9px',
    textAlign: 'center',
    padding: '0 4px',
    boxSizing: 'border-box',
    boxShadow: '0 1px 3px rgba(0,0,0,0.3)',
  };

  targets.forEach(({ id, bbox }) => {
    const label = String(id);
    const badge = document.createElement('span');
    badge.textContent = label;
    badge.setAttribute('data-snap-prompt-label', label);

    // Anchor at the bbox center; the translate in badgeLook then centers
    // the badge itself on that point.
    const centerX = bbox.x + bbox.width / 2;
    const centerY = bbox.y + bbox.height / 2;
    Object.assign(badge.style, {
      position: 'absolute',
      left: `${centerX}px`,
      top: `${centerY}px`,
      ...badgeLook,
      ...customStyle,
    });

    overlay.appendChild(badge);
  });

  clone.style.position = 'relative';
  clone.appendChild(overlay);
}
/* ── Prompt text formatters ───────────────────── */

/**
 * Render a state object as a ` {flag, key=value}` suffix. Keys whose value
 * is exactly `true` are printed as bare flags first; all other keys follow
 * as `key=<JSON value>`.
 *
 * @param {Object|null|undefined} state - State map from an entry.
 * @returns {string} Suffix with a leading space, or '' when there is nothing to print.
 */
function stateToStr(state) {
  if (!state) return '';

  const flags = [];
  const pairs = [];
  Object.entries(state).forEach(([key, value]) => {
    if (value === true) {
      flags.push(key);
    } else {
      pairs.push(`${key}=${JSON.stringify(value)}`);
    }
  });

  const parts = flags.concat(pairs);
  return parts.length ? ` {${parts.join(', ')}}` : '';
}
/**
 * Render a bbox as a ` (x,y width×height)` suffix.
 *
 * @param {{x: number, y: number, width: number, height: number}} bbox
 * @returns {string} Suffix with a leading space.
 */
function coordsToStr(bbox) {
  const { x, y, width, height } = bbox;
  return ` (${x},${y} ${width}×${height})`;
}
/**
 * Build the compact prompt text: a one-line header, then an `Interactive:`
 * section (tag, quoted name, state, covered marker, optional coords) and a
 * `Semantic:` section (tag and quoted name only). Empty sections are omitted.
 *
 * @param {Object[]} elements - Entries to list.
 * @param {{width: number, height: number}} dimensions - Size printed in the header.
 * @param {boolean} withCoords - Append the bbox to interactive lines.
 * @returns {string} Newline-joined prompt text.
 */
function formatPromptCompact(elements, dimensions, withCoords) {
  const ofType = (kind) => elements.filter((entry) => entry.type === kind);
  const quoted = (name) => (name ? ` "${truncate(name, 60)}"` : '');

  const out = [`Screenshot (${dimensions.width}×${dimensions.height}px).`];

  const interactive = ofType('interactive');
  const semantic = ofType('semantic');

  if (interactive.length) {
    out.push('', 'Interactive:');
    interactive.forEach((entry) => {
      const label = quoted(entry.name);
      const stateText = stateToStr(entry.state);
      const coveredText = entry.covered ? ' (covered)' : '';
      const coordText = withCoords ? coordsToStr(entry.bbox) : '';
      out.push(` [${entry.id}] ${entry.tag}${label}${stateText}${coveredText}${coordText}`);
    });
  }

  if (semantic.length) {
    out.push('', 'Semantic:');
    semantic.forEach((entry) => {
      out.push(` [${entry.id}] ${entry.tag}${quoted(entry.name)}`);
    });
  }

  return out.join('\n');
}
/**
 * Build the verbose prompt text: a header and blank line, then an
 * `Interactive elements:` section (tag, quoted name, optional coords, all
 * collected attributes, state, covered marker) and a `Semantic structure:`
 * section (tag, quoted name, `alt` and `role` only). Each non-empty section
 * is followed by a blank line.
 *
 * @param {Object[]} elements - Entries to list; each needs an `attributes` map.
 * @param {{width: number, height: number}} dimensions - Size printed in the header.
 * @param {boolean} withCoords - Append the bbox to interactive lines.
 * @returns {string} Newline-joined prompt text.
 */
function formatPromptVerbose(elements, dimensions, withCoords) {
  const quoted = (name) => (name ? ` "${truncate(name, 80)}"` : '');
  const joinAttrs = (parts) => (parts.length ? ' ' + parts.join(' ') : '');

  const out = [
    `Screenshot of a web page (${dimensions.width}×${dimensions.height}px).`,
    '',
  ];

  const interactive = elements.filter((entry) => entry.type === 'interactive');
  const semantic = elements.filter((entry) => entry.type === 'semantic');

  if (interactive.length) {
    out.push('Interactive elements:');
    interactive.forEach((entry) => {
      const label = quoted(entry.name);
      const attrText = joinAttrs(
        Object.entries(entry.attributes).map(([key, value]) => `${key}="${value}"`)
      );
      const coordText = withCoords ? coordsToStr(entry.bbox) : '';
      const stateText = stateToStr(entry.state);
      const coveredText = entry.covered ? ' (covered)' : '';
      out.push(` [${entry.id}] <${entry.tag}>${label}${coordText}${attrText}${stateText}${coveredText}`);
    });
    out.push('');
  }

  if (semantic.length) {
    out.push('Semantic structure:');
    semantic.forEach((entry) => {
      const label = quoted(entry.name);
      // Only alt and role are printed for semantic nodes, in that order.
      const shown = [];
      if (entry.attributes.alt) shown.push(`alt="${entry.attributes.alt}"`);
      if (entry.attributes.role) shown.push(`role="${entry.attributes.role}"`);
      out.push(` [${entry.id}] <${entry.tag}>${label}${joinAttrs(shown)}`);
    });
    out.push('');
  }

  return out.join('\n');
}
Major refactor
Supply chain riskPackage has recently undergone a major refactor. It may be unstable or indicate significant internal changes. Use caution when updating to versions that include significant changes.
Found 1 instance in 1 package
42130
-12.06%783
-15.08%279
-5.42%1
Infinity%