Big News: Socket raises $60M Series C at a $1B valuation to secure software supply chains for AI-driven development. Announcement
Sign In

@zumer/snapdom-plugins

Package Overview
Dependencies
Maintainers
1
Versions
8
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

@zumer/snapdom-plugins - npm Package Compare versions

Comparing version
1.0.3
to
1.1.0
+1
-1
package.json
{
"name": "@zumer/snapdom-plugins",
"version": "1.0.3",
"version": "1.1.0",
"description": "Official plugins for SnapDOM",

@@ -5,0 +5,0 @@ "type": "module",

/**
* promptExport – Official SnapDOM Plugin
* Produces an LLM-ready package: annotated screenshot + structured element map + prompt text.
* Produces an LLM-ready package: annotated screenshot + structured element
* map + prompt text. Tuned for vision-capable LLMs reading the image + map
* together (Set-of-Mark pattern).
*

@@ -12,8 +14,10 @@ * Usage:

* @param {boolean} [options.annotate=true] - Overlay numbered badges on interactive elements
* @param {string} [options.imageFormat='png'] - Output image format ('png'|'jpg'|'webp')
* @param {number} [options.imageQuality=0.8] - Quality for lossy formats (0..1)
* @param {number} [options.maxImageWidth=1024] - Max width in px (downscales if larger)
* @param {string} [options.interactiveSelector] - Custom CSS selector for interactive elements
* @param {string} [options.semanticSelector] - Custom CSS selector for semantic elements
* @param {Object} [options.labelStyle={}] - Override styles for annotation badges
* @param {string} [options.imageFormat='png'] - Output image format ('png'|'jpg'|'webp')
* @param {number} [options.imageQuality=0.8] - Quality for lossy formats (0..1)
* @param {number} [options.maxImageWidth=1024] - Max width in px (downscales if larger)
* @param {string} [options.interactiveSelector] - Custom CSS selector for interactive elements
* @param {string} [options.semanticSelector] - Custom CSS selector for semantic elements
* @param {Object} [options.labelStyle={}] - Override styles for annotation badges
* @param {'compact'|'verbose'} [options.promptMode='compact'] - Prompt text verbosity
* @param {boolean} [options.includeCoords=true] - Include bbox in the prompt text
* @returns {Object} SnapDOM plugin

@@ -31,2 +35,29 @@ */

// Attribute names of interest for the element map (ARIA state plus common
// form, link and image attributes).
// NOTE(review): presumably the list buildEntry() walks with
// `el.getAttribute(attr)` to fill `attributes` — the loop header is not
// visible in this hunk; confirm against the full file.
const COLLECTED_ATTRS = [
'role', 'aria-label', 'aria-expanded', 'aria-checked', 'aria-disabled',
'alt', 'href', 'placeholder', 'name', 'type', 'value', 'title', 'disabled',
];
// Computed-style property names (camelCase, as read off the object returned
// by getComputedStyle) that computeVisualStyles() inspects for each element.
const VISUAL_FIELDS = [
'display', 'visibility', 'opacity',
'color', 'backgroundColor',
'fontSize', 'fontWeight',
'cursor', 'overflow',
];
// Common computed-style values that carry no information. Keeping the
// `styles` object small is the difference between a useful LLM input and
// token bloat on every element.
const VISUAL_SKIP = new Set(['initial', 'normal', 'visible', 'auto', 'static', '0']);

/**
 * Decide whether a computed-style value is uninformative and can be left out
 * of an element's `styles` object.
 *
 * @param {string} prop - camelCase style property name (e.g. 'fontWeight').
 * @param {string} value - Computed value for that property.
 * @returns {boolean} true when the value should be omitted.
 */
function isDefaultStyleValue(prop, value) {
  if (!value) return true;
  // Opacity is compared only against its real default ('1'). This has to run
  // before the generic skip set: that set contains '0', which would otherwise
  // drop `opacity: 0` and make a fully transparent element indistinguishable
  // from an opaque one.
  if (prop === 'opacity') return value === '1';
  if (VISUAL_SKIP.has(value)) return true;
  if (prop === 'cursor' && value === 'none') return true;
  if (prop === 'color' && value === 'rgb(0, 0, 0)') return true;
  if (prop === 'backgroundColor' && value === 'rgba(0, 0, 0, 0)') return true;
  if (prop === 'fontWeight' && (value === '400' || value === 'normal')) return true;
  return false;
}
export function promptExport(options = {}) {

@@ -41,2 +72,4 @@ const {

labelStyle = {},
promptMode = 'compact',
includeCoords = true,
} = options;

@@ -49,6 +82,4 @@

const meta = extractMetadata(ctx.element, interactiveSelector, semanticSelector);
// The ctx passed to afterClone is NOT the same object passed to the
// prompt() export later — snapdom spreads from ctx.options there. Stash
// on both so (a) standalone tests that pass a minimal ctx still see it
// and (b) the prompt() call can read it through the shared options ref.
// snapdom spreads a fresh ctx for the export phase from ctx.options,
// so write to both so the prompt() call below can read it.
ctx.__promptMetadata = meta;

@@ -78,2 +109,4 @@ if (ctx.options) ctx.options.__promptMetadata = meta;

const maxWidth = opts.maxImageWidth || maxImageWidth;
const mode = opts.promptMode || promptMode;
const withCoords = opts.includeCoords !== undefined ? opts.includeCoords : includeCoords;

@@ -87,4 +120,3 @@ const img = new Image();

const ratio =
img.naturalWidth > maxWidth ? maxWidth / img.naturalWidth : 1;
const ratio = img.naturalWidth > maxWidth ? maxWidth / img.naturalWidth : 1;
const w = Math.round(img.naturalWidth * ratio);

@@ -96,11 +128,8 @@ const h = Math.round(img.naturalHeight * ratio);

canvas.height = h;
const c2d = canvas.getContext('2d');
c2d.drawImage(img, 0, 0, w, h);
canvas.getContext('2d').drawImage(img, 0, 0, w, h);
const mime =
format === 'jpg' || format === 'jpeg'
? 'image/jpeg'
: format === 'webp'
? 'image/webp'
: 'image/png';
format === 'jpg' || format === 'jpeg' ? 'image/jpeg'
: format === 'webp' ? 'image/webp'
: 'image/png';
const dataURL = canvas.toDataURL(mime, quality);

@@ -121,2 +150,9 @@

// In compact + annotate, badges already encode positions on the
// image itself, so repeating coords in the prompt text is noise.
const emitCoords = mode === 'verbose' ? true : (withCoords && !annotate);
const prompt = mode === 'verbose'
? formatPromptVerbose(elements, { width: w, height: h }, emitCoords)
: formatPromptCompact(elements, { width: w, height: h }, emitCoords);
return {

@@ -126,3 +162,3 @@ image: dataURL,

dimensions: { width: w, height: h },
prompt: formatPromptText(elements, { width: w, height: h }),
prompt,
};

@@ -135,2 +171,130 @@ },

/* ── Accessible name ──────────────────────────── */
/**
 * Shorten `str` to at most `max` UTF-16 code units, replacing the tail with
 * an ellipsis when it has to be cut.
 *
 * @param {string} [str] - Text to shorten; null/undefined/'' yield ''.
 * @param {number} max - Maximum length of the result, ellipsis included.
 * @returns {string} `str` unchanged when it fits, otherwise a shortened copy
 *   ending in '…'. Returns '' when `max` is below 1.
 */
function truncate(str, max) {
  // With max < 1 there is no room even for the ellipsis; without this guard a
  // negative slice end would return almost the whole string.
  if (!str || max < 1) return '';
  if (str.length <= max) return str;
  let end = max - 1;
  if (end > 0) {
    // Never cut between the two halves of a surrogate pair: a dangling high
    // surrogate renders as U+FFFD and is invalid in well-formed output.
    const lastKept = str.charCodeAt(end - 1);
    if (lastKept >= 0xd800 && lastKept <= 0xdbff) end -= 1;
  }
  return `${str.slice(0, end)}…`;
}
/**
 * Resolve a human-readable name for `el`, trying sources in a fixed order
 * (a simplified take on the WAI-ARIA accessible-name computation):
 *   1. `aria-label`
 *   2. text of the elements referenced by `aria-labelledby`
 *   3. `alt` (only when the element is an IMG)
 *   4. `title`
 *   5. text of the first associated label (`el.labels[0]`)
 *   6. the element's own text content, truncated to 40 characters
 *
 * @param {Element} el - Element to name.
 * @returns {string} The first non-empty candidate, or '' when none is found.
 */
function accessibleName(el) {
  // Trimmed attribute value, or '' when the attribute is absent/blank.
  const trimmedAttr = (attrName) => {
    const raw = el.getAttribute(attrName);
    return raw ? raw.trim() : '';
  };

  const fromAriaLabel = trimmedAttr('aria-label');
  if (fromAriaLabel) return fromAriaLabel;

  const idRefs = el.getAttribute('aria-labelledby');
  if (idRefs) {
    // Look ids up in the element's own root first so references inside a
    // shadow root resolve; fall back to the global document otherwise.
    const scope = el.getRootNode();
    const names = [];
    for (const id of idRefs.trim().split(/\s+/)) {
      const target = scope && typeof scope.getElementById === 'function'
        ? scope.getElementById(id)
        : document.getElementById(id);
      const label = target ? (target.textContent || '').trim() : '';
      if (label) names.push(label);
    }
    if (names.length) return names.join(' ');
  }

  if (el.tagName === 'IMG') {
    const fromAlt = trimmedAttr('alt');
    if (fromAlt) return fromAlt;
  }

  const fromTitle = trimmedAttr('title');
  if (fromTitle) return fromTitle;

  const firstLabel = el.labels && el.labels[0];
  if (firstLabel) {
    const labelText = (firstLabel.textContent || '').trim();
    if (labelText) return labelText;
  }

  const ownText = (el.textContent || '').trim();
  if (!ownText) return '';
  return truncate(ownText, 40);
}
/* ── DOM state ────────────────────────────────── */
/**
 * Capture real runtime state (not just ARIA attributes). This is what
 * separates a useful map from a static screenshot annotation: an agent can
 * see "checkbox is unchecked / select has value=X / details is closed".
 *
 * @param {Element} el - Live element to inspect.
 * @returns {Object|null} Any of `checked`, `disabled`, `focus`, `value`,
 *   `selectedText`, `open`; null when nothing was recorded.
 */
function computeState(el) {
  const state = {};
  const pseudoFlags = [
    [':checked', 'checked'],
    [':disabled', 'disabled'],
    [':focus', 'focus'],
  ];
  for (const [selector, key] of pseudoFlags) {
    // Probe each pseudo-class on its own so one selector failing on an exotic
    // node does not hide the flags that would still have matched.
    try {
      if (el.matches(selector)) state[key] = true;
    } catch {
      /* best effort: leave this flag unset */
    }
  }

  const tag = el.tagName;
  if (tag === 'INPUT') {
    const type = (el.type || 'text').toLowerCase();
    // Checkbox/radio: `value` defaults to "on" when no explicit value attr
    // is set — noise; the meaningful signal is `checked`.
    // Password: the live value is a secret and must never be copied into
    // text that is handed to an LLM.
    const skipValue = type === 'checkbox' || type === 'radio' || type === 'password';
    if (!skipValue && el.value) state.value = el.value;
  } else if (tag === 'TEXTAREA') {
    if (el.value) state.value = el.value;
  } else if (tag === 'SELECT') {
    state.value = el.value;
    const opt = el.options && el.options[el.selectedIndex];
    if (opt) state.selectedText = opt.text || '';
  } else if (tag === 'DETAILS') {
    state.open = !!el.open;
  } else if (el.hasAttribute && el.hasAttribute('open')) {
    // Any other element carrying an `open` attribute.
    state.open = true;
  }
  return Object.keys(state).length ? state : null;
}
/* ── Visual styles ────────────────────────────── */
/**
 * Collect the computed values of the VISUAL_FIELDS properties that are not
 * considered defaults by isDefaultStyleValue().
 *
 * The live element is queried on purpose: the clone uses class-based styling
 * so its inline style is empty, and `getComputedStyle` on a detached clone
 * returns initial values. The original's computed style is what snapdom used
 * to build the capture, so the result matches what the screenshot shows.
 *
 * @param {Element} el - Live element to read styles from.
 * @returns {Object|null} Map of property name to computed value, or null when
 *   styles cannot be read or every value is a default.
 */
function computeVisualStyles(el) {
  let computed;
  try {
    computed = getComputedStyle(el);
  } catch {
    return null;
  }
  if (!computed) return null;

  const picked = {};
  let kept = 0;
  VISUAL_FIELDS.forEach((field) => {
    const value = computed[field];
    if (!isDefaultStyleValue(field, value)) {
      picked[field] = value;
      kept += 1;
    }
  });
  return kept > 0 ? picked : null;
}
/* ── Covered detection ────────────────────────── */
/**
 * True when another element is painted on top of the center of this one.
 * An agent that knows a button is covered by a modal won't try to click it.
 *
 * `document.elementFromPoint` stops at shadow hosts, so for a target inside a
 * shadow tree it reports the host rather than the target. The hit is therefore
 * followed down through open shadow roots before deciding; otherwise every
 * element inside a shadow tree would be flagged as covered. Closed shadow
 * roots cannot be inspected and still count as covering.
 *
 * @param {Element} el - Element being tested.
 * @param {{left:number, top:number, width:number, height:number}} rect -
 *   Bounding rect of `el`, in the coordinates `elementFromPoint` expects.
 * @returns {boolean} true when the topmost element at the center is neither
 *   `el` nor one of its descendants.
 */
function isCovered(el, rect) {
  if (!rect.width || !rect.height) return false;
  const cx = rect.left + rect.width / 2;
  const cy = rect.top + rect.height / 2;
  if (cx < 0 || cy < 0) return false;
  const doc = el.ownerDocument || document;
  if (!doc.elementFromPoint) return false;

  // Bound the descent so a misbehaving implementation cannot loop forever.
  const maxShadowDepth = 32;
  let top = doc.elementFromPoint(cx, cy);
  for (let depth = 0; depth <= maxShadowDepth; depth += 1) {
    if (!top || top === el) return false;
    if (el.contains && el.contains(top)) return false;
    const shadow = top.shadowRoot;
    if (!shadow || typeof shadow.elementFromPoint !== 'function') return true;
    const inner = shadow.elementFromPoint(cx, cy);
    if (!inner || inner === top) return true;
    top = inner;
  }
  return true;
}
/* ── Metadata extraction ──────────────────────── */

@@ -169,7 +333,2 @@

// Attribute names of interest for the element map (ARIA state plus common
// form, link and image attributes).
// NOTE(review): presumably walked by buildEntry() via `el.getAttribute(attr)`
// — the loop header is not visible in this hunk; confirm against the full file.
const COLLECTED_ATTRS = [
'role', 'aria-label', 'aria-expanded', 'aria-checked', 'aria-disabled',
'alt', 'href', 'placeholder', 'name', 'type', 'value', 'title', 'disabled',
];
function buildEntry(el, rootRect, id, type) {

@@ -187,3 +346,4 @@ const rect = el.getBoundingClientRect();

const tag = el.tagName.toLowerCase();
const text = (el.textContent || '').trim().slice(0, 200);
const maxText = type === 'interactive' ? 200 : 120;
const text = (el.textContent || '').trim().slice(0, maxText);

@@ -193,6 +353,48 @@ const attributes = {};

const val = el.getAttribute(attr);
if (val != null) attributes[attr] = val;
if (val == null || val === '' || val === 'false') continue;
attributes[attr] = val;
}
return { id, tag, type, text, bbox, attributes };
// For <img>, replace the (potentially long, cache-busted) src with just the
// filename — "logo.svg" is more useful to an LLM than the full URL.
if (tag === 'img' && el.src) {
try {
const base = (el.ownerDocument && el.ownerDocument.location)
? el.ownerDocument.location.href
: (typeof location !== 'undefined' ? location.href : undefined);
const url = base ? new URL(el.src, base) : new URL(el.src);
attributes.src = url.pathname.split('/').pop() || el.src;
} catch {
attributes.src = el.src;
}
}
const entry = {
id,
tag,
type,
name: accessibleName(el),
text,
bbox,
attributes,
};
const styles = computeVisualStyles(el);
if (styles) entry.styles = styles;
if (type === 'interactive') {
const state = computeState(el);
if (state) {
// Drop state.value when it just echoes attributes.value — no
// divergence between the initial HTML attribute and the current
// property, nothing for the LLM to learn from the repeat.
if (state.value !== undefined && state.value === attributes.value) {
delete state.value;
}
if (Object.keys(state).length) entry.state = state;
}
if (isCovered(el, rect)) entry.covered = true;
}
return entry;
}

@@ -223,6 +425,10 @@

badge.setAttribute('data-snap-prompt-label', String(el.id));
// Center the badge on the element's bbox, not on its top-left corner:
// `translate(-50%, -50%)` offsets from the anchor point.
const cx = el.bbox.x + el.bbox.width / 2;
const cy = el.bbox.y + el.bbox.height / 2;
Object.assign(badge.style, {
position: 'absolute',
left: `${el.bbox.x}px`,
top: `${el.bbox.y}px`,
left: `${cx}px`,
top: `${cy}px`,
transform: 'translate(-50%, -50%)',

@@ -251,7 +457,52 @@ minWidth: '18px',

/* ── Prompt text formatter ────────────────────── */
/* ── Prompt text formatters ───────────────────── */
function formatPromptText(elements, dimensions) {
/**
 * Render an element's state object as a compact suffix for a prompt line.
 *
 * Keys whose value is exactly `true` become bare flags and are listed first;
 * every other key is emitted as `key=<JSON value>`.
 *
 * @param {Object|null|undefined} state - State map; may be missing.
 * @returns {string} e.g. ` {checked, value="abc"}`, or '' when there is
 *   nothing to show.
 */
function stateToStr(state) {
  if (!state) return '';

  const entries = Object.entries(state);
  const flags = entries
    .filter(([, value]) => value === true)
    .map(([key]) => key);
  const pairs = entries
    .filter(([, value]) => value !== true)
    .map(([key, value]) => `${key}=${JSON.stringify(value)}`);

  const parts = flags.concat(pairs);
  if (parts.length === 0) return '';
  return ` {${parts.join(', ')}}`;
}
/**
 * Format a bounding box as a prompt-line suffix: ` (x,y width×height)`.
 *
 * @param {{x:number, y:number, width:number, height:number}} bbox
 * @returns {string} The formatted suffix, including its leading space.
 */
function coordsToStr(bbox) {
  const { x, y, width, height } = bbox;
  const origin = `${x},${y}`;
  const size = `${width}×${height}`;
  return ` (${origin} ${size})`;
}
/**
 * Build the compact prompt text: a one-line screenshot header followed by an
 * "Interactive:" section and a "Semantic:" section, each present only when it
 * has entries. Elements of any other type are ignored.
 *
 * @param {Array<Object>} elements - Element map entries (`id`, `tag`, `type`,
 *   and optionally `name`, `state`, `covered`, `bbox`).
 * @param {{width:number, height:number}} dimensions - Output image size in px.
 * @param {boolean} withCoords - Append each interactive element's bbox.
 * @returns {string} Newline-joined prompt text.
 */
function formatPromptCompact(elements, dimensions, withCoords) {
  // Quoted, length-capped name — or nothing when the element is unnamed.
  const quotedName = (el) => (el.name ? ` "${truncate(el.name, 60)}"` : '');
  const ofType = (kind) => elements.filter((entry) => entry.type === kind);

  const out = [`Screenshot (${dimensions.width}×${dimensions.height}px).`];
  const interactive = ofType('interactive');
  const semantic = ofType('semantic');

  if (interactive.length) {
    out.push('', 'Interactive:');
    interactive.forEach((el) => {
      const label = quotedName(el);
      const stateStr = stateToStr(el.state);
      const coveredStr = el.covered ? ' (covered)' : '';
      const coordStr = withCoords ? coordsToStr(el.bbox) : '';
      out.push(` [${el.id}] ${el.tag}${label}${stateStr}${coveredStr}${coordStr}`);
    });
  }

  if (semantic.length) {
    out.push('', 'Semantic:');
    semantic.forEach((el) => {
      out.push(` [${el.id}] ${el.tag}${quotedName(el)}`);
    });
  }

  return out.join('\n');
}
function formatPromptVerbose(elements, dimensions, withCoords) {
const lines = [
`Screenshot of a web page (${dimensions.width}\u00d7${dimensions.height}px).`,
`Screenshot of a web page (${dimensions.width}×${dimensions.height}px).`,
'',

@@ -266,9 +517,9 @@ ];

for (const el of interactive) {
const attrParts = Object.entries(el.attributes).map(
([k, v]) => `${k}="${v}"`
);
const text = el.text ? ` "${truncate(el.text, 60)}"` : '';
const pos = `(${el.bbox.x},${el.bbox.y} ${el.bbox.width}\u00d7${el.bbox.height})`;
const name = el.name ? ` "${truncate(el.name, 80)}"` : '';
const attrParts = Object.entries(el.attributes).map(([k, v]) => `${k}="${v}"`);
const attrs = attrParts.length ? ' ' + attrParts.join(' ') : '';
lines.push(` [${el.id}] <${el.tag}>${text} ${pos}${attrs}`);
const pos = withCoords ? coordsToStr(el.bbox) : '';
const st = stateToStr(el.state);
const cov = el.covered ? ' (covered)' : '';
lines.push(` [${el.id}] <${el.tag}>${name}${pos}${attrs}${st}${cov}`);
}

@@ -281,3 +532,3 @@ lines.push('');

for (const el of semantic) {
const text = el.text ? ` "${truncate(el.text, 80)}"` : '';
const name = el.name ? ` "${truncate(el.name, 80)}"` : '';
const attrParts = [];

@@ -287,3 +538,3 @@ if (el.attributes.alt) attrParts.push(`alt="${el.attributes.alt}"`);

const attrs = attrParts.length ? ' ' + attrParts.join(' ') : '';
lines.push(` [${el.id}] <${el.tag}>${text}${attrs}`);
lines.push(` [${el.id}] <${el.tag}>${name}${attrs}`);
}

@@ -295,6 +546,1 @@ lines.push('');

}
/**
 * Shorten `str` to at most `max` UTF-16 code units, replacing the tail with
 * an ellipsis when it has to be cut.
 *
 * @param {string} [str] - Text to shorten; null/undefined/'' yield ''.
 * @param {number} max - Maximum length of the result, ellipsis included.
 * @returns {string} `str` unchanged when it fits, otherwise a shortened copy
 *   ending in '\u2026'. Returns '' when `max` is below 1.
 */
function truncate(str, max) {
  // Guard missing text (previously a TypeError on `str.length`) and a budget
  // too small for even the ellipsis (previously a negative slice end that
  // returned almost the whole string).
  if (!str || max < 1) return '';
  if (str.length <= max) return str;
  let end = max - 1;
  if (end > 0) {
    // Never cut between the two halves of a surrogate pair.
    const lastKept = str.charCodeAt(end - 1);
    if (lastKept >= 0xd800 && lastKept <= 0xdbff) end -= 1;
  }
  return `${str.slice(0, end)}\u2026`;
}