@godscene/core - npm Package Compare versions

+5

-5

dist/es/agent/agent.mjs

		@@ -7,3 +7,3 @@ import { isAutoGLM, isUITars } from "../ai-model/auto-glm/util.mjs";
		import { ReportGenerator, assertReportGenerationOptions } from "../report-generator.mjs";
		import { getVersion, processCacheConfig, reportHTMLContent } from "../utils.mjs";
		import { getVersion, processCacheConfig } from "../utils.mjs";
		import { ScriptPlayer, buildDetailedLocateParam, parseYamlScript } from "../yaml/index.mjs";
		@@ -64,5 +64,5 @@ import { existsSync } from "node:fs";
		};
		const defaultReplanningCycleLimit = 20;
		const defaultVlmUiTarsReplanningCycleLimit = 40;
		const defaultAutoGlmReplanningCycleLimit = 100;
		const defaultReplanningCycleLimit = 200;
		const defaultVlmUiTarsReplanningCycleLimit = 200;
		const defaultAutoGlmReplanningCycleLimit = 200;
		class Agent {
		@@ -165,3 +165,3 @@ get onDumpUpdate() {
		reportHTMLString(opt) {
		return reportHTMLContent(this.dumpDataString(opt));
		return '';
		}
		@@ -168,0 +168,0 @@ writeOutActionDumps(executionDump) {

+1

-1

dist/es/agent/utils.mjs

		@@ -126,3 +126,3 @@ import { ScreenshotItem } from "../screenshot-item.mjs";
		}
		const getMidsceneVersion = ()=>"1.7.21";
		const getMidsceneVersion = ()=>"1.7.22";
		const parsePrompt = (prompt)=>{
		@@ -129,0 +129,0 @@ if ('string' == typeof prompt) return {

+2

-1

dist/es/ai-model/prompt/llm-planning.mjs

		@@ -140,3 +140,4 @@ import { findAllMidsceneLocatorField } from "../../common.mjs";
		const step1Description = shouldIncludeSubGoals ? "First, observe the current screenshot and previous logs, then break down the user's instruction into multiple high-level sub-goals. Update the status of sub-goals based on what you see in the current screenshot." : 'First, observe the current screenshot and previous logs to understand the current state.';
		const explicitInstructionRule = 'CRITICAL - Following Explicit Instructions: When the user gives you specific operation steps (not high-level goals), you MUST execute ONLY those exact steps - nothing more, nothing less. Do NOT add extra actions even if they seem logical. For example: "fill out the form" means only fill fields, do NOT submit; "click the button" means only click, do NOT wait for page load or verify results; "type \'hello\'" means only type, do NOT press Enter.';
		const explicitInstructionRule = `CRITICAL - Following Explicit Instructions: When the user gives you specific operation steps (not high-level goals), you MUST execute ONLY those exact steps - nothing more, nothing less. Do NOT add extra actions even if they seem logical. For example: "fill out the form" means only fill fields, do NOT submit; "click the button" means only click, do NOT wait for page load or verify results; "type 'hello'" means only type, do NOT press Enter.
		CRITICAL - Sequential Step Execution: When the user provides a numbered step sequence (e.g., "Step 1: click X, Step 2: wait for popup..."), you MUST execute each step in the EXACT order given. Do NOT skip, merge, or assume completion of any step based on the current screenshot state. Even if the screenshot shows a field already contains the expected value, you MUST still execute the complete step sequence (icon click → popup → search → select → verify → confirm). The only exception: when the user's instruction explicitly says "verify", "check", or "assert", you should verify the current state.`;
		const thoughtTagDescription = shouldIncludeSubGoals ? `REQUIRED: You MUST always output the <thought> tag. Never skip it.
		@@ -143,0 +144,0 @@

+5

-1

dist/es/common.mjs

		@@ -202,2 +202,6 @@ import { assert, isPlainObject } from "@godscene/shared/utils";
		}
		/**
		 * Reports whether an element is "small" relative to the screen.
		 *
		 * The threshold is 3% of the screen's shorter edge. The element counts as
		 * small when either its width or its height is at or below that threshold.
		 *
		 * @param rect - Element rectangle; only `width` and `height` are read.
		 * @param screenSize - Screen size; only `width` and `height` are read.
		 * @returns `true` when `rect.width` or `rect.height` is <= the threshold.
		 */
		function isSmallElement(rect, screenSize) {
			const threshold = 0.03 * Math.min(screenSize.width, screenSize.height);
			// Either dimension being tiny is enough to treat the element as small.
			return rect.width <= threshold || rect.height <= threshold;
		}
		async function markupImageForLLM(screenshotBase64, tree, size) {
		@@ -383,2 +387,2 @@ const elementsInfo = treeToList(tree);
		};
		export { PointSchema, RectSchema, SizeSchema, TMultimodalPromptSchema, TUserPromptSchema, adaptBbox, adaptBboxToRect, adaptDoubaoBbox, adaptGeminiBbox, adaptGpt5Bbox, adaptQwen2_5Bbox, buildYamlFlowFromPlans, dumpActionParam, dumpMidsceneLocatorField, expandSearchArea, fillBboxParam, finalizeActionName, findAllMidsceneLocatorField, getMidsceneLocationSchema, getReadableTimeString, ifMidsceneLocatorField, markupImageForLLM, mergeRects, normalized01000, parseActionParam, pointToBbox };
		export { PointSchema, RectSchema, SizeSchema, TMultimodalPromptSchema, TUserPromptSchema, adaptBbox, adaptBboxToRect, adaptDoubaoBbox, adaptGeminiBbox, adaptGpt5Bbox, adaptQwen2_5Bbox, buildYamlFlowFromPlans, dumpActionParam, dumpMidsceneLocatorField, expandSearchArea, fillBboxParam, finalizeActionName, findAllMidsceneLocatorField, getMidsceneLocationSchema, getReadableTimeString, ifMidsceneLocatorField, isSmallElement, markupImageForLLM, mergeRects, normalized01000, parseActionParam, pointToBbox };

+59

-18

dist/es/service/index.mjs

		import { isAutoGLM } from "../ai-model/auto-glm/util.mjs";
		import { AIResponseParseError, AiExtractElementInfo, AiLocateElement, callAIWithObjectResponse } from "../ai-model/index.mjs";
		import { AiLocateSection, buildSearchAreaConfig } from "../ai-model/inspect.mjs";
		import { AiLocateSection } from "../ai-model/inspect.mjs";
		import { elementDescriberInstruction } from "../ai-model/prompt/describe.mjs";
		import { expandSearchArea } from "../common.mjs";
		import { expandSearchArea, isSmallElement } from "../common.mjs";
		import { ServiceError } from "../types.mjs";
		@@ -45,18 +45,59 @@ import { compositeElementInfoImg, cropByRect } from "@godscene/shared/img";
		if (query.deepLocate && hasPlanLocatedElement) {
		const searchAreaConfig = await buildSearchAreaConfig({
		context,
		baseRect: opt.planLocatedElement.rect,
		modelFamily
		});
		searchArea = searchAreaConfig.rect;
		searchAreaRawResponse = JSON.stringify({
		source: 'plan-located-element',
		rect: opt.planLocatedElement.rect
		});
		searchAreaResponse = {
		rect: searchArea,
		imageBase64: searchAreaConfig.imageBase64,
		scale: searchAreaConfig.scale,
		rawResponse: searchAreaRawResponse
		};
		console.log('hasPlanLocatedElement');
		const planRect = opt.planLocatedElement.rect;
		if (isSmallElement(planRect, context.shotSize)) {
		console.log('isSmallElement');
		searchArea = {
		left: 0,
		top: 0,
		width: context.shotSize.width,
		height: context.shotSize.height
		};
		searchAreaRawResponse = JSON.stringify({
		source: 'plan-small-element',
		rect: planRect,
		fullScreen: true
		});
		searchAreaResponse = {
		rect: searchArea,
		imageBase64: context.screenshot.base64,
		scale: 1,
		rawResponse: searchAreaRawResponse
		};
		} else {
		console.log('opt?.planLocatedElement', opt.planLocatedElement);
		const rect = planRect;
		return {
		element: opt.planLocatedElement \|\| null,
		rect,
		dump: createServiceDump({
		type: 'locate',
		userQuery: {
		element: queryPrompt
		},
		matchedElement: opt.planLocatedElement ? [
		opt.planLocatedElement
		] : [],
		matchedRect: rect,
		data: null,
		taskInfo: {
		...this.taskInfo ? this.taskInfo : {},
		durationMs: 0,
		rawResponse: JSON.stringify({
		source: 'plan-large-element'
		}),
		formatResponse: JSON.stringify({
		rect
		}),
		usage: void 0,
		searchArea: void 0,
		searchAreaRawResponse: void 0,
		searchAreaUsage: void 0,
		reasoning_content: void 0
		},
		deepLocate: false,
		error: void 0
		})
		};
		}
		} else if (searchAreaPrompt) {
		@@ -63,0 +104,0 @@ searchAreaResponse = await AiLocateSection({

+4

-4

dist/lib/agent/agent.js

		@@ -103,5 +103,5 @@ "use strict";
		};
		const defaultReplanningCycleLimit = 20;
		const defaultVlmUiTarsReplanningCycleLimit = 40;
		const defaultAutoGlmReplanningCycleLimit = 100;
		const defaultReplanningCycleLimit = 200;
		const defaultVlmUiTarsReplanningCycleLimit = 200;
		const defaultAutoGlmReplanningCycleLimit = 200;
		class Agent {
		@@ -204,3 +204,3 @@ get onDumpUpdate() {
		reportHTMLString(opt) {
		return (0, external_utils_js_namespaceObject.reportHTMLContent)(this.dumpDataString(opt));
		return '';
		}
		@@ -207,0 +207,0 @@ writeOutActionDumps(executionDump) {

+1

-1

dist/lib/agent/utils.js

		@@ -173,3 +173,3 @@ "use strict";
		}
		const getMidsceneVersion = ()=>"1.7.21";
		const getMidsceneVersion = ()=>"1.7.22";
		const parsePrompt = (prompt)=>{
		@@ -176,0 +176,0 @@ if ('string' == typeof prompt) return {

+2

-1

dist/lib/ai-model/prompt/llm-planning.js

		@@ -169,3 +169,4 @@ "use strict";
		const step1Description = shouldIncludeSubGoals ? "First, observe the current screenshot and previous logs, then break down the user's instruction into multiple high-level sub-goals. Update the status of sub-goals based on what you see in the current screenshot." : 'First, observe the current screenshot and previous logs to understand the current state.';
		const explicitInstructionRule = 'CRITICAL - Following Explicit Instructions: When the user gives you specific operation steps (not high-level goals), you MUST execute ONLY those exact steps - nothing more, nothing less. Do NOT add extra actions even if they seem logical. For example: "fill out the form" means only fill fields, do NOT submit; "click the button" means only click, do NOT wait for page load or verify results; "type \'hello\'" means only type, do NOT press Enter.';
		const explicitInstructionRule = `CRITICAL - Following Explicit Instructions: When the user gives you specific operation steps (not high-level goals), you MUST execute ONLY those exact steps - nothing more, nothing less. Do NOT add extra actions even if they seem logical. For example: "fill out the form" means only fill fields, do NOT submit; "click the button" means only click, do NOT wait for page load or verify results; "type 'hello'" means only type, do NOT press Enter.
		CRITICAL - Sequential Step Execution: When the user provides a numbered step sequence (e.g., "Step 1: click X, Step 2: wait for popup..."), you MUST execute each step in the EXACT order given. Do NOT skip, merge, or assume completion of any step based on the current screenshot state. Even if the screenshot shows a field already contains the expected value, you MUST still execute the complete step sequence (icon click → popup → search → select → verify → confirm). The only exception: when the user's instruction explicitly says "verify", "check", or "assert", you should verify the current state.`;
		const thoughtTagDescription = shouldIncludeSubGoals ? `REQUIRED: You MUST always output the <thought> tag. Never skip it.
		@@ -172,0 +173,0 @@

+7

-0

dist/lib/common.js

		@@ -37,2 +37,3 @@ "use strict";
		getReadableTimeString: ()=>getReadableTimeString,
		isSmallElement: ()=>isSmallElement,
		expandSearchArea: ()=>expandSearchArea,
		@@ -256,2 +257,6 @@ mergeRects: ()=>mergeRects,
		}
		/**
		 * Reports whether an element is "small" relative to the screen.
		 *
		 * The threshold is 3% of the screen's shorter edge. The element counts as
		 * small when either its width or its height is at or below that threshold.
		 *
		 * @param rect - Element rectangle; only `width` and `height` are read.
		 * @param screenSize - Screen size; only `width` and `height` are read.
		 * @returns `true` when `rect.width` or `rect.height` is <= the threshold.
		 */
		function isSmallElement(rect, screenSize) {
			const threshold = 0.03 * Math.min(screenSize.width, screenSize.height);
			// Either dimension being tiny is enough to treat the element as small.
			return rect.width <= threshold || rect.height <= threshold;
		}
		async function markupImageForLLM(screenshotBase64, tree, size) {
		@@ -458,2 +463,3 @@ const elementsInfo = (0, extractor_namespaceObject.treeToList)(tree);
		exports.ifMidsceneLocatorField = __webpack_exports__.ifMidsceneLocatorField;
		exports.isSmallElement = __webpack_exports__.isSmallElement;
		exports.markupImageForLLM = __webpack_exports__.markupImageForLLM;
		@@ -486,2 +492,3 @@ exports.mergeRects = __webpack_exports__.mergeRects;
		"ifMidsceneLocatorField",
		"isSmallElement",
		"markupImageForLLM",
		@@ -488,0 +495,0 @@ "mergeRects",

+57

-16

dist/lib/service/index.js

		@@ -73,18 +73,59 @@ "use strict";
		if (query.deepLocate && hasPlanLocatedElement) {
		const searchAreaConfig = await (0, inspect_js_namespaceObject.buildSearchAreaConfig)({
		context,
		baseRect: opt.planLocatedElement.rect,
		modelFamily
		});
		searchArea = searchAreaConfig.rect;
		searchAreaRawResponse = JSON.stringify({
		source: 'plan-located-element',
		rect: opt.planLocatedElement.rect
		});
		searchAreaResponse = {
		rect: searchArea,
		imageBase64: searchAreaConfig.imageBase64,
		scale: searchAreaConfig.scale,
		rawResponse: searchAreaRawResponse
		};
		console.log('hasPlanLocatedElement');
		const planRect = opt.planLocatedElement.rect;
		if ((0, external_common_js_namespaceObject.isSmallElement)(planRect, context.shotSize)) {
		console.log('isSmallElement');
		searchArea = {
		left: 0,
		top: 0,
		width: context.shotSize.width,
		height: context.shotSize.height
		};
		searchAreaRawResponse = JSON.stringify({
		source: 'plan-small-element',
		rect: planRect,
		fullScreen: true
		});
		searchAreaResponse = {
		rect: searchArea,
		imageBase64: context.screenshot.base64,
		scale: 1,
		rawResponse: searchAreaRawResponse
		};
		} else {
		console.log('opt?.planLocatedElement', opt.planLocatedElement);
		const rect = planRect;
		return {
		element: opt.planLocatedElement \|\| null,
		rect,
		dump: (0, external_utils_js_namespaceObject.createServiceDump)({
		type: 'locate',
		userQuery: {
		element: queryPrompt
		},
		matchedElement: opt.planLocatedElement ? [
		opt.planLocatedElement
		] : [],
		matchedRect: rect,
		data: null,
		taskInfo: {
		...this.taskInfo ? this.taskInfo : {},
		durationMs: 0,
		rawResponse: JSON.stringify({
		source: 'plan-large-element'
		}),
		formatResponse: JSON.stringify({
		rect
		}),
		usage: void 0,
		searchArea: void 0,
		searchAreaRawResponse: void 0,
		searchAreaUsage: void 0,
		reasoning_content: void 0
		},
		deepLocate: false,
		error: void 0
		})
		};
		}
		} else if (searchAreaPrompt) {
		@@ -91,0 +132,0 @@ searchAreaResponse = await (0, inspect_js_namespaceObject.AiLocateSection)({

+3

-3

dist/lib/types.js

		@@ -81,7 +81,7 @@ "use strict";
		"UIContext",
		"GroupedActionDump",
		"ServiceError",
		"default",
		"ReportActionDump",
		"ExecutionDump",
		"ServiceError"
		"GroupedActionDump",
		"ExecutionDump"
		].indexOf(__rspack_import_key) < 0) __rspack_reexport[__rspack_import_key] = ()=>_yaml__rspack_import_5[__rspack_import_key];
		@@ -88,0 +88,0 @@ __webpack_require__.d(__webpack_exports__, __rspack_reexport);

+11

-0

dist/types/common.d.ts

		@@ -44,2 +44,13 @@ import type { BaseElement, DeviceAction, ElementTreeNode, MidsceneYamlFlowItem, PlanningAction, Rect, Size } from './types';
		export declare function expandSearchArea(rect: Rect, screenSize: Size): Rect;
		/**
		* Check whether an element is small relative to the screen.
		*
		* The threshold is 3% of the screen's shorter edge; the element is small
		* when either its width or its height is at or below that threshold.
		*
		* Used to determine whether to use DeepLocate with full screen screenshot
		* when deepLocate is enabled and plan provides element coordinates.
		*
		* @param rect - The element rectangle
		* @param screenSize - The screen size
		* @returns true if element is small (width or height <= short edge * 0.03)
		*/
		export declare function isSmallElement(rect: Rect, screenSize: Size): boolean;
		export declare function markupImageForLLM(screenshotBase64: string, tree: ElementTreeNode<BaseElement>, size: Size): Promise<string>;
		@@ -46,0 +57,0 @@ export declare function buildYamlFlowFromPlans(plans: PlanningAction[], actionSpace: DeviceAction<any>[]): MidsceneYamlFlowItem[];

+2

-2

package.json

		{
		"name": "@godscene/core",
		"description": "Automate browser actions, extract data, and perform assertions using AI. It offers JavaScript SDK, Chrome extension, and support for scripting in YAML. See https://midscenejs.com/ for details.",
		"version": "1.7.22",
		"version": "1.7.23",
		"repository": "https://github.com/web-infra-dev/midscene",
		@@ -100,3 +100,3 @@ "homepage": "https://midscenejs.com/",
		"zod": "^3.25.1",
		"@godscene/shared": "1.7.22"
		"@godscene/shared": "1.7.23"
		},
		@@ -103,0 +103,0 @@ "devDependencies": {

dist/es/utils.mjs

Sorry, the diff of this file is too big to display

dist/lib/utils.js

Sorry, the diff of this file is too big to display

@godscene/core - npm Package Compare versions

New alerts

Fixed alerts

Worsened metrics

Dependency changes